Diffstat (limited to 'roms/skiboot/core')
99 files changed, 28745 insertions, 0 deletions
diff --git a/roms/skiboot/core/Makefile.inc b/roms/skiboot/core/Makefile.inc new file mode 100644 index 000000000..829800e5b --- /dev/null +++ b/roms/skiboot/core/Makefile.inc @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +# Copyright 2012-2019 IBM Corp +# -*-Makefile-*- + +SUBDIRS += core +CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o +CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o +CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o +CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o +CORE_OBJS += vpd.o platform.o nvram.o nvram-format.o hmi.o mce.o +CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o +CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o +CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o +CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o +CORE_OBJS += flash-firmware-versions.o opal-dump.o + +ifeq ($(SKIBOOT_GCOV),1) +CORE_OBJS += gcov-profiling.o +CFLAGS_SKIP_core/gcov-profiling.o = -Wsuggest-attribute=const +endif + +CORE=core/built-in.a + +CFLAGS_SKIP_core/relocate.o = -pg -fstack-protector-all +CFLAGS_SKIP_core/relocate.o += -fstack-protector -fstack-protector-strong +CFLAGS_SKIP_core/relocate.o += -fprofile-arcs -ftest-coverage + +$(CORE): $(CORE_OBJS:%=core/%) diff --git a/roms/skiboot/core/affinity.c b/roms/skiboot/core/affinity.c new file mode 100644 index 000000000..0209d3cd9 --- /dev/null +++ b/roms/skiboot/core/affinity.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* Copyright 2013-2019 IBM Corp. */ + +/* + * + * We currently construct our associativity properties as such: + * + * - For "chip" devices (bridges, memory, ...), 4 entries: + * + * - CCM node ID + * - HW card ID + * - HW module ID + * - Chip ID + * + * The information is constructed based on the chip ID which (unlike + * pHyp) is our HW chip ID (aka "XSCOM" chip ID). We use it to retrieve + * the other properties from the corresponding chip/xscom node in the + * device-tree. If those properties are absent, 0 is used. + * + * - For "core" devices, we add a 5th entry: + * + * - Core ID + * + * Here too, we do not use the "cooked" HW processor ID from HDAT but + * instead use the real HW core ID which is basically the interrupt + * server number of thread 0 on that core. + * + * + * The ibm,associativity-reference-points property is currently set to + * 4,4 indicating that the chip ID is our only reference point. This + * should be extended to encompass the node IDs eventually. + */ +#include <skiboot.h> +#include <opal.h> +#include <device.h> +#include <console.h> +#include <trace.h> +#include <chip.h> +#include <cpu.h> +#include <affinity.h> + +static uint32_t get_chip_node_id(struct proc_chip *chip) +{ + /* If the xscom node has an ibm,ccm-node-id property, use it */ + if (dt_has_node_property(chip->devnode, "ibm,ccm-node-id", NULL)) + return dt_prop_get_u32(chip->devnode, "ibm,ccm-node-id"); + + /* + * Else use the 3 top bits of the chip ID which should be + * the node on P8 + */ + return chip->id >> 3; +} + +void add_associativity_ref_point(void) +{ + int ref2 = 0x4; + + /* + * Note about our use of reference points: + * + * Linux currently supports up to three levels of NUMA. We use the + * first reference point for the node ID and the second reference + * point for a second level of affinity. We always use the chip ID + * (4) for the first reference point. 
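 *
 * As an illustrative sketch (the values are hypothetical, not taken
 * from any particular machine), a memory or bridge device sitting on
 * CCM node 1, HW card 0, HW module 0, chip 4 would end up with
 *
 *     ibm,associativity = <4 1 0 0 4>
 *
 * and the <4 ref2 2> reference points built below tell Linux to treat
 * entry 4 (the chip ID) as the primary NUMA domain.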
+ * + * Choosing the second level of affinity is model specific + * unfortunately. Current POWER8E models should use the DCM + * as a second level of NUMA. + * + * If there is a way to obtain this information from the FSP + * that would be ideal, but for now hardwire our POWER8E setting. + * + * For GPU nodes we add a third level of NUMA, such that the + * distance of the GPU node from all other nodes is uniformly + * the highest. + */ + if (PVR_TYPE(mfspr(SPR_PVR)) == PVR_TYPE_P8E) + ref2 = 0x3; + + dt_add_property_cells(opal_node, "ibm,associativity-reference-points", + 0x4, ref2, 0x2); +} + +void add_chip_dev_associativity(struct dt_node *dev) +{ + uint32_t chip_id = dt_get_chip_id(dev); + struct proc_chip *chip = get_chip(chip_id); + uint32_t hw_cid, hw_mid; + + if (!chip) + return; + + hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0); + hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0); + + dt_add_property_cells(dev, "ibm,associativity", 4, + get_chip_node_id(chip), + hw_cid, hw_mid, chip_id); +} + +void add_core_associativity(struct cpu_thread *cpu) +{ + struct proc_chip *chip = get_chip(cpu->chip_id); + uint32_t hw_cid, hw_mid, core_id; + + if (!chip) + return; + + if (proc_gen == proc_gen_p8) + core_id = (cpu->pir >> 3) & 0xf; + else if (proc_gen == proc_gen_p9) + core_id = (cpu->pir >> 2) & 0x1f; + else if (proc_gen == proc_gen_p10) + core_id = (cpu->pir >> 2) & 0x1f; + else + return; + + hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0); + hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0); + + dt_add_property_cells(cpu->node, "ibm,associativity", 5, + get_chip_node_id(chip), + hw_cid, hw_mid, chip->id, core_id); +} diff --git a/roms/skiboot/core/bitmap.c b/roms/skiboot/core/bitmap.c new file mode 100644 index 000000000..8de1356c3 --- /dev/null +++ b/roms/skiboot/core/bitmap.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* Copyright 2016 IBM Corp. */ + +#include "bitmap.h" + +static int __bitmap_find_bit(bitmap_t map, unsigned int start, unsigned int count, + bool value) +{ + unsigned int el, first_bit; + unsigned int end = start + count; + bitmap_elem_t e, ev; + int b; + + ev = value ? -1ul : 0; + el = BITMAP_ELEM(start); + first_bit = BITMAP_BIT(start); + + while (start < end) { + e = map[el] ^ ev; + e |= ((1ul << first_bit) - 1); + if (~e) + break; + start = (start + BITMAP_ELSZ) & ~(BITMAP_ELSZ - 1); + first_bit = 0; + el++; + } + for (b = first_bit; b < BITMAP_ELSZ && start < end; b++,start++) { + if ((e & (1ull << b)) == 0) + return start; + } + + return -1; +} + +int bitmap_find_zero_bit(bitmap_t map, unsigned int start, unsigned int count) +{ + return __bitmap_find_bit(map, start, count, false); +} + +int bitmap_find_one_bit(bitmap_t map, unsigned int start, unsigned int count) +{ + return __bitmap_find_bit(map, start, count, true); +} + diff --git a/roms/skiboot/core/buddy.c b/roms/skiboot/core/buddy.c new file mode 100644 index 000000000..b36e407d1 --- /dev/null +++ b/roms/skiboot/core/buddy.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* Copyright 2016-2017 IBM Corp. */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +#include "buddy.h" + +#define BUDDY_DEBUG +#undef BUDDY_VERBOSE + +#ifdef BUDDY_VERBOSE +#define BUDDY_NOISE(fmt...) printf(fmt) +#else +#define BUDDY_NOISE(fmt...) 
do { } while(0) +#endif + +static inline unsigned int buddy_map_size(struct buddy *b) +{ + return 1u << (b->max_order + 1); +} + +static inline unsigned int buddy_order_start(struct buddy *b, + unsigned int order) +{ + unsigned int level = b->max_order - order; + + /* Starting bit of index for order */ + return 1u << level; +} + +static inline unsigned int buddy_index_to_node(struct buddy *b, + unsigned int index, + unsigned int order) +{ + /* Ensure the index is a multiple of the order */ + assert((index & ((1u << order) - 1)) == 0); + + return buddy_order_start(b, order) + (index >> order); +} + +static inline unsigned int buddy_node_to_index(struct buddy *b, + unsigned int node, + unsigned int order) +{ + unsigned int start = buddy_order_start(b, order); + + return (node - start) << order; +} + +#ifdef BUDDY_DEBUG +static void buddy_check_alloc(struct buddy *b, unsigned int node) +{ + assert(bitmap_tst_bit(b->map, node)); +} + +static void buddy_check_alloc_down(struct buddy *b, unsigned int node) +{ + unsigned int i, count = 1; + + while (node < buddy_map_size(b)) { + for (i = 0; i < count; i++) + buddy_check_alloc(b, node + i); + + /* Down one level */ + node <<= 1; + count <<= 1; + } +} +#else +static inline void buddy_check_alloc(struct buddy *b __unused, unsigned int node __unused) {} +static inline void buddy_check_alloc_down(struct buddy *b __unused, unsigned int node __unused) {} +#endif + +int buddy_alloc(struct buddy *b, unsigned int order) +{ + unsigned int o; + int node, index; + + BUDDY_NOISE("buddy_alloc(%d)\n", order); + /* + * Find the first order up the tree from our requested order that + * has at least one free node. + */ + for (o = order; o <= b->max_order; o++) { + if (b->freecounts[o] > 0) + break; + } + + /* Nothing found ? fail */ + if (o > b->max_order) { + BUDDY_NOISE(" no free nodes !\n"); + return -1; + } + + BUDDY_NOISE(" %d free node(s) at order %d, bits %d(%d)\n", + b->freecounts[o], o, + buddy_order_start(b, o), + 1u << (b->max_order - o)); + + /* Now find a free node */ + node = bitmap_find_zero_bit(b->map, buddy_order_start(b, o), + 1u << (b->max_order - o)); + + /* There should always be one */ + assert(node >= 0); + + /* Mark it allocated and decrease free count */ + bitmap_set_bit(b->map, node); + b->freecounts[o]--; + + /* We know that node was free which means all its children must have + * been marked "allocated". Double check. + */ + buddy_check_alloc_down(b, node); + + /* We have a node, we've marked it allocated, now we need to go down + * the tree until we reach "order" which is the order we need. For + * each level along the way, we mark the buddy free and leave the + * first child allocated. 
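 *
 * A worked example of this split-down (hypothetical tree with
 * max_order = 4): if the first free node found is node 4 at order 2
 * and the caller asked for order 0, we mark node 4 allocated, then
 * descend twice: keep node 8 and mark its buddy 9 free, keep node 16
 * and mark its buddy 17 free, and finally return the index that
 * corresponds to node 16.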
+ */ + while (o > order) { + /* Next level down */ + o--; + node <<= 1; + + BUDDY_NOISE(" order %d, using %d marking %d free\n", + o, node, node ^ 1); + bitmap_clr_bit(b->map, node ^ 1); + b->freecounts[o]++; + assert(bitmap_tst_bit(b->map, node)); + } + + index = buddy_node_to_index(b, node, order); + + BUDDY_NOISE(" result is index %d (node %d)\n", index, node); + + /* We have a node, convert it to an element number */ + return index; +} + +bool buddy_reserve(struct buddy *b, unsigned int index, unsigned int order) +{ + unsigned int node, freenode, o; + + assert(index < (1u << b->max_order)); + + BUDDY_NOISE("buddy_reserve(%d,%d)\n", index, order); + + /* Get bit number for node */ + node = buddy_index_to_node(b, index, order); + + BUDDY_NOISE(" node=%d\n", node); + + /* Find something free */ + for (freenode = node, o = order; freenode > 0; freenode >>= 1, o++) + if (!bitmap_tst_bit(b->map, freenode)) + break; + + BUDDY_NOISE(" freenode=%d order %d\n", freenode, o); + + /* Nothing free, error out */ + if (!freenode) + return false; + + /* We sit on a free node, mark it busy */ + bitmap_set_bit(b->map, freenode); + assert(b->freecounts[o]); + b->freecounts[o]--; + + /* We know that node was free which means all its children must have + * been marked "allocated". Double check. + */ + buddy_check_alloc_down(b, freenode); + + /* Reverse-walk the path and break down nodes */ + while (o > order) { + /* Next level down */ + o--; + freenode <<= 1; + + /* Find the right one on the path to node */ + if (node & (1u << (o - order))) + freenode++; + + BUDDY_NOISE(" order %d, using %d marking %d free\n", + o, freenode, freenode ^ 1); + bitmap_clr_bit(b->map, freenode ^ 1); + b->freecounts[o]++; + assert(bitmap_tst_bit(b->map, node)); + } + assert(node == freenode); + + return true; +} + +void buddy_free(struct buddy *b, unsigned int index, unsigned int order) +{ + unsigned int node; + + assert(index < (1u << b->max_order)); + + BUDDY_NOISE("buddy_free(%d,%d)\n", index, order); + + /* Get bit number for node */ + node = buddy_index_to_node(b, index, order); + + BUDDY_NOISE(" node=%d\n", node); + + /* We assume that anything freed was fully allocated, ie, + * there is no child node of that allocation index/order + * that is already free. + * + * BUDDY_DEBUG will verify it at the cost of performances + */ + buddy_check_alloc_down(b, node); + + /* Propagate if buddy is free */ + while (order < b->max_order && !bitmap_tst_bit(b->map, node ^ 1)) { + BUDDY_NOISE(" order %d node %d buddy %d free, propagating\n", + order, node, node ^ 1); + + /* Mark buddy busy (we are already marked busy) */ + bitmap_set_bit(b->map, node ^ 1); + + /* Reduce free count */ + assert(b->freecounts[order] > 0); + b->freecounts[order]--; + + /* Get parent */ + node >>= 1; + order++; + + /* It must be busy already ! */ + buddy_check_alloc(b, node); + + BUDDY_NOISE(" testing order %d node %d\n", order, node ^ 1); + } + + /* No more coalescing, mark it free */ + bitmap_clr_bit(b->map, node); + + /* Increase the freelist count for that level */ + b->freecounts[order]++; + + BUDDY_NOISE(" free count at order %d is %d\n", + order, b->freecounts[order]); +} + +void buddy_reset(struct buddy *b) +{ + unsigned int bsize = BITMAP_BYTES(1u << (b->max_order + 1)); + + BUDDY_NOISE("buddy_reset()\n"); + /* We fill the bitmap with 1's to make it completely "busy" */ + memset(b->map, 0xff, bsize); + memset(b->freecounts, 0, sizeof(b->freecounts)); + + /* We mark the root of the tree free, this is entry 1 as entry 0 + * is unused. 
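 *
 * For reference, a caller-side usage sketch of this allocator
 * (the sizes and indices are made up for illustration):
 *
 *     struct buddy *b = buddy_create(5);  // manages 2^5 = 32 elements
 *     int i = buddy_alloc(b, 2);          // a 4-element aligned block
 *     buddy_reserve(b, 16, 3);            // pin elements 16..23
 *     buddy_free(b, i, 2);
 *     buddy_destroy(b);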
+ */ + buddy_free(b, 0, b->max_order); +} + +struct buddy *buddy_create(unsigned int max_order) +{ + struct buddy *b; + unsigned int bsize; + + assert(max_order <= BUDDY_MAX_ORDER); + + bsize = BITMAP_BYTES(1u << (max_order + 1)); + + b = zalloc(sizeof(struct buddy) + bsize); + if (!b) + return NULL; + b->max_order = max_order; + + BUDDY_NOISE("Map @%p, size: %d bytes\n", b->map, bsize); + + buddy_reset(b); + + return b; +} + +void buddy_destroy(struct buddy *b) +{ + free(b); +} + diff --git a/roms/skiboot/core/chip.c b/roms/skiboot/core/chip.c new file mode 100644 index 000000000..2d95b2e05 --- /dev/null +++ b/roms/skiboot/core/chip.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* Copyright 2013-2019 IBM Corp. */ + +#include <skiboot.h> +#include <chip.h> +#include <console.h> +#include <device.h> +#include <timebase.h> +#include <cpu.h> + +static struct proc_chip *chips[MAX_CHIPS]; +enum proc_chip_quirks proc_chip_quirks; + +uint32_t pir_to_chip_id(uint32_t pir) +{ + if (proc_gen == proc_gen_p10) + return P10_PIR2GCID(pir); + else if (proc_gen == proc_gen_p9) + return P9_PIR2GCID(pir); + else if (proc_gen == proc_gen_p8) + return P8_PIR2GCID(pir); + else + assert(false); +} + +uint32_t pir_to_core_id(uint32_t pir) +{ + if (proc_gen == proc_gen_p10) { + if (this_cpu()->is_fused_core) + return P10_PIRFUSED2NORMALCOREID(pir); + else + return P10_PIR2COREID(pir); + } else if (proc_gen == proc_gen_p9) { + if (this_cpu()->is_fused_core) + return P9_PIRFUSED2NORMALCOREID(pir); + else + return P9_PIR2COREID(pir); + } else if (proc_gen == proc_gen_p8) { + return P8_PIR2COREID(pir); + } else { + assert(false); + } +} + +uint32_t pir_to_fused_core_id(uint32_t pir) +{ + if (proc_gen == proc_gen_p10) { + if (this_cpu()->is_fused_core) + return P10_PIR2FUSEDCOREID(pir); + else + return P10_PIR2COREID(pir); + } else if (proc_gen == proc_gen_p9) { + if (this_cpu()->is_fused_core) + return P9_PIR2FUSEDCOREID(pir); + else + return P9_PIR2COREID(pir); + } else if (proc_gen == proc_gen_p8) { + return P8_PIR2COREID(pir); + } else { + assert(false); + } +} + +uint32_t pir_to_thread_id(uint32_t pir) +{ + if (proc_gen == proc_gen_p10) { + if (this_cpu()->is_fused_core) + return P10_PIRFUSED2NORMALTHREADID(pir); + else + return P10_PIR2THREADID(pir); + } else if (proc_gen == proc_gen_p9) { + if (this_cpu()->is_fused_core) + return P9_PIRFUSED2NORMALTHREADID(pir); + else + return P9_PIR2THREADID(pir); + } else if (proc_gen == proc_gen_p8) { + return P8_PIR2THREADID(pir); + } else { + assert(false); + } +} + +struct proc_chip *next_chip(struct proc_chip *chip) +{ + unsigned int i; + + for (i = chip ? (chip->id + 1) : 0; i < MAX_CHIPS; i++) + if (chips[i]) + return chips[i]; + return NULL; +} + + +struct proc_chip *get_chip(uint32_t chip_id) +{ + if (chip_id >= MAX_CHIPS) + return NULL; + return chips[chip_id]; +} + +static void init_chip(struct dt_node *dn) +{ + struct proc_chip *chip; + uint32_t id; + const char *lc = NULL; + + id = dt_get_chip_id(dn); + assert(id < MAX_CHIPS); + assert(chips[id] == NULL); + + chip = zalloc(sizeof(struct proc_chip)); + assert(chip); + + chip->id = id; + chip->devnode = dn; + + chip->dbob_id = dt_prop_get_u32_def(dn, "ibm,dbob-id", 0xffffffff); + chip->pcid = dt_prop_get_u32_def(dn, "ibm,proc-chip-id", 0xffffffff); + + if (dt_prop_get_u32_def(dn, "ibm,occ-functional-state", 0)) + chip->occ_functional = true; + else + chip->occ_functional = false; + + list_head_init(&chip->i2cms); + + /* Update the location code for this chip. 
*/ + if (dt_has_node_property(dn, "ibm,loc-code", NULL)) + lc = dt_prop_get(dn, "ibm,loc-code"); + else if (dt_has_node_property(dn, "ibm,slot-location-code", NULL)) + lc = dt_prop_get(dn, "ibm,slot-location-code"); + + if (lc) + chip->loc_code = strdup(lc); + + chip->primary_topology = dt_prop_get_u32_def(dn, + "ibm,primary-topology-index", 0xffffffff); + + prlog(PR_INFO, "CHIP: Initialised chip %d from %s\n", id, dn->name); + chips[id] = chip; +} + +void init_chips(void) +{ + struct dt_node *xn; + + /* Detect mambo chip */ + if (dt_find_by_path(dt_root, "/mambo")) { + proc_chip_quirks |= QUIRK_NO_CHIPTOD | QUIRK_MAMBO_CALLOUTS + | QUIRK_NO_F000F | QUIRK_NO_PBA | QUIRK_NO_OCC_IRQ + | QUIRK_NO_RNG; + + enable_mambo_console(); + + prlog(PR_NOTICE, "CHIP: Detected Mambo simulator\n"); + + dt_for_each_compatible(dt_root, xn, "ibm,mambo-chip") + init_chip(xn); + } + + /* Detect simics */ + if (dt_find_by_path(dt_root, "/simics")) { + proc_chip_quirks |= QUIRK_SIMICS + | QUIRK_NO_PBA | QUIRK_NO_OCC_IRQ | QUIRK_SLOW_SIM; + tb_hz = 512000; + prlog(PR_NOTICE, "CHIP: Detected Simics simulator\n"); + } + /* Detect Awan emulator */ + if (dt_find_by_path(dt_root, "/awan")) { + proc_chip_quirks |= QUIRK_NO_CHIPTOD | QUIRK_NO_F000F + | QUIRK_NO_PBA | QUIRK_NO_OCC_IRQ | QUIRK_SLOW_SIM; + tb_hz = 512000; + prlog(PR_NOTICE, "CHIP: Detected Awan emulator\n"); + } + /* Detect Qemu */ + if (dt_node_is_compatible(dt_root, "qemu,powernv") || + dt_node_is_compatible(dt_root, "qemu,powernv8") || + dt_node_is_compatible(dt_root, "qemu,powernv9") || + dt_node_is_compatible(dt_root, "qemu,powernv10") || + dt_find_by_path(dt_root, "/qemu")) { + proc_chip_quirks |= QUIRK_QEMU | QUIRK_NO_CHIPTOD + | QUIRK_NO_DIRECT_CTL | QUIRK_NO_RNG; + prlog(PR_NOTICE, "CHIP: Detected QEMU simulator\n"); + } + + /* We walk the chips based on xscom nodes in the tree */ + dt_for_each_compatible(dt_root, xn, "ibm,xscom") { + init_chip(xn); + } +} diff --git a/roms/skiboot/core/console-log.c b/roms/skiboot/core/console-log.c new file mode 100644 index 000000000..21a1442bd --- /dev/null +++ b/roms/skiboot/core/console-log.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Console Log routines + * Wraps libc and console lower level functions + * does fancy-schmancy things like timestamps and priorities + * Doesn't make waffles. + * + * Copyright 2013-2018 IBM Corp. + */ + +#include "skiboot.h" +#include "unistd.h" +#include "stdio.h" +#include "console.h" +#include "timebase.h" +#include <debug_descriptor.h> + +static int vprlog(int log_level, const char *fmt, va_list ap) +{ + int count; + char buffer[320]; + bool flush_to_drivers = true; + unsigned long tb = mftb(); + + /* It's safe to return 0 when we "did" something here + * as only printf cares about how much we wrote, and + * if you change log_level to below PR_PRINTF then you + * get everything you deserve. + * By default, only PR_DEBUG and higher are stored in memory. + * PR_TRACE and PR_INSANE are for those having a bad day. 
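 *
 * Both thresholds live in a single byte: the high nibble of
 * debug_descriptor.console_log_levels gates what is kept in the
 * memory console, the low nibble gates what is flushed out to the
 * console drivers. As an illustrative value, 0x75 would keep
 * everything up to level 7 in memory while only pushing levels <= 5
 * to the drivers.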
+ */ + if (log_level > (debug_descriptor.console_log_levels >> 4)) + return 0; + + count = snprintf(buffer, sizeof(buffer), "[%5lu.%09lu,%d] ", + tb_to_secs(tb), tb_remaining_nsecs(tb), log_level); + count+= vsnprintf(buffer+count, sizeof(buffer)-count, fmt, ap); + + if (log_level > (debug_descriptor.console_log_levels & 0x0f)) + flush_to_drivers = false; + + console_write(flush_to_drivers, buffer, count); + + return count; +} + +/* we don't return anything as what on earth are we going to do + * if we actually fail to print a log message? Print a log message about it? + * Callers shouldn't care, prlog and friends should do something generically + * sane in such crazy situations. + */ +void _prlog(int log_level, const char* fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vprlog(log_level, fmt, ap); + va_end(ap); +} + +int _printf(const char* fmt, ...) +{ + int count; + va_list ap; + + va_start(ap, fmt); + count = vprlog(PR_PRINTF, fmt, ap); + va_end(ap); + + return count; +} diff --git a/roms/skiboot/core/console.c b/roms/skiboot/core/console.c new file mode 100644 index 000000000..2a1509025 --- /dev/null +++ b/roms/skiboot/core/console.c @@ -0,0 +1,451 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Console IO routine for use by libc + * + * fd is the classic posix 0,1,2 (stdin, stdout, stderr) + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <skiboot.h> +#include <unistd.h> +#include <console.h> +#include <opal.h> +#include <device.h> +#include <processor.h> +#include <cpu.h> + +static char *con_buf = (char *)INMEM_CON_START; +static size_t con_in; +static size_t con_out; +static bool con_wrapped; + +/* Internal console driver ops */ +static struct con_ops *con_driver; + +/* External (OPAL) console driver ops */ +static struct opal_con_ops *opal_con_driver = &dummy_opal_con; + +static struct lock con_lock = LOCK_UNLOCKED; + +/* This is mapped via TCEs so we keep it alone in a page */ +struct memcons memcons __section(".data.memcons") = { + .magic = CPU_TO_BE64(MEMCONS_MAGIC), + .obuf_phys = CPU_TO_BE64(INMEM_CON_START), + .ibuf_phys = CPU_TO_BE64(INMEM_CON_START + INMEM_CON_OUT_LEN), + .obuf_size = CPU_TO_BE32(INMEM_CON_OUT_LEN), + .ibuf_size = CPU_TO_BE32(INMEM_CON_IN_LEN), +}; + +static bool dummy_console_enabled(void) +{ +#ifdef FORCE_DUMMY_CONSOLE + return true; +#else + return dt_has_node_property(dt_chosen, + "sapphire,enable-dummy-console", NULL); +#endif +} + +/* + * Helper function for adding /ibm,opal/consoles/serial@<xyz> nodes + */ +struct dt_node *add_opal_console_node(int index, const char *type, + uint32_t write_buffer_size) +{ + struct dt_node *con, *consoles; + char buffer[32]; + + consoles = dt_find_by_name(opal_node, "consoles"); + if (!consoles) { + consoles = dt_new(opal_node, "consoles"); + assert(consoles); + dt_add_property_cells(consoles, "#address-cells", 1); + dt_add_property_cells(consoles, "#size-cells", 0); + } + + con = dt_new_addr(consoles, "serial", index); + assert(con); + + snprintf(buffer, sizeof(buffer), "ibm,opal-console-%s", type); + dt_add_property_string(con, "compatible", buffer); + + dt_add_property_cells(con, "#write-buffer-size", write_buffer_size); + dt_add_property_cells(con, "reg", index); + dt_add_property_string(con, "device_type", "serial"); + + return con; +} + +void clear_console(void) +{ + memset(con_buf, 0, INMEM_CON_LEN); +} + +/* + * Flush the console buffer into the driver, returns true + * if there is more to go. 
+ * Optionally can skip flushing to drivers, leaving messages + * just in memory console. + */ +static bool __flush_console(bool flush_to_drivers, bool need_unlock) +{ + struct cpu_thread *cpu = this_cpu(); + size_t req, len = 0; + static bool in_flush, more_flush; + + /* Is there anything to flush ? Bail out early if not */ + if (con_in == con_out || !con_driver) + return false; + + /* + * Console flushing is suspended on this CPU, typically because + * some critical locks are held that would potentially cause a + * flush to deadlock + * + * Also if it recursed on con_lock (need_unlock is false). This + * can happen due to debug code firing (e.g., list or stack + * debugging). + */ + if (cpu->con_suspend || !need_unlock) { + cpu->con_need_flush = true; + return false; + } + cpu->con_need_flush = false; + + /* + * We must call the underlying driver with the console lock + * dropped otherwise we get some deadlocks if anything down + * that path tries to printf() something. + * + * So instead what we do is we keep a static in_flush flag + * set/released with the lock held, which is used to prevent + * concurrent attempts at flushing the same chunk of buffer + * by other processors. + */ + if (in_flush) { + more_flush = true; + return false; + } + in_flush = true; + + /* + * NB: this must appear after the in_flush check since it modifies + * con_out. + */ + if (!flush_to_drivers) { + con_out = con_in; + in_flush = false; + return false; + } + + do { + more_flush = false; + + if (con_out > con_in) { + req = INMEM_CON_OUT_LEN - con_out; + more_flush = true; + } else + req = con_in - con_out; + + unlock(&con_lock); + len = con_driver->write(con_buf + con_out, req); + lock(&con_lock); + + con_out = (con_out + len) % INMEM_CON_OUT_LEN; + + /* write error? */ + if (len < req) + break; + } while(more_flush); + + in_flush = false; + return con_out != con_in; +} + +bool flush_console(void) +{ + bool ret; + + lock(&con_lock); + ret = __flush_console(true, true); + unlock(&con_lock); + + return ret; +} + +static void inmem_write(char c) +{ + uint32_t opos; + + if (!c) + return; + con_buf[con_in++] = c; + if (con_in >= INMEM_CON_OUT_LEN) { + con_in = 0; + con_wrapped = true; + } + + /* + * We must always re-generate memcons.out_pos because + * under some circumstances, the console script will + * use a broken putmemproc that does RMW on the full + * 8 bytes containing out_pos and in_prod, thus corrupting + * out_pos + */ + opos = con_in; + if (con_wrapped) + opos |= MEMCONS_OUT_POS_WRAP; + lwsync(); + memcons.out_pos = cpu_to_be32(opos); + + /* If head reaches tail, push tail around & drop chars */ + if (con_in == con_out) + con_out = (con_in + 1) % INMEM_CON_OUT_LEN; +} + +static size_t inmem_read(char *buf, size_t req) +{ + size_t read = 0; + char *ibuf = (char *)be64_to_cpu(memcons.ibuf_phys); + + while (req && be32_to_cpu(memcons.in_prod) != be32_to_cpu(memcons.in_cons)) { + *(buf++) = ibuf[be32_to_cpu(memcons.in_cons)]; + lwsync(); + memcons.in_cons = cpu_to_be32((be32_to_cpu(memcons.in_cons) + 1) % INMEM_CON_IN_LEN); + req--; + read++; + } + return read; +} + +static void write_char(char c) +{ +#ifdef MAMBO_DEBUG_CONSOLE + mambo_console_write(&c, 1); +#endif + inmem_write(c); +} + +ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count) +{ + /* We use recursive locking here as we can get called + * from fairly deep debug path + */ + bool need_unlock = lock_recursive(&con_lock); + const char *cbuf = buf; + + while(count--) { + char c = *(cbuf++); + if (c == '\n') + 
write_char('\r'); + write_char(c); + } + + __flush_console(flush_to_drivers, need_unlock); + + if (need_unlock) + unlock(&con_lock); + + return count; +} + +ssize_t write(int fd __unused, const void *buf, size_t count) +{ + return console_write(true, buf, count); +} + +ssize_t read(int fd __unused, void *buf, size_t req_count) +{ + bool need_unlock = lock_recursive(&con_lock); + size_t count = 0; + + if (con_driver && con_driver->read) + count = con_driver->read(buf, req_count); + if (!count) + count = inmem_read(buf, req_count); + if (need_unlock) + unlock(&con_lock); + return count; +} + +/* Helper function to perform a full synchronous flush */ +void console_complete_flush(void) +{ + /* + * Using term 0 here is a dumb hack that works because the UART + * only has term 0 and the FSP doesn't have an explicit flush method. + */ + int64_t ret = opal_con_driver->flush(0); + + if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER) + return; + + while (ret != OPAL_SUCCESS) { + ret = opal_con_driver->flush(0); + } +} + +/* + * set_console() + * + * This sets the driver used internally by Skiboot. This is different to the + * OPAL console driver. + */ +void set_console(struct con_ops *driver) +{ + con_driver = driver; + if (driver) + flush_console(); +} + +/* + * set_opal_console() + * + * Configure the console driver to handle the console provided by the OPAL API. + * They are different to the above in that they are typically buffered, and used + * by the host OS rather than skiboot. + */ +static bool opal_cons_init = false; + +void set_opal_console(struct opal_con_ops *driver) +{ + assert(!opal_cons_init); + opal_con_driver = driver; +} + +void init_opal_console(void) +{ + assert(!opal_cons_init); + opal_cons_init = true; + + if (dummy_console_enabled() && opal_con_driver != &dummy_opal_con) { + prlog(PR_WARNING, "OPAL: Dummy console forced, %s ignored\n", + opal_con_driver->name); + + opal_con_driver = &dummy_opal_con; + } + + prlog(PR_INFO, "OPAL: Using %s\n", opal_con_driver->name); + + if (opal_con_driver->init) + opal_con_driver->init(); + + opal_register(OPAL_CONSOLE_READ, opal_con_driver->read, 3); + opal_register(OPAL_CONSOLE_WRITE, opal_con_driver->write, 3); + opal_register(OPAL_CONSOLE_FLUSH, opal_con_driver->flush, 1); + opal_register(OPAL_CONSOLE_WRITE_BUFFER_SPACE, + opal_con_driver->space, 2); +} + +void memcons_add_properties(void) +{ + dt_add_property_u64(opal_node, "ibm,opal-memcons", (u64) &memcons); +} + +/* + * The default OPAL console. + * + * In the absence of a "real" OPAL console driver we handle the OPAL_CONSOLE_* + * calls by writing into the skiboot log buffer. Reads are a little more + * complicated since they can come from the in-memory console (BML) or from the + * internal skiboot console driver. 
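 *
 * For orientation, when the dummy console is in use these handlers
 * end up registered by init_opal_console() above roughly as:
 *
 *     opal_register(OPAL_CONSOLE_WRITE, dummy_console_write, 3);
 *
 * i.e. the host passes a terminal number, a big-endian length pointer
 * and a buffer; only terminal 0 is accepted here.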
+ */ +static int64_t dummy_console_write(int64_t term_number, __be64 *length, + const uint8_t *buffer) +{ + uint64_t l; + + if (term_number != 0) + return OPAL_PARAMETER; + + if (!opal_addr_valid(length) || !opal_addr_valid(buffer)) + return OPAL_PARAMETER; + + l = be64_to_cpu(*length); + write(0, buffer, l); + + return OPAL_SUCCESS; +} + +static int64_t dummy_console_write_buffer_space(int64_t term_number, + __be64 *length) +{ + if (term_number != 0) + return OPAL_PARAMETER; + + if (!opal_addr_valid(length)) + return OPAL_PARAMETER; + + if (length) + *length = cpu_to_be64(INMEM_CON_OUT_LEN); + + return OPAL_SUCCESS; +} + +static int64_t dummy_console_read(int64_t term_number, __be64 *length, + uint8_t *buffer) +{ + uint64_t l; + + if (term_number != 0) + return OPAL_PARAMETER; + + if (!opal_addr_valid(length) || !opal_addr_valid(buffer)) + return OPAL_PARAMETER; + + l = be64_to_cpu(*length); + l = read(0, buffer, l); + *length = cpu_to_be64(l); + opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, 0); + + return OPAL_SUCCESS; +} + +static int64_t dummy_console_flush(int64_t term_number __unused) +{ + return OPAL_UNSUPPORTED; +} + +static void dummy_console_poll(void *data __unused) +{ + bool has_data = false; + + lock(&con_lock); + if (con_driver && con_driver->poll_read) + has_data = con_driver->poll_read(); + if (memcons.in_prod != memcons.in_cons) + has_data = true; + if (has_data) + opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, + OPAL_EVENT_CONSOLE_INPUT); + else + opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, 0); + unlock(&con_lock); +} + +void dummy_console_add_nodes(void) +{ + struct dt_property *p; + + add_opal_console_node(0, "raw", be32_to_cpu(memcons.obuf_size)); + + /* Mambo might have left a crap one, clear it */ + p = __dt_find_property(dt_chosen, "linux,stdout-path"); + if (p) + dt_del_property(dt_chosen, p); + + dt_add_property_string(dt_chosen, "linux,stdout-path", + "/ibm,opal/consoles/serial@0"); + + opal_add_poller(dummy_console_poll, NULL); +} + +struct opal_con_ops dummy_opal_con = { + .name = "Dummy Console", + .init = dummy_console_add_nodes, + .read = dummy_console_read, + .write = dummy_console_write, + .space = dummy_console_write_buffer_space, + .flush = dummy_console_flush, +}; diff --git a/roms/skiboot/core/cpu.c b/roms/skiboot/core/cpu.c new file mode 100644 index 000000000..f58aeb27a --- /dev/null +++ b/roms/skiboot/core/cpu.c @@ -0,0 +1,1785 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Code to manage and manipulate CPUs + * + * Copyright 2013-2019 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <cpu.h> +#include <device.h> +#include <mem_region.h> +#include <opal.h> +#include <stack.h> +#include <trace.h> +#include <affinity.h> +#include <chip.h> +#include <timebase.h> +#include <interrupts.h> +#include <ccan/str/str.h> +#include <ccan/container_of/container_of.h> +#include <xscom.h> + +/* The cpu_threads array is static and indexed by PIR in + * order to speed up lookup from asm entry points + */ +struct cpu_stack { + union { + uint8_t stack[STACK_SIZE]; + struct cpu_thread cpu; + }; +} __align(STACK_SIZE); + +static struct cpu_stack * const cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE; +unsigned int cpu_thread_count; +unsigned int cpu_max_pir; +struct cpu_thread *boot_cpu; +static struct lock reinit_lock = LOCK_UNLOCKED; +static bool hile_supported; +static bool radix_supported; +static unsigned long hid0_hile; +static unsigned long hid0_attn; +static bool sreset_enabled; +static bool ipi_enabled; +static bool pm_enabled; +static bool current_hile_mode = HAVE_LITTLE_ENDIAN; +static bool current_radix_mode = true; +static bool tm_suspend_enabled; + +unsigned long cpu_secondary_start __force_data = 0; + +struct cpu_job { + struct list_node link; + void (*func)(void *data); + void *data; + const char *name; + bool complete; + bool no_return; +}; + +/* attribute const as cpu_stacks is constant. */ +unsigned long __attrconst cpu_stack_bottom(unsigned int pir) +{ + return ((unsigned long)&cpu_stacks[pir]) + + sizeof(struct cpu_thread) + STACK_SAFETY_GAP; +} + +unsigned long __attrconst cpu_stack_top(unsigned int pir) +{ + /* This is the top of the normal stack. */ + return ((unsigned long)&cpu_stacks[pir]) + + NORMAL_STACK_SIZE - STACK_TOP_GAP; +} + +unsigned long __attrconst cpu_emergency_stack_top(unsigned int pir) +{ + /* This is the top of the emergency stack, above the normal stack. */ + return ((unsigned long)&cpu_stacks[pir]) + + NORMAL_STACK_SIZE + EMERGENCY_STACK_SIZE - STACK_TOP_GAP; +} + +void __nomcount cpu_relax(void) +{ + /* Relax a bit to give sibling threads some breathing space */ + smt_lowest(); + asm volatile("nop; nop; nop; nop;\n" + "nop; nop; nop; nop;\n" + "nop; nop; nop; nop;\n" + "nop; nop; nop; nop;\n"); + smt_medium(); + barrier(); +} + +static void cpu_wake(struct cpu_thread *cpu) +{ + /* Is it idle ? If not, no need to wake */ + sync(); + if (!cpu->in_idle) + return; + + if (proc_gen == proc_gen_p8) { + /* Poke IPI */ + icp_kick_cpu(cpu); + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { + p9_dbell_send(cpu->pir); + } +} + +/* + * If chip_id is >= 0, schedule the job on that node. + * Otherwise schedule the job anywhere. + */ +static struct cpu_thread *cpu_find_job_target(int32_t chip_id) +{ + struct cpu_thread *cpu, *best, *me = this_cpu(); + uint32_t best_count; + + /* We try to find a target to run a job. We need to avoid + * a CPU that has a "no return" job on its queue as it might + * never be able to process anything. + * + * Additionally we don't check the list but the job count + * on the target CPUs, since that is decremented *after* + * a job has been completed. 
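 *
 * For context, a typical caller-side sequence (names are
 * illustrative) looks like:
 *
 *     job = __cpu_queue_job(NULL, "my-job", my_fn, my_data, false);
 *     ...do something else...
 *     cpu_wait_job(job, true);
 *
 * where passing a NULL cpu lets this function pick the target.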
+ */ + + + /* First we scan all available primary threads + */ + for_each_available_cpu(cpu) { + if (chip_id >= 0 && cpu->chip_id != chip_id) + continue; + if (cpu == me || !cpu_is_thread0(cpu) || cpu->job_has_no_return) + continue; + if (cpu->job_count) + continue; + lock(&cpu->job_lock); + if (!cpu->job_count) + return cpu; + unlock(&cpu->job_lock); + } + + /* Now try again with secondary threads included and keep + * track of the one with the less jobs queued up. This is + * done in a racy way, but it's just an optimization in case + * we are overcommitted on jobs. Could could also just pick + * a random one... + */ + best = NULL; + best_count = -1u; + for_each_available_cpu(cpu) { + if (chip_id >= 0 && cpu->chip_id != chip_id) + continue; + if (cpu == me || cpu->job_has_no_return) + continue; + if (!best || cpu->job_count < best_count) { + best = cpu; + best_count = cpu->job_count; + } + if (cpu->job_count) + continue; + lock(&cpu->job_lock); + if (!cpu->job_count) + return cpu; + unlock(&cpu->job_lock); + } + + /* We haven't found anybody, do we have a bestie ? */ + if (best) { + lock(&best->job_lock); + return best; + } + + /* Go away */ + return NULL; +} + +/* job_lock is held, returns with it released */ +static void queue_job_on_cpu(struct cpu_thread *cpu, struct cpu_job *job) +{ + /* That's bad, the job will never run */ + if (cpu->job_has_no_return) { + prlog(PR_WARNING, "WARNING ! Job %s scheduled on CPU 0x%x" + " which has a no-return job on its queue !\n", + job->name, cpu->pir); + backtrace(); + } + list_add_tail(&cpu->job_queue, &job->link); + if (job->no_return) + cpu->job_has_no_return = true; + else + cpu->job_count++; + if (pm_enabled) + cpu_wake(cpu); + unlock(&cpu->job_lock); +} + +struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu, + const char *name, + void (*func)(void *data), void *data, + bool no_return) +{ + struct cpu_job *job; + +#ifdef DEBUG_SERIALIZE_CPU_JOBS + if (cpu == NULL) + cpu = this_cpu(); +#endif + + if (cpu && !cpu_is_available(cpu)) { + prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n", + cpu->pir); + return NULL; + } + + job = zalloc(sizeof(struct cpu_job)); + if (!job) + return NULL; + job->func = func; + job->data = data; + job->name = name; + job->complete = false; + job->no_return = no_return; + + /* Pick a candidate. Returns with target queue locked */ + if (cpu == NULL) + cpu = cpu_find_job_target(-1); + else if (cpu != this_cpu()) + lock(&cpu->job_lock); + else + cpu = NULL; + + /* Can't be scheduled, run it now */ + if (cpu == NULL) { + if (!this_cpu()->job_has_no_return) + this_cpu()->job_has_no_return = no_return; + func(data); + job->complete = true; + return job; + } + + queue_job_on_cpu(cpu, job); + + return job; +} + +struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id, + const char *name, + void (*func)(void *data), void *data) +{ + struct cpu_thread *cpu; + struct cpu_job *job; + + job = zalloc(sizeof(struct cpu_job)); + if (!job) + return NULL; + job->func = func; + job->data = data; + job->name = name; + job->complete = false; + job->no_return = false; + + /* Pick a candidate. Returns with target queue locked */ + cpu = cpu_find_job_target(chip_id); + + /* Can't be scheduled... */ + if (cpu == NULL) { + cpu = this_cpu(); + if (cpu->chip_id == chip_id) { + /* Run it now if we're the right node. */ + func(data); + job->complete = true; + return job; + } + /* Otherwise fail. 
*/ + free(job); + return NULL; + } + + queue_job_on_cpu(cpu, job); + + return job; +} + +bool cpu_poll_job(struct cpu_job *job) +{ + lwsync(); + return job->complete; +} + +void cpu_wait_job(struct cpu_job *job, bool free_it) +{ + unsigned long time_waited = 0; + + if (!job) + return; + + while (!job->complete) { + /* This will call OPAL pollers for us */ + time_wait_ms(10); + time_waited += 10; + lwsync(); + if ((time_waited % 30000) == 0) { + prlog(PR_INFO, "cpu_wait_job(%s) for %lums\n", + job->name, time_waited); + backtrace(); + } + } + lwsync(); + + if (time_waited > 1000) + prlog(PR_DEBUG, "cpu_wait_job(%s) for %lums\n", + job->name, time_waited); + + if (free_it) + free(job); +} + +bool cpu_check_jobs(struct cpu_thread *cpu) +{ + return !list_empty_nocheck(&cpu->job_queue); +} + +void cpu_process_jobs(void) +{ + struct cpu_thread *cpu = this_cpu(); + struct cpu_job *job = NULL; + void (*func)(void *); + void *data; + + sync(); + if (!cpu_check_jobs(cpu)) + return; + + lock(&cpu->job_lock); + while (true) { + bool no_return; + + job = list_pop(&cpu->job_queue, struct cpu_job, link); + if (!job) + break; + + func = job->func; + data = job->data; + no_return = job->no_return; + unlock(&cpu->job_lock); + prlog(PR_TRACE, "running job %s on %x\n", job->name, cpu->pir); + if (no_return) + free(job); + func(data); + if (!list_empty(&cpu->locks_held)) { + if (no_return) + prlog(PR_ERR, "OPAL no-return job returned with" + "locks held!\n"); + else + prlog(PR_ERR, "OPAL job %s returning with locks held\n", + job->name); + drop_my_locks(true); + } + lock(&cpu->job_lock); + if (!no_return) { + cpu->job_count--; + lwsync(); + job->complete = true; + } + } + unlock(&cpu->job_lock); +} + +enum cpu_wake_cause { + cpu_wake_on_job, + cpu_wake_on_dec, +}; + +static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on) +{ + uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE; + struct cpu_thread *cpu = this_cpu(); + unsigned int vec = 0; + + if (!pm_enabled) { + prlog_once(PR_DEBUG, "cpu_idle_p8 called pm disabled\n"); + return vec; + } + + /* Clean up ICP, be ready for IPIs */ + icp_prep_for_pm(); + + /* Synchronize with wakers */ + if (wake_on == cpu_wake_on_job) { + /* Mark ourselves in idle so other CPUs know to send an IPI */ + cpu->in_idle = true; + sync(); + + /* Check for jobs again */ + if (cpu_check_jobs(cpu) || !pm_enabled) + goto skip_sleep; + + /* Setup wakup cause in LPCR: EE (for IPI) */ + lpcr |= SPR_LPCR_P8_PECE2; + mtspr(SPR_LPCR, lpcr); + + } else { + /* Mark outselves sleeping so cpu_set_pm_enable knows to + * send an IPI + */ + cpu->in_sleep = true; + sync(); + + /* Check if PM got disabled */ + if (!pm_enabled) + goto skip_sleep; + + /* EE and DEC */ + lpcr |= SPR_LPCR_P8_PECE2 | SPR_LPCR_P8_PECE3; + mtspr(SPR_LPCR, lpcr); + } + isync(); + + /* Enter nap */ + vec = enter_p8_pm_state(false); + +skip_sleep: + /* Restore */ + sync(); + cpu->in_idle = false; + cpu->in_sleep = false; + reset_cpu_icp(); + + return vec; +} + +static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on) +{ + uint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P9_PECE; + uint64_t psscr; + struct cpu_thread *cpu = this_cpu(); + unsigned int vec = 0; + + if (!pm_enabled) { + prlog(PR_DEBUG, "cpu_idle_p9 called on cpu 0x%04x with pm disabled\n", cpu->pir); + return vec; + } + + /* Synchronize with wakers */ + if (wake_on == cpu_wake_on_job) { + /* Mark ourselves in idle so other CPUs know to send an IPI */ + cpu->in_idle = true; + sync(); + + /* Check for jobs again */ + if (cpu_check_jobs(cpu) || !pm_enabled) + goto 
skip_sleep; + + /* HV DBELL for IPI */ + lpcr |= SPR_LPCR_P9_PECEL1; + } else { + /* Mark outselves sleeping so cpu_set_pm_enable knows to + * send an IPI + */ + cpu->in_sleep = true; + sync(); + + /* Check if PM got disabled */ + if (!pm_enabled) + goto skip_sleep; + + /* HV DBELL and DEC */ + lpcr |= SPR_LPCR_P9_PECEL1 | SPR_LPCR_P9_PECEL3; + } + + mtspr(SPR_LPCR, lpcr); + isync(); + + if (sreset_enabled) { + /* stop with EC=1 (sreset) and ESL=1 (enable thread switch). */ + /* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=1 */ + psscr = PPC_BIT(42) | PPC_BIT(43) | + PPC_BITMASK(54, 55) | PPC_BIT(63); + vec = enter_p9_pm_state(psscr); + } else { + /* stop with EC=0 (resumes) which does not require sreset. */ + /* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=1 */ + psscr = PPC_BITMASK(54, 55) | PPC_BIT(63); + enter_p9_pm_lite_state(psscr); + } + + /* Clear doorbell */ + p9_dbell_receive(); + + skip_sleep: + /* Restore */ + sync(); + cpu->in_idle = false; + cpu->in_sleep = false; + + return vec; +} + +static void cpu_idle_pm(enum cpu_wake_cause wake_on) +{ + unsigned int vec; + + switch(proc_gen) { + case proc_gen_p8: + vec = cpu_idle_p8(wake_on); + break; + case proc_gen_p9: + vec = cpu_idle_p9(wake_on); + break; + case proc_gen_p10: + vec = cpu_idle_p9(wake_on); + break; + default: + vec = 0; + prlog_once(PR_DEBUG, "cpu_idle_pm called with bad processor type\n"); + break; + } + + if (vec == 0x100) { + unsigned long srr1 = mfspr(SPR_SRR1); + + switch (srr1 & SPR_SRR1_PM_WAKE_MASK) { + case SPR_SRR1_PM_WAKE_SRESET: + exception_entry_pm_sreset(); + break; + default: + break; + } + mtmsrd(MSR_RI, 1); + + } else if (vec == 0x200) { + exception_entry_pm_mce(); + enable_machine_check(); + mtmsrd(MSR_RI, 1); + } +} + +void cpu_idle_job(void) +{ + if (pm_enabled) { + cpu_idle_pm(cpu_wake_on_job); + } else { + struct cpu_thread *cpu = this_cpu(); + + smt_lowest(); + /* Check for jobs again */ + while (!cpu_check_jobs(cpu)) { + if (pm_enabled) + break; + cpu_relax(); + barrier(); + } + smt_medium(); + } +} + +void cpu_idle_delay(unsigned long delay) +{ + unsigned long now = mftb(); + unsigned long end = now + delay; + unsigned long min_pm = usecs_to_tb(10); + + if (pm_enabled && delay > min_pm) { +pm: + for (;;) { + if (delay >= 0x7fffffff) + delay = 0x7fffffff; + mtspr(SPR_DEC, delay); + + cpu_idle_pm(cpu_wake_on_dec); + + now = mftb(); + if (tb_compare(now, end) == TB_AAFTERB) + break; + delay = end - now; + if (!(pm_enabled && delay > min_pm)) + goto no_pm; + } + } else { +no_pm: + smt_lowest(); + for (;;) { + now = mftb(); + if (tb_compare(now, end) == TB_AAFTERB) + break; + delay = end - now; + if (pm_enabled && delay > min_pm) { + smt_medium(); + goto pm; + } + } + smt_medium(); + } +} + +static void cpu_pm_disable(void) +{ + struct cpu_thread *cpu; + unsigned int timeout; + + pm_enabled = false; + sync(); + + if (proc_gen == proc_gen_p8) { + for_each_available_cpu(cpu) { + while (cpu->in_sleep || cpu->in_idle) { + icp_kick_cpu(cpu); + cpu_relax(); + } + } + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { + for_each_available_cpu(cpu) { + if (cpu->in_sleep || cpu->in_idle) + p9_dbell_send(cpu->pir); + } + + /* This code is racy with cpus entering idle, late ones miss the dbell */ + + smt_lowest(); + for_each_available_cpu(cpu) { + timeout = 0x08000000; + while ((cpu->in_sleep || cpu->in_idle) && --timeout) + barrier(); + if (!timeout) { + prlog(PR_DEBUG, "cpu_pm_disable TIMEOUT on cpu 0x%04x to exit idle\n", + cpu->pir); + p9_dbell_send(cpu->pir); + } + } + smt_medium(); + } +} + 
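/*
 * Summary of the gating implemented by the two setters below (a
 * restatement of their effect, not new behaviour): on P8, pm_enabled
 * only becomes true when both sreset_enabled and ipi_enabled are set,
 * while on P9/P10 ipi_enabled alone is sufficient. Either setter
 * funnels through cpu_pm_disable() above so that idle CPUs re-evaluate
 * which PM mode (EC=0/1) they may use.
 */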
+void cpu_set_sreset_enable(bool enabled) +{ + if (sreset_enabled == enabled) + return; + + if (proc_gen == proc_gen_p8) { + /* Public P8 Mambo has broken NAP */ + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return; + + sreset_enabled = enabled; + sync(); + + if (!enabled) { + cpu_pm_disable(); + } else { + if (ipi_enabled) + pm_enabled = true; + } + + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { + sreset_enabled = enabled; + sync(); + /* + * Kick everybody out of PM so they can adjust the PM + * mode they are using (EC=0/1). + */ + cpu_pm_disable(); + if (ipi_enabled) + pm_enabled = true; + } +} + +void cpu_set_ipi_enable(bool enabled) +{ + if (ipi_enabled == enabled) + return; + + if (proc_gen == proc_gen_p8) { + ipi_enabled = enabled; + sync(); + if (!enabled) { + cpu_pm_disable(); + } else { + if (sreset_enabled) + pm_enabled = true; + } + + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { + ipi_enabled = enabled; + sync(); + if (!enabled) + cpu_pm_disable(); + else + pm_enabled = true; + } +} + +void cpu_process_local_jobs(void) +{ + struct cpu_thread *cpu = first_available_cpu(); + + while (cpu) { + if (cpu != this_cpu()) + return; + + cpu = next_available_cpu(cpu); + } + + if (!cpu) + cpu = first_available_cpu(); + + /* No CPU to run on, just run synchro */ + if (cpu == this_cpu()) { + prlog_once(PR_DEBUG, "Processing jobs synchronously\n"); + cpu_process_jobs(); + opal_run_pollers(); + } +} + + +struct dt_node *get_cpu_node(u32 pir) +{ + struct cpu_thread *t = find_cpu_by_pir(pir); + + return t ? t->node : NULL; +} + +/* This only covers primary, active cpus */ +struct cpu_thread *find_cpu_by_chip_id(u32 chip_id) +{ + struct cpu_thread *t; + + for_each_available_cpu(t) { + if (t->is_secondary) + continue; + if (t->chip_id == chip_id) + return t; + } + return NULL; +} + +struct cpu_thread *find_cpu_by_node(struct dt_node *cpu) +{ + struct cpu_thread *t; + + for_each_available_cpu(t) { + if (t->node == cpu) + return t; + } + return NULL; +} + +struct cpu_thread *find_cpu_by_pir(u32 pir) +{ + if (pir > cpu_max_pir) + return NULL; + return &cpu_stacks[pir].cpu; +} + +struct cpu_thread __nomcount *find_cpu_by_pir_nomcount(u32 pir) +{ + if (pir > cpu_max_pir) + return NULL; + return &cpu_stacks[pir].cpu; +} + +struct cpu_thread *find_cpu_by_server(u32 server_no) +{ + struct cpu_thread *t; + + for_each_cpu(t) { + if (t->server_no == server_no) + return t; + } + return NULL; +} + +struct cpu_thread *next_cpu(struct cpu_thread *cpu) +{ + struct cpu_stack *s; + unsigned int index = 0; + + if (cpu != NULL) { + s = container_of(cpu, struct cpu_stack, cpu); + index = s - cpu_stacks + 1; + } + for (; index <= cpu_max_pir; index++) { + cpu = &cpu_stacks[index].cpu; + if (cpu->state != cpu_state_no_cpu) + return cpu; + } + return NULL; +} + +struct cpu_thread *first_cpu(void) +{ + return next_cpu(NULL); +} + +struct cpu_thread *next_available_cpu(struct cpu_thread *cpu) +{ + do { + cpu = next_cpu(cpu); + } while(cpu && !cpu_is_available(cpu)); + + return cpu; +} + +struct cpu_thread *first_available_cpu(void) +{ + return next_available_cpu(NULL); +} + +struct cpu_thread *next_present_cpu(struct cpu_thread *cpu) +{ + do { + cpu = next_cpu(cpu); + } while(cpu && !cpu_is_present(cpu)); + + return cpu; +} + +struct cpu_thread *first_present_cpu(void) +{ + return next_present_cpu(NULL); +} + +struct cpu_thread *next_ungarded_cpu(struct cpu_thread *cpu) +{ + do { + cpu = next_cpu(cpu); + } while(cpu && cpu->state == cpu_state_unavailable); + + return cpu; +} + +struct 
cpu_thread *first_ungarded_cpu(void) +{ + return next_ungarded_cpu(NULL); +} + +struct cpu_thread *next_ungarded_primary(struct cpu_thread *cpu) +{ + do { + cpu = next_ungarded_cpu(cpu); + } while (cpu && !(cpu == cpu->primary || cpu == cpu->ec_primary)); + + return cpu; +} + +struct cpu_thread *first_ungarded_primary(void) +{ + return next_ungarded_primary(NULL); +} + +u8 get_available_nr_cores_in_chip(u32 chip_id) +{ + struct cpu_thread *core; + u8 nr_cores = 0; + + for_each_available_core_in_chip(core, chip_id) + nr_cores++; + + return nr_cores; +} + +struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core, + u32 chip_id) +{ + do { + core = next_cpu(core); + } while(core && (!cpu_is_available(core) || + core->chip_id != chip_id || + core->is_secondary)); + return core; +} + +struct cpu_thread *first_available_core_in_chip(u32 chip_id) +{ + return next_available_core_in_chip(NULL, chip_id); +} + +uint32_t cpu_get_core_index(struct cpu_thread *cpu) +{ + return pir_to_fused_core_id(cpu->pir); +} + +void cpu_remove_node(const struct cpu_thread *t) +{ + struct dt_node *i; + + /* Find this cpu node */ + dt_for_each_node(dt_root, i) { + const struct dt_property *p; + + if (!dt_has_node_property(i, "device_type", "cpu")) + continue; + p = dt_find_property(i, "ibm,pir"); + if (!p) + continue; + if (dt_property_get_cell(p, 0) == t->pir) { + dt_free(i); + return; + } + } + prerror("CPU: Could not find cpu node %i to remove!\n", t->pir); + abort(); +} + +void cpu_disable_all_threads(struct cpu_thread *cpu) +{ + unsigned int i; + struct dt_property *p; + + for (i = 0; i <= cpu_max_pir; i++) { + struct cpu_thread *t = &cpu_stacks[i].cpu; + + if (t->primary == cpu->primary) + t->state = cpu_state_disabled; + + } + + /* Mark this core as bad so that Linux kernel don't use this CPU. */ + prlog(PR_DEBUG, "CPU: Mark CPU bad (PIR 0x%04x)...\n", cpu->pir); + p = __dt_find_property(cpu->node, "status"); + if (p) + dt_del_property(cpu->node, p); + + dt_add_property_string(cpu->node, "status", "bad"); + + /* XXX Do something to actually stop the core */ +} + +static void init_cpu_thread(struct cpu_thread *t, + enum cpu_thread_state state, + unsigned int pir) +{ + /* offset within cpu_thread to prevent stack_guard clobber */ + const size_t guard_skip = container_off_var(t, stack_guard) + + sizeof(t->stack_guard); + + memset(((void *)t) + guard_skip, 0, sizeof(struct cpu_thread) - guard_skip); + init_lock(&t->dctl_lock); + init_lock(&t->job_lock); + list_head_init(&t->job_queue); + list_head_init(&t->locks_held); + t->stack_guard = STACK_CHECK_GUARD_BASE ^ pir; + t->state = state; + t->pir = pir; +#ifdef STACK_CHECK_ENABLED + t->stack_bot_mark = LONG_MAX; +#endif + t->is_fused_core = is_fused_core(mfspr(SPR_PVR)); + assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks); +} + +static void enable_attn(void) +{ + unsigned long hid0; + + hid0 = mfspr(SPR_HID0); + hid0 |= hid0_attn; + set_hid0(hid0); +} + +static void disable_attn(void) +{ + unsigned long hid0; + + hid0 = mfspr(SPR_HID0); + hid0 &= ~hid0_attn; + set_hid0(hid0); +} + +extern void __trigger_attn(void); +void trigger_attn(void) +{ + enable_attn(); + __trigger_attn(); +} + +static void init_hid(void) +{ + /* attn is enabled even when HV=0, so make sure it's off */ + disable_attn(); +} + +void __nomcount pre_init_boot_cpu(void) +{ + struct cpu_thread *cpu = this_cpu(); + + /* We skip the stack guard ! 
*/ + memset(((void *)cpu) + 8, 0, sizeof(struct cpu_thread) - 8); +} + +void init_boot_cpu(void) +{ + unsigned int pir, pvr; + + pir = mfspr(SPR_PIR); + pvr = mfspr(SPR_PVR); + + /* Get CPU family and other flags based on PVR */ + switch(PVR_TYPE(pvr)) { + case PVR_TYPE_P8E: + case PVR_TYPE_P8: + proc_gen = proc_gen_p8; + hile_supported = PVR_VERS_MAJ(mfspr(SPR_PVR)) >= 2; + hid0_hile = SPR_HID0_POWER8_HILE; + hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN; + break; + case PVR_TYPE_P8NVL: + proc_gen = proc_gen_p8; + hile_supported = true; + hid0_hile = SPR_HID0_POWER8_HILE; + hid0_attn = SPR_HID0_POWER8_ENABLE_ATTN; + break; + case PVR_TYPE_P9: + case PVR_TYPE_P9P: + proc_gen = proc_gen_p9; + hile_supported = true; + radix_supported = true; + hid0_hile = SPR_HID0_POWER9_HILE; + hid0_attn = SPR_HID0_POWER9_ENABLE_ATTN; + break; + case PVR_TYPE_P10: + proc_gen = proc_gen_p10; + hile_supported = true; + radix_supported = true; + hid0_hile = SPR_HID0_POWER10_HILE; + hid0_attn = SPR_HID0_POWER10_ENABLE_ATTN; + break; + default: + proc_gen = proc_gen_unknown; + } + + /* Get a CPU thread count based on family */ + switch(proc_gen) { + case proc_gen_p8: + cpu_thread_count = 8; + prlog(PR_INFO, "CPU: P8 generation processor" + " (max %d threads/core)\n", cpu_thread_count); + break; + case proc_gen_p9: + if (is_fused_core(pvr)) + cpu_thread_count = 8; + else + cpu_thread_count = 4; + prlog(PR_INFO, "CPU: P9 generation processor" + " (max %d threads/core)\n", cpu_thread_count); + break; + case proc_gen_p10: + if (is_fused_core(pvr)) + cpu_thread_count = 8; + else + cpu_thread_count = 4; + prlog(PR_INFO, "CPU: P10 generation processor" + " (max %d threads/core)\n", cpu_thread_count); + break; + default: + prerror("CPU: Unknown PVR, assuming 1 thread\n"); + cpu_thread_count = 1; + } + + if (is_power9n(pvr) && (PVR_VERS_MAJ(pvr) == 1)) { + prerror("CPU: POWER9N DD1 is not supported\n"); + abort(); + } + + prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n", + pir, pvr); + + /* + * Adjust top of RAM to include the boot CPU stack. If we have less + * RAM than this, it's not possible to boot. + */ + cpu_max_pir = pir; + top_of_ram += (cpu_max_pir + 1) * STACK_SIZE; + + /* Setup boot CPU state */ + boot_cpu = &cpu_stacks[pir].cpu; + init_cpu_thread(boot_cpu, cpu_state_active, pir); + init_boot_tracebuf(boot_cpu); + assert(this_cpu() == boot_cpu); + init_hid(); +} + +static void enable_large_dec(bool on) +{ + u64 lpcr = mfspr(SPR_LPCR); + + if (on) + lpcr |= SPR_LPCR_P9_LD; + else + lpcr &= ~SPR_LPCR_P9_LD; + + mtspr(SPR_LPCR, lpcr); + isync(); +} + +#define HIGH_BIT (1ull << 63) + +static int find_dec_bits(void) +{ + int bits = 65; /* we always decrement once */ + u64 mask = ~0ull; + + if (proc_gen < proc_gen_p9) + return 32; + + /* The ISA doesn't specify the width of the decrementer register so we + * need to discover it. When in large mode (LPCR.LD = 1) reads from the + * DEC SPR are sign extended to 64 bits and writes are truncated to the + * physical register width. We can use this behaviour to detect the + * width by starting from an all 1s value and left shifting until we + * read a value from the DEC with it's high bit cleared. 
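 *
 * Worked example (assuming a hypothetical 56-bit decrementer): the
 * probe writes masks of 63, 62, ... one-bits; any mask with more
 * than 55 ones still reads back with the high bit set after being
 * truncated to 56 bits, and the first mask with 55 ones reads back
 * positive, so the loop exits with bits == 56.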
+ */ + + enable_large_dec(true); + + do { + bits--; + mask = mask >> 1; + mtspr(SPR_DEC, mask); + } while (mfspr(SPR_DEC) & HIGH_BIT); + + enable_large_dec(false); + + prlog(PR_DEBUG, "CPU: decrementer bits %d\n", bits); + return bits; +} + +static void init_tm_suspend_mode_property(void) +{ + struct dt_node *node; + + /* If we don't find anything, assume TM suspend is enabled */ + tm_suspend_enabled = true; + + node = dt_find_by_path(dt_root, "/ibm,opal/fw-features/tm-suspend-mode"); + if (!node) + return; + + if (dt_find_property(node, "disabled")) + tm_suspend_enabled = false; +} + +void init_cpu_max_pir(void) +{ + struct dt_node *cpus, *cpu; + + cpus = dt_find_by_path(dt_root, "/cpus"); + assert(cpus); + + /* Iterate all CPUs in the device-tree */ + dt_for_each_child(cpus, cpu) { + unsigned int pir, server_no; + + /* Skip cache nodes */ + if (strcmp(dt_prop_get(cpu, "device_type"), "cpu")) + continue; + + server_no = dt_prop_get_u32(cpu, "reg"); + + /* If PIR property is absent, assume it's the same as the + * server number + */ + pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no); + + if (cpu_max_pir < pir + cpu_thread_count - 1) + cpu_max_pir = pir + cpu_thread_count - 1; + } + + prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", cpu_max_pir); +} + +/* + * Set cpu->state to cpu_state_no_cpu for all secondaries, before the dt is + * parsed and they will be flipped to present as populated CPUs are found. + * + * Some configurations (e.g., with memory encryption) will not zero system + * memory at boot, so can't rely on cpu->state to be zero (== cpu_state_no_cpu). + */ +static void mark_all_secondary_cpus_absent(void) +{ + unsigned int pir; + struct cpu_thread *cpu; + + for (pir = 0; pir <= cpu_max_pir; pir++) { + cpu = &cpu_stacks[pir].cpu; + if (cpu == boot_cpu) + continue; + cpu->state = cpu_state_no_cpu; + } +} + +void init_all_cpus(void) +{ + struct dt_node *cpus, *cpu; + unsigned int pir, thread; + int dec_bits = find_dec_bits(); + + cpus = dt_find_by_path(dt_root, "/cpus"); + assert(cpus); + + init_tm_suspend_mode_property(); + + mark_all_secondary_cpus_absent(); + + /* Iterate all CPUs in the device-tree */ + dt_for_each_child(cpus, cpu) { + unsigned int server_no, chip_id, threads; + enum cpu_thread_state state; + const struct dt_property *p; + struct cpu_thread *t, *pt0, *pt1; + + /* Skip cache nodes */ + if (strcmp(dt_prop_get(cpu, "device_type"), "cpu")) + continue; + + server_no = dt_prop_get_u32(cpu, "reg"); + + /* If PIR property is absent, assume it's the same as the + * server number + */ + pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no); + + /* We should always have an ibm,chip-id property */ + chip_id = dt_get_chip_id(cpu); + + /* Only use operational CPUs */ + if (!strcmp(dt_prop_get(cpu, "status"), "okay")) { + state = cpu_state_present; + get_chip(chip_id)->ex_present = true; + } else { + state = cpu_state_unavailable; + } + + prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x" + " State=%d\n", pir, server_no, state); + + /* Check max PIR */ + if (cpu_max_pir < (pir + cpu_thread_count - 1)) { + prlog(PR_WARNING, "CPU: CPU potentially out of range" + "PIR=0x%04x MAX=0x%04x !\n", + pir, cpu_max_pir); + continue; + } + + /* Setup thread 0 */ + assert(pir <= cpu_max_pir); + t = pt0 = &cpu_stacks[pir].cpu; + if (t != boot_cpu) { + init_cpu_thread(t, state, pir); + /* Each cpu gets its own later in init_trace_buffers */ + t->trace = boot_cpu->trace; + } + if (t->is_fused_core) + pt1 = &cpu_stacks[pir + 1].cpu; + else + pt1 = pt0; + t->server_no = server_no; + 
t->primary = t->ec_primary = t; + t->node = cpu; + t->chip_id = chip_id; + t->icp_regs = NULL; /* Will be set later */ +#ifdef DEBUG_LOCKS + t->requested_lock = NULL; +#endif + t->core_hmi_state = 0; + t->core_hmi_state_ptr = &t->core_hmi_state; + + /* Add associativity properties */ + add_core_associativity(t); + + /* Add the decrementer width property */ + dt_add_property_cells(cpu, "ibm,dec-bits", dec_bits); + + if (t->is_fused_core) + dt_add_property(t->node, "ibm,fused-core", NULL, 0); + + /* Iterate threads */ + p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s"); + if (!p) + continue; + threads = p->len / 4; + if (threads > cpu_thread_count) { + prlog(PR_WARNING, "CPU: Threads out of range for PIR 0x%04x" + " threads=%d max=%d\n", + pir, threads, cpu_thread_count); + threads = cpu_thread_count; + } + for (thread = 1; thread < threads; thread++) { + prlog(PR_TRACE, "CPU: secondary thread %d found\n", + thread); + t = &cpu_stacks[pir + thread].cpu; + init_cpu_thread(t, state, pir + thread); + t->trace = boot_cpu->trace; + t->server_no = dt_property_get_cell(p, thread); + t->is_secondary = true; + t->is_fused_core = pt0->is_fused_core; + t->primary = pt0; + t->ec_primary = (thread & 1) ? pt1 : pt0; + t->node = cpu; + t->chip_id = chip_id; + t->core_hmi_state_ptr = &pt0->core_hmi_state; + } + prlog(PR_INFO, "CPU: %d secondary threads\n", thread); + } +} + +void cpu_bringup(void) +{ + struct cpu_thread *t; + uint32_t count = 0; + + prlog(PR_INFO, "CPU: Setting up secondary CPU state\n"); + + op_display(OP_LOG, OP_MOD_CPU, 0x0000); + + /* Tell everybody to chime in ! */ + prlog(PR_INFO, "CPU: Calling in all processors...\n"); + cpu_secondary_start = 1; + sync(); + + op_display(OP_LOG, OP_MOD_CPU, 0x0002); + + for_each_cpu(t) { + if (t->state != cpu_state_present && + t->state != cpu_state_active) + continue; + + /* Add a callin timeout ? If so, call cpu_remove_node(t). 
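The call-in loop below spins with SMT priority lowered until each secondary reaches cpu_state_active; the comment above only muses about a timeout. A minimal sketch of what such a bounded wait could look like, using the timebase helpers this file already uses elsewhere (the helper name and the timeout policy are assumptions, not existing skiboot code):

static bool wait_for_callin(struct cpu_thread *t, unsigned int timeout_ms)
{
	unsigned long end = mftb() + msecs_to_tb(timeout_ms);

	while (t->state != cpu_state_active) {
		if (tb_compare(mftb(), end) == TB_AAFTERB)
			return false;	/* caller could then cpu_remove_node(t) */
		smt_lowest();
		sync();
	}
	smt_medium();

	return true;
}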
*/ + while (t->state != cpu_state_active) { + smt_lowest(); + sync(); + } + smt_medium(); + count++; + } + + prlog(PR_NOTICE, "CPU: All %d processors called in...\n", count); + + op_display(OP_LOG, OP_MOD_CPU, 0x0003); +} + +void cpu_callin(struct cpu_thread *cpu) +{ + sync(); + cpu->state = cpu_state_active; + sync(); + + cpu->job_has_no_return = false; + if (cpu_is_thread0(cpu)) + init_hid(); +} + +static void opal_start_thread_job(void *data) +{ + cpu_give_self_os(); + + /* We do not return, so let's mark the job as + * complete + */ + start_kernel_secondary((uint64_t)data); +} + +static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address) +{ + struct cpu_thread *cpu; + struct cpu_job *job; + + if (!opal_addr_valid((void *)start_address)) + return OPAL_PARAMETER; + + cpu = find_cpu_by_server(server_no); + if (!cpu) { + prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no); + return OPAL_PARAMETER; + } + prlog(PR_DEBUG, "OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n", + server_no, cpu->pir, start_address); + + lock(&reinit_lock); + if (!cpu_is_available(cpu)) { + unlock(&reinit_lock); + prerror("OPAL: CPU not active in OPAL !\n"); + return OPAL_WRONG_STATE; + } + if (cpu->in_reinit) { + unlock(&reinit_lock); + prerror("OPAL: CPU being reinitialized !\n"); + return OPAL_WRONG_STATE; + } + job = __cpu_queue_job(cpu, "start_thread", + opal_start_thread_job, (void *)start_address, + true); + unlock(&reinit_lock); + if (!job) { + prerror("OPAL: Failed to create CPU start job !\n"); + return OPAL_INTERNAL_ERROR; + } + return OPAL_SUCCESS; +} +opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2); + +static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status) +{ + struct cpu_thread *cpu; + + if (!opal_addr_valid(thread_status)) + return OPAL_PARAMETER; + + cpu = find_cpu_by_server(server_no); + if (!cpu) { + prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no); + return OPAL_PARAMETER; + } + if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) { + prerror("OPAL: CPU not active in OPAL nor OS !\n"); + return OPAL_PARAMETER; + } + switch(cpu->state) { + case cpu_state_os: + *thread_status = OPAL_THREAD_STARTED; + break; + case cpu_state_active: + /* Active in skiboot -> inactive in OS */ + *thread_status = OPAL_THREAD_INACTIVE; + break; + default: + *thread_status = OPAL_THREAD_UNAVAILABLE; + } + + return OPAL_SUCCESS; +} +opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2); + +static int64_t opal_return_cpu(void) +{ + prlog(PR_DEBUG, "OPAL: Returning CPU 0x%04x\n", this_cpu()->pir); + + this_cpu()->in_opal_call--; + if (this_cpu()->in_opal_call != 0) { + printf("OPAL in_opal_call=%u\n", this_cpu()->in_opal_call); + } + + __secondary_cpu_entry(); + + return OPAL_HARDWARE; /* Should not happen */ +} +opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0); + +struct hid0_change_req { + uint64_t clr_bits; + uint64_t set_bits; +}; + +static void cpu_change_hid0(void *__req) +{ + struct hid0_change_req *req = __req; + unsigned long hid0, new_hid0; + + hid0 = new_hid0 = mfspr(SPR_HID0); + new_hid0 &= ~req->clr_bits; + new_hid0 |= req->set_bits; + prlog(PR_DEBUG, "CPU: [%08x] HID0 change 0x%016lx -> 0x%016lx\n", + this_cpu()->pir, hid0, new_hid0); + set_hid0(new_hid0); +} + +static int64_t cpu_change_all_hid0(struct hid0_change_req *req) +{ + struct cpu_thread *cpu; + struct cpu_job **jobs; + + jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1)); + assert(jobs); + + for_each_available_cpu(cpu) { + if (!cpu_is_thread0(cpu) && 
!cpu_is_core_chiplet_primary(cpu)) + continue; + if (cpu == this_cpu()) + continue; + jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_change_hid0", + cpu_change_hid0, req); + } + + /* this cpu */ + cpu_change_hid0(req); + + for_each_available_cpu(cpu) { + if (jobs[cpu->pir]) + cpu_wait_job(jobs[cpu->pir], true); + } + + free(jobs); + + return OPAL_SUCCESS; +} + +void cpu_set_hile_mode(bool hile) +{ + struct hid0_change_req req; + + if (hile == current_hile_mode) + return; + + if (hile) { + req.clr_bits = 0; + req.set_bits = hid0_hile; + } else { + req.clr_bits = hid0_hile; + req.set_bits = 0; + } + cpu_change_all_hid0(&req); + current_hile_mode = hile; +} + +static void cpu_cleanup_one(void *param __unused) +{ + mtspr(SPR_AMR, 0); + mtspr(SPR_IAMR, 0); + mtspr(SPR_PCR, 0); +} + +static int64_t cpu_cleanup_all(void) +{ + struct cpu_thread *cpu; + struct cpu_job **jobs; + + jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1)); + assert(jobs); + + for_each_available_cpu(cpu) { + if (cpu == this_cpu()) + continue; + jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_cleanup", + cpu_cleanup_one, NULL); + } + + /* this cpu */ + cpu_cleanup_one(NULL); + + for_each_available_cpu(cpu) { + if (jobs[cpu->pir]) + cpu_wait_job(jobs[cpu->pir], true); + } + + free(jobs); + + + return OPAL_SUCCESS; +} + +void cpu_fast_reboot_complete(void) +{ + /* Fast reboot will have set HID0:HILE to skiboot endian */ + current_hile_mode = HAVE_LITTLE_ENDIAN; + + /* and set HID0:RADIX */ + if (proc_gen == proc_gen_p9) + current_radix_mode = true; +} + +static int64_t opal_reinit_cpus(uint64_t flags) +{ + struct hid0_change_req req = { 0, 0 }; + struct cpu_thread *cpu; + int64_t rc = OPAL_SUCCESS; + int i; + + prlog(PR_DEBUG, "OPAL: CPU re-init with flags: 0x%llx\n", flags); + + if (flags & OPAL_REINIT_CPUS_HILE_LE) + prlog(PR_INFO, "OPAL: Switch to little-endian OS\n"); + else if (flags & OPAL_REINIT_CPUS_HILE_BE) + prlog(PR_INFO, "OPAL: Switch to big-endian OS\n"); + + again: + lock(&reinit_lock); + + for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) { + if (cpu == this_cpu() || cpu->in_reinit) + continue; + if (cpu->state == cpu_state_os) { + unlock(&reinit_lock); + /* + * That might be a race with return CPU during kexec + * where we are still, wait a bit and try again + */ + for (i = 0; (i < 1000) && + (cpu->state == cpu_state_os); i++) { + time_wait_ms(1); + } + if (cpu->state == cpu_state_os) { + prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir); + return OPAL_WRONG_STATE; + } + goto again; + } + cpu->in_reinit = true; + } + /* + * Now we need to mark ourselves "active" or we'll be skipped + * by the various "for_each_active_..." calls done by slw_reinit() + */ + this_cpu()->state = cpu_state_active; + this_cpu()->in_reinit = true; + unlock(&reinit_lock); + + /* + * This cleans up a few things left over by Linux + * that can cause problems in cases such as radix->hash + * transitions. Ideally Linux should do it but doing it + * here works around existing broken kernels. + */ + cpu_cleanup_all(); + + /* If HILE change via HID0 is supported ... 
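cpu_change_all_hid0() and cpu_cleanup_all() above share one fan-out idiom: allocate a job pointer per possible PIR, queue a job on every other available CPU, do the work locally, then wait for and free the remote jobs. The same idiom reduced to a sketch (run_on_all_cpus() is an illustrative name, not an existing skiboot helper):

static void run_on_all_cpus(const char *name, void (*fn)(void *), void *arg)
{
	struct cpu_thread *cpu;
	struct cpu_job **jobs;

	jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1));
	assert(jobs);

	for_each_available_cpu(cpu) {
		if (cpu == this_cpu())
			continue;
		jobs[cpu->pir] = cpu_queue_job(cpu, name, fn, arg);
	}

	/* Run it on this CPU while the others pick up their jobs */
	fn(arg);

	for_each_available_cpu(cpu) {
		if (jobs[cpu->pir])
			cpu_wait_job(jobs[cpu->pir], true);
	}

	free(jobs);
}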
*/ + if (hile_supported && + (flags & (OPAL_REINIT_CPUS_HILE_BE | + OPAL_REINIT_CPUS_HILE_LE))) { + bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE); + + flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE); + if (hile != current_hile_mode) { + if (hile) + req.set_bits |= hid0_hile; + else + req.clr_bits |= hid0_hile; + current_hile_mode = hile; + } + } + + /* If MMU mode change is supported */ + if (radix_supported && + (flags & (OPAL_REINIT_CPUS_MMU_HASH | + OPAL_REINIT_CPUS_MMU_RADIX))) { + bool radix = !!(flags & OPAL_REINIT_CPUS_MMU_RADIX); + + flags &= ~(OPAL_REINIT_CPUS_MMU_HASH | + OPAL_REINIT_CPUS_MMU_RADIX); + + if (proc_gen == proc_gen_p9 && radix != current_radix_mode) { + if (radix) + req.set_bits |= SPR_HID0_POWER9_RADIX; + else + req.clr_bits |= SPR_HID0_POWER9_RADIX; + + current_radix_mode = radix; + } + } + + /* Cleanup the TLB. We do that unconditionally, this works + * around issues where OSes fail to invalidate the PWC in Radix + * mode for example. This only works on P9 and later, but we + * also know we don't have a problem with Linux cleanups on + * P8 so this isn't a problem. If we wanted to cleanup the + * TLB on P8 as well, we'd have to use jobs to do it locally + * on each CPU. + */ + cleanup_global_tlb(); + + /* Apply HID bits changes if any */ + if (req.set_bits || req.clr_bits) + cpu_change_all_hid0(&req); + + if (flags & OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED) { + flags &= ~OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED; + + if (tm_suspend_enabled) + rc = OPAL_UNSUPPORTED; + else + rc = OPAL_SUCCESS; + } + + /* Handle P8 DD1 SLW reinit */ + if (flags != 0 && proc_gen == proc_gen_p8 && !hile_supported) + rc = slw_reinit(flags); + else if (flags != 0) + rc = OPAL_UNSUPPORTED; + + /* And undo the above */ + lock(&reinit_lock); + this_cpu()->state = cpu_state_os; + for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) + cpu->in_reinit = false; + unlock(&reinit_lock); + + return rc; +} +opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1); + +#define NMMU_XLAT_CTL_PTCR 0xb +static int64_t nmmu_set_ptcr(uint64_t chip_id, struct dt_node *node, uint64_t ptcr) +{ + uint32_t nmmu_base_addr; + + nmmu_base_addr = dt_get_address(node, 0, NULL); + return xscom_write(chip_id, nmmu_base_addr + NMMU_XLAT_CTL_PTCR, ptcr); +} + +/* + * Setup the the Nest MMU PTCR register for all chips in the system or + * the specified chip id. + * + * The PTCR value may be overwritten so long as all users have been + * quiesced. If it is set to an invalid memory address the system will + * checkstop if anything attempts to use it. + * + * Returns OPAL_UNSUPPORTED if no nest mmu was found. 
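A caller-side sketch of how an operating system might use this service once it has built its partition table: passing -1 as the chip id programs every nest MMU, as described above. The wrapper name stands for whatever OPAL call stub the OS generates for OPAL_NMMU_SET_PTCR; it is not the static function defined below.

static int64_t os_point_nest_mmus_at(uint64_t ptcr_phys)
{
	/*
	 * All nest MMU users must be quiesced before the PTCR is
	 * rewritten; chip_id -1 selects every chip with a nest MMU.
	 */
	return opal_nmmu_set_ptcr(-1ULL, ptcr_phys);
}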
+ */ +static int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr) +{ + struct dt_node *node; + int64_t rc = OPAL_UNSUPPORTED; + + if (chip_id == -1ULL) + dt_for_each_compatible(dt_root, node, "ibm,power9-nest-mmu") { + chip_id = dt_get_chip_id(node); + if ((rc = nmmu_set_ptcr(chip_id, node, ptcr))) + return rc; + } + else + dt_for_each_compatible_on_chip(dt_root, node, "ibm,power9-nest-mmu", chip_id) + if ((rc = nmmu_set_ptcr(chip_id, node, ptcr))) + return rc; + + return rc; +} +opal_call(OPAL_NMMU_SET_PTCR, opal_nmmu_set_ptcr, 2); + +static void _exit_uv_mode(void *data __unused) +{ + prlog(PR_DEBUG, "Exit uv mode on cpu pir 0x%04x\n", this_cpu()->pir); + /* HW has smfctrl shared between threads but on Mambo it is per-thread */ + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + exit_uv_mode(1); + else + exit_uv_mode(cpu_is_thread0(this_cpu())); +} + +void cpu_disable_pef(void) +{ + struct cpu_thread *cpu; + struct cpu_job **jobs; + + if (!(mfmsr() & MSR_S)) { + prlog(PR_DEBUG, "UV mode off on cpu pir 0x%04x\n", this_cpu()->pir); + return; + } + + jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1)); + assert(jobs); + + /* Exit uv mode on all secondary threads before touching + * smfctrl on thread 0 */ + for_each_available_cpu(cpu) { + if (cpu == this_cpu()) + continue; + + if (!cpu_is_thread0(cpu)) + jobs[cpu->pir] = cpu_queue_job(cpu, "exit_uv_mode", + _exit_uv_mode, NULL); + } + + for_each_available_cpu(cpu) + if (jobs[cpu->pir]) { + cpu_wait_job(jobs[cpu->pir], true); + jobs[cpu->pir] = NULL; + } + + /* Exit uv mode and disable smfctrl on primary threads */ + for_each_available_cpu(cpu) { + if (cpu == this_cpu()) + continue; + + if (cpu_is_thread0(cpu)) + jobs[cpu->pir] = cpu_queue_job(cpu, "exit_uv_mode", + _exit_uv_mode, NULL); + } + + for_each_available_cpu(cpu) + if (jobs[cpu->pir]) + cpu_wait_job(jobs[cpu->pir], true); + + free(jobs); + + _exit_uv_mode(NULL); +} diff --git a/roms/skiboot/core/cpufeatures.c b/roms/skiboot/core/cpufeatures.c new file mode 100644 index 000000000..5620b741d --- /dev/null +++ b/roms/skiboot/core/cpufeatures.c @@ -0,0 +1,1043 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * This file deals with setup of /cpus/ibm,powerpc-cpu-features dt + * + * Copyright 2017-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <cpu.h> +#include <processor.h> +#include <ccan/str/str.h> +#include <device.h> + +#ifdef DEBUG +#define DBG(fmt, a...) prlog(PR_DEBUG, "CPUFT: " fmt, ##a) +#else +#define DBG(fmt, a...) 
+#endif + +/* Device-tree visible constants follow */ +#define ISA_V2_07B 2070 +#define ISA_V3_0B 3000 +#define ISA_V3_1 3100 + +#define USABLE_PR (1U << 0) +#define USABLE_OS (1U << 1) +#define USABLE_HV (1U << 2) + +#define HV_SUPPORT_HFSCR (1U << 0) +#define OS_SUPPORT_FSCR (1U << 0) + +/* Following are definitions for the match tables, not the DT binding itself */ +#define ISA_BASE 0 + +#define HV_NONE 0 +#define HV_CUSTOM 1 +#define HV_HFSCR 2 + +#define OS_NONE 0 +#define OS_CUSTOM 1 +#define OS_FSCR 2 + +/* CPU bitmasks for match table */ +#define CPU_P8_DD1 (1U << 0) +#define CPU_P8_DD2 (1U << 1) +#define CPU_P9_DD1 (1U << 2) +#define CPU_P9_DD2_0_1 (1U << 3) // 2.01 or 2.1 +#define CPU_P9P (1U << 4) +#define CPU_P9_DD2_2 (1U << 5) +#define CPU_P9_DD2_3 (1U << 6) +#define CPU_P10 (1U << 7) + +#define CPU_P9_DD2 (CPU_P9_DD2_0_1|CPU_P9_DD2_2|CPU_P9_DD2_3|CPU_P9P) + +#define CPU_P8 (CPU_P8_DD1|CPU_P8_DD2) +#define CPU_P9 (CPU_P9_DD1|CPU_P9_DD2|CPU_P9P) +#define CPU_ALL (CPU_P8|CPU_P9|CPU_P10) + +struct cpu_feature { + const char *name; + uint32_t cpus_supported; + uint32_t isa; + uint32_t usable_privilege; + uint32_t hv_support; + uint32_t os_support; + uint32_t hfscr_bit_nr; + uint32_t fscr_bit_nr; + uint32_t hwcap_bit_nr; + const char *dependencies_names; /* space-delimited names */ +}; + +/* + * The base (or NULL) cpu feature set is the CPU features available + * when no child nodes of the /cpus/ibm,powerpc-cpu-features node exist. The + * base feature set is POWER8 (ISAv2.07B), less features that are listed + * explicitly. + * + * XXX: currently, the feature dependencies are not necessarily captured + * exactly or completely. This is somewhat acceptable because all + * implementations must be aware of all these features. + */ +static const struct cpu_feature cpu_features_table[] = { + /* + * Big endian as in ISAv2.07B, MSR_LE=0 + */ + { "big-endian", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * Little endian as in ISAv2.07B, MSR_LE=1. + * + * When both big and little endian are defined, there is an LPCR ILE + * bit and implementation specific way to switch HILE mode, MSR_SLE, + * etc. + */ + { "little-endian", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * MSR_HV=1 mode as in ISAv2.07B (i.e., hypervisor privileged + * instructions and registers). + */ + { "hypervisor", + CPU_ALL, + ISA_BASE, USABLE_HV, + HV_CUSTOM, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B interrupt vectors, registers, and control registers + * (e.g., AIL, ILE, HV, etc LPCR bits). + * + * This does not necessarily specify all possible interrupt types. + * floating-point, for example requires some ways to handle floating + * point exceptions, but the low level details of interrupt handler + * is not a dependency there. There will always be *some* interrupt + * handler, (and some way to provide memory magagement, etc.). + */ + { "interrupt-facilities", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + { "smt", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + -1, -1, 14, + NULL, }, + + /* + * ISAv2.07B Program Priority Registers (PPR) + * PPR and associated control registers (e.g. RPR, PSPB), + * priority "or" instructions, etc. 
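Each entry in this table becomes a child node of /cpus/ibm,powerpc-cpu-features, and usable-privilege, hv-support/hfscr-bit-nr and os-support/fscr-bit-nr are what a consumer keys off. A consumer-side sketch, not skiboot code, of honouring one such node; SPR_HFSCR and the 1ull << bit convention are assumptions inferred from the PPC_BITLSHIFT() values used in the table:

static void enable_feature_sketch(struct dt_node *feature)
{
	uint32_t priv = dt_prop_get_u32(feature, "usable-privilege");

	if ((priv & USABLE_HV) &&
	    dt_find_property(feature, "hfscr-bit-nr")) {
		uint32_t bit = dt_prop_get_u32(feature, "hfscr-bit-nr");

		/* Bit numbers are little-endian, so bit 0 is the LSB */
		mtspr(SPR_HFSCR, mfspr(SPR_HFSCR) | (1ull << bit));
	}
}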
+ */ + { "program-priority-register", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B Book3S Chapter 5.7.9.1. Virtual Page Class Key Protecion + * AMR, IAMR, AMOR, UAMOR, etc registers and MMU key bits. + */ + { "virtual-page-class-key-protection", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B SAO storage control attribute + */ + { "strong-access-ordering", + CPU_ALL & ~CPU_P9_DD1, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B no-execute storage control attribute + */ + { "no-execute", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * Cache inhibited attribute supported on large pages. + */ + { "cache-inhibited-large-page", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B Book3S Chapter 8. Debug Facilities + * CIEA, CIABR, DEAW, MEte, trace interrupt, etc. + * Except CFAR, branch tracing. + */ + { "debug-facilities", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * DAWR1, DAWRX1 etc. + */ + { "debug-facilities-v31", + CPU_P10, + ISA_V3_1, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B CFAR + */ + { "come-from-address-register", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + "debug-facilities", }, + + /* + * ISAv2.07B Branch tracing (optional in ISA) + */ + { "branch-tracing", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + "debug-facilities", }, + + /* + * ISAv2.07B Floating-point Facility + */ + { "floating-point", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + PPC_BITLSHIFT(63), -1, 27, + NULL, }, + + /* + * ISAv2.07B Vector Facility (VMX) + */ + { "vector", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + PPC_BITLSHIFT(62), -1, 28, + "floating-point", }, + + /* + * ISAv2.07B Vector-scalar Facility (VSX) + */ + { "vector-scalar", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + -1, -1, 7, + "vector", }, + + { "vector-crypto", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, 57, + "vector", }, + + /* + * ISAv2.07B Quadword Load and Store instructions + * including lqarx/stdqcx. instructions. 
+ */ + { "quadword-load-store", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B Binary Coded Decimal (BCD) + * BCD fixed point instructions + */ + { "decimal-integer", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B Decimal floating-point Facility (DFP) + */ + { "decimal-floating-point", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, 10, + "floating-point", }, + + /* + * ISAv2.07B + * DSCR, default data prefetch LPCR, etc + */ + { "data-stream-control-register", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + PPC_BITLSHIFT(61), PPC_BITLSHIFT(61), 61, + NULL, }, + + /* + * ISAv2.07B Branch History Rolling Buffer (BHRB) + */ + { "branch-history-rolling-buffer", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + PPC_BITLSHIFT(59), -1, -1, + NULL, }, + + /* + * ISAv2.07B Transactional Memory Facility (TM or HTM) + */ + { "transactional-memory", + CPU_P8, /* P9 support is not enabled yet */ + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + PPC_BITLSHIFT(58), -1, 62, + NULL, }, + + /* + * ISAv3.0B TM additions + * TEXASR bit 17, self-induced vs external footprint overflow + */ + { "transactional-memory-v3", + 0, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "transactional-memory", }, + + /* + * ISAv2.07B Event-Based Branch Facility (EBB) + */ + { "event-based-branch", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + PPC_BITLSHIFT(56), PPC_BITLSHIFT(56), 60, + NULL, }, + + /* + * ISAv2.07B Target Address Register (TAR) + */ + { "target-address-register", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + PPC_BITLSHIFT(55), PPC_BITLSHIFT(55), 58, + NULL, }, + + /* + * ISAv2.07B Control Register (CTRL) + */ + { "control-register", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B Book3S Chapter 11. Processor Control. + * msgsnd, msgsndp, doorbell, etc. + * + * ISAv3.0B is not compatible (different addressing, HFSCR required + * for msgsndp). + */ + { "processor-control-facility", + CPU_P8_DD2, /* P8 DD1 has no dbell */ + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B PURR, SPURR registers + */ + { "processor-utilization-of-resources-register", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * POWER8 initiate coprocessor store word indexed (icswx) instruction + */ + { "coprocessor-icswx", + CPU_P8, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B hash based MMU and all instructions, registers, + * data structures, exceptions, etc. + */ + { "mmu-hash", + CPU_P8, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * POWER8 MCE / machine check exception. + */ + { "machine-check-power8", + CPU_P8, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * POWER8 PMU / performance monitor unit. + */ + { "performance-monitor-power8", + CPU_P8, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B alignment interrupts set DSISR register + * + * POWER CPUs do not used this, and it's removed from ISAv3.0B. 
+ */ + { "alignment-interrupt-dsisr", + 0, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B / POWER8 doze, nap, sleep, winkle instructions + * XXX: is Linux we using some BookIV specific implementation details + * in nap handling? We have no POWER8 specific key here. + */ + { "idle-nap", + CPU_P8, + ISA_BASE, USABLE_HV, + HV_CUSTOM, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B wait instruction + */ + { "wait", + CPU_P8, + ISA_BASE, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + { "subcore", + CPU_P8, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + "smt", }, + + /* + * ISAv3.0B radix based MMU + */ + { "mmu-radix", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B hash based MMU, new hash pte format, PCTR, etc + */ + { "mmu-hash-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B wait instruction + */ + { "wait-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B stop idle instructions and registers + * XXX: Same question as for idle-nap + */ + { "idle-stop", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B Hypervisor Virtualization Interrupt + * Also associated system registers, LPCR EE, HEIC, HVICE, + * system reset SRR1 reason, etc. + */ + { "hypervisor-virtualization-interrupt", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV, + HV_CUSTOM, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * POWER9 MCE / machine check exception. + */ + { "machine-check-power9", + CPU_P9, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * POWER10 MCE / machine check exception. + */ + { "machine-check-power10", + CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * POWER9 PMU / performance monitor unit. + */ + { "performance-monitor-power9", + CPU_P9, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * POWER10 PMU / performance monitor unit. + */ + { "performance-monitor-power10", + CPU_P10, + ISA_V3_1, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_CUSTOM, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B scv/rfscv system call instructions and exceptions, fscr bit + * etc. + */ + { "system-call-vectored", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_OS|USABLE_PR, + HV_NONE, OS_CUSTOM, + -1, PPC_BITLSHIFT(51), 52, + NULL, }, + + /* + * ISAv3.0B Book3S Chapter 10. Processor Control. + * global msgsnd, msgsndp, msgsync, doorbell, etc. + */ + { "processor-control-facility-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_CUSTOM, OS_NONE, + PPC_BITLSHIFT(53), -1, -1, + NULL, }, + + /* + * ISAv3.0B addpcis instruction + */ + { "pc-relative-addressing", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv2.07B Book3S Chapter 7. Timer Facilities + * TB, VTB, DEC, HDEC, IC, etc registers and exceptions. + * Not including PURR or SPURR registers. + */ + { "timer-facilities", + CPU_ALL, + ISA_BASE, USABLE_HV|USABLE_OS, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B Book3S Chapter 7. 
Timer Facilities + * Large decrementer and hypervisor decrementer + */ + { "timer-facilities-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_NONE, OS_NONE, + -1, -1, -1, + "timer-facilities", }, + + /* + * ISAv3.0B deliver a random number instruction (darn) + */ + { "random-number-generator", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, 53, + NULL, }, + + /* + * ISAv3.0B fixed point instructions and registers + * multiply-add, modulo, count trailing zeroes, cmprb, cmpeqb, + * extswsli, mfvsrld, mtvsrdd, mtvsrws, addex, CA32, OV32, + * mcrxrx, setb + */ + { "fixed-point-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + { "decimal-integer-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "fixed-point-v3 decimal-integer", }, + + /* + * ISAv3.0B lightweight mffs + */ + { "floating-point-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "floating-point", }, + + { "decimal-floating-point-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "floating-point-v3 decimal-floating-point", }, + + { "vector-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "vector", }, + + { "vector-scalar-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "vector-v3 vector-scalar" }, + + { "vector-binary128", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, 54, + "vector-scalar-v3", }, + + { "vector-binary16", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "vector-v3", }, + + /* + * ISAv3.0B external exception for EBB + */ + { "event-based-branch-v3", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + "event-based-branch", }, + + /* + * ISAv3.0B Atomic Memory Operations (AMO) + */ + { "atomic-memory-operations", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B Copy-Paste Facility + */ + { "copy-paste", + CPU_P9|CPU_P10, + ISA_V3_0B, USABLE_HV|USABLE_OS|USABLE_PR, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * ISAv3.0B GSR SPR register + * POWER9 does not implement it + */ + { "group-start-register", + 0, + ISA_V3_0B, USABLE_HV|USABLE_OS, + HV_NONE, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * Enable matrix multiply accumulate. + */ + { "matrix-multiply-accumulate", + CPU_P10, + ISA_V3_1, USABLE_PR, + HV_CUSTOM, OS_CUSTOM, + -1, -1, 49, + NULL, }, + + /* + * Enable prefix instructions. Toolchains assume this is + * enabled for when compiling for ISA 3.1. + */ + { "prefix-instructions", + CPU_P10, + ISA_V3_1, USABLE_HV|USABLE_OS|USABLE_PR, + HV_HFSCR, OS_FSCR, + 13, 13, -1, + NULL, }, + + /* + * Due to hardware bugs in POWER9, the hypervisor needs to assist + * guests. + * + * Presence of this feature indicates presence of the bug. 
+ * + * See linux kernel commit 4bb3c7a0208f + * and linux Documentation/powerpc/transactional_memory.txt + */ + { "tm-suspend-hypervisor-assist", + CPU_P9_DD2_2|CPU_P9_DD2_3|CPU_P9P, + ISA_V3_0B, USABLE_HV, + HV_CUSTOM, OS_NONE, + -1, -1, -1, + NULL, }, + + /* + * Due to hardware bugs in POWER9, the hypervisor can hit + * CPU bugs in the operations it needs to do for + * tm-suspend-hypervisor-assist. + * + * Presence of this "feature" means processor is affected by the bug. + * + * See linux kernel commit 4bb3c7a0208f + * and linux Documentation/powerpc/transactional_memory.txt + */ + { "tm-suspend-xer-so-bug", + CPU_P9_DD2_2, + ISA_V3_0B, USABLE_HV, + HV_CUSTOM, OS_NONE, + -1, -1, -1, + NULL, }, +}; + +static void add_cpu_feature_nodeps(struct dt_node *features, + const struct cpu_feature *f) +{ + struct dt_node *feature; + + feature = dt_new(features, f->name); + assert(feature); + + dt_add_property_cells(feature, "isa", f->isa); + dt_add_property_cells(feature, "usable-privilege", f->usable_privilege); + + if (f->usable_privilege & USABLE_HV) { + if (f->hv_support != HV_NONE) { + uint32_t s = 0; + if (f->hv_support == HV_HFSCR) + s |= HV_SUPPORT_HFSCR; + + dt_add_property_cells(feature, "hv-support", s); + if (f->hfscr_bit_nr != -1) + dt_add_property_cells(feature, "hfscr-bit-nr", f->hfscr_bit_nr); + } else { + assert(f->hfscr_bit_nr == -1); + } + } + + if (f->usable_privilege & USABLE_OS) { + if (f->os_support != OS_NONE) { + uint32_t s = 0; + if (f->os_support == OS_FSCR) + s |= OS_SUPPORT_FSCR; + dt_add_property_cells(feature, "os-support", s); + if (f->fscr_bit_nr != -1) + dt_add_property_cells(feature, "fscr-bit-nr", f->fscr_bit_nr); + } else { + assert(f->fscr_bit_nr == -1); + } + } + + if (f->usable_privilege & USABLE_PR) { + if (f->hwcap_bit_nr != -1) + dt_add_property_cells(feature, "hwcap-bit-nr", f->hwcap_bit_nr); + } + + if (f->dependencies_names) + dt_add_property(feature, "dependencies", NULL, 0); +} + +static void add_cpufeatures_dependencies(struct dt_node *features) +{ + struct dt_node *feature; + + dt_for_each_node(features, feature) { + const struct cpu_feature *f = NULL; + const char *deps_names; + struct dt_property *deps; + int nr_deps; + int i; + + /* Find features with dependencies */ + + deps = __dt_find_property(feature, "dependencies"); + if (!deps) + continue; + + /* Find the matching cpu table */ + for (i = 0; i < ARRAY_SIZE(cpu_features_table); i++) { + f = &cpu_features_table[i]; + if (!strcmp(f->name, feature->name)) + break; + } + assert(f); + assert(f->dependencies_names); + + /* + * Count number of depended features and allocate space + * for phandles in the property. + */ + deps_names = f->dependencies_names; + nr_deps = strcount(deps_names, " ") + 1; + dt_resize_property(&deps, nr_deps * sizeof(u32)); + + DBG("feature %s has %d dependencies (%s)\n", f->name, nr_deps, deps_names); + /* + * For each one, find the depended feature then advance to + * next name. 
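The space-delimited dependencies_names strings (for example "floating-point-v3 decimal-floating-point") are walked name by name by the loop that follows. The same walk isolated into a sketch, with an invented callback interface to keep it self-contained:

static void for_each_dep_name(const char *names,
			      void (*cb)(const char *name, int len))
{
	while (*names) {
		const char *sp = strchr(names, ' ');
		int len = sp ? (int)(sp - names) : (int)strlen(names);

		cb(names, len);	/* one dependency name, not NUL terminated */
		names += len + (sp ? 1 : 0);
	}
}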
+ */ + for (i = 0; i < nr_deps; i++) { + struct dt_node *dep; + int len; + + if (nr_deps - i == 1) + len = strlen(deps_names); + else + len = strchr(deps_names, ' ') - deps_names; + + dt_for_each_node(features, dep) { + if (!strncmp(deps_names, dep->name, len)) + goto found_dep; + } + + prlog(PR_ERR, "CPUFT: feature %s dependencies not found\n", f->name); + break; +found_dep: + DBG(" %s found dep (%s)\n", f->name, dep->name); + dt_property_set_cell(deps, i, dep->phandle); + + /* Advance over the name + delimiter */ + deps_names += len + 1; + } + } +} + +static void add_cpufeatures(struct dt_node *cpus, + uint32_t cpu_feature_isa, uint32_t cpu_feature_cpu, + const char *cpu_name) +{ + struct dt_node *features; + int i; + + DBG("creating cpufeatures for cpu:%d isa:%d\n", cpu_feature_cpu, cpu_feature_isa); + + features = dt_new(cpus, "ibm,powerpc-cpu-features"); + assert(features); + + dt_add_property_cells(features, "isa", cpu_feature_isa); + + dt_add_property_string(features, "device_type", "cpu-features"); + dt_add_property_string(features, "compatible", "ibm,powerpc-cpu-features"); + dt_add_property_string(features, "display-name", cpu_name); + + /* add without dependencies */ + for (i = 0; i < ARRAY_SIZE(cpu_features_table); i++) { + const struct cpu_feature *f = &cpu_features_table[i]; + + if (f->cpus_supported & cpu_feature_cpu) { + DBG(" '%s'\n", f->name); + add_cpu_feature_nodeps(features, f); + } + } + + /* dependency construction pass */ + add_cpufeatures_dependencies(features); +} + +void dt_add_cpufeatures(struct dt_node *root) +{ + int version; + uint32_t cpu_feature_isa = 0; + uint32_t cpu_feature_cpu = 0; + struct dt_node *cpus; + const char *cpu_name = NULL; + + version = mfspr(SPR_PVR); + switch(PVR_TYPE(version)) { + case PVR_TYPE_P8: + if (!cpu_name) + cpu_name = "POWER8"; + /* fallthrough */ + case PVR_TYPE_P8E: + if (!cpu_name) + cpu_name = "POWER8E"; + /* fallthrough */ + cpu_feature_isa = ISA_V2_07B; + if (PVR_VERS_MAJ(version) == 1) + cpu_feature_cpu = CPU_P8_DD1; + else + cpu_feature_cpu = CPU_P8_DD2; + break; + case PVR_TYPE_P8NVL: + cpu_name = "POWER8NVL"; + cpu_feature_isa = ISA_V2_07B; + cpu_feature_cpu = CPU_P8_DD2; + break; + case PVR_TYPE_P9: + if (!cpu_name) + cpu_name = "POWER9"; + + cpu_feature_isa = ISA_V3_0B; + if (is_power9n(version) && + (PVR_VERS_MAJ(version) == 2)) { + /* P9N DD2.x */ + switch (PVR_VERS_MIN(version)) { + case 0: + case 1: + cpu_feature_cpu = CPU_P9_DD2_0_1; + break; + case 2: + cpu_feature_cpu = CPU_P9_DD2_2; + break; + case 3: + cpu_feature_cpu = CPU_P9_DD2_3; + break; + default: + assert(0); + } + } else if (is_power9c(version) && + (PVR_VERS_MAJ(version) == 1)) { + /* P9C DD1.x */ + switch (PVR_VERS_MIN(version)) { + case 1: + /* Cumulus DD1.1 => Nimbus DD2.1 */ + cpu_feature_cpu = CPU_P9_DD2_0_1; + break; + case 2: + /* Cumulus DD1.2 */ + cpu_feature_cpu = CPU_P9_DD2_2; + break; + case 3: + /* Cumulus DD1.3 */ + cpu_feature_cpu = CPU_P9_DD2_3; + break; + default: + assert(0); + } + } else { + assert(0); + } + + break; + case PVR_TYPE_P9P: + if (!cpu_name) + cpu_name = "POWER9P"; + + cpu_feature_isa = ISA_V3_0B; + cpu_feature_cpu = CPU_P9P; + break; + case PVR_TYPE_P10: + if (!cpu_name) + cpu_name = "POWER10"; + + cpu_feature_isa = ISA_V3_1; + cpu_feature_cpu = CPU_P10; + break; + default: + return; + } + + cpus = dt_new_check(root, "cpus"); + + add_cpufeatures(cpus, cpu_feature_isa, cpu_feature_cpu, cpu_name); +} diff --git a/roms/skiboot/core/device.c b/roms/skiboot/core/device.c new file mode 100644 index 000000000..b102dd973 
--- /dev/null +++ b/roms/skiboot/core/device.c @@ -0,0 +1,1128 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Manipulate the device tree + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <stdarg.h> +#include <device.h> +#include <stdlib.h> +#include <skiboot.h> +#include <libfdt/libfdt.h> +#include <libfdt/libfdt_internal.h> +#include <ccan/str/str.h> +#include <ccan/endian/endian.h> +#include <inttypes.h> + +/* Used to give unique handles. */ +u32 last_phandle = 0; + +struct dt_node *dt_root; +struct dt_node *dt_chosen; + +static const char *take_name(const char *name) +{ + if (!is_rodata(name) && !(name = strdup(name))) { + prerror("Failed to allocate copy of name"); + abort(); + } + return name; +} + +static void free_name(const char *name) +{ + if (!is_rodata(name)) + free((char *)name); +} + +static struct dt_node *new_node(const char *name) +{ + struct dt_node *node = malloc(sizeof *node); + if (!node) { + prerror("Failed to allocate node\n"); + abort(); + } + + node->name = take_name(name); + node->parent = NULL; + list_head_init(&node->properties); + list_head_init(&node->children); + /* FIXME: locking? */ + node->phandle = new_phandle(); + return node; +} + +struct dt_node *dt_new_root(const char *name) +{ + return new_node(name); +} + +static const char *get_unitname(const struct dt_node *node) +{ + const char *c = strchr(node->name, '@'); + + if (!c) + return NULL; + + return c + 1; +} + +int dt_cmp_subnodes(const struct dt_node *a, const struct dt_node *b) +{ + const char *a_unit = get_unitname(a); + const char *b_unit = get_unitname(b); + + ptrdiff_t basenamelen = a_unit - a->name; + + /* sort hex unit addresses by number */ + if (a_unit && b_unit && !strncmp(a->name, b->name, basenamelen)) { + unsigned long long a_num, b_num; + char *a_end, *b_end; + + a_num = strtoul(a_unit, &a_end, 16); + b_num = strtoul(b_unit, &b_end, 16); + + /* only compare if the unit addr parsed correctly */ + if (*a_end == 0 && *b_end == 0) + return (a_num > b_num) - (a_num < b_num); + } + + return strcmp(a->name, b->name); +} + +bool dt_attach_root(struct dt_node *parent, struct dt_node *root) +{ + struct dt_node *node; + + assert(!root->parent); + + if (list_empty(&parent->children)) { + list_add(&parent->children, &root->list); + root->parent = parent; + + return true; + } + + dt_for_each_child(parent, node) { + int cmp = dt_cmp_subnodes(node, root); + + /* Look for duplicates */ + if (cmp == 0) { + prerror("DT: %s failed, duplicate %s\n", + __func__, root->name); + return false; + } + + /* insert before the first node that's larger + * the the node we're inserting */ + if (cmp > 0) + break; + } + + list_add_before(&parent->children, &root->list, &node->list); + root->parent = parent; + + return true; +} + +static inline void dt_destroy(struct dt_node *dn) +{ + if (!dn) + return; + + free_name(dn->name); + free(dn); +} + +struct dt_node *dt_new(struct dt_node *parent, const char *name) +{ + struct dt_node *new; + assert(parent); + + new = new_node(name); + if (!dt_attach_root(parent, new)) { + dt_destroy(new); + return NULL; + } + return new; +} + +/* + * low level variant, we export this because there are "weird" address + * formats, such as LPC/ISA bus addresses which have a letter to identify + * which bus space the address is inside of. 
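For example, skiboot's LPC bus children carry ISA I/O-space unit addresses such as "serial@i3f8" (the node name here is illustrative); because the unit address is compared as a plain string, the caller passes it verbatim to the function defined below:

static struct dt_node *find_lpc_serial(struct dt_node *lpc)
{
	/* Matches a child named "serial@i3f8" */
	return __dt_find_by_name_addr(lpc, "serial", "i3f8");
}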
+ */ +struct dt_node *__dt_find_by_name_addr(struct dt_node *parent, const char *name, + const char *addr) +{ + struct dt_node *node; + + if (list_empty(&parent->children)) + return NULL; + + dt_for_each_child(parent, node) { + const char *unit = get_unitname(node); + int len; + + if (!unit) + continue; + + /* match the name */ + len = (int) (unit - node->name) - 1; + if (strncmp(node->name, name, len)) + continue; + + /* match the unit */ + if (strcmp(unit, addr) == 0) + return node; + } + + dt_for_each_child(parent, node) { + struct dt_node *ret = __dt_find_by_name_addr(node, name, addr); + + if (ret) + return ret; + } + + return NULL; +} + +struct dt_node *dt_find_by_name_addr(struct dt_node *parent, const char *name, + uint64_t addr) +{ + char addr_str[16 + 1]; /* max size of a 64bit int */ + snprintf(addr_str, sizeof(addr_str), "%" PRIx64, addr); + + return __dt_find_by_name_addr(parent, name, addr_str); +} + +struct dt_node *dt_new_addr(struct dt_node *parent, const char *name, + uint64_t addr) +{ + char *lname; + struct dt_node *new; + size_t len; + + assert(parent); + len = strlen(name) + STR_MAX_CHARS(addr) + 2; + lname = malloc(len); + if (!lname) + return NULL; + snprintf(lname, len, "%s@%llx", name, (long long)addr); + new = new_node(lname); + free(lname); + if (!dt_attach_root(parent, new)) { + dt_destroy(new); + return NULL; + } + return new; +} + +struct dt_node *dt_new_2addr(struct dt_node *parent, const char *name, + uint64_t addr0, uint64_t addr1) +{ + char *lname; + struct dt_node *new; + size_t len; + assert(parent); + + len = strlen(name) + 2*STR_MAX_CHARS(addr0) + 3; + lname = malloc(len); + if (!lname) + return NULL; + snprintf(lname, len, "%s@%llx,%llx", + name, (long long)addr0, (long long)addr1); + new = new_node(lname); + free(lname); + if (!dt_attach_root(parent, new)) { + dt_destroy(new); + return NULL; + } + return new; +} + +static struct dt_node *__dt_copy(struct dt_node *node, struct dt_node *parent, + bool root) +{ + struct dt_property *prop, *new_prop; + struct dt_node *new_node, *child; + + new_node = dt_new(parent, node->name); + if (!new_node) + return NULL; + + list_for_each(&node->properties, prop, list) { + new_prop = dt_add_property(new_node, prop->name, prop->prop, + prop->len); + if (!new_prop) + goto fail; + } + + list_for_each(&node->children, child, list) { + child = __dt_copy(child, new_node, false); + if (!child) + goto fail; + } + + return new_node; + +fail: + /* dt_free will recurse for us, so only free when we unwind to the + * top-level failure */ + if (root) + dt_free(new_node); + return NULL; +} + +struct dt_node *dt_copy(struct dt_node *node, struct dt_node *parent) +{ + return __dt_copy(node, parent, true); +} + +char *dt_get_path(const struct dt_node *node) +{ + unsigned int len = 0; + const struct dt_node *n; + char *path, *p; + + /* Dealing with NULL is for test/debug purposes */ + if (!node) + return strdup("<NULL>"); + + for (n = node; n; n = n->parent) { + len += strlen(n->name); + if (n->parent || n == node) + len++; + } + path = zalloc(len + 1); + assert(path); + p = path + len; + for (n = node; n; n = n->parent) { + len = strlen(n->name); + p -= len; + memcpy(p, n->name, len); + if (n->parent || n == node) + *(--p) = '/'; + } + assert(p == path); + + return p; +} + +static const char *__dt_path_split(const char *p, + const char **namep, unsigned int *namel, + const char **addrp, unsigned int *addrl) +{ + const char *at, *sl; + + *namel = *addrl = 0; + + /* Skip initial '/' */ + while (*p == '/') + p++; + + /* Check empty 
path */ + if (*p == 0) + return p; + + at = strchr(p, '@'); + sl = strchr(p, '/'); + if (sl == NULL) + sl = p + strlen(p); + if (sl < at) + at = NULL; + if (at) { + *addrp = at + 1; + *addrl = sl - at - 1; + } + *namep = p; + *namel = at ? (at - p) : (sl - p); + + return sl; +} + +struct dt_node *dt_find_by_path(struct dt_node *root, const char *path) +{ + struct dt_node *n; + const char *pn, *pa, *p = path, *nn, *na; + unsigned int pnl, pal, nnl, nal; + bool match; + + /* Walk path components */ + while (*p) { + /* Extract next path component */ + p = __dt_path_split(p, &pn, &pnl, &pa, &pal); + if (pnl == 0 && pal == 0) + break; + + /* Compare with each child node */ + match = false; + list_for_each(&root->children, n, list) { + match = true; + __dt_path_split(n->name, &nn, &nnl, &na, &nal); + if (pnl && (pnl != nnl || strncmp(pn, nn, pnl))) + match = false; + if (pal && (pal != nal || strncmp(pa, na, pal))) + match = false; + if (match) { + root = n; + break; + } + } + + /* No child match */ + if (!match) + return NULL; + } + return root; +} + +struct dt_node *dt_find_by_name(struct dt_node *root, const char *name) +{ + struct dt_node *child, *match; + + list_for_each(&root->children, child, list) { + if (!strcmp(child->name, name)) + return child; + + match = dt_find_by_name(child, name); + if (match) + return match; + } + + return NULL; +} + + +struct dt_node *dt_new_check(struct dt_node *parent, const char *name) +{ + struct dt_node *node = dt_find_by_name(parent, name); + + if (!node) { + node = dt_new(parent, name); + assert(node); + } + + return node; +} + + +struct dt_node *dt_find_by_phandle(struct dt_node *root, u32 phandle) +{ + struct dt_node *node; + + dt_for_each_node(root, node) + if (node->phandle == phandle) + return node; + return NULL; +} + +static struct dt_property *new_property(struct dt_node *node, + const char *name, size_t size) +{ + struct dt_property *p = malloc(sizeof(*p) + size); + char *path; + + if (!p) { + path = dt_get_path(node); + prerror("Failed to allocate property \"%s\" for %s of %zu bytes\n", + name, path, size); + free(path); + abort(); + } + if (dt_find_property(node, name)) { + path = dt_get_path(node); + prerror("Duplicate property \"%s\" in node %s\n", + name, path); + free(path); + abort(); + + } + + p->name = take_name(name); + p->len = size; + list_add_tail(&node->properties, &p->list); + return p; +} + +struct dt_property *dt_add_property(struct dt_node *node, + const char *name, + const void *val, size_t size) +{ + struct dt_property *p; + + /* + * Filter out phandle properties, we re-generate them + * when flattening + */ + if (strcmp(name, "linux,phandle") == 0 || + strcmp(name, "phandle") == 0) { + assert(size == 4); + node->phandle = *(const u32 *)val; + if (node->phandle >= last_phandle) + set_last_phandle(node->phandle); + return NULL; + } + + p = new_property(node, name, size); + if (size) + memcpy(p->prop, val, size); + return p; +} + +void dt_resize_property(struct dt_property **prop, size_t len) +{ + size_t new_len = sizeof(**prop) + len; + + *prop = realloc(*prop, new_len); + (*prop)->len = len; + + /* Fix up linked lists in case we moved. (note: not an empty list). 
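A put-together usage sketch of this file's helpers (the node name and property are invented; dt_add_property_cells() and dt_prop_get_u32() appear a little further down): create the node if it does not already exist, attach a cells property, then find it again by path and read the value back.

static void device_tree_usage_sketch(void)
{
	struct dt_node *n = dt_new_check(dt_root, "example");

	dt_add_property_cells(n, "some-value", 42);

	n = dt_find_by_path(dt_root, "/example");
	assert(n);
	assert(dt_prop_get_u32(n, "some-value") == 42);
}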
*/ + (*prop)->list.next->prev = &(*prop)->list; + (*prop)->list.prev->next = &(*prop)->list; +} + +struct dt_property *dt_add_property_string(struct dt_node *node, + const char *name, + const char *value) +{ + size_t len = 0; + if (value) + len = strlen(value) + 1; + return dt_add_property(node, name, value, len); +} + +struct dt_property *dt_add_property_nstr(struct dt_node *node, + const char *name, + const char *value, unsigned int vlen) +{ + struct dt_property *p; + char *tmp = zalloc(vlen + 1); + + if (!tmp) + return NULL; + + strncpy(tmp, value, vlen); + p = dt_add_property(node, name, tmp, strlen(tmp)+1); + free(tmp); + + return p; +} + +struct dt_property *__dt_add_property_cells(struct dt_node *node, + const char *name, + int count, ...) +{ + struct dt_property *p; + fdt32_t *val; + unsigned int i; + va_list args; + + p = new_property(node, name, count * sizeof(u32)); + val = (fdt32_t *)p->prop; + va_start(args, count); + for (i = 0; i < count; i++) + val[i] = cpu_to_fdt32(va_arg(args, u32)); + va_end(args); + return p; +} + +struct dt_property *__dt_add_property_u64s(struct dt_node *node, + const char *name, + int count, ...) +{ + struct dt_property *p; + fdt64_t *val; + unsigned int i; + va_list args; + + p = new_property(node, name, count * sizeof(u64)); + val = (fdt64_t *)p->prop; + va_start(args, count); + for (i = 0; i < count; i++) + val[i] = cpu_to_fdt64(va_arg(args, u64)); + va_end(args); + return p; +} + +struct dt_property *__dt_add_property_strings(struct dt_node *node, + const char *name, + int count, ...) +{ + struct dt_property *p; + unsigned int i, size; + va_list args; + const char *sstr; + char *s; + + va_start(args, count); + for (i = size = 0; i < count; i++) { + sstr = va_arg(args, const char *); + if (sstr) + size += strlen(sstr) + 1; + } + va_end(args); + if (!size) + size = 1; + p = new_property(node, name, size); + s = (char *)p->prop; + *s = 0; + va_start(args, count); + for (i = 0; i < count; i++) { + sstr = va_arg(args, const char *); + if (sstr) { + strcpy(s, sstr); + s = s + strlen(sstr) + 1; + } + } + va_end(args); + return p; +} + +void dt_del_property(struct dt_node *node, struct dt_property *prop) +{ + list_del_from(&node->properties, &prop->list); + free_name(prop->name); + free(prop); +} + +u32 dt_property_get_cell(const struct dt_property *prop, u32 index) +{ + assert(prop->len >= (index+1)*sizeof(u32)); + /* Always aligned, so this works. */ + return fdt32_to_cpu(((const fdt32_t *)prop->prop)[index]); +} + +u64 dt_property_get_u64(const struct dt_property *prop, u32 index) +{ + assert(prop->len >= (index+1)*sizeof(u64)); + /* Always aligned, so this works. */ + return fdt64_to_cpu(((const fdt64_t *)prop->prop)[index]); +} + +void dt_property_set_cell(struct dt_property *prop, u32 index, u32 val) +{ + assert(prop->len >= (index+1)*sizeof(u32)); + /* Always aligned, so this works. */ + ((fdt32_t *)prop->prop)[index] = cpu_to_fdt32(val); +} + +/* First child of this node. */ +struct dt_node *dt_first(const struct dt_node *root) +{ + return list_top(&root->children, struct dt_node, list); +} + +/* Return next node, or NULL. */ +struct dt_node *dt_next(const struct dt_node *root, + const struct dt_node *prev) +{ + if (!prev) { + struct dt_node *first = dt_first(root); + + if (!first) + return NULL; + else + return first; + } + + /* Children? */ + if (!list_empty(&prev->children)) + return dt_first(prev); + + do { + /* More siblings? 
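dt_first() above and dt_next(), whose body continues just below, give a pre-order walk of the tree; the dt_for_each_node() macro used throughout these files is essentially this pair wrapped in a for loop. A direct-use sketch (the counting function is illustrative, not existing code):

static unsigned int count_subtree_nodes(struct dt_node *root)
{
	struct dt_node *n = NULL;
	unsigned int count = 0;

	/* dt_next(root, NULL) returns the first child, then walks depth-first */
	while ((n = dt_next(root, n)) != NULL)
		count++;

	return count;
}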
*/ + if (prev->list.next != &prev->parent->children.n) + return list_entry(prev->list.next, struct dt_node,list); + + /* No more siblings, move up to parent. */ + prev = prev->parent; + } while (prev != root); + + return NULL; +} + +struct dt_property *__dt_find_property(struct dt_node *node, const char *name) +{ + struct dt_property *i; + + list_for_each(&node->properties, i, list) + if (strcmp(i->name, name) == 0) + return i; + return NULL; +} + +const struct dt_property *dt_find_property(const struct dt_node *node, + const char *name) +{ + const struct dt_property *i; + + list_for_each(&node->properties, i, list) + if (strcmp(i->name, name) == 0) + return i; + return NULL; +} + +void dt_check_del_prop(struct dt_node *node, const char *name) +{ + struct dt_property *p; + + p = __dt_find_property(node, name); + if (p) + dt_del_property(node, p); +} +const struct dt_property *dt_require_property(const struct dt_node *node, + const char *name, int wanted_len) +{ + const struct dt_property *p = dt_find_property(node, name); + + if (!p) { + const char *path = dt_get_path(node); + + prerror("DT: Missing required property %s/%s\n", + path, name); + assert(false); + } + if (wanted_len >= 0 && p->len != wanted_len) { + const char *path = dt_get_path(node); + + prerror("DT: Unexpected property length %s/%s\n", + path, name); + prerror("DT: Expected len: %d got len: %zu\n", + wanted_len, p->len); + assert(false); + } + + return p; +} + +bool dt_has_node_property(const struct dt_node *node, + const char *name, const char *val) +{ + const struct dt_property *p = dt_find_property(node, name); + + if (!p) + return false; + if (!val) + return true; + + return p->len == strlen(val) + 1 && memcmp(p->prop, val, p->len) == 0; +} + +bool dt_prop_find_string(const struct dt_property *p, const char *s) +{ + const char *c, *end; + + if (!p) + return false; + c = p->prop; + end = c + p->len; + + while(c < end) { + if (!strcasecmp(s, c)) + return true; + c += strlen(c) + 1; + } + return false; +} + +bool dt_node_is_compatible(const struct dt_node *node, const char *compat) +{ + const struct dt_property *p = dt_find_property(node, "compatible"); + + return dt_prop_find_string(p, compat); +} + +struct dt_node *dt_find_compatible_node(struct dt_node *root, + struct dt_node *prev, + const char *compat) +{ + struct dt_node *node = prev; + + while ((node = dt_next(root, node))) + if (dt_node_is_compatible(node, compat)) + return node; + return NULL; +} + +u64 dt_prop_get_u64(const struct dt_node *node, const char *prop) +{ + const struct dt_property *p = dt_require_property(node, prop, 8); + + return ((u64)dt_property_get_cell(p, 0) << 32) + | dt_property_get_cell(p, 1); +} + +u64 dt_prop_get_u64_def(const struct dt_node *node, const char *prop, u64 def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + if (!p) + return def; + + return ((u64)dt_property_get_cell(p, 0) << 32) + | dt_property_get_cell(p, 1); +} + +u32 dt_prop_get_u32(const struct dt_node *node, const char *prop) +{ + const struct dt_property *p = dt_require_property(node, prop, 4); + + return dt_property_get_cell(p, 0); +} + +u32 dt_prop_get_u32_def(const struct dt_node *node, const char *prop, u32 def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + if (!p) + return def; + + return dt_property_get_cell(p, 0); +} + +const void *dt_prop_get(const struct dt_node *node, const char *prop) +{ + const struct dt_property *p = dt_require_property(node, prop, -1); + + return p->prop; +} + +const void *dt_prop_get_def(const 
struct dt_node *node, const char *prop, + void *def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + return p ? p->prop : def; +} + +const void *dt_prop_get_def_size(const struct dt_node *node, const char *prop, + void *def, size_t *len) +{ + const struct dt_property *p = dt_find_property(node, prop); + *len = 0; + if (p) + *len = p->len; + + return p ? p->prop : def; +} + +u32 dt_prop_get_cell(const struct dt_node *node, const char *prop, u32 cell) +{ + const struct dt_property *p = dt_require_property(node, prop, -1); + + return dt_property_get_cell(p, cell); +} + +u32 dt_prop_get_cell_def(const struct dt_node *node, const char *prop, + u32 cell, u32 def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + if (!p) + return def; + + return dt_property_get_cell(p, cell); +} + +void dt_free(struct dt_node *node) +{ + struct dt_node *child; + struct dt_property *p; + + while ((child = list_top(&node->children, struct dt_node, list))) + dt_free(child); + + while ((p = list_pop(&node->properties, struct dt_property, list))) { + free_name(p->name); + free(p); + } + + if (node->parent) + list_del_from(&node->parent->children, &node->list); + dt_destroy(node); +} + +int dt_expand_node(struct dt_node *node, const void *fdt, int fdt_node) +{ + const struct fdt_property *prop; + int offset, nextoffset, err; + struct dt_node *child; + const char *name; + uint32_t tag; + + if (((err = fdt_check_header(fdt)) != 0) + || ((err = fdt_check_node_offset_(fdt, fdt_node)) < 0)) { + prerror("FDT: Error %d parsing node 0x%x\n", err, fdt_node); + return -1; + } + + nextoffset = err; + do { + offset = nextoffset; + + tag = fdt_next_tag(fdt, offset, &nextoffset); + switch (tag) { + case FDT_PROP: + prop = fdt_offset_ptr_(fdt, offset); + name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + dt_add_property(node, name, prop->data, + fdt32_to_cpu(prop->len)); + break; + case FDT_BEGIN_NODE: + name = fdt_get_name(fdt, offset, NULL); + child = dt_new_root(name); + assert(child); + nextoffset = dt_expand_node(child, fdt, offset); + + /* + * This may fail in case of duplicate, keep it + * going for now, we may ultimately want to + * assert + */ + if (!dt_attach_root(node, child)) + /** + * @fwts-label DTHasDuplicateNodeID + * @fwts-advice OPAL will parse the Flattened + * Device Tree(FDT), which can be generated + * from different firmware sources. During + * expansion of FDT, OPAL observed a node + * assigned multiple times (a duplicate). This + * indicates either a Hostboot bug *OR*, more + * likely, a bug in the platform XML. Check + * the platform XML for duplicate IDs for + * this type of device. Because of this + * duplicate node, OPAL won't add the hardware + * device found with a duplicate node ID into + * DT, rendering the corresponding device not + * functional. 
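A sketch of how dt_expand_node() above and dt_expand(), defined just below, are typically driven at boot (the helper name is invented; skiboot's real call site lives in its init path): an empty root is created and then populated from the flattened tree handed over by the previous firmware stage.

static void expand_received_fdt(const void *fdt)
{
	dt_root = dt_new_root("");
	dt_expand(fdt);	/* aborts if the FDT cannot be parsed */
}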
+ */ + prlog(PR_ERR, "DT: Found duplicate node: %s\n", + child->name); + break; + case FDT_END: + return -1; + } + } while (tag != FDT_END_NODE); + + return nextoffset; +} + +void dt_expand(const void *fdt) +{ + prlog(PR_DEBUG, "FDT: Parsing fdt @%p\n", fdt); + + if (dt_expand_node(dt_root, fdt, 0) < 0) + abort(); +} + +u64 dt_get_number(const void *pdata, unsigned int cells) +{ + const __be32 *p = pdata; + u64 ret = 0; + + while(cells--) + ret = (ret << 32) | be32_to_cpu(*(p++)); + return ret; +} + +u32 dt_n_address_cells(const struct dt_node *node) +{ + if (!node->parent) + return 0; + return dt_prop_get_u32_def(node->parent, "#address-cells", 2); +} + +u32 dt_n_size_cells(const struct dt_node *node) +{ + if (!node->parent) + return 0; + return dt_prop_get_u32_def(node->parent, "#size-cells", 1); +} + +u64 dt_get_address(const struct dt_node *node, unsigned int index, + u64 *out_size) +{ + const struct dt_property *p; + u32 na = dt_n_address_cells(node); + u32 ns = dt_n_size_cells(node); + u32 pos, n; + + p = dt_require_property(node, "reg", -1); + n = (na + ns) * sizeof(u32); + pos = n * index; + assert((pos + n) <= p->len); + if (out_size) + *out_size = dt_get_number(p->prop + pos + na * sizeof(u32), ns); + return dt_get_number(p->prop + pos, na); +} + +u32 __dt_get_chip_id(const struct dt_node *node) +{ + const struct dt_property *prop; + + for (; node; node = node->parent) { + prop = dt_find_property(node, "ibm,chip-id"); + if (prop) + return dt_property_get_cell(prop, 0); + } + return 0xffffffff; +} + +u32 dt_get_chip_id(const struct dt_node *node) +{ + u32 id = __dt_get_chip_id(node); + assert(id != 0xffffffff); + return id; +} + +struct dt_node *dt_find_compatible_node_on_chip(struct dt_node *root, + struct dt_node *prev, + const char *compat, + uint32_t chip_id) +{ + struct dt_node *node = prev; + + while ((node = dt_next(root, node))) { + u32 cid = __dt_get_chip_id(node); + if (cid == chip_id && + dt_node_is_compatible(node, compat)) + return node; + } + return NULL; +} + +unsigned int dt_count_addresses(const struct dt_node *node) +{ + const struct dt_property *p; + u32 na = dt_n_address_cells(node); + u32 ns = dt_n_size_cells(node); + u32 n; + + p = dt_require_property(node, "reg", -1); + n = (na + ns) * sizeof(u32); + + if (n == 0) + return 0; + + return p->len / n; +} + +/* Translates an address from the given bus into its parent's address space */ +static u64 dt_translate_one(const struct dt_node *bus, u64 addr) +{ + u32 ranges_count, na, ns, parent_na; + const struct dt_property *p; + const u32 *ranges; + int i, stride; + + assert(bus->parent); + + na = dt_prop_get_u32_def(bus, "#address-cells", 2); + ns = dt_prop_get_u32_def(bus, "#size-cells", 2); + parent_na = dt_n_address_cells(bus); + + stride = na + ns + parent_na; + + /* + * FIXME: We should handle arbitrary length addresses, rather than + * limiting it to 64bit. 
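Worked example with invented numbers for the ranges walk below: a single entry with child base 0x0, parent base 0x630000000 and size 0x10000000 maps child addresses 0x0 through 0xfffffff onto 0x630000000 through 0x63fffffff, so a child address of 0x1000 translates to 0x630001000. The per-entry check is just:

static uint64_t translate_one_range_example(uint64_t addr)
{
	const uint64_t child_base = 0x0ull;
	const uint64_t parent_base = 0x630000000ull;
	const uint64_t size = 0x10000000ull;

	if (addr >= child_base && addr < child_base + size)
		return (addr - child_base) + parent_base;

	return 0;	/* outside every mapped range */
}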
If someone wants/needs that they + * can implement the bignum math for it :) + */ + assert(na <= 2); + assert(parent_na <= 2); + + /* We should never be trying to translate an address without a ranges */ + p = dt_require_property(bus, "ranges", -1); + + ranges = (u32 *) &p->prop; + ranges_count = (p->len / 4) / (na + parent_na + ns); + + /* An empty ranges property implies 1-1 translation */ + if (ranges_count == 0) + return addr; + + for (i = 0; i < ranges_count; i++, ranges += stride) { + /* ranges format: <child base> <parent base> <size> */ + u64 child_base = dt_get_number(ranges, na); + u64 parent_base = dt_get_number(ranges + na, parent_na); + u64 size = dt_get_number(ranges + na + parent_na, ns); + + if (addr >= child_base && addr < child_base + size) + return (addr - child_base) + parent_base; + } + + /* input address was outside the any of our mapped ranges */ + return 0; +} + +u64 dt_translate_address(const struct dt_node *node, unsigned int index, + u64 *out_size) +{ + u64 addr = dt_get_address(node, index, NULL); + struct dt_node *bus = node->parent; + + /* FIXME: One day we will probably want to use this, but for now just + * force it it to be zero since we only support returning a u64 or u32 + */ + assert(!out_size); + + /* apply each translation until we hit the root bus */ + while (bus->parent) { + addr = dt_translate_one(bus, addr); + bus = bus->parent; + } + + return addr; +} + +bool dt_node_is_enabled(struct dt_node *node) +{ + const struct dt_property *p = dt_find_property(node, "status"); + + if (!p) + return true; + + return p->len > 1 && p->prop[0] == 'o' && p->prop[1] == 'k'; +} + +/* + * Function to fixup the phandle in the subtree. + */ +void dt_adjust_subtree_phandle(struct dt_node *dev, + const char** (get_properties_to_fix)(struct dt_node *n)) +{ + struct dt_node *node; + struct dt_property *prop; + u32 phandle, max_phandle = 0, import_phandle = new_phandle(); + __be32 p; + const char **name; + + dt_for_each_node(dev, node) { + const char **props_to_update; + node->phandle += import_phandle; + + /* + * calculate max_phandle(new_tree), needed to update + * last_phandle. + */ + if (node->phandle >= max_phandle) + max_phandle = node->phandle; + + props_to_update = get_properties_to_fix(node); + if (!props_to_update) + continue; + for (name = props_to_update; *name != NULL; name++) { + prop = __dt_find_property(node, *name); + if (!prop) + continue; + phandle = dt_prop_get_u32(node, *name); + phandle += import_phandle; + p = cpu_to_be32(phandle); + memcpy((char *)&prop->prop, &p, prop->len); + } + } + + set_last_phandle(max_phandle); +} diff --git a/roms/skiboot/core/direct-controls.c b/roms/skiboot/core/direct-controls.c new file mode 100644 index 000000000..37bcf9826 --- /dev/null +++ b/roms/skiboot/core/direct-controls.c @@ -0,0 +1,1161 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Directly control CPU cores/threads. SRESET, special wakeup, etc + * + * Copyright 2017-2019 IBM Corp. 
+ */ + +#include <direct-controls.h> +#include <skiboot.h> +#include <opal.h> +#include <cpu.h> +#include <xscom.h> +#include <xscom-p8-regs.h> +#include <xscom-p9-regs.h> +#include <xscom-p10-regs.h> +#include <timebase.h> +#include <chip.h> + + +/**************** mambo direct controls ****************/ + +extern unsigned long callthru_tcl(const char *str, int len); + +static void mambo_sreset_cpu(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + char tcl_cmd[50]; + + snprintf(tcl_cmd, sizeof(tcl_cmd), + "mysim cpu %i:%i:%i start_thread 0x100", + chip_id, core_id, thread_id); + callthru_tcl(tcl_cmd, strlen(tcl_cmd)); +} + +static void mambo_stop_cpu(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + char tcl_cmd[50]; + + snprintf(tcl_cmd, sizeof(tcl_cmd), + "mysim cpu %i:%i:%i stop_thread", + chip_id, core_id, thread_id); + callthru_tcl(tcl_cmd, strlen(tcl_cmd)); +} + +/**************** POWER8 direct controls ****************/ + +static int p8_core_set_special_wakeup(struct cpu_thread *cpu) +{ + uint64_t val, poll_target, stamp; + uint32_t core_id; + int rc; + + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ + + /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); + + prlog(PR_DEBUG, "RESET Waking up core 0x%x\n", core_id); + + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), + PPC_BIT(0)); + if (rc) { + prerror("RESET: XSCOM error %d asserting special" + " wakeup on 0x%x\n", rc, cpu->pir); + return rc; + } + + /* + * HWP uses the history for Perf register here, dunno why it uses + * that one instead of the pHyp one, maybe to avoid clobbering it... + * + * In any case, it does that to check for run/nap vs.sleep/winkle/other + * to decide whether to poll on checkstop or not. Since we don't deal + * with checkstop conditions here, we ignore that part. + */ + + /* + * Now poll for completion of special wakeup. The HWP is nasty here, + * it will poll at 5ms intervals for up to 200ms. This is not quite + * acceptable for us at runtime, at least not until we have the + * ability to "context switch" HBRT. In practice, because we don't + * winkle, it will never take that long, so we increase the polling + * frequency to 1us per poll. However we do have to keep the same + * timeout. + * + * We don't use time_wait_ms() either for now as we don't want to + * poll the FSP here. + */ + stamp = mftb(); + poll_target = stamp + msecs_to_tb(200); + val = 0; + while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) { + /* Wait 1 us */ + time_wait_us(1); + + /* Read PM state */ + rc = xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0), + &val); + if (rc) { + prerror("RESET: XSCOM error %d reading PM state on" + " 0x%x\n", rc, cpu->pir); + return rc; + } + /* Check timeout */ + if (mftb() > poll_target) + break; + } + + /* Success ? 
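+	 * (i.e. SPECIAL_WAKEUP_DONE was observed before the 200ms timeout)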
*/ + if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) { + uint64_t now = mftb(); + prlog(PR_TRACE, "RESET: Special wakeup complete after %ld us\n", + tb_to_usecs(now - stamp)); + return 0; + } + + /* + * We timed out ... + * + * HWP has a complex workaround for HW255321 which affects + * Murano DD1 and Venice DD1. Ignore that for now + * + * Instead we just dump some XSCOMs for error logging + */ + prerror("RESET: Timeout on special wakeup of 0x%0x\n", cpu->pir); + prerror("RESET: PM0 = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + prerror("RESET: SPC_WKUP = 0x%016llx\n", val); + val = -1; + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_IDLE_STATE_HISTORY_PHYP), + &val); + prerror("RESET: HISTORY = 0x%016llx\n", val); + + return OPAL_HARDWARE; +} + +static int p8_core_clear_special_wakeup(struct cpu_thread *cpu) +{ + uint64_t val; + uint32_t core_id; + int rc; + + /* + * Note: HWP checks for checkstops, but I assume we don't need to + * as we wouldn't be running if one was present + */ + + /* Grab core ID once */ + core_id = pir_to_core_id(cpu->pir); + + prlog(PR_DEBUG, "RESET: Releasing core 0x%x wakeup\n", core_id); + + /* + * The original HWp reads the XSCOM first but ignores the result + * and error, let's do the same until I know for sure that is + * not necessary + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + /* Then we write special wakeup */ + rc = xscom_write(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, + EX_PM_SPECIAL_WAKEUP_PHYP), 0); + if (rc) { + prerror("RESET: XSCOM error %d deasserting" + " special wakeup on 0x%x\n", rc, cpu->pir); + return rc; + } + + /* + * The original HWp reads the XSCOM again with the comment + * "This puts an inherent delay in the propagation of the reset + * transition" + */ + xscom_read(cpu->chip_id, + XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP), + &val); + + return 0; +} + +static int p8_stop_thread(struct cpu_thread *cpu) +{ + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t xscom_addr; + + xscom_addr = XSCOM_ADDR_P8_EX(core_id, + P8_EX_TCTL_DIRECT_CONTROLS(thread_id)); + + if (xscom_write(chip_id, xscom_addr, P8_DIRECT_CTL_STOP)) { + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Unable to write EX_TCTL_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + return OPAL_SUCCESS; +} + +static int p8_sreset_thread(struct cpu_thread *cpu) +{ + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t xscom_addr; + + xscom_addr = XSCOM_ADDR_P8_EX(core_id, + P8_EX_TCTL_DIRECT_CONTROLS(thread_id)); + + if (xscom_write(chip_id, xscom_addr, P8_DIRECT_CTL_PRENAP)) { + prlog(PR_ERR, "Could not prenap thread %u:%u:%u:" + " Unable to write EX_TCTL_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + if (xscom_write(chip_id, xscom_addr, P8_DIRECT_CTL_SRESET)) { + prlog(PR_ERR, "Could not sreset thread %u:%u:%u:" + " Unable to write EX_TCTL_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + return OPAL_SUCCESS; +} + + +/**************** POWER9 direct controls ****************/ + +/* Long running instructions may take time to complete. 
Timeout 100ms */ +#define P9_QUIESCE_POLL_INTERVAL 100 +#define P9_QUIESCE_TIMEOUT 100000 + +/* Waking may take up to 5ms for deepest sleep states. Set timeout to 100ms */ +#define P9_SPWKUP_POLL_INTERVAL 100 +#define P9_SPWKUP_TIMEOUT 100000 + +/* + * This implements direct control facilities of processor cores and threads + * using scom registers. + */ + +static int p9_core_is_gated(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t sshhyp_addr; + uint64_t val; + + sshhyp_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, P9_EC_PPM_SSHHYP); + + if (xscom_read(chip_id, sshhyp_addr, &val)) { + prlog(PR_ERR, "Could not query core gated on %u:%u:" + " Unable to read PPM_SSHHYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + return !!(val & P9_CORE_GATED); +} + +static int p9_core_set_special_wakeup(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t swake_addr; + uint32_t sshhyp_addr; + uint64_t val; + int i; + + swake_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, EC_PPM_SPECIAL_WKUP_HYP); + sshhyp_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, P9_EC_PPM_SSHHYP); + + if (xscom_write(chip_id, swake_addr, P9_SPWKUP_SET)) { + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " Unable to write PPM_SPECIAL_WKUP_HYP.\n", + chip_id, core_id); + goto out_fail; + } + + for (i = 0; i < P9_SPWKUP_TIMEOUT / P9_SPWKUP_POLL_INTERVAL; i++) { + if (xscom_read(chip_id, sshhyp_addr, &val)) { + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " Unable to read PPM_SSHHYP.\n", + chip_id, core_id); + goto out_fail; + } + if (val & P9_SPECIAL_WKUP_DONE) { + /* + * CORE_GATED will be unset on a successful special + * wakeup of the core which indicates that the core is + * out of stop state. If CORE_GATED is still set then + * raise error. + */ + if (p9_core_is_gated(cpu)) { + /* Deassert spwu for this strange error */ + xscom_write(chip_id, swake_addr, 0); + prlog(PR_ERR, "Failed special wakeup on %u:%u" + " as CORE_GATED is set\n", + chip_id, core_id); + goto out_fail; + } else { + return 0; + } + } + time_wait_us(P9_SPWKUP_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " timeout waiting for SPECIAL_WKUP_DONE.\n", + chip_id, core_id); + +out_fail: + /* + * As per the special wakeup protocol we should not de-assert + * the special wakeup on the core until WAKEUP_DONE is set. + * So even on error do not de-assert. + */ + return OPAL_HARDWARE; +} + +static int p9_core_clear_special_wakeup(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t swake_addr; + + swake_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, EC_PPM_SPECIAL_WKUP_HYP); + + /* + * De-assert special wakeup after a small delay. + * The delay may help avoid problems setting and clearing special + * wakeup back-to-back. This should be confirmed. + */ + time_wait_us(1); + if (xscom_write(chip_id, swake_addr, 0)) { + prlog(PR_ERR, "Could not clear special wakeup on %u:%u:" + " Unable to write PPM_SPECIAL_WKUP_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + /* + * Don't wait for de-assert to complete as other components + * could have requested for special wkeup. 
Wait for 10ms to + * avoid back-to-back asserts + */ + time_wait_us(10000); + return 0; +} + +static int p9_thread_quiesced(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t ras_addr; + uint64_t ras_status; + + ras_addr = XSCOM_ADDR_P9_EC(core_id, P9_RAS_STATUS); + if (xscom_read(chip_id, ras_addr, &ras_status)) { + prlog(PR_ERR, "Could not check thread state on %u:%u:" + " Unable to read RAS_STATUS.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + /* + * This returns true when the thread is quiesced and all + * instructions completed. For sreset this may not be necessary, + * but we may want to use instruction ramming or stepping + * direct controls where it is important. + */ + if ((ras_status & P9_THREAD_QUIESCED(thread_id)) + == P9_THREAD_QUIESCED(thread_id)) + return 1; + + return 0; +} + +static int p9_cont_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t cts_addr; + uint32_t ti_addr; + uint32_t dctl_addr; + uint64_t core_thread_state; + uint64_t thread_info; + bool active, stop; + int rc; + + rc = p9_thread_quiesced(cpu); + if (rc < 0) + return rc; + if (!rc) { + prlog(PR_ERR, "Could not cont thread %u:%u:%u:" + " Thread is not quiesced.\n", + chip_id, core_id, thread_id); + return OPAL_BUSY; + } + + cts_addr = XSCOM_ADDR_P9_EC(core_id, P9_CORE_THREAD_STATE); + ti_addr = XSCOM_ADDR_P9_EC(core_id, P9_THREAD_INFO); + dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS); + + if (xscom_read(chip_id, cts_addr, &core_thread_state)) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to read CORE_THREAD_STATE.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + if (core_thread_state & PPC_BIT(56 + thread_id)) + stop = true; + else + stop = false; + + if (xscom_read(chip_id, ti_addr, &thread_info)) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to read THREAD_INFO.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + if (thread_info & PPC_BIT(thread_id)) + active = true; + else + active = false; + + if (!active || stop) { + if (xscom_write(chip_id, dctl_addr, P9_THREAD_CLEAR_MAINT(thread_id))) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + } + } else { + if (xscom_write(chip_id, dctl_addr, P9_THREAD_CONT(thread_id))) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + } + } + + return 0; +} + +static int p9_stop_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t dctl_addr; + int rc; + int i; + + dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS); + + rc = p9_thread_quiesced(cpu); + if (rc < 0) + return rc; + if (rc) { + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Thread is quiesced already.\n", + chip_id, core_id, thread_id); + return OPAL_BUSY; + } + + if (xscom_write(chip_id, dctl_addr, P9_THREAD_STOP(thread_id))) { + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + for (i = 0; i < P9_QUIESCE_TIMEOUT / 
P9_QUIESCE_POLL_INTERVAL; i++) { + int rc = p9_thread_quiesced(cpu); + if (rc < 0) + break; + if (rc) + return 0; + + time_wait_us(P9_QUIESCE_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Unable to quiesce thread.\n", + chip_id, core_id, thread_id); + + return OPAL_HARDWARE; +} + +static int p9_sreset_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t dctl_addr; + + dctl_addr = XSCOM_ADDR_P9_EC(core_id, P9_EC_DIRECT_CONTROLS); + + if (xscom_write(chip_id, dctl_addr, P9_THREAD_SRESET(thread_id))) { + prlog(PR_ERR, "Could not sreset thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + return 0; +} + +/**************** POWER10 direct controls ****************/ + +/* Long running instructions may take time to complete. Timeout 100ms */ +#define P10_QUIESCE_POLL_INTERVAL 100 +#define P10_QUIESCE_TIMEOUT 100000 + +/* Waking may take up to 5ms for deepest sleep states. Set timeout to 100ms */ +#define P10_SPWU_POLL_INTERVAL 100 +#define P10_SPWU_TIMEOUT 100000 + +/* + * This implements direct control facilities of processor cores and threads + * using scom registers. + */ +static int p10_core_is_gated(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t ssh_addr; + uint64_t val; + + ssh_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SSH_HYP); + + if (xscom_read(chip_id, ssh_addr, &val)) { + prlog(PR_ERR, "Could not query core gated on %u:%u:" + " Unable to read QME_SSH_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + return !!(val & P10_SSH_CORE_GATED); +} + + +static int p10_core_set_special_wakeup(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t spwu_addr, ssh_addr; + uint64_t val; + int i; + + /* P10 could use SPWU_HYP done bit instead of SSH? */ + spwu_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SPWU_HYP); + ssh_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SSH_HYP); + + if (xscom_write(chip_id, spwu_addr, P10_SPWU_REQ)) { + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " Unable to write QME_SPWU_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + for (i = 0; i < P10_SPWU_TIMEOUT / P10_SPWU_POLL_INTERVAL; i++) { + if (xscom_read(chip_id, ssh_addr, &val)) { + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " Unable to read QME_SSH_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + if (val & P10_SSH_SPWU_DONE) { + /* + * CORE_GATED will be unset on a successful special + * wakeup of the core which indicates that the core is + * out of stop state. If CORE_GATED is still set then + * check SPWU register and raise error only if SPWU_DONE + * is not set, else print a warning and consider SPWU + * operation as successful. + * This is in conjunction with a micocode bug, which + * calls out the fact that SPW can succeed in the case + * the core is gated but SPWU_HYP bit is set. 
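+			 * The check below therefore re-reads QME_SPWU_HYP and
+			 * treats a set SPWU_DONE bit as success even though the
+			 * core still reads as gated.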
+ */ + if (p10_core_is_gated(cpu)) { + if(xscom_read(chip_id, spwu_addr, &val)) { + prlog(PR_ERR, "Core %u:%u:" + " unable to read QME_SPWU_HYP\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + if (val & P10_SPWU_DONE) { + /* + * If SPWU DONE bit is set then + * SPWU operation is complete + */ + prlog(PR_DEBUG, "Special wakeup on " + "%u:%u: core remains gated while" + " SPWU_HYP DONE set\n", + chip_id, core_id); + return 0; + } + /* Deassert spwu for this strange error */ + xscom_write(chip_id, spwu_addr, 0); + prlog(PR_ERR, + "Failed special wakeup on %u:%u" + " core remains gated.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } else { + return 0; + } + } + time_wait_us(P10_SPWU_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " operation timeout.\n", + chip_id, core_id); + /* + * As per the special wakeup protocol we should not de-assert + * the special wakeup on the core until WAKEUP_DONE is set. + * So even on error do not de-assert. + */ + + return OPAL_HARDWARE; +} + +static int p10_core_clear_special_wakeup(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t spwu_addr; + + spwu_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SPWU_HYP); + + /* Add a small delay here if spwu problems time_wait_us(1); */ + if (xscom_write(chip_id, spwu_addr, 0)) { + prlog(PR_ERR, "Could not clear special wakeup on %u:%u:" + " Unable to write QME_SPWU_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + return 0; +} + +static int p10_thread_quiesced(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t ras_addr; + uint64_t ras_status; + + ras_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_RAS_STATUS); + if (xscom_read(chip_id, ras_addr, &ras_status)) { + prlog(PR_ERR, "Could not check thread state on %u:%u:" + " Unable to read EC_RAS_STATUS.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + /* + * p10_thread_stop for the purpose of sreset wants QUIESCED + * and MAINT bits set. Step, RAM, etc. need more, but we don't + * use those in skiboot. + * + * P10 could try wait for more here in case of errors. 
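+	 * Hence the checks below require both the QUIESCED and MAINT bits
+	 * for this thread before reporting it as quiesced.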
+ */ + if (!(ras_status & P10_THREAD_QUIESCED(thread_id))) + return 0; + + if (!(ras_status & P10_THREAD_MAINT(thread_id))) + return 0; + + return 1; +} + +static int p10_cont_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t cts_addr; + uint32_t ti_addr; + uint32_t dctl_addr; + uint64_t core_thread_state; + uint64_t thread_info; + bool active, stop; + int rc; + int i; + + rc = p10_thread_quiesced(cpu); + if (rc < 0) + return rc; + if (!rc) { + prlog(PR_ERR, "Could not cont thread %u:%u:%u:" + " Thread is not quiesced.\n", + chip_id, core_id, thread_id); + return OPAL_BUSY; + } + + cts_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_CORE_THREAD_STATE); + ti_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_THREAD_INFO); + dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS); + + if (xscom_read(chip_id, cts_addr, &core_thread_state)) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to read EC_CORE_THREAD_STATE.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + if (core_thread_state & P10_THREAD_STOPPED(thread_id)) + stop = true; + else + stop = false; + + if (xscom_read(chip_id, ti_addr, &thread_info)) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to read EC_THREAD_INFO.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + if (thread_info & P10_THREAD_ACTIVE(thread_id)) + active = true; + else + active = false; + + if (!active || stop) { + if (xscom_write(chip_id, dctl_addr, P10_THREAD_CLEAR_MAINT(thread_id))) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + } + } else { + if (xscom_write(chip_id, dctl_addr, P10_THREAD_START(thread_id))) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + } + } + + for (i = 0; i < P10_QUIESCE_TIMEOUT / P10_QUIESCE_POLL_INTERVAL; i++) { + int rc = p10_thread_quiesced(cpu); + if (rc < 0) + break; + if (!rc) + return 0; + + time_wait_us(P10_QUIESCE_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not start thread %u:%u:%u:" + " Unable to start thread.\n", + chip_id, core_id, thread_id); + + return OPAL_HARDWARE; +} + +static int p10_stop_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t dctl_addr; + int rc; + int i; + + dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS); + + rc = p10_thread_quiesced(cpu); + if (rc < 0) + return rc; + if (rc) { + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Thread is quiesced already.\n", + chip_id, core_id, thread_id); + return OPAL_BUSY; + } + + if (xscom_write(chip_id, dctl_addr, P10_THREAD_STOP(thread_id))) { + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + for (i = 0; i < P10_QUIESCE_TIMEOUT / P10_QUIESCE_POLL_INTERVAL; i++) { + int rc = p10_thread_quiesced(cpu); + if (rc < 0) + break; + if (rc) + return 0; + + time_wait_us(P10_QUIESCE_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Unable to quiesce thread.\n", + chip_id, core_id, thread_id); + + return OPAL_HARDWARE; +} + +static int p10_sreset_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + 
uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t dctl_addr; + + dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS); + + if (xscom_write(chip_id, dctl_addr, P10_THREAD_SRESET(thread_id))) { + prlog(PR_ERR, "Could not sreset thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + return 0; +} + +/**************** generic direct controls ****************/ + +int dctl_set_special_wakeup(struct cpu_thread *t) +{ + struct cpu_thread *c = t->ec_primary; + int rc = OPAL_SUCCESS; + + if (proc_gen == proc_gen_unknown) + return OPAL_UNSUPPORTED; + + lock(&c->dctl_lock); + if (c->special_wakeup_count == 0) { + if (proc_gen == proc_gen_p10) + rc = p10_core_set_special_wakeup(c); + else if (proc_gen == proc_gen_p9) + rc = p9_core_set_special_wakeup(c); + else /* (proc_gen == proc_gen_p8) */ + rc = p8_core_set_special_wakeup(c); + } + if (!rc) + c->special_wakeup_count++; + unlock(&c->dctl_lock); + + return rc; +} + +int dctl_clear_special_wakeup(struct cpu_thread *t) +{ + struct cpu_thread *c = t->ec_primary; + int rc = OPAL_SUCCESS; + + if (proc_gen == proc_gen_unknown) + return OPAL_UNSUPPORTED; + + lock(&c->dctl_lock); + if (!c->special_wakeup_count) + goto out; + if (c->special_wakeup_count == 1) { + if (proc_gen == proc_gen_p10) + rc = p10_core_clear_special_wakeup(c); + else if (proc_gen == proc_gen_p9) + rc = p9_core_clear_special_wakeup(c); + else /* (proc_gen == proc_gen_p8) */ + rc = p8_core_clear_special_wakeup(c); + } + if (!rc) + c->special_wakeup_count--; +out: + unlock(&c->dctl_lock); + + return rc; +} + +int dctl_core_is_gated(struct cpu_thread *t) +{ + struct cpu_thread *c = t->primary; + + if (proc_gen == proc_gen_p10) + return p10_core_is_gated(c); + else if (proc_gen == proc_gen_p9) + return p9_core_is_gated(c); + else + return OPAL_UNSUPPORTED; +} + +static int dctl_stop(struct cpu_thread *t) +{ + struct cpu_thread *c = t->ec_primary; + int rc; + + lock(&c->dctl_lock); + if (t->dctl_stopped) { + unlock(&c->dctl_lock); + return OPAL_BUSY; + } + if (proc_gen == proc_gen_p10) + rc = p10_stop_thread(t); + else if (proc_gen == proc_gen_p9) + rc = p9_stop_thread(t); + else /* (proc_gen == proc_gen_p8) */ + rc = p8_stop_thread(t); + if (!rc) + t->dctl_stopped = true; + unlock(&c->dctl_lock); + + return rc; +} + +static int dctl_cont(struct cpu_thread *t) +{ + struct cpu_thread *c = t->primary; + int rc; + + if (proc_gen != proc_gen_p10 && proc_gen != proc_gen_p9) + return OPAL_UNSUPPORTED; + + lock(&c->dctl_lock); + if (!t->dctl_stopped) { + unlock(&c->dctl_lock); + return OPAL_BUSY; + } + if (proc_gen == proc_gen_p10) + rc = p10_cont_thread(t); + else /* (proc_gen == proc_gen_p9) */ + rc = p9_cont_thread(t); + if (!rc) + t->dctl_stopped = false; + unlock(&c->dctl_lock); + + return rc; +} + +/* + * NOTE: + * The POWER8 sreset does not provide SRR registers, so it can be used + * for fast reboot, but not OPAL_SIGNAL_SYSTEM_RESET or anywhere that is + * expected to return. For now, callers beware. 
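+ *
+ * dctl_sreset() also requires the target thread to have been stopped
+ * via dctl_stop() beforehand, and clears dctl_stopped on success as
+ * the sreset resumes execution at the 0x100 vector.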
+ */ +static int dctl_sreset(struct cpu_thread *t) +{ + struct cpu_thread *c = t->ec_primary; + int rc; + + lock(&c->dctl_lock); + if (!t->dctl_stopped) { + unlock(&c->dctl_lock); + return OPAL_BUSY; + } + if (proc_gen == proc_gen_p10) + rc = p10_sreset_thread(t); + else if (proc_gen == proc_gen_p9) + rc = p9_sreset_thread(t); + else /* (proc_gen == proc_gen_p8) */ + rc = p8_sreset_thread(t); + if (!rc) + t->dctl_stopped = false; + unlock(&c->dctl_lock); + + return rc; +} + + +/**************** fast reboot API ****************/ + +int sreset_all_prepare(void) +{ + struct cpu_thread *cpu; + + if (proc_gen == proc_gen_unknown) + return OPAL_UNSUPPORTED; + + prlog(PR_DEBUG, "RESET: Resetting from cpu: 0x%x (core 0x%x)\n", + this_cpu()->pir, pir_to_core_id(this_cpu()->pir)); + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + for_each_ungarded_cpu(cpu) { + if (cpu == this_cpu()) + continue; + mambo_stop_cpu(cpu); + } + return OPAL_SUCCESS; + } + + /* Assert special wakup on all cores. Only on operational cores. */ + for_each_ungarded_primary(cpu) { + if (dctl_set_special_wakeup(cpu) != OPAL_SUCCESS) + return OPAL_HARDWARE; + } + + prlog(PR_DEBUG, "RESET: Stopping the world...\n"); + + /* Put everybody in stop except myself */ + for_each_ungarded_cpu(cpu) { + if (cpu == this_cpu()) + continue; + if (dctl_stop(cpu) != OPAL_SUCCESS) + return OPAL_HARDWARE; + + } + + return OPAL_SUCCESS; +} + +void sreset_all_finish(void) +{ + struct cpu_thread *cpu; + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return; + + for_each_ungarded_primary(cpu) + dctl_clear_special_wakeup(cpu); +} + +int sreset_all_others(void) +{ + struct cpu_thread *cpu; + + prlog(PR_DEBUG, "RESET: Resetting all threads but self...\n"); + + /* + * mambo should actually implement stop as well, and implement + * the dctl_ helpers properly. Currently it's racy just sresetting. + */ + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + for_each_ungarded_cpu(cpu) { + if (cpu == this_cpu()) + continue; + mambo_sreset_cpu(cpu); + } + return OPAL_SUCCESS; + } + + for_each_ungarded_cpu(cpu) { + if (cpu == this_cpu()) + continue; + if (dctl_sreset(cpu) != OPAL_SUCCESS) + return OPAL_HARDWARE; + } + + return OPAL_SUCCESS; +} + + +/**************** OPAL_SIGNAL_SYSTEM_RESET API ****************/ + +/* + * This provides a way for the host to raise system reset exceptions + * on other threads using direct control scoms on POWER9. + * + * We assert special wakeup on the core first. + * Then stop target thread and wait for it to quiesce. + * Then sreset the target thread, which resumes execution on that thread. + * Then de-assert special wakeup on the core. + */ +static int64_t do_sreset_cpu(struct cpu_thread *cpu) +{ + int rc; + + if (this_cpu() == cpu) { + prlog(PR_ERR, "SRESET: Unable to reset self\n"); + return OPAL_PARAMETER; + } + + rc = dctl_set_special_wakeup(cpu); + if (rc) + return rc; + + rc = dctl_stop(cpu); + if (rc) + goto out_spwk; + + rc = dctl_sreset(cpu); + if (rc) + goto out_cont; + + dctl_clear_special_wakeup(cpu); + + return 0; + +out_cont: + dctl_cont(cpu); +out_spwk: + dctl_clear_special_wakeup(cpu); + + return rc; +} + +static struct lock sreset_lock = LOCK_UNLOCKED; + +int64_t opal_signal_system_reset(int cpu_nr) +{ + struct cpu_thread *cpu; + int64_t ret; + + if (proc_gen != proc_gen_p9 && proc_gen != proc_gen_p10) + return OPAL_UNSUPPORTED; + + /* + * Broadcasts unsupported. Not clear what threads should be + * signaled, so it's better for the OS to perform one-at-a-time + * for now. 
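+	 * A negative cpu_nr would indicate a broadcast, so it is rejected
+	 * with OPAL_CONSTRAINED and the OS must reset CPUs one at a time.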
+ */ + if (cpu_nr < 0) + return OPAL_CONSTRAINED; + + /* Reset a single CPU */ + cpu = find_cpu_by_server(cpu_nr); + if (!cpu) { + prlog(PR_ERR, "SRESET: could not find cpu by server %d\n", cpu_nr); + return OPAL_PARAMETER; + } + + lock(&sreset_lock); + ret = do_sreset_cpu(cpu); + unlock(&sreset_lock); + + return ret; +} + +void direct_controls_init(void) +{ + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return; + + if (proc_gen != proc_gen_p9 && proc_gen != proc_gen_p10) + return; + + opal_register(OPAL_SIGNAL_SYSTEM_RESET, opal_signal_system_reset, 1); +} diff --git a/roms/skiboot/core/errorlog.c b/roms/skiboot/core/errorlog.c new file mode 100644 index 000000000..f64ac3f23 --- /dev/null +++ b/roms/skiboot/core/errorlog.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* This file contains the front end for OPAL error logging. It is used + * to construct a struct errorlog representing the event/error to be + * logged which is then passed to the platform specific backend to log + * the actual errors. + * + * Copyright 2013-2017 IBM Corp. + */ + +#include <skiboot.h> +#include <lock.h> +#include <errorlog.h> +#include <pool.h> + +/* + * Maximum number buffers that are pre-allocated + * to hold elogs that are reported on Sapphire and + * PowerNV. + */ +#define ELOG_WRITE_MAX_RECORD 64 +/* Platform log id as per the spec */ +static uint32_t sapphire_elog_id = 0xB0000000; + +/* Reserved for future use */ +/* static uint32_t powernv_elog_id = 0xB1000000; */ + +/* Pool to allocate elog messages from */ +static struct pool elog_pool; +static struct lock elog_lock = LOCK_UNLOCKED; + +static bool elog_available = false; + +static struct errorlog *get_write_buffer(int opal_event_severity) +{ + struct errorlog *buf; + + if (!elog_available) + return NULL; + + lock(&elog_lock); + if (opal_event_severity == OPAL_ERROR_PANIC) + buf = pool_get(&elog_pool, POOL_HIGH); + else + buf = pool_get(&elog_pool, POOL_NORMAL); + + unlock(&elog_lock); + return buf; +} + +/* Reporting of error via struct errorlog */ +struct errorlog *opal_elog_create(struct opal_err_info *e_info, uint32_t tag) +{ + struct errorlog *buf; + + buf = get_write_buffer(e_info->sev); + if (buf) { + buf->error_event_type = e_info->err_type; + buf->component_id = e_info->cmp_id; + buf->subsystem_id = e_info->subsystem; + buf->event_severity = e_info->sev; + buf->event_subtype = e_info->event_subtype; + buf->reason_code = e_info->reason_code; + buf->elog_origin = ORG_SAPPHIRE; + + lock(&elog_lock); + buf->plid = ++sapphire_elog_id; + unlock(&elog_lock); + + /* Initialise the first user dump section */ + log_add_section(buf, tag); + } + + return buf; +} + +/* Add a new user data section to an existing error log */ +void log_add_section(struct errorlog *buf, uint32_t tag) +{ + size_t size = sizeof(struct elog_user_data_section) - 1; + struct elog_user_data_section *tmp; + + if (!buf) { + prerror("ELOG: Cannot add user data section. " + "Buffer is invalid\n"); + return; + } + + if ((buf->user_section_size + size) > OPAL_LOG_MAX_DUMP) { + prerror("ELOG: Size of dump data overruns buffer\n"); + return; + } + + tmp = (struct elog_user_data_section *)(buf->user_data_dump + + buf->user_section_size); + /* Use DESC if no other tag provided */ + tmp->tag = tag ? 
cpu_to_be32(tag) : cpu_to_be32(OPAL_ELOG_SEC_DESC); + tmp->size = cpu_to_be16(size); + + buf->user_section_size += size; + buf->user_section_count++; +} + +void opal_elog_complete(struct errorlog *buf, bool success) +{ + if (!success) + printf("Unable to log error\n"); + + lock(&elog_lock); + pool_free_object(&elog_pool, buf); + unlock(&elog_lock); +} + +void log_commit(struct errorlog *elog) +{ + int rc; + + if (!elog) + return; + + if (platform.elog_commit) { + rc = platform.elog_commit(elog); + if (rc) + prerror("ELOG: Platform commit error %d\n", rc); + + return; + } + + opal_elog_complete(elog, false); +} + +void log_append_data(struct errorlog *buf, unsigned char *data, uint16_t size) +{ + struct elog_user_data_section *section; + uint8_t n_sections; + char *buffer; + uint16_t ssize; + + if (!buf) { + prerror("ELOG: Cannot update user data. Buffer is invalid\n"); + return; + } + + if ((buf->user_section_size + size) > OPAL_LOG_MAX_DUMP) { + prerror("ELOG: Size of dump data overruns buffer\n"); + return; + } + + /* Step through user sections to find latest dump section */ + buffer = buf->user_data_dump; + n_sections = buf->user_section_count; + if (!n_sections) { + prerror("ELOG: User section invalid\n"); + return; + } + + while (--n_sections) { + section = (struct elog_user_data_section *)buffer; + buffer += be16_to_cpu(section->size); + } + + section = (struct elog_user_data_section *)buffer; + ssize = be16_to_cpu(section->size); + buffer += ssize; + memcpy(buffer, data, size); + section->size = cpu_to_be16(ssize + size); + buf->user_section_size += size; +} + +void log_append_msg(struct errorlog *buf, const char *fmt, ...) +{ + char err_msg[250]; + va_list list; + + if (!buf) { + prerror("Tried to append log to NULL buffer\n"); + return; + } + + va_start(list, fmt); + vsnprintf(err_msg, sizeof(err_msg), fmt, list); + va_end(list); + + /* Log the error on to Sapphire console */ + prerror("%s", err_msg); + + log_append_data(buf, err_msg, strlen(err_msg)); +} + +uint32_t log_simple_error(struct opal_err_info *e_info, const char *fmt, ...) +{ + struct errorlog *buf; + va_list list; + char err_msg[250]; + + va_start(list, fmt); + vsnprintf(err_msg, sizeof(err_msg), fmt, list); + va_end(list); + + /* Log the error on to Sapphire console */ + prerror("%s", err_msg); + + buf = opal_elog_create(e_info, 0); + if (buf == NULL) { + prerror("ELOG: Error getting buffer to log error\n"); + return -1; + } + + log_append_data(buf, err_msg, strlen(err_msg)); + log_commit(buf); + + return buf->plid; +} + +int elog_init(void) +{ + /* Pre-allocate memory for records */ + if (pool_init(&elog_pool, sizeof(struct errorlog), + ELOG_WRITE_MAX_RECORD, 1)) + return OPAL_RESOURCE; + + elog_available = true; + return 0; +} diff --git a/roms/skiboot/core/exceptions.c b/roms/skiboot/core/exceptions.c new file mode 100644 index 000000000..389548d16 --- /dev/null +++ b/roms/skiboot/core/exceptions.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Deal with exceptions when in OPAL. + * + * Copyright 2013-2014 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <stack.h> +#include <opal.h> +#include <processor.h> +#include <cpu.h> +#include <ras.h> + +#define REG "%016llx" +#define REG32 "%08x" +#define REGS_PER_LINE 4 + +static void dump_regs(struct stack_frame *stack) +{ + unsigned int i; + + prerror("CFAR : "REG" MSR : "REG"\n", stack->cfar, stack->msr); + prerror("SRR0 : "REG" SRR1 : "REG"\n", stack->srr0, stack->srr1); + prerror("HSRR0: "REG" HSRR1: "REG"\n", stack->hsrr0, stack->hsrr1); + prerror("DSISR: "REG32" DAR : "REG"\n", stack->dsisr, stack->dar); + prerror("LR : "REG" CTR : "REG"\n", stack->lr, stack->ctr); + prerror("CR : "REG32" XER : "REG32"\n", stack->cr, stack->xer); + for (i = 0; i < 16; i++) + prerror("GPR%02d: "REG" GPR%02d: "REG"\n", + i, stack->gpr[i], i + 16, stack->gpr[i + 16]); +} + +#define EXCEPTION_MAX_STR 320 + +static void handle_mce(struct stack_frame *stack, uint64_t nip, uint64_t msr, bool *fatal) +{ + uint64_t mce_flags, mce_addr; + const char *mce_err; + const char *mce_fix = NULL; + char buf[EXCEPTION_MAX_STR]; + size_t l; + + decode_mce(stack->srr0, stack->srr1, stack->dsisr, stack->dar, + &mce_flags, &mce_err, &mce_addr); + + /* Try to recover. */ + if (mce_flags & MCE_ERAT_ERROR) { + /* Real-mode still uses ERAT, flush transient bitflips */ + flush_erat(); + mce_fix = "ERAT flush"; + + } else { + *fatal = true; + } + + prerror("***********************************************\n"); + l = 0; + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "%s MCE at "REG" ", *fatal ? "Fatal" : "Non-fatal", nip); + l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip); + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, " MSR "REG, msr); + prerror("%s\n", buf); + + l = 0; + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "Cause: %s", mce_err); + prerror("%s\n", buf); + if (mce_flags & MCE_INVOLVED_EA) { + l = 0; + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "Effective address: 0x%016llx", mce_addr); + prerror("%s\n", buf); + } + + if (!*fatal) { + l = 0; + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "Attempting recovery: %s", mce_fix); + prerror("%s\n", buf); + } +} + +void exception_entry(struct stack_frame *stack) +{ + bool fatal = false; + bool hv; + uint64_t nip; + uint64_t msr; + char buf[EXCEPTION_MAX_STR]; + size_t l; + + switch (stack->type) { + case 0x500: + case 0x980: + case 0xe00: + case 0xe20: + case 0xe40: + case 0xe60: + case 0xe80: + case 0xea0: + case 0xf80: + hv = true; + break; + default: + hv = false; + break; + } + + if (hv) { + nip = stack->hsrr0; + msr = stack->hsrr1; + } else { + nip = stack->srr0; + msr = stack->srr1; + } + stack->msr = msr; + stack->pc = nip; + + if (!(msr & MSR_RI)) + fatal = true; + + l = 0; + switch (stack->type) { + case 0x100: + prerror("***********************************************\n"); + if (fatal) { + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "Fatal System Reset at "REG" ", nip); + } else { + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "System Reset at "REG" ", nip); + } + break; + + case 0x200: + handle_mce(stack, nip, msr, &fatal); + goto no_symbol; + + case 0x700: { + struct trap_table_entry *tte; + + fatal = true; + prerror("***********************************************\n"); + for (tte = __trap_table_start; tte < __trap_table_end; tte++) { + if (tte->address == nip) { + prerror("< %s >\n", tte->message); + prerror(" .\n"); + prerror(" .\n"); + prerror(" .\n"); + prerror(" OO__)\n"); + prerror(" <\"__/\n"); + prerror(" ^ ^\n"); + break; + } + } + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "Fatal TRAP at 
"REG" ", nip); + l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip); + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, " MSR "REG, msr); + prerror("%s\n", buf); + dump_regs(stack); + backtrace_r1((uint64_t)stack); + if (platform.terminate) + platform.terminate(buf); + for (;;) ; + break; } + + default: + fatal = true; + prerror("***********************************************\n"); + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "Fatal Exception 0x%llx at "REG" ", stack->type, nip); + break; + } + l += snprintf_symbol(buf + l, EXCEPTION_MAX_STR - l, nip); + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, " MSR "REG, msr); + prerror("%s\n", buf); +no_symbol: + dump_regs(stack); + backtrace_r1((uint64_t)stack); + if (fatal) { + if (platform.terminate) + platform.terminate(buf); + for (;;) ; + } + + if (hv) { + /* Set up for SRR return */ + stack->srr0 = nip; + stack->srr1 = msr; + } +} + +void exception_entry_pm_sreset(void) +{ + char buf[EXCEPTION_MAX_STR]; + size_t l; + + prerror("***********************************************\n"); + l = 0; + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "System Reset in sleep"); + prerror("%s\n", buf); + backtrace(); +} + +void __noreturn exception_entry_pm_mce(void) +{ + char buf[EXCEPTION_MAX_STR]; + size_t l; + + prerror("***********************************************\n"); + l = 0; + l += snprintf(buf + l, EXCEPTION_MAX_STR - l, + "Fatal MCE in sleep"); + prerror("%s\n", buf); + prerror("SRR0 : "REG" SRR1 : "REG"\n", + (uint64_t)mfspr(SPR_SRR0), (uint64_t)mfspr(SPR_SRR1)); + prerror("DSISR: "REG32" DAR : "REG"\n", + (uint32_t)mfspr(SPR_DSISR), (uint64_t)mfspr(SPR_DAR)); + abort(); +} + +static int64_t opal_register_exc_handler(uint64_t opal_exception __unused, + uint64_t handler_address __unused, + uint64_t glue_cache_line __unused) +{ + /* This interface is deprecated */ + return OPAL_UNSUPPORTED; +} +opal_call(OPAL_REGISTER_OPAL_EXCEPTION_HANDLER, opal_register_exc_handler, 3); + diff --git a/roms/skiboot/core/fast-reboot.c b/roms/skiboot/core/fast-reboot.c new file mode 100644 index 000000000..9f92525a9 --- /dev/null +++ b/roms/skiboot/core/fast-reboot.c @@ -0,0 +1,467 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Full IPL is slow, let's cheat! + * + * Copyright 2013-2019 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <cpu.h> +#include <console.h> +#include <fsp.h> +#include <psi.h> +#include <opal.h> +#include <mem_region.h> +#include <xscom.h> +#include <interrupts.h> +#include <cec.h> +#include <timebase.h> +#include <pci.h> +#include <xive.h> +#include <chip.h> +#include <chiptod.h> +#include <ipmi.h> +#include <direct-controls.h> +#include <nvram.h> + +/* Flag tested by the OPAL entry code */ +static volatile bool fast_boot_release; +static volatile bool spr_set_release; +static volatile bool nmi_mce_release; + +static void wait_on(volatile bool *cond) +{ + sync(); + if (!*cond) { + smt_lowest(); + while (!*cond) + barrier(); + smt_medium(); + } + sync(); +} + +static bool cpu_state_wait_all_others(enum cpu_thread_state state, + unsigned long timeout_tb) +{ + struct cpu_thread *cpu; + unsigned long end = mftb() + timeout_tb; + + sync(); + for_each_ungarded_cpu(cpu) { + if (cpu == this_cpu()) + continue; + + if (cpu->state != state) { + smt_lowest(); + while (cpu->state != state) { + barrier(); + + if (timeout_tb && (tb_compare(mftb(), end) == TB_AAFTERB)) { + smt_medium(); + return false; + } + } + smt_medium(); + } + } + sync(); + + return true; +} + +static const char *fast_reboot_disabled = NULL; + +void disable_fast_reboot(const char *reason) +{ + if (fast_reboot_disabled) + return; + + prlog(PR_NOTICE, "RESET: Fast reboot disabled: %s\n", reason); + fast_reboot_disabled = reason; +} + +void add_fast_reboot_dt_entries(void) +{ + dt_check_del_prop(opal_node, "fast-reboot"); + + if (fast_reboot_disabled) { + dt_add_property_string(opal_node, "fast-reboot", fast_reboot_disabled); + } else { + dt_add_property_string(opal_node, "fast-reboot", "okay"); + } +} + +/* + * This is called by the reboot CPU after all other CPUs have been + * quiesced and stopped, to perform various sanity checks on firmware + * data (and potentially hardware), to determine whether the fast + * reboot should go ahead. + */ +static bool fast_reboot_sanity_check(void) +{ + if (!mem_check_all()) { + disable_fast_reboot("Inconsistent firmware data"); + return false; + } + + if (!verify_romem()) { + disable_fast_reboot("Inconsistent firmware romem checksum"); + return false; + } + + return true; +} + +void fast_reboot(void) +{ + static int fast_reboot_count = 0; + + if (chip_quirk(QUIRK_NO_DIRECT_CTL)) { + prlog(PR_DEBUG, + "RESET: Fast reboot disabled by quirk\n"); + return; + } + + /* + * Ensure all other CPUs have left OPAL calls. 
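+	 * If the quiesce times out, fast reboot is disabled and we return
+	 * so the caller can fall back to a full IPL.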
+ */ + if (!opal_quiesce(QUIESCE_HOLD, -1)) { + disable_fast_reboot("OPAL quiesce timeout"); + return; + } + + if (fast_reboot_disabled && + nvram_query_eq_dangerous("force-fast-reset", "1")) { + /* Do fast reboot even if it's been disabled */ + prlog(PR_NOTICE, "RESET: Ignoring fast reboot disabled: %s\n", + fast_reboot_disabled); + } else if (fast_reboot_disabled) { + prlog(PR_NOTICE, "RESET: Fast reboot disabled: %s\n", + fast_reboot_disabled); + opal_quiesce(QUIESCE_RESUME, -1); + return; + } + + prlog(PR_NOTICE, "RESET: Initiating fast reboot %d...\n", ++fast_reboot_count); + fast_boot_release = false; + spr_set_release = false; + nmi_mce_release = false; + sync(); + + /* Put everybody in stop except myself */ + if (sreset_all_prepare()) { + prlog(PR_NOTICE, "RESET: Fast reboot failed to prepare " + "secondaries for system reset\n"); + opal_quiesce(QUIESCE_RESUME, -1); + return; + } + + if (!fast_reboot_sanity_check()) { + opal_quiesce(QUIESCE_RESUME, -1); + return; + } + + cpu_set_sreset_enable(false); + cpu_set_ipi_enable(false); + + /* + * The fast reboot sreset vector has FIXUP_ENDIAN, so secondaries can + * cope with a wrong HILE setting. + */ + copy_sreset_vector_fast_reboot(); + + /* + * There is no point clearing special wakeup or un-quiesce due to + * failure after this point, because we will be going to full IPL. + * Less cleanup work means less opportunity to fail. + */ + + /* Send everyone else to 0x100 */ + if (sreset_all_others() != OPAL_SUCCESS) { + prlog(PR_NOTICE, "RESET: Fast reboot failed to system reset " + "secondaries\n"); + return; + } + + /* Ensure all the sresets get through */ + if (!cpu_state_wait_all_others(cpu_state_fast_reboot_entry, msecs_to_tb(1000))) { + prlog(PR_NOTICE, "RESET: Fast reboot timed out waiting for " + "secondaries to call in\n"); + return; + } + + prlog(PR_DEBUG, "RESET: Releasing special wakeups...\n"); + sreset_all_finish(); + + /* This resets our quiesce state ready to enter the new kernel. */ + opal_quiesce(QUIESCE_RESUME_FAST_REBOOT, -1); + + console_complete_flush(); + + mtmsrd(0, 1); /* Clear MSR[RI] for 0x100 reset */ + asm volatile("ba 0x100\n\t" : : : "memory"); + for (;;) + ; +} + +void __noreturn enter_nap(void); + +static void check_split_core(void) +{ + struct cpu_thread *cpu; + u64 mask, hid0; + + hid0 = mfspr(SPR_HID0); + mask = SPR_HID0_POWER8_4LPARMODE | SPR_HID0_POWER8_2LPARMODE; + + if ((hid0 & mask) == 0) + return; + + prlog(PR_INFO, "RESET: CPU 0x%04x is split !\n", this_cpu()->pir); + + /* If it's a secondary thread, just send it to nap */ + if (this_cpu()->pir & 7) { + /* Prepare to be woken up */ + icp_prep_for_pm(); + /* Setup LPCR to wakeup on external interrupts only */ + mtspr(SPR_LPCR, ((mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE) | + SPR_LPCR_P8_PECE2)); + isync(); + /* Go to nap (doesn't return) */ + enter_nap(); + } + + prlog(PR_INFO, "RESET: Primary, unsplitting... \n"); + + /* Trigger unsplit operation and update SLW image */ + hid0 &= ~SPR_HID0_POWER8_DYNLPARDIS; + set_hid0(hid0); + opal_slw_set_reg(this_cpu()->pir, SPR_HID0, hid0); + + /* Wait for unsplit */ + while (mfspr(SPR_HID0) & mask) + cpu_relax(); + + /* Now the guys are sleeping, wake'em up. They will come back + * via reset and continue the fast reboot process normally. + * No need to wait. + */ + prlog(PR_INFO, "RESET: Waking unsplit secondaries... 
\n"); + + for_each_cpu(cpu) { + if (!cpu_is_sibling(cpu, this_cpu()) || (cpu == this_cpu())) + continue; + icp_kick_cpu(cpu); + } +} + +static void cleanup_cpu_state(void) +{ + struct cpu_thread *cpu = this_cpu(); + + if (proc_gen == proc_gen_p9) + xive_cpu_reset(); + else if (proc_gen == proc_gen_p10) + xive2_cpu_reset(); + + /* Per core cleanup */ + if (cpu_is_thread0(cpu) || cpu_is_core_chiplet_primary(cpu)) { + /* Shared SPRs whacked back to normal */ + + /* XXX Update the SLW copies ! Also dbl check HIDs etc... */ + init_shared_sprs(); + + if (proc_gen == proc_gen_p8) { + /* If somebody was in fast_sleep, we may have a + * workaround to undo + */ + if (cpu->in_fast_sleep) { + prlog(PR_DEBUG, "RESET: CPU 0x%04x in fast sleep" + " undoing workarounds...\n", cpu->pir); + fast_sleep_exit(); + } + + /* The TLB surely contains garbage. + * P9 clears TLBs in cpu_fast_reboot_complete + */ + cleanup_local_tlb(); + } + + /* And we might have lost TB sync */ + chiptod_wakeup_resync(); + } + + /* Per-thread additional cleanup */ + init_replicated_sprs(); + + // XXX Cleanup SLW, check HIDs ... +} + +/* Entry from asm after a fast reset */ +void __noreturn fast_reboot_entry(void); + +void __noreturn fast_reboot_entry(void) +{ + struct cpu_thread *cpu = this_cpu(); + + if (proc_gen == proc_gen_p8) { + /* We reset our ICP first ! Otherwise we might get stray + * interrupts when unsplitting + */ + reset_cpu_icp(); + + /* If we are split, we need to unsplit. Since that can send us + * to NAP, which will come back via reset, we do it now + */ + check_split_core(); + } + + /* Until SPRs (notably HID[HILE]) are set and new exception vectors + * installed, nobody should take machine checks. Try to do minimal + * work between these points. + */ + disable_machine_check(); + mtmsrd(0, 1); /* Clear RI */ + + sync(); + cpu->state = cpu_state_fast_reboot_entry; + sync(); + if (cpu == boot_cpu) { + cpu_state_wait_all_others(cpu_state_fast_reboot_entry, 0); + spr_set_release = true; + } else { + wait_on(&spr_set_release); + } + + + /* Reset SPRs */ + if (cpu_is_thread0(cpu)) + init_shared_sprs(); + init_replicated_sprs(); + + if (cpu == boot_cpu) { + /* Restore skiboot vectors */ + copy_exception_vectors(); + copy_sreset_vector(); + patch_traps(true); + } + + /* Must wait for others to because shared SPRs like HID0 are only set + * by thread0, so can't enable machine checks until those have been + * set. + */ + sync(); + cpu->state = cpu_state_present; + sync(); + if (cpu == boot_cpu) { + cpu_state_wait_all_others(cpu_state_present, 0); + nmi_mce_release = true; + } else { + wait_on(&nmi_mce_release); + } + + /* At this point skiboot exception vectors are in place and all + * cores/threads have SPRs set for running skiboot. + */ + enable_machine_check(); + mtmsrd(MSR_RI, 1); + + cleanup_cpu_state(); + + prlog(PR_DEBUG, "RESET: CPU 0x%04x reset in\n", cpu->pir); + + /* The original boot CPU (not the fast reboot initiator) takes + * command. Secondaries wait for the signal then go to their secondary + * entry point. 
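+	 * The boot CPU then resets XIVE (P9/P10), releases the secondaries
+	 * and restarts the kernel/initramfs preload before booting the
+	 * payload.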
+ */ + if (cpu != boot_cpu) { + wait_on(&fast_boot_release); + + __secondary_cpu_entry(); + } + + if (proc_gen == proc_gen_p9) + xive_reset(); + else if (proc_gen == proc_gen_p10) + xive2_reset(); + + /* Let the CPU layer do some last minute global cleanups */ + cpu_fast_reboot_complete(); + + /* We can now do NAP mode */ + cpu_set_sreset_enable(true); + cpu_set_ipi_enable(true); + + prlog(PR_INFO, "RESET: Releasing secondaries...\n"); + + /* Release everybody */ + sync(); + fast_boot_release = true; + sync(); + cpu->state = cpu_state_active; + sync(); + + /* Wait for them to respond */ + cpu_state_wait_all_others(cpu_state_active, 0); + + sync(); + + prlog(PR_INFO, "RESET: All done, cleaning up...\n"); + + /* Clear release flag for next time */ + fast_boot_release = false; + + if (!chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + /* + * mem_region_clear_unused avoids these preload regions + * so it can run along side image preloading. Clear these + * regions now to catch anything not overwritten by + * preload. + * + * Mambo may have embedded payload here, so don't clear + * it at all. + */ + memset(KERNEL_LOAD_BASE, 0, KERNEL_LOAD_SIZE); + memset(INITRAMFS_LOAD_BASE, 0, INITRAMFS_LOAD_SIZE); + } + + /* Start preloading kernel and ramdisk */ + start_preload_kernel(); + + /* Start clearing memory */ + start_mem_region_clear_unused(); + + if (platform.fast_reboot_init) + platform.fast_reboot_init(); + + if (proc_gen == proc_gen_p8) { + /* XXX */ + /* Reset/EOI the PSI interrupt */ + psi_irq_reset(); + } + + /* update pci nvram settings */ + pci_nvram_init(); + + /* Remove all PCI devices */ + if (pci_reset()) { + prlog(PR_NOTICE, "RESET: Fast reboot failed to reset PCI\n"); + + /* + * Can't return to caller here because we're past no-return. + * Attempt an IPL here which is what the caller would do. + */ + if (platform.cec_reboot) + platform.cec_reboot(); + for (;;) + ; + } + + ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT); + + wait_mem_region_clear_unused(); + + /* Load and boot payload */ + load_and_boot_kernel(true); +} diff --git a/roms/skiboot/core/fdt.c b/roms/skiboot/core/fdt.c new file mode 100644 index 000000000..463dc6912 --- /dev/null +++ b/roms/skiboot/core/fdt.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Produce and consume flattened device trees + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <stdarg.h> +#include <libfdt.h> +#include <device.h> +#include <chip.h> +#include <cpu.h> +#include <opal.h> +#include <interrupts.h> +#include <fsp.h> +#include <cec.h> +#include <vpd.h> +#include <ccan/str/str.h> + +static int fdt_error; + +#undef DEBUG_FDT +#ifdef DEBUG_FDT +#define FDT_DBG(fmt, a...) prlog(PR_DEBUG, "FDT: " fmt, ##a) +#else +#define FDT_DBG(fmt, a...) +#endif + +static void __save_err(int err, const char *str) +{ + FDT_DBG("rc: %d from \"%s\"\n", err, str); + if (err && !fdt_error) { + prerror("FDT: Error %d from \"%s\"\n", err, str); + fdt_error = err; + } +} + +#define save_err(...) 
__save_err(__VA_ARGS__, #__VA_ARGS__) + +static void dt_property_cell(void *fdt, const char *name, u32 cell) +{ + save_err(fdt_property_cell(fdt, name, cell)); +} + +static void dt_begin_node(void *fdt, const struct dt_node *dn) +{ + save_err(fdt_begin_node(fdt, dn->name)); + + dt_property_cell(fdt, "phandle", dn->phandle); +} + +static void dt_property(void *fdt, const struct dt_property *p) +{ + save_err(fdt_property(fdt, p->name, p->prop, p->len)); +} + +static void dt_end_node(void *fdt) +{ + save_err(fdt_end_node(fdt)); +} + +#ifdef DEBUG_FDT +static void dump_fdt(void *fdt) +{ + int i, off, depth, err; + + prlog(PR_INFO, "Device tree %u@%p\n", fdt_totalsize(fdt), fdt); + err = fdt_check_header(fdt); + if (err) { + prerror("fdt_check_header: %s\n", fdt_strerror(err)); + return; + } + prlog(PR_INFO, "fdt_check_header passed\n"); + + prlog(PR_INFO, "fdt_num_mem_rsv = %u\n", fdt_num_mem_rsv(fdt)); + for (i = 0; i < fdt_num_mem_rsv(fdt); i++) { + u64 addr, size; + + err = fdt_get_mem_rsv(fdt, i, &addr, &size); + if (err) { + prlog(PR_INFO, " ERR %s\n", fdt_strerror(err)); + return; + } + prlog(PR_INFO, " mem_rsv[%i] = %lu@%#lx\n", + i, (long)addr, (long)size); + } + + for (off = fdt_next_node(fdt, 0, &depth); + off > 0; + off = fdt_next_node(fdt, off, &depth)) { + int len; + const char *name; + + name = fdt_get_name(fdt, off, &len); + if (!name) { + prerror("fdt: offset %i no name!\n", off); + return; + } + prlog(PR_INFO, "name: %s [%u]\n", name, off); + } +} +#endif + +static void flatten_dt_properties(void *fdt, const struct dt_node *dn) +{ + const struct dt_property *p; + + list_for_each(&dn->properties, p, list) { + if (strstarts(p->name, DT_PRIVATE)) + continue; + + FDT_DBG(" prop: %s size: %ld\n", p->name, p->len); + dt_property(fdt, p); + } +} + +static void flatten_dt_node(void *fdt, const struct dt_node *root, + bool exclusive) +{ + const struct dt_node *i; + + if (!exclusive) { + FDT_DBG("node: %s\n", root->name); + dt_begin_node(fdt, root); + flatten_dt_properties(fdt, root); + } + + list_for_each(&root->children, i, list) + flatten_dt_node(fdt, i, false); + + if (!exclusive) + dt_end_node(fdt); +} + +static void create_dtb_reservemap(void *fdt, const struct dt_node *root) +{ + uint64_t base, size; + const __be64 *ranges; + const struct dt_property *prop; + int i; + + /* Duplicate the reserved-ranges property into the fdt reservemap */ + prop = dt_find_property(root, "reserved-ranges"); + if (prop) { + ranges = (const void *)prop->prop; + + for (i = 0; i < prop->len / (sizeof(uint64_t) * 2); i++) { + base = be64_to_cpu(*(ranges++)); + size = be64_to_cpu(*(ranges++)); + save_err(fdt_add_reservemap_entry(fdt, base, size)); + } + } + + save_err(fdt_finish_reservemap(fdt)); +} + +static int __create_dtb(void *fdt, size_t len, + const struct dt_node *root, + bool exclusive) +{ + if (chip_quirk(QUIRK_SLOW_SIM)) + save_err(fdt_create_with_flags(fdt, len, FDT_CREATE_FLAG_NO_NAME_DEDUP)); + else + save_err(fdt_create_with_flags(fdt, len, 0)); + if (fdt_error) + goto err; + + if (root == dt_root && !exclusive) + create_dtb_reservemap(fdt, root); + else + save_err(fdt_finish_reservemap(fdt)); + + flatten_dt_node(fdt, root, exclusive); + + save_err(fdt_finish(fdt)); + if (fdt_error) { +err: + prerror("dtb: error %s\n", fdt_strerror(fdt_error)); + return fdt_error; + } + +#ifdef DEBUG_FDT + dump_fdt(fdt); +#endif + return 0; +} + +void *create_dtb(const struct dt_node *root, bool exclusive) +{ + void *fdt = NULL; + size_t len = DEVICE_TREE_MAX_SIZE; + uint32_t old_last_phandle = 
get_last_phandle(); + int ret; + + do { + set_last_phandle(old_last_phandle); + fdt_error = 0; + fdt = malloc(len); + if (!fdt) { + prerror("dtb: could not malloc %lu\n", (long)len); + return NULL; + } + + ret = __create_dtb(fdt, len, root, exclusive); + if (ret) { + free(fdt); + fdt = NULL; + } + + len *= 2; + } while (ret == -FDT_ERR_NOSPACE); + + return fdt; +} + +static int64_t opal_get_device_tree(uint32_t phandle, + uint64_t buf, uint64_t len) +{ + struct dt_node *root; + void *fdt = (void *)buf; + uint32_t old_last_phandle; + int64_t totalsize; + int ret; + + if (!opal_addr_valid(fdt)) + return OPAL_PARAMETER; + + root = dt_find_by_phandle(dt_root, phandle); + if (!root) + return OPAL_PARAMETER; + + if (!fdt) { + fdt = create_dtb(root, true); + if (!fdt) + return OPAL_INTERNAL_ERROR; + totalsize = fdt_totalsize(fdt); + free(fdt); + return totalsize; + } + + if (!len) + return OPAL_PARAMETER; + + fdt_error = 0; + old_last_phandle = get_last_phandle(); + ret = __create_dtb(fdt, len, root, true); + if (ret) { + set_last_phandle(old_last_phandle); + if (ret == -FDT_ERR_NOSPACE) + return OPAL_NO_MEM; + + return OPAL_EMPTY; + } + + return OPAL_SUCCESS; +} +opal_call(OPAL_GET_DEVICE_TREE, opal_get_device_tree, 3); diff --git a/roms/skiboot/core/flash-firmware-versions.c b/roms/skiboot/core/flash-firmware-versions.c new file mode 100644 index 000000000..975ac6aff --- /dev/null +++ b/roms/skiboot/core/flash-firmware-versions.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Parse VERSION partition, add to device tree + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <skiboot.h> +#include <device.h> +#include <opal.h> +#include <libstb/secureboot.h> +#include <libstb/trustedboot.h> + +/* ibm,firmware-versions support */ +static char *version_buf; +static size_t version_buf_size = 0x2000; + +static void __flash_dt_add_fw_version(struct dt_node *fw_version, char* data) +{ + static bool first = true; + char *prop; + int version_len, i; + int len = strlen(data); + const char *skiboot_version; + const char * version_str[] = {"open-power", "buildroot", "skiboot", + "hostboot-binaries", "hostboot", "linux", + "petitboot", "occ", "capp-ucode", "sbe", + "machine-xml", "hcode"}; + + if (first) { + first = false; + + /* Increment past "key-" */ + if (memcmp(data, "open-power", strlen("open-power")) == 0) + prop = data + strlen("open-power"); + else + prop = strchr(data, '-'); + if (!prop) { + prlog(PR_DEBUG, + "FLASH: Invalid fw version format (%s)\n", data); + return; + } + prop++; + + dt_add_property_string(fw_version, "version", prop); + return; + } + + /* + * PNOR version strings are not easily consumable. Split them into + * property, value. + * + * Example input from PNOR : + * "open-power-firestone-v1.8" + * "linux-4.4.6-openpower1-8420e0f" + * + * Desired output in device tree: + * open-power = "firestone-v1.8"; + * linux = "4.4.6-openpower1-8420e0f"; + */ + for(i = 0; i < ARRAY_SIZE(version_str); i++) + { + version_len = strlen(version_str[i]); + if (len < version_len) + continue; + + if (memcmp(data, version_str[i], version_len) != 0) + continue; + + /* Found a match, add property */ + if (dt_find_property(fw_version, version_str[i])) + continue; + + /* Increment past "key-" */ + prop = data + version_len + 1; + dt_add_property_string(fw_version, version_str[i], prop); + + /* Sanity check against what Skiboot thinks its version is. 
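A mismatch only triggers the warning below; it does not affect boot.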
*/ + if (strncmp(version_str[i], "skiboot", + strlen("skiboot")) == 0) { + /* + * If Skiboot was built with Buildroot its version may + * include a 'skiboot-' prefix; ignore it. + */ + if (strncmp(version, "skiboot-", + strlen("skiboot-")) == 0) + skiboot_version = version + strlen("skiboot-"); + else + skiboot_version = version; + if (strncmp(prop, skiboot_version, + strlen(skiboot_version)) != 0) + prlog(PR_WARNING, "WARNING! Skiboot version does not match VERSION partition!\n"); + } + } +} + +void flash_dt_add_fw_version(void) +{ + uint8_t version_data[80]; + int rc; + int numbytes = 0, i = 0; + struct dt_node *fw_version; + + if (version_buf == NULL) + return; + + rc = wait_for_resource_loaded(RESOURCE_ID_VERSION, RESOURCE_SUBID_NONE); + if (rc != OPAL_SUCCESS) { + prlog(PR_WARNING, "FLASH: Failed to load VERSION data\n"); + free(version_buf); + return; + } + + fw_version = dt_new(dt_root, "ibm,firmware-versions"); + assert(fw_version); + + if (stb_is_container(version_buf, version_buf_size)) + numbytes += SECURE_BOOT_HEADERS_SIZE; + for ( ; (numbytes < version_buf_size) && version_buf[numbytes]; numbytes++) { + if (version_buf[numbytes] == '\n') { + version_data[i] = '\0'; + __flash_dt_add_fw_version(fw_version, version_data); + memset(version_data, 0, sizeof(version_data)); + i = 0; + continue; + } else if (version_buf[numbytes] == '\t') { + continue; /* skip tabs */ + } + + version_data[i++] = version_buf[numbytes]; + if (i == sizeof(version_data)) { + prlog(PR_WARNING, "VERSION item >%lu chars, skipping\n", + sizeof(version_data)); + break; + } + } + + free(version_buf); +} + +void flash_fw_version_preload(void) +{ + int rc; + + if (proc_gen < proc_gen_p9) + return; + + prlog(PR_INFO, "FLASH: Loading VERSION section\n"); + + version_buf = malloc(version_buf_size); + if (!version_buf) { + prlog(PR_WARNING, "FLASH: Failed to allocate memory\n"); + return; + } + + rc = start_preload_resource(RESOURCE_ID_VERSION, RESOURCE_SUBID_NONE, + version_buf, &version_buf_size); + if (rc != OPAL_SUCCESS) { + prlog(PR_WARNING, + "FLASH: Failed to start loading VERSION data\n"); + free(version_buf); + version_buf = NULL; + } +} diff --git a/roms/skiboot/core/flash-subpartition.c b/roms/skiboot/core/flash-subpartition.c new file mode 100644 index 000000000..6e0fec6c3 --- /dev/null +++ b/roms/skiboot/core/flash-subpartition.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Parse flash sub-partitions + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <skiboot.h> +#include <opal-api.h> + +struct flash_hostboot_toc { + be32 ec; + be32 offset; /* From start of header. 
4K aligned */ + be32 size; +}; +#define FLASH_HOSTBOOT_TOC_MAX_ENTRIES ((FLASH_SUBPART_HEADER_SIZE - 8) \ + /sizeof(struct flash_hostboot_toc)) + +struct flash_hostboot_header { + char eyecatcher[4]; + be32 version; + struct flash_hostboot_toc toc[FLASH_HOSTBOOT_TOC_MAX_ENTRIES]; +}; + +int flash_subpart_info(void *part_header, uint32_t header_len, + uint32_t part_size, uint32_t *part_actualp, + uint32_t subid, uint32_t *offset, uint32_t *size) +{ + struct flash_hostboot_header *header; + char eyecatcher[5]; + uint32_t i, ec, o, s; + uint32_t part_actual; + bool subpart_found; + + if (!part_header || ( !offset && !size && !part_actualp)) { + prlog(PR_ERR, "FLASH: invalid parameters: ph %p of %p sz %p " + "tsz %p\n", part_header, offset, size, part_actualp); + return OPAL_PARAMETER; + } + + if (header_len < FLASH_SUBPART_HEADER_SIZE) { + prlog(PR_ERR, "FLASH: subpartition header too small 0x%x\n", + header_len); + return OPAL_PARAMETER; + } + + header = (struct flash_hostboot_header*) part_header; + + /* Perform sanity */ + i = be32_to_cpu(header->version); + if (i != 1) { + prerror("FLASH: flash subpartition TOC version unknown %i\n", i); + return OPAL_RESOURCE; + } + + /* NULL terminate eyecatcher */ + strncpy(eyecatcher, header->eyecatcher, 4); + eyecatcher[4] = '\0'; + prlog(PR_DEBUG, "FLASH: flash subpartition eyecatcher %s\n", + eyecatcher); + + subpart_found = false; + part_actual = 0; + for (i = 0; i < FLASH_HOSTBOOT_TOC_MAX_ENTRIES; i++) { + + ec = be32_to_cpu(header->toc[i].ec); + o = be32_to_cpu(header->toc[i].offset); + s = be32_to_cpu(header->toc[i].size); + + /* Check for null terminating entry */ + if (!ec && !o && !s) + break; + + /* Sanity check the offset and size. */ + if (o + s > part_size) { + prerror("FLASH: flash subpartition too big: %i\n", i); + return OPAL_RESOURCE; + } + if (!s) { + prerror("FLASH: flash subpartition zero size: %i\n", i); + return OPAL_RESOURCE; + } + if (o < FLASH_SUBPART_HEADER_SIZE) { + prerror("FLASH: flash subpartition offset too small: " + "%i\n", i); + return OPAL_RESOURCE; + } + /* + * Subpartitions content are different, but multiple toc entries + * may point to the same subpartition. + */ + if (ALIGN_UP(o + s, FLASH_SUBPART_HEADER_SIZE) > part_actual) + part_actual = ALIGN_UP(o + s, FLASH_SUBPART_HEADER_SIZE); + + if (ec == subid) { + if (offset) + *offset += o; + if (size) + *size = s; + subpart_found = true; + } + } + if (!subpart_found && (offset || size)) { + prerror("FLASH: flash subpartition not found.\n"); + return OPAL_RESOURCE; + } + if (part_actualp) + *part_actualp = part_actual; + return OPAL_SUCCESS; +} diff --git a/roms/skiboot/core/flash.c b/roms/skiboot/core/flash.c new file mode 100644 index 000000000..8c1e788c4 --- /dev/null +++ b/roms/skiboot/core/flash.c @@ -0,0 +1,1186 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Init, manage, read, write, and load resources from flash + * + * Copyright 2013-2019 IBM Corp. 
+ * Copyright 2018-2019 Raptor Engineering, LLC + */ + +#define pr_fmt(fmt) "FLASH: " fmt + +#include <skiboot.h> +#include <cpu.h> +#include <lock.h> +#include <opal.h> +#include <opal-msg.h> +#include <platform.h> +#include <device.h> +#include <libflash/libflash.h> +#include <libflash/libffs.h> +#include <libflash/ipmi-hiomap.h> +#include <libflash/blocklevel.h> +#include <libflash/ecc.h> +#include <libstb/secureboot.h> +#include <libstb/trustedboot.h> +#include <libxz/xz.h> +#include <elf.h> +#include <timebase.h> + +struct flash { + struct list_node list; + bool busy; + bool no_erase; + struct blocklevel_device *bl; + uint64_t size; + uint32_t block_size; + int id; +}; + +static struct { + enum resource_id id; + uint32_t subid; + char name[PART_NAME_MAX+1]; +} part_name_map[] = { + { RESOURCE_ID_KERNEL, RESOURCE_SUBID_NONE, "BOOTKERNEL" }, + { RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE, "ROOTFS" }, + { RESOURCE_ID_CAPP, RESOURCE_SUBID_SUPPORTED, "CAPP" }, + { RESOURCE_ID_IMA_CATALOG, RESOURCE_SUBID_SUPPORTED, "IMA_CATALOG" }, + { RESOURCE_ID_VERSION, RESOURCE_SUBID_NONE, "VERSION" }, + { RESOURCE_ID_KERNEL_FW, RESOURCE_SUBID_NONE, "BOOTKERNFW" }, +}; + +static LIST_HEAD(flashes); +static struct flash *system_flash; + +/* Using a single lock as we only have one flash at present. */ +static struct lock flash_lock; + +/* nvram-on-flash support */ +static struct flash *nvram_flash; +static u32 nvram_offset, nvram_size; + +/* secboot-on-flash support */ +static struct flash *secboot_flash; +static u32 secboot_offset, secboot_size; + +bool flash_reserve(void) +{ + bool rc = false; + + if (!try_lock(&flash_lock)) + return false; + + if (!system_flash->busy) { + system_flash->busy = true; + rc = true; + } + unlock(&flash_lock); + + return rc; +} + +void flash_release(void) +{ + lock(&flash_lock); + system_flash->busy = false; + unlock(&flash_lock); +} + +bool flash_unregister(void) +{ + struct blocklevel_device *bl = system_flash->bl; + + if (bl->exit) + return bl->exit(bl); + + prlog(PR_NOTICE, "Unregister flash device is not supported\n"); + return true; +} + +int flash_secboot_info(uint32_t *total_size) +{ + int rc; + + lock(&flash_lock); + if (!secboot_flash) { + rc = OPAL_HARDWARE; + } else if (secboot_flash->busy) { + rc = OPAL_BUSY; + } else { + *total_size = secboot_size; + rc = OPAL_SUCCESS; + } + unlock(&flash_lock); + + return rc; +} + +int flash_secboot_read(void *dst, uint32_t src, uint32_t len) +{ + int rc; + + if (!try_lock(&flash_lock)) + return OPAL_BUSY; + + if (!secboot_flash) { + rc = OPAL_HARDWARE; + goto out; + } + + if (secboot_flash->busy) { + rc = OPAL_BUSY; + goto out; + } + + if ((src + len) > secboot_size) { + prerror("FLASH_SECBOOT: read out of bound (0x%x,0x%x)\n", + src, len); + rc = OPAL_PARAMETER; + goto out; + } + + secboot_flash->busy = true; + unlock(&flash_lock); + + rc = blocklevel_read(secboot_flash->bl, secboot_offset + src, dst, len); + + lock(&flash_lock); + secboot_flash->busy = false; +out: + unlock(&flash_lock); + return rc; +} + +int flash_secboot_write(uint32_t dst, void *src, uint32_t len) +{ + int rc; + + if (!try_lock(&flash_lock)) + return OPAL_BUSY; + + if (secboot_flash->busy) { + rc = OPAL_BUSY; + goto out; + } + + if ((dst + len) > secboot_size) { + prerror("FLASH_SECBOOT: write out of bound (0x%x,0x%x)\n", + dst, len); + rc = OPAL_PARAMETER; + goto out; + } + + secboot_flash->busy = true; + unlock(&flash_lock); + + rc = blocklevel_write(secboot_flash->bl, secboot_offset + dst, src, len); + + lock(&flash_lock); + secboot_flash->busy = 
false; +out: + unlock(&flash_lock); + return rc; +} + +static int flash_nvram_info(uint32_t *total_size) +{ + int rc; + + lock(&flash_lock); + if (!nvram_flash) { + rc = OPAL_HARDWARE; + } else if (nvram_flash->busy) { + rc = OPAL_BUSY; + } else { + *total_size = nvram_size; + rc = OPAL_SUCCESS; + } + unlock(&flash_lock); + + return rc; +} + +static int flash_nvram_start_read(void *dst, uint32_t src, uint32_t len) +{ + int rc; + + if (!try_lock(&flash_lock)) + return OPAL_BUSY; + + if (!nvram_flash) { + rc = OPAL_HARDWARE; + goto out; + } + + if (nvram_flash->busy) { + rc = OPAL_BUSY; + goto out; + } + + if ((src + len) > nvram_size) { + prerror("NVRAM: read out of bound (0x%x,0x%x)\n", + src, len); + rc = OPAL_PARAMETER; + goto out; + } + + nvram_flash->busy = true; + unlock(&flash_lock); + + rc = blocklevel_read(nvram_flash->bl, nvram_offset + src, dst, len); + + lock(&flash_lock); + nvram_flash->busy = false; +out: + unlock(&flash_lock); + if (!rc) + nvram_read_complete(true); + return rc; +} + +static int flash_nvram_write(uint32_t dst, void *src, uint32_t len) +{ + int rc; + + if (!try_lock(&flash_lock)) + return OPAL_BUSY; + + if (nvram_flash->busy) { + rc = OPAL_BUSY; + goto out; + } + + /* TODO: When we have async jobs for PRD, turn this into one */ + + if ((dst + len) > nvram_size) { + prerror("NVRAM: write out of bound (0x%x,0x%x)\n", + dst, len); + rc = OPAL_PARAMETER; + goto out; + } + + nvram_flash->busy = true; + unlock(&flash_lock); + + rc = blocklevel_write(nvram_flash->bl, nvram_offset + dst, src, len); + + lock(&flash_lock); + nvram_flash->busy = false; +out: + unlock(&flash_lock); + return rc; +} + + +static int flash_secboot_probe(struct flash *flash, struct ffs_handle *ffs) +{ + uint32_t start, size, part; + bool ecc; + int rc; + + prlog(PR_DEBUG, "FLASH: probing for SECBOOT\n"); + + rc = ffs_lookup_part(ffs, "SECBOOT", &part); + if (rc) { + prlog(PR_WARNING, "FLASH: no SECBOOT partition found\n"); + return OPAL_HARDWARE; + } + + rc = ffs_part_info(ffs, part, NULL, + &start, &size, NULL, &ecc); + if (rc) { + /** + * @fwts-label SECBOOTNoPartition + * @fwts-advice OPAL could not find an SECBOOT partition + * on the system flash. Check that the system flash + * has a valid partition table, and that the firmware + * build process has added a SECBOOT partition. + */ + prlog(PR_ERR, "FLASH: Can't parse ffs info for SECBOOT\n"); + return OPAL_HARDWARE; + } + + secboot_flash = flash; + secboot_offset = start; + secboot_size = ecc ? ecc_buffer_size_minus_ecc(size) : size; + + return 0; +} + +static int flash_nvram_probe(struct flash *flash, struct ffs_handle *ffs) +{ + uint32_t start, size, part; + bool ecc; + int rc; + + prlog(PR_INFO, "probing for NVRAM\n"); + + rc = ffs_lookup_part(ffs, "NVRAM", &part); + if (rc) { + prlog(PR_WARNING, "no NVRAM partition found\n"); + return OPAL_HARDWARE; + } + + rc = ffs_part_info(ffs, part, NULL, + &start, &size, NULL, &ecc); + if (rc) { + /** + * @fwts-label NVRAMNoPartition + * @fwts-advice OPAL could not find an NVRAM partition + * on the system flash. Check that the system flash + * has a valid partition table, and that the firmware + * build process has added a NVRAM partition. + */ + prlog(PR_ERR, "Can't parse ffs info for NVRAM\n"); + return OPAL_HARDWARE; + } + + nvram_flash = flash; + nvram_offset = start; + nvram_size = ecc ? 
ecc_buffer_size_minus_ecc(size) : size; + + platform.nvram_info = flash_nvram_info; + platform.nvram_start_read = flash_nvram_start_read; + platform.nvram_write = flash_nvram_write; + + return 0; +} + +/* core flash support */ + +static struct dt_node *flash_add_dt_node(struct flash *flash, int id) +{ + int i; + int rc; + const char *name; + bool ecc; + struct ffs_handle *ffs; + int ffs_part_num, ffs_part_start, ffs_part_size; + struct dt_node *flash_node; + struct dt_node *partition_container_node; + struct dt_node *partition_node; + + flash_node = dt_new_addr(opal_node, "flash", id); + dt_add_property_strings(flash_node, "compatible", "ibm,opal-flash"); + dt_add_property_cells(flash_node, "ibm,opal-id", id); + dt_add_property_u64(flash_node, "reg", flash->size); + dt_add_property_cells(flash_node, "ibm,flash-block-size", + flash->block_size); + if (flash->no_erase) + dt_add_property(flash_node, "no-erase", NULL, 0); + + /* we fix to 32-bits */ + dt_add_property_cells(flash_node, "#address-cells", 1); + dt_add_property_cells(flash_node, "#size-cells", 1); + + /* Add partition container node */ + partition_container_node = dt_new(flash_node, "partitions"); + dt_add_property_strings(partition_container_node, "compatible", "fixed-partitions"); + + /* we fix to 32-bits */ + dt_add_property_cells(partition_container_node, "#address-cells", 1); + dt_add_property_cells(partition_container_node, "#size-cells", 1); + + /* Add partitions */ + for (i = 0, name = NULL; i < ARRAY_SIZE(part_name_map); i++) { + name = part_name_map[i].name; + + rc = ffs_init(0, flash->size, flash->bl, &ffs, 1); + if (rc) { + prerror("Can't open ffs handle\n"); + continue; + } + + rc = ffs_lookup_part(ffs, name, &ffs_part_num); + if (rc) { + /* This is not an error per-se, some partitions + * are purposefully absent, don't spam the logs + */ + prlog(PR_DEBUG, "No %s partition\n", name); + continue; + } + rc = ffs_part_info(ffs, ffs_part_num, NULL, + &ffs_part_start, NULL, &ffs_part_size, &ecc); + if (rc) { + prerror("Failed to get %s partition info\n", name); + continue; + } + + partition_node = dt_new_addr(partition_container_node, "partition", ffs_part_start); + dt_add_property_strings(partition_node, "label", name); + dt_add_property_cells(partition_node, "reg", ffs_part_start, ffs_part_size); + if (part_name_map[i].id != RESOURCE_ID_KERNEL_FW) { + /* Mark all partitions other than the full PNOR and the boot kernel + * firmware as read only. These two partitions are the only partitions + * that are properly erase block aligned at this time. + */ + dt_add_property(partition_node, "read-only", NULL, 0); + } + } + + partition_node = dt_new_addr(partition_container_node, "partition", 0); + dt_add_property_strings(partition_node, "label", "PNOR"); + dt_add_property_cells(partition_node, "reg", 0, flash->size); + + return flash_node; +} + +static void setup_system_flash(struct flash *flash, struct dt_node *node, + const char *name, struct ffs_handle *ffs) +{ + char *path; + + if (!ffs) + return; + + if (system_flash) { + /** + * @fwts-label SystemFlashMultiple + * @fwts-advice OPAL Found multiple system flash. + * Since we've already found a system flash we are + * going to use that one but this ordering is not + * guaranteed so may change in future. 
+ */ + prlog(PR_WARNING, "Attempted to register multiple system " + "flash: %s\n", name); + return; + } + + prlog(PR_NOTICE, "Found system flash: %s id:%i\n", + name, flash->id); + + system_flash = flash; + path = dt_get_path(node); + dt_add_property_string(dt_chosen, "ibm,system-flash", path); + free(path); + + prlog(PR_INFO, "registered system flash device %s\n", name); + + flash_nvram_probe(flash, ffs); + flash_secboot_probe(flash, ffs); +} + +static int num_flashes(void) +{ + struct flash *flash; + int i = 0; + + list_for_each(&flashes, flash, list) + i++; + + return i; +} + +int flash_register(struct blocklevel_device *bl) +{ + uint64_t size; + uint32_t block_size; + struct ffs_handle *ffs; + struct dt_node *node; + struct flash *flash; + const char *name; + int rc; + + rc = blocklevel_get_info(bl, &name, &size, &block_size); + if (rc) + return rc; + + if (!name) + name = "(unnamed)"; + + prlog(PR_INFO, "registering flash device %s " + "(size 0x%llx, blocksize 0x%x)\n", + name, size, block_size); + + flash = malloc(sizeof(struct flash)); + if (!flash) { + prlog(PR_ERR, "Error allocating flash structure\n"); + return OPAL_RESOURCE; + } + + flash->busy = false; + flash->bl = bl; + flash->no_erase = !(bl->flags & WRITE_NEED_ERASE); + flash->size = size; + flash->block_size = block_size; + flash->id = num_flashes(); + + rc = ffs_init(0, flash->size, bl, &ffs, 1); + if (rc) { + /** + * @fwts-label NoFFS + * @fwts-advice System flash isn't formatted as expected. + * This could mean several OPAL utilities do not function + * as expected. e.g. gard, pflash. + */ + prlog(PR_WARNING, "No ffs info; " + "using raw device only\n"); + ffs = NULL; + } + + node = flash_add_dt_node(flash, flash->id); + + setup_system_flash(flash, node, name, ffs); + + if (ffs) + ffs_close(ffs); + + lock(&flash_lock); + list_add(&flashes, &flash->list); + unlock(&flash_lock); + + return OPAL_SUCCESS; +} + +enum flash_op { + FLASH_OP_READ, + FLASH_OP_WRITE, + FLASH_OP_ERASE, +}; + +static int64_t opal_flash_op(enum flash_op op, uint64_t id, uint64_t offset, + uint64_t buf, uint64_t size, uint64_t token) +{ + struct flash *flash = NULL; + int rc; + + if (!try_lock(&flash_lock)) + return OPAL_BUSY; + + list_for_each(&flashes, flash, list) + if (flash->id == id) + break; + + if (flash->id != id) { + /* Couldn't find the flash */ + rc = OPAL_PARAMETER; + goto err; + } + + if (flash->busy) { + rc = OPAL_BUSY; + goto err; + } + + if (size >= flash->size || offset >= flash->size + || offset + size > flash->size) { + rc = OPAL_PARAMETER; + goto err; + } + + /* + * These ops intentionally have no smarts (ecc correction or erase + * before write) to them. + * Skiboot is simply exposing the PNOR flash to the host. + * The host is expected to understand that this is a raw flash + * device and treat it as such. 
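+ * The read and write cases below therefore use the blocklevel raw accessors, and completion is signalled to the host asynchronously via an OPAL_MSG_ASYNC_COMP message carrying the caller's token.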
+ */ + switch (op) { + case FLASH_OP_READ: + rc = blocklevel_raw_read(flash->bl, offset, (void *)buf, size); + break; + case FLASH_OP_WRITE: + rc = blocklevel_raw_write(flash->bl, offset, (void *)buf, size); + break; + case FLASH_OP_ERASE: + rc = blocklevel_erase(flash->bl, offset, size); + break; + default: + assert(0); + } + + if (rc) { + rc = OPAL_HARDWARE; + goto err; + } + + unlock(&flash_lock); + + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, + cpu_to_be64(token), + cpu_to_be64(rc)); + + return OPAL_ASYNC_COMPLETION; + +err: + unlock(&flash_lock); + return rc; +} + +static int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf, + uint64_t size, uint64_t token) +{ + if (!opal_addr_valid((void *)buf)) + return OPAL_PARAMETER; + + return opal_flash_op(FLASH_OP_READ, id, offset, buf, size, token); +} + +static int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf, + uint64_t size, uint64_t token) +{ + if (!opal_addr_valid((void *)buf)) + return OPAL_PARAMETER; + + return opal_flash_op(FLASH_OP_WRITE, id, offset, buf, size, token); +} + +static int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size, + uint64_t token) +{ + return opal_flash_op(FLASH_OP_ERASE, id, offset, 0L, size, token); +} + +opal_call(OPAL_FLASH_READ, opal_flash_read, 5); +opal_call(OPAL_FLASH_WRITE, opal_flash_write, 5); +opal_call(OPAL_FLASH_ERASE, opal_flash_erase, 4); + +/* flash resource API */ +const char *flash_map_resource_name(enum resource_id id) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(part_name_map); i++) { + if (part_name_map[i].id == id) + return part_name_map[i].name; + } + return NULL; +} + +static size_t sizeof_elf_from_hdr(void *buf) +{ + struct elf_hdr *elf = (struct elf_hdr *)buf; + size_t sz = 0; + + BUILD_ASSERT(SECURE_BOOT_HEADERS_SIZE > sizeof(struct elf_hdr)); + BUILD_ASSERT(SECURE_BOOT_HEADERS_SIZE > sizeof(struct elf64be_hdr)); + BUILD_ASSERT(SECURE_BOOT_HEADERS_SIZE > sizeof(struct elf32be_hdr)); + + if (elf->ei_ident == ELF_IDENT) { + if (elf->ei_class == ELF_CLASS_64) { + if (elf->ei_data == ELF_DATA_LSB) { + struct elf64le_hdr *kh = (struct elf64le_hdr *)buf; + sz = le64_to_cpu(kh->e_shoff) + + ((uint32_t)le16_to_cpu(kh->e_shentsize) * + (uint32_t)le16_to_cpu(kh->e_shnum)); + } else { + struct elf64be_hdr *kh = (struct elf64be_hdr *)buf; + sz = be64_to_cpu(kh->e_shoff) + + ((uint32_t)be16_to_cpu(kh->e_shentsize) * + (uint32_t)be16_to_cpu(kh->e_shnum)); + } + } else if (elf->ei_class == ELF_CLASS_32) { + if (elf->ei_data == ELF_DATA_LSB) { + struct elf32le_hdr *kh = (struct elf32le_hdr *)buf; + sz = le32_to_cpu(kh->e_shoff) + + (le16_to_cpu(kh->e_shentsize) * + le16_to_cpu(kh->e_shnum)); + } else { + struct elf32be_hdr *kh = (struct elf32be_hdr *)buf; + sz = be32_to_cpu(kh->e_shoff) + + (be16_to_cpu(kh->e_shentsize) * + be16_to_cpu(kh->e_shnum)); + } + } + } + + return sz; +} + +/* + * load a resource from FLASH + * buf and len shouldn't account for ECC even if partition is ECCed. + * + * The API here is a bit strange. + * If resource has a STB container, buf will contain it + * If loading subpartition with STB container, buff will *NOT* contain it + * For trusted boot, the whole partition containing the subpart is measured. + * + * Additionally, the logic to work out how much to read from flash is insane. 
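+ * In short: if the partition carries an STB container, the content size comes from the container header; for the kernel and initramfs partitions without a container, the size is derived from the ELF headers; otherwise the reported partition size is used.
+ * When a sub-partition is requested, the containing partition is read and measured as a whole, and the requested sub-partition is then memmove()d to the start of the buffer.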
+ */ +static int flash_load_resource(enum resource_id id, uint32_t subid, + void *buf, size_t *len) +{ + int i; + int rc = OPAL_RESOURCE; + struct ffs_handle *ffs; + struct flash *flash; + const char *name; + bool status = false; + bool ecc; + bool part_signed = false; + void *bufp = buf; + size_t bufsz = *len; + int ffs_part_num, ffs_part_start, ffs_part_size; + int content_size = 0; + int offset = 0; + + lock(&flash_lock); + + if (!system_flash) { + /** + * @fwts-label SystemFlashNotFound + * @fwts-advice No system flash was found. Check for missing + * calls flash_register(...). + */ + prlog(PR_WARNING, "Can't load resource id:%i. " + "No system flash found\n", id); + goto out_unlock; + } + + flash = system_flash; + + if (flash->busy) + goto out_unlock; + + for (i = 0, name = NULL; i < ARRAY_SIZE(part_name_map); i++) { + if (part_name_map[i].id == id) { + name = part_name_map[i].name; + break; + } + } + if (!name) { + prerror("Couldn't find partition for id %d\n", id); + goto out_unlock; + } + /* + * If partition doesn't have a subindex but the caller specifies one, + * we fail. eg. kernel partition doesn't have a subindex + */ + if ((part_name_map[i].subid == RESOURCE_SUBID_NONE) && + (subid != RESOURCE_SUBID_NONE)) { + prerror("PLAT: Partition %s doesn't have subindex\n", name); + goto out_unlock; + } + + rc = ffs_init(0, flash->size, flash->bl, &ffs, 1); + if (rc) { + prerror("Can't open ffs handle: %d\n", rc); + goto out_unlock; + } + + rc = ffs_lookup_part(ffs, name, &ffs_part_num); + if (rc) { + /* This is not an error per-se, some partitions + * are purposefully absent, don't spam the logs + */ + prlog(PR_DEBUG, "No %s partition\n", name); + goto out_free_ffs; + } + rc = ffs_part_info(ffs, ffs_part_num, NULL, + &ffs_part_start, NULL, &ffs_part_size, &ecc); + if (rc) { + prerror("Failed to get %s partition info\n", name); + goto out_free_ffs; + } + prlog(PR_DEBUG,"%s partition %s ECC\n", + name, ecc ? "has" : "doesn't have"); + + /* + * FIXME: Make the fact we don't support partitions smaller than 4K + * more explicit. + */ + if (ffs_part_size < SECURE_BOOT_HEADERS_SIZE) { + prerror("secboot headers bigger than " + "partition size 0x%x\n", ffs_part_size); + goto out_free_ffs; + } + + rc = blocklevel_read(flash->bl, ffs_part_start, bufp, + SECURE_BOOT_HEADERS_SIZE); + if (rc) { + prerror("failed to read the first 0x%x from " + "%s partition, rc %d\n", SECURE_BOOT_HEADERS_SIZE, + name, rc); + goto out_free_ffs; + } + + part_signed = stb_is_container(bufp, SECURE_BOOT_HEADERS_SIZE); + + prlog(PR_DEBUG, "%s partition %s signed\n", name, + part_signed ? "is" : "isn't"); + + /* + * part_start/size are raw pointers into the partition. + * ie. they will account for ECC if included. + */ + + if (part_signed) { + bufp += SECURE_BOOT_HEADERS_SIZE; + bufsz -= SECURE_BOOT_HEADERS_SIZE; + content_size = stb_sw_payload_size(buf, SECURE_BOOT_HEADERS_SIZE); + *len = content_size + SECURE_BOOT_HEADERS_SIZE; + + if (content_size > bufsz) { + prerror("content size > buffer size\n"); + rc = OPAL_PARAMETER; + goto out_free_ffs; + } + + if (*len > ffs_part_size) { + prerror("FLASH: Cannot load %s. 
Content is larger than the partition\n", + name); + rc = OPAL_PARAMETER; + goto out_free_ffs; + } + + ffs_part_start += SECURE_BOOT_HEADERS_SIZE; + + rc = blocklevel_read(flash->bl, ffs_part_start, bufp, + content_size); + if (rc) { + prerror("failed to read content size %d" + " %s partition, rc %d\n", + content_size, name, rc); + goto out_free_ffs; + } + + if (subid == RESOURCE_SUBID_NONE) + goto done_reading; + + rc = flash_subpart_info(bufp, content_size, ffs_part_size, + NULL, subid, &offset, &content_size); + if (rc) { + prerror("Failed to parse subpart info for %s\n", + name); + goto out_free_ffs; + } + bufp += offset; + goto done_reading; + } else /* stb_signed */ { + /* + * Back to the old way of doing things, no STB header. + */ + if (subid == RESOURCE_SUBID_NONE) { + if (id == RESOURCE_ID_KERNEL || + id == RESOURCE_ID_INITRAMFS) { + /* + * Because actualSize is a lie, we compute the + * size of the BOOTKERNEL based on what the ELF + * headers say. Otherwise we end up reading more + * than we should + */ + content_size = sizeof_elf_from_hdr(buf); + if (!content_size) { + prerror("Invalid ELF header part" + " %s\n", name); + rc = OPAL_RESOURCE; + goto out_free_ffs; + } + } else { + content_size = ffs_part_size; + } + if (content_size > bufsz) { + prerror("%s content size %d > " + " buffer size %lu\n", name, + content_size, bufsz); + rc = OPAL_PARAMETER; + goto out_free_ffs; + } + prlog(PR_DEBUG, "computed %s size %u\n", + name, content_size); + rc = blocklevel_read(flash->bl, ffs_part_start, + buf, content_size); + if (rc) { + prerror("failed to read content size %d" + " %s partition, rc %d\n", + content_size, name, rc); + goto out_free_ffs; + } + *len = content_size; + goto done_reading; + } + BUILD_ASSERT(FLASH_SUBPART_HEADER_SIZE <= SECURE_BOOT_HEADERS_SIZE); + rc = flash_subpart_info(bufp, SECURE_BOOT_HEADERS_SIZE, + ffs_part_size, &ffs_part_size, subid, + &offset, &content_size); + if (rc) { + prerror("FAILED reading subpart info. rc=%d\n", + rc); + goto out_free_ffs; + } + + *len = ffs_part_size; + prlog(PR_DEBUG, "Computed %s partition size: %u " + "(subpart %u size %u offset %u)\n", name, ffs_part_size, + subid, content_size, offset); + /* + * For a sub partition, we read the whole (computed) + * partition, and then measure that. + * Afterwards, we memmove() things back into place for + * the caller. + */ + rc = blocklevel_read(flash->bl, ffs_part_start, + buf, ffs_part_size); + + bufp += offset; + } + +done_reading: + /* + * Verify and measure the retrieved PNOR partition as part of the + * secure boot and trusted boot requirements + */ + secureboot_verify(id, buf, *len); + trustedboot_measure(id, buf, *len); + + /* Find subpartition */ + if (subid != RESOURCE_SUBID_NONE) { + memmove(buf, bufp, content_size); + *len = content_size; + } + + status = true; + +out_free_ffs: + ffs_close(ffs); +out_unlock: + unlock(&flash_lock); + return status ? 
OPAL_SUCCESS : rc; +} + + +struct flash_load_resource_item { + enum resource_id id; + uint32_t subid; + int result; + void *buf; + size_t *len; + struct list_node link; +}; + +static LIST_HEAD(flash_load_resource_queue); +static LIST_HEAD(flash_loaded_resources); +static struct lock flash_load_resource_lock = LOCK_UNLOCKED; +static struct cpu_job *flash_load_job = NULL; + +int flash_resource_loaded(enum resource_id id, uint32_t subid) +{ + struct flash_load_resource_item *resource = NULL; + struct flash_load_resource_item *r; + int rc = OPAL_BUSY; + + lock(&flash_load_resource_lock); + list_for_each(&flash_loaded_resources, r, link) { + if (r->id == id && r->subid == subid) { + resource = r; + break; + } + } + + if (resource) { + rc = resource->result; + list_del(&resource->link); + free(resource); + } + + if (list_empty(&flash_load_resource_queue) && flash_load_job) { + cpu_wait_job(flash_load_job, true); + flash_load_job = NULL; + } + + unlock(&flash_load_resource_lock); + + return rc; +} + +/* + * Retry for 10 minutes in 5 second intervals: allow 5 minutes for a BMC reboot + * (need the BMC if we're using HIOMAP flash access), then 2x for some margin. + */ +#define FLASH_LOAD_WAIT_MS 5000 +#define FLASH_LOAD_RETRIES (2 * 5 * (60 / (FLASH_LOAD_WAIT_MS / 1000))) + +static void flash_load_resources(void *data __unused) +{ + struct flash_load_resource_item *r; + int retries = FLASH_LOAD_RETRIES; + int result = OPAL_RESOURCE; + + lock(&flash_load_resource_lock); + do { + if (list_empty(&flash_load_resource_queue)) { + break; + } + r = list_top(&flash_load_resource_queue, + struct flash_load_resource_item, link); + if (r->result != OPAL_EMPTY) + prerror("flash_load_resources() list_top unexpected " + " result %d\n", r->result); + r->result = OPAL_BUSY; + unlock(&flash_load_resource_lock); + + while (retries) { + result = flash_load_resource(r->id, r->subid, r->buf, + r->len); + if (result == OPAL_SUCCESS) { + retries = FLASH_LOAD_RETRIES; + break; + } + + if (result != FLASH_ERR_AGAIN && + result != FLASH_ERR_DEVICE_GONE) + break; + + time_wait_ms(FLASH_LOAD_WAIT_MS); + + retries--; + + prlog(PR_WARNING, + "Retrying load of %d:%d, %d attempts remain\n", + r->id, r->subid, retries); + } + + lock(&flash_load_resource_lock); + r = list_pop(&flash_load_resource_queue, + struct flash_load_resource_item, link); + /* Will reuse the result from when we hit retries == 0 */ + r->result = result; + list_add_tail(&flash_loaded_resources, &r->link); + } while(true); + unlock(&flash_load_resource_lock); +} + +static void start_flash_load_resource_job(void) +{ + if (flash_load_job) + cpu_wait_job(flash_load_job, true); + + flash_load_job = cpu_queue_job(NULL, "flash_load_resources", + flash_load_resources, NULL); + + cpu_process_local_jobs(); +} + +int flash_start_preload_resource(enum resource_id id, uint32_t subid, + void *buf, size_t *len) +{ + struct flash_load_resource_item *r; + bool start_thread = false; + + r = malloc(sizeof(struct flash_load_resource_item)); + + assert(r != NULL); + r->id = id; + r->subid = subid; + r->buf = buf; + r->len = len; + r->result = OPAL_EMPTY; + + prlog(PR_DEBUG, "Queueing preload of %x/%x\n", + r->id, r->subid); + + lock(&flash_load_resource_lock); + if (list_empty(&flash_load_resource_queue)) { + start_thread = true; + } + list_add_tail(&flash_load_resource_queue, &r->link); + unlock(&flash_load_resource_lock); + + if (start_thread) + start_flash_load_resource_job(); + + return OPAL_SUCCESS; +} + +/* + * The `libxz` decompression routines are blocking; the new 
decompression + * routines, wrapper around `libxz` functions, provide support for asynchronous + * decompression. There are two routines, which start the decompression, and one + * which waits for the decompression to complete. + * + * The decompressed image will be present in the `dst` parameter of + * `xz_decompress` structure. + * + * When the decompression is successful, the xz_decompress->status will be + * `OPAL_SUCCESS` else OPAL_PARAMETER, see definition of xz_decompress structure + * for details. + */ +static void xz_decompress(void *data) +{ + struct xz_decompress *xz = (struct xz_decompress *)data; + struct xz_dec *s; + struct xz_buf b; + + /* Initialize the xz library first */ + xz_crc32_init(); + s = xz_dec_init(XZ_SINGLE, 0); + if (s == NULL) { + prerror("initialization error for xz\n"); + xz->status = OPAL_NO_MEM; + return; + } + + xz->xz_error = XZ_DATA_ERROR; + xz->status = OPAL_PARTIAL; + + b.in = xz->src; + b.in_pos = 0; + b.in_size = xz->src_size; + b.out = xz->dst; + b.out_pos = 0; + b.out_size = xz->dst_size; + + /* Start decompressing */ + xz->xz_error = xz_dec_run(s, &b); + if (xz->xz_error != XZ_STREAM_END) { + prerror("failed to decompress subpartition\n"); + xz->status = OPAL_PARAMETER; + } else + xz->status = OPAL_SUCCESS; + + xz_dec_end(s); +} + +/* + * xz_start_decompress: start the decompression job and return. + * + * struct xz_decompress *xz, should be populated by the caller with + * - the starting address of the compressed binary + * - the address where the decompressed image should be placed + * - the sizes of the source and the destination + * + * xz->src: Source address (The compressed binary) + * xz->src_size: Source size + * xz->dst: Destination address (The memory area where the `src` will be + * decompressed) + * xz->dst_size: Destination size + * + * The `status` value will be OPAL_PARTIAL till the job completes (successfully + * or not) + */ +void xz_start_decompress(struct xz_decompress *xz) +{ + struct cpu_job *job; + + if (!xz) + return; + + if (!xz->dst || !xz->dst_size || !xz->src || !xz->src_size) { + xz->status = OPAL_PARAMETER; + return; + } + + job = cpu_queue_job(NULL, "xz_decompress", xz_decompress, + (void *) xz); + if (!job) { + xz->status = OPAL_NO_MEM; + return; + } + + xz->job = job; +} + +/* + * This function waits for the decompression job to complete. The `ret` + * structure member in `xz_decompress` will have the status code. + * + * status == OPAL_SUCCESS on success, else the corresponding error code. + */ +void wait_xz_decompress(struct xz_decompress *xz) +{ + if (!xz) + return; + + cpu_wait_job(xz->job, true); +} diff --git a/roms/skiboot/core/gcov-profiling.c b/roms/skiboot/core/gcov-profiling.c new file mode 100644 index 000000000..fdad51ed9 --- /dev/null +++ b/roms/skiboot/core/gcov-profiling.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * libgcov skeleton reimplementation to build skiboot with gcov support + * + * Copyright 2015-2018 IBM Corp. + */ + +#include <skiboot.h> +#include <compiler.h> +#include <stdio.h> + +typedef long gcov_type; + +/* + * This is GCC internal data structure. See GCC libgcc/libgcov.h for + * details. + * + * If gcc changes this, we have to change it. 
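+ * In particular, GCOV_COUNTERS below must match the value used by the GCC version building skiboot (hence the compiler version check), since it sizes the merge[] callback array.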
+ */ + +typedef unsigned int gcov_unsigned_int; + +#if __GNUC__ == 4 && __GNUC_MINOR__ >= 9 +#define GCOV_COUNTERS 9 +#else +#define GCOV_COUNTERS 8 +#endif + +struct gcov_info +{ + gcov_unsigned_int version; + struct gcov_info *next; + gcov_unsigned_int stamp; + const char *filename; + void (*merge[GCOV_COUNTERS])(gcov_type *, unsigned int); + unsigned int n_functions; + struct gcov_fn_info **functions; +}; + +/* We have a list of all gcov info set up at startup */ +struct gcov_info *gcov_info_list; + +void __gcov_init(struct gcov_info* f); +void skiboot_gcov_done(void); +void __gcov_flush(void); +void __gcov_merge_add(gcov_type *counters, unsigned int n_counters); +void __gcov_merge_single(gcov_type *counters, unsigned int n_counters); +void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters); +void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters); +void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters); +void __gcov_exit(void); + +void __gcov_init(struct gcov_info* f) +{ + static gcov_unsigned_int version = 0; + + if (version == 0) { + printf("GCOV version: %u\n", f->version); + version = f->version; + } + + if (gcov_info_list) + f->next = gcov_info_list; + + gcov_info_list = f; + return; +} + +void skiboot_gcov_done(void) +{ + struct gcov_info *i = gcov_info_list; + + if (i->filename) + printf("GCOV: gcov_info_list looks sane (first file: %s)\n", + i->filename); + else + prlog(PR_WARNING, "GCOV: gcov_info_list doesn't look sane. " + "i->filename == NULL."); + + printf("GCOV: gcov_info_list at 0x%p\n", gcov_info_list); +} + +void __gcov_merge_add(gcov_type *counters, unsigned int n_counters) +{ + (void)counters; + (void)n_counters; + + return; +} + +void __gcov_flush(void) +{ + return; +} + +void __gcov_merge_single(gcov_type *counters, unsigned int n_counters) +{ + (void)counters; + (void)n_counters; + + return; +} + +void __gcov_merge_delta(gcov_type *counters, unsigned int n_counters) +{ + (void)counters; + (void)n_counters; + + return; +} + +void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters) +{ + (void)counters; + (void)n_counters; + return; +} + +void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters) +{ + (void)counters; + (void)n_counters; +} + +void __gcov_exit(void) +{ +} diff --git a/roms/skiboot/core/hmi.c b/roms/skiboot/core/hmi.c new file mode 100644 index 000000000..9363cc5fb --- /dev/null +++ b/roms/skiboot/core/hmi.c @@ -0,0 +1,1558 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Deal with Hypervisor Maintenance Interrupts + * + * Copyright 2013-2019 IBM Corp. + */ + +#define pr_fmt(fmt) "HMI: " fmt + +#include <skiboot.h> +#include <opal.h> +#include <opal-msg.h> +#include <processor.h> +#include <chiptod.h> +#include <xscom.h> +#include <xscom-p8-regs.h> +#include <xscom-p9-regs.h> +#include <xscom-p10-regs.h> +#include <pci.h> +#include <cpu.h> +#include <chip.h> +#include <npu-regs.h> +#include <npu2-regs.h> +#include <npu2.h> +#include <npu.h> +#include <capp.h> +#include <nvram.h> +#include <cpu.h> + +/* + * P9 HMER register layout: + * +===+==========+============================+========+===================+ + * |Bit|Name |Description |PowerKVM|Action | + * | | | |HMI | | + * | | | |enabled | | + * | | | |for this| | + * | | | |bit ? 
| | + * +===+==========+============================+========+===================+ + * |0 |malfunctio|A processor core in the |Yes |Raise attn from | + * | |n_allert |system has checkstopped | |sapphire resulting | + * | | |(failed recovery) and has | |xstop | + * | | |requested a CP Sparing | | | + * | | |to occur. This is | | | + * | | |broadcasted to every | | | + * | | |processor in the system | | | + * |---+----------+----------------------------+--------+-------------------| + * |1 |Reserved |reserved |n/a | | + * |---+----------+----------------------------+--------+-------------------| + * |2 |proc_recv_|Processor recovery occurred |Yes |Log message and | + * | |done |error-bit in fir not masked | |continue working. | + * | | |(see bit 11) | | | + * |---+----------+----------------------------+--------+-------------------| + * |3 |proc_recv_|Processor went through |Yes |Log message and | + * | |error_mask|recovery for an error which | |continue working. | + * | |ed |is actually masked for | | | + * | | |reporting | | | + * |---+----------+----------------------------+--------+-------------------| + * |4 | |Timer facility experienced |Yes |Raise attn from | + * | |tfac_error|an error. | |sapphire resulting | + * | | |TB, DEC, HDEC, PURR or SPURR| |xstop | + * | | |may be corrupted (details in| | | + * | | |TFMR) | | | + * |---+----------+----------------------------+--------+-------------------| + * |5 | |TFMR SPR itself is |Yes |Raise attn from | + * | |tfmr_parit|corrupted. | |sapphire resulting | + * | |y_error |Entire timing facility may | |xstop | + * | | |be compromised. | | | + * |---+----------+----------------------------+--------+-------------------| + * |6 |ha_overflo| UPS (Uniterrupted Power |No |N/A | + * | |w_warning |System) Overflow indication | | | + * | | |indicating that the UPS | | | + * | | |DirtyAddrTable has | | | + * | | |reached a limit where it | | | + * | | |requires PHYP unload support| | | + * |---+----------+----------------------------+--------+-------------------| + * |7 |reserved |reserved |n/a |n/a | + * |---+----------+----------------------------+--------+-------------------| + * |8 |xscom_fail|An XSCOM operation caused by|No |We handle it by | + * | | |a cache inhibited load/store| |manually reading | + * | | |from this thread failed. A | |HMER register. | + * | | |trap register is | | | + * | | |available. | | | + * | | | | | | + * |---+----------+----------------------------+--------+-------------------| + * |9 |xscom_done|An XSCOM operation caused by|No |We handle it by | + * | | |a cache inhibited load/store| |manually reading | + * | | |from this thread completed. | |HMER register. | + * | | |If hypervisor | | | + * | | |intends to use this bit, it | | | + * | | |is responsible for clearing | | | + * | | |it before performing the | | | + * | | |xscom operation. | | | + * | | |NOTE: this bit should always| | | + * | | |be masked in HMEER | | | + * |---+----------+----------------------------+--------+-------------------| + * |10 |reserved |reserved |n/a |n/a | + * |---+----------+----------------------------+--------+-------------------| + * |11 |proc_recv_|Processor recovery occurred |y |Log message and | + * | |again |again before bit2 or bit3 | |continue working. 
| + * | | |was cleared | | | + * |---+----------+----------------------------+--------+-------------------| + * |12-|reserved |was temperature sensor |n/a |n/a | + * |15 | |passed the critical point on| | | + * | | |the way up | | | + * |---+----------+----------------------------+--------+-------------------| + * |16 | |SCOM has set a reserved FIR |No |n/a | + * | |scom_fir_h|bit to cause recovery | | | + * | |m | | | | + * |---+----------+----------------------------+--------+-------------------| + * |17 |trig_fir_h|Debug trigger has set a |No |n/a | + * | |mi |reserved FIR bit to cause | | | + * | | |recovery | | | + * |---+----------+----------------------------+--------+-------------------| + * |18 |reserved |reserved |n/a |n/a | + * |---+----------+----------------------------+--------+-------------------| + * |19 |reserved |reserved |n/a |n/a | + * |---+----------+----------------------------+--------+-------------------| + * |20 |hyp_resour|A hypervisor resource error |y |Raise attn from | + * | |ce_err |occurred: data parity error | |sapphire resulting | + * | | |on, SPRC0:3; SPR_Modereg or | |xstop. | + * | | |HMEER. | | | + * | | |Note: this bit will cause an| | | + * | | |check_stop when (HV=1, PR=0 | | | + * | | |and EE=0) | | | + * |---+----------+----------------------------+--------+-------------------| + * |21-| |if bit 8 is active, the |No |We handle it by | + * |23 |xscom_stat|reason will be detailed in | |Manually reading | + * | |us |these bits. see chapter 11.1| |HMER register. | + * | | |This bits are information | | | + * | | |only and always masked | | | + * | | |(mask = '0') | | | + * | | |If hypervisor intends to use| | | + * | | |this bit, it is responsible | | | + * | | |for clearing it before | | | + * | | |performing the xscom | | | + * | | |operation. | | | + * |---+----------+----------------------------+--------+-------------------| + * |24-|Not |Not implemented |n/a |n/a | + * |63 |implemente| | | | + * | |d | | | | + * +-- +----------+----------------------------+--------+-------------------+ + * + * Above HMER bits can be enabled/disabled by modifying + * SPR_HMEER_HMI_ENABLE_MASK #define in include/processor.h + * If you modify support for any of the bits listed above, please make sure + * you change the above table to refelct that. + * + * NOTE: Per Dave Larson, never enable 8,9,21-23 + */ + +/* + * P10 HMER register layout: + * Bit Name Description + * 0 malfunction_alert A processor core in the system has checkstopped + * (failed recovery). This is broadcasted to every + * processor in the system + * + * 1 reserved reserved + * + * 2 proc_rcvy_done Processor recovery occurred error-bit in fir not + * masked (see bit 11) + * + * 3 reserved reserved + * + * 4 tfac_error Timer facility experienced an error. TB, DEC, + * HDEC, PURR or SPURR may be corrupted (details in + * TFMR) + * + * 5 tfx_error Error occurred on transfer from tfac shadow to + * core + * + * 6 spurr_scale_limit Nominal frequency exceeded 399 percent + * + * 7 reserved reserved + * + * 8 xscom_fail An XSCOM operation caused by a cache inhibited + * load/store from this thread failed. A trap + * register is available. + * + * 9 xscom_done An XSCOM operation caused by a cache inhibited + * load/store from this thread completed. If + * hypervisor intends to use this bit, it is + * responsible for clearing it before performing the + * xscom operation. 
NOTE: this bit should always be + * masked in HMEER + * + * 10 reserved reserved + * + * 11 proc_rcvy_again Processor recovery occurred again before bit 2 + * was cleared + * + * 12-15 reserved reserved + * + * 16 scom_fir_hmi An error inject to PC FIR has occurred to set HMI. + * This error inject can also set FIR(61) to cause + * recovery. + * + * 17 reserved reserved + * + * 18 trig_fir_hmi Debug trigger has occurred to set HMI. This + * trigger can also set FIR(60) to cause recovery + * + * 19-20 reserved reserved + * + * 21-23 xscom_status If bit 8 is active, the reason will be detailed in + * these bits. These bits are information only and + * always masked (mask = ‘0’) If hypervisor intends + * to use this field, it is responsible for clearing + * it before performing the xscom operation. + * + * 24:63 Not implemented Not implemented. + * + * P10 HMEER enabled bits: + * Name Action + * malfunction_alert Decode and log FIR bits. + * proc_rcvy_done Log and continue. + * tfac_error Log and attempt to recover time facilities. + * tfx_error Log and attempt to recover time facilities. + * spurr_scale_limit Log and continue. XXX? + * proc_rcvy_again Log and continue. + */ + +/* Used for tracking cpu threads inside hmi handling. */ +#define HMI_STATE_CLEANUP_DONE 0x100 +#define CORE_THREAD_MASK 0x0ff +#define SUBCORE_THREAD_MASK(s_id, t_count) \ + ((((1UL) << (t_count)) - 1) << ((s_id) * (t_count))) +#define SINGLE_THREAD_MASK(t_id) ((1UL) << (t_id)) + +/* + * Number of iterations for the various timeouts. We can't use the timebase + * as it might be broken. We measured experimentally that 40 millions loops + * of cpu_relax() gives us more than 1s. The margin is comfortable enough. + */ +#define TIMEOUT_LOOPS 40000000 + +/* TFMR other errors. (other than bit 26 and 45) */ +#define SPR_TFMR_OTHER_ERRORS \ + (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \ + SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \ + SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \ + SPR_TFMR_DEC_PARITY_ERR | SPR_TFMR_TFMR_CORRUPT | \ + SPR_TFMR_CHIP_TOD_INTERRUPT) + +/* TFMR "all core" errors (sent to all threads) */ +#define SPR_TFMR_CORE_ERRORS \ + (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \ + SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \ + SPR_TFMR_TFMR_CORRUPT | SPR_TFMR_TB_RESIDUE_ERR | \ + SPR_TFMR_HDEC_PARITY_ERROR | SPR_TFMR_TFAC_XFER_ERROR) + +/* TFMR "thread" errors */ +#define SPR_TFMR_THREAD_ERRORS \ + (SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \ + SPR_TFMR_DEC_PARITY_ERR) + +/* + * Starting from p9, core inits are setup to escalate all core + * local checkstop to system checkstop. Review this list when that changes. 
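+ * Each entry below maps a CORE FIR bit to the OpalHMI_CoreXstopReason that gets reported to the host in the resulting HMI event.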
+ */ +static const struct core_xstop_bit_info { + uint8_t bit; /* CORE FIR bit number */ + enum OpalHMI_CoreXstopReason reason; +} xstop_bits[] = { + { 3, CORE_CHECKSTOP_IFU_REGFILE }, + { 5, CORE_CHECKSTOP_IFU_LOGIC }, + { 8, CORE_CHECKSTOP_PC_DURING_RECOV }, + { 10, CORE_CHECKSTOP_ISU_REGFILE }, + { 12, CORE_CHECKSTOP_ISU_LOGIC }, + { 21, CORE_CHECKSTOP_FXU_LOGIC }, + { 25, CORE_CHECKSTOP_VSU_LOGIC }, + { 26, CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE }, + { 32, CORE_CHECKSTOP_LSU_REGFILE }, + { 36, CORE_CHECKSTOP_PC_FWD_PROGRESS }, + { 38, CORE_CHECKSTOP_LSU_LOGIC }, + { 45, CORE_CHECKSTOP_PC_LOGIC }, + { 48, CORE_CHECKSTOP_PC_HYP_RESOURCE }, + { 52, CORE_CHECKSTOP_PC_HANG_RECOV_FAILED }, + { 54, CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED }, + { 63, CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ }, +}; + +struct core_fir_bit_info { + uint8_t bit; /* CORE FIR bit number */ + const char *reason; +}; + +static const struct core_fir_bit_info p9_recoverable_bits[] = { + { 0, "IFU - SRAM (ICACHE parity, etc)" }, + { 2, "IFU - RegFile" }, + { 4, "IFU - Logic" }, + { 9, "ISU - RegFile" }, + { 11, "ISU - Logic" }, + { 13, "ISU - Recoverable due to not in MT window" }, + { 24, "VSU - Logic" }, + { 27, "VSU - DFU logic" }, + { 29, "LSU - SRAM (DCACHE parity, etc)" }, + { 31, "LSU - RegFile" }, + /* The following 3 bits may be set by SRAM errors. */ + { 33, "LSU - TLB multi hit" }, + { 34, "LSU - SLB multi hit" }, + { 35, "LSU - ERAT multi hit" }, + { 37, "LSU - Logic" }, + { 39, "LSU - Recoverable due to not in MT window" }, + { 43, "PC - Thread hang recovery" }, +}; + +static const struct core_fir_bit_info p10_core_fir_bits[] = { + { 0, "IFU - SRAM recoverable error (ICACHE parity error, etc.)" }, + { 1, "PC - TC checkstop" }, + { 2, "IFU - RegFile recoverable error" }, + { 3, "IFU - RegFile core checkstop" }, + { 4, "IFU - Logic recoverable error" }, + { 5, "IFU - Logic core checkstop" }, + { 7, "VSU - Inference accumulator recoverable error" }, + { 8, "PC - Recovery core checkstop" }, + { 9, "VSU - Slice Target File (STF) recoverable error" }, + { 11, "ISU - Logic recoverable error" }, + { 12, "ISU - Logic core checkstop" }, + { 14, "ISU - Machine check received while ME=0 checkstop" }, + { 15, "ISU - UE from L2" }, + { 16, "ISU - Number of UEs from L2 above threshold" }, + { 17, "ISU - UE on CI load" }, + { 18, "MMU - TLB recoverable error" }, + { 19, "MMU - SLB error" }, + { 21, "MMU - CXT recoverable error" }, + { 22, "MMU - Logic core checkstop" }, + { 23, "MMU - MMU system checkstop" }, + { 24, "VSU - Logic recoverable error" }, + { 25, "VSU - Logic core checkstop" }, + { 26, "PC - In maint mode and recovery in progress" }, + { 28, "PC - PC system checkstop" }, + { 29, "LSU - SRAM recoverable error (DCACHE parity error, etc.)" }, + { 30, "LSU - Set deleted" }, + { 31, "LSU - RegFile recoverable error" }, + { 32, "LSU - RegFile core checkstop" }, + { 33, "MMU - TLB multi hit error occurred" }, + { 34, "MMU - SLB multi hit error occurred" }, + { 35, "LSU - ERAT multi hit error occurred" }, + { 36, "PC - Forward progress error" }, + { 37, "LSU - Logic recoverable error" }, + { 38, "LSU - Logic core checkstop" }, + { 41, "LSU - System checkstop" }, + { 43, "PC - Thread hang recoverable error" }, + { 45, "PC - Logic core checkstop" }, + { 47, "PC - TimeBase facility checkstop" }, + { 52, "PC - Hang recovery failed core checkstop" }, + { 53, "PC - Core internal hang detected" }, + { 55, "PC - Nest hang detected" }, + { 56, "PC - Other core chiplet recoverable error" }, + { 57, "PC - Other core chiplet core checkstop" 
}, + { 58, "PC - Other core chiplet system checkstop" }, + { 59, "PC - SCOM satellite error detected" }, + { 60, "PC - Debug trigger error inject" }, + { 61, "PC - SCOM or firmware recoverable error inject" }, + { 62, "PC - Firmware checkstop error inject" }, + { 63, "PC - Firmware SPRC / SPRD checkstop" }, +}; + +static const struct nx_xstop_bit_info { + uint8_t bit; /* NX FIR bit number */ + enum OpalHMI_NestAccelXstopReason reason; +} nx_dma_xstop_bits[] = { + { 1, NX_CHECKSTOP_SHM_INVAL_STATE_ERR }, + { 15, NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1 }, + { 16, NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2 }, + { 20, NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR }, + { 21, NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR }, + { 22, NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR }, + { 23, NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR }, + { 24, NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR }, + { 25, NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR }, + { 26, NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR }, + { 27, NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR }, + { 31, NX_CHECKSTOP_DMA_CRB_UE }, + { 32, NX_CHECKSTOP_DMA_CRB_SUE }, +}; + +static const struct nx_xstop_bit_info nx_pbi_xstop_bits[] = { + { 12, NX_CHECKSTOP_PBI_ISN_UE }, +}; + +static struct lock hmi_lock = LOCK_UNLOCKED; +static uint32_t malf_alert_scom; +static uint32_t nx_status_reg; +static uint32_t nx_dma_engine_fir; +static uint32_t nx_pbi_fir; + +static int setup_scom_addresses(void) +{ + switch (proc_gen) { + case proc_gen_p8: + malf_alert_scom = P8_MALFUNC_ALERT; + nx_status_reg = P8_NX_STATUS_REG; + nx_dma_engine_fir = P8_NX_DMA_ENGINE_FIR; + nx_pbi_fir = P8_NX_PBI_FIR; + return 1; + case proc_gen_p9: + malf_alert_scom = P9_MALFUNC_ALERT; + nx_status_reg = P9_NX_STATUS_REG; + nx_dma_engine_fir = P9_NX_DMA_ENGINE_FIR; + nx_pbi_fir = P9_NX_PBI_FIR; + return 1; + case proc_gen_p10: + malf_alert_scom = P10_MALFUNC_ALERT; + nx_status_reg = P10_NX_STATUS_REG; + nx_dma_engine_fir = P10_NX_DMA_ENGINE_FIR; + nx_pbi_fir = P10_NX_PBI_FIR; + return 1; + default: + prerror("%s: Unknown CPU type\n", __func__); + break; + } + return 0; +} + +static int queue_hmi_event(struct OpalHMIEvent *hmi_evt, int recover, uint64_t *out_flags) +{ + size_t size; + + /* Don't queue up event if recover == -1 */ + if (recover == -1) + return 0; + + /* set disposition */ + if (recover == 1) + hmi_evt->disposition = OpalHMI_DISPOSITION_RECOVERED; + else if (recover == 0) + hmi_evt->disposition = OpalHMI_DISPOSITION_NOT_RECOVERED; + + /* + * V2 of struct OpalHMIEvent is of (5 * 64 bits) size and well packed + * structure. Hence use uint64_t pointer to pass entire structure + * using 5 params in generic message format. Instead of hard coding + * num_params divide the struct size by 8 bytes to get exact + * num_params value. + */ + size = ALIGN_UP(sizeof(*hmi_evt), sizeof(u64)); + + *out_flags |= OPAL_HMI_FLAGS_NEW_EVENT; + + /* queue up for delivery to host. 
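The event is delivered to the host as an OPAL_MSG_HMI_EVT message.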
*/ + return _opal_queue_msg(OPAL_MSG_HMI_EVT, NULL, NULL, + size, hmi_evt); +} + +static int read_core_fir(uint32_t chip_id, uint32_t core_id, uint64_t *core_fir) +{ + int rc; + + switch (proc_gen) { + case proc_gen_p8: + rc = xscom_read(chip_id, + XSCOM_ADDR_P8_EX(core_id, P8_CORE_FIR), core_fir); + break; + case proc_gen_p9: + rc = xscom_read(chip_id, + XSCOM_ADDR_P9_EC(core_id, P9_CORE_FIR), core_fir); + break; + case proc_gen_p10: + rc = xscom_read(chip_id, + XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIR), core_fir); + break; + default: + rc = OPAL_HARDWARE; + } + return rc; +} + +static int read_core_wof(uint32_t chip_id, uint32_t core_id, uint64_t *core_wof) +{ + int rc; + + switch (proc_gen) { + case proc_gen_p9: + rc = xscom_read(chip_id, + XSCOM_ADDR_P9_EC(core_id, P9_CORE_WOF), core_wof); + break; + case proc_gen_p10: + rc = xscom_read(chip_id, + XSCOM_ADDR_P10_EC(core_id, P10_CORE_WOF), core_wof); + break; + default: + rc = OPAL_HARDWARE; + } + return rc; +} + +static bool decode_core_fir(struct cpu_thread *cpu, + struct OpalHMIEvent *hmi_evt) +{ + uint64_t core_fir; + uint32_t core_id; + int i, swkup_rc; + bool found = false; + int64_t ret; + const char *loc; + + /* Sanity check */ + if (!cpu || !hmi_evt) + return false; + + core_id = pir_to_core_id(cpu->pir); + + /* Force the core to wakeup, otherwise reading core_fir is unrealiable + * if stop-state 5 is enabled. + */ + swkup_rc = dctl_set_special_wakeup(cpu); + + /* Get CORE FIR register value. */ + ret = read_core_fir(cpu->chip_id, core_id, &core_fir); + + if (!swkup_rc) + dctl_clear_special_wakeup(cpu); + + + if (ret == OPAL_WRONG_STATE) { + /* + * CPU is asleep, so it probably didn't cause the checkstop. + * If no other HMI cause is found a "catchall" checkstop + * will be raised, so if this CPU should've been awake the + * error will be handled appropriately. + */ + prlog(PR_DEBUG, + "FIR read failed, chip %d core %d asleep\n", + cpu->chip_id, core_id); + return false; + } else if (ret != OPAL_SUCCESS) { + prerror("XSCOM error reading CORE FIR\n"); + /* If the FIR can't be read, we should checkstop. */ + return true; + } + + if (!core_fir) + return false; + + loc = chip_loc_code(cpu->chip_id); + prlog(PR_INFO, "[Loc: %s]: CHIP ID: %x, CORE ID: %x, FIR: %016llx\n", + loc ? loc : "Not Available", + cpu->chip_id, core_id, core_fir); + + if (proc_gen == proc_gen_p10) { + for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) { + if (core_fir & PPC_BIT(p10_core_fir_bits[i].bit)) + prlog(PR_INFO, " %s\n", p10_core_fir_bits[i].reason); + } + } + + /* Check CORE FIR bits and populate HMI event with error info. */ + for (i = 0; i < ARRAY_SIZE(xstop_bits); i++) { + if (core_fir & PPC_BIT(xstop_bits[i].bit)) { + found = true; + hmi_evt->u.xstop_error.xstop_reason + |= cpu_to_be32(xstop_bits[i].reason); + } + } + return found; +} + +static void find_core_checkstop_reason(struct OpalHMIEvent *hmi_evt, + uint64_t *out_flags) +{ + struct cpu_thread *cpu; + + /* Initialize HMI event */ + hmi_evt->severity = OpalHMI_SEV_FATAL; + hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT; + hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_CORE; + + /* + * Check CORE FIRs and find the reason for core checkstop. + * Send a separate HMI event for each core that has checkstopped. + */ + for_each_cpu(cpu) { + /* GARDed CPUs are marked unavailable. Skip them. */ + if (cpu->state == cpu_state_unavailable) + continue; + + /* Only check on primaries (ie. core), not threads */ + if (cpu->is_secondary) + continue; + + /* Initialize xstop_error fields. 
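+		 * xstop_reason is cleared for every core so that bits decoded
+		 * for one core do not leak into the event queued for the next
+		 * one, and u.pir records which core this event describes.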
*/ + hmi_evt->u.xstop_error.xstop_reason = 0; + hmi_evt->u.xstop_error.u.pir = cpu_to_be32(cpu->pir); + + if (decode_core_fir(cpu, hmi_evt)) + queue_hmi_event(hmi_evt, 0, out_flags); + } +} + +static void find_capp_checkstop_reason(int flat_chip_id, + struct OpalHMIEvent *hmi_evt, + uint64_t *out_flags) +{ + struct capp_info info; + struct phb *phb; + uint64_t capp_fir; + uint64_t capp_fir_mask; + uint64_t capp_fir_action0; + uint64_t capp_fir_action1; + uint64_t reg; + int64_t rc; + + /* CAPP exists on P8 and P9 only */ + if (proc_gen != proc_gen_p8 && proc_gen != proc_gen_p9) + return; + + /* Find the CAPP on the chip associated with the HMI. */ + for_each_phb(phb) { + /* get the CAPP info */ + rc = capp_get_info(flat_chip_id, phb, &info); + if (rc == OPAL_PARAMETER) + continue; + + if (xscom_read(flat_chip_id, info.capp_fir_reg, &capp_fir) || + xscom_read(flat_chip_id, info.capp_fir_mask_reg, + &capp_fir_mask) || + xscom_read(flat_chip_id, info.capp_fir_action0_reg, + &capp_fir_action0) || + xscom_read(flat_chip_id, info.capp_fir_action1_reg, + &capp_fir_action1)) { + prerror("CAPP: Couldn't read CAPP#%d (PHB:#%x) FIR registers by XSCOM!\n", + info.capp_index, info.phb_index); + continue; + } + + if (!(capp_fir & ~capp_fir_mask)) + continue; + + prlog(PR_DEBUG, "CAPP#%d (PHB:#%x): FIR 0x%016llx mask 0x%016llx\n", + info.capp_index, info.phb_index, capp_fir, + capp_fir_mask); + prlog(PR_DEBUG, "CAPP#%d (PHB:#%x): ACTION0 0x%016llx, ACTION1 0x%016llx\n", + info.capp_index, info.phb_index, capp_fir_action0, + capp_fir_action1); + + /* + * If this bit is set (=1) a Recoverable Error has been + * detected + */ + xscom_read(flat_chip_id, info.capp_err_status_ctrl_reg, ®); + if ((reg & PPC_BIT(0)) != 0) { + phb_lock(phb); + phb->ops->set_capp_recovery(phb); + phb_unlock(phb); + + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_CAPP_RECOVERY; + queue_hmi_event(hmi_evt, 1, out_flags); + + return; + } + } +} + +static void find_nx_checkstop_reason(int flat_chip_id, + struct OpalHMIEvent *hmi_evt, + uint64_t *out_flags) +{ + uint64_t nx_status; + uint64_t nx_dma_fir; + uint64_t nx_pbi_fir_val; + int i; + + /* Get NX status register value. */ + if (xscom_read(flat_chip_id, nx_status_reg, &nx_status) != 0) { + prerror("XSCOM error reading NX_STATUS_REG\n"); + return; + } + + /* Check if NX has driven an HMI interrupt. */ + if (!(nx_status & NX_HMI_ACTIVE)) + return; + + /* Initialize HMI event */ + hmi_evt->severity = OpalHMI_SEV_FATAL; + hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT; + hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NX; + hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id); + + /* Get DMA & Engine FIR data register value. */ + if (xscom_read(flat_chip_id, nx_dma_engine_fir, &nx_dma_fir) != 0) { + prerror("XSCOM error reading NX_DMA_ENGINE_FIR\n"); + return; + } + + /* Get PowerBus Interface FIR data register value. */ + if (xscom_read(flat_chip_id, nx_pbi_fir, &nx_pbi_fir_val) != 0) { + prerror("XSCOM error reading NX_PBI_FIR\n"); + return; + } + + /* Find NX checkstop reason and populate HMI event with error info. 
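+	 * Both the DMA/Engine FIR and the PowerBus Interface FIR are
+	 * scanned below, and every matching reason is OR-ed into
+	 * xstop_reason, so one event can carry several causes at once.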
*/ + for (i = 0; i < ARRAY_SIZE(nx_dma_xstop_bits); i++) + if (nx_dma_fir & PPC_BIT(nx_dma_xstop_bits[i].bit)) + hmi_evt->u.xstop_error.xstop_reason + |= cpu_to_be32(nx_dma_xstop_bits[i].reason); + + for (i = 0; i < ARRAY_SIZE(nx_pbi_xstop_bits); i++) + if (nx_pbi_fir_val & PPC_BIT(nx_pbi_xstop_bits[i].bit)) + hmi_evt->u.xstop_error.xstop_reason + |= cpu_to_be32(nx_pbi_xstop_bits[i].reason); + + /* + * Set NXDMAENGFIR[38] to signal PRD that service action is required. + * Without this inject, PRD will not be able to do NX unit checkstop + * error analysis. NXDMAENGFIR[38] is a spare bit and used to report + * a software initiated attention. + * + * The behavior of this bit and all FIR bits are documented in + * RAS spreadsheet. + */ + xscom_write(flat_chip_id, nx_dma_engine_fir, PPC_BIT(38)); + + /* Send an HMI event. */ + queue_hmi_event(hmi_evt, 0, out_flags); +} + +static bool phb_is_npu2(struct dt_node *dn) +{ + return (dt_node_is_compatible(dn, "ibm,power9-npu-pciex") || + dt_node_is_compatible(dn, "ibm,power9-npu-opencapi-pciex")); +} + +static void add_npu2_xstop_reason(uint32_t *xstop_reason, uint8_t reason) +{ + int i, reason_count; + uint8_t *ptr; + + reason_count = sizeof(*xstop_reason) / sizeof(reason); + ptr = (uint8_t *) xstop_reason; + for (i = 0; i < reason_count; i++) { + if (*ptr == 0) { + *ptr = reason; + break; + } + ptr++; + } +} + +static void encode_npu2_xstop_reason(uint32_t *xstop_reason, + uint64_t fir, int fir_number) +{ + int bit; + uint8_t reason; + + /* + * There are three 64-bit FIRs but the xstop reason field of + * the hmi event is only 32-bit. Encode which FIR bit is set as: + * - 2 bits for the FIR number + * - 6 bits for the bit number (0 -> 63) + * + * So we could even encode up to 4 reasons for the HMI, if + * that can ever happen + */ + while (fir) { + bit = ilog2(fir); + reason = fir_number << 6; + reason |= (63 - bit); // IBM numbering + add_npu2_xstop_reason(xstop_reason, reason); + fir ^= 1ULL << bit; + } +} + +static void find_npu2_checkstop_reason(int flat_chip_id, + struct OpalHMIEvent *hmi_evt, + uint64_t *out_flags) +{ + struct phb *phb; + int i; + bool npu2_hmi_verbose = false, found = false; + uint64_t npu2_fir; + uint64_t npu2_fir_mask; + uint64_t npu2_fir_action0; + uint64_t npu2_fir_action1; + uint64_t npu2_fir_addr; + uint64_t npu2_fir_mask_addr; + uint64_t npu2_fir_action0_addr; + uint64_t npu2_fir_action1_addr; + uint64_t fatal_errors; + uint32_t xstop_reason = 0; + int total_errors = 0; + const char *loc; + + /* NPU2 only */ + if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P9) + return; + + /* Find the NPU on the chip associated with the HMI. */ + for_each_phb(phb) { + /* NOTE: if a chip ever has >1 NPU this will need adjusting */ + if (phb_is_npu2(phb->dt_node) && + (dt_get_chip_id(phb->dt_node) == flat_chip_id)) { + found = true; + break; + } + } + + /* If we didn't find a NPU on the chip, it's not our checkstop. */ + if (!found) + return; + + npu2_fir_addr = NPU2_FIR_REGISTER_0; + npu2_fir_mask_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_MASK_OFFSET; + npu2_fir_action0_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION0_OFFSET; + npu2_fir_action1_addr = NPU2_FIR_REGISTER_0 + NPU2_FIR_ACTION1_OFFSET; + + for (i = 0; i < NPU2_TOTAL_FIR_REGISTERS; i++) { + /* Read all the registers necessary to find a checkstop condition. 
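+		 * A bit is treated as fatal only when it is set in the FIR,
+		 * unmasked, and set in both ACTION registers, i.e. the
+		 * fir & ~mask & action0 & action1 expression below. Each such
+		 * bit is then packed by encode_npu2_xstop_reason() as
+		 * (FIR number << 6) | IBM bit number -- e.g. FIR#1 bit 3
+		 * becomes 0x43 -- into the next free byte of xstop_reason.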
*/ + if (xscom_read(flat_chip_id, npu2_fir_addr, &npu2_fir) || + xscom_read(flat_chip_id, npu2_fir_mask_addr, &npu2_fir_mask) || + xscom_read(flat_chip_id, npu2_fir_action0_addr, &npu2_fir_action0) || + xscom_read(flat_chip_id, npu2_fir_action1_addr, &npu2_fir_action1)) { + prerror("HMI: Couldn't read NPU FIR register%d with XSCOM\n", i); + continue; + } + + fatal_errors = npu2_fir & ~npu2_fir_mask & npu2_fir_action0 & npu2_fir_action1; + + if (fatal_errors) { + loc = chip_loc_code(flat_chip_id); + if (!loc) + loc = "Not Available"; + prlog(PR_ERR, "NPU: [Loc: %s] P:%d FIR#%d FIR 0x%016llx mask 0x%016llx\n", + loc, flat_chip_id, i, npu2_fir, npu2_fir_mask); + prlog(PR_ERR, "NPU: [Loc: %s] P:%d ACTION0 0x%016llx, ACTION1 0x%016llx\n", + loc, flat_chip_id, npu2_fir_action0, npu2_fir_action1); + total_errors++; + + encode_npu2_xstop_reason(&xstop_reason, fatal_errors, i); + } + + /* Can't do a fence yet, we are just logging fir information for now */ + npu2_fir_addr += NPU2_FIR_OFFSET; + npu2_fir_mask_addr += NPU2_FIR_OFFSET; + npu2_fir_action0_addr += NPU2_FIR_OFFSET; + npu2_fir_action1_addr += NPU2_FIR_OFFSET; + + } + + if (!total_errors) + return; + + npu2_hmi_verbose = nvram_query_eq_safe("npu2-hmi-verbose", "true"); + /* Force this for now until we sort out something better */ + npu2_hmi_verbose = true; + + if (npu2_hmi_verbose) { + npu2_dump_scoms(flat_chip_id); + prlog(PR_ERR, " _________________________ \n"); + prlog(PR_ERR, "< It's Debug time! >\n"); + prlog(PR_ERR, " ------------------------- \n"); + prlog(PR_ERR, " \\ ,__, \n"); + prlog(PR_ERR, " \\ (oo)____ \n"); + prlog(PR_ERR, " (__) )\\ \n"); + prlog(PR_ERR, " ||--|| * \n"); + } + + /* Set up the HMI event */ + hmi_evt->severity = OpalHMI_SEV_WARNING; + hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT; + hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU; + hmi_evt->u.xstop_error.xstop_reason = cpu_to_be32(xstop_reason); + hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id); + + /* Marking the event as recoverable so that we don't crash */ + queue_hmi_event(hmi_evt, 1, out_flags); +} + +static void find_npu_checkstop_reason(int flat_chip_id, + struct OpalHMIEvent *hmi_evt, + uint64_t *out_flags) +{ + struct phb *phb; + struct npu *p = NULL; + + uint64_t npu_fir; + uint64_t npu_fir_mask; + uint64_t npu_fir_action0; + uint64_t npu_fir_action1; + uint64_t fatal_errors; + + /* Only check for NPU errors if the chip has a NPU */ + if (PVR_TYPE(mfspr(SPR_PVR)) != PVR_TYPE_P8NVL) + return find_npu2_checkstop_reason(flat_chip_id, hmi_evt, out_flags); + + /* Find the NPU on the chip associated with the HMI. */ + for_each_phb(phb) { + /* NOTE: if a chip ever has >1 NPU this will need adjusting */ + if (dt_node_is_compatible(phb->dt_node, "ibm,power8-npu-pciex") && + (dt_get_chip_id(phb->dt_node) == flat_chip_id)) { + p = phb_to_npu(phb); + break; + } + } + + /* If we didn't find a NPU on the chip, it's not our checkstop. */ + if (p == NULL) + return; + + /* Read all the registers necessary to find a checkstop condition. */ + if (xscom_read(flat_chip_id, + p->at_xscom + NX_FIR, &npu_fir) || + xscom_read(flat_chip_id, + p->at_xscom + NX_FIR_MASK, &npu_fir_mask) || + xscom_read(flat_chip_id, + p->at_xscom + NX_FIR_ACTION0, &npu_fir_action0) || + xscom_read(flat_chip_id, + p->at_xscom + NX_FIR_ACTION1, &npu_fir_action1)) { + prerror("Couldn't read NPU registers with XSCOM\n"); + return; + } + + fatal_errors = npu_fir & ~npu_fir_mask & npu_fir_action0 & npu_fir_action1; + + /* If there's no errors, we don't need to do anything. 
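+	 * Otherwise the NPU is fenced (it cannot recover by itself) and a
+	 * WARNING-severity event is queued, which reports the failure to
+	 * the OS without taking the whole system down.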
*/ + if (!fatal_errors) + return; + + prlog(PR_DEBUG, "NPU: FIR 0x%016llx mask 0x%016llx\n", + npu_fir, npu_fir_mask); + prlog(PR_DEBUG, "NPU: ACTION0 0x%016llx, ACTION1 0x%016llx\n", + npu_fir_action0, npu_fir_action1); + + /* Set the NPU to fenced since it can't recover. */ + npu_set_fence_state(p, true); + + /* Set up the HMI event */ + hmi_evt->severity = OpalHMI_SEV_WARNING; + hmi_evt->type = OpalHMI_ERROR_MALFUNC_ALERT; + hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_NPU; + hmi_evt->u.xstop_error.u.chip_id = cpu_to_be32(flat_chip_id); + + /* The HMI is "recoverable" because it shouldn't crash the system */ + queue_hmi_event(hmi_evt, 1, out_flags); +} + +static void decode_malfunction(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags) +{ + int i; + uint64_t malf_alert, flags; + + flags = 0; + + if (!setup_scom_addresses()) { + prerror("Failed to setup scom addresses\n"); + /* Send an unknown HMI event. */ + hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_UNKNOWN; + hmi_evt->u.xstop_error.xstop_reason = 0; + queue_hmi_event(hmi_evt, false, out_flags); + return; + } + + xscom_read(this_cpu()->chip_id, malf_alert_scom, &malf_alert); + + if (!malf_alert) + return; + + for (i = 0; i < 64; i++) { + if (malf_alert & PPC_BIT(i)) { + xscom_write(this_cpu()->chip_id, malf_alert_scom, + ~PPC_BIT(i)); + find_capp_checkstop_reason(i, hmi_evt, &flags); + find_nx_checkstop_reason(i, hmi_evt, &flags); + find_npu_checkstop_reason(i, hmi_evt, &flags); + } + } + + find_core_checkstop_reason(hmi_evt, &flags); + + /* + * If we fail to find checkstop reason, send an unknown HMI event. + */ + if (!(flags & OPAL_HMI_FLAGS_NEW_EVENT)) { + hmi_evt->u.xstop_error.xstop_type = CHECKSTOP_TYPE_UNKNOWN; + hmi_evt->u.xstop_error.xstop_reason = 0; + queue_hmi_event(hmi_evt, false, &flags); + } + *out_flags |= flags; +} + +/* + * This will "rendez-vous" all threads on the core to the rendez-vous + * id "sig". You need to make sure that "sig" is different from the + * previous rendez vous. The sig value must be between 0 and 7 with + * boot time being set to 0. + * + * Note: in theory, we could just use a flip flop "sig" in the thread + * structure (binary rendez-vous with no argument). This is a bit more + * debuggable and better at handling timeouts (arguably). + * + * This should be called with the no lock held + */ +static void hmi_rendez_vous(uint32_t sig) +{ + struct cpu_thread *t = this_cpu(); + uint32_t my_id = cpu_get_thread_index(t); + uint32_t my_shift = my_id << 2; + uint32_t *sptr = t->core_hmi_state_ptr; + uint32_t val, prev, shift, i; + uint64_t timeout; + + assert(sig <= 0x7); + + /* + * Mark ourselves as having reached the rendez vous point with + * the exit bit cleared + */ + do { + val = prev = *sptr; + val &= ~(0xfu << my_shift); + val |= sig << my_shift; + } while (cmpxchg32(sptr, prev, val) != prev); + + /* + * Wait for everybody else to reach that point, ignore the + * exit bit as another thread could have already set it. + */ + for (i = 0; i < cpu_thread_count; i++) { + shift = i << 2; + + timeout = TIMEOUT_LOOPS; + while (((*sptr >> shift) & 0x7) != sig && --timeout) + cpu_relax(); + if (!timeout) + prlog(PR_ERR, "Rendez-vous stage 1 timeout, CPU 0x%x" + " waiting for thread %d (sptr=%08x)\n", + t->pir, i, *sptr); + } + + /* Set the exit bit */ + do { + val = prev = *sptr; + val &= ~(0xfu << my_shift); + val |= (sig | 8) << my_shift; + } while (cmpxchg32(sptr, prev, val) != prev); + + /* At this point, we need to wait for everybody else to have a value + * that is *not* sig. IE. 
they either have set the exit bit *or* they + * have changed the rendez-vous (meaning they have moved on to another + * rendez vous point). + */ + for (i = 0; i < cpu_thread_count; i++) { + shift = i << 2; + + timeout = TIMEOUT_LOOPS; + while (((*sptr >> shift) & 0xf) == sig && --timeout) + cpu_relax(); + if (!timeout) + prlog(PR_ERR, "Rendez-vous stage 2 timeout, CPU 0x%x" + " waiting for thread %d (sptr=%08x)\n", + t->pir, i, *sptr); + } +} + +static void hmi_print_debug(const uint8_t *msg, uint64_t hmer) +{ + const char *loc; + uint32_t core_id, thread_index; + + core_id = pir_to_core_id(this_cpu()->pir); + thread_index = cpu_get_thread_index(this_cpu()); + + loc = chip_loc_code(this_cpu()->chip_id); + if (!loc) + loc = "Not Available"; + + /* Also covers P10 SPR_HMER_TFAC_SHADOW_XFER_ERROR */ + if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) { + prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: TFMR(%016lx) %s\n", + loc, this_cpu()->chip_id, core_id, thread_index, + mfspr(SPR_TFMR), msg); + } else { + prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: %s\n", + loc, this_cpu()->chip_id, core_id, thread_index, + msg); + } +} + +static int handle_thread_tfac_error(uint64_t tfmr, uint64_t *out_flags) +{ + int recover = 1; + + if (tfmr & SPR_TFMR_DEC_PARITY_ERR) + *out_flags |= OPAL_HMI_FLAGS_DEC_LOST; + if (!tfmr_recover_local_errors(tfmr)) + recover = 0; + tfmr &= ~(SPR_TFMR_PURR_PARITY_ERR | + SPR_TFMR_SPURR_PARITY_ERR | + SPR_TFMR_DEC_PARITY_ERR); + return recover; +} + +static int64_t opal_handle_hmi(void); + +static void opal_handle_hmi_job(void *data __unused) +{ + opal_handle_hmi(); +} + +/* + * Queue hmi handling job If secondaries are still in OPAL + * This function is called by thread 0. + */ +static struct cpu_job **hmi_kick_secondaries(void) +{ + struct cpu_thread *ts = this_cpu(); + struct cpu_job **hmi_jobs = NULL; + int job_sz = sizeof(struct cpu_job *) * cpu_thread_count; + int i; + + for (i = 1; i < cpu_thread_count; i++) { + ts = next_cpu(ts); + + /* Is this thread still in OPAL ? */ + if (ts->state == cpu_state_active) { + if (!hmi_jobs) { + hmi_jobs = zalloc(job_sz); + assert(hmi_jobs); + } + + prlog(PR_DEBUG, "Sending hmi job to thread %d\n", i); + hmi_jobs[i] = cpu_queue_job(ts, "handle_hmi_job", + opal_handle_hmi_job, NULL); + } + } + return hmi_jobs; +} + +static int handle_all_core_tfac_error(uint64_t tfmr, uint64_t *out_flags) +{ + struct cpu_thread *t, *t0; + int recover = -1; + struct cpu_job **hmi_jobs = NULL; + + t = this_cpu(); + t0 = find_cpu_by_pir(cpu_get_thread0(t)); + + if (t == t0 && t0->state == cpu_state_os) + hmi_jobs = hmi_kick_secondaries(); + + /* Rendez vous all threads */ + hmi_rendez_vous(1); + + /* We use a lock here as some of the TFMR bits are shared and I + * prefer avoiding doing the cleanup simultaneously. + */ + lock(&hmi_lock); + + /* First handle corrupt TFMR otherwise we can't trust anything. + * We'll use a lock here so that the threads don't try to do it at + * the same time + */ + if (tfmr & SPR_TFMR_TFMR_CORRUPT) { + /* Check if it's still in error state */ + if (mfspr(SPR_TFMR) & SPR_TFMR_TFMR_CORRUPT) + if (!recover_corrupt_tfmr()) { + unlock(&hmi_lock); + recover = 0; + goto error_out; + } + + tfmr = mfspr(SPR_TFMR); + + /* We could have got new thread errors in the meantime */ + if (tfmr & SPR_TFMR_THREAD_ERRORS) { + recover = handle_thread_tfac_error(tfmr, out_flags); + tfmr &= ~SPR_TFMR_THREAD_ERRORS; + } + if (!recover) { + unlock(&hmi_lock); + goto error_out; + } + } + + /* Tell the OS ... 
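+	 * that the hypervisor decrementer was lost (OPAL_HMI_FLAGS_HDEC_LOST),
+	 * i.e. its contents can no longer be trusted after this error.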
*/ + if (tfmr & SPR_TFMR_HDEC_PARITY_ERROR) + *out_flags |= OPAL_HMI_FLAGS_HDEC_LOST; + + /* Cleanup bad HDEC or TB on all threads or subcures before we clear + * the error conditions + */ + tfmr_cleanup_core_errors(tfmr); + + /* Unlock before next rendez-vous */ + unlock(&hmi_lock); + + /* Second rendez vous, ensure the above cleanups are all done before + * we proceed further + */ + hmi_rendez_vous(2); + + /* We can now clear the error conditions in the core. */ + recover = tfmr_clear_core_errors(tfmr); + if (recover == 0) + goto error_out; + + /* Third rendez-vous. We could in theory do the timebase resync as + * part of the previous one, but I prefer having all the error + * conditions cleared before we start trying. + */ + hmi_rendez_vous(3); + + /* Now perform the actual TB recovery on thread 0 */ + if (t == t0) + recover = chiptod_recover_tb_errors(&this_cpu()->tb_resynced); + +error_out: + /* Last rendez-vous */ + hmi_rendez_vous(4); + + /* Now all threads have gone past rendez-vous 3 and not yet past another + * rendez-vous 1, so the value of tb_resynced of thread 0 of the core + * contains an accurate indication as to whether the timebase was lost. + */ + if (t0->tb_resynced) + *out_flags |= OPAL_HMI_FLAGS_TB_RESYNC; + + if (t == t0 && hmi_jobs) { + int i; + for (i = 1; i < cpu_thread_count; i++) + if (hmi_jobs[i]) + cpu_wait_job(hmi_jobs[i], true); + free(hmi_jobs); + } + + return recover; +} + +static uint64_t read_tfmr_t0(void) +{ + uint64_t tfmr_t0; + uint32_t chip_id = this_cpu()->chip_id; + uint32_t core_id = pir_to_core_id(this_cpu()->pir); + + lock(&hmi_lock); + + xscom_write(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_SCOM_SPRC), + SETFIELD(P9_SCOMC_SPR_SELECT, 0, P9_SCOMC_TFMR_T0)); + xscom_read(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_SCOM_SPRD), + &tfmr_t0); + unlock(&hmi_lock); + return tfmr_t0; +} + +/* P9 errata: In theory, an HDEC error is sent to all threads. However, + * due to an errata on P9 where TFMR bit 26 (HDEC parity) cannot be + * cleared on thread 1..3, I am not confident we can do a rendez-vous + * in all cases. + * + * Our current approach is to ignore that error unless it is present + * on thread 0 TFMR. Also, ignore TB residue error due to a similar + * errata as above. 
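+ *
+ * As an aside, the sketch below is NOT skiboot code -- the helper names
+ * are made up for illustration. It only shows the 4-bit-per-thread
+ * layout that hmi_rendez_vous() above maintains in the word pointed to
+ * by core_hmi_state_ptr: bits 0-2 of each nibble hold the current
+ * rendez-vous signature (0-7) and bit 3 is that thread's "exit" flag.
+ */
+
+/* Illustration only: mirrors the encoding used by hmi_rendez_vous(). */
+static inline uint32_t rv_set_nibble(uint32_t state, uint32_t thread,
+				     uint32_t sig, bool exited)
+{
+	uint32_t shift = thread << 2;	/* one 4-bit nibble per thread */
+	uint32_t nibble = (sig & 0x7) | (exited ? 0x8 : 0);
+
+	return (state & ~(0xfu << shift)) | (nibble << shift);
+}
+
+/* Illustration only: extract a thread's current signature (exit bit
+ * excluded, matching the stage-1 wait loop). */
+static inline uint32_t rv_get_sig(uint32_t state, uint32_t thread)
+{
+	return (state >> (thread << 2)) & 0x7;
+}
+
+/* Per the errata handling described above: on secondary threads, drop
+ * latched HDEC parity / TB residue errors unless thread 0 saw them too.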
+ */ +static void validate_latched_errors(uint64_t *tfmr) +{ + if ((*tfmr & (SPR_TFMR_HDEC_PARITY_ERROR | SPR_TFMR_TB_RESIDUE_ERR)) + && this_cpu()->is_secondary) { + uint64_t tfmr_t0 = read_tfmr_t0(); + + if (!(tfmr_t0 & SPR_TFMR_HDEC_PARITY_ERROR)) + *tfmr &= ~SPR_TFMR_HDEC_PARITY_ERROR; + + if (!(tfmr_t0 & SPR_TFMR_TB_RESIDUE_ERR)) + *tfmr &= ~SPR_TFMR_TB_RESIDUE_ERR; + } +} + +static int handle_tfac_errors(struct OpalHMIEvent *hmi_evt, uint64_t *out_flags) +{ + int recover = -1; + uint64_t tfmr = mfspr(SPR_TFMR); + + /* Initialize the hmi event with old value of TFMR */ + hmi_evt->tfmr = cpu_to_be64(tfmr); + + /* A TFMR parity/corrupt error makes us ignore all the local stuff.*/ + if (tfmr & SPR_TFMR_TFMR_CORRUPT) { + /* Mark TB as invalid for now as we don't trust TFMR, we'll fix + * it up later + */ + this_cpu()->tb_invalid = true; + goto bad_tfmr; + } + + this_cpu()->tb_invalid = !(tfmr & SPR_TFMR_TB_VALID); + + if (proc_gen == proc_gen_p9) + validate_latched_errors(&tfmr); + + /* First, handle thread local errors */ + if (tfmr & SPR_TFMR_THREAD_ERRORS) { + recover = handle_thread_tfac_error(tfmr, out_flags); + tfmr &= ~SPR_TFMR_THREAD_ERRORS; + } + + bad_tfmr: + + /* Let's see if we still have a all-core error to deal with, if + * not, we just bail out + */ + if (tfmr & SPR_TFMR_CORE_ERRORS) { + int recover2; + + /* Only update "recover" if it's not already 0 (non-recovered) + */ + recover2 = handle_all_core_tfac_error(tfmr, out_flags); + if (recover != 0) + recover = recover2; + } else if (tfmr & SPR_TFMR_CHIP_TOD_INTERRUPT) { + int recover2; + + /* + * There are some TOD errors which do not affect working of + * TOD and TB. They stay in valid state. Hence we don't need + * rendez vous. + * + * TOD errors that affects TOD/TB will report a global error + * on TFMR alongwith bit 51, and they will go in rendez vous. + */ + recover2 = chiptod_recover_tod_errors(); + if (recover != 0) + recover = recover2; + } else if (this_cpu()->tb_invalid) { + /* This shouldn't happen, TB is invalid and no global error + * was reported. We just return for now assuming one will + * be. We can't do a rendez vous without a core-global HMI. + */ + prlog(PR_ERR, "HMI: TB invalid without core error reported ! " + "CPU=%x, TFMR=0x%016lx\n", this_cpu()->pir, + mfspr(SPR_TFMR)); + } + + if (recover != -1 && hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_ERROR_SYNC; + hmi_evt->type = OpalHMI_ERROR_TFAC; + queue_hmi_event(hmi_evt, recover, out_flags); + } + + /* Set the TB state looking at TFMR register before we head out. */ + this_cpu()->tb_invalid = !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID); + + if (this_cpu()->tb_invalid) { + *out_flags |= OPAL_HMI_FLAGS_TOD_TB_FAIL; + prlog(PR_WARNING, "Failed to get TB in running state! 
" + "CPU=%x, TFMR=%016lx\n", this_cpu()->pir, + mfspr(SPR_TFMR)); + } + + return recover; +} + +static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt, + uint64_t *out_flags) +{ + struct cpu_thread *cpu = this_cpu(); + int recover = 1; + uint64_t handled = 0; + + prlog(PR_DEBUG, "Received HMI interrupt: HMER = 0x%016llx\n", hmer); + /* Initialize the hmi event with old value of HMER */ + if (hmi_evt) + hmi_evt->hmer = cpu_to_be64(hmer); + + /* Handle Timer/TOD errors separately */ + if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) { + hmi_print_debug("Timer Facility Error", hmer); + handled = hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR); + mtspr(SPR_HMER, ~handled); + recover = handle_tfac_errors(hmi_evt, out_flags); + handled = 0; + } + + lock(&hmi_lock); + /* + * Not all HMIs would move TB into invalid state. Set the TB state + * looking at TFMR register. TFMR will tell us correct state of + * TB register. + */ + if (hmer & SPR_HMER_PROC_RECV_DONE) { + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint64_t core_wof; + + hmi_print_debug("Processor recovery occurred.", hmer); + if (!read_core_wof(chip_id, core_id, &core_wof)) { + int i; + + prlog(PR_DEBUG, "Core WOF = 0x%016llx recovered error:\n", core_wof); + if (proc_gen <= proc_gen_p9) { + for (i = 0; i < ARRAY_SIZE(p9_recoverable_bits); i++) { + if (core_wof & PPC_BIT(p9_recoverable_bits[i].bit)) + prlog(PR_DEBUG, " %s\n", p9_recoverable_bits[i].reason); + } + } else if (proc_gen == proc_gen_p10) { + for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) { + if (core_wof & PPC_BIT(p10_core_fir_bits[i].bit)) + prlog(PR_DEBUG, " %s\n", p10_core_fir_bits[i].reason); + } + } + } + + handled |= SPR_HMER_PROC_RECV_DONE; + if (cpu_is_thread0(cpu) && hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE; + queue_hmi_event(hmi_evt, recover, out_flags); + } + } + + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED)) { + handled |= SPR_HMER_PROC_RECV_ERROR_MASKED; + if (cpu_is_thread0(cpu) && hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_MASKED; + queue_hmi_event(hmi_evt, recover, out_flags); + } + hmi_print_debug("Processor recovery Done (masked).", hmer); + } + + if (hmer & SPR_HMER_PROC_RECV_AGAIN) { + handled |= SPR_HMER_PROC_RECV_AGAIN; + if (cpu_is_thread0(cpu) && hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN; + queue_hmi_event(hmi_evt, recover, out_flags); + } + hmi_print_debug("Processor recovery occurred again before" + "bit2 was cleared\n", hmer); + } + + /* XXX: what to do with this? */ + if (hmer & SPR_HMER_SPURR_SCALE_LIMIT) { + handled |= SPR_HMER_SPURR_SCALE_LIMIT; + if (cpu_is_thread0(cpu) && hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE; + queue_hmi_event(hmi_evt, recover, out_flags); + } + hmi_print_debug("Turbo versus nominal frequency exceeded limit.", hmer); + } + + /* Assert if we see malfunction alert, we can not continue. */ + if (hmer & SPR_HMER_MALFUNCTION_ALERT) { + handled |= SPR_HMER_MALFUNCTION_ALERT; + + hmi_print_debug("Malfunction Alert", hmer); + recover = 0; + if (hmi_evt) + decode_malfunction(hmi_evt, out_flags); + } + + /* Assert if we see Hypervisor resource error, we can not continue. 
*/ + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_HYP_RESOURCE_ERR)) { + handled |= SPR_HMER_HYP_RESOURCE_ERR; + + hmi_print_debug("Hypervisor resource error", hmer); + recover = 0; + if (hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_FATAL; + hmi_evt->type = OpalHMI_ERROR_HYP_RESOURCE; + queue_hmi_event(hmi_evt, recover, out_flags); + } + } + + /* XXX: what to do with this? */ + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND)) { + handled |= SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND; + hmer &= ~SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND; + + hmi_print_debug("Attempted to wake thread when threads in TM suspend mode.", hmer); + if (hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE, + queue_hmi_event(hmi_evt, recover, out_flags); + } + } + + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_TRIG_FIR_HMI)) { + handled |= SPR_HMER_TRIG_FIR_HMI; + hmer &= ~SPR_HMER_TRIG_FIR_HMI; + + hmi_print_debug("Clearing unknown debug trigger", hmer); + if (hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_DEBUG_TRIG_FIR, + queue_hmi_event(hmi_evt, recover, out_flags); + } + } + if ((proc_gen == proc_gen_p10) && (hmer & SPR_HMER_P10_TRIG_FIR_HMI)) { + handled |= SPR_HMER_P10_TRIG_FIR_HMI; + hmer &= ~SPR_HMER_P10_TRIG_FIR_HMI; + + hmi_print_debug("Clearing unknown debug trigger", hmer); + if (hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_DEBUG_TRIG_FIR, + queue_hmi_event(hmi_evt, recover, out_flags); + } + } + + if (recover == 0) + disable_fast_reboot("Unrecoverable HMI"); + /* + * HMER bits are sticky, once set to 1 they remain set to 1 until + * they are set to 0. Reset the error source bit to 0, otherwise + * we keep getting HMI interrupt again and again. Writing to HMER + * acts as an AND, so we write mask of all 1's except for the bits + * we want to clear. + */ + mtspr(SPR_HMER, ~handled); + unlock(&hmi_lock); + return recover; +} + +static int64_t opal_handle_hmi(void) +{ + uint64_t hmer, dummy_flags; + struct OpalHMIEvent hmi_evt; + + /* + * Compiled time check to see size of OpalHMIEvent do not exceed + * that of struct opal_msg. + */ + BUILD_ASSERT(sizeof(struct opal_msg) >= sizeof(struct OpalHMIEvent)); + + memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent)); + hmi_evt.version = OpalHMIEvt_V2; + + hmer = mfspr(SPR_HMER); /* Get HMER register value */ + handle_hmi_exception(hmer, &hmi_evt, &dummy_flags); + + return OPAL_SUCCESS; +} +opal_call(OPAL_HANDLE_HMI, opal_handle_hmi, 0); + +static int64_t opal_handle_hmi2(__be64 *out_flags) +{ + uint64_t hmer, flags = 0; + struct OpalHMIEvent hmi_evt; + + /* + * Compiled time check to see size of OpalHMIEvent do not exceed + * that of struct opal_msg. + */ + BUILD_ASSERT(sizeof(struct opal_msg) >= sizeof(struct OpalHMIEvent)); + + memset(&hmi_evt, 0, sizeof(struct OpalHMIEvent)); + hmi_evt.version = OpalHMIEvt_V2; + + hmer = mfspr(SPR_HMER); /* Get HMER register value */ + handle_hmi_exception(hmer, &hmi_evt, &flags); + *out_flags = cpu_to_be64(flags); + + return OPAL_SUCCESS; +} +opal_call(OPAL_HANDLE_HMI2, opal_handle_hmi2, 1); diff --git a/roms/skiboot/core/i2c.c b/roms/skiboot/core/i2c.c new file mode 100644 index 000000000..b4313d430 --- /dev/null +++ b/roms/skiboot/core/i2c.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * I2C + * + * Copyright 2013-2019 IBM Corp. 
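+ *
+ * Hypothetical example of the synchronous helper defined at the bottom
+ * of this file (the bus id, device address 0x50, offset 0x10 and 100ms
+ * timeout are made-up values):
+ *
+ *	uint8_t byte;
+ *	int64_t rc = i2c_request_send(bus_id, 0x50, SMBUS_READ, 0x10, 1,
+ *				      &byte, 1, 100);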
+ */ + +#include <skiboot.h> +#include <i2c.h> +#include <opal.h> +#include <device.h> +#include <opal-msg.h> +#include <timebase.h> +#include <processor.h> +#include <timer.h> +#include <trace.h> + +static LIST_HEAD(i2c_bus_list); + +/* Used to assign OPAL IDs */ +static uint32_t i2c_next_bus; + +void i2c_add_bus(struct i2c_bus *bus) +{ + bus->opal_id = ++i2c_next_bus; + dt_add_property_cells(bus->dt_node, "ibm,opal-id", bus->opal_id); + + list_add_tail(&i2c_bus_list, &bus->link); +} + +struct i2c_bus *i2c_find_bus_by_id(uint32_t opal_id) +{ + struct i2c_bus *bus; + + list_for_each(&i2c_bus_list, bus, link) { + if (bus->opal_id == opal_id) + return bus; + } + return NULL; +} + +static inline void i2c_trace_req(struct i2c_request *req, int rc) +{ + struct trace_i2c t; + + memset(&t, 0, sizeof(t)); + + t.bus = req->bus->opal_id; + t.type = req->op | (req->offset_bytes << 4); + t.i2c_addr = req->dev_addr; + t.smbus_reg = req->offset & 0xffff; // FIXME: log whole offset + t.size = req->rw_len; + t.rc = rc; + + /* FIXME: trace should not be a union... */ + trace_add((void *)&t, TRACE_I2C, sizeof(t)); +} + +int64_t i2c_queue_req(struct i2c_request *req) +{ + int64_t ret = req->bus->queue_req(req); + + i2c_trace_req(req, OPAL_ASYNC_COMPLETION); + + if (!ret) + req->req_state = i2c_req_queued; + return ret; +} + +static void opal_i2c_request_complete(int rc, struct i2c_request *req) +{ + uint64_t token = (uint64_t)(unsigned long)req->user_data; + + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, + cpu_to_be64(token), + cpu_to_be64(rc)); + i2c_trace_req(req, rc); + + free(req); +} + +static int opal_i2c_request(uint64_t async_token, uint32_t bus_id, + struct opal_i2c_request *oreq) +{ + struct i2c_bus *bus = NULL; + struct i2c_request *req; + int rc; + + if (!opal_addr_valid(oreq)) + return OPAL_PARAMETER; + + if (oreq->flags & OPAL_I2C_ADDR_10) + return OPAL_UNSUPPORTED; + + bus = i2c_find_bus_by_id(bus_id); + if (!bus) { + /** + * @fwts-label I2CInvalidBusID + * @fwts-advice opal_i2c_request was passed an invalid bus + * ID. This has likely come from the OS rather than OPAL + * and thus could indicate an OS bug rather than an OPAL + * bug. + */ + prlog(PR_ERR, "I2C: Invalid 'bus_id' passed to the OPAL\n"); + return OPAL_PARAMETER; + } + + req = zalloc(sizeof(*req)); + if (!req) { + /** + * @fwts-label I2CFailedAllocation + * @fwts-advice OPAL failed to allocate memory for an + * i2c_request. This points to an OPAL bug as OPAL ran + * out of memory and this should never happen. 
+ */ + prlog(PR_ERR, "I2C: Failed to allocate 'i2c_request'\n"); + return OPAL_NO_MEM; + } + + switch(oreq->type) { + case OPAL_I2C_RAW_READ: + req->op = I2C_READ; + break; + case OPAL_I2C_RAW_WRITE: + req->op = I2C_WRITE; + break; + case OPAL_I2C_SM_READ: + req->op = SMBUS_READ; + req->offset = be32_to_cpu(oreq->subaddr); + req->offset_bytes = oreq->subaddr_sz; + break; + case OPAL_I2C_SM_WRITE: + req->op = SMBUS_WRITE; + req->offset = be32_to_cpu(oreq->subaddr); + req->offset_bytes = oreq->subaddr_sz; + break; + default: + free(req); + return OPAL_PARAMETER; + } + req->dev_addr = be16_to_cpu(oreq->addr); + req->rw_len = be32_to_cpu(oreq->size); + req->rw_buf = (void *)be64_to_cpu(oreq->buffer_ra); + req->completion = opal_i2c_request_complete; + req->user_data = (void *)(unsigned long)async_token; + req->bus = bus; + + if (i2c_check_quirk(req, &rc)) { + free(req); + return rc; + } + + /* Finally, queue the OPAL i2c request and return */ + rc = i2c_queue_req(req); + if (rc) { + free(req); + return rc; + } + + return OPAL_ASYNC_COMPLETION; +} +opal_call(OPAL_I2C_REQUEST, opal_i2c_request, 3); + +#define MAX_NACK_RETRIES 2 +#define REQ_COMPLETE_POLLING 5 /* Check if req is complete + in 5ms interval */ +int64_t i2c_request_sync(struct i2c_request *req) +{ + uint64_t timer_period = msecs_to_tb(5), timer_count; + uint64_t time_to_wait = 0; + int64_t rc, waited, retries; + size_t i, count; + char buf[17]; /* 8 bytes in hex + NUL */ + + for (retries = 0; retries <= MAX_NACK_RETRIES; retries++) { + waited = 0; + timer_count = 0; + + i2c_queue_req(req); + + do { + time_to_wait = i2c_run_req(req); + if (!time_to_wait) + time_to_wait = REQ_COMPLETE_POLLING; + time_wait(time_to_wait); + waited += time_to_wait; + timer_count += time_to_wait; + if (timer_count > timer_period) { + /* + * The above request may be relying on + * timers to complete, yet there may + * not be called, especially during + * opal init. We could be looping here + * forever. So explicitly check the + * timers once in a while + */ + check_timers(false); + timer_count = 0; + } + } while (req->req_state != i2c_req_done); + + lwsync(); + rc = req->result; + + /* retry on NACK, otherwise exit */ + if (rc != OPAL_I2C_NACK_RCVD) + break; + req->req_state = i2c_req_new; + } + + i2c_trace_req(req, rc); + count = 0; + for (i = 0; i < req->rw_len && count < sizeof(buf); i++) { + count += snprintf(buf+count, sizeof(buf)-count, "%02x", + *(unsigned char *)(req->rw_buf+i)); + } + + prlog(PR_DEBUG, "I2C: %s req op=%x offset=%x buf=%s buflen=%d " + "delay=%lu/%lld rc=%lld\n", + (rc) ? "!!!!" 
: "----", req->op, req->offset, + buf, req->rw_len, tb_to_msecs(waited), req->timeout, rc); + + return rc; +} + +/** + * i2c_request_send - send request to i2c bus synchronously + * @bus_id: i2c bus id + * @dev_addr: address of the device + * @read_write: SMBUS_READ or SMBUS_WRITE + * @offset: any of the I2C interface offset defined + * @offset_bytes: offset size in bytes + * @buf: data to be read or written + * @buflen: buf length + * @timeout: request timeout in milliseconds + * + * Send an I2C request to a device synchronously + * + * Returns: Zero on success otherwise a negative error code + */ +int64_t i2c_request_send(int bus_id, int dev_addr, int read_write, + uint32_t offset, uint32_t offset_bytes, void* buf, + size_t buflen, int timeout) +{ + struct i2c_request *req; + struct i2c_bus *bus; + int64_t rc; + + bus = i2c_find_bus_by_id(bus_id); + if (!bus) { + /** + * @fwts-label I2CInvalidBusID + * @fwts-advice i2c_request_send was passed an invalid bus + * ID. This indicates a bug. + */ + prlog(PR_ERR, "I2C: Invalid bus_id=%x\n", bus_id); + return OPAL_PARAMETER; + } + + req = zalloc(sizeof(*req)); + if (!req) { + /** + * @fwts-label I2CAllocationFailed + * @fwts-advice OPAL failed to allocate memory for an + * i2c_request. This points to an OPAL bug as OPAL run out of + * memory and this should never happen. + */ + prlog(PR_ERR, "I2C: allocating i2c_request failed\n"); + return OPAL_INTERNAL_ERROR; + } + + req->bus = bus; + req->dev_addr = dev_addr; + req->op = read_write; + req->offset = offset; + req->offset_bytes = offset_bytes; + req->rw_buf = (void*) buf; + req->rw_len = buflen; + req->timeout = timeout; + + rc = i2c_request_sync(req); + + free(req); + if (rc) + return OPAL_HARDWARE; + + return OPAL_SUCCESS; +} diff --git a/roms/skiboot/core/init.c b/roms/skiboot/core/init.c new file mode 100644 index 000000000..a8bac28a8 --- /dev/null +++ b/roms/skiboot/core/init.c @@ -0,0 +1,1469 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * skiboot C entry point + * + * Copyright 2013-2019 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <psi.h> +#include <chiptod.h> +#include <nx.h> +#include <cpu.h> +#include <processor.h> +#include <xscom.h> +#include <opal.h> +#include <opal-msg.h> +#include <elf.h> +#include <elf-abi.h> +#include <io.h> +#include <cec.h> +#include <device.h> +#include <pci.h> +#include <lpc.h> +#include <i2c.h> +#include <chip.h> +#include <interrupts.h> +#include <mem_region.h> +#include <trace.h> +#include <console.h> +#include <fsi-master.h> +#include <centaur.h> +#include <ocmb.h> +#include <libfdt/libfdt.h> +#include <timer.h> +#include <ipmi.h> +#include <sensor.h> +#include <xive.h> +#include <nvram.h> +#include <vas.h> +#include <libstb/secureboot.h> +#include <libstb/trustedboot.h> +#include <phys-map.h> +#include <imc.h> +#include <dts.h> +#include <dio-p9.h> +#include <sbe-p9.h> +#include <debug_descriptor.h> +#include <occ.h> +#include <opal-dump.h> +#include <xscom-p10-regs.h> + +enum proc_gen proc_gen; +unsigned int pcie_max_link_speed; +bool pci_tracing; +bool verbose_eeh; +extern const char version[]; + +static uint64_t kernel_entry; +static size_t kernel_size; +static bool kernel_32bit; + +/* We backup the previous vectors here before copying our own */ +static uint8_t old_vectors[EXCEPTION_VECTORS_END]; + +#ifdef DEBUG +#define DEBUG_STR "-debug" +#else +#define DEBUG_STR "" +#endif + +#ifdef SKIBOOT_GCOV +void skiboot_gcov_done(void); +#endif + +struct debug_descriptor debug_descriptor = { + .eye_catcher = "OPALdbug", + .version = CPU_TO_BE32(DEBUG_DESC_VERSION), + .state_flags = 0, + .memcons_phys = 0, /* cpu_to_be64(&memcons) can't init constant */ + .trace_mask = 0, /* All traces disabled by default */ + /* console log level: + * high 4 bits in memory, low 4 bits driver (e.g. uart). */ +#ifdef DEBUG + .console_log_levels = (PR_TRACE << 4) | PR_DEBUG, +#else + .console_log_levels = (PR_DEBUG << 4) | PR_NOTICE, +#endif +}; + +static void checksum_romem(void); + +static bool try_load_elf64_le(struct elf_hdr *header) +{ + struct elf64le_hdr *kh = (struct elf64le_hdr *)header; + uint64_t load_base = (uint64_t)kh; + struct elf64le_phdr *ph; + unsigned int i; + + printf("INIT: 64-bit LE kernel discovered\n"); + + /* Look for a loadable program header that has our entry in it + * + * Note that we execute the kernel in-place, we don't actually + * obey the load informations in the headers. This is expected + * to work for the Linux Kernel because it's a fairly dumb ELF + * but it will not work for any ELF binary. 
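+	 *
+	 * Since nothing is relocated, the virtual entry address is turned
+	 * into an offset within the image (e_entry - p_vaddr + p_offset)
+	 * and then rebased onto load_base below.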
+ */ + ph = (struct elf64le_phdr *)(load_base + le64_to_cpu(kh->e_phoff)); + for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) { + if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD) + continue; + if (le64_to_cpu(ph->p_vaddr) > le64_to_cpu(kh->e_entry) || + (le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_memsz)) < + le64_to_cpu(kh->e_entry)) + continue; + + /* Get our entry */ + kernel_entry = le64_to_cpu(kh->e_entry) - + le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_offset); + break; + } + + if (!kernel_entry) { + prerror("INIT: Failed to find kernel entry !\n"); + return false; + } + kernel_entry += load_base; + kernel_32bit = false; + + kernel_size = le64_to_cpu(kh->e_shoff) + + ((uint32_t)le16_to_cpu(kh->e_shentsize) * + (uint32_t)le16_to_cpu(kh->e_shnum)); + + prlog(PR_DEBUG, "INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n", + kernel_entry, kernel_size); + + return true; +} + +static bool try_load_elf64(struct elf_hdr *header) +{ + struct elf64be_hdr *kh = (struct elf64be_hdr *)header; + struct elf64le_hdr *khle = (struct elf64le_hdr *)header; + uint64_t load_base = (uint64_t)kh; + struct elf64be_phdr *ph; + struct elf64be_shdr *sh; + unsigned int i; + + /* Check it's a ppc64 LE ELF */ + if (khle->ei_ident == ELF_IDENT && + khle->ei_data == ELF_DATA_LSB && + le16_to_cpu(khle->e_machine) == ELF_MACH_PPC64) { + return try_load_elf64_le(header); + } + + /* Check it's a ppc64 ELF */ + if (kh->ei_ident != ELF_IDENT || + kh->ei_data != ELF_DATA_MSB || + be16_to_cpu(kh->e_machine) != ELF_MACH_PPC64) { + prerror("INIT: Kernel doesn't look like an ppc64 ELF\n"); + return false; + } + + /* Look for a loadable program header that has our entry in it + * + * Note that we execute the kernel in-place, we don't actually + * obey the load informations in the headers. This is expected + * to work for the Linux Kernel because it's a fairly dumb ELF + * but it will not work for any ELF binary. + */ + ph = (struct elf64be_phdr *)(load_base + be64_to_cpu(kh->e_phoff)); + for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) { + if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD) + continue; + if (be64_to_cpu(ph->p_vaddr) > be64_to_cpu(kh->e_entry) || + (be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_memsz)) < + be64_to_cpu(kh->e_entry)) + continue; + + /* Get our entry */ + kernel_entry = be64_to_cpu(kh->e_entry) - + be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset); + break; + } + + if (!kernel_entry) { + prerror("INIT: Failed to find kernel entry !\n"); + return false; + } + + /* For the normal big-endian ELF ABI, the kernel entry points + * to a function descriptor in the data section. Linux instead + * has it point directly to code. Test whether it is pointing + * into an executable section or not to figure this out. Default + * to assuming it obeys the ABI. 
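+	 *
+	 * For a v1 descriptor the first doubleword is the code address,
+	 * which is why the entry is dereferenced and converted again when
+	 * it does not fall inside an executable section.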
+ */ + sh = (struct elf64be_shdr *)(load_base + be64_to_cpu(kh->e_shoff)); + for (i = 0; i < be16_to_cpu(kh->e_shnum); i++, sh++) { + if (be64_to_cpu(sh->sh_addr) <= be64_to_cpu(kh->e_entry) && + (be64_to_cpu(sh->sh_addr) + be64_to_cpu(sh->sh_size)) > + be64_to_cpu(kh->e_entry)) + break; + } + + if (i == be16_to_cpu(kh->e_shnum) || + !(be64_to_cpu(sh->sh_flags) & ELF_SFLAGS_X)) { + kernel_entry = *(uint64_t *)(kernel_entry + load_base); + kernel_entry = kernel_entry - + be64_to_cpu(ph->p_vaddr) + be64_to_cpu(ph->p_offset); + } + + kernel_entry += load_base; + kernel_32bit = false; + + kernel_size = be64_to_cpu(kh->e_shoff) + + ((uint32_t)be16_to_cpu(kh->e_shentsize) * + (uint32_t)be16_to_cpu(kh->e_shnum)); + + printf("INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\n", + kernel_entry, kernel_size); + + return true; +} + +static bool try_load_elf32_le(struct elf_hdr *header) +{ + struct elf32le_hdr *kh = (struct elf32le_hdr *)header; + uint64_t load_base = (uint64_t)kh; + struct elf32le_phdr *ph; + unsigned int i; + + printf("INIT: 32-bit LE kernel discovered\n"); + + /* Look for a loadable program header that has our entry in it + * + * Note that we execute the kernel in-place, we don't actually + * obey the load informations in the headers. This is expected + * to work for the Linux Kernel because it's a fairly dumb ELF + * but it will not work for any ELF binary. + */ + ph = (struct elf32le_phdr *)(load_base + le32_to_cpu(kh->e_phoff)); + for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) { + if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD) + continue; + if (le32_to_cpu(ph->p_vaddr) > le32_to_cpu(kh->e_entry) || + (le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_memsz)) < + le32_to_cpu(kh->e_entry)) + continue; + + /* Get our entry */ + kernel_entry = le32_to_cpu(kh->e_entry) - + le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_offset); + break; + } + + if (!kernel_entry) { + prerror("INIT: Failed to find kernel entry !\n"); + return false; + } + + kernel_entry += load_base; + kernel_32bit = true; + + printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry); + + return true; +} + +static bool try_load_elf32(struct elf_hdr *header) +{ + struct elf32be_hdr *kh = (struct elf32be_hdr *)header; + struct elf32le_hdr *khle = (struct elf32le_hdr *)header; + uint64_t load_base = (uint64_t)kh; + struct elf32be_phdr *ph; + unsigned int i; + + /* Check it's a ppc32 LE ELF */ + if (khle->ei_ident == ELF_IDENT && + khle->ei_data == ELF_DATA_LSB && + le16_to_cpu(khle->e_machine) == ELF_MACH_PPC32) { + return try_load_elf32_le(header); + } + + /* Check it's a ppc32 ELF */ + if (kh->ei_ident != ELF_IDENT || + kh->ei_data != ELF_DATA_MSB || + be16_to_cpu(kh->e_machine) != ELF_MACH_PPC32) { + prerror("INIT: Kernel doesn't look like an ppc32 ELF\n"); + return false; + } + + /* Look for a loadable program header that has our entry in it + * + * Note that we execute the kernel in-place, we don't actually + * obey the load informations in the headers. This is expected + * to work for the Linux Kernel because it's a fairly dumb ELF + * but it will not work for any ELF binary. 
+ */ + ph = (struct elf32be_phdr *)(load_base + be32_to_cpu(kh->e_phoff)); + for (i = 0; i < be16_to_cpu(kh->e_phnum); i++, ph++) { + if (be32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD) + continue; + if (be32_to_cpu(ph->p_vaddr) > be32_to_cpu(kh->e_entry) || + (be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_memsz)) < + be32_to_cpu(kh->e_entry)) + continue; + + /* Get our entry */ + kernel_entry = be32_to_cpu(kh->e_entry) - + be32_to_cpu(ph->p_vaddr) + be32_to_cpu(ph->p_offset); + break; + } + + if (!kernel_entry) { + prerror("INIT: Failed to find kernel entry !\n"); + return false; + } + + kernel_entry += load_base; + kernel_32bit = true; + + printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry); + + return true; +} + +extern char __builtin_kernel_start[]; +extern char __builtin_kernel_end[]; +extern uint64_t boot_offset; + +static size_t initramfs_size; + +bool start_preload_kernel(void) +{ + int loaded; + + /* Try to load an external kernel payload through the platform hooks */ + kernel_size = KERNEL_LOAD_SIZE; + loaded = start_preload_resource(RESOURCE_ID_KERNEL, + RESOURCE_SUBID_NONE, + KERNEL_LOAD_BASE, + &kernel_size); + if (loaded != OPAL_SUCCESS) { + printf("INIT: platform start load kernel failed\n"); + kernel_size = 0; + return false; + } + + initramfs_size = INITRAMFS_LOAD_SIZE; + loaded = start_preload_resource(RESOURCE_ID_INITRAMFS, + RESOURCE_SUBID_NONE, + INITRAMFS_LOAD_BASE, &initramfs_size); + if (loaded != OPAL_SUCCESS) { + printf("INIT: platform start load initramfs failed\n"); + initramfs_size = 0; + return false; + } + + return true; +} + +static bool load_kernel(void) +{ + void *stb_container = NULL; + struct elf_hdr *kh; + int loaded; + + prlog(PR_NOTICE, "INIT: Waiting for kernel...\n"); + + loaded = wait_for_resource_loaded(RESOURCE_ID_KERNEL, + RESOURCE_SUBID_NONE); + + if (loaded != OPAL_SUCCESS) { + printf("INIT: platform wait for kernel load failed\n"); + kernel_size = 0; + } + + /* Try embedded kernel payload */ + if (!kernel_size) { + kernel_size = __builtin_kernel_end - __builtin_kernel_start; + if (kernel_size) { + /* Move the built-in kernel up */ + uint64_t builtin_base = + ((uint64_t)__builtin_kernel_start) - + SKIBOOT_BASE + boot_offset; + printf("Using built-in kernel\n"); + memmove(KERNEL_LOAD_BASE, (void*)builtin_base, + kernel_size); + } + } + + if (dt_has_node_property(dt_chosen, "kernel-base-address", NULL)) { + kernel_entry = dt_prop_get_u64(dt_chosen, + "kernel-base-address"); + prlog(PR_DEBUG, "INIT: Kernel image at 0x%llx\n", kernel_entry); + kh = (struct elf_hdr *)kernel_entry; + /* + * If the kernel is at 0, restore it as it was overwritten + * by our vectors. 
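+		 * The bytes saved in old_vectors (taken before skiboot copied
+		 * its own vectors over 0x0) are restored, and sreset is
+		 * disabled since skiboot's vectors are gone from then on.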
+ */ + if (kernel_entry < EXCEPTION_VECTORS_END) { + cpu_set_sreset_enable(false); + memcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END); + sync_icache(); + } else { + /* Hack for STB in Mambo, assume at least 4kb in mem */ + if (!kernel_size) + kernel_size = SECURE_BOOT_HEADERS_SIZE; + if (stb_is_container((void*)kernel_entry, kernel_size)) { + stb_container = (void*)kernel_entry; + kh = (struct elf_hdr *) (kernel_entry + SECURE_BOOT_HEADERS_SIZE); + } else + kh = (struct elf_hdr *) (kernel_entry); + } + } else { + if (!kernel_size) { + printf("INIT: Assuming kernel at %p\n", + KERNEL_LOAD_BASE); + /* Hack for STB in Mambo, assume at least 4kb in mem */ + kernel_size = SECURE_BOOT_HEADERS_SIZE; + kernel_entry = (uint64_t)KERNEL_LOAD_BASE; + } + if (stb_is_container(KERNEL_LOAD_BASE, kernel_size)) { + stb_container = KERNEL_LOAD_BASE; + kh = (struct elf_hdr *) (KERNEL_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE); + } else + kh = (struct elf_hdr *) (KERNEL_LOAD_BASE); + + } + + prlog(PR_DEBUG, + "INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n", + kernel_size); + + if (kh->ei_ident != ELF_IDENT) { + prerror("INIT: ELF header not found. Assuming raw binary.\n"); + return true; + } + + if (kh->ei_class == ELF_CLASS_64) { + if (!try_load_elf64(kh)) + return false; + } else if (kh->ei_class == ELF_CLASS_32) { + if (!try_load_elf32(kh)) + return false; + } else { + prerror("INIT: Neither ELF32 not ELF64 ?\n"); + return false; + } + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + secureboot_verify(RESOURCE_ID_KERNEL, + stb_container, + SECURE_BOOT_HEADERS_SIZE + kernel_size); + trustedboot_measure(RESOURCE_ID_KERNEL, + stb_container, + SECURE_BOOT_HEADERS_SIZE + kernel_size); + } + + return true; +} + +static void load_initramfs(void) +{ + uint64_t *initramfs_start; + void *stb_container = NULL; + int loaded; + + loaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS, + RESOURCE_SUBID_NONE); + + if (loaded != OPAL_SUCCESS || !initramfs_size) + return; + + if (stb_is_container(INITRAMFS_LOAD_BASE, initramfs_size)) { + stb_container = INITRAMFS_LOAD_BASE; + initramfs_start = INITRAMFS_LOAD_BASE + SECURE_BOOT_HEADERS_SIZE; + } else { + initramfs_start = INITRAMFS_LOAD_BASE; + } + + dt_check_del_prop(dt_chosen, "linux,initrd-start"); + dt_check_del_prop(dt_chosen, "linux,initrd-end"); + + printf("INIT: Initramfs loaded, size: %zu bytes\n", initramfs_size); + + dt_add_property_u64(dt_chosen, "linux,initrd-start", + (uint64_t)initramfs_start); + dt_add_property_u64(dt_chosen, "linux,initrd-end", + (uint64_t)initramfs_start + initramfs_size); + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + secureboot_verify(RESOURCE_ID_INITRAMFS, + stb_container, + SECURE_BOOT_HEADERS_SIZE + initramfs_size); + trustedboot_measure(RESOURCE_ID_INITRAMFS, + stb_container, + SECURE_BOOT_HEADERS_SIZE + initramfs_size); + } +} + +static void cpu_disable_ME_RI_one(void *param __unused) +{ + disable_machine_check(); + mtmsrd(0, 1); +} + +static int64_t cpu_disable_ME_RI_all(void) +{ + struct cpu_thread *cpu; + struct cpu_job **jobs; + + jobs = zalloc(sizeof(struct cpu_job *) * (cpu_max_pir + 1)); + assert(jobs); + + for_each_available_cpu(cpu) { + if (cpu == this_cpu()) + continue; + jobs[cpu->pir] = cpu_queue_job(cpu, "cpu_disable_ME_RI", + cpu_disable_ME_RI_one, NULL); + } + + /* this cpu */ + cpu_disable_ME_RI_one(NULL); + + for_each_available_cpu(cpu) { + if (jobs[cpu->pir]) + cpu_wait_job(jobs[cpu->pir], true); + } + + free(jobs); + + return OPAL_SUCCESS; +} + +static void *fdt; + +void __noreturn 
load_and_boot_kernel(bool is_reboot) +{ + const struct dt_property *memprop; + const char *cmdline, *stdoutp; + uint64_t mem_top; + + memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem"); + if (memprop) + mem_top = (u64)dt_property_get_cell(memprop, 0) << 32 + | dt_property_get_cell(memprop, 1); + else /* XXX HB hack, might want to calc it */ + mem_top = 0x40000000; + + op_display(OP_LOG, OP_MOD_INIT, 0x000A); + + /* Load kernel LID */ + if (!load_kernel()) { + op_display(OP_FATAL, OP_MOD_INIT, 1); + abort(); + } + + load_initramfs(); + + trustedboot_exit_boot_services(); + + ipmi_set_fw_progress_sensor(IPMI_FW_OS_BOOT); + + + if (!is_reboot) { + /* We wait for the nvram read to complete here so we can + * grab stuff from there such as the kernel arguments + */ + nvram_wait_for_load(); + + if (!occ_sensors_init()) + dts_sensor_create_nodes(sensor_node); + + } else { + /* fdt will be rebuilt */ + free(fdt); + fdt = NULL; + + nvram_reinit(); + occ_pstates_init(); + } + + /* Use nvram bootargs over device tree */ + cmdline = nvram_query_safe("bootargs"); + if (cmdline) { + dt_check_del_prop(dt_chosen, "bootargs"); + dt_add_property_string(dt_chosen, "bootargs", cmdline); + prlog(PR_DEBUG, "INIT: Command line from NVRAM: %s\n", + cmdline); + } + + op_display(OP_LOG, OP_MOD_INIT, 0x000B); + + add_fast_reboot_dt_entries(); + + if (platform.finalise_dt) + platform.finalise_dt(is_reboot); + + /* Create the device tree blob to boot OS. */ + fdt = create_dtb(dt_root, false); + if (!fdt) { + op_display(OP_FATAL, OP_MOD_INIT, 2); + abort(); + } + + op_display(OP_LOG, OP_MOD_INIT, 0x000C); + + mem_dump_free(); + + /* Dump the selected console */ + stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL); + prlog(PR_DEBUG, "INIT: stdout-path: %s\n", stdoutp ? stdoutp : ""); + + fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir); + + /* Check there is something there before we branch to it */ + if (*(uint32_t *)kernel_entry == 0) { + prlog(PR_EMERG, "FATAL: Kernel is zeros, can't execute!\n"); + assert(0); + } + + if (platform.exit) + platform.exit(); + + /* Take processors out of nap */ + cpu_set_sreset_enable(false); + cpu_set_ipi_enable(false); + + printf("INIT: Starting kernel at 0x%llx, fdt at %p %u bytes\n", + kernel_entry, fdt, fdt_totalsize(fdt)); + + /* Disable machine checks on all */ + cpu_disable_ME_RI_all(); + + patch_traps(false); + cpu_set_hile_mode(false); /* Clear HILE on all CPUs */ + + /* init MPIPL */ + if (!is_reboot) + opal_mpipl_init(); + + checksum_romem(); + + debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE; + + cpu_give_self_os(); + + if (kernel_32bit) + start_kernel32(kernel_entry, fdt, mem_top); + start_kernel(kernel_entry, fdt, mem_top); +} + +static void storage_keys_fixup(void) +{ + struct dt_node *cpus, *n; + + cpus = dt_find_by_path(dt_root, "/cpus"); + assert(cpus); + + if (proc_gen == proc_gen_unknown) + return; + + dt_for_each_child(cpus, n) { + /* There may be cache nodes in /cpus. */ + if (!dt_has_node_property(n, "device_type", "cpu") || + dt_has_node_property(n, "ibm,processor-storage-keys", NULL)) + continue; + + /* + * skiboot supports p8 & p9, both of which support the IAMR, and + * both of which support 32 keys. So advertise 32 keys for data + * accesses and 32 for instruction accesses. + */ + dt_add_property_cells(n, "ibm,processor-storage-keys", 32, 32); + } +} + +static void dt_fixups(void) +{ + struct dt_node *n; + struct dt_node *primary_lpc = NULL; + + /* lpc node missing #address/size cells. 
Also pick one as + * primary for now (TBD: How to convey that from HB) + */ + dt_for_each_compatible(dt_root, n, "ibm,power8-lpc") { + if (!primary_lpc || dt_has_node_property(n, "primary", NULL)) + primary_lpc = n; + if (dt_has_node_property(n, "#address-cells", NULL)) + break; + dt_add_property_cells(n, "#address-cells", 2); + dt_add_property_cells(n, "#size-cells", 1); + dt_add_property_strings(n, "status", "ok"); + } + + /* Missing "primary" property in LPC bus */ + if (primary_lpc && !dt_has_node_property(primary_lpc, "primary", NULL)) + dt_add_property(primary_lpc, "primary", NULL, 0); + + /* Missing "scom-controller" */ + dt_for_each_compatible(dt_root, n, "ibm,xscom") { + if (!dt_has_node_property(n, "scom-controller", NULL)) + dt_add_property(n, "scom-controller", NULL, 0); + } + + storage_keys_fixup(); +} + +static void add_arch_vector(void) +{ + /** + * vec5 = a PVR-list : Number-of-option-vectors : + * option-vectors[Number-of-option-vectors + 1] + */ + uint8_t vec5[] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00}; + + if (dt_has_node_property(dt_chosen, "ibm,architecture-vec-5", NULL)) + return; + + dt_add_property(dt_chosen, "ibm,architecture-vec-5", + vec5, sizeof(vec5)); +} + +static void dt_init_misc(void) +{ + /* Check if there's a /chosen node, if not, add one */ + dt_chosen = dt_find_by_path(dt_root, "/chosen"); + if (!dt_chosen) + dt_chosen = dt_new(dt_root, "chosen"); + assert(dt_chosen); + + /* Add IBM architecture vectors if needed */ + add_arch_vector(); + + /* Add the "OPAL virtual ICS*/ + add_ics_node(); + + /* Additional fixups. TODO: Move into platform */ + dt_fixups(); +} + +static u8 console_get_level(const char *s) +{ + if (strcmp(s, "emerg") == 0) + return PR_EMERG; + if (strcmp(s, "alert") == 0) + return PR_ALERT; + if (strcmp(s, "crit") == 0) + return PR_CRIT; + if (strcmp(s, "err") == 0) + return PR_ERR; + if (strcmp(s, "warning") == 0) + return PR_WARNING; + if (strcmp(s, "notice") == 0) + return PR_NOTICE; + if (strcmp(s, "printf") == 0) + return PR_PRINTF; + if (strcmp(s, "info") == 0) + return PR_INFO; + if (strcmp(s, "debug") == 0) + return PR_DEBUG; + if (strcmp(s, "trace") == 0) + return PR_TRACE; + if (strcmp(s, "insane") == 0) + return PR_INSANE; + /* Assume it's a number instead */ + return atoi(s); +} + +static void console_log_level(void) +{ + const char *s; + u8 level; + + /* console log level: + * high 4 bits in memory, low 4 bits driver (e.g. uart). 
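+ *
+ * Both NVRAM keys accept either a level name ("debug", "info", ...)
+ * or a plain number (see console_get_level() above). For example
+ * (illustrative): "log-level-driver=info" only updates the low
+ * nibble and "log-level-memory=debug" only the high nibble of
+ * debug_descriptor.console_log_levels.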
*/ + s = nvram_query_safe("log-level-driver"); + if (s) { + level = console_get_level(s); + debug_descriptor.console_log_levels = + (debug_descriptor.console_log_levels & 0xf0 ) | + (level & 0x0f); + prlog(PR_NOTICE, "console: Setting driver log level to %i\n", + level & 0x0f); + } + s = nvram_query_safe("log-level-memory"); + if (s) { + level = console_get_level(s); + debug_descriptor.console_log_levels = + (debug_descriptor.console_log_levels & 0x0f ) | + ((level & 0x0f) << 4); + prlog(PR_NOTICE, "console: Setting memory log level to %i\n", + level & 0x0f); + } +} + +typedef void (*ctorcall_t)(void); + +static void __nomcount do_ctors(void) +{ + extern ctorcall_t __ctors_start[], __ctors_end[]; + ctorcall_t *call; + + for (call = __ctors_start; call < __ctors_end; call++) + (*call)(); +} + +#ifdef ELF_ABI_v2 +static void setup_branch_null_catcher(void) +{ + asm volatile( \ + ".section .rodata" "\n\t" \ + "3: .string \"branch to NULL\"" "\n\t" \ + ".previous" "\n\t" \ + ".section .trap_table,\"aw\"" "\n\t" \ + ".llong 0" "\n\t" \ + ".llong 3b" "\n\t" \ + ".previous" "\n\t" \ + ); +} +#else +static void branch_null(void) +{ + assert(0); +} + +static void setup_branch_null_catcher(void) +{ + void (*bn)(void) = branch_null; + + /* + * FIXME: This copies the function descriptor (16 bytes) for + * ABI v1 (ie. big endian). This will be broken if we ever + * move to ABI v2 (ie little endian) + */ + memcpy_null((void *)0, bn, 16); +} +#endif + +void copy_sreset_vector(void) +{ + uint32_t *src, *dst; + + /* Copy the reset code over the entry point. */ + src = &reset_patch_start; + dst = (uint32_t *)0x100; + while(src < &reset_patch_end) + *(dst++) = *(src++); + sync_icache(); +} + +void copy_sreset_vector_fast_reboot(void) +{ + uint32_t *src, *dst; + + /* Copy the reset code over the entry point. */ + src = &reset_fast_reboot_patch_start; + dst = (uint32_t *)0x100; + while(src < &reset_fast_reboot_patch_end) + *(dst++) = *(src++); + sync_icache(); +} + +void copy_exception_vectors(void) +{ + /* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as + * this is the boot flag used by CPUs still potentially entering + * skiboot. + */ + memcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100), + EXCEPTION_VECTORS_END - 0x100); + sync_icache(); +} + +/* + * When skiboot owns the exception vectors, patch in 'trap' for assert fails. + * Otherwise use assert_fail() + */ +void patch_traps(bool enable) +{ + struct trap_table_entry *tte; + + for (tte = __trap_table_start; tte < __trap_table_end; tte++) { + uint32_t *insn; + + insn = (uint32_t *)tte->address; + if (enable) { + *insn = PPC_INST_TRAP; + } else { + *insn = PPC_INST_NOP; + } + } + + sync_icache(); +} + +static void per_thread_sanity_checks(void) +{ + struct cpu_thread *cpu = this_cpu(); + + /** + * @fwts-label NonZeroHRMOR + * @fwts-advice The contents of the hypervisor real mode offset register + * (HRMOR) is bitwise orded with the address of any hypervisor real mode + * (i.e Skiboot) memory accesses. Skiboot does not support operating + * with a non-zero HRMOR and setting it will break some things (e.g + * XSCOMs) in hard-to-debug ways. + */ + assert(mfspr(SPR_HRMOR) == 0); + + /** + * @fwts-label UnknownSecondary + * @fwts-advice The boot CPU attampted to call in a secondary thread + * without initialising the corresponding cpu_thread structure. This may + * happen if the HDAT or devicetree reports too few threads or cores for + * this processor. 
+ */ + assert(cpu->state != cpu_state_no_cpu); +} + +void pci_nvram_init(void) +{ + const char *nvram_speed; + + verbose_eeh = nvram_query_eq_safe("pci-eeh-verbose", "true"); + if (verbose_eeh) + prlog(PR_INFO, "PHB: Verbose EEH enabled\n"); + + pcie_max_link_speed = 0; + + nvram_speed = nvram_query_dangerous("pcie-max-link-speed"); + if (nvram_speed) { + pcie_max_link_speed = atoi(nvram_speed); + prlog(PR_NOTICE, "PHB: NVRAM set max link speed to GEN%i\n", + pcie_max_link_speed); + } + + pci_tracing = nvram_query_eq_safe("pci-tracing", "true"); +} + +static uint32_t mem_csum(void *_p, void *_e) +{ + size_t len = _e - _p; + uint32_t *p = _p; + uint32_t v1 = 0, v2 = 0; + uint32_t csum; + unsigned int i; + + for (i = 0; i < len; i += 4) { + uint32_t v = *p++; + v1 += v; + v2 += v1; + } + + csum = v1 ^ v2; + + return csum; +} + +static uint32_t romem_csum; + +static void checksum_romem(void) +{ + uint32_t csum; + + romem_csum = 0; + if (chip_quirk(QUIRK_SLOW_SIM)) + return; + + csum = mem_csum(_start, _head_end); + romem_csum ^= csum; + + csum = mem_csum(_stext, _romem_end); + romem_csum ^= csum; + + csum = mem_csum(__builtin_kernel_start, __builtin_kernel_end); + romem_csum ^= csum; +} + +bool verify_romem(void) +{ + uint32_t old = romem_csum; + checksum_romem(); + if (old != romem_csum) { + romem_csum = old; + prlog(PR_NOTICE, "OPAL checksums did not match\n"); + return false; + } + return true; +} + +static void mask_pc_system_xstop(void) +{ + struct cpu_thread *cpu; + uint32_t chip_id, core_id; + int rc; + + if (proc_gen != proc_gen_p10) + return; + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) + return; + + /* + * On P10 Mask PC system checkstop (bit 28). This is needed + * for HW570622. We keep processor recovery disabled via + * HID[5] and mask the checkstop that it can cause. CME does + * the recovery handling for us. + */ + for_each_cpu(cpu) { + chip_id = cpu->chip_id; + core_id = pir_to_core_id(cpu->pir); + + rc = xscom_write(chip_id, + XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIRMASK_OR), + PPC_BIT(28)); + if (rc) + prerror("Error setting FIR MASK rc:%d on PIR:%x\n", + rc, cpu->pir); + } +} + + +/* Called from head.S, thus no prototype. */ +void __noreturn __nomcount main_cpu_entry(const void *fdt); + +void __noreturn __nomcount main_cpu_entry(const void *fdt) +{ + /* + * WARNING: At this point. the timebases have + * *not* been synchronized yet. Do not use any timebase + * related functions for timeouts etc... unless you can cope + * with the speed being some random core clock divider and + * the value jumping backward when the synchronization actually + * happens (in chiptod_init() below). + * + * Also the current cpu_thread() struct is not initialized + * either so we need to clear it out first thing first (without + * putting any other useful info in there jus yet) otherwise + * printf an locks are going to play funny games with "con_suspend" + */ + pre_init_boot_cpu(); + + /* + * Point to our mem console + */ + debug_descriptor.memcons_phys = cpu_to_be64((uint64_t)&memcons); + + /* + * Before first printk, ensure console buffer is clear or + * reading tools might think it has wrapped + */ + clear_console(); + + /* Backup previous vectors as this could contain a kernel + * image. + */ + memcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END); + + /* + * Some boot firmwares enter OPAL with MSR[ME]=1, as they presumably + * handle machine checks until we take over. As we overwrite the + * previous exception vectors with our own handlers, disable MSR[ME]. 
+ * This could be done atomically by patching in a branch then patching + * it out last, but that's a lot of effort. + */ + disable_machine_check(); + + /* Copy all vectors down to 0 */ + copy_exception_vectors(); + + /* Enable trap based asserts */ + patch_traps(true); + + /* + * Enable MSR[ME] bit so we can take MCEs. We don't currently + * recover, but we print some useful information. + */ + enable_machine_check(); + mtmsrd(MSR_RI, 1); + + /* Setup a NULL catcher to catch accidental NULL ptr calls */ + setup_branch_null_catcher(); + + /* Call library constructors */ + do_ctors(); + + prlog(PR_NOTICE, "OPAL %s%s starting...\n", version, DEBUG_STR); + + prlog(PR_DEBUG, "initial console log level: memory %d, driver %d\n", + (debug_descriptor.console_log_levels >> 4), + (debug_descriptor.console_log_levels & 0x0f)); + prlog(PR_TRACE, "OPAL is Powered By Linked-List Technology.\n"); + +#ifdef SKIBOOT_GCOV + skiboot_gcov_done(); +#endif + + /* Initialize boot cpu's cpu_thread struct */ + init_boot_cpu(); + + /* Now locks can be used */ + init_locks(); + + /* Create the OPAL call table early on, entries can be overridden + * later on (FSP console code for example) + */ + opal_table_init(); + + /* Init the physical map table so we can start mapping things */ + phys_map_init(mfspr(SPR_PVR)); + + /* + * If we are coming in with a flat device-tree, we expand it + * now. Else look for HDAT and create a device-tree from them + * + * Hack alert: When entering via the OPAL entry point, fdt + * is set to -1, we record that and pass it to parse_hdat + */ + + dt_root = dt_new_root(""); + + if (fdt == (void *)-1ul) { + if (parse_hdat(true) < 0) + abort(); + } else if (fdt == NULL) { + if (parse_hdat(false) < 0) + abort(); + } else { + dt_expand(fdt); + } + dt_add_cpufeatures(dt_root); + + /* Now that we have a full devicetree, verify that we aren't on fire. */ + per_thread_sanity_checks(); + + /* + * From there, we follow a fairly strict initialization order. + * + * First we need to build up our chip data structures and initialize + * XSCOM which will be needed for a number of susbequent things. + * + * We want XSCOM available as early as the platform probe in case the + * probe requires some HW accesses. + * + * We also initialize the FSI master at that point in case we need + * to access chips via that path early on. + */ + init_chips(); + + xscom_init(); + mfsi_init(); + + /* + * Direct controls facilities provides some controls over CPUs + * using scoms. + */ + direct_controls_init(); + + /* + * Put various bits & pieces in device-tree that might not + * already be there such as the /chosen node if not there yet, + * the ICS node, etc... This can potentially use XSCOM + */ + dt_init_misc(); + + /* + * Initialize LPC (P8 and beyond) so we can get to UART, BMC and + * other system controller. This is done before probe_platform + * so that the platform probing code can access an external + * BMC if needed. + */ + lpc_init(); + + /* + * This should be done before mem_region_init, so the stack + * region length can be set according to the maximum PIR. + */ + init_cpu_max_pir(); + + /* + * Now, we init our memory map from the device-tree, and immediately + * reserve areas which we know might contain data coming from + * HostBoot. We need to do these things before we start doing + * allocations outside of our heap, such as chip local allocs, + * otherwise we might clobber those data. + */ + mem_region_init(); + + /* + * Reserve memory required to capture OPAL dump. 
This should be done + * immediately after mem_region_init to avoid any clash with local + * memory allocation. + */ + opal_mpipl_reserve_mem(); + + /* Reserve HOMER and OCC area */ + homer_init(); + + /* Initialize the rest of the cpu thread structs */ + init_all_cpus(); + if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) + cpu_set_ipi_enable(true); + + /* Once all CPU are up apply this workaround */ + mask_pc_system_xstop(); + + /* Add the /opal node to the device-tree */ + add_opal_node(); + + /* + * We probe the platform now. This means the platform probe gets + * the opportunity to reserve additional areas of memory if needed. + * + * Note: Timebases still not synchronized. + */ + probe_platform(); + + /* Allocate our split trace buffers now. Depends add_opal_node() */ + init_trace_buffers(); + + /* On P8, get the ICPs and make sure they are in a sane state */ + init_interrupts(); + if (proc_gen == proc_gen_p8) + cpu_set_ipi_enable(true); + + /* On P9 and P10, initialize XIVE */ + if (proc_gen == proc_gen_p9) + init_xive(); + else if (proc_gen == proc_gen_p10) + xive2_init(); + + /* Grab centaurs from device-tree if present (only on FSP-less) */ + centaur_init(); + + /* initialize ocmb scom-controller */ + ocmb_init(); + + /* Initialize PSI (depends on probe_platform being called) */ + psi_init(); + + /* Initialize/enable LPC interrupts. This must be done after the + * PSI interface has been initialized since it serves as an interrupt + * source for LPC interrupts. + */ + lpc_init_interrupts(); + + /* Call in secondary CPUs */ + cpu_bringup(); + + /* We can now overwrite the 0x100 vector as we are no longer being + * entered there. + */ + copy_sreset_vector(); + + /* We can now do NAP mode */ + cpu_set_sreset_enable(true); + + /* + * Synchronize time bases. Prior to chiptod_init() the timebase + * is free-running at a frequency based on the core clock rather + * than being synchronised to the ChipTOD network. This means + * that the timestamps in early boot might be a little off compared + * to wall clock time. + */ + chiptod_init(); + + /* Initialize P9 DIO */ + p9_dio_init(); + + /* + * SBE uses TB value for scheduling timer. Hence init after + * chiptod init + */ + p9_sbe_init(); + + /* Initialize i2c */ + p8_i2c_init(); + + /* Register routine to dispatch and read sensors */ + sensor_init(); + + /* + * Initialize the opal messaging before platform.init as we are + * getting request to queue occ load opal message when host services + * got load occ request from FSP + */ + opal_init_msg(); + + /* + * We have initialized the basic HW, we can now call into the + * platform to perform subsequent inits, such as establishing + * communication with the FSP or starting IPMI. + */ + if (platform.init) + platform.init(); + + /* Read in NVRAM and set it up */ + nvram_init(); + + /* Set the console level */ + console_log_level(); + + /* Secure/Trusted Boot init. We look for /ibm,secureboot in DT */ + secureboot_init(); + trustedboot_init(); + + /* Secure variables init, handled by platform */ + if (platform.secvar_init && is_fw_secureboot()) + platform.secvar_init(); + + /* + * BMC platforms load version information from flash after + * secure/trustedboot init. + */ + if (platform.bmc) + flash_fw_version_preload(); + + /* preload the IMC catalog dtb */ + imc_catalog_preload(); + + /* Install the OPAL Console handlers */ + init_opal_console(); + + /* + * Some platforms set a flag to wait for SBE validation to be + * performed by the BMC. 
If this occurs it leaves the SBE in a + * bad state and the system will reboot at this point. + */ + if (platform.seeprom_update) + platform.seeprom_update(); + + /* Init SLW related stuff, including fastsleep */ + slw_init(); + + op_display(OP_LOG, OP_MOD_INIT, 0x0002); + + /* + * On some POWER9 BMC systems, we need to initialise the OCC + * before the NPU to facilitate NVLink/OpenCAPI presence + * detection, so we set it up as early as possible. On FSP + * systems, Hostboot starts booting the OCC later, so we delay + * OCC initialisation as late as possible to give it the + * maximum time to boot up. + */ + if (platform.bmc) + occ_pstates_init(); + + pci_nvram_init(); + + preload_capp_ucode(); + start_preload_kernel(); + + /* Catalog decompression routine */ + imc_decompress_catalog(); + + /* Virtual Accelerator Switchboard */ + vas_init(); + + /* NX init */ + nx_init(); + + /* Probe PHB3 on P8 */ + probe_phb3(); + + /* Probe PHB4 on P9 and PHB5 on P10 */ + probe_phb4(); + + /* Probe NPUs */ + probe_npu(); + probe_npu2(); + probe_npu3(); + + /* Initialize PCI */ + pci_init_slots(); + + /* Add OPAL timer related properties */ + late_init_timers(); + + /* Setup ibm,firmware-versions if able */ + if (platform.bmc) { + flash_dt_add_fw_version(); + ipmi_dt_add_bmc_info(); + } + + ipmi_set_fw_progress_sensor(IPMI_FW_PCI_INIT); + + /* + * These last few things must be done as late as possible + * because they rely on various other things having been setup, + * for example, add_opal_interrupts() will add all the interrupt + * sources that are going to the firmware. We can't add a new one + * after that call. Similarly, the mem_region calls will construct + * the reserve maps in the DT so we shouldn't affect the memory + * regions after that + */ + + /* Create the LPC bus interrupt-map on P9 */ + lpc_finalize_interrupts(); + + /* Add the list of interrupts going to OPAL */ + add_opal_interrupts(); + + /* Init In-Memory Collection related stuff (load the IMC dtb into memory) */ + imc_init(); + + /* Disable protected execution facility in BML */ + cpu_disable_pef(); + + /* export the trace buffers */ + trace_add_dt_props(); + + /* Now release parts of memory nodes we haven't used ourselves... */ + mem_region_release_unused(); + + /* ... and add remaining reservations to the DT */ + mem_region_add_dt_reserved(); + + /* + * Update /ibm,secureboot/ibm,cvc/memory-region to point to + * /reserved-memory/secure-crypt-algo-code instead of + * /ibm,hostboot/reserved-memory/secure-crypt-algo-code. + */ + cvc_update_reserved_memory_phandle(); + + prd_register_reserved_memory(); + + load_and_boot_kernel(false); +} + +void __noreturn __secondary_cpu_entry(void) +{ + struct cpu_thread *cpu = this_cpu(); + + /* Secondary CPU called in */ + cpu_callin(cpu); + + enable_machine_check(); + mtmsrd(MSR_RI, 1); + + /* Some XIVE setup */ + if (proc_gen == proc_gen_p9) + xive_cpu_callin(cpu); + else if (proc_gen == proc_gen_p10) + xive2_cpu_callin(cpu); + + /* Wait for work to do */ + while(true) { + if (cpu_check_jobs(cpu)) + cpu_process_jobs(); + else + cpu_idle_job(); + } +} + +/* Called from head.S, thus no prototype. 
*/ +void __noreturn __nomcount secondary_cpu_entry(void); + +void __noreturn __nomcount secondary_cpu_entry(void) +{ + struct cpu_thread *cpu = this_cpu(); + + per_thread_sanity_checks(); + + prlog(PR_DEBUG, "INIT: CPU PIR 0x%04x called in\n", cpu->pir); + + __secondary_cpu_entry(); +} diff --git a/roms/skiboot/core/interrupts.c b/roms/skiboot/core/interrupts.c new file mode 100644 index 000000000..0a617d385 --- /dev/null +++ b/roms/skiboot/core/interrupts.c @@ -0,0 +1,513 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Excuse me, you do work for me now? + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <chip.h> +#include <cpu.h> +#include <fsp.h> +#include <interrupts.h> +#include <opal.h> +#include <io.h> +#include <cec.h> +#include <device.h> +#include <ccan/str/str.h> +#include <timer.h> +#include <sbe-p8.h> +#include <sbe-p9.h> + +/* ICP registers */ +#define ICP_XIRR 0x4 /* 32-bit access */ +#define ICP_CPPR 0x4 /* 8-bit access */ +#define ICP_MFRR 0xc /* 8-bit access */ + +static LIST_HEAD(irq_sources); +static LIST_HEAD(irq_sources2); +static struct lock irq_lock = LOCK_UNLOCKED; + +void __register_irq_source(struct irq_source *is, bool secondary) +{ + struct irq_source *is1; + struct list_head *list = secondary ? &irq_sources2 : &irq_sources; + + prlog(PR_DEBUG, "IRQ: Registering %04x..%04x ops @%p (data %p)%s\n", + is->start, is->end - 1, is->ops, is->data, + secondary ? " [secondary]" : ""); + + lock(&irq_lock); + list_for_each(list, is1, link) { + if (is->end > is1->start && is->start < is1->end) { + prerror("register IRQ source overlap !\n"); + prerror(" new: %x..%x old: %x..%x\n", + is->start, is->end - 1, + is1->start, is1->end - 1); + assert(0); + } + } + list_add_tail(list, &is->link); + unlock(&irq_lock); +} + +void register_irq_source(const struct irq_source_ops *ops, void *data, + uint32_t start, uint32_t count) +{ + struct irq_source *is; + + is = zalloc(sizeof(struct irq_source)); + assert(is); + is->start = start; + is->end = start + count; + is->ops = ops; + is->data = data; + + __register_irq_source(is, false); +} + +void unregister_irq_source(uint32_t start, uint32_t count) +{ + struct irq_source *is; + + /* Note: We currently only unregister from the primary sources */ + lock(&irq_lock); + list_for_each(&irq_sources, is, link) { + if (start >= is->start && start < is->end) { + if (start != is->start || + count != (is->end - is->start)) { + prerror("unregister IRQ source mismatch !\n"); + prerror("start:%x, count: %x match: %x..%x\n", + start, count, is->start, is->end); + assert(0); + } + list_del(&is->link); + unlock(&irq_lock); + /* XXX Add synchronize / RCU */ + free(is); + return; + } + } + unlock(&irq_lock); + prerror("unregister IRQ source not found !\n"); + prerror("start:%x, count: %x\n", start, count); + assert(0); +} + +struct irq_source *irq_find_source(uint32_t isn) +{ + struct irq_source *is; + + lock(&irq_lock); + /* + * XXX This really needs some kind of caching ! 
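+ * (today every lookup is a linear scan of both source lists while
+ * holding irq_lock)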
+ */ + list_for_each(&irq_sources, is, link) { + if (isn >= is->start && isn < is->end) { + unlock(&irq_lock); + return is; + } + } + list_for_each(&irq_sources2, is, link) { + if (isn >= is->start && isn < is->end) { + unlock(&irq_lock); + return is; + } + } + unlock(&irq_lock); + + return NULL; +} + +void irq_for_each_source(void (*cb)(struct irq_source *, void *), void *data) +{ + struct irq_source *is; + + lock(&irq_lock); + list_for_each(&irq_sources, is, link) + cb(is, data); + list_for_each(&irq_sources2, is, link) + cb(is, data); + unlock(&irq_lock); +} + +/* + * This takes a 6-bit chip id and returns a 20 bit value representing + * the PSI interrupt. This includes all the fields above, ie, is a + * global interrupt number. + * + * For P8, this returns the base of the 8-interrupts block for PSI + */ +uint32_t get_psi_interrupt(uint32_t chip_id) +{ + uint32_t irq; + + switch(proc_gen) { + case proc_gen_p8: + irq = p8_chip_irq_block_base(chip_id, P8_IRQ_BLOCK_MISC); + irq += P8_IRQ_MISC_PSI_BASE; + break; + default: + assert(false); + }; + + return irq; +} + + +struct dt_node *add_ics_node(void) +{ + struct dt_node *ics = dt_new_addr(dt_root, "interrupt-controller", 0); + bool has_xive; + + if (!ics) + return NULL; + + has_xive = proc_gen >= proc_gen_p9; + + dt_add_property_cells(ics, "reg", 0, 0, 0, 0); + dt_add_property_strings(ics, "compatible", + has_xive ? "ibm,opal-xive-vc" : "IBM,ppc-xics", + "IBM,opal-xics"); + dt_add_property_cells(ics, "#address-cells", 0); + dt_add_property_cells(ics, "#interrupt-cells", 2); + dt_add_property_string(ics, "device_type", + "PowerPC-Interrupt-Source-Controller"); + dt_add_property(ics, "interrupt-controller", NULL, 0); + + return ics; +} + +uint32_t get_ics_phandle(void) +{ + struct dt_node *i; + + for (i = dt_first(dt_root); i; i = dt_next(dt_root, i)) { + if (streq(i->name, "interrupt-controller@0")) { + return i->phandle; + } + } + abort(); +} + +void add_opal_interrupts(void) +{ + struct irq_source *is; + unsigned int i, ns, tns = 0, count = 0; + uint32_t isn; + __be32 *irqs = NULL; + char *names = NULL; + + lock(&irq_lock); + list_for_each(&irq_sources, is, link) { + /* + * Don't even consider sources that don't have an interrupts + * callback or don't have an attributes one. + */ + if (!is->ops->interrupt || !is->ops->attributes) + continue; + for (isn = is->start; isn < is->end; isn++) { + uint64_t attr = is->ops->attributes(is, isn); + uint32_t iflags; + char *name; + + if (attr & IRQ_ATTR_TARGET_LINUX) + continue; + if (attr & IRQ_ATTR_TYPE_MSI) + iflags = 0; + else + iflags = 1; + name = is->ops->name ? is->ops->name(is, isn) : NULL; + ns = name ? strlen(name) : 0; + prlog(PR_DEBUG, "irq %x name: %s %s\n", + isn, + name ? name : "<null>", + iflags ? "[level]" : "[edge]"); + names = realloc(names, tns + ns + 1); + if (name) { + strcpy(names + tns, name); + tns += (ns + 1); + free(name); + } else + names[tns++] = 0; + i = count++; + irqs = realloc(irqs, 8 * count); + irqs[i*2] = cpu_to_be32(isn); + irqs[i*2+1] = cpu_to_be32(iflags); + } + } + unlock(&irq_lock); + + /* First create the standard "interrupts" property and the + * corresponding names property + */ + dt_add_property_cells(opal_node, "interrupt-parent", get_ics_phandle()); + dt_add_property(opal_node, "interrupts", irqs, count * 8); + dt_add_property(opal_node, "opal-interrupts-names", names, tns); + dt_add_property(opal_node, "interrupt-names", names, tns); + + /* Now "reduce" it to the old style "opal-interrupts" property + * format by stripping out the flags. 
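+ * As an illustration (made-up numbers): where the loop above built
+ * interrupts = <0x10 1 0x20 0>, the legacy property becomes
+ * opal-interrupts = <0x10 0x20>.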
The "opal-interrupts" + * property has one cell per interrupt, it is not a standard + * "interrupt" property. + * + * Note: Even if empty, create it, otherwise some bogus error + * handling in Linux can cause problems. + */ + for (i = 1; i < count; i++) + irqs[i] = irqs[i * 2]; + dt_add_property(opal_node, "opal-interrupts", irqs, count * 4); + + free(irqs); + free(names); +} + +/* + * This is called at init time (and one fast reboot) to sanitize the + * ICP. We set our priority to 0 to mask all interrupts and make sure + * no IPI is on the way. This is also called on wakeup from nap + */ +void reset_cpu_icp(void) +{ + void *icp = this_cpu()->icp_regs; + + if (!icp) + return; + + /* Dummy fetch */ + in_be32(icp + ICP_XIRR); + + /* Clear pending IPIs */ + out_8(icp + ICP_MFRR, 0xff); + + /* Set priority to max, ignore all incoming interrupts, EOI IPIs */ + out_be32(icp + ICP_XIRR, 2); +} + +/* Used by the PSI code to send an EOI during reset. This will also + * set the CPPR to 0 which should already be the case anyway + */ +void icp_send_eoi(uint32_t interrupt) +{ + void *icp = this_cpu()->icp_regs; + + if (!icp) + return; + + /* Set priority to max, ignore all incoming interrupts */ + out_be32(icp + ICP_XIRR, interrupt & 0xffffff); +} + +/* This is called before winkle or nap, we clear pending IPIs and + * set our priority to 1 to mask all but the IPI. + */ +void icp_prep_for_pm(void) +{ + void *icp = this_cpu()->icp_regs; + + if (!icp) + return; + + /* Clear pending IPIs */ + out_8(icp + ICP_MFRR, 0xff); + + /* Set priority to 1, ignore all incoming interrupts, EOI IPIs */ + out_be32(icp + ICP_XIRR, 0x01000002); +} + +/* This is called to wakeup somebody from winkle */ +void icp_kick_cpu(struct cpu_thread *cpu) +{ + void *icp = cpu->icp_regs; + + if (!icp) + return; + + /* Send high priority IPI */ + out_8(icp + ICP_MFRR, 0); +} + +/* Returns the number of chip ID bits used for interrupt numbers */ +static uint32_t p8_chip_id_bits(uint32_t chip) +{ + struct proc_chip *proc_chip = get_chip(chip); + + assert(proc_chip); + switch (proc_chip->type) { + case PROC_CHIP_P8_MURANO: + case PROC_CHIP_P8_VENICE: + return 6; + break; + + case PROC_CHIP_P8_NAPLES: + return 5; + break; + + default: + /* This shouldn't be called on non-P8 based systems */ + assert(0); + return 0; + break; + } +} + +/* The chip id mask is the upper p8_chip_id_bits of the irq number */ +static uint32_t chip_id_mask(uint32_t chip) +{ + uint32_t chip_id_bits = p8_chip_id_bits(chip); + uint32_t chip_id_mask; + + chip_id_mask = ((1 << chip_id_bits) - 1); + chip_id_mask <<= P8_IRQ_BITS - chip_id_bits; + return chip_id_mask; +} + +/* The block mask is what remains of the 19 bit irq number after + * removing the upper 5 or 6 bits for the chip# and the lower 11 bits + * for the number of bits per block. 
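+ *
+ * Worked example from the numbers above: with a 6-bit chip id
+ * (Murano/Venice) the 19-bit irq is laid out as chip[18:13] |
+ * block[12:11] | ive[10:0], i.e. 2 block bits; with a 5-bit chip id
+ * (Naples) there are 3 block bits (block[13:11]).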
*/ +static uint32_t block_mask(uint32_t chip) +{ + uint32_t chip_id_bits = p8_chip_id_bits(chip); + uint32_t irq_block_mask; + + irq_block_mask = P8_IRQ_BITS - chip_id_bits - P8_IVE_BITS; + irq_block_mask = ((1 << irq_block_mask) - 1) << P8_IVE_BITS; + return irq_block_mask; +} + +uint32_t p8_chip_irq_block_base(uint32_t chip, uint32_t block) +{ + uint32_t irq; + + assert(chip < (1 << p8_chip_id_bits(chip))); + irq = SETFIELD(chip_id_mask(chip), 0, chip); + irq = SETFIELD(block_mask(chip), irq, block); + + return irq; +} + +uint32_t p8_chip_irq_phb_base(uint32_t chip, uint32_t phb) +{ + assert(chip < (1 << p8_chip_id_bits(chip))); + + return p8_chip_irq_block_base(chip, phb + P8_IRQ_BLOCK_PHB_BASE); +} + +uint32_t p8_irq_to_chip(uint32_t irq) +{ + /* This assumes we only have one type of cpu in a system, + * which should be ok. */ + return GETFIELD(chip_id_mask(this_cpu()->chip_id), irq); +} + +uint32_t p8_irq_to_block(uint32_t irq) +{ + return GETFIELD(block_mask(this_cpu()->chip_id), irq); +} + +uint32_t p8_irq_to_phb(uint32_t irq) +{ + return p8_irq_to_block(irq) - P8_IRQ_BLOCK_PHB_BASE; +} + +bool __irq_source_eoi(struct irq_source *is, uint32_t isn) +{ + if (!is->ops->eoi) + return false; + + is->ops->eoi(is, isn); + return true; +} + +bool irq_source_eoi(uint32_t isn) +{ + struct irq_source *is = irq_find_source(isn); + + if (!is) + return false; + + return __irq_source_eoi(is, isn); +} + +static int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority) +{ + struct irq_source *is = irq_find_source(isn); + + if (!is || !is->ops->set_xive) + return OPAL_PARAMETER; + + return is->ops->set_xive(is, isn, server, priority); +} +opal_call(OPAL_SET_XIVE, opal_set_xive, 3); + +static int64_t opal_get_xive(uint32_t isn, __be16 *server, uint8_t *priority) +{ + struct irq_source *is = irq_find_source(isn); + uint16_t s; + int64_t ret; + + if (!opal_addr_valid(server)) + return OPAL_PARAMETER; + + if (!is || !is->ops->get_xive) + return OPAL_PARAMETER; + + ret = is->ops->get_xive(is, isn, &s, priority); + *server = cpu_to_be16(s); + return ret; +} +opal_call(OPAL_GET_XIVE, opal_get_xive, 3); + +static int64_t opal_handle_interrupt(uint32_t isn, __be64 *outstanding_event_mask) +{ + struct irq_source *is = irq_find_source(isn); + int64_t rc = OPAL_SUCCESS; + + if (!opal_addr_valid(outstanding_event_mask)) + return OPAL_PARAMETER; + + /* No source ? 
return */ + if (!is || !is->ops->interrupt) { + rc = OPAL_PARAMETER; + goto bail; + } + + /* Run it */ + is->ops->interrupt(is, isn); + + /* Check timers if SBE timer isn't working */ + if (!p8_sbe_timer_ok() && !p9_sbe_timer_ok()) + check_timers(true); + + /* Update output events */ + bail: + if (outstanding_event_mask) + *outstanding_event_mask = cpu_to_be64(opal_pending_events); + + return rc; +} +opal_call(OPAL_HANDLE_INTERRUPT, opal_handle_interrupt, 2); + +void init_interrupts(void) +{ + struct dt_node *icp; + const struct dt_property *sranges; + struct cpu_thread *cpu; + u32 base, count, i; + u64 addr, size; + + dt_for_each_compatible(dt_root, icp, "ibm,ppc-xicp") { + sranges = dt_require_property(icp, + "ibm,interrupt-server-ranges", + -1); + base = dt_get_number(sranges->prop, 1); + count = dt_get_number(sranges->prop + 4, 1); + for (i = 0; i < count; i++) { + addr = dt_get_address(icp, i, &size); + cpu = find_cpu_by_server(base + i); + if (cpu) + cpu->icp_regs = (void *)addr; + } + } +} + diff --git a/roms/skiboot/core/ipmi-opal.c b/roms/skiboot/core/ipmi-opal.c new file mode 100644 index 000000000..cc45b409b --- /dev/null +++ b/roms/skiboot/core/ipmi-opal.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * IPMI OPAL calls + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <stdlib.h> +#include <string.h> +#include <ipmi.h> +#include <lock.h> +#include <opal.h> +#include <device.h> +#include <ccan/list/list.h> + +static struct lock msgq_lock = LOCK_UNLOCKED; +static struct list_head msgq = LIST_HEAD_INIT(msgq); + +static void opal_send_complete(struct ipmi_msg *msg) +{ + lock(&msgq_lock); + list_add_tail(&msgq, &msg->link); + opal_update_pending_evt(ipmi_backend->opal_event_ipmi_recv, + ipmi_backend->opal_event_ipmi_recv); + unlock(&msgq_lock); +} + +static int64_t opal_ipmi_send(uint64_t interface, + struct opal_ipmi_msg *opal_ipmi_msg, uint64_t msg_len) +{ + struct ipmi_msg *msg; + + if (opal_ipmi_msg->version != OPAL_IPMI_MSG_FORMAT_VERSION_1) { + prerror("OPAL IPMI: Incorrect version\n"); + return OPAL_UNSUPPORTED; + } + + msg_len -= sizeof(struct opal_ipmi_msg); + if (msg_len > IPMI_MAX_REQ_SIZE) { + prerror("OPAL IPMI: Invalid request length\n"); + return OPAL_PARAMETER; + } + + prlog(PR_TRACE, "opal_ipmi_send(cmd: 0x%02x netfn: 0x%02x len: 0x%02llx)\n", + opal_ipmi_msg->cmd, opal_ipmi_msg->netfn >> 2, msg_len); + + msg = ipmi_mkmsg(interface, + IPMI_CODE(opal_ipmi_msg->netfn >> 2, opal_ipmi_msg->cmd), + opal_send_complete, NULL, opal_ipmi_msg->data, + msg_len, IPMI_MAX_RESP_SIZE); + if (!msg) + return OPAL_RESOURCE; + + msg->complete = opal_send_complete; + msg->error = opal_send_complete; + return ipmi_queue_msg(msg); +} + +static int64_t opal_ipmi_recv(uint64_t interface, + struct opal_ipmi_msg *opal_ipmi_msg, __be64 *msg_len) +{ + struct ipmi_msg *msg; + int64_t rc; + + lock(&msgq_lock); + msg = list_top(&msgq, struct ipmi_msg, link); + + if (!msg) { + rc = OPAL_EMPTY; + goto out_unlock; + } + + if (opal_ipmi_msg->version != OPAL_IPMI_MSG_FORMAT_VERSION_1) { + prerror("OPAL IPMI: Incorrect version\n"); + rc = OPAL_UNSUPPORTED; + goto out_del_msg; + } + + if (interface != IPMI_DEFAULT_INTERFACE) { + prerror("IPMI: Invalid interface 0x%llx in opal_ipmi_recv\n", interface); + rc = OPAL_PARAMETER; + goto out_del_msg; + } + + if (be64_to_cpu(*msg_len) - sizeof(struct opal_ipmi_msg) < msg->resp_size + 1) { + rc = OPAL_RESOURCE; + goto out_del_msg; + } + + list_del(&msg->link); + if (list_empty(&msgq)) + 
opal_update_pending_evt(ipmi_backend->opal_event_ipmi_recv, 0); + unlock(&msgq_lock); + + opal_ipmi_msg->cmd = msg->cmd; + opal_ipmi_msg->netfn = msg->netfn; + opal_ipmi_msg->data[0] = msg->cc; + memcpy(&opal_ipmi_msg->data[1], msg->data, msg->resp_size); + + prlog(PR_TRACE, "opal_ipmi_recv(cmd: 0x%02x netfn: 0x%02x resp_size: 0x%02x)\n", + msg->cmd, msg->netfn >> 2, msg->resp_size); + + /* Add one as the completion code is returned in the message data */ + *msg_len = cpu_to_be64(msg->resp_size + sizeof(struct opal_ipmi_msg) + 1); + ipmi_free_msg(msg); + + return OPAL_SUCCESS; + +out_del_msg: + list_del(&msg->link); + if (list_empty(&msgq)) + opal_update_pending_evt(ipmi_backend->opal_event_ipmi_recv, 0); + ipmi_free_msg(msg); +out_unlock: + unlock(&msgq_lock); + return rc; +} + +void ipmi_opal_init(void) +{ + struct dt_node *opal_ipmi, *opal_event = NULL; + + opal_ipmi = dt_new(opal_node, "ipmi"); + dt_add_property_strings(opal_ipmi, "compatible", "ibm,opal-ipmi"); + dt_add_property_cells(opal_ipmi, "ibm,ipmi-interface-id", + IPMI_DEFAULT_INTERFACE); + dt_add_property_cells(opal_ipmi, "interrupts", + ilog2(ipmi_backend->opal_event_ipmi_recv)); + + if (proc_gen >= proc_gen_p9) + opal_event = dt_find_by_name(opal_node, "event"); + if (opal_event) + dt_add_property_cells(opal_ipmi, "interrupt-parent", + opal_event->phandle); + + opal_register(OPAL_IPMI_SEND, opal_ipmi_send, 3); + opal_register(OPAL_IPMI_RECV, opal_ipmi_recv, 3); +} diff --git a/roms/skiboot/core/ipmi.c b/roms/skiboot/core/ipmi.c new file mode 100644 index 000000000..bbc1a7b69 --- /dev/null +++ b/roms/skiboot/core/ipmi.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * in-band IPMI, probably over bt (or via FSP mbox on FSP) + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <stdio.h> +#include <string.h> +#include <bt.h> +#include <ipmi.h> +#include <opal.h> +#include <device.h> +#include <skiboot.h> +#include <lock.h> +#include <cpu.h> +#include <timebase.h> +#include <debug_descriptor.h> + +struct ipmi_backend *ipmi_backend = NULL; +static struct lock sync_lock = LOCK_UNLOCKED; +static struct ipmi_msg *sync_msg = NULL; + +void ipmi_free_msg(struct ipmi_msg *msg) +{ + /* ipmi_free_msg frees messages allocated by the + * backend. Without a backend we couldn't have allocated + * messages to free (we don't support removing backends + * yet). */ + if (!ipmi_present()) { + prerror("IPMI: Trying to free message without backend\n"); + return; + } + + msg->backend->free_msg(msg); +} + +void ipmi_init_msg(struct ipmi_msg *msg, int interface, + uint32_t code, void (*complete)(struct ipmi_msg *), + void *user_data, size_t req_size, size_t resp_size) +{ + /* We don't actually support multiple interfaces at the moment. 
*/ + assert(interface == IPMI_DEFAULT_INTERFACE); + + msg->backend = ipmi_backend; + msg->cmd = IPMI_CMD(code); + msg->netfn = IPMI_NETFN(code) << 2; + msg->req_size = req_size; + msg->resp_size = resp_size; + msg->complete = complete; + msg->user_data = user_data; +} + +struct ipmi_msg *ipmi_mkmsg_simple(uint32_t code, void *req_data, size_t req_size) +{ + return ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, code, ipmi_free_msg, NULL, + req_data, req_size, 0); +} + +struct ipmi_msg *ipmi_mkmsg(int interface, uint32_t code, + void (*complete)(struct ipmi_msg *), + void *user_data, void *req_data, size_t req_size, + size_t resp_size) +{ + struct ipmi_msg *msg; + + if (!ipmi_present()) + return NULL; + + msg = ipmi_backend->alloc_msg(req_size, resp_size); + if (!msg) + return NULL; + + ipmi_init_msg(msg, interface, code, complete, user_data, req_size, + resp_size); + + /* Commands are free to over ride this if they want to handle errors */ + msg->error = ipmi_free_msg; + + if (req_data) + memcpy(msg->data, req_data, req_size); + + return msg; +} + +int ipmi_queue_msg_head(struct ipmi_msg *msg) +{ + if (!ipmi_present()) + return OPAL_HARDWARE; + + if (!msg) { + prerror("%s: Attempting to queue NULL message\n", __func__); + return OPAL_PARAMETER; + } + + return msg->backend->queue_msg_head(msg); +} + +int ipmi_queue_msg(struct ipmi_msg *msg) +{ + /* Here we could choose which interface to use if we want to support + multiple interfaces. */ + if (!ipmi_present()) + return OPAL_HARDWARE; + + if (!msg) { + prerror("%s: Attempting to queue NULL message\n", __func__); + return OPAL_PARAMETER; + } + + return msg->backend->queue_msg(msg); +} + +int ipmi_dequeue_msg(struct ipmi_msg *msg) +{ + if (!ipmi_present()) + return OPAL_HARDWARE; + + if (!msg) { + prerror("%s: Attempting to dequeue NULL message\n", __func__); + return OPAL_PARAMETER; + } + + return msg->backend->dequeue_msg(msg); +} + +void ipmi_cmd_done(uint8_t cmd, uint8_t netfn, uint8_t cc, struct ipmi_msg *msg) +{ + msg->cc = cc; + if (msg->cmd != cmd) { + prerror("IPMI: Incorrect cmd 0x%02x in response\n", cmd); + cc = IPMI_ERR_UNSPECIFIED; + } + + if ((msg->netfn >> 2) + 1 != (netfn >> 2)) { + prerror("IPMI: Incorrect netfn 0x%02x in response\n", netfn >> 2); + cc = IPMI_ERR_UNSPECIFIED; + } + msg->netfn = netfn; + + if (cc != IPMI_CC_NO_ERROR) { + prlog(PR_DEBUG, "IPMI: Got error response. cmd=0x%x, netfn=0x%x," + " rc=0x%02x\n", msg->cmd, msg->netfn >> 2, msg->cc); + + assert(msg->error); + msg->error(msg); + } else if (msg->complete) + msg->complete(msg); + + /* At this point the message has should have been freed by the + completion functions. */ + + /* If this is a synchronous message flag that we are done */ + if (msg == sync_msg) { + sync_msg = NULL; + barrier(); + } +} + +void ipmi_queue_msg_sync(struct ipmi_msg *msg) +{ + void (*poll)(void) = msg->backend->poll; + + if (!ipmi_present()) + return; + + if (!msg) { + prerror("%s: Attempting to queue NULL message\n", __func__); + return; + } + + lock(&sync_lock); + while (sync_msg); + sync_msg = msg; + if (msg->backend->disable_retry && !opal_booting()) + msg->backend->disable_retry(msg); + ipmi_queue_msg_head(msg); + unlock(&sync_lock); + + /* + * BT response handling relies on a timer. We can't just run all + * timers because we may have been called with a lock that a timer + * wants, and they're generally not written to cope with that. + * So, just run whatever the IPMI backend needs to make forward + * progress. 
+ */ + while (sync_msg == msg) { + if (poll) + poll(); + time_wait_ms(10); + } +} + +static void ipmi_read_event_complete(struct ipmi_msg *msg) +{ + prlog(PR_DEBUG, "IPMI read event %02x complete: %d bytes. cc: %02x\n", + msg->cmd, msg->resp_size, msg->cc); + + /* Handle power control & PNOR handshake events */ + ipmi_parse_sel(msg); + + ipmi_free_msg(msg); +} + +static void ipmi_get_message_flags_complete(struct ipmi_msg *msg) +{ + uint8_t flags = msg->data[0]; + + ipmi_free_msg(msg); + + prlog(PR_DEBUG, "IPMI Get Message Flags: %02x\n", flags); + + /* Once we see an interrupt we assume the payload has + * booted. We disable the wdt and let the OS setup its own + * wdt. + * + * This is also where we consider the OS to be booted, so we set + * the boot count sensor */ + if (flags & IPMI_MESSAGE_FLAGS_WATCHDOG_PRE_TIMEOUT) { + ipmi_wdt_stop(); + ipmi_set_boot_count(); + } + + /* Message available in the event buffer? Queue a Read Event command + * to retrieve it. The flag is cleared by performing a read */ + if (flags & IPMI_MESSAGE_FLAGS_EVENT_BUFFER) { + msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_READ_EVENT, + ipmi_read_event_complete, NULL, NULL, 0, 16); + ipmi_queue_msg(msg); + } +} + +void ipmi_sms_attention(void) +{ + struct ipmi_msg *msg; + + if (!ipmi_present()) + return; + + /* todo: when we handle multiple IPMI interfaces, we'll need to + * ensure that this message is associated with the appropriate + * backend. */ + msg = ipmi_mkmsg(IPMI_DEFAULT_INTERFACE, IPMI_GET_MESSAGE_FLAGS, + ipmi_get_message_flags_complete, NULL, NULL, 0, 1); + + ipmi_queue_msg(msg); +} + +void ipmi_register_backend(struct ipmi_backend *backend) +{ + /* We only support one backend at the moment */ + assert(backend->alloc_msg); + assert(backend->free_msg); + assert(backend->queue_msg); + assert(backend->dequeue_msg); + ipmi_backend = backend; + ipmi_backend->opal_event_ipmi_recv = opal_dynamic_event_alloc(); +} + +bool ipmi_present(void) +{ + return ipmi_backend != NULL; +} diff --git a/roms/skiboot/core/lock.c b/roms/skiboot/core/lock.c new file mode 100644 index 000000000..f0ab595b1 --- /dev/null +++ b/roms/skiboot/core/lock.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Simple spinlock + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <lock.h> +#include <assert.h> +#include <processor.h> +#include <cpu.h> +#include <console.h> +#include <timebase.h> + +/* Set to bust locks. 
Note, this is initialized to true because our + * lock debugging code is not going to work until we have the per + * CPU data initialized + */ +bool bust_locks = true; + +#define LOCK_TIMEOUT_MS 5000 + +#ifdef DEBUG_LOCKS + +static void __nomcount lock_error(struct lock *l, const char *reason, uint16_t err) +{ + fprintf(stderr, "LOCK ERROR: %s @%p (state: 0x%016llx)\n", + reason, l, l->lock_val); + op_display(OP_FATAL, OP_MOD_LOCK, err); + + abort(); +} + +static inline void __nomcount lock_check(struct lock *l) +{ + if ((l->lock_val & 1) && (l->lock_val >> 32) == this_cpu()->pir) + lock_error(l, "Invalid recursive lock", 0); +} + +static inline void __nomcount unlock_check(struct lock *l) +{ + if (!(l->lock_val & 1)) + lock_error(l, "Unlocking unlocked lock", 1); + + if ((l->lock_val >> 32) != this_cpu()->pir) + lock_error(l, "Unlocked non-owned lock", 2); + + if (l->in_con_path && this_cpu()->con_suspend == 0) + lock_error(l, "Unlock con lock with console not suspended", 3); + + if (list_empty(&this_cpu()->locks_held)) + lock_error(l, "Releasing lock we don't hold depth", 4); +} + +static inline bool __nomcount __try_lock(struct cpu_thread *cpu, struct lock *l) +{ + uint64_t val; + + val = cpu->pir; + val <<= 32; + val |= 1; + + barrier(); + if (__cmpxchg64(&l->lock_val, 0, val) == 0) { + sync(); + return true; + } + return false; +} + +static inline bool lock_timeout(unsigned long start) +{ + /* Print warning if lock has been spinning for more than TIMEOUT_MS */ + unsigned long wait = tb_to_msecs(mftb()); + + if (wait - start > LOCK_TIMEOUT_MS) { + /* + * If the timebase is invalid, we shouldn't + * throw an error. This is possible with pending HMIs + * that need to recover TB. + */ + if( !(mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID)) + return false; + return true; + } + + return false; +} +#else +static inline void lock_check(struct lock *l) { }; +static inline void unlock_check(struct lock *l) { }; +static inline bool lock_timeout(unsigned long s) { return false; } +#endif /* DEBUG_LOCKS */ + +#if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS) + +static struct lock dl_lock = { + .lock_val = 0, + .in_con_path = true, + .owner = LOCK_CALLER +}; + +/* Find circular dependencies in the lock requests. */ +static __nomcount inline bool check_deadlock(void) +{ + uint32_t lock_owner, start, i; + struct cpu_thread *next_cpu; + struct lock *next; + + next = this_cpu()->requested_lock; + start = this_cpu()->pir; + i = 0; + + while (i < cpu_max_pir) { + + if (!next) + return false; + + if (!(next->lock_val & 1) || next->in_con_path) + return false; + + lock_owner = next->lock_val >> 32; + + if (lock_owner == start) + return true; + + next_cpu = find_cpu_by_pir_nomcount(lock_owner); + + if (!next_cpu) + return false; + + next = next_cpu->requested_lock; + i++; + } + + return false; +} + +static void add_lock_request(struct lock *l) +{ + struct cpu_thread *curr = this_cpu(); + bool dead; + + if (curr->state != cpu_state_active && + curr->state != cpu_state_os) + return; + + /* + * For deadlock detection we must keep the lock states constant + * while doing the deadlock check. 
However we need to avoid + * clashing with the stack checker, so no mcount and use an + * inline implementation of the lock for the dl_lock + */ + for (;;) { + if (__try_lock(curr, &dl_lock)) + break; + smt_lowest(); + while (dl_lock.lock_val) + barrier(); + smt_medium(); + } + + curr->requested_lock = l; + + dead = check_deadlock(); + + lwsync(); + dl_lock.lock_val = 0; + + if (dead) + lock_error(l, "Deadlock detected", 0); +} + +static void remove_lock_request(void) +{ + this_cpu()->requested_lock = NULL; +} +#else +static inline void add_lock_request(struct lock *l) { }; +static inline void remove_lock_request(void) { }; +#endif /* #if defined(DEADLOCK_CHECKER) && defined(DEBUG_LOCKS) */ + +bool lock_held_by_me(struct lock *l) +{ + uint64_t pir64 = this_cpu()->pir; + + return l->lock_val == ((pir64 << 32) | 1); +} + +bool try_lock_caller(struct lock *l, const char *owner) +{ + struct cpu_thread *cpu = this_cpu(); + + if (bust_locks) + return true; + + if (l->in_con_path) + cpu->con_suspend++; + if (__try_lock(cpu, l)) { + l->owner = owner; + +#ifdef DEBUG_LOCKS_BACKTRACE + backtrace_create(l->bt_buf, LOCKS_BACKTRACE_MAX_ENTS, + &l->bt_metadata); +#endif + + list_add(&cpu->locks_held, &l->list); + return true; + } + if (l->in_con_path) + cpu->con_suspend--; + return false; +} + +void lock_caller(struct lock *l, const char *owner) +{ + bool timeout_warn = false; + unsigned long start = 0; + + if (bust_locks) + return; + + lock_check(l); + + if (try_lock_caller(l, owner)) + return; + add_lock_request(l); + +#ifdef DEBUG_LOCKS + /* + * Ensure that we get a valid start value + * as we may be handling TFMR errors and taking + * a lock to do so, so timebase could be garbage + */ + if( (mfspr(SPR_TFMR) & SPR_TFMR_TB_VALID)) + start = tb_to_msecs(mftb()); +#endif + + for (;;) { + if (try_lock_caller(l, owner)) + break; + smt_lowest(); + while (l->lock_val) + barrier(); + smt_medium(); + + if (start && !timeout_warn && lock_timeout(start)) { + /* + * Holding the lock request while printing a + * timeout and taking console locks can result + * in deadlock fals positive if the lock owner + * tries to take the console lock. So drop it. 
+ */ + remove_lock_request(); + prlog(PR_WARNING, "WARNING: Lock has been spinning for over %dms\n", LOCK_TIMEOUT_MS); + backtrace(); + add_lock_request(l); + timeout_warn = true; + } + } + + remove_lock_request(); +} + +void unlock(struct lock *l) +{ + struct cpu_thread *cpu = this_cpu(); + + if (bust_locks) + return; + + unlock_check(l); + + l->owner = NULL; + list_del(&l->list); + lwsync(); + l->lock_val = 0; + + /* WARNING: On fast reboot, we can be reset right at that + * point, so the reset_lock in there cannot be in the con path + */ + if (l->in_con_path) { + cpu->con_suspend--; + if (cpu->con_suspend == 0 && cpu->con_need_flush) + flush_console(); + } +} + +bool lock_recursive_caller(struct lock *l, const char *caller) +{ + if (bust_locks) + return false; + + if (lock_held_by_me(l)) + return false; + + lock_caller(l, caller); + return true; +} + +void init_locks(void) +{ + bust_locks = false; +} + +void dump_locks_list(void) +{ + struct lock *l; + + prlog(PR_ERR, "Locks held:\n"); + list_for_each(&this_cpu()->locks_held, l, list) { + prlog(PR_ERR, " %s\n", l->owner); +#ifdef DEBUG_LOCKS_BACKTRACE + backtrace_print(l->bt_buf, &l->bt_metadata, NULL, NULL, true); +#endif + } +} + +void drop_my_locks(bool warn) +{ + struct lock *l; + + disable_fast_reboot("Lock corruption"); + while((l = list_top(&this_cpu()->locks_held, struct lock, list)) != NULL) { + if (warn) { + prlog(PR_ERR, " %s\n", l->owner); +#ifdef DEBUG_LOCKS_BACKTRACE + backtrace_print(l->bt_buf, &l->bt_metadata, NULL, NULL, + true); +#endif + } + unlock(l); + } +} + diff --git a/roms/skiboot/core/malloc.c b/roms/skiboot/core/malloc.c new file mode 100644 index 000000000..76996fff4 --- /dev/null +++ b/roms/skiboot/core/malloc.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Implement malloc()/free() etc on top of our memory region allocator, + * which provides mem_alloc()/mem_free(). + * + * Copyright 2013-2015 IBM Corp. + */ + +#include <mem_region.h> +#include <lock.h> +#include <string.h> +#include <mem_region-malloc.h> + +#define DEFAULT_ALIGN __alignof__(long) + +void *__memalign(size_t blocksize, size_t bytes, const char *location) +{ + void *p; + + lock(&skiboot_heap.free_list_lock); + p = mem_alloc(&skiboot_heap, bytes, blocksize, location); + unlock(&skiboot_heap.free_list_lock); + + return p; +} + +void *__malloc(size_t bytes, const char *location) +{ + return __memalign(DEFAULT_ALIGN, bytes, location); +} + +void __free(void *p, const char *location) +{ + lock(&skiboot_heap.free_list_lock); + mem_free(&skiboot_heap, p, location); + unlock(&skiboot_heap.free_list_lock); +} + +void *__realloc(void *ptr, size_t size, const char *location) +{ + void *newptr; + + /* Two classic malloc corner cases. 
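+ * realloc(NULL, size) must behave like malloc(size), and
+ * realloc(ptr, 0) must behave like free(ptr) (returning NULL), which
+ * is exactly what the checks below implement.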
*/ + if (!size) { + __free(ptr, location); + return NULL; + } + if (!ptr) + return __malloc(size, location); + + lock(&skiboot_heap.free_list_lock); + if (mem_resize(&skiboot_heap, ptr, size, location)) { + newptr = ptr; + } else { + newptr = mem_alloc(&skiboot_heap, size, DEFAULT_ALIGN, + location); + if (newptr) { + size_t copy = mem_allocated_size(ptr); + if (copy > size) + copy = size; + memcpy(newptr, ptr, copy); + mem_free(&skiboot_heap, ptr, location); + } + } + unlock(&skiboot_heap.free_list_lock); + return newptr; +} + +void *__zalloc(size_t bytes, const char *location) +{ + void *p = __malloc(bytes, location); + + if (p) + memset(p, 0, bytes); + return p; +} diff --git a/roms/skiboot/core/mce.c b/roms/skiboot/core/mce.c new file mode 100644 index 000000000..47674abcb --- /dev/null +++ b/roms/skiboot/core/mce.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Machine Check Exceptions + * + * Copyright 2020 IBM Corp. + */ + +#define pr_fmt(fmt) "MCE: " fmt + +#include <ras.h> +#include <opal.h> +#include <cpu.h> + +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + +struct mce_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + uint64_t type; + const char *error_str; +}; + +static const struct mce_ierror_table mce_p9_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, + MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_INVOLVED_EA, + "instruction fetch memory uncorrectable error", }, +{ 0x00000000081c0000, 0x0000000000080000, + MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA, + "instruction fetch SLB parity error", }, +{ 0x00000000081c0000, 0x00000000000c0000, + MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA, + "instruction fetch SLB multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000100000, + MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_ERAT_ERROR, + "instruction fetch ERAT multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000140000, + MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_TLB_ERROR, + "instruction fetch TLB multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000180000, + MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access memory uncorrectable error", }, +{ 0x00000000081c0000, 0x00000000001c0000, + MCE_INSNFETCH | MCE_INVOLVED_EA, + "instruction fetch to foreign address", }, +{ 0x00000000081c0000, 0x0000000008000000, + MCE_INSNFETCH | MCE_INVOLVED_EA, + "instruction fetch foreign link time-out", }, +{ 0x00000000081c0000, 0x0000000008040000, + MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access foreign link time-out", }, +{ 0x00000000081c0000, 0x00000000080c0000, + MCE_INSNFETCH | MCE_INVOLVED_EA, + "instruction fetch real address error", }, +{ 0x00000000081c0000, 0x0000000008100000, + MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access real address error", }, +{ 0x00000000081c0000, 0x0000000008140000, + MCE_LOADSTORE | MCE_IMPRECISE, + "store real address asynchronous error", }, +{ 0x00000000081c0000, 0x0000000008180000, + MCE_LOADSTORE | MCE_IMPRECISE, + "store foreign link time-out asynchronous error", }, +{ 0x00000000081c0000, 0x00000000081c0000, + MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access to foreign address", }, +{ 0 } }; + +static const struct mce_ierror_table mce_p10_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, + MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_INVOLVED_EA, + "instruction fetch memory uncorrectable error", }, +{ 0x00000000081c0000, 
0x0000000000080000, + MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA, + "instruction fetch SLB parity error", }, +{ 0x00000000081c0000, 0x00000000000c0000, + MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA, + "instruction fetch SLB multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000100000, + MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_ERAT_ERROR, + "instruction fetch ERAT multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000140000, + MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_TLB_ERROR, + "instruction fetch TLB multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000180000, + MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access memory uncorrectable error", }, +{ 0x00000000081c0000, 0x00000000001c0000, + MCE_INSNFETCH | MCE_INVOLVED_EA, + "instruction fetch to control real address", }, +{ 0x00000000081c0000, 0x00000000080c0000, + MCE_INSNFETCH | MCE_INVOLVED_EA, + "instruction fetch real address error", }, +{ 0x00000000081c0000, 0x0000000008100000, + MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access real address error", }, +{ 0x00000000081c0000, 0x0000000008140000, + MCE_LOADSTORE | MCE_IMPRECISE, + "store real address asynchronous error", }, +{ 0x00000000081c0000, 0x00000000081c0000, + MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access to control real address", }, +{ 0 } }; + +struct mce_derror_table { + unsigned long dsisr_value; + uint64_t type; + const char *error_str; +}; + +static const struct mce_derror_table mce_p9_derror_table[] = { +{ 0x00008000, + MCE_LOADSTORE | MCE_MEMORY_ERROR, + "load/store memory uncorrectable error", }, +{ 0x00004000, + MCE_LOADSTORE | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "load/store page table access memory uncorrectable error", }, +{ 0x00002000, + MCE_LOADSTORE | MCE_INVOLVED_EA, + "load/store foreign link time-out", }, +{ 0x00001000, + MCE_LOADSTORE | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "load/store page table access foreign link time-out", }, +{ 0x00000800, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_ERAT_ERROR, + "load/store ERAT multi-hit error", }, +{ 0x00000400, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_TLB_ERROR, + "load/store TLB multi-hit error", }, +{ 0x00000200, + MCE_LOADSTORE | MCE_TLBIE_ERROR, + "TLBIE or TLBIEL instruction programming error", }, +{ 0x00000100, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR, + "load/store SLB parity error", }, +{ 0x00000080, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR, + "load/store SLB multi-hit error", }, +{ 0x00000040, + MCE_LOADSTORE | MCE_INVOLVED_EA, + "load real address error", }, +{ 0x00000020, + MCE_LOADSTORE | MCE_TABLE_WALK, + "load/store page table access real address error", }, +{ 0x00000010, + MCE_LOADSTORE | MCE_TABLE_WALK, + "load/store page table access to foreign address", }, +{ 0x00000008, + MCE_LOADSTORE, + "load/store to foreign address", }, +{ 0 } }; + +static const struct mce_derror_table mce_p10_derror_table[] = { +{ 0x00008000, + MCE_LOADSTORE | MCE_MEMORY_ERROR, + "load/store memory uncorrectable error", }, +{ 0x00004000, + MCE_LOADSTORE | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "load/store page table access memory uncorrectable error", }, +{ 0x00000800, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_ERAT_ERROR, + "load/store ERAT multi-hit error", }, +{ 0x00000400, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_TLB_ERROR, + "load/store TLB multi-hit error", }, +{ 0x00000200, + MCE_TLBIE_ERROR, + "TLBIE or TLBIEL instruction 
programming error", }, +{ 0x00000100, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR, + "load/store SLB parity error", }, +{ 0x00000080, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR, + "load/store SLB multi-hit error", }, +{ 0x00000040, + MCE_LOADSTORE | MCE_INVOLVED_EA, + "load real address error", }, +{ 0x00000020, + MCE_LOADSTORE | MCE_TABLE_WALK, + "load/store page table access real address error", }, +{ 0x00000010, + MCE_LOADSTORE | MCE_TABLE_WALK, + "load/store page table access to control real address", }, +{ 0x00000008, + MCE_LOADSTORE, + "load/store to control real address", }, +{ 0 } }; + +static void decode_ierror(const struct mce_ierror_table table[], + uint64_t srr1, + uint64_t *type, + const char **error_str) +{ + int i; + + for (i = 0; table[i].srr1_mask; i++) { + if ((srr1 & table[i].srr1_mask) != table[i].srr1_value) + continue; + + *type = table[i].type; + *error_str = table[i].error_str; + } +} + +static void decode_derror(const struct mce_derror_table table[], + uint32_t dsisr, + uint64_t *type, + const char **error_str) +{ + int i; + + for (i = 0; table[i].dsisr_value; i++) { + if (!(dsisr & table[i].dsisr_value)) + continue; + + *type = table[i].type; + *error_str = table[i].error_str; + } +} + +static void decode_mce_p9(uint64_t srr0, uint64_t srr1, + uint32_t dsisr, uint64_t dar, + uint64_t *type, const char **error_str, + uint64_t *address) +{ + /* + * On POWER9 DD2.1 and below, it's possible to get a machine check + * caused by a paste instruction where only DSISR bit 25 is set. This + * will result in the MCE handler seeing an unknown event and the + * kernel crashing. An MCE that occurs like this is spurious, so we + * don't need to do anything in terms of servicing it. If there is + * something that needs to be serviced, the CPU will raise the MCE + * again with the correct DSISR so that it can be serviced properly. + * So detect this case and mark it as handled. + */ + if (SRR1_MC_LOADSTORE(srr1) && dsisr == 0x02000000) { + *type = MCE_NO_ERROR; + *error_str = "no error (superfluous machine check)"; + return; + } + + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + ((srr1 & 0x081c0000) == 0x08140000 || + (srr1 & 0x081c0000) == 0x08180000)) { + srr1 &= ~PPC_BIT(42); + } + + if (SRR1_MC_LOADSTORE(srr1)) { + decode_derror(mce_p9_derror_table, dsisr, type, error_str); + if (*type & MCE_INVOLVED_EA) + *address = dar; + } else { + decode_ierror(mce_p9_ierror_table, srr1, type, error_str); + if (*type & MCE_INVOLVED_EA) + *address = srr0; + } +} + +static void decode_mce_p10(uint64_t srr0, uint64_t srr1, + uint32_t dsisr, uint64_t dar, + uint64_t *type, const char **error_str, + uint64_t *address) +{ + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). 
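
The ierror/derror tables above drive a simple mask/compare classifier: decode_ierror() keeps the last entry whose masked SRR1 bits equal the entry's value. A minimal standalone sketch of that pattern follows; the two sample entries are copied from the P9 table, everything else is illustrative rather than skiboot code.

#include <stdint.h>
#include <stdio.h>

struct ierror_entry {
	uint64_t mask;
	uint64_t value;
	const char *desc;
};

static const struct ierror_entry table[] = {
	{ 0x00000000081c0000ULL, 0x0000000000080000ULL, "SLB parity error" },
	{ 0x00000000081c0000ULL, 0x00000000000c0000ULL, "SLB multi-hit error" },
	{ 0, 0, NULL }	/* mask == 0 terminates, as in the tables above */
};

static const char *classify(uint64_t srr1)
{
	const char *desc = "unknown";
	int i;

	/* Last match wins, mirroring decode_ierror() */
	for (i = 0; table[i].mask; i++)
		if ((srr1 & table[i].mask) == table[i].value)
			desc = table[i].desc;
	return desc;
}

int main(void)
{
	printf("%s\n", classify(0x0000000000080000ULL)); /* SLB parity error */
	return 0;
}
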
+ */ + if (SRR1_MC_LOADSTORE(srr1) && + (srr1 & 0x081c0000) == 0x08140000) { + srr1 &= ~PPC_BIT(42); + } + + if (SRR1_MC_LOADSTORE(srr1)) { + decode_derror(mce_p10_derror_table, dsisr, type, error_str); + if (*type & MCE_INVOLVED_EA) + *address = dar; + } else { + decode_ierror(mce_p10_ierror_table, srr1, type, error_str); + if (*type & MCE_INVOLVED_EA) + *address = srr0; + } +} + +void decode_mce(uint64_t srr0, uint64_t srr1, + uint32_t dsisr, uint64_t dar, + uint64_t *type, const char **error_str, + uint64_t *address) +{ + *type = MCE_UNKNOWN; + *error_str = "unknown error"; + *address = 0; + + if (proc_gen == proc_gen_p9) { + decode_mce_p9(srr0, srr1, dsisr, dar, type, error_str, address); + } else if (proc_gen == proc_gen_p10) { + decode_mce_p10(srr0, srr1, dsisr, dar, type, error_str, address); + } else { + *error_str = "unknown error (processor not supported)"; + } +} diff --git a/roms/skiboot/core/mem_region.c b/roms/skiboot/core/mem_region.c new file mode 100644 index 000000000..36de2d094 --- /dev/null +++ b/roms/skiboot/core/mem_region.c @@ -0,0 +1,1555 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Simple memory allocator + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <inttypes.h> +#include <skiboot.h> +#include <mem-map.h> +#include <libfdt_env.h> +#include <lock.h> +#include <device.h> +#include <cpu.h> +#include <chip.h> +#include <affinity.h> +#include <types.h> +#include <mem_region.h> +#include <mem_region-malloc.h> + +/* Memory poisoning on free (if POISON_MEM_REGION set to 1) */ +#ifdef DEBUG +#define POISON_MEM_REGION 1 +#else +#define POISON_MEM_REGION 0 +#endif +#define POISON_MEM_REGION_WITH 0x99 +#define POISON_MEM_REGION_LIMIT 1*1024*1024*1024 + +/* Locking: The mem_region_lock protects the regions list from concurrent + * updates. Additions to, or removals from, the region list must be done + * with this lock held. This is typically done when we're establishing + * the memory & reserved regions. + * + * Each region has a lock (region->free_list_lock) to protect the free list + * from concurrent modification. This lock is used when we're allocating + * memory out of a specific region. + * + * If both locks are needed (eg, __local_alloc, where we need to find a region, + * then allocate from it), the mem_region_lock must be acquired before (and + * released after) the per-region lock. 
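
The locking rule spelled out above (take mem_region_lock before any region's free_list_lock, release in the reverse order) looks like this from a caller's point of view. This is only a sketch using POSIX mutexes as stand-ins for skiboot's lock API.

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t mem_region_lock = PTHREAD_MUTEX_INITIALIZER;

struct region {
	pthread_mutex_t free_list_lock;
	/* the region's free list would live here */
};

static struct region some_region = {
	.free_list_lock = PTHREAD_MUTEX_INITIALIZER,
};

static void *alloc_from_some_region(size_t size)
{
	void *p = NULL;

	pthread_mutex_lock(&mem_region_lock);		/* region list lock first */
	pthread_mutex_lock(&some_region.free_list_lock); /* then the region's lock */
	/* ... carve 'size' bytes out of some_region's free list ... */
	(void)size;
	pthread_mutex_unlock(&some_region.free_list_lock);
	pthread_mutex_unlock(&mem_region_lock);		/* release in reverse order */
	return p;
}

int main(void)
{
	alloc_from_some_region(64);
	return 0;
}
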
+ */ +struct lock mem_region_lock = LOCK_UNLOCKED; + +static struct list_head regions = LIST_HEAD_INIT(regions); +static struct list_head early_reserves = LIST_HEAD_INIT(early_reserves); + +static bool mem_region_init_done = false; +static bool mem_regions_finalised = false; + +unsigned long top_of_ram = SKIBOOT_BASE + SKIBOOT_SIZE; + +static struct mem_region skiboot_os_reserve = { + .name = "ibm,os-reserve", + .start = 0, + .len = SKIBOOT_BASE, + .type = REGION_OS, +}; + +struct mem_region skiboot_heap = { + .name = "ibm,firmware-heap", + .start = HEAP_BASE, + .len = HEAP_SIZE, + .type = REGION_SKIBOOT_HEAP, +}; + +static struct mem_region skiboot_code_and_text = { + .name = "ibm,firmware-code", + .start = SKIBOOT_BASE, + .len = HEAP_BASE - SKIBOOT_BASE, + .type = REGION_SKIBOOT_FIRMWARE, +}; + +static struct mem_region skiboot_after_heap = { + .name = "ibm,firmware-data", + .start = HEAP_BASE + HEAP_SIZE, + .len = SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE), + .type = REGION_SKIBOOT_FIRMWARE, +}; + +static struct mem_region skiboot_cpu_stacks = { + .name = "ibm,firmware-stacks", + .start = CPU_STACKS_BASE, + .len = 0, /* TBA */ + .type = REGION_SKIBOOT_FIRMWARE, +}; + +static struct mem_region skiboot_mambo_kernel = { + .name = "ibm,firmware-mambo-kernel", + .start = (unsigned long)KERNEL_LOAD_BASE, + .len = KERNEL_LOAD_SIZE, + .type = REGION_SKIBOOT_FIRMWARE, +}; + +static struct mem_region skiboot_mambo_initramfs = { + .name = "ibm,firmware-mambo-initramfs", + .start = (unsigned long)INITRAMFS_LOAD_BASE, + .len = INITRAMFS_LOAD_SIZE, + .type = REGION_SKIBOOT_FIRMWARE, +}; + + +struct alloc_hdr { + bool free : 1; + bool prev_free : 1; + bool printed : 1; + unsigned long num_longs : BITS_PER_LONG-3; /* Including header. */ + const char *location; +}; + +struct free_hdr { + struct alloc_hdr hdr; + struct list_node list; + /* ... unsigned long tailer; */ +}; + +#define ALLOC_HDR_LONGS (sizeof(struct alloc_hdr) / sizeof(long)) +#define ALLOC_MIN_LONGS (sizeof(struct free_hdr) / sizeof(long) + 1) + +/* Avoid ugly casts. */ +static void *region_start(const struct mem_region *region) +{ + return (void *)(unsigned long)region->start; +} + +/* Each free block has a tailer, so we can walk backwards. */ +static unsigned long *tailer(struct free_hdr *f) +{ + return (unsigned long *)f + f->hdr.num_longs - 1; +} + +/* This walks forward to the next hdr (or NULL if at the end). */ +static struct alloc_hdr *next_hdr(const struct mem_region *region, + const struct alloc_hdr *hdr) +{ + void *next; + + next = ((unsigned long *)hdr + hdr->num_longs); + if (next >= region_start(region) + region->len) + next = NULL; + return next; +} + +#if POISON_MEM_REGION == 1 +static void mem_poison(struct free_hdr *f) +{ + size_t poison_size = (void*)tailer(f) - (void*)(f+1); + + /* We only poison up to a limit, as otherwise boot is + * kinda slow */ + if (poison_size > POISON_MEM_REGION_LIMIT) + poison_size = POISON_MEM_REGION_LIMIT; + + memset(f+1, POISON_MEM_REGION_WITH, poison_size); +} +#endif + +/* Creates free block covering entire region. 
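
The allocator headers above implement classic boundary tags: block sizes are kept in longs and include the header, and every free block ends with a tailer holding its size so the following block can find a free predecessor. A simplified stand-in (not the real alloc_hdr/free_hdr layout) showing the forward and backward pointer arithmetic:

#include <assert.h>
#include <string.h>

/* Simplified boundary tags: sizes are in longs and include the header. */
struct hdr {
	unsigned long num_longs;
	int free;
};

static unsigned long *tail_of(struct hdr *h)
{
	return (unsigned long *)h + h->num_longs - 1;
}

static struct hdr *next_of(struct hdr *h)
{
	return (struct hdr *)((unsigned long *)h + h->num_longs);
}

static struct hdr *prev_of(struct hdr *h)
{
	unsigned long *prev_tail = (unsigned long *)h - 1;

	return (struct hdr *)((unsigned long *)h - *prev_tail);
}

int main(void)
{
	unsigned long arena[32];
	struct hdr *a = (struct hdr *)arena, *b;

	memset(arena, 0, sizeof(arena));
	a->num_longs = 8;
	a->free = 1;
	*tail_of(a) = a->num_longs;	/* tailer lets the next block walk back */

	b = next_of(a);
	b->num_longs = 8;
	b->free = 0;

	assert(prev_of(b) == a);	/* backward walk via a's tailer */
	return 0;
}
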
*/ +static void init_allocatable_region(struct mem_region *region) +{ + struct free_hdr *f = region_start(region); + assert(region->type == REGION_SKIBOOT_HEAP || + region->type == REGION_MEMORY); + f->hdr.num_longs = region->len / sizeof(long); + f->hdr.free = true; + f->hdr.prev_free = false; + *tailer(f) = f->hdr.num_longs; + list_head_init(®ion->free_list); + list_add(®ion->free_list, &f->list); +#if POISON_MEM_REGION == 1 + mem_poison(f); +#endif +} + +static void make_free(struct mem_region *region, struct free_hdr *f, + const char *location, bool skip_poison) +{ + struct alloc_hdr *next; + +#if POISON_MEM_REGION == 1 + if (!skip_poison) + mem_poison(f); +#else + (void)skip_poison; +#endif + + if (f->hdr.prev_free) { + struct free_hdr *prev; + unsigned long *prev_tailer = (unsigned long *)f - 1; + + assert(*prev_tailer); + prev = (void *)((unsigned long *)f - *prev_tailer); + assert(prev->hdr.free); + assert(!prev->hdr.prev_free); + + /* Expand to cover the one we just freed. */ + prev->hdr.num_longs += f->hdr.num_longs; + f = prev; + } else { + f->hdr.free = true; + f->hdr.location = location; + list_add(®ion->free_list, &f->list); + } + + /* Fix up tailer. */ + *tailer(f) = f->hdr.num_longs; + + /* If next is free, coalesce it */ + next = next_hdr(region, &f->hdr); + if (next) { + next->prev_free = true; + if (next->free) { + struct free_hdr *next_free = (void *)next; + list_del_from(®ion->free_list, &next_free->list); + /* Maximum of one level of recursion */ + make_free(region, next_free, location, true); + } + } +} + +/* Can we fit this many longs with this alignment in this free block? */ +static bool fits(struct free_hdr *f, size_t longs, size_t align, size_t *offset) +{ + *offset = 0; + + while (f->hdr.num_longs >= *offset + longs) { + size_t addr; + + addr = (unsigned long)f + + (*offset + ALLOC_HDR_LONGS) * sizeof(long); + if ((addr & (align - 1)) == 0) + return true; + + /* Don't make tiny chunks! */ + if (*offset == 0) + *offset = ALLOC_MIN_LONGS; + else + (*offset)++; + } + return false; +} + +static void discard_excess(struct mem_region *region, + struct alloc_hdr *hdr, size_t alloc_longs, + const char *location, bool skip_poison) +{ + /* Do we have excess? */ + if (hdr->num_longs > alloc_longs + ALLOC_MIN_LONGS) { + struct free_hdr *post; + + /* Set up post block. */ + post = (void *)hdr + alloc_longs * sizeof(long); + post->hdr.num_longs = hdr->num_longs - alloc_longs; + post->hdr.prev_free = false; + + /* Trim our block. */ + hdr->num_longs = alloc_longs; + + /* This coalesces as required. */ + make_free(region, post, location, skip_poison); + } +} + +static const char *hdr_location(const struct alloc_hdr *hdr) +{ + /* Corrupt: step carefully! */ + if (is_rodata(hdr->location)) + return hdr->location; + return "*CORRUPT*"; +} + +static void bad_header(const struct mem_region *region, + const struct alloc_hdr *hdr, + const char *during, + const char *location) +{ + /* Corrupt: step carefully! 
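
fits() above relies on the usual power-of-two trick: addr & (align - 1) is zero exactly when addr is a multiple of align, which is why the allocator only accepts power-of-two alignments. A trivial demonstration:

#include <assert.h>
#include <stdint.h>

static int is_aligned(uintptr_t addr, uintptr_t align)
{
	assert((align & (align - 1)) == 0);	/* power of two only */
	return (addr & (align - 1)) == 0;
}

int main(void)
{
	assert(is_aligned(0x1000, 0x1000));
	assert(!is_aligned(0x1008, 0x1000));
	assert(is_aligned(0x1008, 8));
	return 0;
}
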
*/ + if (is_rodata(hdr->location)) + prerror("%p (in %s) %s at %s, previously %s\n", + hdr-1, region->name, during, location, hdr->location); + else + prerror("%p (in %s) %s at %s, previously %p\n", + hdr-1, region->name, during, location, hdr->location); + abort(); +} + +static bool region_is_reservable(struct mem_region *region) +{ + return region->type != REGION_OS; +} + +static bool region_is_reserved(struct mem_region *region) +{ + return region->type != REGION_OS && region->type != REGION_MEMORY; +} + +void mem_dump_allocs(void) +{ + struct mem_region *region; + struct alloc_hdr *h, *i; + + /* Second pass: populate property data */ + prlog(PR_INFO, "Memory regions:\n"); + list_for_each(®ions, region, list) { + if (!(region->type == REGION_SKIBOOT_HEAP || + region->type == REGION_MEMORY)) + continue; + prlog(PR_INFO, " 0x%012llx..%012llx : %s\n", + (long long)region->start, + (long long)(region->start + region->len - 1), + region->name); + if (region->free_list.n.next == NULL) { + prlog(PR_INFO, " no allocs\n"); + continue; + } + + /* + * XXX: When dumping the allocation list we coalase allocations + * with the same location and size into a single line. This is + * quadratic, but it makes the dump human-readable and the raw + * dump sometimes causes the log buffer to wrap. + */ + for (h = region_start(region); h; h = next_hdr(region, h)) + h->printed = false; + + for (h = region_start(region); h; h = next_hdr(region, h)) { + unsigned long bytes; + int count = 0; + + if (h->free) + continue; + if (h->printed) + continue; + + for (i = h; i; i = next_hdr(region, i)) { + if (i->free) + continue; + if (i->num_longs != h->num_longs) + continue; + if (strcmp(i->location, h->location)) + continue; + + i->printed = true; + count++; + } + + bytes = h->num_longs * sizeof(long); + prlog(PR_NOTICE, " % 8d allocs of 0x%.8lx bytes at %s (total 0x%lx)\n", + count, bytes, hdr_location(h), bytes * count); + } + } +} + +int64_t mem_dump_free(void) +{ + struct mem_region *region; + struct alloc_hdr *hdr; + int64_t total_free; + int64_t region_free; + + total_free = 0; + + prlog(PR_INFO, "Free space in HEAP memory regions:\n"); + list_for_each(®ions, region, list) { + if (!(region->type == REGION_SKIBOOT_HEAP || + region->type == REGION_MEMORY)) + continue; + region_free = 0; + + if (region->free_list.n.next == NULL) { + continue; + } + for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) { + if (!hdr->free) + continue; + + region_free+= hdr->num_longs * sizeof(long); + } + prlog(PR_INFO, "Region %s free: %"PRIx64"\n", + region->name, region_free); + total_free += region_free; + } + + prlog(PR_INFO, "Total free: %"PRIu64"\n", total_free); + + return total_free; +} + +static void *__mem_alloc(struct mem_region *region, size_t size, size_t align, + const char *location) +{ + size_t alloc_longs, offset; + struct free_hdr *f; + struct alloc_hdr *next; + + /* Align must be power of 2. */ + assert(!((align - 1) & align)); + + /* This should be a constant. */ + assert(is_rodata(location)); + + /* Unallocatable region? */ + if (!(region->type == REGION_SKIBOOT_HEAP || + region->type == REGION_MEMORY)) + return NULL; + + /* First allocation? */ + if (region->free_list.n.next == NULL) + init_allocatable_region(region); + + /* Don't do screwy sizes. */ + if (size > region->len) + return NULL; + + /* Don't do tiny alignments, we deal in long increments. */ + if (align < sizeof(long)) + align = sizeof(long); + + /* Convert size to number of longs, too. 
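
The conversion that follows rounds the request up to whole longs and adds the header overhead. A worked example of the arithmetic; the 2-long header matches a 64-bit build of the structures above but is an assumption, not a quoted value.

#include <assert.h>
#include <stddef.h>

#define HDR_LONGS 2	/* assumed sizeof(alloc_hdr) / sizeof(long) on 64-bit */

static size_t bytes_to_alloc_longs(size_t size)
{
	return (size + sizeof(long) - 1) / sizeof(long) + HDR_LONGS;
}

int main(void)
{
	if (sizeof(long) == 8) {
		assert(bytes_to_alloc_longs(1) == 1 + HDR_LONGS);
		assert(bytes_to_alloc_longs(8) == 1 + HDR_LONGS);
		assert(bytes_to_alloc_longs(100) == 13 + HDR_LONGS); /* ceil(100/8) */
	}
	return 0;
}
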
*/ + alloc_longs = (size + sizeof(long)-1) / sizeof(long) + ALLOC_HDR_LONGS; + + /* Can't be too small for when we free it, either. */ + if (alloc_longs < ALLOC_MIN_LONGS) + alloc_longs = ALLOC_MIN_LONGS; + + /* Walk free list. */ + list_for_each(®ion->free_list, f, list) { + /* We may have to skip some to meet alignment. */ + if (fits(f, alloc_longs, align, &offset)) + goto found; + } + + return NULL; + +found: + assert(f->hdr.free); + assert(!f->hdr.prev_free); + + /* This block is no longer free. */ + list_del_from(®ion->free_list, &f->list); + f->hdr.free = false; + f->hdr.location = location; + + next = next_hdr(region, &f->hdr); + if (next) { + assert(next->prev_free); + next->prev_free = false; + } + + if (offset != 0) { + struct free_hdr *pre = f; + + f = (void *)f + offset * sizeof(long); + assert(f >= pre + 1); + + /* Set up new header. */ + f->hdr.num_longs = pre->hdr.num_longs - offset; + /* f->hdr.prev_free will be set by make_free below. */ + f->hdr.free = false; + f->hdr.location = location; + + /* Fix up old header. */ + pre->hdr.num_longs = offset; + pre->hdr.prev_free = false; + + /* This coalesces as required. */ + make_free(region, pre, location, true); + } + + /* We might be too long; put the rest back. */ + discard_excess(region, &f->hdr, alloc_longs, location, true); + + /* Clear tailer for debugging */ + *tailer(f) = 0; + + /* Their pointer is immediately after header. */ + return &f->hdr + 1; +} + +void *mem_alloc(struct mem_region *region, size_t size, size_t align, + const char *location) +{ + static bool dumped = false; + void *r; + + assert(lock_held_by_me(®ion->free_list_lock)); + + r = __mem_alloc(region, size, align, location); + if (r) + return r; + + prerror("mem_alloc(0x%lx, 0x%lx, \"%s\", %s) failed !\n", + size, align, location, region->name); + if (!dumped) { + mem_dump_allocs(); + dumped = true; + } + + return NULL; +} + +void mem_free(struct mem_region *region, void *mem, const char *location) +{ + struct alloc_hdr *hdr; + + /* This should be a constant. */ + assert(is_rodata(location)); + + assert(lock_held_by_me(®ion->free_list_lock)); + + /* Freeing NULL is always a noop. */ + if (!mem) + return; + + /* Your memory is in the region, right? */ + assert(mem >= region_start(region) + sizeof(*hdr)); + assert(mem < region_start(region) + region->len); + + /* Grab header. */ + hdr = mem - sizeof(*hdr); + + if (hdr->free) + bad_header(region, hdr, "re-freed", location); + + make_free(region, (struct free_hdr *)hdr, location, false); +} + +size_t mem_allocated_size(const void *ptr) +{ + const struct alloc_hdr *hdr = ptr - sizeof(*hdr); + return hdr->num_longs * sizeof(long) - sizeof(struct alloc_hdr); +} + +bool mem_resize(struct mem_region *region, void *mem, size_t len, + const char *location) +{ + struct alloc_hdr *hdr, *next; + struct free_hdr *f; + + /* This should be a constant. */ + assert(is_rodata(location)); + + assert(lock_held_by_me(®ion->free_list_lock)); + + /* Get header. */ + hdr = mem - sizeof(*hdr); + if (hdr->free) + bad_header(region, hdr, "resize", location); + + /* Round up size to multiple of longs. */ + len = (sizeof(*hdr) + len + sizeof(long) - 1) / sizeof(long); + + /* Can't be too small for when we free it, either. */ + if (len < ALLOC_MIN_LONGS) + len = ALLOC_MIN_LONGS; + + /* Shrinking is simple. */ + if (len <= hdr->num_longs) { + hdr->location = location; + discard_excess(region, hdr, len, location, false); + return true; + } + + /* Check if we can expand. 
*/ + next = next_hdr(region, hdr); + if (!next || !next->free || hdr->num_longs + next->num_longs < len) + return false; + + /* OK, it's free and big enough, absorb it. */ + f = (struct free_hdr *)next; + list_del_from(®ion->free_list, &f->list); + hdr->num_longs += next->num_longs; + hdr->location = location; + + /* Update next prev_free */ + next = next_hdr(region, &f->hdr); + if (next) { + assert(next->prev_free); + next->prev_free = false; + } + + /* Clear tailer for debugging */ + *tailer(f) = 0; + + /* Now we might have *too* much. */ + discard_excess(region, hdr, len, location, true); + return true; +} + +bool mem_check(const struct mem_region *region) +{ + size_t frees = 0; + struct alloc_hdr *hdr, *prev_free = NULL; + struct free_hdr *f; + + /* Check it's sanely aligned. */ + if (region->start % sizeof(long)) { + prerror("Region '%s' not sanely aligned (%llx)\n", + region->name, (unsigned long long)region->start); + return false; + } + if ((long)region->len % sizeof(long)) { + prerror("Region '%s' not sane length (%llu)\n", + region->name, (unsigned long long)region->len); + return false; + } + + /* Not ours to play with, or empty? Don't do anything. */ + if (!(region->type == REGION_MEMORY || + region->type == REGION_SKIBOOT_HEAP) || + region->free_list.n.next == NULL) + return true; + + /* Walk linearly. */ + for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) { + if (hdr->num_longs < ALLOC_MIN_LONGS) { + prerror("Region '%s' %s %p (%s) size %zu\n", + region->name, hdr->free ? "free" : "alloc", + hdr, hdr_location(hdr), + hdr->num_longs * sizeof(long)); + return false; + } + if ((unsigned long)hdr + hdr->num_longs * sizeof(long) > + region->start + region->len) { + prerror("Region '%s' %s %p (%s) oversize %zu\n", + region->name, hdr->free ? "free" : "alloc", + hdr, hdr_location(hdr), + hdr->num_longs * sizeof(long)); + return false; + } + if (hdr->free) { + if (hdr->prev_free || prev_free) { + prerror("Region '%s' free %p (%s) has prev_free" + " %p (%s) %sset?\n", + region->name, hdr, hdr_location(hdr), + prev_free, + prev_free ? hdr_location(prev_free) + : "NULL", + hdr->prev_free ? "" : "un"); + return false; + } + prev_free = hdr; + frees ^= (unsigned long)hdr - region->start; + } else { + if (hdr->prev_free != (bool)prev_free) { + prerror("Region '%s' alloc %p (%s) has" + " prev_free %p %sset?\n", + region->name, hdr, hdr_location(hdr), + prev_free, hdr->prev_free ? "" : "un"); + return false; + } + prev_free = NULL; + } + } + + /* Now walk free list. */ + list_for_each(®ion->free_list, f, list) + frees ^= (unsigned long)f - region->start; + + if (frees) { + prerror("Region '%s' free list and walk do not match!\n", + region->name); + return false; + } + return true; +} + +bool mem_check_all(void) +{ + struct mem_region *r; + + list_for_each(®ions, r, list) { + if (!mem_check(r)) + return false; + } + + return true; +} + +static struct mem_region *new_region(const char *name, + uint64_t start, uint64_t len, + struct dt_node *node, + enum mem_region_type type) +{ + struct mem_region *region; + + region = malloc(sizeof(*region)); + if (!region) + return NULL; + + region->name = name; + region->start = start; + region->len = len; + region->node = node; + region->type = type; + region->free_list.n.next = NULL; + init_lock(®ion->free_list_lock); + + return region; +} + +/* We always split regions, so we only have to replace one. 
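
mem_check() above cross-checks the linear walk against the free list with an XOR accumulator: each pass XORs in the offset of every free block it sees, so the accumulator returns to zero only if both passes saw the same set. It is cheap and order-independent, good enough to catch the common corruption cases. A toy version:

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

int main(void)
{
	uint64_t walk[] = { 0x100, 0x240, 0x400 };	/* offsets seen in the walk */
	uint64_t list[] = { 0x400, 0x100, 0x240 };	/* same set, any order */
	uint64_t acc = 0;
	size_t i;

	for (i = 0; i < 3; i++)
		acc ^= walk[i];
	for (i = 0; i < 3; i++)
		acc ^= list[i];
	assert(acc == 0);	/* sets match */

	acc ^= 0x240;		/* simulate a block missing from the free list */
	assert(acc != 0);	/* mismatch detected */
	return 0;
}
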
*/ +static struct mem_region *split_region(struct mem_region *head, + uint64_t split_at, + enum mem_region_type type) +{ + struct mem_region *tail; + uint64_t end = head->start + head->len; + + tail = new_region(head->name, split_at, end - split_at, + head->node, type); + /* Original region becomes head. */ + if (tail) + head->len -= tail->len; + + return tail; +} + +static bool intersects(const struct mem_region *region, uint64_t addr) +{ + return addr > region->start && + addr < region->start + region->len; +} + +static bool maybe_split(struct mem_region *r, uint64_t split_at) +{ + struct mem_region *tail; + + if (!intersects(r, split_at)) + return true; + + tail = split_region(r, split_at, r->type); + if (!tail) + return false; + + /* Tail add is important: we may need to split again! */ + list_add_after(®ions, &tail->list, &r->list); + return true; +} + +static bool overlaps(const struct mem_region *r1, const struct mem_region *r2) +{ + return (r1->start + r1->len > r2->start + && r1->start < r2->start + r2->len); +} + +static bool contains(const struct mem_region *r1, const struct mem_region *r2) +{ + u64 r1_end = r1->start + r1->len; + u64 r2_end = r2->start + r2->len; + + return (r1->start <= r2->start && r2_end <= r1_end); +} + +static struct mem_region *get_overlap(const struct mem_region *region) +{ + struct mem_region *i; + + list_for_each(®ions, i, list) { + if (overlaps(region, i)) + return i; + } + return NULL; +} + +static void add_region_to_regions(struct mem_region *region) +{ + struct mem_region *r; + + list_for_each(®ions, r, list) { + if (r->start < region->start) + continue; + + list_add_before(®ions, ®ion->list, &r->list); + return; + } + list_add_tail(®ions, ®ion->list); +} + +static bool add_region(struct mem_region *region) +{ + struct mem_region *r; + + if (mem_regions_finalised) { + prerror("MEM: add_region(%s@0x%"PRIx64") called after finalise!\n", + region->name, region->start); + return false; + } + + /* First split any regions which intersect. */ + list_for_each(®ions, r, list) { + /* + * The new region should be fully contained by an existing one. + * If it's not then we have a problem where reservations + * partially overlap which is probably broken. + * + * NB: There *might* be situations where this is legitimate, + * but the region handling does not currently support this. + */ + if (overlaps(r, region) && !contains(r, region)) { + prerror("MEM: Partial overlap detected between regions:\n"); + prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"] (new)\n", + region->name, region->start, + region->start + region->len); + prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"]\n", + r->name, r->start, r->start + r->len); + return false; + } + + if (!maybe_split(r, region->start) || + !maybe_split(r, region->start + region->len)) + return false; + } + + /* Now we have only whole overlaps, if any. */ + while ((r = get_overlap(region)) != NULL) { + assert(r->start == region->start); + assert(r->len == region->len); + list_del_from(®ions, &r->list); + free(r); + } + + /* Finally, add in our own region. 
*/ + add_region_to_regions(region); + return true; +} + +static void mem_reserve(enum mem_region_type type, const char *name, + uint64_t start, uint64_t len) +{ + struct mem_region *region; + bool added = true; + + lock(&mem_region_lock); + region = new_region(name, start, len, NULL, type); + assert(region); + + if (!mem_region_init_done) + list_add(&early_reserves, ®ion->list); + else + added = add_region(region); + + assert(added); + unlock(&mem_region_lock); +} + +void mem_reserve_fw(const char *name, uint64_t start, uint64_t len) +{ + mem_reserve(REGION_FW_RESERVED, name, start, len); +} + +void mem_reserve_hwbuf(const char *name, uint64_t start, uint64_t len) +{ + mem_reserve(REGION_RESERVED, name, start, len); +} + +static bool matches_chip_id(const __be32 ids[], size_t num, u32 chip_id) +{ + size_t i; + + for (i = 0; i < num; i++) + if (be32_to_cpu(ids[i]) == chip_id) + return true; + + return false; +} + +void *__local_alloc(unsigned int chip_id, size_t size, size_t align, + const char *location) +{ + struct mem_region *region; + void *p = NULL; + bool use_local = true; + + lock(&mem_region_lock); + +restart: + list_for_each(®ions, region, list) { + const struct dt_property *prop; + const __be32 *ids; + + if (!(region->type == REGION_SKIBOOT_HEAP || + region->type == REGION_MEMORY)) + continue; + + /* Don't allocate from normal heap. */ + if (region == &skiboot_heap) + continue; + + /* First pass, only match node local regions */ + if (use_local) { + if (!region->node) + continue; + prop = dt_find_property(region->node, "ibm,chip-id"); + ids = (const __be32 *)prop->prop; + if (!matches_chip_id(ids, prop->len/sizeof(u32), + chip_id)) + continue; + } + + /* Second pass, match anything */ + lock(®ion->free_list_lock); + p = mem_alloc(region, size, align, location); + unlock(®ion->free_list_lock); + if (p) + break; + } + + /* + * If we can't allocate the memory block from the expected + * node, we bail to any one that can accommodate our request. + */ + if (!p && use_local) { + use_local = false; + goto restart; + } + + unlock(&mem_region_lock); + + return p; +} + +struct mem_region *find_mem_region(const char *name) +{ + struct mem_region *region; + + list_for_each(®ions, region, list) { + if (streq(region->name, name)) + return region; + } + return NULL; +} + +bool mem_range_is_reserved(uint64_t start, uint64_t size) +{ + uint64_t end = start + size; + struct mem_region *region; + struct list_head *search; + + /* We may have the range covered by a number of regions, which could + * appear in any order. So, we look for a region that covers the + * start address, and bump start up to the end of that region. + * + * We repeat until we've either bumped past the end of the range, + * or we didn't find a matching region. + * + * This has a worst-case of O(n^2), but n is well bounded by the + * small number of reservations. + */ + + if (!mem_region_init_done) + search = &early_reserves; + else + search = ®ions; + + for (;;) { + bool found = false; + + list_for_each(search, region, list) { + if (!region_is_reserved(region)) + continue; + + /* does this region overlap the start address, and + * have a non-zero size? 
*/ + if (region->start <= start && + region->start + region->len > start && + region->len) { + start = region->start + region->len; + found = true; + } + } + + /* 'end' is the first byte outside of the range */ + if (start >= end) + return true; + + if (!found) + break; + } + + return false; +} + +static void mem_region_parse_reserved_properties(void) +{ + const struct dt_property *names, *ranges; + struct mem_region *region; + + prlog(PR_DEBUG, "MEM: parsing reserved memory from " + "reserved-names/-ranges properties\n"); + + names = dt_find_property(dt_root, "reserved-names"); + ranges = dt_find_property(dt_root, "reserved-ranges"); + if (names && ranges) { + const uint64_t *range; + int n, len; + + range = (const void *)ranges->prop; + + for (n = 0; n < names->len; n += len, range += 2) { + char *name; + + len = strlen(names->prop + n) + 1; + name = strdup(names->prop + n); + + region = new_region(name, + dt_get_number(range, 2), + dt_get_number(range + 1, 2), + NULL, REGION_FW_RESERVED); + if (!add_region(region)) { + prerror("Couldn't add mem_region %s\n", name); + abort(); + } + } + } else if (names || ranges) { + prerror("Invalid properties: reserved-names=%p " + "with reserved-ranges=%p\n", + names, ranges); + abort(); + } else { + return; + } +} + +static bool mem_region_parse_reserved_nodes(const char *path) +{ + struct dt_node *parent, *node; + + parent = dt_find_by_path(dt_root, path); + if (!parent) + return false; + + prlog(PR_INFO, "MEM: parsing reserved memory from node %s\n", path); + + dt_for_each_child(parent, node) { + const struct dt_property *reg; + struct mem_region *region; + int type; + + reg = dt_find_property(node, "reg"); + if (!reg) { + char *nodepath = dt_get_path(node); + prerror("node %s has no reg property, ignoring\n", + nodepath); + free(nodepath); + continue; + } + + if (dt_has_node_property(node, "no-map", NULL)) + type = REGION_RESERVED; + else + type = REGION_FW_RESERVED; + + region = new_region(strdup(node->name), + dt_get_number(reg->prop, 2), + dt_get_number(reg->prop + sizeof(u64), 2), + node, type); + if (!add_region(region)) { + char *nodepath = dt_get_path(node); + prerror("node %s failed to add_region()\n", nodepath); + free(nodepath); + } + } + + return true; +} + +/* Trawl through device tree, create memory regions from nodes. */ +void mem_region_init(void) +{ + struct mem_region *region, *next; + struct dt_node *i; + bool rc; + + /* + * Add associativity properties outside of the lock + * to avoid recursive locking caused by allocations + * done by add_chip_dev_associativity() + */ + dt_for_each_node(dt_root, i) { + if (!dt_has_node_property(i, "device_type", "memory") && + !dt_has_node_property(i, "compatible", "pmem-region")) + continue; + + /* Add associativity properties */ + add_chip_dev_associativity(i); + } + + /* Add each memory node. 
*/ + dt_for_each_node(dt_root, i) { + uint64_t start, len; + char *rname; +#define NODE_REGION_PREFIX "ibm,firmware-allocs-" + + if (!dt_has_node_property(i, "device_type", "memory")) + continue; + rname = zalloc(strlen(i->name) + strlen(NODE_REGION_PREFIX) + 1); + assert(rname); + strcat(rname, NODE_REGION_PREFIX); + strcat(rname, i->name); + start = dt_get_address(i, 0, &len); + lock(&mem_region_lock); + region = new_region(rname, start, len, i, REGION_MEMORY); + if (!region) { + prerror("MEM: Could not add mem region %s!\n", i->name); + abort(); + } + add_region_to_regions(region); + if ((start + len) > top_of_ram) + top_of_ram = start + len; + unlock(&mem_region_lock); + } + + /* + * This is called after we know the maximum PIR of all CPUs, + * so we can dynamically set the stack length. + */ + skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE; + + lock(&mem_region_lock); + + /* Now carve out our own reserved areas. */ + if (!add_region(&skiboot_os_reserve) || + !add_region(&skiboot_code_and_text) || + !add_region(&skiboot_heap) || + !add_region(&skiboot_after_heap) || + !add_region(&skiboot_cpu_stacks)) { + prerror("Out of memory adding skiboot reserved areas\n"); + abort(); + } + + if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) { + if (!add_region(&skiboot_mambo_kernel) || + !add_region(&skiboot_mambo_initramfs)) { + prerror("Out of memory adding mambo payload\n"); + abort(); + } + } + + /* Add reserved reanges from HDAT */ + list_for_each_safe(&early_reserves, region, next, list) { + bool added; + + list_del(®ion->list); + added = add_region(region); + assert(added); + } + + /* Add reserved ranges from the DT */ + rc = mem_region_parse_reserved_nodes("/reserved-memory"); + if (!rc) + rc = mem_region_parse_reserved_nodes( + "/ibm,hostboot/reserved-memory"); + if (!rc) + mem_region_parse_reserved_properties(); + + mem_region_init_done = true; + unlock(&mem_region_lock); +} + +static uint64_t allocated_length(const struct mem_region *r) +{ + struct free_hdr *f, *last = NULL; + + /* No allocations at all? */ + if (r->free_list.n.next == NULL) + return 0; + + /* Find last free block. */ + list_for_each(&r->free_list, f, list) + if (f > last) + last = f; + + /* No free blocks? */ + if (!last) + return r->len; + + /* Last free block isn't at end? */ + if (next_hdr(r, &last->hdr)) + return r->len; + return (unsigned long)last - r->start; +} + +/* Separate out allocated sections into their own region. */ +void mem_region_release_unused(void) +{ + struct mem_region *r; + + lock(&mem_region_lock); + assert(!mem_regions_finalised); + + prlog(PR_INFO, "Releasing unused memory:\n"); + list_for_each(®ions, r, list) { + uint64_t used_len; + + /* If it's not allocatable, ignore it. */ + if (!(r->type == REGION_SKIBOOT_HEAP || + r->type == REGION_MEMORY)) + continue; + + used_len = allocated_length(r); + + prlog(PR_INFO, " %s: %llu/%llu used\n", + r->name, (long long)used_len, (long long)r->len); + + /* We keep the skiboot heap. */ + if (r == &skiboot_heap) + continue; + + /* Nothing used? Whole thing is for Linux. */ + if (used_len == 0) + r->type = REGION_OS; + /* Partially used? Split region. */ + else if (used_len != r->len) { + struct mem_region *for_linux; + struct free_hdr *last = region_start(r) + used_len; + + /* Remove the final free block. 
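
The memory-node loop above builds each region name by concatenating a fixed prefix with the node name, sizing the buffer as strlen(prefix) + strlen(name) + 1 for the terminator. The same construction in isolation, with calloc() standing in for zalloc():

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define NODE_REGION_PREFIX "ibm,firmware-allocs-"

static char *region_name_for(const char *node_name)
{
	char *rname = calloc(1, strlen(NODE_REGION_PREFIX) + strlen(node_name) + 1);

	if (!rname)
		return NULL;
	strcat(rname, NODE_REGION_PREFIX);	/* buffer starts zeroed */
	strcat(rname, node_name);
	return rname;
}

int main(void)
{
	char *n = region_name_for("memory@0");

	assert(n && strcmp(n, "ibm,firmware-allocs-memory@0") == 0);
	free(n);
	return 0;
}
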
*/ + list_del_from(&r->free_list, &last->list); + + for_linux = split_region(r, r->start + used_len, + REGION_OS); + if (!for_linux) { + prerror("OOM splitting mem node %s for linux\n", + r->name); + abort(); + } + list_add(®ions, &for_linux->list); + } + } + unlock(&mem_region_lock); +} + +static void mem_clear_range(uint64_t s, uint64_t e) +{ + uint64_t res_start, res_end; + + /* Skip exception vectors */ + if (s < EXCEPTION_VECTORS_END) + s = EXCEPTION_VECTORS_END; + + /* Skip kernel preload area */ + res_start = (uint64_t)KERNEL_LOAD_BASE; + res_end = res_start + KERNEL_LOAD_SIZE; + + if (s >= res_start && s < res_end) + s = res_end; + if (e > res_start && e <= res_end) + e = res_start; + if (e <= s) + return; + if (s < res_start && e > res_end) { + mem_clear_range(s, res_start); + mem_clear_range(res_end, e); + return; + } + + /* Skip initramfs preload area */ + res_start = (uint64_t)INITRAMFS_LOAD_BASE; + res_end = res_start + INITRAMFS_LOAD_SIZE; + + if (s >= res_start && s < res_end) + s = res_end; + if (e > res_start && e <= res_end) + e = res_start; + if (e <= s) + return; + if (s < res_start && e > res_end) { + mem_clear_range(s, res_start); + mem_clear_range(res_end, e); + return; + } + + prlog(PR_DEBUG, "Clearing region %llx-%llx\n", + (long long)s, (long long)e); + memset((void *)s, 0, e - s); +} + +struct mem_region_clear_job_args { + char *job_name; + uint64_t s,e; +}; + +static void mem_region_clear_job(void *data) +{ + struct mem_region_clear_job_args *arg = (struct mem_region_clear_job_args*)data; + mem_clear_range(arg->s, arg->e); +} + +#define MEM_REGION_CLEAR_JOB_SIZE (16ULL*(1<<30)) + +static struct cpu_job **mem_clear_jobs; +static struct mem_region_clear_job_args *mem_clear_job_args; +static int mem_clear_njobs = 0; + +void start_mem_region_clear_unused(void) +{ + struct mem_region *r; + uint64_t s,l; + uint64_t total = 0; + uint32_t chip_id; + char *path; + int i; + struct cpu_job **jobs; + struct mem_region_clear_job_args *job_args; + + lock(&mem_region_lock); + assert(mem_regions_finalised); + + mem_clear_njobs = 0; + + list_for_each(®ions, r, list) { + if (!(r->type == REGION_OS)) + continue; + mem_clear_njobs++; + /* One job per 16GB */ + mem_clear_njobs += r->len / MEM_REGION_CLEAR_JOB_SIZE; + } + + jobs = malloc(mem_clear_njobs * sizeof(struct cpu_job*)); + job_args = malloc(mem_clear_njobs * sizeof(struct mem_region_clear_job_args)); + mem_clear_jobs = jobs; + mem_clear_job_args = job_args; + + prlog(PR_NOTICE, "Clearing unused memory:\n"); + i = 0; + list_for_each(®ions, r, list) { + /* If it's not unused, ignore it. 
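
mem_clear_range() above trims the range against each reserved window (exception vectors, kernel and initramfs preload areas) and recurses when a window falls in the middle, splitting the clear in two. The same shape with a single window, counting bytes instead of calling memset():

#include <assert.h>
#include <stdint.h>

static uint64_t cleared;

static void clear_range(uint64_t s, uint64_t e,
			uint64_t res_start, uint64_t res_end)
{
	if (s >= res_start && s < res_end)	/* starts inside the window */
		s = res_end;
	if (e > res_start && e <= res_end)	/* ends inside the window */
		e = res_start;
	if (e <= s)
		return;
	if (s < res_start && e > res_end) {	/* window splits the range */
		clear_range(s, res_start, res_start, res_end);
		clear_range(res_end, e, res_start, res_end);
		return;
	}
	cleared += e - s;			/* memset() in the real code */
}

int main(void)
{
	/* Clear 0x0-0x10000 while skipping a 0x1000 window at 0x4000. */
	clear_range(0x0, 0x10000, 0x4000, 0x5000);
	assert(cleared == 0x10000 - 0x1000);
	return 0;
}
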
*/ + if (!(r->type == REGION_OS)) + continue; + + assert(r != &skiboot_heap); + + s = r->start; + l = r->len; + while(l > MEM_REGION_CLEAR_JOB_SIZE) { + job_args[i].s = s+l - MEM_REGION_CLEAR_JOB_SIZE; + job_args[i].e = s+l; + l-=MEM_REGION_CLEAR_JOB_SIZE; + job_args[i].job_name = malloc(sizeof(char)*100); + total+=MEM_REGION_CLEAR_JOB_SIZE; + chip_id = __dt_get_chip_id(r->node); + if (chip_id == -1) + chip_id = 0; + path = dt_get_path(r->node); + snprintf(job_args[i].job_name, 100, + "clear %s, %s 0x%"PRIx64" len: %"PRIx64" on %d", + r->name, path, + job_args[i].s, + (job_args[i].e - job_args[i].s), + chip_id); + free(path); + jobs[i] = cpu_queue_job_on_node(chip_id, + job_args[i].job_name, + mem_region_clear_job, + &job_args[i]); + if (!jobs[i]) + jobs[i] = cpu_queue_job(NULL, + job_args[i].job_name, + mem_region_clear_job, + &job_args[i]); + assert(jobs[i]); + i++; + } + job_args[i].s = s; + job_args[i].e = s+l; + job_args[i].job_name = malloc(sizeof(char)*100); + total+=l; + chip_id = __dt_get_chip_id(r->node); + if (chip_id == -1) + chip_id = 0; + path = dt_get_path(r->node); + snprintf(job_args[i].job_name,100, + "clear %s, %s 0x%"PRIx64" len: 0x%"PRIx64" on %d", + r->name, path, + job_args[i].s, + (job_args[i].e - job_args[i].s), + chip_id); + free(path); + jobs[i] = cpu_queue_job_on_node(chip_id, + job_args[i].job_name, + mem_region_clear_job, + &job_args[i]); + if (!jobs[i]) + jobs[i] = cpu_queue_job(NULL, + job_args[i].job_name, + mem_region_clear_job, + &job_args[i]); + assert(jobs[i]); + i++; + } + unlock(&mem_region_lock); + cpu_process_local_jobs(); +} + +void wait_mem_region_clear_unused(void) +{ + uint64_t l; + uint64_t total = 0; + int i; + + for(i=0; i < mem_clear_njobs; i++) { + total += (mem_clear_job_args[i].e - mem_clear_job_args[i].s); + } + + l = 0; + for(i=0; i < mem_clear_njobs; i++) { + cpu_wait_job(mem_clear_jobs[i], true); + l += (mem_clear_job_args[i].e - mem_clear_job_args[i].s); + printf("Clearing memory... %"PRIu64"/%"PRIu64"GB done\n", + l>>30, total>>30); + free(mem_clear_job_args[i].job_name); + } + free(mem_clear_jobs); + free(mem_clear_job_args); +} + +static void mem_region_add_dt_reserved_node(struct dt_node *parent, + struct mem_region *region) +{ + char *name, *p; + + /* If a reserved region was established before skiboot, it may be + * referenced by a device-tree node with extra data. In that case, + * copy the node to /reserved-memory/, unless it's already there. + * + * We update region->node to the new copy here, as the prd code may + * update regions' device-tree nodes, and we want those updates to + * apply to the nodes in /reserved-memory/. + */ + if (region->type == REGION_FW_RESERVED && region->node) { + if (region->node->parent != parent) + region->node = dt_copy(region->node, parent); + return; + } + + name = strdup(region->name); + assert(name); + + /* remove any cell addresses in the region name; we have our own cell + * addresses here */ + p = strchr(name, '@'); + if (p) + *p = '\0'; + + region->node = dt_new_addr(parent, name, region->start); + assert(region->node); + dt_add_property_u64s(region->node, "reg", region->start, region->len); + + /* + * This memory is used by hardware and may need special handling. Ask + * the host kernel not to map it by default. 
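
The job loop above shards each REGION_OS region into one clearing job per 16GB, peeling fixed-size chunks off the end and queueing a final job for the remainder, so large regions can be scrubbed in parallel. The chunking arithmetic on its own, with an invented 40GB region:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CHUNK (16ULL * (1ULL << 30))	/* one job per 16GB, as above */

int main(void)
{
	uint64_t s = 0, l = 40ULL << 30;	/* 40GB region (example value) */
	int jobs = 0;

	while (l > CHUNK) {			/* peel chunks off the end */
		printf("job: 0x%llx-0x%llx\n",
		       (unsigned long long)(s + l - CHUNK),
		       (unsigned long long)(s + l));
		l -= CHUNK;
		jobs++;
	}
	printf("job: 0x%llx-0x%llx\n",		/* remainder at the start */
	       (unsigned long long)s, (unsigned long long)(s + l));
	jobs++;

	assert(jobs == 3);	/* 16GB + 16GB + 8GB */
	return 0;
}
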
+ */ + if (region->type == REGION_RESERVED) + dt_add_property(region->node, "no-map", NULL, 0); + + free(name); +} + +void mem_region_add_dt_reserved(void) +{ + int names_len, ranges_len, len; + const struct dt_property *prop; + struct mem_region *region; + void *names, *ranges; + struct dt_node *node; + fdt64_t *range; + char *name; + + names_len = 0; + ranges_len = 0; + + /* Finalise the region list, so we know that the regions list won't be + * altered after this point. The regions' free lists may change after + * we drop the lock, but we don't access those. */ + lock(&mem_region_lock); + mem_regions_finalised = true; + + /* establish top-level reservation node */ + node = dt_find_by_path(dt_root, "reserved-memory"); + if (!node) { + node = dt_new(dt_root, "reserved-memory"); + dt_add_property_cells(node, "#address-cells", 2); + dt_add_property_cells(node, "#size-cells", 2); + dt_add_property(node, "ranges", NULL, 0); + } + + prlog(PR_INFO, "Reserved regions:\n"); + + /* First pass, create /reserved-memory/ nodes for each reservation, + * and calculate the length for the /reserved-names and + * /reserved-ranges properties */ + list_for_each(®ions, region, list) { + if (!region_is_reservable(region)) + continue; + + prlog(PR_INFO, " 0x%012llx..%012llx : %s\n", + (long long)region->start, + (long long)(region->start + region->len - 1), + region->name); + + mem_region_add_dt_reserved_node(node, region); + + /* calculate the size of the properties populated later */ + names_len += strlen(region->node->name) + 1; + ranges_len += 2 * sizeof(uint64_t); + } + + name = names = malloc(names_len); + range = ranges = malloc(ranges_len); + + /* Second pass: populate the old-style reserved-names and + * reserved-regions arrays based on the node data */ + list_for_each(®ions, region, list) { + if (!region_is_reservable(region)) + continue; + + len = strlen(region->node->name) + 1; + memcpy(name, region->node->name, len); + name += len; + + range[0] = cpu_to_fdt64(region->start); + range[1] = cpu_to_fdt64(region->len); + range += 2; + } + unlock(&mem_region_lock); + + prop = dt_find_property(dt_root, "reserved-names"); + if (prop) + dt_del_property(dt_root, (struct dt_property *)prop); + + prop = dt_find_property(dt_root, "reserved-ranges"); + if (prop) + dt_del_property(dt_root, (struct dt_property *)prop); + + dt_add_property(dt_root, "reserved-names", names, names_len); + dt_add_property(dt_root, "reserved-ranges", ranges, ranges_len); + + free(names); + free(ranges); +} + +struct mem_region *mem_region_next(struct mem_region *region) +{ + struct list_node *node; + + assert(lock_held_by_me(&mem_region_lock)); + + node = region ? ®ion->list : ®ions.n; + + if (node->next == ®ions.n) + return NULL; + + return list_entry(node->next, struct mem_region, list); +} diff --git a/roms/skiboot/core/nvram-format.c b/roms/skiboot/core/nvram-format.c new file mode 100644 index 000000000..8aa5abf22 --- /dev/null +++ b/roms/skiboot/core/nvram-format.c @@ -0,0 +1,331 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * NVRAM Format as specified in PAPR + * + * Copyright 2013-2019 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <nvram.h> + +struct chrp_nvram_hdr { + uint8_t sig; + uint8_t cksum; + be16 len; + char name[12]; +}; + +static struct chrp_nvram_hdr *skiboot_part_hdr; + +#define NVRAM_SIG_FW_PRIV 0x51 +#define NVRAM_SIG_SYSTEM 0x70 +#define NVRAM_SIG_FREE 0x7f + +#define NVRAM_NAME_COMMON "common" +#define NVRAM_NAME_FW_PRIV "ibm,skiboot" +#define NVRAM_NAME_FREE "wwwwwwwwwwww" + +/* 64k should be enough, famous last words... */ +#define NVRAM_SIZE_COMMON 0x10000 + +/* 4k should be enough, famous last words... */ +#define NVRAM_SIZE_FW_PRIV 0x1000 + +static uint8_t chrp_nv_cksum(struct chrp_nvram_hdr *hdr) +{ + struct chrp_nvram_hdr h_copy = *hdr; + uint8_t b_data, i_sum, c_sum; + uint8_t *p = (uint8_t *)&h_copy; + unsigned int nbytes = sizeof(h_copy); + + h_copy.cksum = 0; + for (c_sum = 0; nbytes; nbytes--) { + b_data = *(p++); + i_sum = c_sum + b_data; + if (i_sum < c_sum) + i_sum++; + c_sum = i_sum; + } + return c_sum; +} + +int nvram_format(void *nvram_image, uint32_t nvram_size) +{ + struct chrp_nvram_hdr *h; + unsigned int offset = 0; + + prerror("NVRAM: Re-initializing (size: 0x%08x)\n", nvram_size); + memset(nvram_image, 0, nvram_size); + + /* Create private partition */ + if (nvram_size - offset < NVRAM_SIZE_FW_PRIV) + return -1; + h = nvram_image + offset; + h->sig = NVRAM_SIG_FW_PRIV; + h->len = cpu_to_be16(NVRAM_SIZE_FW_PRIV >> 4); + strcpy(h->name, NVRAM_NAME_FW_PRIV); + h->cksum = chrp_nv_cksum(h); + prlog(PR_DEBUG, "NVRAM: Created '%s' partition at 0x%08x" + " for size 0x%08x with cksum 0x%02x\n", + NVRAM_NAME_FW_PRIV, offset, + be16_to_cpu(h->len), h->cksum); + offset += NVRAM_SIZE_FW_PRIV; + + /* Create common partition */ + if (nvram_size - offset < NVRAM_SIZE_COMMON) + return -1; + h = nvram_image + offset; + h->sig = NVRAM_SIG_SYSTEM; + h->len = cpu_to_be16(NVRAM_SIZE_COMMON >> 4); + strcpy(h->name, NVRAM_NAME_COMMON); + h->cksum = chrp_nv_cksum(h); + prlog(PR_DEBUG, "NVRAM: Created '%s' partition at 0x%08x" + " for size 0x%08x with cksum 0x%02x\n", + NVRAM_NAME_COMMON, offset, + be16_to_cpu(h->len), h->cksum); + offset += NVRAM_SIZE_COMMON; + + /* Create free space partition */ + if (nvram_size - offset < sizeof(struct chrp_nvram_hdr)) + return -1; + h = nvram_image + offset; + h->sig = NVRAM_SIG_FREE; + h->len = cpu_to_be16((nvram_size - offset) >> 4); + /* We have the full 12 bytes here */ + memcpy(h->name, NVRAM_NAME_FREE, 12); + h->cksum = chrp_nv_cksum(h); + prlog(PR_DEBUG, "NVRAM: Created '%s' partition at 0x%08x" + " for size 0x%08x with cksum 0x%02x\n", + NVRAM_NAME_FREE, offset, be16_to_cpu(h->len), h->cksum); + return 0; +} + +/* + * Check that the nvram partition layout is sane and that it + * contains our required partitions. 
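
chrp_nv_cksum() above is the PAPR/CHRP one-byte header checksum: add the 16 header bytes with end-around carry, treating the cksum field itself as zero. A self-contained version of the same algorithm; the struct mirrors the 16-byte header layout but uses a host-endian length field, so this is a sketch rather than a drop-in.

#include <assert.h>
#include <stdint.h>
#include <string.h>

struct hdr {
	uint8_t sig;
	uint8_t cksum;
	uint16_t len;		/* big-endian in the real header */
	char name[12];
};

static uint8_t nv_cksum(const struct hdr *h)
{
	struct hdr copy = *h;
	const uint8_t *p = (const uint8_t *)&copy;
	uint8_t sum = 0;
	unsigned int i;

	copy.cksum = 0;				/* checksum field counts as 0 */
	for (i = 0; i < sizeof(copy); i++) {
		uint8_t next = sum + p[i];

		if (next < sum)			/* end-around carry */
			next++;
		sum = next;
	}
	return sum;
}

int main(void)
{
	struct hdr h;

	memset(&h, 0, sizeof(h));
	h.sig = 0x70;
	h.len = 0x1000;
	strcpy(h.name, "common");

	h.cksum = nv_cksum(&h);
	assert(nv_cksum(&h) == h.cksum);	/* header verifies */
	h.name[0] ^= 1;				/* corrupt one byte */
	assert(nv_cksum(&h) != h.cksum);	/* corruption detected */
	return 0;
}
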
If not, we re-format the + * lot of it + */ +int nvram_check(void *nvram_image, const uint32_t nvram_size) +{ + unsigned int offset = 0; + bool found_common = false; + + skiboot_part_hdr = NULL; + + while (offset + sizeof(struct chrp_nvram_hdr) < nvram_size) { + struct chrp_nvram_hdr *h = nvram_image + offset; + + if (chrp_nv_cksum(h) != h->cksum) { + prerror("NVRAM: Partition at offset 0x%x" + " has bad checksum: 0x%02x vs 0x%02x\n", + offset, h->cksum, chrp_nv_cksum(h)); + goto failed; + } + if (be16_to_cpu(h->len) < 1) { + prerror("NVRAM: Partition at offset 0x%x" + " has incorrect 0 length\n", offset); + goto failed; + } + + if (h->sig == NVRAM_SIG_SYSTEM && + strcmp(h->name, NVRAM_NAME_COMMON) == 0) + found_common = true; + + if (h->sig == NVRAM_SIG_FW_PRIV && + strcmp(h->name, NVRAM_NAME_FW_PRIV) == 0) + skiboot_part_hdr = h; + + offset += be16_to_cpu(h->len) << 4; + if (offset > nvram_size) { + prerror("NVRAM: Partition at offset 0x%x" + " extends beyond end of nvram !\n", offset); + goto failed; + } + } + if (!found_common) { + prlog_once(PR_ERR, "NVRAM: Common partition not found !\n"); + goto failed; + } + + if (!skiboot_part_hdr) { + prlog_once(PR_ERR, "NVRAM: Skiboot private partition not found !\n"); + goto failed; + } else { + /* + * The OF NVRAM format requires config strings to be NUL + * terminated and unused memory to be set to zero. Well behaved + * software should ensure this is done for us, but we should + * always check. + */ + const char *last_byte = (const char *) skiboot_part_hdr + + be16_to_cpu(skiboot_part_hdr->len) * 16 - 1; + + if (*last_byte != 0) { + prerror("NVRAM: Skiboot private partition is not NUL terminated"); + goto failed; + } + } + + prlog(PR_INFO, "NVRAM: Layout appears sane\n"); + assert(skiboot_part_hdr); + return 0; + failed: + return -1; +} + +static const char *find_next_key(const char *start, const char *end) +{ + /* + * Unused parts of the partition are set to NUL. If we hit two + * NULs in a row then we assume that we have hit the end of the + * partition. + */ + if (*start == 0) + return NULL; + + while (start < end) { + if (*start == 0) + return start + 1; + + start++; + } + + return NULL; +} + +static void nvram_dangerous(const char *key) +{ + prlog(PR_ERR, " ___________________________________________________________\n"); + prlog(PR_ERR, "< Dangerous NVRAM option: %s\n", key); + prlog(PR_ERR, " -----------------------------------------------------------\n"); + prlog(PR_ERR, " \\ \n"); + prlog(PR_ERR, " \\ WW \n"); + prlog(PR_ERR, " <^ \\___/| \n"); + prlog(PR_ERR, " \\ / \n"); + prlog(PR_ERR, " \\_ _/ \n"); + prlog(PR_ERR, " }{ \n"); +} + + +/* + * nvram_query_safe/dangerous() - Searches skiboot NVRAM partition + * for a key=value pair. + * + * Dangerous means it should only be used for testing as it may + * mask issues. Safe is ok for long term use. + * + * Returns a pointer to a NUL terminated string that contains the value + * associated with the given key. + */ +static const char *__nvram_query(const char *key, bool dangerous) +{ + const char *part_end, *start; + int key_len = strlen(key); + + assert(key); + + if (!nvram_has_loaded()) { + prlog(PR_DEBUG, + "NVRAM: Query for '%s' must wait for NVRAM to load\n", + key); + if (!nvram_wait_for_load()) { + prlog(PR_CRIT, "NVRAM: Failed to load\n"); + return NULL; + } + } + + /* + * The running OS can modify the NVRAM as it pleases so we need to be + * a little paranoid and check that it's ok before we try parse it. 
+ * + * NB: nvram_validate() can update skiboot_part_hdr + */ + if (!nvram_validate()) + return NULL; + + assert(skiboot_part_hdr); + + part_end = (const char *) skiboot_part_hdr + + be16_to_cpu(skiboot_part_hdr->len) * 16 - 1; + + start = (const char *) skiboot_part_hdr + + sizeof(*skiboot_part_hdr); + + if (!key_len) { + prlog(PR_WARNING, "NVRAM: search key is empty!\n"); + return NULL; + } + + if (key_len > 32) + prlog(PR_WARNING, "NVRAM: search key '%s' is longer than 32 chars\n", key); + + while (start) { + int remaining = part_end - start; + + prlog(PR_TRACE, "NVRAM: '%s' (%lu)\n", + start, strlen(start)); + + if (key_len + 1 > remaining) + return NULL; + + if (!strncmp(key, start, key_len) && start[key_len] == '=') { + const char *value = &start[key_len + 1]; + + prlog(PR_DEBUG, "NVRAM: Searched for '%s' found '%s'\n", + key, value); + + if (dangerous) + nvram_dangerous(start); + return value; + } + + start = find_next_key(start, part_end); + } + + prlog(PR_DEBUG, "NVRAM: '%s' not found\n", key); + + return NULL; +} + +const char *nvram_query_safe(const char *key) +{ + return __nvram_query(key, false); +} + +const char *nvram_query_dangerous(const char *key) +{ + return __nvram_query(key, true); +} + +/* + * nvram_query_eq_safe/dangerous() - Check if the given 'key' exists + * and is set to 'value'. + * + * Dangerous means it should only be used for testing as it may + * mask issues. Safe is ok for long term use. + * + * Note: Its an error to check for non-existence of a key + * by passing 'value == NULL' as a key's value can never be + * NULL in nvram. + */ +static bool __nvram_query_eq(const char *key, const char *value, bool dangerous) +{ + const char *s = __nvram_query(key, dangerous); + + if (!s) + return false; + + assert(value != NULL); + return !strcmp(s, value); +} + +bool nvram_query_eq_safe(const char *key, const char *value) +{ + return __nvram_query_eq(key, value, false); +} + +bool nvram_query_eq_dangerous(const char *key, const char *value) +{ + return __nvram_query_eq(key, value, true); +} + diff --git a/roms/skiboot/core/nvram.c b/roms/skiboot/core/nvram.c new file mode 100644 index 000000000..773d20280 --- /dev/null +++ b/roms/skiboot/core/nvram.c @@ -0,0 +1,203 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * NVRAM support + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <skiboot.h> +#include <fsp.h> +#include <opal.h> +#include <lock.h> +#include <device.h> +#include <platform.h> +#include <nvram.h> +#include <timebase.h> + +static void *nvram_image; +static uint32_t nvram_size; + +static bool nvram_ready; /* has the nvram been loaded? */ +static bool nvram_valid; /* is the nvram format ok? 
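
__nvram_query() scans the skiboot partition body, which is a pool of "key=value" strings separated by NULs and terminated by an empty string, matching on the key and the '=' that follows it. The same lookup over a static pool; the keys shown are only examples.

#include <assert.h>
#include <stddef.h>
#include <string.h>

static const char *query(const char *pool, size_t pool_len, const char *key)
{
	const char *p = pool;
	const char *end = pool + pool_len;
	size_t key_len = strlen(key);

	while (p < end && *p) {			/* an empty string ends the pool */
		if (!strncmp(p, key, key_len) && p[key_len] == '=')
			return p + key_len + 1;
		p += strlen(p) + 1;		/* next key=value string */
	}
	return NULL;
}

int main(void)
{
	static const char part[] = "fast-reset=1\0log-level-driver=7\0";

	assert(!strcmp(query(part, sizeof(part), "fast-reset"), "1"));
	assert(!strcmp(query(part, sizeof(part), "log-level-driver"), "7"));
	assert(query(part, sizeof(part), "missing") == NULL);
	return 0;
}
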
*/ + +static int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset) +{ + if (!nvram_ready) + return OPAL_HARDWARE; + + if (!opal_addr_valid((void *)buffer)) + return OPAL_PARAMETER; + + if (offset >= nvram_size || (offset + size) > nvram_size) + return OPAL_PARAMETER; + + memcpy((void *)buffer, nvram_image + offset, size); + return OPAL_SUCCESS; +} +opal_call(OPAL_READ_NVRAM, opal_read_nvram, 3); + +static int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset) +{ + if (!nvram_ready) + return OPAL_HARDWARE; + + if (!opal_addr_valid((void *)buffer)) + return OPAL_PARAMETER; + + if (offset >= nvram_size || (offset + size) > nvram_size) + return OPAL_PARAMETER; + memcpy(nvram_image + offset, (void *)buffer, size); + if (platform.nvram_write) + platform.nvram_write(offset, nvram_image + offset, size); + + /* The host OS has written to the NVRAM so we can't be sure that it's + * well formatted. + */ + nvram_valid = false; + + return OPAL_SUCCESS; +} +opal_call(OPAL_WRITE_NVRAM, opal_write_nvram, 3); + +bool nvram_validate(void) +{ + if (!nvram_valid) { + if (!nvram_check(nvram_image, nvram_size)) + nvram_valid = true; + } + + return nvram_valid; +} + +static void nvram_reformat(void) +{ + if (nvram_format(nvram_image, nvram_size)) { + prerror("NVRAM: Failed to format NVRAM!\n"); + nvram_valid = false; + return; + } + + /* Write the whole thing back */ + if (platform.nvram_write) + platform.nvram_write(0, nvram_image, nvram_size); + + nvram_validate(); +} + +void nvram_reinit(void) +{ + /* It's possible we failed to load nvram at boot. */ + if (!nvram_ready) + nvram_init(); + else if (!nvram_validate()) + nvram_reformat(); +} + +void nvram_read_complete(bool success) +{ + struct dt_node *np; + + /* Read not successful, error out and free the buffer */ + if (!success) { + free(nvram_image); + nvram_size = 0; + return; + } + + if (!nvram_validate()) + nvram_reformat(); + + /* Add nvram node */ + np = dt_new(opal_node, "nvram"); + dt_add_property_cells(np, "#bytes", nvram_size); + dt_add_property_string(np, "compatible", "ibm,opal-nvram"); + + /* Mark ready */ + nvram_ready = true; +} + +bool nvram_wait_for_load(void) +{ + uint64_t started; + + /* Short cut */ + if (nvram_ready) + return true; + + /* Tell the caller it will never happen */ + if (!platform.nvram_info) + return false; + + /* + * One of two things has happened here. + * 1. nvram_wait_for_load() was called before nvram_init() + * 2. The read of NVRAM failed. + * Either way, this is quite a bad event. 
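
nvram_wait_for_load() here spins on the ready flag while driving the pollers, then reports how long the load took. A rough standalone analogue using POSIX clocks in place of mftb()/tb_to_msecs(); the poller is simulated.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static bool ready;
static int polls;

static void run_pollers(void)
{
	if (++polls > 3)		/* pretend the read completes eventually */
		ready = true;
}

static bool wait_for_load(void)
{
	struct timespec start, now;

	clock_gettime(CLOCK_MONOTONIC, &start);
	while (!ready)
		run_pollers();
	clock_gettime(CLOCK_MONOTONIC, &now);

	printf("waited %ld ms\n",
	       (long)((now.tv_sec - start.tv_sec) * 1000 +
		      (now.tv_nsec - start.tv_nsec) / 1000000));
	return true;
}

int main(void)
{
	return wait_for_load() ? 0 : 1;
}
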
+ */ + if (!nvram_image && !nvram_size) { + prlog(PR_CRIT, "NVRAM: Possible wait before nvram_init()!\n"); + return false; + } + + started = mftb(); + + while (!nvram_ready) { + opal_run_pollers(); + /* If the read fails, tell the caller */ + if (!nvram_image && !nvram_size) + return false; + } + + prlog(PR_DEBUG, "NVRAM: Waited %lums for nvram to load\n", + tb_to_msecs(mftb() - started)); + + return true; +} + +bool nvram_has_loaded(void) +{ + return nvram_ready; +} + +void nvram_init(void) +{ + int rc; + + if (!platform.nvram_info) + return; + rc = platform.nvram_info(&nvram_size); + if (rc) { + prerror("NVRAM: Error %d retrieving nvram info\n", rc); + return; + } + prlog(PR_INFO, "NVRAM: Size is %d KB\n", nvram_size >> 10); + if (nvram_size > 0x100000) { + prlog(PR_WARNING, "NVRAM: Cropping to 1MB !\n"); + nvram_size = 0x100000; + } + + /* + * We allocate the nvram image with 4k alignment to make the + * FSP backend job's easier + */ + nvram_image = memalign(0x1000, nvram_size); + if (!nvram_image) { + prerror("NVRAM: Failed to allocate nvram image\n"); + nvram_size = 0; + return; + } + + /* Read it in */ + rc = platform.nvram_start_read(nvram_image, 0, nvram_size); + if (rc) { + prerror("NVRAM: Failed to read NVRAM from FSP !\n"); + nvram_size = 0; + free(nvram_image); + return; + } + + /* + * We'll get called back later (or recursively from + * nvram_start_read) in nvram_read_complete() + */ +} diff --git a/roms/skiboot/core/opal-dump.c b/roms/skiboot/core/opal-dump.c new file mode 100644 index 000000000..4f54a3ef1 --- /dev/null +++ b/roms/skiboot/core/opal-dump.c @@ -0,0 +1,582 @@ +/* Copyright 2019 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define pr_fmt(fmt) "DUMP: " fmt + +#include <chip.h> +#include <cpu.h> +#include <device.h> +#include <mem-map.h> +#include <mem_region.h> +#include <mem_region-malloc.h> +#include <opal.h> +#include <opal-dump.h> +#include <opal-internal.h> +#include <sbe-p9.h> +#include <skiboot.h> + +#include <ccan/endian/endian.h> + +#include "hdata/spira.h" + +/* XXX Ideally we should use HDAT provided data (proc_dump_area->thread_size). + * But we are not getting this data durig boot. Hence lets reserve fixed + * memory for architected registers data collection. + */ +#define ARCH_REGS_DATA_SIZE_PER_CHIP (512 * 1024) + +/* Actual address of MDST and MDDT table */ +#define MDST_TABLE_BASE (SKIBOOT_BASE + MDST_TABLE_OFF) +#define MDDT_TABLE_BASE (SKIBOOT_BASE + MDDT_TABLE_OFF) +#define PROC_DUMP_AREA_BASE (SKIBOOT_BASE + PROC_DUMP_AREA_OFF) + +static struct spira_ntuple *ntuple_mdst; +static struct spira_ntuple *ntuple_mddt; +static struct spira_ntuple *ntuple_mdrt; + +static struct mpipl_metadata *mpipl_metadata; + +/* Dump metadata area */ +static struct opal_mpipl_fadump *opal_mpipl_data; +static struct opal_mpipl_fadump *opal_mpipl_cpu_data; + +/* + * Number of tags passed by OPAL to kernel after MPIPL boot. 
+ * Currently it supports below tags: + * - CPU register data area + * - OPAL metadata area address + * - Kernel passed tag during MPIPL registration + * - Post MPIPL boot memory size + */ +#define MAX_OPAL_MPIPL_TAGS 0x04 +static u64 opal_mpipl_tags[MAX_OPAL_MPIPL_TAGS]; +static int opal_mpipl_max_tags = MAX_OPAL_MPIPL_TAGS; + +static u64 opal_dump_addr, opal_dump_size; + +static bool mpipl_enabled; + +static int opal_mpipl_add_entry(u8 region, u64 src, u64 dest, u64 size) +{ + int i; + int mdst_cnt = be16_to_cpu(ntuple_mdst->act_cnt); + int mddt_cnt = be16_to_cpu(ntuple_mddt->act_cnt); + struct mdst_table *mdst; + struct mddt_table *mddt; + + if (mdst_cnt >= MDST_TABLE_SIZE / sizeof(struct mdst_table)) { + prlog(PR_DEBUG, "MDST table is full\n"); + return OPAL_RESOURCE; + } + + if (mddt_cnt >= MDDT_TABLE_SIZE / sizeof(struct mddt_table)) { + prlog(PR_DEBUG, "MDDT table is full\n"); + return OPAL_RESOURCE; + } + + /* Use relocated memory address */ + mdst = (void *)(MDST_TABLE_BASE); + mddt = (void *)(MDDT_TABLE_BASE); + + /* Check for duplicate entry */ + for (i = 0; i < mdst_cnt; i++) { + if (be64_to_cpu(mdst->addr) == (src | HRMOR_BIT)) { + prlog(PR_DEBUG, + "Duplicate source address : 0x%llx", src); + return OPAL_PARAMETER; + } + mdst++; + } + for (i = 0; i < mddt_cnt; i++) { + if (be64_to_cpu(mddt->addr) == (dest | HRMOR_BIT)) { + prlog(PR_DEBUG, + "Duplicate destination address : 0x%llx", dest); + return OPAL_PARAMETER; + } + mddt++; + } + + /* Add OPAL source address to MDST entry */ + mdst->addr = cpu_to_be64(src | HRMOR_BIT); + mdst->data_region = region; + mdst->size = cpu_to_be32(size); + ntuple_mdst->act_cnt = cpu_to_be16(mdst_cnt + 1); + + /* Add OPAL destination address to MDDT entry */ + mddt->addr = cpu_to_be64(dest | HRMOR_BIT); + mddt->data_region = region; + mddt->size = cpu_to_be32(size); + ntuple_mddt->act_cnt = cpu_to_be16(mddt_cnt + 1); + + prlog(PR_TRACE, "Added new entry. 
src : 0x%llx, dest : 0x%llx," + " size : 0x%llx\n", src, dest, size); + return OPAL_SUCCESS; +} + +/* Remove entry from source (MDST) table */ +static int opal_mpipl_remove_entry_mdst(bool remove_all, u8 region, u64 src) +{ + bool found = false; + int i, j; + int mdst_cnt = be16_to_cpu(ntuple_mdst->act_cnt); + struct mdst_table *tmp_mdst; + struct mdst_table *mdst = (void *)(MDST_TABLE_BASE); + + for (i = 0; i < mdst_cnt;) { + if (mdst->data_region != region) { + mdst++; + i++; + continue; + } + + if (remove_all != true && + be64_to_cpu(mdst->addr) != (src | HRMOR_BIT)) { + mdst++; + i++; + continue; + } + + tmp_mdst = mdst; + memset(tmp_mdst, 0, sizeof(struct mdst_table)); + + for (j = i; j < mdst_cnt - 1; j++) { + memcpy((void *)tmp_mdst, + (void *)(tmp_mdst + 1), sizeof(struct mdst_table)); + tmp_mdst++; + memset(tmp_mdst, 0, sizeof(struct mdst_table)); + } + + mdst_cnt--; + + if (remove_all == false) { + found = true; + break; + } + } /* end - for loop */ + + ntuple_mdst->act_cnt = cpu_to_be16((u16)mdst_cnt); + + if (remove_all == false && found == false) { + prlog(PR_DEBUG, + "Source address [0x%llx] not found in MDST table\n", src); + return OPAL_PARAMETER; + } + + return OPAL_SUCCESS; +} + +/* Remove entry from destination (MDDT) table */ +static int opal_mpipl_remove_entry_mddt(bool remove_all, u8 region, u64 dest) +{ + bool found = false; + int i, j; + int mddt_cnt = be16_to_cpu(ntuple_mddt->act_cnt); + struct mddt_table *tmp_mddt; + struct mddt_table *mddt = (void *)(MDDT_TABLE_BASE); + + for (i = 0; i < mddt_cnt;) { + if (mddt->data_region != region) { + mddt++; + i++; + continue; + } + + if (remove_all != true && + be64_to_cpu(mddt->addr) != (dest | HRMOR_BIT)) { + mddt++; + i++; + continue; + } + + tmp_mddt = mddt; + memset(tmp_mddt, 0, sizeof(struct mddt_table)); + + for (j = i; j < mddt_cnt - 1; j++) { + memcpy((void *)tmp_mddt, + (void *)(tmp_mddt + 1), sizeof(struct mddt_table)); + tmp_mddt++; + memset(tmp_mddt, 0, sizeof(struct mddt_table)); + } + + mddt_cnt--; + + if (remove_all == false) { + found = true; + break; + } + } /* end - for loop */ + + ntuple_mddt->act_cnt = cpu_to_be16((u16)mddt_cnt); + + if (remove_all == false && found == false) { + prlog(PR_DEBUG, + "Dest address [0x%llx] not found in MDDT table\n", dest); + return OPAL_PARAMETER; + } + + return OPAL_SUCCESS; +} + +/* Register for OPAL dump. 
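 * (Editor's note, not part of the skiboot diff: opal_mpipl_register() below
 * adds skiboot's own memory as a DUMP_REGION_OPAL_MEMORY source/destination
 * pair and points the HDAT proc dump area at a per-chip reservation for
 * architected register capture; the matching mem_reserve_fw() calls live in
 * opal_mpipl_reserve_mem() further down.)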
*/ +static void opal_mpipl_register(void) +{ + u64 arch_regs_dest, arch_regs_size; + struct proc_dump_area *proc_dump = (void *)(PROC_DUMP_AREA_BASE); + + /* Add OPAL reservation detail to MDST/MDDT table */ + opal_mpipl_add_entry(DUMP_REGION_OPAL_MEMORY, + SKIBOOT_BASE, opal_dump_addr, opal_dump_size); + + /* Thread size check */ + if (proc_dump->thread_size != 0) { + prlog(PR_INFO, "Thread register entry size is available, " + "but not supported.\n"); + } + + /* Reserve memory used to capture architected register state */ + arch_regs_dest = opal_dump_addr + opal_dump_size; + arch_regs_size = nr_chips() * ARCH_REGS_DATA_SIZE_PER_CHIP; + proc_dump->alloc_addr = cpu_to_be64(arch_regs_dest | HRMOR_BIT); + proc_dump->alloc_size = cpu_to_be32(arch_regs_size); + prlog(PR_NOTICE, "Architected register dest addr : 0x%llx, " + "size : 0x%llx\n", arch_regs_dest, arch_regs_size); +} + +static int payload_mpipl_register(u64 src, u64 dest, u64 size) +{ + if (!opal_addr_valid((void *)src)) { + prlog(PR_DEBUG, "Invalid source address [0x%llx]\n", src); + return OPAL_PARAMETER; + } + + if (!opal_addr_valid((void *)dest)) { + prlog(PR_DEBUG, "Invalid dest address [0x%llx]\n", dest); + return OPAL_PARAMETER; + } + + if (size <= 0) { + prlog(PR_DEBUG, "Invalid size [0x%llx]\n", size); + return OPAL_PARAMETER; + } + + return opal_mpipl_add_entry(DUMP_REGION_KERNEL, src, dest, size); +} + +static int payload_mpipl_unregister(u64 src, u64 dest) +{ + int rc; + + /* Remove src from MDST table */ + rc = opal_mpipl_remove_entry_mdst(false, DUMP_REGION_KERNEL, src); + if (rc) + return rc; + + /* Remove dest from MDDT table */ + rc = opal_mpipl_remove_entry_mddt(false, DUMP_REGION_KERNEL, dest); + return rc; +} + +static int payload_mpipl_unregister_all(void) +{ + opal_mpipl_remove_entry_mdst(true, DUMP_REGION_KERNEL, 0); + opal_mpipl_remove_entry_mddt(true, DUMP_REGION_KERNEL, 0); + + return OPAL_SUCCESS; +} + +static int64_t opal_mpipl_update(enum opal_mpipl_ops ops, + u64 src, u64 dest, u64 size) +{ + int rc; + + switch (ops) { + case OPAL_MPIPL_ADD_RANGE: + rc = payload_mpipl_register(src, dest, size); + if (!rc) + prlog(PR_NOTICE, "Payload registered for MPIPL\n"); + break; + case OPAL_MPIPL_REMOVE_RANGE: + rc = payload_mpipl_unregister(src, dest); + if (!rc) { + prlog(PR_NOTICE, "Payload removed entry from MPIPL." 
+ "[src : 0x%llx, dest : 0x%llx]\n", src, dest); + } + break; + case OPAL_MPIPL_REMOVE_ALL: + rc = payload_mpipl_unregister_all(); + if (!rc) + prlog(PR_NOTICE, "Payload unregistered for MPIPL\n"); + break; + case OPAL_MPIPL_FREE_PRESERVED_MEMORY: + /* Clear tags */ + memset(&opal_mpipl_tags, 0, (sizeof(u64) * MAX_OPAL_MPIPL_TAGS)); + opal_mpipl_max_tags = 0; + /* Release memory */ + free(opal_mpipl_data); + opal_mpipl_data = NULL; + free(opal_mpipl_cpu_data); + opal_mpipl_cpu_data = NULL; + /* Clear MDRT table */ + memset((void *)MDRT_TABLE_BASE, 0, MDRT_TABLE_SIZE); + /* Set MDRT count to max allocated count */ + ntuple_mdrt->act_cnt = cpu_to_be16(MDRT_TABLE_SIZE / sizeof(struct mdrt_table)); + rc = OPAL_SUCCESS; + prlog(PR_NOTICE, "Payload Invalidated MPIPL\n"); + break; + default: + prlog(PR_DEBUG, "Unsupported MPIPL update operation : 0x%x\n", ops); + rc = OPAL_PARAMETER; + break; + } + + return rc; +} + +static int64_t opal_mpipl_register_tag(enum opal_mpipl_tags tag, + uint64_t tag_val) +{ + int rc = OPAL_SUCCESS; + + switch (tag) { + case OPAL_MPIPL_TAG_BOOT_MEM: + if (tag_val <= 0 || tag_val > top_of_ram) { + prlog(PR_DEBUG, "Payload sent invalid boot mem size" + " : 0x%llx\n", tag_val); + rc = OPAL_PARAMETER; + } else { + mpipl_metadata->boot_mem_size = tag_val; + prlog(PR_NOTICE, "Boot mem size : 0x%llx\n", tag_val); + } + break; + case OPAL_MPIPL_TAG_KERNEL: + mpipl_metadata->kernel_tag = tag_val; + prlog(PR_NOTICE, "Payload sent metadata tag : 0x%llx\n", tag_val); + break; + default: + prlog(PR_DEBUG, "Payload sent unsupported tag : 0x%x\n", tag); + rc = OPAL_PARAMETER; + break; + } + return rc; +} + +static uint64_t opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *tag_val) +{ + if (!opal_addr_valid(tag_val)) { + prlog(PR_DEBUG, "Invalid tag address\n"); + return OPAL_PARAMETER; + } + + if (tag >= opal_mpipl_max_tags) + return OPAL_PARAMETER; + + *tag_val = cpu_to_be64(opal_mpipl_tags[tag]); + return OPAL_SUCCESS; +} + +static inline void post_mpipl_get_preserved_tags(void) +{ + if (mpipl_metadata->kernel_tag) + opal_mpipl_tags[OPAL_MPIPL_TAG_KERNEL] = mpipl_metadata->kernel_tag; + if (mpipl_metadata->boot_mem_size) + opal_mpipl_tags[OPAL_MPIPL_TAG_BOOT_MEM] = mpipl_metadata->boot_mem_size; +} + +static void post_mpipl_arch_regs_data(void) +{ + struct proc_dump_area *proc_dump = (void *)(PROC_DUMP_AREA_BASE); + + if (proc_dump->dest_addr == 0) { + prlog(PR_DEBUG, "Invalid CPU registers destination address\n"); + return; + } + + if (proc_dump->act_size == 0) { + prlog(PR_DEBUG, "Invalid CPU registers destination size\n"); + return; + } + + opal_mpipl_cpu_data = zalloc(sizeof(struct opal_mpipl_fadump) + + sizeof(struct opal_mpipl_region)); + if (!opal_mpipl_cpu_data) { + prlog(PR_ERR, "Failed to allocate memory\n"); + return; + } + + /* Fill CPU register details */ + opal_mpipl_cpu_data->version = OPAL_MPIPL_VERSION; + opal_mpipl_cpu_data->cpu_data_version = cpu_to_be32((u32)proc_dump->version); + opal_mpipl_cpu_data->cpu_data_size = proc_dump->thread_size; + opal_mpipl_cpu_data->region_cnt = cpu_to_be32(1); + + opal_mpipl_cpu_data->region[0].src = proc_dump->dest_addr & ~(cpu_to_be64(HRMOR_BIT)); + opal_mpipl_cpu_data->region[0].dest = proc_dump->dest_addr & ~(cpu_to_be64(HRMOR_BIT)); + opal_mpipl_cpu_data->region[0].size = cpu_to_be64(be32_to_cpu(proc_dump->act_size)); + + /* Update tag */ + opal_mpipl_tags[OPAL_MPIPL_TAG_CPU] = (u64)opal_mpipl_cpu_data; +} + +static void post_mpipl_get_opal_data(void) +{ + struct mdrt_table *mdrt = (void *)(MDRT_TABLE_BASE); + int i, j 
= 0, count = 0; + int mdrt_cnt = be16_to_cpu(ntuple_mdrt->act_cnt); + struct opal_mpipl_region *region; + + /* Count OPAL dump regions */ + for (i = 0; i < mdrt_cnt; i++) { + if (mdrt->data_region == DUMP_REGION_OPAL_MEMORY) + count++; + mdrt++; + } + + if (count == 0) { + prlog(PR_INFO, "OPAL dump is not available\n"); + return; + } + + opal_mpipl_data = zalloc(sizeof(struct opal_mpipl_fadump) + + count * sizeof(struct opal_mpipl_region)); + if (!opal_mpipl_data) { + prlog(PR_ERR, "Failed to allocate memory\n"); + return; + } + + /* Fill OPAL dump details */ + opal_mpipl_data->version = OPAL_MPIPL_VERSION; + opal_mpipl_data->crashing_pir = cpu_to_be32(mpipl_metadata->crashing_pir); + opal_mpipl_data->region_cnt = cpu_to_be32(count); + region = opal_mpipl_data->region; + + mdrt = (void *)(MDRT_TABLE_BASE); + for (i = 0; i < mdrt_cnt; i++) { + if (mdrt->data_region != DUMP_REGION_OPAL_MEMORY) { + mdrt++; + continue; + } + + region[j].src = mdrt->src_addr & ~(cpu_to_be64(HRMOR_BIT)); + region[j].dest = mdrt->dest_addr & ~(cpu_to_be64(HRMOR_BIT)); + region[j].size = cpu_to_be64(be32_to_cpu(mdrt->size)); + + prlog(PR_NOTICE, "OPAL reserved region %d - src : 0x%llx, " + "dest : 0x%llx, size : 0x%llx\n", j, + be64_to_cpu(region[j].src), be64_to_cpu(region[j].dest), + be64_to_cpu(region[j].size)); + + mdrt++; + j++; + if (j == count) + break; + } + + opal_mpipl_tags[OPAL_MPIPL_TAG_OPAL] = (u64)opal_mpipl_data; +} + +void opal_mpipl_save_crashing_pir(void) +{ + if (!is_mpipl_enabled()) + return; + + mpipl_metadata->crashing_pir = this_cpu()->pir; + prlog(PR_NOTICE, "Crashing PIR = 0x%x\n", this_cpu()->pir); +} + +void opal_mpipl_reserve_mem(void) +{ + struct dt_node *opal_node, *dump_node; + u64 arch_regs_dest, arch_regs_size; + + opal_node = dt_find_by_path(dt_root, "ibm,opal"); + if (!opal_node) + return; + + dump_node = dt_find_by_path(opal_node, "dump"); + if (!dump_node) + return; + + /* Calculcate and Reserve OPAL dump destination memory */ + opal_dump_size = SKIBOOT_SIZE + (cpu_max_pir + 1) * STACK_SIZE; + opal_dump_addr = SKIBOOT_BASE + opal_dump_size; + mem_reserve_fw("ibm,firmware-dump", + opal_dump_addr, opal_dump_size); + + /* Reserve memory to capture CPU register data */ + arch_regs_dest = opal_dump_addr + opal_dump_size; + arch_regs_size = nr_chips() * ARCH_REGS_DATA_SIZE_PER_CHIP; + mem_reserve_fw("ibm,firmware-arch-registers", + arch_regs_dest, arch_regs_size); +} + +bool is_mpipl_enabled(void) +{ + return mpipl_enabled; +} + +void opal_mpipl_init(void) +{ + void *mdst_base = (void *)MDST_TABLE_BASE; + void *mddt_base = (void *)MDDT_TABLE_BASE; + struct dt_node *dump_node; + + dump_node = dt_find_by_path(opal_node, "dump"); + if (!dump_node) + return; + + /* Get MDST and MDDT ntuple from SPIRAH */ + ntuple_mdst = &(spirah.ntuples.mdump_src); + ntuple_mddt = &(spirah.ntuples.mdump_dst); + ntuple_mdrt = &(spirah.ntuples.mdump_res); + + /* Get metadata area pointer */ + mpipl_metadata = (void *)(DUMP_METADATA_AREA_BASE); + + if (dt_find_property(dump_node, "mpipl-boot")) { + disable_fast_reboot("MPIPL Boot"); + + post_mpipl_get_preserved_tags(); + post_mpipl_get_opal_data(); + post_mpipl_arch_regs_data(); + } + + /* Clear OPAL metadata area */ + if (sizeof(struct mpipl_metadata) > DUMP_METADATA_AREA_SIZE) { + prlog(PR_ERR, "INSUFFICIENT OPAL METADATA AREA\n"); + prlog(PR_ERR, "INCREASE OPAL MEDTADATA AREA SIZE\n"); + assert(false); + } + memset(mpipl_metadata, 0, sizeof(struct mpipl_metadata)); + + /* Clear MDST and MDDT table */ + memset(mdst_base, 0, MDST_TABLE_SIZE); + 
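	/*
	 * Editor's note, not part of the skiboot diff: the table contents and
	 * the SPIRA-H active counts are reset as a pair here.
	 * opal_mpipl_add_entry() above uses act_cnt both to bound its
	 * duplicate scan and to locate the next free slot, so clearing one
	 * without the other would corrupt the MDST/MDDT bookkeeping.
	 */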
ntuple_mdst->act_cnt = 0; + memset(mddt_base, 0, MDDT_TABLE_SIZE); + ntuple_mddt->act_cnt = 0; + + opal_mpipl_register(); + + /* Send OPAL relocated base address to SBE */ + p9_sbe_send_relocated_base(SKIBOOT_BASE); + + /* OPAL API for MPIPL update */ + opal_register(OPAL_MPIPL_UPDATE, opal_mpipl_update, 4); + opal_register(OPAL_MPIPL_REGISTER_TAG, opal_mpipl_register_tag, 2); + opal_register(OPAL_MPIPL_QUERY_TAG, opal_mpipl_query_tag, 2); + + /* Enable MPIPL */ + mpipl_enabled = true; +} diff --git a/roms/skiboot/core/opal-msg.c b/roms/skiboot/core/opal-msg.c new file mode 100644 index 000000000..65a2476b2 --- /dev/null +++ b/roms/skiboot/core/opal-msg.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * OPAL Message queue between host and skiboot + * + * Copyright 2013-2019 IBM Corp. + */ + +#define pr_fmt(fmt) "opalmsg: " fmt +#include <skiboot.h> +#include <opal-msg.h> +#include <opal-api.h> +#include <lock.h> + +#define OPAL_MAX_MSGS (OPAL_MSG_TYPE_MAX + OPAL_MAX_ASYNC_COMP - 1) + +struct opal_msg_entry { + struct list_node link; + void (*consumed)(void *data, int status); + bool extended; + void *data; + struct opal_msg msg; +}; + +static LIST_HEAD(msg_free_list); +static LIST_HEAD(msg_pending_list); + +static struct lock opal_msg_lock = LOCK_UNLOCKED; + +int _opal_queue_msg(enum opal_msg_type msg_type, void *data, + void (*consumed)(void *data, int status), + size_t params_size, const void *params) +{ + struct opal_msg_entry *entry; + uint64_t entry_size; + + if ((params_size + OPAL_MSG_HDR_SIZE) > OPAL_MSG_SIZE) { + prlog(PR_DEBUG, "param_size (0x%x) > opal_msg param size (0x%x)\n", + (u32)params_size, (u32)(OPAL_MSG_SIZE - OPAL_MSG_HDR_SIZE)); + return OPAL_PARAMETER; + } + + lock(&opal_msg_lock); + + if (params_size > OPAL_MSG_FIXED_PARAMS_SIZE) { + entry_size = sizeof(struct opal_msg_entry) + params_size; + entry_size -= OPAL_MSG_FIXED_PARAMS_SIZE; + entry = zalloc(entry_size); + if (entry) + entry->extended = true; + } else { + entry = list_pop(&msg_free_list, struct opal_msg_entry, link); + if (!entry) { + prerror("No available node in the free list, allocating\n"); + entry = zalloc(sizeof(struct opal_msg_entry)); + } + } + if (!entry) { + prerror("Allocation failed\n"); + unlock(&opal_msg_lock); + return OPAL_RESOURCE; + } + + entry->consumed = consumed; + entry->data = data; + entry->msg.msg_type = cpu_to_be32(msg_type); + entry->msg.size = cpu_to_be32(params_size); + memcpy(entry->msg.params, params, params_size); + + list_add_tail(&msg_pending_list, &entry->link); + opal_update_pending_evt(OPAL_EVENT_MSG_PENDING, + OPAL_EVENT_MSG_PENDING); + unlock(&opal_msg_lock); + + return OPAL_SUCCESS; +} + +static int64_t opal_get_msg(uint64_t *buffer, uint64_t size) +{ + struct opal_msg_entry *entry; + void (*callback)(void *data, int status); + void *data; + uint64_t msg_size; + int rc = OPAL_SUCCESS; + + if (size < sizeof(struct opal_msg) || !buffer) + return OPAL_PARAMETER; + + if (!opal_addr_valid(buffer)) + return OPAL_PARAMETER; + + lock(&opal_msg_lock); + + entry = list_pop(&msg_pending_list, struct opal_msg_entry, link); + if (!entry) { + unlock(&opal_msg_lock); + return OPAL_RESOURCE; + } + + msg_size = OPAL_MSG_HDR_SIZE + be32_to_cpu(entry->msg.size); + if (size < msg_size) { + /* Send partial data to Linux */ + prlog(PR_NOTICE, "Sending partial data [msg_type : 0x%x, " + "msg_size : 0x%x, buf_size : 0x%x]\n", + be32_to_cpu(entry->msg.msg_type), + (u32)msg_size, (u32)size); + + entry->msg.size = cpu_to_be32(size - OPAL_MSG_HDR_SIZE); + 
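		/*
		 * Editor's note, not part of the skiboot diff: the queued
		 * message is clamped to what fits in the caller's buffer and
		 * OPAL_PARTIAL is returned below. Since _opal_queue_msg()
		 * rejects anything larger than OPAL_MSG_SIZE, a caller whose
		 * buffer matches the "opal-msg-size" device-tree property
		 * never takes this path.
		 */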
msg_size = size; + rc = OPAL_PARTIAL; + } + + memcpy((void *)buffer, (void *)&entry->msg, msg_size); + callback = entry->consumed; + data = entry->data; + + if (entry->extended) + free(entry); + else + list_add(&msg_free_list, &entry->link); + + if (list_empty(&msg_pending_list)) + opal_update_pending_evt(OPAL_EVENT_MSG_PENDING, 0); + + unlock(&opal_msg_lock); + + if (callback) + callback(data, rc); + + return rc; +} +opal_call(OPAL_GET_MSG, opal_get_msg, 2); + +static int64_t opal_check_completion(uint64_t *buffer, uint64_t size, + uint64_t token) +{ + struct opal_msg_entry *entry, *next_entry; + void (*callback)(void *data, int status) = NULL; + int rc = OPAL_BUSY; + void *data = NULL; + + if (!opal_addr_valid(buffer)) + return OPAL_PARAMETER; + + lock(&opal_msg_lock); + list_for_each_safe(&msg_pending_list, entry, next_entry, link) { + if (be32_to_cpu(entry->msg.msg_type) == OPAL_MSG_ASYNC_COMP && + be64_to_cpu(entry->msg.params[0]) == token) { + list_del(&entry->link); + callback = entry->consumed; + data = entry->data; + list_add(&msg_free_list, &entry->link); + if (list_empty(&msg_pending_list)) + opal_update_pending_evt(OPAL_EVENT_MSG_PENDING, + 0); + rc = OPAL_SUCCESS; + break; + } + } + + if (rc == OPAL_SUCCESS && size >= sizeof(struct opal_msg)) + memcpy(buffer, &entry->msg, sizeof(entry->msg)); + + unlock(&opal_msg_lock); + + if (callback) + callback(data, OPAL_SUCCESS); + + return rc; + +} +opal_call(OPAL_CHECK_ASYNC_COMPLETION, opal_check_completion, 3); + +void opal_init_msg(void) +{ + struct opal_msg_entry *entry; + int i; + + for (i = 0; i < OPAL_MAX_MSGS; i++, entry++) { + entry = zalloc(sizeof(*entry)); + if (!entry) + goto err; + list_add_tail(&msg_free_list, &entry->link); + } + return; + +err: + for (; i > 0; i--) { + entry = list_pop(&msg_free_list, struct opal_msg_entry, link); + if (entry) + free(entry); + } +} + diff --git a/roms/skiboot/core/opal.c b/roms/skiboot/core/opal.c new file mode 100644 index 000000000..2898a45ce --- /dev/null +++ b/roms/skiboot/core/opal.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Base support for OPAL calls + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <opal.h> +#include <stack.h> +#include <lock.h> +#include <fsp.h> +#include <cpu.h> +#include <interrupts.h> +#include <op-panel.h> +#include <device.h> +#include <console.h> +#include <trace.h> +#include <timebase.h> +#include <affinity.h> +#include <opal-msg.h> +#include <timer.h> +#include <elf-abi.h> +#include <errorlog.h> +#include <occ.h> + +/* Pending events to signal via opal_poll_events */ +uint64_t opal_pending_events; + +/* OPAL dispatch table defined in head.S */ +extern const uint64_t opal_branch_table[]; + +/* Number of args expected for each call. */ +static const u8 opal_num_args[OPAL_LAST+1]; + +/* OPAL anchor node */ +struct dt_node *opal_node; + +/* mask of dynamic vs fixed events; opal_allocate_dynamic_event will + * only allocate from this range */ +static const uint64_t opal_dynamic_events_mask = 0xffffffff00000000ul; +static uint64_t opal_dynamic_events; + +extern uint32_t attn_trigger; +extern uint32_t hir_trigger; + + +void opal_table_init(void) +{ + struct opal_table_entry *s = __opal_table_start; + struct opal_table_entry *e = __opal_table_end; + + prlog(PR_DEBUG, "OPAL table: %p .. 
%p, branch table: %p\n", + s, e, opal_branch_table); + while(s < e) { + ((uint64_t *)opal_branch_table)[s->token] = function_entry_address(s->func); + ((u8 *)opal_num_args)[s->token] = s->nargs; + s++; + } +} + +/* Called from head.S, thus no prototype */ +long opal_bad_token(uint64_t token); + +long opal_bad_token(uint64_t token) +{ + /** + * @fwts-label OPALBadToken + * @fwts-advice OPAL was called with a bad token. On POWER8 and + * earlier, Linux kernels had a bug where they wouldn't check + * if firmware supported particular OPAL calls before making them. + * It is, in fact, harmless for these cases. On systems newer than + * POWER8, this should never happen and indicates a kernel bug + * where OPAL_CHECK_TOKEN isn't being called where it should be. + */ + prlog(PR_ERR, "OPAL: Called with bad token %lld !\n", token); + + return OPAL_PARAMETER; +} + +#ifdef OPAL_TRACE_ENTRY +static void opal_trace_entry(struct stack_frame *eframe __unused) +{ + union trace t; + unsigned nargs, i; + + if (eframe->gpr[0] > OPAL_LAST) + nargs = 0; + else + nargs = opal_num_args[eframe->gpr[0]]; + + t.opal.token = cpu_to_be64(eframe->gpr[0]); + t.opal.lr = cpu_to_be64(eframe->lr); + t.opal.sp = cpu_to_be64(eframe->gpr[1]); + for(i=0; i<nargs; i++) + t.opal.r3_to_11[i] = cpu_to_be64(eframe->gpr[3+i]); + + trace_add(&t, TRACE_OPAL, offsetof(struct trace_opal, r3_to_11[nargs])); +} +#endif + +/* + * opal_quiesce_state is used as a lock. Don't use an actual lock to avoid + * lock busting. + */ +static uint32_t opal_quiesce_state; /* 0 or QUIESCE_HOLD/QUIESCE_REJECT */ +static int32_t opal_quiesce_owner; /* PIR */ +static int32_t opal_quiesce_target; /* -1 or PIR */ + +static int64_t opal_check_token(uint64_t token); + +/* Called from head.S, thus no prototype */ +int64_t opal_entry_check(struct stack_frame *eframe); + +int64_t opal_entry_check(struct stack_frame *eframe) +{ + struct cpu_thread *cpu = this_cpu(); + uint64_t token = eframe->gpr[0]; + + if (cpu->pir != mfspr(SPR_PIR)) { + printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n", + mfspr(SPR_PIR), cpu, cpu->pir, token); + abort(); + } + +#ifdef OPAL_TRACE_ENTRY + opal_trace_entry(eframe); +#endif + + if (!opal_check_token(token)) + return opal_bad_token(token); + + if (!opal_quiesce_state && cpu->in_opal_call > 1) { + disable_fast_reboot("Kernel re-entered OPAL"); + switch (token) { + case OPAL_CONSOLE_READ: + case OPAL_CONSOLE_WRITE: + case OPAL_CONSOLE_WRITE_BUFFER_SPACE: + case OPAL_CONSOLE_FLUSH: + case OPAL_POLL_EVENTS: + case OPAL_CHECK_TOKEN: + case OPAL_CEC_REBOOT: + case OPAL_CEC_REBOOT2: + case OPAL_SIGNAL_SYSTEM_RESET: + break; + default: + printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n", + mfspr(SPR_PIR), cpu, cpu->pir, token); + if (cpu->in_opal_call > 2) { + printf("Emergency stack is destroyed, can't continue.\n"); + abort(); + } + return OPAL_INTERNAL_ERROR; + } + } + + cpu->entered_opal_call_at = mftb(); + return OPAL_SUCCESS; +} + +int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe); + +int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe) +{ + struct cpu_thread *cpu = this_cpu(); + uint64_t token = eframe->gpr[0]; + uint64_t now = mftb(); + uint64_t call_time = tb_to_msecs(now - cpu->entered_opal_call_at); + + if (!cpu->in_opal_call) { + disable_fast_reboot("Un-accounted firmware entry"); + printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! 
PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n", + mfspr(SPR_PIR), cpu, cpu->pir, token, retval); + cpu->in_opal_call++; /* avoid exit path underflowing */ + } else { + if (cpu->in_opal_call > 2) { + printf("Emergency stack is destroyed, can't continue.\n"); + abort(); + } + if (!list_empty(&cpu->locks_held)) { + prlog(PR_ERR, "OPAL exiting with locks held, pir=%04x token=%llu retval=%lld\n", + cpu->pir, token, retval); + drop_my_locks(true); + } + } + + if (call_time > 100 && token != OPAL_RESYNC_TIMEBASE) { + prlog((call_time < 1000) ? PR_DEBUG : PR_WARNING, + "Spent %llu msecs in OPAL call %llu!\n", + call_time, token); + } + + cpu->current_token = 0; + + return retval; +} + +int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target) +{ + struct cpu_thread *cpu = this_cpu(); + struct cpu_thread *target = NULL; + struct cpu_thread *c; + uint64_t end; + bool stuck = false; + + if (cpu_target >= 0) { + target = find_cpu_by_server(cpu_target); + if (!target) + return OPAL_PARAMETER; + } else if (cpu_target != -1) { + return OPAL_PARAMETER; + } + + if (quiesce_type == QUIESCE_HOLD || quiesce_type == QUIESCE_REJECT) { + if (cmpxchg32(&opal_quiesce_state, 0, quiesce_type) != 0) { + if (opal_quiesce_owner != cpu->pir) { + /* + * Nested is allowed for now just for + * internal uses, so an error is returned + * for OS callers, but no error message + * printed if we are nested. + */ + printf("opal_quiesce already quiescing\n"); + } + return OPAL_BUSY; + } + opal_quiesce_owner = cpu->pir; + opal_quiesce_target = cpu_target; + } + + if (opal_quiesce_owner != cpu->pir) { + printf("opal_quiesce CPU does not own quiesce state (must call QUIESCE_HOLD or QUIESCE_REJECT)\n"); + return OPAL_BUSY; + } + + /* Okay now we own the quiesce state */ + + if (quiesce_type == QUIESCE_RESUME || + quiesce_type == QUIESCE_RESUME_FAST_REBOOT) { + bust_locks = false; + sync(); /* release barrier vs opal entry */ + if (target) { + target->quiesce_opal_call = 0; + } else { + for_each_cpu(c) { + if (quiesce_type == QUIESCE_RESUME_FAST_REBOOT) + c->in_opal_call = 0; + + if (c == cpu) { + assert(!c->quiesce_opal_call); + continue; + } + c->quiesce_opal_call = 0; + } + } + sync(); + opal_quiesce_state = 0; + return OPAL_SUCCESS; + } + + if (quiesce_type == QUIESCE_LOCK_BREAK) { + if (opal_quiesce_target != -1) { + printf("opal_quiesce has not quiesced all CPUs (must target -1)\n"); + return OPAL_BUSY; + } + bust_locks = true; + return OPAL_SUCCESS; + } + + if (target) { + target->quiesce_opal_call = quiesce_type; + } else { + for_each_cpu(c) { + if (c == cpu) + continue; + c->quiesce_opal_call = quiesce_type; + } + } + + sync(); /* Order stores to quiesce_opal_call vs loads of in_opal_call */ + + end = mftb() + msecs_to_tb(1000); + + smt_lowest(); + if (target) { + while (target->in_opal_call) { + if (tb_compare(mftb(), end) == TB_AAFTERB) { + printf("OPAL quiesce CPU:%04x stuck in OPAL\n", target->pir); + stuck = true; + break; + } + barrier(); + } + } else { + for_each_cpu(c) { + if (c == cpu) + continue; + while (c->in_opal_call) { + if (tb_compare(mftb(), end) == TB_AAFTERB) { + printf("OPAL quiesce CPU:%04x stuck in OPAL\n", c->pir); + stuck = true; + break; + } + barrier(); + } + } + } + smt_medium(); + sync(); /* acquire barrier vs opal entry */ + + if (stuck) { + printf("OPAL quiesce could not kick all CPUs out of OPAL\n"); + return OPAL_PARTIAL; + } + + return OPAL_SUCCESS; +} +opal_call(OPAL_QUIESCE, opal_quiesce, 2); + +void __opal_register(uint64_t token, void *func, unsigned int nargs) +{ + 
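	/*
	 * Editor's note, not part of the skiboot diff: this is the runtime
	 * counterpart of opal_table_init() above - it patches the branch
	 * table slot for the token so opal_check_token() starts reporting it
	 * as present. Callers typically reach it via the opal_register()
	 * wrapper, e.g. opal-dump.c above does:
	 *
	 *	opal_register(OPAL_MPIPL_UPDATE, opal_mpipl_update, 4);
	 */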
assert(token <= OPAL_LAST); + + ((uint64_t *)opal_branch_table)[token] = function_entry_address(func); + ((u8 *)opal_num_args)[token] = nargs; +} + +/* + * add_opal_firmware_exports_node: adds properties to the device-tree which + * the OS will then change into sysfs nodes. + * The properties must be placed under /ibm,opal/firmware/exports. + * The new sysfs nodes are created under /opal/exports. + * To be correctly exported the properties must contain: + * name + * base memory location (u64) + * size (u64) + */ +static void add_opal_firmware_exports_node(struct dt_node *node) +{ + struct dt_node *exports = dt_new(node, "exports"); + uint64_t sym_start = (uint64_t)__sym_map_start; + uint64_t sym_size = (uint64_t)__sym_map_end - sym_start; + + /* + * These property names will be used by Linux as the user-visible file + * name, so make them meaningful if possible. We use _ as the separator + * here to remain consistent with existing file names in /sys/opal. + */ + dt_add_property_u64s(exports, "symbol_map", sym_start, sym_size); + dt_add_property_u64s(exports, "hdat_map", SPIRA_HEAP_BASE, + SPIRA_HEAP_SIZE); +#ifdef SKIBOOT_GCOV + dt_add_property_u64s(exports, "gcov", SKIBOOT_BASE, + HEAP_BASE - SKIBOOT_BASE); +#endif +} + +static void add_opal_firmware_node(void) +{ + struct dt_node *firmware = dt_new(opal_node, "firmware"); + uint64_t sym_start = (uint64_t)__sym_map_start; + uint64_t sym_size = (uint64_t)__sym_map_end - sym_start; + + dt_add_property_string(firmware, "compatible", "ibm,opal-firmware"); + dt_add_property_string(firmware, "name", "firmware"); + dt_add_property_string(firmware, "version", version); + /* + * As previous OS versions use symbol-map located at + * /ibm,opal/firmware we will keep a copy of symbol-map here + * for backwards compatibility + */ + dt_add_property_u64s(firmware, "symbol-map", sym_start, sym_size); + + add_opal_firmware_exports_node(firmware); +} + +void add_opal_node(void) +{ + uint64_t base, entry, size; + extern uint32_t opal_entry; + extern uint32_t boot_entry; + struct dt_node *opal_event; + + /* XXX TODO: Reorg this. We should create the base OPAL + * node early on, and have the various sub modules populate + * their own entries (console etc...) 
+ * + * The logic of which console backend to use should be + * extracted + */ + + entry = (uint64_t)&opal_entry; + base = SKIBOOT_BASE; + size = (CPU_STACKS_BASE + + (uint64_t)(cpu_max_pir + 1) * STACK_SIZE) - SKIBOOT_BASE; + + opal_node = dt_new_check(dt_root, "ibm,opal"); + dt_add_property_cells(opal_node, "#address-cells", 0); + dt_add_property_cells(opal_node, "#size-cells", 0); + + if (proc_gen < proc_gen_p9) + dt_add_property_strings(opal_node, "compatible", "ibm,opal-v2", + "ibm,opal-v3"); + else + dt_add_property_strings(opal_node, "compatible", "ibm,opal-v3"); + + dt_add_property_cells(opal_node, "opal-msg-async-num", OPAL_MAX_ASYNC_COMP); + dt_add_property_cells(opal_node, "opal-msg-size", OPAL_MSG_SIZE); + dt_add_property_u64(opal_node, "opal-base-address", base); + dt_add_property_u64(opal_node, "opal-entry-address", entry); + dt_add_property_u64(opal_node, "opal-boot-address", (uint64_t)&boot_entry); + dt_add_property_u64(opal_node, "opal-runtime-size", size); + + /* Add irqchip interrupt controller */ + opal_event = dt_new(opal_node, "event"); + dt_add_property_strings(opal_event, "compatible", "ibm,opal-event"); + dt_add_property_cells(opal_event, "#interrupt-cells", 0x1); + dt_add_property(opal_event, "interrupt-controller", NULL, 0); + + add_opal_firmware_node(); + add_associativity_ref_point(); + memcons_add_properties(); +} + +static struct lock evt_lock = LOCK_UNLOCKED; + +void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values) +{ + uint64_t new_evts; + + lock(&evt_lock); + new_evts = (opal_pending_events & ~evt_mask) | evt_values; + if (opal_pending_events != new_evts) { + uint64_t tok; + +#ifdef OPAL_TRACE_EVT_CHG + printf("OPAL: Evt change: 0x%016llx -> 0x%016llx\n", + opal_pending_events, new_evts); +#endif + /* + * If an event gets *set* while we are in a different call chain + * than opal_handle_interrupt() or opal_handle_hmi(), then we + * artificially generate an interrupt (OCC interrupt specifically) + * to ensure that Linux properly broadcast the event change internally + */ + if ((new_evts & ~opal_pending_events) != 0) { + tok = this_cpu()->current_token; + if (tok != OPAL_HANDLE_INTERRUPT && tok != OPAL_HANDLE_HMI) + occ_send_dummy_interrupt(); + } + opal_pending_events = new_evts; + } + unlock(&evt_lock); +} + +uint64_t opal_dynamic_event_alloc(void) +{ + uint64_t new_event; + int n; + + lock(&evt_lock); + + /* Create the event mask. This set-bit will be within the event mask + * iff there are free events, or out of the mask if there are no free + * events. If opal_dynamic_events is all ones (ie, all events are + * dynamic, and allocated), then ilog2 will return -1, and we'll have a + * zero mask. 
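	 *
	 * (Editor's note, illustrative: with no dynamic events allocated,
	 * ~opal_dynamic_events is all ones, ilog2() picks bit 63, and the
	 * first allocation returns 0x8000000000000000ull, which lies inside
	 * opal_dynamic_events_mask and is therefore accepted.)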
+ */ + n = ilog2(~opal_dynamic_events); + new_event = 1ull << n; + + /* Ensure we're still within the allocatable dynamic events range */ + if (new_event & opal_dynamic_events_mask) + opal_dynamic_events |= new_event; + else + new_event = 0; + + unlock(&evt_lock); + return new_event; +} + +void opal_dynamic_event_free(uint64_t event) +{ + lock(&evt_lock); + opal_dynamic_events &= ~event; + unlock(&evt_lock); +} + +static uint64_t opal_test_func(uint64_t arg) +{ + printf("OPAL: Test function called with arg 0x%llx\n", arg); + + return 0xfeedf00d; +} +opal_call(OPAL_TEST, opal_test_func, 1); + +struct opal_poll_entry { + struct list_node link; + void (*poller)(void *data); + void *data; +}; + +static struct list_head opal_pollers = LIST_HEAD_INIT(opal_pollers); +static struct lock opal_poll_lock = LOCK_UNLOCKED; + +void opal_add_poller(void (*poller)(void *data), void *data) +{ + struct opal_poll_entry *ent; + + ent = zalloc(sizeof(struct opal_poll_entry)); + assert(ent); + ent->poller = poller; + ent->data = data; + lock(&opal_poll_lock); + list_add_tail(&opal_pollers, &ent->link); + unlock(&opal_poll_lock); +} + +void opal_del_poller(void (*poller)(void *data)) +{ + struct opal_poll_entry *ent; + + /* XXX This is currently unused. To solve various "interesting" + * locking issues, the pollers are run locklessly, so if we were + * to free them, we would have to be careful, using something + * akin to RCU to synchronize with other OPAL entries. For now + * if anybody uses it, print a warning and leak the entry, don't + * free it. + */ + /** + * @fwts-label UnsupportedOPALdelpoller + * @fwts-advice Currently removing a poller is DANGEROUS and + * MUST NOT be done in production firmware. + */ + prlog(PR_ALERT, "WARNING: Unsupported opal_del_poller." + " Interesting locking issues, don't call this.\n"); + + lock(&opal_poll_lock); + list_for_each(&opal_pollers, ent, link) { + if (ent->poller == poller) { + list_del(&ent->link); + /* free(ent); */ + break; + } + } + unlock(&opal_poll_lock); +} + +void opal_run_pollers(void) +{ + static int pollers_with_lock_warnings = 0; + static int poller_recursion = 0; + struct opal_poll_entry *poll_ent; + bool was_in_poller; + + /* Don't re-enter on this CPU, unless it was an OPAL re-entry */ + if (this_cpu()->in_opal_call == 1 && this_cpu()->in_poller) { + + /** + * @fwts-label OPALPollerRecursion + * @fwts-advice Recursion detected in opal_run_pollers(). This + * indicates a bug in OPAL where a poller ended up running + * pollers, which doesn't lead anywhere good. + */ + poller_recursion++; + if (poller_recursion <= 16) { + disable_fast_reboot("Poller recursion detected."); + prlog(PR_ERR, "OPAL: Poller recursion detected.\n"); + backtrace(); + + } + + if (poller_recursion == 16) + prlog(PR_ERR, "OPAL: Squashing future poller recursion warnings (>16).\n"); + + return; + } + was_in_poller = this_cpu()->in_poller; + this_cpu()->in_poller = true; + + if (!list_empty(&this_cpu()->locks_held) && pollers_with_lock_warnings < 64) { + /** + * @fwts-label OPALPollerWithLock + * @fwts-advice opal_run_pollers() was called with a lock + * held, which could lead to deadlock if not excessively + * lucky/careful. + */ + prlog(PR_ERR, "Running pollers with lock held !\n"); + dump_locks_list(); + backtrace(); + pollers_with_lock_warnings++; + if (pollers_with_lock_warnings == 64) { + /** + * @fwts-label OPALPollerWithLock64 + * @fwts-advice Your firmware is buggy, see the 64 + * messages complaining about opal_run_pollers with + * lock held. 
+ */ + prlog(PR_ERR, "opal_run_pollers with lock run 64 " + "times, disabling warning.\n"); + } + } + + /* We run the timers first */ + check_timers(false); + + /* The pollers are run lokelessly, see comment in opal_del_poller */ + list_for_each(&opal_pollers, poll_ent, link) + poll_ent->poller(poll_ent->data); + + /* Disable poller flag */ + this_cpu()->in_poller = was_in_poller; + + /* On debug builds, print max stack usage */ + check_stacks(); +} + +static int64_t opal_poll_events(__be64 *outstanding_event_mask) +{ + + if (!opal_addr_valid(outstanding_event_mask)) + return OPAL_PARAMETER; + + /* Check if we need to trigger an attn for test use */ + if (attn_trigger == 0xdeadbeef) { + prlog(PR_EMERG, "Triggering attn\n"); + assert(false); + } + + opal_run_pollers(); + + if (outstanding_event_mask) + *outstanding_event_mask = cpu_to_be64(opal_pending_events); + + return OPAL_SUCCESS; +} +opal_call(OPAL_POLL_EVENTS, opal_poll_events, 1); + +static int64_t opal_check_token(uint64_t token) +{ + if (token > OPAL_LAST) + return OPAL_TOKEN_ABSENT; + + if (opal_branch_table[token]) + return OPAL_TOKEN_PRESENT; + + return OPAL_TOKEN_ABSENT; +} +opal_call(OPAL_CHECK_TOKEN, opal_check_token, 1); + +struct opal_sync_entry { + struct list_node link; + bool (*notify)(void *data); + void *data; +}; + +static struct list_head opal_syncers = LIST_HEAD_INIT(opal_syncers); + +void opal_add_host_sync_notifier(bool (*notify)(void *data), void *data) +{ + struct opal_sync_entry *ent; + + ent = zalloc(sizeof(struct opal_sync_entry)); + assert(ent); + ent->notify = notify; + ent->data = data; + list_add_tail(&opal_syncers, &ent->link); +} + +/* + * Remove a host sync notifier for given callback and data + */ +void opal_del_host_sync_notifier(bool (*notify)(void *data), void *data) +{ + struct opal_sync_entry *ent; + + list_for_each(&opal_syncers, ent, link) { + if (ent->notify == notify && ent->data == data) { + list_del(&ent->link); + free(ent); + return; + } + } +} + +/* + * OPAL call to handle host kexec'ing scenario + */ +static int64_t opal_sync_host_reboot(void) +{ + struct opal_sync_entry *ent, *nxt; + int ret = OPAL_SUCCESS; + + list_for_each_safe(&opal_syncers, ent, nxt, link) + if (! ent->notify(ent->data)) + ret = OPAL_BUSY_EVENT; + + return ret; +} +opal_call(OPAL_SYNC_HOST_REBOOT, opal_sync_host_reboot, 0); diff --git a/roms/skiboot/core/pci-dt-slot.c b/roms/skiboot/core/pci-dt-slot.c new file mode 100644 index 000000000..2441bf940 --- /dev/null +++ b/roms/skiboot/core/pci-dt-slot.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * PCI slots in the device tree. + * + * Copyright 2017-2018 IBM Corp. 
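 * (Editor's note, not part of the skiboot diff: this file matches PHBs and
 * PCIe root/switch ports against the slot description nodes under
 * /ibm,pcie-slots in the device tree; see map_phb_to_slot() and
 * map_pci_dev_to_slot() below.)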
+ */ + +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> + +#include <skiboot.h> +#include <device.h> + +#include <pci.h> +#include <pci-cfg.h> +#include <pci-slot.h> +#include <ccan/list/list.h> + +#undef pr_fmt +#define pr_fmt(fmt) "DT-SLOT: " fmt + +struct dt_node *dt_slots; + +static struct dt_node *map_phb_to_slot(struct phb *phb) +{ + uint32_t chip_id = dt_get_chip_id(phb->dt_node); + uint32_t phb_idx = dt_prop_get_u32_def(phb->dt_node, + "ibm,phb-index", 0); + struct dt_node *slot_node; + + if (!dt_slots) + dt_slots = dt_find_by_path(dt_root, "/ibm,pcie-slots"); + + if (!dt_slots) + return NULL; + + dt_for_each_child(dt_slots, slot_node) { + u32 reg[2]; + + if (!dt_node_is_compatible(slot_node, "ibm,pcie-root-port")) + continue; + + reg[0] = dt_prop_get_cell(slot_node, "reg", 0); + reg[1] = dt_prop_get_cell(slot_node, "reg", 1); + + if (reg[0] == chip_id && reg[1] == phb_idx) + return slot_node; + } + + return NULL; +} + +static struct dt_node *find_devfn(struct dt_node *bus, uint32_t bdfn) +{ + uint32_t port_dev_id = PCI_DEV(bdfn); + struct dt_node *child; + + dt_for_each_child(bus, child) + if (dt_prop_get_u32_def(child, "reg", ~0u) == port_dev_id) + return child; + + return NULL; +} + +/* Looks for a device device under this slot. */ +static struct dt_node *find_dev_under_slot(struct dt_node *slot, + struct pci_device *pd) +{ + struct dt_node *child, *wildcard = NULL; + + /* find the device in the parent bus node */ + dt_for_each_child(slot, child) { + u32 vdid; + + /* "pluggable" and "builtin" without unit addrs are wildcards */ + if (!dt_has_node_property(child, "reg", NULL)) { + if (wildcard) + prerror("Duplicate wildcard entry! Already have %s, found %s", + wildcard->name, child->name); + + wildcard = child; + continue; + } + + /* NB: the pci_device vdid is did,vid rather than vid,did */ + vdid = dt_prop_get_cell(child, "reg", 1) << 16 | + dt_prop_get_cell(child, "reg", 0); + + if (vdid == pd->vdid) + return child; + } + + if (!wildcard) + PCIDBG(pd->phb, pd->bdfn, + "Unable to find a slot for device %.4x:%.4x\n", + (pd->vdid & 0xffff0000) >> 16, pd->vdid & 0xffff); + + return wildcard; +} + +/* + * If the `pd` is a bridge this returns a node with a compatible of + * ibm,pcie-port to indicate it's a "slot node". + */ +static struct dt_node *find_node_for_dev(struct phb *phb, + struct pci_device *pd) +{ + struct dt_node *sw_slot, *sw_up; + + assert(pd); + + if (pd->slot && pd->slot->data) + return pd->slot->data; + + /* + * Example DT: + * /root-complex@8,5/switch-up@10b5,8725/down-port@4 + */ + switch (pd->dev_type) { + case PCIE_TYPE_ROOT_PORT: // find the root-complex@<chip>,<phb> node + return map_phb_to_slot(phb); + + case PCIE_TYPE_SWITCH_DNPORT: // grab the down-port@<devfn> + /* + * Walk up the topology to find the slot that contains + * the switch upstream port is connected to. In the example + * this would be the root-complex@8,5 node. 
+ */ + sw_slot = find_node_for_dev(phb, pd->parent->parent); + if (!sw_slot) + return NULL; + + /* find the per-device node for this switch */ + sw_up = find_dev_under_slot(sw_slot, pd->parent); + if (!sw_up) + return NULL; + + /* find this down port */ + return find_devfn(sw_up, pd->bdfn); + + default: + PCIDBG(phb, pd->bdfn, + "Trying to find a slot for non-pcie bridge type %d\n", + pd->dev_type); + assert(0); + } + + return NULL; +} + +struct dt_node *map_pci_dev_to_slot(struct phb *phb, struct pci_device *pd) +{ + struct dt_node *n; + char *path; + + assert(pd); + + /* + * Having a slot only makes sense for root and switch downstream ports. + * We don't care about PCI-X. + */ + if (pd->dev_type != PCIE_TYPE_SWITCH_DNPORT && + pd->dev_type != PCIE_TYPE_ROOT_PORT) + return NULL; + + PCIDBG(phb, pd->bdfn, "Finding slot\n"); + + n = find_node_for_dev(phb, pd); + if (!n) { + PCIDBG(phb, pd->bdfn, "No slot found!\n"); + } else { + path = dt_get_path(n); + PCIDBG(phb, pd->bdfn, "Slot found %s\n", path); + free(path); + } + + return n; +} + +int __print_slot(struct phb *phb, struct pci_device *pd, void *userdata); +int __print_slot(struct phb *phb, struct pci_device *pd, + void __unused *userdata) +{ + struct dt_node *node; + struct dt_node *pnode; + char *c = NULL; + u32 phandle = 0; + + if (!pd) + return 0; + + node = map_pci_dev_to_slot(phb, pd); + + /* at this point all node associations should be done */ + if (pd->dn && dt_has_node_property(pd->dn, "ibm,pcie-slot", NULL)) { + phandle = dt_prop_get_u32(pd->dn, "ibm,pcie-slot"); + pnode = dt_find_by_phandle(dt_root, phandle); + + assert(node == pnode); + } + + if (node) + c = dt_get_path(node); + + PCIDBG(phb, pd->bdfn, "Mapped to slot %s (%x)\n", + c ? c : "<null>", phandle); + + free(c); + + return 0; +} diff --git a/roms/skiboot/core/pci-opal.c b/roms/skiboot/core/pci-opal.c new file mode 100644 index 000000000..aa375c6aa --- /dev/null +++ b/roms/skiboot/core/pci-opal.c @@ -0,0 +1,1135 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * PCIe OPAL Calls + * + * Copyright 2013-2019 IBM Corp. 
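 * (Editor's note, not part of the skiboot diff: the calls below generally
 * follow the same pattern - look up the struct phb from phb_id, check that
 * the backend provides the operation, then dispatch under phb_lock().
 * Config-space accessors are generated by the OPAL_PCICFG_ACCESS_READ/WRITE
 * macros, with 16- and 32-bit reads converted to big-endian for the OS.)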
+ */ + +#include <skiboot.h> +#include <opal-api.h> +#include <pci.h> +#include <pci-cfg.h> +#include <pci-slot.h> +#include <opal-msg.h> +#include <timebase.h> +#include <timer.h> + +#define OPAL_PCICFG_ACCESS_READ(op, cb, type) \ +static int64_t opal_pci_config_##op(uint64_t phb_id, \ + uint64_t bus_dev_func, \ + uint64_t offset, type data) \ +{ \ + struct phb *phb = pci_get_phb(phb_id); \ + int64_t rc; \ + \ + if (!opal_addr_valid((void *)data)) \ + return OPAL_PARAMETER; \ + \ + if (!phb) \ + return OPAL_PARAMETER; \ + phb_lock(phb); \ + rc = phb->ops->cfg_##cb(phb, bus_dev_func, offset, data); \ + phb_unlock(phb); \ + \ + return rc; \ +} + +#define OPAL_PCICFG_ACCESS_WRITE(op, cb, type) \ +static int64_t opal_pci_config_##op(uint64_t phb_id, \ + uint64_t bus_dev_func, \ + uint64_t offset, type data) \ +{ \ + struct phb *phb = pci_get_phb(phb_id); \ + int64_t rc; \ + \ + if (!phb) \ + return OPAL_PARAMETER; \ + phb_lock(phb); \ + rc = phb->ops->cfg_##cb(phb, bus_dev_func, offset, data); \ + phb_unlock(phb); \ + \ + return rc; \ +} + +OPAL_PCICFG_ACCESS_READ(read_byte, read8, uint8_t *) +OPAL_PCICFG_ACCESS_READ(read_half_word, read16, uint16_t *) +OPAL_PCICFG_ACCESS_READ(read_word, read32, uint32_t *) +OPAL_PCICFG_ACCESS_WRITE(write_byte, write8, uint8_t) +OPAL_PCICFG_ACCESS_WRITE(write_half_word, write16, uint16_t) +OPAL_PCICFG_ACCESS_WRITE(write_word, write32, uint32_t) + +static int64_t opal_pci_config_read_half_word_be(uint64_t phb_id, + uint64_t bus_dev_func, + uint64_t offset, + __be16 *__data) +{ + uint16_t data; + int64_t rc; + + rc = opal_pci_config_read_half_word(phb_id, bus_dev_func, offset, &data); + *__data = cpu_to_be16(data); + + return rc; +} + +static int64_t opal_pci_config_read_word_be(uint64_t phb_id, + uint64_t bus_dev_func, + uint64_t offset, + __be32 *__data) +{ + uint32_t data; + int64_t rc; + + rc = opal_pci_config_read_word(phb_id, bus_dev_func, offset, &data); + *__data = cpu_to_be32(data); + + return rc; +} + + +opal_call(OPAL_PCI_CONFIG_READ_BYTE, opal_pci_config_read_byte, 4); +opal_call(OPAL_PCI_CONFIG_READ_HALF_WORD, opal_pci_config_read_half_word_be, 4); +opal_call(OPAL_PCI_CONFIG_READ_WORD, opal_pci_config_read_word_be, 4); +opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, opal_pci_config_write_byte, 4); +opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, opal_pci_config_write_half_word, 4); +opal_call(OPAL_PCI_CONFIG_WRITE_WORD, opal_pci_config_write_word, 4); + +static struct lock opal_eeh_evt_lock = LOCK_UNLOCKED; +static uint64_t opal_eeh_evt = 0; + +void opal_pci_eeh_set_evt(uint64_t phb_id) +{ + lock(&opal_eeh_evt_lock); + opal_eeh_evt |= 1ULL << phb_id; + opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, OPAL_EVENT_PCI_ERROR); + unlock(&opal_eeh_evt_lock); +} + +void opal_pci_eeh_clear_evt(uint64_t phb_id) +{ + lock(&opal_eeh_evt_lock); + opal_eeh_evt &= ~(1ULL << phb_id); + if (!opal_eeh_evt) + opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0); + unlock(&opal_eeh_evt_lock); +} + +static int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number, + uint8_t *freeze_state, + __be16 *__pci_error_type, + __be64 *__phb_status) +{ + struct phb *phb = pci_get_phb(phb_id); + uint16_t pci_error_type; + int64_t rc; + + if (!opal_addr_valid(freeze_state) || !opal_addr_valid(__pci_error_type) + || !opal_addr_valid(__phb_status)) + return OPAL_PARAMETER; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->eeh_freeze_status) + return OPAL_UNSUPPORTED; + phb_lock(phb); + + if (__phb_status) + prlog(PR_ERR, "PHB#%04llx: %s: deprecated PHB status\n", + phb_id, __func__); 
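	/*
	 * Editor's note, not part of the skiboot diff: the extended PHB
	 * status argument is deprecated - the caller-supplied pointer is only
	 * used to warn above, and NULL is passed to the backend. The error
	 * type comes back host-endian and is converted to big-endian for the
	 * OS below.
	 */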
+ + rc = phb->ops->eeh_freeze_status(phb, pe_number, freeze_state, + &pci_error_type, NULL); + *__pci_error_type = cpu_to_be16(pci_error_type); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_EEH_FREEZE_STATUS, opal_pci_eeh_freeze_status, 5); + +static int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number, + uint64_t eeh_action_token) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->eeh_freeze_clear) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->eeh_freeze_clear(phb, pe_number, eeh_action_token); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, opal_pci_eeh_freeze_clear, 3); + +static int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number, + uint64_t eeh_action_token) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->eeh_freeze_set) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->eeh_freeze_set(phb, pe_number, eeh_action_token); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_EEH_FREEZE_SET, opal_pci_eeh_freeze_set, 3); + +static int64_t opal_pci_err_inject(uint64_t phb_id, uint64_t pe_number, + uint32_t type, uint32_t func, + uint64_t addr, uint64_t mask) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops || !phb->ops->err_inject) + return OPAL_UNSUPPORTED; + + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) + return OPAL_PARAMETER; + + phb_lock(phb); + rc = phb->ops->err_inject(phb, pe_number, type, func, addr, mask); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_ERR_INJECT, opal_pci_err_inject, 6); + +static int64_t opal_pci_phb_mmio_enable(uint64_t phb_id, uint16_t window_type, + uint16_t window_num, uint16_t enable) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->phb_mmio_enable) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->phb_mmio_enable(phb, window_type, window_num, enable); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_PHB_MMIO_ENABLE, opal_pci_phb_mmio_enable, 4); + +static int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, + uint16_t window_type, + uint16_t window_num, + uint64_t addr, + uint64_t pci_addr, + uint64_t size) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_phb_mem_window) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->set_phb_mem_window(phb, window_type, window_num, + addr, pci_addr, size); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_PHB_MEM_WINDOW, opal_pci_set_phb_mem_window, 6); + +static int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint64_t pe_number, + uint16_t window_type, + uint16_t window_num, + uint16_t segment_num) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->map_pe_mmio_window) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->map_pe_mmio_window(phb, pe_number, window_type, + window_num, segment_num); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, opal_pci_map_pe_mmio_window, 5); + +static int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number, + uint64_t bus_dev_func, uint8_t bus_compare, + uint8_t dev_compare, uint8_t func_compare, + uint8_t pe_action) +{ + struct phb *phb = pci_get_phb(phb_id); + 
int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_pe) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->set_pe(phb, pe_number, bus_dev_func, bus_compare, + dev_compare, func_compare, pe_action); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_PE, opal_pci_set_pe, 7); + +static int64_t opal_pci_set_peltv(uint64_t phb_id, uint32_t parent_pe, + uint32_t child_pe, uint8_t state) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_peltv) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->set_peltv(phb, parent_pe, child_pe, state); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_PELTV, opal_pci_set_peltv, 4); + +static int64_t opal_pci_set_mve(uint64_t phb_id, uint32_t mve_number, + uint64_t pe_number) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_mve) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->set_mve(phb, mve_number, pe_number); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_MVE, opal_pci_set_mve, 3); + +static int64_t opal_pci_set_mve_enable(uint64_t phb_id, uint32_t mve_number, + uint32_t state) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_mve_enable) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->set_mve_enable(phb, mve_number, state); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_MVE_ENABLE, opal_pci_set_mve_enable, 3); + +static int64_t opal_pci_msi_eoi(uint64_t phb_id, + uint32_t hwirq) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->pci_msi_eoi) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->pci_msi_eoi(phb, hwirq); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_MSI_EOI, opal_pci_msi_eoi, 2); + +static int64_t opal_pci_tce_kill(uint64_t phb_id, + uint32_t kill_type, + uint64_t pe_number, uint32_t tce_size, + uint64_t dma_addr, uint32_t npages) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->tce_kill) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->tce_kill(phb, kill_type, pe_number, tce_size, + dma_addr, npages); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_TCE_KILL, opal_pci_tce_kill, 6); + +static int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint64_t pe_number, + uint32_t xive_num) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_xive_pe) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->set_xive_pe(phb, pe_number, xive_num); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_XIVE_PE, opal_pci_set_xive_pe, 3); + +static int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number, + uint32_t xive_num, uint8_t msi_range, + __be32 *__msi_address, __be32 *__message_data) +{ + struct phb *phb = pci_get_phb(phb_id); + uint32_t msi_address; + uint32_t message_data; + int64_t rc; + + if (!opal_addr_valid(__msi_address) || !opal_addr_valid(__message_data)) + return OPAL_PARAMETER; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_msi_32) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->get_msi_32(phb, mve_number, xive_num, msi_range, + &msi_address, &message_data); + phb_unlock(phb); + + *__msi_address = cpu_to_be32(msi_address); + *__message_data = 
cpu_to_be32(message_data); + + return rc; +} +opal_call(OPAL_GET_MSI_32, opal_get_msi_32, 6); + +static int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number, + uint32_t xive_num, uint8_t msi_range, + __be64 *__msi_address, __be32 *__message_data) +{ + struct phb *phb = pci_get_phb(phb_id); + uint64_t msi_address; + uint32_t message_data; + int64_t rc; + + if (!opal_addr_valid(__msi_address) || !opal_addr_valid(__message_data)) + return OPAL_PARAMETER; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_msi_64) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->get_msi_64(phb, mve_number, xive_num, msi_range, + &msi_address, &message_data); + phb_unlock(phb); + + *__msi_address = cpu_to_be64(msi_address); + *__message_data = cpu_to_be32(message_data); + + return rc; +} +opal_call(OPAL_GET_MSI_64, opal_get_msi_64, 6); + +static int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint64_t pe_number, + uint16_t window_id, + uint16_t tce_levels, + uint64_t tce_table_addr, + uint64_t tce_table_size, + uint64_t tce_page_size) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->map_pe_dma_window) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->map_pe_dma_window(phb, pe_number, window_id, + tce_levels, tce_table_addr, + tce_table_size, tce_page_size); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW, opal_pci_map_pe_dma_window, 7); + +static int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, + uint64_t pe_number, + uint16_t window_id, + uint64_t pci_start_addr, + uint64_t pci_mem_size) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->map_pe_dma_window_real) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->map_pe_dma_window_real(phb, pe_number, window_id, + pci_start_addr, pci_mem_size); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, opal_pci_map_pe_dma_window_real, 5); + +static int64_t opal_phb_set_option(uint64_t phb_id, uint64_t opt, + uint64_t setting) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + + if (!phb->ops->set_option) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = phb->ops->set_option(phb, opt, setting); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PHB_SET_OPTION, opal_phb_set_option, 3); + +static int64_t opal_phb_get_option(uint64_t phb_id, uint64_t opt, + __be64 *setting) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb || !setting) + return OPAL_PARAMETER; + + if (!phb->ops->get_option) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = phb->ops->get_option(phb, opt, setting); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PHB_GET_OPTION, opal_phb_get_option, 3); + +static int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, + uint8_t assert_state) +{ + struct pci_slot *slot = pci_slot_find(id); + struct phb *phb = slot ? 
slot->phb : NULL; + int64_t rc = OPAL_SUCCESS; + + if (!slot || !phb) + return OPAL_PARAMETER; + if (assert_state != OPAL_ASSERT_RESET && + assert_state != OPAL_DEASSERT_RESET) + return OPAL_PARAMETER; + + phb_lock(phb); + + switch(reset_scope) { + case OPAL_RESET_PHB_COMPLETE: + /* Complete reset is applicable to PHB slot only */ + if (!slot->ops.creset || slot->pd) { + rc = OPAL_UNSUPPORTED; + break; + } + + if (assert_state != OPAL_ASSERT_RESET) + break; + + rc = slot->ops.creset(slot); + if (rc < 0) + prlog(PR_ERR, "SLOT-%016llx: Error %lld on complete reset\n", + slot->id, rc); + break; + case OPAL_RESET_PCI_FUNDAMENTAL: + if (!slot->ops.freset) { + rc = OPAL_UNSUPPORTED; + break; + } + + /* We need do nothing on deassert time */ + if (assert_state != OPAL_ASSERT_RESET) + break; + + rc = slot->ops.freset(slot); + if (rc < 0) + prlog(PR_ERR, "SLOT-%016llx: Error %lld on fundamental reset\n", + slot->id, rc); + break; + case OPAL_RESET_PCI_HOT: + if (!slot->ops.hreset) { + rc = OPAL_UNSUPPORTED; + break; + } + + /* We need do nothing on deassert time */ + if (assert_state != OPAL_ASSERT_RESET) + break; + + rc = slot->ops.hreset(slot); + if (rc < 0) + prlog(PR_ERR, "SLOT-%016llx: Error %lld on hot reset\n", + slot->id, rc); + break; + case OPAL_RESET_PCI_IODA_TABLE: + /* It's allowed on PHB slot only */ + if (slot->pd || !phb->ops || !phb->ops->ioda_reset) { + rc = OPAL_UNSUPPORTED; + break; + } + + if (assert_state != OPAL_ASSERT_RESET) + break; + + rc = phb->ops->ioda_reset(phb, true); + break; + case OPAL_RESET_PHB_ERROR: + /* It's allowed on PHB slot only */ + if (slot->pd || !phb->ops || !phb->ops->papr_errinjct_reset) { + rc = OPAL_UNSUPPORTED; + break; + } + + if (assert_state != OPAL_ASSERT_RESET) + break; + + rc = phb->ops->papr_errinjct_reset(phb); + break; + default: + rc = OPAL_UNSUPPORTED; + } + phb_unlock(phb); + + return (rc > 0) ? tb_to_msecs(rc) : rc; +} +opal_call(OPAL_PCI_RESET, opal_pci_reset, 3); + +static int64_t opal_pci_reinit(uint64_t phb_id, + uint64_t reinit_scope, + uint64_t data) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops || !phb->ops->pci_reinit) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = phb->ops->pci_reinit(phb, reinit_scope, data); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_REINIT, opal_pci_reinit, 3); + +static int64_t opal_pci_poll(uint64_t id) +{ + struct pci_slot *slot = pci_slot_find(id); + struct phb *phb = slot ? slot->phb : NULL; + int64_t rc; + + if (!slot || !phb) + return OPAL_PARAMETER; + if (!slot->ops.run_sm) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = slot->ops.run_sm(slot); + phb_unlock(phb); + + /* Return milliseconds for caller to sleep: round up */ + if (rc > 0) { + rc = tb_to_msecs(rc); + if (rc == 0) + rc = 1; + } + + return rc; +} +opal_call(OPAL_PCI_POLL, opal_pci_poll, 1); + +static int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data) +{ + struct pci_slot *slot = pci_slot_find(id); + struct phb *phb = slot ? 
slot->phb : NULL; + uint8_t *presence = (uint8_t *)data; + int64_t rc; + + if (!opal_addr_valid(presence)) + return OPAL_PARAMETER; + + if (!slot || !phb) + return OPAL_PARAMETER; + if (!slot->ops.get_presence_state) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = slot->ops.get_presence_state(slot, presence); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_GET_PRESENCE_STATE, opal_pci_get_presence_state, 2); + +static int64_t opal_pci_get_power_state(uint64_t id, uint64_t data) +{ + struct pci_slot *slot = pci_slot_find(id); + struct phb *phb = slot ? slot->phb : NULL; + uint8_t *power_state = (uint8_t *)data; + int64_t rc; + + if (!opal_addr_valid(power_state)) + return OPAL_PARAMETER; + + if (!slot || !phb) + return OPAL_PARAMETER; + if (!slot->ops.get_power_state) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = slot->ops.get_power_state(slot, power_state); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_GET_POWER_STATE, opal_pci_get_power_state, 2); + +static u32 get_slot_phandle(struct pci_slot *slot) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + + if (pd) + return pd->dn->phandle; + else + return phb->dt_node->phandle; +} + +static void rescan_slot_devices(struct pci_slot *slot) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + + /* + * prepare_link_change() is called (if needed) by the state + * machine during the slot reset or link polling + */ + if (phb->phb_type != phb_type_npu_v2_opencapi) { + pci_scan_bus(phb, pd->secondary_bus, + pd->subordinate_bus, &pd->children, pd, true); + pci_add_device_nodes(phb, &pd->children, pd->dn, + &phb->lstate, 0); + } else { + pci_scan_bus(phb, 0, 0xff, &phb->devices, NULL, true); + pci_add_device_nodes(phb, &phb->devices, + phb->dt_node, &phb->lstate, 0); + phb->ops->phb_final_fixup(phb); + } +} + +static void remove_slot_devices(struct pci_slot *slot) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + + if (phb->phb_type != phb_type_npu_v2_opencapi) + pci_remove_bus(phb, &pd->children); + else + pci_remove_bus(phb, &phb->devices); +} + +static void link_up_timer(struct timer *t, void *data, + uint64_t now __unused) +{ + struct pci_slot *slot = data; + struct phb *phb = slot->phb; + uint8_t link; + int64_t rc = 0; + + if (!phb_try_lock(phb)) { + schedule_timer(&slot->timer, msecs_to_tb(10)); + return; + } + + rc = slot->ops.run_sm(slot); + if (rc < 0) + goto out; + if (rc > 0) { + schedule_timer(t, rc); + phb_unlock(phb); + return; + } + + if (slot->ops.get_link_state(slot, &link) != OPAL_SUCCESS) + link = 0; + if (!link) { + rc = OPAL_HARDWARE; + goto out; + } + + rescan_slot_devices(slot); +out: + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, + cpu_to_be64(slot->async_token), + cpu_to_be64(get_slot_phandle(slot)), + cpu_to_be64(slot->power_state), + rc <= 0 ? cpu_to_be64(rc) : cpu_to_be64(OPAL_BUSY)); + phb_unlock(phb); +} + +static bool training_needed(struct pci_slot *slot) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + + /* only for opencapi slots for now */ + if (!pd && phb->phb_type == phb_type_npu_v2_opencapi) + return true; + return false; +} + +static void wait_for_link_up_and_rescan(struct pci_slot *slot) +{ + int64_t rc = 1; + + /* + * Links for PHB slots need to be retrained by triggering a + * fundamental reset. 
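
The timer callback pattern used by link_up_timer() above (and by set_power_timer() further down) is: try to take the PHB lock, and if that fails or the slot state machine asks for more time, re-arm the timer and return; only when the state machine completes does the rescan and the async completion message get queued. Below is a minimal, self-contained sketch of that re-arming loop; the toy_* names, the boolean lock flag and the 10ms retry interval are stand-ins for illustration, not the skiboot timer/lock API.

#include <stdbool.h>
#include <stdio.h>

struct toy_timer { int pending_ms; };

static bool toy_trylock(bool *lock) { if (*lock) return false; *lock = true; return true; }
static void toy_unlock(bool *lock) { *lock = false; }
static void toy_schedule(struct toy_timer *t, int ms) { t->pending_ms = ms; }

/* State machine stub: >0 means "poll me again in that many ms", 0 means done. */
static int toy_run_sm(int *steps) { return (*steps)-- > 0 ? 10 : 0; }

static void toy_link_up_timer(struct toy_timer *t, bool *phb_lock, int *steps)
{
	int rc;

	if (!toy_trylock(phb_lock)) {		/* lock busy: back off and retry */
		toy_schedule(t, 10);
		return;
	}
	rc = toy_run_sm(steps);
	if (rc > 0)
		toy_schedule(t, rc);		/* not finished yet: re-arm */
	else
		printf("link handled, rc=%d (rescan + completion message go here)\n", rc);
	toy_unlock(phb_lock);
}

int main(void)
{
	struct toy_timer t = { 0 };
	bool lock = false;
	int steps = 3;

	do {
		t.pending_ms = 0;
		toy_link_up_timer(&t, &lock, &steps);	/* a real timer would fire after the delay */
	} while (t.pending_ms);
	return 0;
}
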
Other slots also need to be tested for + * readiness + */ + if (training_needed(slot)) { + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + rc = slot->ops.freset(slot); + if (rc < 0) { + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, + cpu_to_be64(slot->async_token), + cpu_to_be64(get_slot_phandle(slot)), + cpu_to_be64(slot->power_state), + cpu_to_be64(rc)) + return; + } + } else { + pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL); + rc = msecs_to_tb(20); + } + init_timer(&slot->timer, link_up_timer, slot); + schedule_timer(&slot->timer, rc); +} + +static void set_power_timer(struct timer *t __unused, void *data, + uint64_t now __unused) +{ + struct pci_slot *slot = data; + struct phb *phb = slot->phb; + + if (!phb_try_lock(phb)) { + schedule_timer(&slot->timer, msecs_to_tb(10)); + return; + } + + switch (slot->state) { + case PCI_SLOT_STATE_SPOWER_START: + if (slot->retries-- == 0) { + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, + cpu_to_be64(slot->async_token), + cpu_to_be64(get_slot_phandle(slot)), + cpu_to_be64(slot->power_state), + cpu_to_be64(OPAL_BUSY)); + } else { + schedule_timer(&slot->timer, msecs_to_tb(10)); + } + + break; + case PCI_SLOT_STATE_SPOWER_DONE: + if (slot->power_state == OPAL_PCI_SLOT_POWER_OFF) { + remove_slot_devices(slot); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL, + cpu_to_be64(slot->async_token), + cpu_to_be64(get_slot_phandle(slot)), + cpu_to_be64(OPAL_PCI_SLOT_POWER_OFF), + cpu_to_be64(OPAL_SUCCESS)); + break; + } + + /* Power on */ + wait_for_link_up_and_rescan(slot); + break; + default: + prlog(PR_ERR, "PCI SLOT %016llx: Unexpected state 0x%08x\n", + slot->id, slot->state); + } + phb_unlock(phb); +} + +static int64_t opal_pci_set_power_state(uint64_t async_token, + uint64_t id, + uint64_t data) +{ + struct pci_slot *slot = pci_slot_find(id); + struct phb *phb = slot ? slot->phb : NULL; + struct pci_device *pd = slot ? slot->pd : NULL; + uint8_t *state = (uint8_t *)data; + int64_t rc; + + if (!slot || !phb) + return OPAL_PARAMETER; + + if (!opal_addr_valid(state)) + return OPAL_PARAMETER; + + phb_lock(phb); + switch (*state) { + case OPAL_PCI_SLOT_POWER_OFF: + if (!slot->ops.prepare_link_change || + !slot->ops.set_power_state) { + phb_unlock(phb); + return OPAL_UNSUPPORTED; + } + + slot->async_token = async_token; + slot->ops.prepare_link_change(slot, false); + rc = slot->ops.set_power_state(slot, PCI_SLOT_POWER_OFF); + break; + case OPAL_PCI_SLOT_POWER_ON: + if (!slot->ops.set_power_state || + !slot->ops.get_link_state) { + phb_unlock(phb); + return OPAL_UNSUPPORTED; + } + + slot->async_token = async_token; + rc = slot->ops.set_power_state(slot, PCI_SLOT_POWER_ON); + break; + case OPAL_PCI_SLOT_OFFLINE: + if (!pd) { + phb_unlock(phb); + return OPAL_PARAMETER; + } + + pci_remove_bus(phb, &pd->children); + phb_unlock(phb); + return OPAL_SUCCESS; + case OPAL_PCI_SLOT_ONLINE: + if (!pd) { + phb_unlock(phb); + return OPAL_PARAMETER; + } + pci_scan_bus(phb, pd->secondary_bus, pd->subordinate_bus, + &pd->children, pd, true); + pci_add_device_nodes(phb, &pd->children, pd->dn, + &phb->lstate, 0); + phb_unlock(phb); + return OPAL_SUCCESS; + default: + rc = OPAL_PARAMETER; + } + + /* + * OPAL_ASYNC_COMPLETION is returned when delay is needed to change + * the power state in the backend. When it can be finished without + * delay, OPAL_SUCCESS is returned. The PCI topology needs to be + * updated in both cases. 
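
For callers, the contract spelled out in the comment above is: OPAL_SUCCESS means the power change and topology update already happened, OPAL_ASYNC_COMPLETION means the final status arrives later in an OPAL_MSG_ASYNC_COMP message tagged with the async token, and anything else is an immediate failure. A hedged caller-side sketch of that decision follows; fake_set_power_state() and fake_wait_async_msg() are placeholders written for this example, not the OPAL or Linux interfaces.

#include <stdint.h>
#include <stdio.h>

/* Placeholder codes mirroring the convention described above. */
enum { RC_SUCCESS = 0, RC_ASYNC_COMPLETION = 1, RC_ERROR = -1 };

/* Pretend firmware call: flip 'async' to exercise both paths. */
static int fake_set_power_state(int async) { return async ? RC_ASYNC_COMPLETION : RC_SUCCESS; }

/* Pretend wait for the async completion message carrying this token. */
static int fake_wait_async_msg(uint64_t token) { (void)token; return RC_SUCCESS; }

static int set_slot_power(uint64_t token, int async)
{
	int rc = fake_set_power_state(async);

	if (rc == RC_SUCCESS)
		return 0;				/* finished synchronously */
	if (rc == RC_ASYNC_COMPLETION)
		return fake_wait_async_msg(token);	/* result delivered later */
	return rc;					/* immediate failure */
}

int main(void)
{
	printf("sync: %d, async: %d\n", set_slot_power(1, 0), set_slot_power(2, 1));
	return 0;
}
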
+ */ + if (rc == OPAL_ASYNC_COMPLETION) { + slot->retries = 500; + init_timer(&slot->timer, set_power_timer, slot); + schedule_timer(&slot->timer, msecs_to_tb(10)); + } else if (rc == OPAL_SUCCESS) { + if (*state == OPAL_PCI_SLOT_POWER_OFF) { + remove_slot_devices(slot); + } else { + wait_for_link_up_and_rescan(slot); + rc = OPAL_ASYNC_COMPLETION; + } + } + + phb_unlock(phb); + return rc; +} +opal_call(OPAL_PCI_SET_POWER_STATE, opal_pci_set_power_state, 3); + +static int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, + void *diag_buffer, + uint64_t diag_buffer_len) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!opal_addr_valid(diag_buffer)) + return OPAL_PARAMETER; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_diag_data2) + return OPAL_UNSUPPORTED; + phb_lock(phb); + rc = phb->ops->get_diag_data2(phb, diag_buffer, diag_buffer_len); + phb_unlock(phb); + + return rc; +} +opal_call(OPAL_PCI_GET_PHB_DIAG_DATA2, opal_pci_get_phb_diag_data2, 3); + +static int64_t opal_pci_next_error(uint64_t phb_id, __be64 *__first_frozen_pe, + __be16 *__pci_error_type, __be16 *__severity) +{ + struct phb *phb = pci_get_phb(phb_id); + uint64_t first_frozen_pe; + uint16_t pci_error_type; + uint16_t severity; + int64_t rc; + + if (!opal_addr_valid(__first_frozen_pe) || + !opal_addr_valid(__pci_error_type) || !opal_addr_valid(__severity)) + return OPAL_PARAMETER; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->next_error) + return OPAL_UNSUPPORTED; + phb_lock(phb); + + opal_pci_eeh_clear_evt(phb_id); + rc = phb->ops->next_error(phb, &first_frozen_pe, &pci_error_type, + &severity); + phb_unlock(phb); + + *__first_frozen_pe = cpu_to_be64(first_frozen_pe); + *__pci_error_type = cpu_to_be16(pci_error_type); + *__severity = cpu_to_be16(severity); + + return rc; +} +opal_call(OPAL_PCI_NEXT_ERROR, opal_pci_next_error, 4); + +static int64_t opal_pci_set_phb_capi_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_capi_mode) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = phb->ops->set_capi_mode(phb, mode, pe_number); + phb_unlock(phb); + return rc; +} +opal_call(OPAL_PCI_SET_PHB_CAPI_MODE, opal_pci_set_phb_capi_mode, 3); + +static int64_t opal_pci_set_p2p(uint64_t phbid_init, uint64_t phbid_target, + uint64_t desc, uint16_t pe_number) +{ + struct phb *phb_init = pci_get_phb(phbid_init); + struct phb *phb_target = pci_get_phb(phbid_target); + + if (!phb_init || !phb_target) + return OPAL_PARAMETER; + /* + * Having the 2 devices under the same PHB may require tuning + * the configuration of intermediate switch(es), more easily + * done from linux. And it shouldn't require a PHB config + * change. + * Return an error for the time being. + */ + if (phb_init == phb_target) + return OPAL_UNSUPPORTED; + if (!phb_init->ops->set_p2p || !phb_target->ops->set_p2p) + return OPAL_UNSUPPORTED; + /* + * Loads would be supported on p9 if the 2 devices are under + * the same PHB, but we ruled it out above. 
+ */ + if (desc & OPAL_PCI_P2P_LOAD) + return OPAL_UNSUPPORTED; + + phb_lock(phb_init); + phb_init->ops->set_p2p(phb_init, OPAL_PCI_P2P_INITIATOR, desc, + pe_number); + phb_unlock(phb_init); + + phb_lock(phb_target); + phb_target->ops->set_p2p(phb_target, OPAL_PCI_P2P_TARGET, desc, + pe_number); + phb_unlock(phb_target); + return OPAL_SUCCESS; +} +opal_call(OPAL_PCI_SET_P2P, opal_pci_set_p2p, 4); + +static int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, __be64 *__addr) +{ + struct phb *phb = pci_get_phb(phb_id); + uint64_t addr; + + if (!opal_addr_valid(__addr)) + return OPAL_PARAMETER; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_tunnel_bar) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + phb->ops->get_tunnel_bar(phb, &addr); + phb_unlock(phb); + + *__addr = cpu_to_be64(addr); + + return OPAL_SUCCESS; +} +opal_call(OPAL_PCI_GET_PBCQ_TUNNEL_BAR, opal_pci_get_pbcq_tunnel_bar, 2); + +static int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_tunnel_bar) + return OPAL_UNSUPPORTED; + + phb_lock(phb); + rc = phb->ops->set_tunnel_bar(phb, addr); + phb_unlock(phb); + return rc; +} +opal_call(OPAL_PCI_SET_PBCQ_TUNNEL_BAR, opal_pci_set_pbcq_tunnel_bar, 2); diff --git a/roms/skiboot/core/pci-quirk.c b/roms/skiboot/core/pci-quirk.c new file mode 100644 index 000000000..5c8b091ea --- /dev/null +++ b/roms/skiboot/core/pci-quirk.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Deal with PCI device quirks + * + * Copyright 2017-2018 IBM Corp. + */ + +#define pr_fmt(fmt) "PCI-QUIRK: " fmt + +#include <skiboot.h> +#include <pci.h> +#include <pci-cfg.h> +#include <pci-quirk.h> +#include <platform.h> +#include <ast.h> + +static int64_t cfg_block_filter(void *dev __unused, + struct pci_cfg_reg_filter *pcrf __unused, + uint32_t offset __unused, uint32_t len, + uint32_t *data, bool write) +{ + if (write) + return OPAL_SUCCESS; + + switch (len) { + case 4: + *data = 0x0; + return OPAL_SUCCESS; + case 2: + *((uint16_t *)data) = 0x0; + return OPAL_SUCCESS; + case 1: + *((uint8_t *)data) = 0x0; + return OPAL_SUCCESS; + } + + return OPAL_PARAMETER; /* should never happen */ +} + +/* blocks config accesses to registers in the range: [start, end] */ +#define BLOCK_CFG_RANGE(pd, start, end) \ + pci_add_cfg_reg_filter(pd, start, end - start + 1, \ + PCI_REG_FLAG_WRITE | PCI_REG_FLAG_READ, \ + cfg_block_filter); + +static void quirk_microsemi_gen4_sw(struct phb *phb, struct pci_device *pd) +{ + uint8_t data; + bool frozen; + int offset; + int start; + + pci_check_clear_freeze(phb); + + /* + * Reading from 0xff should trigger a UR on the affected switches. 
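
The probing loop that follows turns every contiguous run of config offsets that freezes the PE into one blocked register range, using start < 0 as the "not currently inside a bad range" marker and closing the final range after the loop if it ran to the end of config space. The same range-tracking idiom in isolation, with probe_freezes() standing in for the config read plus pci_check_clear_freeze():

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for "read a byte at this offset and see whether the PE froze". */
static bool probe_freezes(int offset)
{
	return (offset >= 0x200 && offset <= 0x2ff) || offset >= 0xf00;
}

/* Report every maximal contiguous range of offsets that trips a freeze. */
static void find_frozen_ranges(int cfg_size)
{
	int offset, start = -1;

	for (offset = 0; offset < cfg_size; offset++) {
		bool frozen = probe_freezes(offset);

		if (start < 0 && frozen) {
			start = offset;			/* entering a bad range */
		} else if (start >= 0 && !frozen) {
			printf("block [%03x..%03x]\n", start, offset - 1);
			start = -1;			/* left the bad range */
		}
	}
	if (start >= 0)					/* range ran to the end of config space */
		printf("block [%03x..%03x]\n", start, cfg_size - 1);
}

int main(void) { find_frozen_ranges(0x1000); return 0; }
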
+ * If we don't get a freeze then we don't need the workaround + */ + pci_cfg_read8(phb, pd->bdfn, 0xff, &data); + frozen = pci_check_clear_freeze(phb); + if (!frozen) + return; + + for (start = -1, offset = 0; offset < 4096; offset++) { + pci_cfg_read8(phb, pd->bdfn, offset, &data); + frozen = pci_check_clear_freeze(phb); + + if (start < 0 && frozen) { /* new UR range */ + start = offset; + } else if (start >= 0 && !frozen) { /* end of range */ + BLOCK_CFG_RANGE(pd, start, offset - 1); + PCINOTICE(phb, pd->bdfn, "Applied UR workaround to [%03x..%03x]\n", start, offset - 1); + + start = -1; + } + } + + /* range lasted until the end of config space */ + if (start >= 0) { + BLOCK_CFG_RANGE(pd, start, 0xfff); + PCINOTICE(phb, pd->bdfn, "Applied UR workaround to [%03x..fff]\n", start); + } +} + +static void quirk_astbmc_vga(struct phb *phb __unused, + struct pci_device *pd) +{ + struct dt_node *np = pd->dn; + uint32_t revision, mcr_configuration, mcr_scu_mpll, mcr_scu_strap; + + if (ast_sio_is_enabled()) { + revision = ast_ahb_readl(SCU_REVISION_ID); + mcr_configuration = ast_ahb_readl(MCR_CONFIGURATION); + mcr_scu_mpll = ast_ahb_readl(MCR_SCU_MPLL); + mcr_scu_strap = ast_ahb_readl(MCR_SCU_STRAP); + } else { + /* Previously we would warn, now SIO disabled by design */ + prlog(PR_INFO, "Assumed platform default parameters for %s\n", + __func__); + revision = bmc_platform->hw->scu_revision_id; + mcr_configuration = bmc_platform->hw->mcr_configuration; + mcr_scu_mpll = bmc_platform->hw->mcr_scu_mpll; + mcr_scu_strap = bmc_platform->hw->mcr_scu_strap; + } + + dt_add_property_cells(np, "aspeed,scu-revision-id", revision); + dt_add_property_cells(np, "aspeed,mcr-configuration", mcr_configuration); + dt_add_property_cells(np, "aspeed,mcr-scu-mpll", mcr_scu_mpll); + dt_add_property_cells(np, "aspeed,mcr-scu-strap", mcr_scu_strap); +} + +/* Quirks are: {fixup function, vendor ID, (device ID or PCI_ANY_ID)} */ +static const struct pci_quirk quirk_table[] = { + /* ASPEED 2400 VGA device */ + { 0x1a03, 0x2000, &quirk_astbmc_vga }, + { 0x11f8, 0x4052, &quirk_microsemi_gen4_sw }, + { 0, 0, NULL } +}; + +static void __pci_handle_quirk(struct phb *phb, struct pci_device *pd, + const struct pci_quirk *quirks) +{ + while (quirks->vendor_id) { + if (quirks->vendor_id == PCI_VENDOR_ID(pd->vdid) && + (quirks->device_id == PCI_ANY_ID || + quirks->device_id == PCI_DEVICE_ID(pd->vdid))) + quirks->fixup(phb, pd); + quirks++; + } +} + +void pci_handle_quirk(struct phb *phb, struct pci_device *pd) +{ + __pci_handle_quirk(phb, pd, quirk_table); +} diff --git a/roms/skiboot/core/pci-slot.c b/roms/skiboot/core/pci-slot.c new file mode 100644 index 000000000..71d3d329c --- /dev/null +++ b/roms/skiboot/core/pci-slot.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * PCI Slots + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <opal-msg.h> +#include <pci-cfg.h> +#include <pci.h> +#include <pci-slot.h> + +/* Debugging options */ +#define PCI_SLOT_PREFIX "PCI-SLOT-%016llx " +#define PCI_SLOT_DBG(s, fmt, a...) \ + prlog(PR_DEBUG, PCI_SLOT_PREFIX fmt, (s)->id, ##a) + +static void pci_slot_prepare_link_change(struct pci_slot *slot, bool up) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t aercap, mask; + + /* + * Mask the link down and receiver error before the link becomes + * down. Otherwise, unmask the errors when the link is up. 
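
Masking the surprise-down and receiver errors before the link drops, and unmasking them once it is back, is a read-modify-write of the AER mask registers keyed on the direction of the change, as done just below. A tiny sketch of that toggle; the bit position here is invented for illustration, the real ones are the PCIECAP_AER_* definitions from pci-cfg.h.

#include <stdint.h>
#include <stdio.h>

#define FAKE_MASK_SURPRISE_DOWN  (1u << 5)	/* illustrative bit position only */

/* up == 1: unmask (we want to see the error again); up == 0: mask it. */
static uint32_t toggle_surprise_down(uint32_t mask_reg, int up)
{
	if (up)
		mask_reg &= ~FAKE_MASK_SURPRISE_DOWN;
	else
		mask_reg |= FAKE_MASK_SURPRISE_DOWN;
	return mask_reg;
}

int main(void)
{
	uint32_t reg = 0;

	reg = toggle_surprise_down(reg, 0);	/* link going down: mask the event */
	printf("masked:   %#x\n", reg);
	reg = toggle_surprise_down(reg, 1);	/* link back up: unmask it */
	printf("unmasked: %#x\n", reg);
	return 0;
}
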
+ */ + if (pci_has_cap(pd, PCIECAP_ID_AER, true)) { + aercap = pci_cap(pd, PCIECAP_ID_AER, true); + + /* Mask link surprise down event. The event is always + * masked when the associated PCI slot supports PCI + * surprise hotplug. We needn't toggle it when the link + * bounces caused by reset and just keep it always masked. + */ + if (!pd->slot || !pd->slot->surprise_pluggable) { + pci_cfg_read32(phb, pd->bdfn, + aercap + PCIECAP_AER_UE_MASK, &mask); + if (up) + mask &= ~PCIECAP_AER_UE_MASK_SURPRISE_DOWN; + else + mask |= PCIECAP_AER_UE_MASK_SURPRISE_DOWN; + pci_cfg_write32(phb, pd->bdfn, + aercap + PCIECAP_AER_UE_MASK, mask); + } + + /* Receiver error */ + pci_cfg_read32(phb, pd->bdfn, aercap + PCIECAP_AER_CE_MASK, + &mask); + if (up) + mask &= ~PCIECAP_AER_CE_RECVR_ERR; + else + mask |= PCIECAP_AER_CE_RECVR_ERR; + pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_CE_MASK, + mask); + } + + /* + * We're coming back from reset. We need restore bus ranges + * and reinitialize the affected bridges and devices. + */ + if (up) { + pci_restore_bridge_buses(phb, pd); + if (phb->ops->device_init) + pci_walk_dev(phb, pd, phb->ops->device_init, NULL); + } +} + +static int64_t pci_slot_run_sm(struct pci_slot *slot) +{ + uint64_t now = mftb(); + int64_t ret; + + /* Return remaining timeout if we're still waiting */ + if (slot->delay_tgt_tb && + tb_compare(now, slot->delay_tgt_tb) == TB_ABEFOREB) + return slot->delay_tgt_tb - now; + + slot->delay_tgt_tb = 0; + switch (slot->state & PCI_SLOT_STATE_MASK) { + case PCI_SLOT_STATE_LINK: + ret = slot->ops.poll_link(slot); + break; + case PCI_SLOT_STATE_HRESET: + ret = slot->ops.hreset(slot); + break; + case PCI_SLOT_STATE_FRESET: + ret = slot->ops.freset(slot); + break; + case PCI_SLOT_STATE_CRESET: + ret = slot->ops.creset(slot); + break; + default: + prlog(PR_ERR, PCI_SLOT_PREFIX + "Invalid state %08x\n", slot->id, slot->state); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + ret = OPAL_HARDWARE; + } + + /* Notify about the pci slot state machine completion */ + if (ret <= 0 && slot->ops.completed_sm_run) + slot->ops.completed_sm_run(slot, ret); + + return ret; +} + +void pci_slot_add_dt_properties(struct pci_slot *slot, + struct dt_node *np) +{ + /* Bail without device node */ + if (!np) + return; + + dt_add_property_cells(np, "ibm,reset-by-firmware", 1); + dt_add_property_cells(np, "ibm,slot-pluggable", slot->pluggable); + dt_add_property_cells(np, "ibm,slot-surprise-pluggable", + slot->surprise_pluggable); + if (pci_slot_has_flags(slot, PCI_SLOT_FLAG_BROKEN_PDC)) + dt_add_property_cells(np, "ibm,slot-broken-pdc", 1); + + dt_add_property_cells(np, "ibm,slot-power-ctl", slot->power_ctl); + dt_add_property_cells(np, "ibm,slot-power-led-ctlled", + slot->power_led_ctl); + dt_add_property_cells(np, "ibm,slot-attn-led", slot->attn_led_ctl); + dt_add_property_cells(np, "ibm,slot-connector-type", + slot->connector_type); + dt_add_property_cells(np, "ibm,slot-card-desc", slot->card_desc); + dt_add_property_cells(np, "ibm,slot-card-mech", slot->card_mech); + dt_add_property_cells(np, "ibm,slot-wired-lanes", slot->wired_lanes); + dt_add_property_cells(np, "ibm,power-limit", slot->power_limit); + + if (slot->ops.add_properties) + slot->ops.add_properties(slot, np); +} + +struct pci_slot *pci_slot_alloc(struct phb *phb, + struct pci_device *pd) +{ + struct pci_slot *slot = NULL; + + /* + * The function can be used to allocate either PHB slot or normal + * one. For both cases, the @phb should be always valid. 
+ */ + if (!phb) + return NULL; + + /* + * When @pd is NULL, we're going to create a PHB slot. Otherwise, + * a normal slot will be created. Check if the specified slot + * already exists or not. + */ + slot = pd ? pd->slot : phb->slot; + if (slot) { + prlog(PR_ERR, PCI_SLOT_PREFIX "Already exists\n", slot->id); + return slot; + } + + /* Allocate memory chunk */ + slot = zalloc(sizeof(struct pci_slot)); + if (!slot) { + prlog(PR_ERR, "%s: Out of memory\n", __func__); + return NULL; + } + + /* + * The polling function sholdn't be overridden by individual + * platforms + */ + slot->phb = phb; + slot->pd = pd; + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + slot->power_state = PCI_SLOT_POWER_ON; + slot->ops.run_sm = pci_slot_run_sm; + slot->ops.prepare_link_change = pci_slot_prepare_link_change; + slot->peer_slot = NULL; + if (!pd) { + slot->id = PCI_PHB_SLOT_ID(phb); + phb->slot = slot; + } else { + slot->id = PCI_SLOT_ID(phb, pd->bdfn); + pd->slot = slot; + } + + return slot; +} + +struct pci_slot *pci_slot_find(uint64_t id) +{ + struct phb *phb; + struct pci_device *pd; + struct pci_slot *slot; + uint64_t index; + uint16_t bdfn; + + index = PCI_SLOT_PHB_INDEX(id); + phb = pci_get_phb(index); + + /* PHB slot */ + if (!(id & PCI_SLOT_ID_PREFIX)) { + slot = phb ? phb->slot : NULL; + return slot; + } + + /* Normal PCI slot */ + bdfn = PCI_SLOT_BDFN(id); + pd = phb ? pci_find_dev(phb, bdfn) : NULL; + slot = pd ? pd->slot : NULL; + return slot; +} + +void pci_slot_add_loc(struct pci_slot *slot, + struct dt_node *np, const char *label) +{ + char tmp[8], loc_code[LOC_CODE_SIZE]; + struct pci_device *pd = slot->pd; + struct phb *phb = slot->phb; + + if (!np) + return; + + /* didn't get a real slot label? generate one! */ + if (!label) { + snprintf(tmp, sizeof(tmp), "S%04x%02x", phb->opal_id, + pd->secondary_bus); + label = tmp; + } + + /* Make a <PHB_LOC_CODE>-<LABEL> pair if we have a PHB loc code */ + if (phb->base_loc_code) { + snprintf(loc_code, sizeof(loc_code), "%s-%s", + phb->base_loc_code, label); + } else { + strncpy(loc_code, label, sizeof(loc_code) - 1); + loc_code[LOC_CODE_SIZE - 1] = '\0'; + } + + dt_add_property_string(np, "ibm,slot-label", label); + dt_add_property_string(np, "ibm,slot-location-code", loc_code); +} diff --git a/roms/skiboot/core/pci-virt.c b/roms/skiboot/core/pci-virt.c new file mode 100644 index 000000000..e0cb9949c --- /dev/null +++ b/roms/skiboot/core/pci-virt.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Support virtual PCI devices + * + * Copyright 2013-2016 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <pci.h> +#include <pci-virt.h> + +void pci_virt_cfg_read_raw(struct pci_virt_device *pvd, + uint32_t space, uint32_t offset, + uint32_t size, uint32_t *data) +{ + uint32_t i; + + if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space]) + return; + + for (*data = 0, i = 0; i < size; i++) + *data |= ((uint32_t)(pvd->config[space][offset + i]) << (i * 8)); +} + +void pci_virt_cfg_write_raw(struct pci_virt_device *pvd, + uint32_t space, uint32_t offset, + uint32_t size, uint32_t data) +{ + int i; + + if (space >= PCI_VIRT_CFG_MAX || !pvd->config[space]) + return; + + for (i = 0; i < size; i++) { + pvd->config[space][offset + i] = data; + data = (data >> 8); + } +} + +static struct pci_cfg_reg_filter *pci_virt_find_filter( + struct pci_virt_device *pvd, + uint32_t start, uint32_t len) +{ + struct pci_cfg_reg_filter *pcrf; + + if (!pvd || !len || start >= pvd->cfg_size) + return NULL; + + /* Return filter if there is overlapped region. We don't + * require strict matching for more flexibility. It also + * means the associated handler should validate the register + * offset and length. + */ + list_for_each(&pvd->pcrf, pcrf, link) { + if (start < (pcrf->start + pcrf->len) && + (start + len) > pcrf->start) + return pcrf; + } + + return NULL; +} + +struct pci_cfg_reg_filter *pci_virt_add_filter(struct pci_virt_device *pvd, + uint32_t start, + uint32_t len, + uint32_t flags, + pci_cfg_reg_func func, + void *data) +{ + struct pci_cfg_reg_filter *pcrf; + + if (!pvd || !len || (start + len) >= pvd->cfg_size) + return NULL; + if (!(flags & PCI_REG_FLAG_MASK)) + return NULL; + + pcrf = pci_virt_find_filter(pvd, start, len); + if (pcrf) { + prlog(PR_ERR, "%s: Filter [%x, %x] overlapped with [%x, %x]\n", + __func__, start, len, pcrf->start, pcrf->len); + return NULL; + } + + pcrf = zalloc(sizeof(*pcrf)); + if (!pcrf) { + prlog(PR_ERR, "%s: Out of memory!\n", __func__); + return NULL; + } + + pcrf->start = start; + pcrf->len = len; + pcrf->flags = flags; + pcrf->func = func; + pcrf->data = data; + list_add_tail(&pvd->pcrf, &pcrf->link); + + return pcrf; +} + +struct pci_virt_device *pci_virt_find_device(struct phb *phb, + uint32_t bdfn) +{ + struct pci_virt_device *pvd; + + list_for_each(&phb->virt_devices, pvd, node) { + if (pvd->bdfn == bdfn) + return pvd; + } + + return NULL; +} + +static inline bool pci_virt_cfg_valid(struct pci_virt_device *pvd, + uint32_t offset, uint32_t size) +{ + if ((offset + size) > pvd->cfg_size) + return false; + + if (!size || (size > 4)) + return false; + + if ((size & (size - 1)) || (offset & (size - 1))) + return false; + + return true; +} + +int64_t pci_virt_cfg_read(struct phb *phb, uint32_t bdfn, + uint32_t offset, uint32_t size, + uint32_t *data) +{ + struct pci_virt_device *pvd; + struct pci_cfg_reg_filter *pcrf; + int64_t ret = OPAL_SUCCESS; + + *data = 0xffffffff; + + /* Search for PCI virtual device */ + pvd = pci_virt_find_device(phb, bdfn); + if (!pvd) + return OPAL_PARAMETER; + + /* Check if config address is valid or not */ + if (!pci_virt_cfg_valid(pvd, offset, size)) + return OPAL_PARAMETER; + + /* The value is fetched from the normal config space when the + * trap handler returns OPAL_PARTIAL. Otherwise, the trap handler + * should provide the return value. 
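
The virtual config space accessors that follow layer per-register filters over raw backing storage: if a filter covers the access and returns anything other than OPAL_PARTIAL, its result stands; otherwise the raw space supplies the value. A compact standalone model of that dispatch, using a plain byte array and placeholder E_* codes instead of the pci_virt_device structures and OPAL return values:

#include <stdint.h>
#include <stdio.h>

enum { E_SUCCESS = 0, E_PARTIAL = 1 };	/* placeholders for the convention above */

static uint8_t raw_cfg[64];		/* backing "normal" config space */

/* Optional filter: handles offset 0x10 itself, defers everything else. */
static int filter_read(uint32_t off, uint32_t *val)
{
	if (off == 0x10) {
		*val = 0xdeadbeef;	/* synthesized register contents */
		return E_SUCCESS;
	}
	return E_PARTIAL;		/* fall back to the raw space */
}

static uint32_t virt_cfg_read32(uint32_t off)
{
	uint32_t val;

	if (filter_read(off, &val) != E_PARTIAL)
		return val;		/* filter provided the value */

	/* No filter coverage: assemble the value from raw bytes (little-endian). */
	val = raw_cfg[off] | raw_cfg[off + 1] << 8 |
	      raw_cfg[off + 2] << 16 | (uint32_t)raw_cfg[off + 3] << 24;
	return val;
}

int main(void)
{
	raw_cfg[0] = 0xb5;
	raw_cfg[1] = 0x10;		/* pretend vendor ID bytes */
	printf("0x00 -> %#x, 0x10 -> %#x\n", virt_cfg_read32(0), virt_cfg_read32(0x10));
	return 0;
}
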
+ */ + pcrf = pci_virt_find_filter(pvd, offset, size); + if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_READ)) + goto out; + + ret = pcrf->func(pvd, pcrf, offset, size, data, false); + if (ret != OPAL_PARTIAL) + return ret; +out: + pci_virt_cfg_read_raw(pvd, PCI_VIRT_CFG_NORMAL, offset, size, data); + return OPAL_SUCCESS; +} + +int64_t pci_virt_cfg_write(struct phb *phb, uint32_t bdfn, + uint32_t offset, uint32_t size, + uint32_t data) +{ + struct pci_virt_device *pvd; + struct pci_cfg_reg_filter *pcrf; + uint32_t val, v, r, c, i; + int64_t ret = OPAL_SUCCESS; + + /* Search for PCI virtual device */ + pvd = pci_virt_find_device(phb, bdfn); + if (!pvd) + return OPAL_PARAMETER; + + /* Check if config address is valid or not */ + if (!pci_virt_cfg_valid(pvd, offset, size)) + return OPAL_PARAMETER; + + /* The value is written to the config space if the trap handler + * returns OPAL_PARTIAL. Otherwise, the value to be written is + * dropped. + */ + pcrf = pci_virt_find_filter(pvd, offset, size); + if (!pcrf || !pcrf->func || !(pcrf->flags & PCI_REG_FLAG_WRITE)) + goto out; + + ret = pcrf->func(pvd, pcrf, offset, size, &data, true); + if (ret != OPAL_PARTIAL) + return ret; +out: + val = data; + for (i = 0; i < size; i++) { + PCI_VIRT_CFG_NORMAL_RD(pvd, offset + i, 1, &v); + PCI_VIRT_CFG_RDONLY_RD(pvd, offset + i, 1, &r); + PCI_VIRT_CFG_W1CLR_RD(pvd, offset + i, 1, &c); + + /* Drop read-only bits */ + val &= ~(r << (i * 8)); + val |= (r & v) << (i * 8); + + /* Drop W1C bits */ + val &= ~(val & ((c & v) << (i * 8))); + } + + PCI_VIRT_CFG_NORMAL_WR(pvd, offset, size, val); + return OPAL_SUCCESS; +} + +struct pci_virt_device *pci_virt_add_device(struct phb *phb, uint32_t bdfn, + uint32_t cfg_size, void *data) +{ + struct pci_virt_device *pvd; + uint8_t *cfg; + uint32_t i; + + /* The standard config header size is 64 bytes */ + if (!phb || (bdfn & 0xffff0000) || (cfg_size < 64)) + return NULL; + + /* Check if the bdfn is available */ + pvd = pci_virt_find_device(phb, bdfn); + if (pvd) { + prlog(PR_ERR, "%s: bdfn 0x%x was reserved\n", + __func__, bdfn); + return NULL; + } + + /* Populate the PCI virtual device */ + pvd = zalloc(sizeof(*pvd)); + if (!pvd) { + prlog(PR_ERR, "%s: Cannot alloate PCI virtual device (0x%x)\n", + __func__, bdfn); + return NULL; + } + + cfg = zalloc(cfg_size * PCI_VIRT_CFG_MAX); + if (!cfg) { + prlog(PR_ERR, "%s: Cannot allocate config space (0x%x)\n", + __func__, bdfn); + free(pvd); + return NULL; + } + + for (i = 0; i < PCI_VIRT_CFG_MAX; i++, cfg += cfg_size) + pvd->config[i] = cfg; + + pvd->bdfn = bdfn; + pvd->cfg_size = cfg_size; + pvd->data = data; + list_head_init(&pvd->pcrf); + list_add_tail(&phb->virt_devices, &pvd->node); + + return pvd; +} diff --git a/roms/skiboot/core/pci.c b/roms/skiboot/core/pci.c new file mode 100644 index 000000000..e195ecbf4 --- /dev/null +++ b/roms/skiboot/core/pci.c @@ -0,0 +1,1962 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Base PCI support + * + * Copyright 2013-2019 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <cpu.h> +#include <pci.h> +#include <pci-cfg.h> +#include <pci-slot.h> +#include <pci-quirk.h> +#include <timebase.h> +#include <device.h> + +#define MAX_PHB_ID 256 +static struct phb *phbs[MAX_PHB_ID]; +int last_phb_id = 0; + +/* + * Generic PCI utilities + */ + +static int64_t __pci_find_cap(struct phb *phb, uint16_t bdfn, + uint8_t want, bool check_cap_indicator) +{ + int64_t rc; + uint16_t stat, cap; + uint8_t pos, next; + + rc = pci_cfg_read16(phb, bdfn, PCI_CFG_STAT, &stat); + if (rc) + return rc; + if (check_cap_indicator && !(stat & PCI_CFG_STAT_CAP)) + return OPAL_UNSUPPORTED; + rc = pci_cfg_read8(phb, bdfn, PCI_CFG_CAP, &pos); + if (rc) + return rc; + pos &= 0xfc; + while(pos) { + rc = pci_cfg_read16(phb, bdfn, pos, &cap); + if (rc) + return rc; + if ((cap & 0xff) == want) + return pos; + next = (cap >> 8) & 0xfc; + if (next == pos) { + PCIERR(phb, bdfn, "pci_find_cap hit a loop !\n"); + break; + } + pos = next; + } + return OPAL_UNSUPPORTED; +} + +/* pci_find_cap - Find a PCI capability in a device config space + * + * This will return a config space offset (positive) or a negative + * error (OPAL error codes). + * + * OPAL_UNSUPPORTED is returned if the capability doesn't exist + */ +int64_t pci_find_cap(struct phb *phb, uint16_t bdfn, uint8_t want) +{ + return __pci_find_cap(phb, bdfn, want, true); +} + +/* pci_find_ecap - Find a PCIe extended capability in a device + * config space + * + * This will return a config space offset (positive) or a negative + * error (OPAL error code). Additionally, if the "version" argument + * is non-NULL, the capability version will be returned there. + * + * OPAL_UNSUPPORTED is returned if the capability doesn't exist + */ +int64_t pci_find_ecap(struct phb *phb, uint16_t bdfn, uint16_t want, + uint8_t *version) +{ + int64_t rc; + uint32_t cap; + uint16_t off, prev = 0; + + for (off = 0x100; off && off < 0x1000; off = (cap >> 20) & 0xffc ) { + if (off == prev) { + PCIERR(phb, bdfn, "pci_find_ecap hit a loop !\n"); + break; + } + prev = off; + rc = pci_cfg_read32(phb, bdfn, off, &cap); + if (rc) + return rc; + + /* no ecaps supported */ + if (cap == 0 || (cap & 0xffff) == 0xffff) + return OPAL_UNSUPPORTED; + + if ((cap & 0xffff) == want) { + if (version) + *version = (cap >> 16) & 0xf; + return off; + } + } + return OPAL_UNSUPPORTED; +} + +static void pci_init_pcie_cap(struct phb *phb, struct pci_device *pd) +{ + int64_t ecap = 0; + uint16_t reg; + uint32_t val; + + /* On the upstream port of PLX bridge 8724 (rev ba), PCI_STATUS + * register doesn't have capability indicator though it support + * various PCI capabilities. So we need ignore that bit when + * looking for PCI capabilities on the upstream port, which is + * limited to one that seats directly under root port. 
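
__pci_find_cap() above walks the standard capability list: take the pointer at offset 0x34 (PCI_CFG_CAP), mask the two reserved low bits, then follow each entry's "next" byte, giving up if an entry points back at itself so a corrupted chain cannot loop forever. The same walk over an in-memory copy of a config header; the 0x34 pointer and the 0x01 (PM) / 0x05 (MSI) IDs are the standard PCI values, and the status-register capability bit check is left out of this sketch.

#include <stdint.h>
#include <stdio.h>

/* Minimal fake config space: capability list at 0x34 -> 0x40 (PM) -> 0x50 (MSI). */
static uint8_t cfg[256] = {
	[0x34] = 0x40,
	[0x40] = 0x01, [0x41] = 0x50,	/* cap ID 0x01 (PM),  next = 0x50 */
	[0x50] = 0x05, [0x51] = 0x00,	/* cap ID 0x05 (MSI), next = end  */
};

/* Return the offset of capability 'want', or -1 if absent or the list self-loops. */
static int find_cap(uint8_t want)
{
	uint8_t pos = cfg[0x34] & 0xfc;	/* low two bits are reserved */

	while (pos) {
		uint8_t id = cfg[pos];
		uint8_t next = cfg[pos + 1] & 0xfc;

		if (id == want)
			return pos;
		if (next == pos)	/* self-referencing entry: broken list */
			return -1;
		pos = next;
	}
	return -1;
}

int main(void)
{
	printf("MSI cap at offset %#x, missing cap -> %d\n",
	       (unsigned)find_cap(0x05), find_cap(0x10));
	return 0;
}
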
+ */ + if (pd->vdid == 0x872410b5 && pd->parent && !pd->parent->parent) { + uint8_t rev; + + pci_cfg_read8(phb, pd->bdfn, PCI_CFG_REV_ID, &rev); + if (rev == 0xba) + ecap = __pci_find_cap(phb, pd->bdfn, + PCI_CFG_CAP_ID_EXP, false); + else + ecap = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_EXP); + } else { + ecap = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_EXP); + } + + if (ecap <= 0) { + pd->dev_type = PCIE_TYPE_LEGACY; + return; + } + + pci_set_cap(pd, PCI_CFG_CAP_ID_EXP, ecap, NULL, NULL, false); + + /* + * XXX We observe a problem on some PLX switches where one + * of the downstream ports appears as an upstream port, we + * fix that up here otherwise, other code will misbehave + */ + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_CAPABILITY_REG, ®); + pd->dev_type = GETFIELD(PCICAP_EXP_CAP_TYPE, reg); + if (pd->parent && pd->parent->dev_type == PCIE_TYPE_SWITCH_UPPORT && + pd->vdid == 0x874810b5 && pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) { + PCIDBG(phb, pd->bdfn, "Fixing up bad PLX downstream port !\n"); + pd->dev_type = PCIE_TYPE_SWITCH_DNPORT; + } + + /* XXX Handle ARI */ + if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT || + pd->dev_type == PCIE_TYPE_ROOT_PORT) + pd->scan_map = 0x1; + + /* Read MPS capability, whose maximal size is 4096 */ + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCAP, &val); + pd->mps = (128 << GETFIELD(PCICAP_EXP_DEVCAP_MPSS, val)); + if (pd->mps > 4096) + pd->mps = 4096; +} + +static void pci_init_aer_cap(struct phb *phb, struct pci_device *pd) +{ + int64_t pos; + + if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + return; + + pos = pci_find_ecap(phb, pd->bdfn, PCIECAP_ID_AER, NULL); + if (pos > 0) + pci_set_cap(pd, PCIECAP_ID_AER, pos, NULL, NULL, true); +} + +static void pci_init_pm_cap(struct phb *phb, struct pci_device *pd) +{ + int64_t pos; + + pos = pci_find_cap(phb, pd->bdfn, PCI_CFG_CAP_ID_PM); + if (pos > 0) + pci_set_cap(pd, PCI_CFG_CAP_ID_PM, pos, NULL, NULL, false); +} + +void pci_init_capabilities(struct phb *phb, struct pci_device *pd) +{ + pci_init_pcie_cap(phb, pd); + pci_init_aer_cap(phb, pd); + pci_init_pm_cap(phb, pd); +} + +bool pci_wait_crs(struct phb *phb, uint16_t bdfn, uint32_t *out_vdid) +{ + uint32_t retries, vdid; + int64_t rc; + bool had_crs = false; + + for (retries = 0; retries < 40; retries++) { + rc = pci_cfg_read32(phb, bdfn, PCI_CFG_VENDOR_ID, &vdid); + if (rc) + return false; + if (vdid == 0xffffffff || vdid == 0x00000000) + return false; + if (vdid != 0xffff0001) + break; + had_crs = true; + time_wait_ms(100); + } + if (vdid == 0xffff0001) { + PCIERR(phb, bdfn, "CRS timeout !\n"); + return false; + } + if (had_crs) + PCIDBG(phb, bdfn, "Probe success after %d CRS\n", retries); + + if (out_vdid) + *out_vdid = vdid; + return true; +} + +static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *parent, + uint16_t bdfn) +{ + struct pci_device *pd = NULL; + uint32_t vdid; + int64_t rc; + uint8_t htype; + + if (!pci_wait_crs(phb, bdfn, &vdid)) + return NULL; + + /* Perform a dummy write to the device in order for it to + * capture it's own bus number, so any subsequent error + * messages will be properly tagged + */ + pci_cfg_write32(phb, bdfn, PCI_CFG_VENDOR_ID, vdid); + + pd = zalloc(sizeof(struct pci_device)); + if (!pd) { + PCIERR(phb, bdfn,"Failed to allocate structure pci_device !\n"); + goto fail; + } + pd->phb = phb; + pd->bdfn = bdfn; + pd->vdid = vdid; + pci_cfg_read32(phb, bdfn, PCI_CFG_SUBSYS_VENDOR_ID, &pd->sub_vdid); + pci_cfg_read32(phb, bdfn, PCI_CFG_REV_ID, &pd->class); + pd->class >>= 8; 
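
pci_scan_one() further down shifts the dword read at PCI_CFG_REV_ID right by 8 because the class code occupies bits 31:8 (base class, sub-class, programming interface) while bits 7:0 hold the revision ID, and the header-type byte it reads next packs the multi-function flag in bit 7 and the header layout in the low 7 bits. A worked decode of both fields, assuming only those standard layouts (the sample values are invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t rev_id_dword = 0x06040001;	/* base 0x06, sub 0x04, prog-if 0x00, rev 0x01 */
	uint8_t  header_type  = 0x81;		/* multi-function bit set, type 1 (PCI-PCI bridge) */

	uint32_t class_code = rev_id_dword >> 8;	/* drop the revision byte */
	uint8_t  base = class_code >> 16;
	uint8_t  sub  = (class_code >> 8) & 0xff;
	uint8_t  pif  = class_code & 0xff;

	int is_multifunction = !!(header_type & 0x80);
	int is_bridge        = (header_type & 0x7f) != 0;

	printf("class %02x:%02x.%02x rev %02x, multifunction=%d bridge=%d\n",
	       base, sub, pif, rev_id_dword & 0xff, is_multifunction, is_bridge);
	return 0;
}
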
+ + pd->parent = parent; + list_head_init(&pd->pcrf); + list_head_init(&pd->children); + rc = pci_cfg_read8(phb, bdfn, PCI_CFG_HDR_TYPE, &htype); + if (rc) { + PCIERR(phb, bdfn, "Failed to read header type !\n"); + goto fail; + } + pd->is_multifunction = !!(htype & 0x80); + pd->is_bridge = (htype & 0x7f) != 0; + pd->is_vf = false; + pd->scan_map = 0xffffffff; /* Default */ + pd->primary_bus = PCI_BUS_NUM(bdfn); + + pci_init_capabilities(phb, pd); + + /* If it's a bridge, sanitize the bus numbers to avoid forwarding + * + * This will help when walking down those bridges later on + */ + if (pd->is_bridge) { + pci_cfg_write8(phb, bdfn, PCI_CFG_PRIMARY_BUS, pd->primary_bus); + pci_cfg_write8(phb, bdfn, PCI_CFG_SECONDARY_BUS, 0); + pci_cfg_write8(phb, bdfn, PCI_CFG_SUBORDINATE_BUS, 0); + } + + /* XXX Need to do some basic setups, such as MPSS, MRS, + * RCB, etc... + */ + + PCIDBG(phb, bdfn, "Found VID:%04x DEV:%04x TYP:%d MF%s BR%s EX%s\n", + vdid & 0xffff, vdid >> 16, pd->dev_type, + pd->is_multifunction ? "+" : "-", + pd->is_bridge ? "+" : "-", + pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) ? "+" : "-"); + + /* Try to get PCI slot behind the device */ + if (platform.pci_get_slot_info) + platform.pci_get_slot_info(phb, pd); + + /* Put it to the child device of list of PHB or parent */ + if (!parent) + list_add_tail(&phb->devices, &pd->link); + else + list_add_tail(&parent->children, &pd->link); + + /* + * Call PHB hook + */ + if (phb->ops->device_init) + phb->ops->device_init(phb, pd, NULL); + + return pd; + fail: + if (pd) + free(pd); + return NULL; +} + +/* pci_check_clear_freeze - Probing empty slot will result in an EEH + * freeze. Currently we have a single PE mapping + * everything (default state of our backend) so + * we just check and clear the state of PE#0 + * + * returns true if a freeze was detected + * + * NOTE: We currently only handle simple PE freeze, not PHB fencing + * (or rather our backend does) + */ +bool pci_check_clear_freeze(struct phb *phb) +{ + uint8_t freeze_state; + uint16_t pci_error_type, sev; + int64_t pe_number, rc; + + /* Retrieve the reserved PE number */ + pe_number = OPAL_PARAMETER; + if (phb->ops->get_reserved_pe_number) + pe_number = phb->ops->get_reserved_pe_number(phb); + if (pe_number < 0) + return false; + + /* Retrieve the frozen state */ + rc = phb->ops->eeh_freeze_status(phb, pe_number, &freeze_state, + &pci_error_type, &sev); + if (rc) + return true; /* phb fence? */ + + if (freeze_state == OPAL_EEH_STOPPED_NOT_FROZEN) + return false; + /* We can't handle anything worse than an ER here */ + if (sev > OPAL_EEH_SEV_NO_ERROR && + sev < OPAL_EEH_SEV_PE_ER) { + PCIERR(phb, 0, "Fatal probe in %s error !\n", __func__); + return true; + } + + phb->ops->eeh_freeze_clear(phb, pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + return true; +} + +/* + * Turn off slot's power supply if there are nothing connected for + * 2 purposes: power saving obviously and initialize the slot to + * to initial power-off state for hotplug. + * + * The power should be turned on if the downstream link of the slot + * isn't up. 
+ */ +static void pci_slot_set_power_state(struct phb *phb, + struct pci_device *pd, + uint8_t state) +{ + struct pci_slot *slot; + uint8_t cur_state; + int32_t wait = 100; + int64_t rc; + + if (!pd || !pd->slot) + return; + + slot = pd->slot; + if (!slot->pluggable || + !slot->ops.get_power_state || + !slot->ops.set_power_state) + return; + + if (state == PCI_SLOT_POWER_OFF) { + /* Bail if there're something connected */ + if (!list_empty(&pd->children)) { + PCIERR(phb, pd->bdfn, "Attempted to power off slot with attached devices!\n"); + return; + } + + pci_slot_add_flags(slot, PCI_SLOT_FLAG_BOOTUP); + rc = slot->ops.get_power_state(slot, &cur_state); + if (rc != OPAL_SUCCESS) { + PCINOTICE(phb, pd->bdfn, "Error %lld getting slot power state\n", rc); + cur_state = PCI_SLOT_POWER_OFF; + } + + pci_slot_remove_flags(slot, PCI_SLOT_FLAG_BOOTUP); + if (cur_state == PCI_SLOT_POWER_OFF) + return; + } + + pci_slot_add_flags(slot, + (PCI_SLOT_FLAG_BOOTUP | PCI_SLOT_FLAG_ENFORCE)); + rc = slot->ops.set_power_state(slot, state); + if (rc == OPAL_SUCCESS) + goto success; + if (rc != OPAL_ASYNC_COMPLETION) { + PCINOTICE(phb, pd->bdfn, "Error %lld powering %s slot\n", + rc, state == PCI_SLOT_POWER_ON ? "on" : "off"); + goto error; + } + + /* Wait until the operation is completed */ + do { + if (slot->state == PCI_SLOT_STATE_SPOWER_DONE) + break; + + check_timers(false); + time_wait_ms(10); + } while (--wait >= 0); + + if (wait < 0) { + PCINOTICE(phb, pd->bdfn, "Timeout powering %s slot\n", + state == PCI_SLOT_POWER_ON ? "on" : "off"); + goto error; + } + +success: + PCIDBG(phb, pd->bdfn, "Powering %s hotpluggable slot\n", + state == PCI_SLOT_POWER_ON ? "on" : "off"); +error: + pci_slot_remove_flags(slot, + (PCI_SLOT_FLAG_BOOTUP | PCI_SLOT_FLAG_ENFORCE)); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); +} + +static bool pci_bridge_power_on(struct phb *phb, struct pci_device *pd) +{ + int32_t ecap; + uint16_t pcie_cap, slot_sts, slot_ctl, link_ctl; + uint32_t slot_cap; + int64_t rc; + + /* + * If there is a PCI slot associated with the bridge, to use + * the PCI slot's facality to power it on. + */ + if (pd->slot) { + struct pci_slot *slot = pd->slot; + uint8_t presence; + + /* + * We assume the presence state is OPAL_PCI_SLOT_PRESENT + * by default. In this way, we won't miss anything when + * the operation isn't supported or hitting error upon + * retrieving it. 
+ */ + if (slot->ops.get_presence_state) { + rc = slot->ops.get_presence_state(slot, &presence); + if (rc == OPAL_SUCCESS && + presence == OPAL_PCI_SLOT_EMPTY) + return false; + } + + /* To power it on */ + pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_ON); + return true; + } + + if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + return true; + + /* Check if slot is supported */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_CAPABILITY_REG, &pcie_cap); + if (!(pcie_cap & PCICAP_EXP_CAP_SLOT)) + return true; + + /* Check presence */ + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_SLOTSTAT, &slot_sts); + if (!(slot_sts & PCICAP_EXP_SLOTSTAT_PDETECTST)) + return false; + + /* Ensure that power control is supported */ + pci_cfg_read32(phb, pd->bdfn, + ecap + PCICAP_EXP_SLOTCAP, &slot_cap); + if (!(slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL)) + return true; + + + /* Read the slot control register, check if the slot is off */ + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &slot_ctl); + PCITRACE(phb, pd->bdfn, " SLOT_CTL=%04x\n", slot_ctl); + if (slot_ctl & PCICAP_EXP_SLOTCTL_PWRCTLR) { + PCIDBG(phb, pd->bdfn, "Bridge power is off, turning on ...\n"); + slot_ctl &= ~PCICAP_EXP_SLOTCTL_PWRCTLR; + slot_ctl |= SETFIELD(PCICAP_EXP_SLOTCTL_PWRI, 0, PCIE_INDIC_ON); + pci_cfg_write16(phb, pd->bdfn, + ecap + PCICAP_EXP_SLOTCTL, slot_ctl); + + /* Wait a couple of seconds */ + time_wait_ms(2000); + } + + /* Enable link */ + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, &link_ctl); + PCITRACE(phb, pd->bdfn, " LINK_CTL=%04x\n", link_ctl); + link_ctl &= ~PCICAP_EXP_LCTL_LINK_DIS; + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, link_ctl); + + return true; +} + +static bool pci_bridge_wait_link(struct phb *phb, + struct pci_device *pd, + bool was_reset) +{ + int32_t ecap = 0; + uint32_t link_cap = 0, retries = 100; + uint16_t link_sts; + + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_LCAP, &link_cap); + } + + /* + * If link state reporting isn't supported, wait 1 second + * if the downstream link was ever resetted. + */ + if (!(link_cap & PCICAP_EXP_LCAP_DL_ACT_REP)) { + if (was_reset) + time_wait_ms(1000); + + return true; + } + + /* + * Link state reporting is supported, wait for the link to + * come up until timeout. + */ + PCIDBG(phb, pd->bdfn, "waiting for link... \n"); + while (retries--) { + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_LSTAT, &link_sts); + if (link_sts & PCICAP_EXP_LSTAT_DLLL_ACT) + break; + + time_wait_ms(100); + } + + if (!(link_sts & PCICAP_EXP_LSTAT_DLLL_ACT)) { + PCIERR(phb, pd->bdfn, "Timeout waiting for downstream link\n"); + return false; + } + + /* Need another 100ms before touching the config space */ + time_wait_ms(100); + PCIDBG(phb, pd->bdfn, "link is up\n"); + + return true; +} + +/* pci_enable_bridge - Called before scanning a bridge + * + * Ensures error flags are clean, disable master abort, and + * check if the subordinate bus isn't reset, the slot is enabled + * on PCIe, etc... + */ +static bool pci_enable_bridge(struct phb *phb, struct pci_device *pd) +{ + uint16_t bctl; + bool was_reset = false; + + /* Disable master aborts, clear errors */ + pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &bctl); + bctl &= ~PCI_CFG_BRCTL_MABORT_REPORT; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl); + + + /* PCI-E bridge, check the slot state. 
We don't do that on the + * root complex as this is handled separately and not all our + * RCs implement the standard register set. + */ + if ((pd->dev_type == PCIE_TYPE_ROOT_PORT && pd->primary_bus > 0) || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) { + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + int32_t ecap; + uint32_t link_cap = 0; + uint16_t link_sts = 0; + + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read32(phb, pd->bdfn, + ecap + PCICAP_EXP_LCAP, &link_cap); + + /* + * No need to touch the power supply if the PCIe link has + * been up. Further more, the slot presence bit is lost while + * the PCIe link is up on the specific PCI topology. In that + * case, we need ignore the slot presence bit and go ahead for + * probing. Otherwise, the NVMe adapter won't be probed. + * + * PHB3 root port, PLX switch 8748 (10b5:8748), PLX swich 9733 + * (10b5:9733), PMC 8546 swtich (11f8:8546), NVMe adapter + * (1c58:0023). + */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read32(phb, pd->bdfn, + ecap + PCICAP_EXP_LCAP, &link_cap); + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_LSTAT, &link_sts); + if ((link_cap & PCICAP_EXP_LCAP_DL_ACT_REP) && + (link_sts & PCICAP_EXP_LSTAT_DLLL_ACT)) + return true; + } + + /* Power on the downstream slot or link */ + if (!pci_bridge_power_on(phb, pd)) + return false; + } + + /* Clear secondary reset */ + if (bctl & PCI_CFG_BRCTL_SECONDARY_RESET) { + PCIDBG(phb, pd->bdfn, + "Bridge secondary reset is on, clearing it ...\n"); + bctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl); + time_wait_ms(1000); + was_reset = true; + } + + /* PCI-E bridge, wait for link */ + if (pd->dev_type == PCIE_TYPE_ROOT_PORT || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) { + if (!pci_bridge_wait_link(phb, pd, was_reset)) + return false; + } + + /* Clear error status */ + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_STAT, 0xffff); + return true; +} + +/* Clear up bridge resources */ +static void pci_cleanup_bridge(struct phb *phb, struct pci_device *pd) +{ + uint16_t cmd; + + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_BASE_U16, 0xffff); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_BASE, 0xf0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_LIMIT_U16, 0); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_LIMIT, 0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_BASE, 0xfff0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_LIMIT, 0); + pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE_U32, 0xffffffff); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE, 0xfff0); + pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT_U32, 0); + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT, 0); + + /* Note: This is a bit fishy but since we have closed all the + * bridge windows above, it shouldn't be a problem. Basically + * we enable Memory, IO and Bus Master on the bridge because + * some versions of Linux will fail to do it themselves. + */ + pci_cfg_read16(phb, pd->bdfn, PCI_CFG_CMD, &cmd); + cmd |= PCI_CFG_CMD_IO_EN | PCI_CFG_CMD_MEM_EN; + cmd |= PCI_CFG_CMD_BUS_MASTER_EN; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_CMD, cmd); +} + +/* Remove all subordinate PCI devices leading from the indicated + * PCI bus. 
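
pci_remove_bus() below frees each device while it is still walking the list, which is why it needs the _safe iterator and recurses into the children first. The same shape with a plain singly linked tree, so the depth-first teardown and the "save next before freeing" detail stand alone; the node layout here is invented for the example.

#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;	/* siblings behind the same bridge */
	struct node *child;	/* devices behind this one */
	int bdfn;
};

/* Depth-first teardown: children first, and save 'next' before freeing. */
static void remove_bus(struct node *n)
{
	while (n) {
		struct node *next = n->next;

		remove_bus(n->child);
		printf("removing %04x\n", n->bdfn);
		free(n);
		n = next;
	}
}

static struct node *new_node(int bdfn, struct node *child, struct node *next)
{
	struct node *n = malloc(sizeof(*n));

	if (!n)
		abort();
	n->bdfn = bdfn;
	n->child = child;
	n->next = next;
	return n;
}

int main(void)
{
	/* A bridge at 01:00.0 with two functions behind it, plus a sibling endpoint. */
	struct node *tree = new_node(0x0100,
			new_node(0x0200, NULL, new_node(0x0201, NULL, NULL)),
			new_node(0x0108, NULL, NULL));

	remove_bus(tree);
	return 0;
}
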
It's used to remove all PCI devices behind one PCI + * slot at unplugging time + */ +void pci_remove_bus(struct phb *phb, struct list_head *list) +{ + struct pci_device *pd, *tmp; + + list_for_each_safe(list, pd, tmp, link) { + pci_remove_bus(phb, &pd->children); + + if (phb->ops->device_remove) + phb->ops->device_remove(phb, pd); + + /* Release device node and PCI slot */ + if (pd->dn) + dt_free(pd->dn); + if (pd->slot) + free(pd->slot); + + /* Remove from parent list and release itself */ + list_del(&pd->link); + free(pd); + } +} + +static void pci_set_power_limit(struct pci_device *pd) +{ + uint32_t offset, val; + uint16_t caps; + + offset = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + if (!offset) + return; /* legacy dev */ + + pci_cfg_read16(pd->phb, pd->bdfn, + offset + PCICAP_EXP_CAPABILITY_REG, &caps); + + if (!(caps & PCICAP_EXP_CAP_SLOT)) + return; /* bridge has no slot capabilities */ + if (!pd->slot || !pd->slot->power_limit) + return; + + pci_cfg_read32(pd->phb, pd->bdfn, offset + PCICAP_EXP_SLOTCAP, &val); + + val = SETFIELD(PCICAP_EXP_SLOTCAP_SPLSC, val, 0); /* 1W scale */ + val = SETFIELD(PCICAP_EXP_SLOTCAP_SPLVA, val, pd->slot->power_limit); + + pci_cfg_write32(pd->phb, pd->bdfn, offset + PCICAP_EXP_SLOTCAP, val); + + /* update the cached copy in the slot */ + pd->slot->slot_cap = val; + + PCIDBG(pd->phb, pd->bdfn, "Slot power limit set to %dW\n", + pd->slot->power_limit); +} + +/* Perform a recursive scan of the bus at bus_number populating + * the list passed as an argument. This also performs the bus + * numbering, so it returns the largest bus number that was + * assigned. + * + * Note: Eventually this might want to access some VPD information + * in order to know what slots to scan and what not etc.. + * + * XXX NOTE: We might want to enable ARI along the way... + * + * XXX NOTE: We might also want to setup the PCIe MPS/MRSS properly + * here as Linux may or may not do it + */ +uint8_t pci_scan_bus(struct phb *phb, uint8_t bus, uint8_t max_bus, + struct list_head *list, struct pci_device *parent, + bool scan_downstream) +{ + struct pci_device *pd = NULL, *rc = NULL; + uint8_t dev, fn, next_bus, max_sub; + uint32_t scan_map; + + /* Decide what to scan */ + scan_map = parent ? parent->scan_map : phb->scan_map; + + /* Do scan */ + for (dev = 0; dev < 32; dev++) { + if (!(scan_map & (1ul << dev))) + continue; + + /* Scan the device */ + pd = pci_scan_one(phb, parent, (bus << 8) | (dev << 3)); + pci_check_clear_freeze(phb); + if (!pd) + continue; + + /* Record RC when its downstream link is down */ + if (!scan_downstream && dev == 0 && !rc) + rc = pd; + + /* XXX Handle ARI */ + if (!pd->is_multifunction) + continue; + for (fn = 1; fn < 8; fn++) { + pd = pci_scan_one(phb, parent, + ((uint16_t)bus << 8) | (dev << 3) | fn); + pci_check_clear_freeze(phb); + } + } + + /* Reserve all possible buses if RC's downstream link is down + * if PCI hotplug is supported. + */ + if (rc && rc->slot && rc->slot->pluggable) { + next_bus = bus + 1; + rc->secondary_bus = next_bus; + rc->subordinate_bus = max_bus; + pci_cfg_write8(phb, rc->bdfn, PCI_CFG_SECONDARY_BUS, + rc->secondary_bus); + pci_cfg_write8(phb, rc->bdfn, PCI_CFG_SUBORDINATE_BUS, + rc->subordinate_bus); + } + + /* set the power limit for any downstream slots while we're here */ + list_for_each(list, pd, link) { + if (pd->is_bridge) + pci_set_power_limit(pd); + } + + /* + * We only scan downstream if instructed to do so by the + * caller. 
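
The bridge loop further down hands each bridge the next free secondary bus, recurses with the remaining budget up to max_bus, and then writes back the highest bus actually consumed as the subordinate bus. The allocation arithmetic on its own, over an invented bridge tree with no config-space writes; the exhaustion check and the hotplug bus reservation are left out of this sketch.

#include <stdio.h>

struct bridge {
	struct bridge *child;		/* first bridge behind this one */
	struct bridge *next;		/* next bridge on the same bus */
	int secondary, subordinate;
};

/*
 * Number every bridge sitting on bus 'bus', starting at 'next_bus' and never
 * exceeding 'max_bus'.  Returns the highest bus number actually used.
 */
static int assign_buses(struct bridge *b, int bus, int next_bus, int max_bus)
{
	int max_sub = bus;

	for (; b; b = b->next) {
		b->secondary = next_bus;
		b->subordinate = max_bus;	/* provisional: widest possible range */
		max_sub = assign_buses(b->child, next_bus, next_bus + 1, max_bus);
		b->subordinate = max_sub;	/* shrink to what was really used */
		next_bus = max_sub + 1;
	}
	return max_sub;
}

int main(void)
{
	struct bridge leaf2 = { 0 };
	struct bridge leaf1 = { .next = &leaf2 };
	struct bridge root  = { .child = &leaf1 };

	assign_buses(&root, 0, 1, 0xff);
	printf("root %d..%d, leaf1 %d..%d, leaf2 %d..%d\n",
	       root.secondary, root.subordinate,
	       leaf1.secondary, leaf1.subordinate,
	       leaf2.secondary, leaf2.subordinate);
	return 0;
}
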
Typically we avoid the scan when we know the + * link is down already, which happens for the top level + * root complex, and avoids a long secondary timeout + */ + if (!scan_downstream) { + list_for_each(list, pd, link) + pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_OFF); + + return bus; + } + + next_bus = bus + 1; + max_sub = bus; + + /* Scan down bridges */ + list_for_each(list, pd, link) { + bool do_scan; + + if (!pd->is_bridge) + continue; + + /* Configure the bridge with the returned values */ + if (next_bus <= bus) { + PCIERR(phb, pd->bdfn, "Out of bus numbers !\n"); + max_bus = next_bus = 0; /* Failure case */ + } + + pd->secondary_bus = next_bus; + pd->subordinate_bus = max_bus; + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, next_bus); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_bus); + if (!next_bus) + break; + + PCIDBG(phb, pd->bdfn, "Bus %02x..%02x scanning...\n", + next_bus, max_bus); + + /* Clear up bridge resources */ + pci_cleanup_bridge(phb, pd); + + /* Configure the bridge. This will enable power to the slot + * if it's currently disabled, lift reset, etc... + * + * Return false if we know there's nothing behind the bridge + */ + do_scan = pci_enable_bridge(phb, pd); + + /* Perform recursive scan */ + if (do_scan) { + max_sub = pci_scan_bus(phb, next_bus, max_bus, + &pd->children, pd, true); + } else { + /* Empty bridge. We leave room for hotplug + * slots if the downstream port is pluggable. + */ + if (pd->slot && !pd->slot->pluggable) + max_sub = next_bus; + else { + max_sub = next_bus + 4; + if (max_sub > max_bus) + max_sub = max_bus; + } + } + + pd->subordinate_bus = max_sub; + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_sub); + next_bus = max_sub + 1; + + /* power off the slot if there's nothing below it */ + if (list_empty(&pd->children)) + pci_slot_set_power_state(phb, pd, PCI_SLOT_POWER_OFF); + } + + return max_sub; +} + +static int pci_get_mps(struct phb *phb, + struct pci_device *pd, void *userdata) +{ + uint32_t *mps = (uint32_t *)userdata; + + /* Only check PCI device that had MPS capacity */ + if (phb && pd && pd->mps && *mps > pd->mps) + *mps = pd->mps; + + return 0; +} + +static int pci_configure_mps(struct phb *phb, + struct pci_device *pd, + void *userdata __unused) +{ + uint32_t ecap, aercap, mps; + uint16_t val; + + assert(phb); + assert(pd); + + /* If the MPS isn't acceptable one, bail immediately */ + mps = phb->mps; + if (mps < 128 || mps > 4096) + return 1; + + /* Retrieve PCIe and AER capability */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + aercap = pci_cap(pd, PCIECAP_ID_AER, true); + + /* PCIe device always has MPS capacity */ + if (pd->mps) { + mps = ilog2(mps) - 7; + + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, &val); + val = SETFIELD(PCICAP_EXP_DEVCTL_MPS, val, mps); + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, val); + } + + /* Changing MPS on upstream PCI bridge might cause some error + * bits in PCIe and AER capability. To clear them to avoid + * confusion. 
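
pci_configure_mps() above converts the payload size into the 3-bit PCIe field with ilog2(mps) - 7, the inverse of the 128 << MPSS decode done when the device capability was read, after pci_get_mps() has reduced phb->mps to the minimum supported across the domain. The arithmetic by itself, with a local log2 helper for the power-of-two sizes involved:

#include <stdio.h>

/* log2 for the power-of-two payload sizes used here (128..4096). */
static unsigned ilog2u(unsigned v)
{
	unsigned l = 0;

	while (v >>= 1)
		l++;
	return l;
}

int main(void)
{
	unsigned mpss_field = 2;			/* from DEVCAP: supports 128 << 2 = 512 bytes */
	unsigned dev_mps    = 128u << mpss_field;

	unsigned domain_mps = dev_mps < 4096 ? dev_mps : 4096;	/* min() across the domain */
	unsigned devctl_field = ilog2u(domain_mps) - 7;		/* value written back to DEVCTL MPS */

	printf("device supports %u bytes -> DEVCTL field %u\n", domain_mps, devctl_field);
	return 0;
}
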
+ */ + if (aercap) { + pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_UE_STATUS, + 0xffffffff); + pci_cfg_write32(phb, pd->bdfn, aercap + PCIECAP_AER_CE_STATUS, + 0xffffffff); + } + if (ecap) + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVSTAT, 0xf); + + return 0; +} + +static void pci_disable_completion_timeout(struct phb *phb, struct pci_device *pd) +{ + uint32_t ecap, val; + uint16_t pcie_cap; + + /* PCIE capability required */ + if (!pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + return; + + /* Check PCIe capability version */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, + ecap + PCICAP_EXP_CAPABILITY_REG, &pcie_cap); + if ((pcie_cap & PCICAP_EXP_CAP_VERSION) <= 1) + return; + + /* Check if it has capability to disable completion timeout */ + pci_cfg_read32(phb, pd->bdfn, ecap + PCIECAP_EXP_DCAP2, &val); + if (!(val & PCICAP_EXP_DCAP2_CMPTOUT_DIS)) + return; + + /* Disable completion timeout without more check */ + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_DCTL2, &val); + val |= PCICAP_EXP_DCTL2_CMPTOUT_DIS; + pci_cfg_write32(phb, pd->bdfn, ecap + PCICAP_EXP_DCTL2, val); +} + +void pci_device_init(struct phb *phb, struct pci_device *pd) +{ + pci_configure_mps(phb, pd, NULL); + pci_disable_completion_timeout(phb, pd); +} + +static void pci_reset_phb(void *data) +{ + struct phb *phb = data; + struct pci_slot *slot = phb->slot; + int64_t rc; + + if (!slot || !slot->ops.run_sm) { + PCINOTICE(phb, 0, "Cannot issue reset\n"); + return; + } + + pci_slot_add_flags(slot, PCI_SLOT_FLAG_BOOTUP); + rc = slot->ops.run_sm(slot); + while (rc > 0) { + PCITRACE(phb, 0, "Waiting %ld ms\n", tb_to_msecs(rc)); + time_wait(rc); + rc = slot->ops.run_sm(slot); + } + pci_slot_remove_flags(slot, PCI_SLOT_FLAG_BOOTUP); + if (rc < 0) + PCIDBG(phb, 0, "Error %lld resetting\n", rc); +} + +static void pci_scan_phb(void *data) +{ + struct phb *phb = data; + struct pci_slot *slot = phb->slot; + uint8_t link; + uint32_t mps = 0xffffffff; + int64_t rc; + + if (!slot || !slot->ops.get_link_state) { + PCIERR(phb, 0, "Cannot query link status\n"); + link = 0; + } else { + rc = slot->ops.get_link_state(slot, &link); + if (rc != OPAL_SUCCESS) { + PCIERR(phb, 0, "Error %lld querying link status\n", + rc); + link = 0; + } + } + + if (!link) + PCIDBG(phb, 0, "Link down\n"); + else + PCIDBG(phb, 0, "Link up at x%d width\n", link); + + /* Scan root port and downstream ports if applicable */ + PCIDBG(phb, 0, "Scanning (upstream%s)...\n", + link ? 
"+downsteam" : " only"); + pci_scan_bus(phb, 0, 0xff, &phb->devices, NULL, link); + + /* Configure MPS (Max Payload Size) for PCIe domain */ + pci_walk_dev(phb, NULL, pci_get_mps, &mps); + phb->mps = mps; + pci_walk_dev(phb, NULL, pci_configure_mps, NULL); +} + +int64_t pci_register_phb(struct phb *phb, int opal_id) +{ + /* The user didn't specify an opal_id, allocate one */ + if (opal_id == OPAL_DYNAMIC_PHB_ID) { + /* This is called at init time in non-concurrent way, so no lock needed */ + for (opal_id = 0; opal_id < ARRAY_SIZE(phbs); opal_id++) + if (!phbs[opal_id]) + break; + if (opal_id >= ARRAY_SIZE(phbs)) { + prerror("PHB: Failed to find a free ID slot\n"); + return OPAL_RESOURCE; + } + } else { + if (opal_id >= ARRAY_SIZE(phbs)) { + prerror("PHB: ID %x out of range !\n", opal_id); + return OPAL_PARAMETER; + } + /* The user did specify an opal_id, check it's free */ + if (phbs[opal_id]) { + prerror("PHB: Duplicate registration of ID %x\n", opal_id); + return OPAL_PARAMETER; + } + } + + phbs[opal_id] = phb; + phb->opal_id = opal_id; + if (opal_id > last_phb_id) + last_phb_id = opal_id; + dt_add_property_cells(phb->dt_node, "ibm,opal-phbid", 0, phb->opal_id); + PCIDBG(phb, 0, "PCI: Registered PHB\n"); + + init_lock(&phb->lock); + list_head_init(&phb->devices); + + phb->filter_map = zalloc(BITMAP_BYTES(0x10000)); + assert(phb->filter_map); + + return OPAL_SUCCESS; +} + +int64_t pci_unregister_phb(struct phb *phb) +{ + /* XXX We want some kind of RCU or RWlock to make things + * like that happen while no OPAL callback is in progress, + * that way we avoid taking a lock in each of them. + * + * Right now we don't unregister so we are fine + */ + phbs[phb->opal_id] = phb; + + return OPAL_SUCCESS; +} + +struct phb *pci_get_phb(uint64_t phb_id) +{ + if (phb_id >= ARRAY_SIZE(phbs)) + return NULL; + + /* XXX See comment in pci_unregister_phb() about locking etc... 
*/ + return phbs[phb_id]; +} + +static const char *pci_class_name(uint32_t class_code) +{ + uint8_t class = class_code >> 16; + uint8_t sub = (class_code >> 8) & 0xff; + uint8_t pif = class_code & 0xff; + + switch(class) { + case 0x00: + switch(sub) { + case 0x00: return "device"; + case 0x01: return "vga"; + } + break; + case 0x01: + switch(sub) { + case 0x00: return "scsi"; + case 0x01: return "ide"; + case 0x02: return "fdc"; + case 0x03: return "ipi"; + case 0x04: return "raid"; + case 0x05: return "ata"; + case 0x06: return "sata"; + case 0x07: return "sas"; + default: return "mass-storage"; + } + case 0x02: + switch(sub) { + case 0x00: return "ethernet"; + case 0x01: return "token-ring"; + case 0x02: return "fddi"; + case 0x03: return "atm"; + case 0x04: return "isdn"; + case 0x05: return "worldfip"; + case 0x06: return "picmg"; + default: return "network"; + } + case 0x03: + switch(sub) { + case 0x00: return "vga"; + case 0x01: return "xga"; + case 0x02: return "3d-controller"; + default: return "display"; + } + case 0x04: + switch(sub) { + case 0x00: return "video"; + case 0x01: return "sound"; + case 0x02: return "telephony"; + default: return "multimedia-device"; + } + case 0x05: + switch(sub) { + case 0x00: return "memory"; + case 0x01: return "flash"; + default: return "memory-controller"; + } + case 0x06: + switch(sub) { + case 0x00: return "host"; + case 0x01: return "isa"; + case 0x02: return "eisa"; + case 0x03: return "mca"; + case 0x04: return "pci"; + case 0x05: return "pcmcia"; + case 0x06: return "nubus"; + case 0x07: return "cardbus"; + case 0x08: return "raceway"; + case 0x09: return "semi-transparent-pci"; + case 0x0a: return "infiniband"; + default: return "unknown-bridge"; + } + case 0x07: + switch(sub) { + case 0x00: + switch(pif) { + case 0x01: return "16450-serial"; + case 0x02: return "16550-serial"; + case 0x03: return "16650-serial"; + case 0x04: return "16750-serial"; + case 0x05: return "16850-serial"; + case 0x06: return "16950-serial"; + default: return "serial"; + } + case 0x01: + switch(pif) { + case 0x01: return "bi-directional-parallel"; + case 0x02: return "ecp-1.x-parallel"; + case 0x03: return "ieee1284-controller"; + case 0xfe: return "ieee1284-device"; + default: return "parallel"; + } + case 0x02: return "multiport-serial"; + case 0x03: + switch(pif) { + case 0x01: return "16450-modem"; + case 0x02: return "16550-modem"; + case 0x03: return "16650-modem"; + case 0x04: return "16750-modem"; + default: return "modem"; + } + case 0x04: return "gpib"; + case 0x05: return "smart-card"; + default: return "communication-controller"; + } + case 0x08: + switch(sub) { + case 0x00: + switch(pif) { + case 0x01: return "isa-pic"; + case 0x02: return "eisa-pic"; + case 0x10: return "io-apic"; + case 0x20: return "iox-apic"; + default: return "interrupt-controller"; + } + case 0x01: + switch(pif) { + case 0x01: return "isa-dma"; + case 0x02: return "eisa-dma"; + default: return "dma-controller"; + } + case 0x02: + switch(pif) { + case 0x01: return "isa-system-timer"; + case 0x02: return "eisa-system-timer"; + default: return "timer"; + } + case 0x03: + switch(pif) { + case 0x01: return "isa-rtc"; + default: return "rtc"; + } + case 0x04: return "hotplug-controller"; + case 0x05: return "sd-host-controller"; + default: return "system-peripheral"; + } + case 0x09: + switch(sub) { + case 0x00: return "keyboard"; + case 0x01: return "pen"; + case 0x02: return "mouse"; + case 0x03: return "scanner"; + case 0x04: return "gameport"; + default: return 
"input-controller"; + } + case 0x0a: + switch(sub) { + case 0x00: return "clock"; + default: return "docking-station"; + } + case 0x0b: + switch(sub) { + case 0x00: return "386"; + case 0x01: return "486"; + case 0x02: return "pentium"; + case 0x10: return "alpha"; + case 0x20: return "powerpc"; + case 0x30: return "mips"; + case 0x40: return "co-processor"; + default: return "cpu"; + } + case 0x0c: + switch(sub) { + case 0x00: return "firewire"; + case 0x01: return "access-bus"; + case 0x02: return "ssa"; + case 0x03: + switch(pif) { + case 0x00: return "usb-uhci"; + case 0x10: return "usb-ohci"; + case 0x20: return "usb-ehci"; + case 0x30: return "usb-xhci"; + case 0xfe: return "usb-device"; + default: return "usb"; + } + case 0x04: return "fibre-channel"; + case 0x05: return "smb"; + case 0x06: return "infiniband"; + case 0x07: + switch(pif) { + case 0x00: return "impi-smic"; + case 0x01: return "impi-kbrd"; + case 0x02: return "impi-bltr"; + default: return "impi"; + } + case 0x08: return "secos"; + case 0x09: return "canbus"; + default: return "serial-bus"; + } + case 0x0d: + switch(sub) { + case 0x00: return "irda"; + case 0x01: return "consumer-ir"; + case 0x10: return "rf-controller"; + case 0x11: return "bluetooth"; + case 0x12: return "broadband"; + case 0x20: return "enet-802.11a"; + case 0x21: return "enet-802.11b"; + default: return "wireless-controller"; + } + case 0x0e: return "intelligent-controller"; + case 0x0f: + switch(sub) { + case 0x01: return "satellite-tv"; + case 0x02: return "satellite-audio"; + case 0x03: return "satellite-voice"; + case 0x04: return "satellite-data"; + default: return "satellite-device"; + } + case 0x10: + switch(sub) { + case 0x00: return "network-encryption"; + case 0x01: return "entertainment-encryption"; + default: return "encryption"; + } + case 0x011: + switch(sub) { + case 0x00: return "dpio"; + case 0x01: return "counter"; + case 0x10: return "measurement"; + case 0x20: return "management-card"; + default: return "data-processing"; + } + } + return "device"; +} + +void pci_std_swizzle_irq_map(struct dt_node *np, + struct pci_device *pd, + struct pci_lsi_state *lstate, + uint8_t swizzle) +{ + __be32 *p, *map; + int dev, irq, esize, edevcount; + size_t map_size; + + /* Some emulated setups don't use standard interrupts + * representation + */ + if (lstate->int_size == 0) + return; + + /* Calculate the size of a map entry: + * + * 3 cells : PCI Address + * 1 cell : PCI IRQ + * 1 cell : PIC phandle + * n cells : PIC irq (n = lstate->int_size) + * + * Assumption: PIC address is 0-size + */ + esize = 3 + 1 + 1 + lstate->int_size; + + /* Number of map "device" entries + * + * A PCI Express root or downstream port needs only one + * entry for device 0. 
Anything else will get a full map + * for all possible 32 child device numbers + * + * If we have been passed a host bridge (pd == NULL) we also + * do a simple per-pin map + */ + if (!pd || (pd->dev_type == PCIE_TYPE_ROOT_PORT || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT)) { + edevcount = 1; + dt_add_property_cells(np, "interrupt-map-mask", 0, 0, 0, 7); + } else { + edevcount = 32; + dt_add_property_cells(np, "interrupt-map-mask", + 0xf800, 0, 0, 7); + } + map_size = esize * edevcount * 4 * sizeof(u32); + map = p = zalloc(map_size); + if (!map) { + prerror("Failed to allocate interrupt-map-mask !\n"); + return; + } + + for (dev = 0; dev < edevcount; dev++) { + for (irq = 0; irq < 4; irq++) { + /* Calculate pin */ + size_t i; + uint32_t new_irq = (irq + dev + swizzle) % 4; + + /* PCI address portion */ + *(p++) = cpu_to_be32(dev << (8 + 3)); + *(p++) = 0; + *(p++) = 0; + + /* PCI interrupt portion */ + *(p++) = cpu_to_be32(irq + 1); + + /* Parent phandle */ + *(p++) = cpu_to_be32(lstate->int_parent[new_irq]); + + /* Parent desc */ + for (i = 0; i < lstate->int_size; i++) + *(p++) = cpu_to_be32(lstate->int_val[new_irq][i]); + } + } + + dt_add_property(np, "interrupt-map", map, map_size); + free(map); +} + +static void pci_add_loc_code(struct dt_node *np) +{ + struct dt_node *p; + const char *lcode = NULL; + + for (p = np->parent; p; p = p->parent) { + /* prefer slot-label by default */ + lcode = dt_prop_get_def(p, "ibm,slot-label", NULL); + if (lcode) + break; + + /* otherwise use the fully qualified location code */ + lcode = dt_prop_get_def(p, "ibm,slot-location-code", NULL); + if (lcode) + break; + } + + if (!lcode) + lcode = dt_prop_get_def(np, "ibm,slot-location-code", NULL); + + if (!lcode) { + /* Fall back to finding a ibm,loc-code */ + for (p = np->parent; p; p = p->parent) { + lcode = dt_prop_get_def(p, "ibm,loc-code", NULL); + if (lcode) + break; + } + } + + if (!lcode) + return; + + dt_add_property_string(np, "ibm,loc-code", lcode); +} + +static void pci_print_summary_line(struct phb *phb, struct pci_device *pd, + struct dt_node *np, u32 rev_class, + const char *cname) +{ + const char *label, *dtype, *s; +#define MAX_SLOTSTR 80 + char slotstr[MAX_SLOTSTR + 1] = { 0, }; + + /* If it's a slot, it has a slot-label */ + label = dt_prop_get_def(np, "ibm,slot-label", NULL); + if (label) { + u32 lanes = dt_prop_get_u32_def(np, "ibm,slot-wired-lanes", 0); + static const char *lanestrs[] = { + "", " x1", " x2", " x4", " x8", "x16", "x32", "32b", "64b" + }; + const char *lstr = lanes > PCI_SLOT_WIRED_LANES_PCIX_64 ? "" : lanestrs[lanes]; + snprintf(slotstr, MAX_SLOTSTR, "SLOT=%3s %s", label, lstr); + /* XXX Add more slot info */ + } else { + /* + * No label, ignore downstream switch legs and root complex, + * Those would essentially be non-populated + */ + if (pd->dev_type != PCIE_TYPE_ROOT_PORT && + pd->dev_type != PCIE_TYPE_SWITCH_DNPORT) { + /* It's a mere device, get loc code */ + s = dt_prop_get_def(np, "ibm,loc-code", NULL); + if (s) + snprintf(slotstr, MAX_SLOTSTR, "LOC_CODE=%s", s); + } + } + + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + static const char *pcie_types[] = { + "EP ", "LGCY", "????", "????", "ROOT", "SWUP", "SWDN", + "ETOX", "XTOE", "RINT", "EVTC" }; + if (pd->dev_type >= ARRAY_SIZE(pcie_types)) + dtype = "????"; + else + dtype = pcie_types[pd->dev_type]; + } else + dtype = pd->is_bridge ? 
"PCIB" : "PCID"; + + if (pd->is_bridge) + PCINOTICE(phb, pd->bdfn, + "[%s] %04x %04x R:%02x C:%06x B:%02x..%02x %s\n", + dtype, PCI_VENDOR_ID(pd->vdid), + PCI_DEVICE_ID(pd->vdid), + rev_class & 0xff, rev_class >> 8, pd->secondary_bus, + pd->subordinate_bus, slotstr); + else + PCINOTICE(phb, pd->bdfn, + "[%s] %04x %04x R:%02x C:%06x (%14s) %s\n", + dtype, PCI_VENDOR_ID(pd->vdid), + PCI_DEVICE_ID(pd->vdid), + rev_class & 0xff, rev_class >> 8, cname, slotstr); +} + +static void __noinline pci_add_one_device_node(struct phb *phb, + struct pci_device *pd, + struct dt_node *parent_node, + struct pci_lsi_state *lstate, + uint8_t swizzle) +{ + struct dt_node *np; + const char *cname; +#define MAX_NAME 256 + char name[MAX_NAME]; + char compat[MAX_NAME]; + uint32_t rev_class; + uint8_t intpin; + bool is_pcie; + + pci_cfg_read32(phb, pd->bdfn, PCI_CFG_REV_ID, &rev_class); + pci_cfg_read8(phb, pd->bdfn, PCI_CFG_INT_PIN, &intpin); + is_pcie = pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false); + + /* + * Some IBM PHBs (p7ioc?) have an invalid PCI class code. Linux + * uses prefers to read the class code from the DT rather than + * re-reading config space we can hack around it here. + */ + if (is_pcie && pd->dev_type == PCIE_TYPE_ROOT_PORT) + rev_class = (rev_class & 0xff) | 0x6040000; + cname = pci_class_name(rev_class >> 8); + + if (PCI_FUNC(pd->bdfn)) + snprintf(name, MAX_NAME - 1, "%s@%x,%x", + cname, PCI_DEV(pd->bdfn), PCI_FUNC(pd->bdfn)); + else + snprintf(name, MAX_NAME - 1, "%s@%x", + cname, PCI_DEV(pd->bdfn)); + pd->dn = np = dt_new(parent_node, name); + + /* + * NB: ibm,pci-config-space-type is the PAPR way of indicating the + * device has a 4KB config space. It's got nothing to do with the + * standard Type 0/1 config spaces defined by PCI. + */ + if (is_pcie || phb->phb_type == phb_type_npu_v2_opencapi) { + snprintf(compat, MAX_NAME, "pciex%x,%x", + PCI_VENDOR_ID(pd->vdid), PCI_DEVICE_ID(pd->vdid)); + dt_add_property_cells(np, "ibm,pci-config-space-type", 1); + } else { + snprintf(compat, MAX_NAME, "pci%x,%x", + PCI_VENDOR_ID(pd->vdid), PCI_DEVICE_ID(pd->vdid)); + dt_add_property_cells(np, "ibm,pci-config-space-type", 0); + } + dt_add_property_cells(np, "class-code", rev_class >> 8); + dt_add_property_cells(np, "revision-id", rev_class & 0xff); + dt_add_property_cells(np, "vendor-id", PCI_VENDOR_ID(pd->vdid)); + dt_add_property_cells(np, "device-id", PCI_DEVICE_ID(pd->vdid)); + if (intpin) + dt_add_property_cells(np, "interrupts", intpin); + + pci_handle_quirk(phb, pd); + + /* XXX FIXME: Add a few missing ones such as + * + * - devsel-speed (!express) + * - max-latency + * - min-grant + * - subsystem-id + * - subsystem-vendor-id + * - ... + */ + + /* Add slot properties if needed and iff this is a bridge */ + if (pd->slot) + pci_slot_add_dt_properties(pd->slot, np); + + /* + * Use the phb base location code for root ports if the platform + * doesn't provide one via slot->add_properties() operation. + */ + if (pd->dev_type == PCIE_TYPE_ROOT_PORT && phb->base_loc_code && + !dt_has_node_property(np, "ibm,slot-location-code", NULL)) + dt_add_property_string(np, "ibm,slot-location-code", + phb->base_loc_code); + + /* Make up location code */ + if (platform.pci_add_loc_code) + platform.pci_add_loc_code(np, pd); + else + pci_add_loc_code(np); + + /* XXX FIXME: We don't look for BARs, we only put the config space + * entry in the "reg" property. 
That's enough for Linux and we might + * even want to make this legit in future ePAPR + */ + dt_add_property_cells(np, "reg", pd->bdfn << 8, 0, 0, 0, 0); + + /* Print summary info about the device */ + pci_print_summary_line(phb, pd, np, rev_class, cname); + if (!pd->is_bridge) + return; + + dt_add_property_cells(np, "#address-cells", 3); + dt_add_property_cells(np, "#size-cells", 2); + dt_add_property_cells(np, "#interrupt-cells", 1); + + /* We want "device_type" for bridges */ + if (is_pcie) + dt_add_property_string(np, "device_type", "pciex"); + else + dt_add_property_string(np, "device_type", "pci"); + + /* Update the current interrupt swizzling level based on our own + * device number + */ + swizzle = (swizzle + PCI_DEV(pd->bdfn)) & 3; + + /* We generate a standard-swizzling interrupt map. This is pretty + * big, we *could* try to be smarter for things that aren't hotplug + * slots at least and only populate those entries for which there's + * an actual children (especially on PCI Express), but for now that + * will do + */ + pci_std_swizzle_irq_map(np, pd, lstate, swizzle); + + /* Parts of the OF address translation in the kernel will fail to + * correctly translate a PCI address if translating a 1:1 mapping + * (ie. an empty ranges property). + * Instead add a ranges property that explicitly translates 1:1. + */ + dt_add_property_cells(np, "ranges", + /* 64-bit direct mapping. We know the bridges + * don't cover the entire address space so + * use 0xf00... as a good compromise. */ + 0x02000000, 0x0, 0x0, + 0x02000000, 0x0, 0x0, + 0xf0000000, 0x0); +} + +void __noinline pci_add_device_nodes(struct phb *phb, + struct list_head *list, + struct dt_node *parent_node, + struct pci_lsi_state *lstate, + uint8_t swizzle) +{ + struct pci_device *pd; + + /* Add all child devices */ + list_for_each(list, pd, link) { + pci_add_one_device_node(phb, pd, parent_node, + lstate, swizzle); + if (list_empty(&pd->children)) + continue; + + pci_add_device_nodes(phb, &pd->children, + pd->dn, lstate, swizzle); + } +} + +static void pci_do_jobs(void (*fn)(void *)) +{ + struct cpu_job **jobs; + int i; + + jobs = zalloc(sizeof(struct cpu_job *) * ARRAY_SIZE(phbs)); + assert(jobs); + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!phbs[i]) { + jobs[i] = NULL; + continue; + } + + jobs[i] = __cpu_queue_job(NULL, phbs[i]->dt_node->name, + fn, phbs[i], false); + assert(jobs[i]); + + } + + /* If no secondary CPUs, do everything sync */ + cpu_process_local_jobs(); + + /* Wait until all tasks are done */ + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!jobs[i]) + continue; + + cpu_wait_job(jobs[i], true); + } + free(jobs); +} + +static void __pci_init_slots(void) +{ + unsigned int i; + + /* Some PHBs may need that long to debounce the presence detect + * after HW initialization. 
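+	 *
+	 * Note the 20ms settle time below is paid only once, at the
+	 * first registered PHB, before the per-PHB reset jobs are
+	 * queued in parallel via pci_do_jobs().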
+ */ + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (phbs[i]) { + time_wait_ms(20); + break; + } + } + + if (platform.pre_pci_fixup) + platform.pre_pci_fixup(); + + prlog(PR_NOTICE, "PCI: Resetting PHBs and training links...\n"); + pci_do_jobs(pci_reset_phb); + + prlog(PR_NOTICE, "PCI: Probing slots...\n"); + pci_do_jobs(pci_scan_phb); + + if (platform.pci_probe_complete) + platform.pci_probe_complete(); + + prlog(PR_NOTICE, "PCI Summary:\n"); + + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!phbs[i]) + continue; + + pci_add_device_nodes(phbs[i], &phbs[i]->devices, + phbs[i]->dt_node, &phbs[i]->lstate, 0); + } + + /* PHB final fixup */ + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + if (!phbs[i] || !phbs[i]->ops || !phbs[i]->ops->phb_final_fixup) + continue; + + phbs[i]->ops->phb_final_fixup(phbs[i]); + } +} + +static void __pci_reset(struct list_head *list) +{ + struct pci_device *pd; + struct pci_cfg_reg_filter *pcrf; + int i; + + while ((pd = list_pop(list, struct pci_device, link)) != NULL) { + __pci_reset(&pd->children); + dt_free(pd->dn); + free(pd->slot); + while((pcrf = list_pop(&pd->pcrf, struct pci_cfg_reg_filter, link)) != NULL) { + free(pcrf); + } + for(i=0; i < 64; i++) + if (pd->cap[i].free_func) + pd->cap[i].free_func(pd->cap[i].data); + free(pd); + } +} + +int64_t pci_reset(void) +{ + unsigned int i; + + prlog(PR_NOTICE, "PCI: Clearing all devices...\n"); + + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + struct phb *phb = phbs[i]; + if (!phb) + continue; + __pci_reset(&phb->devices); + + pci_slot_set_state(phb->slot, PCI_SLOT_STATE_CRESET_START); + } + + /* Do init and discovery of PCI slots in parallel */ + __pci_init_slots(); + + return 0; +} + +void pci_init_slots(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(phbs); i++) { + struct phb *phb = phbs[i]; + if (!phb) + continue; + pci_slot_set_state(phb->slot, PCI_SLOT_STATE_FRESET_POWER_OFF); + } + __pci_init_slots(); +} + +/* + * Complete iteration on current level before switching to + * child level, which is the proper order for restoring + * PCI bus range on bridges. 
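+ *
+ * For a topology root -> { bridgeA, bridgeB } with bridgeA leading
+ * to { devA0, devA1 }, the callback order is therefore bridgeA,
+ * bridgeB, then devA0, devA1 (and their children), then everything
+ * below bridgeB: a bridge always gets the callback before any
+ * device behind it.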
+ */ +static struct pci_device *__pci_walk_dev(struct phb *phb, + struct list_head *l, + int (*cb)(struct phb *, + struct pci_device *, + void *), + void *userdata) +{ + struct pci_device *pd, *child; + + if (list_empty(l)) + return NULL; + + list_for_each(l, pd, link) { + if (cb && cb(phb, pd, userdata)) + return pd; + } + + list_for_each(l, pd, link) { + child = __pci_walk_dev(phb, &pd->children, cb, userdata); + if (child) + return child; + } + + return NULL; +} + +struct pci_device *pci_walk_dev(struct phb *phb, + struct pci_device *pd, + int (*cb)(struct phb *, + struct pci_device *, + void *), + void *userdata) +{ + if (pd) + return __pci_walk_dev(phb, &pd->children, cb, userdata); + + return __pci_walk_dev(phb, &phb->devices, cb, userdata); +} + +static int __pci_find_dev(struct phb *phb, + struct pci_device *pd, void *userdata) +{ + uint16_t bdfn = *((uint16_t *)userdata); + + if (!phb || !pd) + return 0; + + if (pd->bdfn == bdfn) + return 1; + + return 0; +} + +struct pci_device *pci_find_dev(struct phb *phb, uint16_t bdfn) +{ + return pci_walk_dev(phb, NULL, __pci_find_dev, &bdfn); +} + +static int __pci_restore_bridge_buses(struct phb *phb, + struct pci_device *pd, + void *data __unused) +{ + uint32_t vdid; + + /* If the device is behind a switch, wait for the switch */ + if (!pd->is_vf && !(pd->bdfn & 7) && pd->parent != NULL && + pd->parent->dev_type == PCIE_TYPE_SWITCH_DNPORT) { + if (!pci_bridge_wait_link(phb, pd->parent, true)) { + PCIERR(phb, pd->bdfn, "Timeout waiting for switch\n"); + return -1; + } + } + + /* Wait for config space to stop returning CRS */ + if (!pci_wait_crs(phb, pd->bdfn, &vdid)) + return -1; + + /* Make all devices below a bridge "re-capture" the bdfn */ + pci_cfg_write32(phb, pd->bdfn, PCI_CFG_VENDOR_ID, vdid); + + if (!pd->is_bridge) + return 0; + + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_PRIMARY_BUS, + pd->primary_bus); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, + pd->secondary_bus); + pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, + pd->subordinate_bus); + return 0; +} + +void pci_restore_bridge_buses(struct phb *phb, struct pci_device *pd) +{ + pci_walk_dev(phb, pd, __pci_restore_bridge_buses, NULL); +} + +void pci_restore_slot_bus_configs(struct pci_slot *slot) +{ + /* + * We might lose the bus numbers during the reset operation + * and we need to restore them. Otherwise, some adapters (e.g. + * IPR) can't be probed properly by the kernel. We don't need + * to restore bus numbers for every kind of reset, however, + * it's not harmful to always restore the bus numbers, which + * simplifies the logic. 
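+	 *
+	 * "Restore" here means re-writing the cached primary/secondary/
+	 * subordinate bus numbers into each bridge below the slot and
+	 * re-writing the vendor ID so devices re-capture their bdfn
+	 * (see __pci_restore_bridge_buses() above), then re-running the
+	 * PHB's device_init hook on those devices.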
+ */ + pci_restore_bridge_buses(slot->phb, slot->pd); + if (slot->phb->ops->device_init) + pci_walk_dev(slot->phb, slot->pd, + slot->phb->ops->device_init, NULL); +} + +struct pci_cfg_reg_filter *pci_find_cfg_reg_filter(struct pci_device *pd, + uint32_t start, uint32_t len) +{ + struct pci_cfg_reg_filter *pcrf; + + /* Check on the cached range, which contains holes */ + if ((start + len) <= pd->pcrf_start || + pd->pcrf_end <= start) + return NULL; + + list_for_each(&pd->pcrf, pcrf, link) { + if (start >= pcrf->start && + (start + len) <= (pcrf->start + pcrf->len)) + return pcrf; + } + + return NULL; +} + +static bool pci_device_has_cfg_reg_filters(struct phb *phb, uint16_t bdfn) +{ + return bitmap_tst_bit(*phb->filter_map, bdfn); +} + +int64_t pci_handle_cfg_filters(struct phb *phb, uint32_t bdfn, + uint32_t offset, uint32_t len, + uint32_t *data, bool write) +{ + struct pci_device *pd; + struct pci_cfg_reg_filter *pcrf; + uint32_t flags; + + if (!pci_device_has_cfg_reg_filters(phb, bdfn)) + return OPAL_PARTIAL; + pd = pci_find_dev(phb, bdfn); + pcrf = pd ? pci_find_cfg_reg_filter(pd, offset, len) : NULL; + if (!pcrf || !pcrf->func) + return OPAL_PARTIAL; + + flags = write ? PCI_REG_FLAG_WRITE : PCI_REG_FLAG_READ; + if ((pcrf->flags & flags) != flags) + return OPAL_PARTIAL; + + return pcrf->func(pd, pcrf, offset, len, data, write); +} + +struct pci_cfg_reg_filter *pci_add_cfg_reg_filter(struct pci_device *pd, + uint32_t start, uint32_t len, + uint32_t flags, + pci_cfg_reg_func func) +{ + struct pci_cfg_reg_filter *pcrf; + + pcrf = pci_find_cfg_reg_filter(pd, start, len); + if (pcrf) + return pcrf; + + pcrf = zalloc(sizeof(*pcrf) + ((len + 0x4) & ~0x3)); + if (!pcrf) + return NULL; + + /* Don't validate the flags so that the private flags + * can be supported for debugging purpose. + */ + pcrf->flags = flags; + pcrf->start = start; + pcrf->len = len; + pcrf->func = func; + pcrf->data = (uint8_t *)(pcrf + 1); + + if (start < pd->pcrf_start) + pd->pcrf_start = start; + if (pd->pcrf_end < (start + len)) + pd->pcrf_end = start + len; + list_add_tail(&pd->pcrf, &pcrf->link); + bitmap_set_bit(*pd->phb->filter_map, pd->bdfn); + + return pcrf; +} diff --git a/roms/skiboot/core/pcie-slot.c b/roms/skiboot/core/pcie-slot.c new file mode 100644 index 000000000..03326e58f --- /dev/null +++ b/roms/skiboot/core/pcie-slot.c @@ -0,0 +1,566 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * PCIe Slots + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <opal-msg.h> +#include <pci-cfg.h> +#include <pci.h> +#include <pci-slot.h> + +/* Debugging options */ +#define PCIE_SLOT_PREFIX "PCIE-SLOT-%016llx " +#define PCIE_SLOT_DBG(s, fmt, a...) \ + prlog(PR_DEBUG, PCIE_SLOT_PREFIX fmt, (s)->id, ##a) + +static int64_t pcie_slot_get_presence_state(struct pci_slot *slot, uint8_t *val) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t ecap; + uint16_t state; + + /* The presence is always on if it's a switch upstream port */ + if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) { + *val = OPAL_PCI_SLOT_PRESENT; + return OPAL_SUCCESS; + } + + /* + * The presence is always on if a switch downstream port + * doesn't support slot capability according to PCIE spec. 
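+	 *
+	 * (i.e. if the "Slot Implemented" bit, PCICAP_EXP_CAP_SLOT, is
+	 * clear there is no Slot Status register to query, so whatever
+	 * is connected is assumed to be present.)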
+ */ + if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT && + !(slot->pcie_cap & PCICAP_EXP_CAP_SLOT)) { + *val = OPAL_PCI_SLOT_PRESENT; + return OPAL_SUCCESS; + } + + /* Retrieve presence status */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTSTAT, &state); + if (state & PCICAP_EXP_SLOTSTAT_PDETECTST) + *val = OPAL_PCI_SLOT_PRESENT; + else + *val = OPAL_PCI_SLOT_EMPTY; + + return OPAL_SUCCESS; +} + +static int64_t pcie_slot_get_link_state(struct pci_slot *slot, + uint8_t *val) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t ecap; + int16_t state; + + /* + * The link behind switch upstream port is always on + * since it doesn't have a valid link indicator. + */ + if (pd->dev_type == PCIE_TYPE_SWITCH_UPPORT) { + *val = 1; + return OPAL_SUCCESS; + } + + /* Retrieve link width */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LSTAT, &state); + if (state & PCICAP_EXP_LSTAT_DLLL_ACT) + *val = ((state & PCICAP_EXP_LSTAT_WIDTH) >> 4); + else + *val = 0; + + return OPAL_SUCCESS; +} + +static int64_t pcie_slot_get_power_state(struct pci_slot *slot __unused, + uint8_t *val) +{ + /* We should return the cached power state that is same to + * the PCI slot hotplug state (added/removed). Otherwise, + * the OS will see mismatched states, causing the adapter + * behind the slot can't be probed successfully on request + * of hot add. So we could run into the situation where the + * OS sees power-off but it's on in hardware. + */ + *val = slot->power_state; + + return OPAL_SUCCESS; +} + +static int64_t pcie_slot_get_attention_state(struct pci_slot *slot, + uint8_t *val) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t ecap; + uint16_t state; + + /* Attention is off if the capability is missing */ + if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI)) { + *val = 0; + return OPAL_SUCCESS; + } + + /* Retrieve attention state */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &state); + state = (state & PCICAP_EXP_SLOTCTL_ATTNI) >> 6; + switch (state) { + case PCIE_INDIC_ON: + *val = PCI_SLOT_ATTN_LED_ON; + break; + case PCIE_INDIC_BLINK: + *val = PCI_SLOT_ATTN_LED_BLINK; + break; + case PCIE_INDIC_OFF: + default: + *val = PCI_SLOT_ATTN_LED_OFF; + } + + return OPAL_SUCCESS; +} + +static int64_t pcie_slot_get_latch_state(struct pci_slot *slot, + uint8_t *val) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t ecap; + uint16_t state; + + /* Latch is off if MRL sensor doesn't exist */ + if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_MRLSENS)) { + *val = 0; + return OPAL_SUCCESS; + } + + /* Retrieve MRL sensor state */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTSTAT, &state); + if (state & PCICAP_EXP_SLOTSTAT_MRLSENSST) + *val = 1; + else + *val = 0; + + return OPAL_SUCCESS; +} + +static int64_t pcie_slot_set_attention_state(struct pci_slot *slot, + uint8_t val) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t ecap; + uint16_t state; + + /* Drop the request if functionality doesn't exist */ + if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI)) + return OPAL_SUCCESS; + + /* Update with the requested state */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &state); + state &= ~PCICAP_EXP_SLOTCTL_ATTNI; + switch (val) { + 
case PCI_SLOT_ATTN_LED_ON: + state |= (PCIE_INDIC_ON << 6); + break; + case PCI_SLOT_ATTN_LED_BLINK: + state |= (PCIE_INDIC_BLINK << 6); + break; + case PCI_SLOT_ATTN_LED_OFF: + state |= (PCIE_INDIC_OFF << 6); + break; + default: + prlog(PR_ERR, PCIE_SLOT_PREFIX + "Invalid attention state (0x%x)\n", slot->id, val); + return OPAL_PARAMETER; + } + + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, state); + return OPAL_SUCCESS; +} + +static int64_t pcie_slot_set_power_state_ext(struct pci_slot *slot, uint8_t val, + bool surprise_check) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t ecap; + uint16_t state; + + if (slot->power_state == val) + return OPAL_SUCCESS; + + /* Update the power state and return immediately if the power + * control functionality isn't supported on the PCI slot. + */ + if (!(slot->slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL)) { + slot->power_state = val; + return OPAL_SUCCESS; + } + + /* + * Suprise hotpluggable slots need to be handled with care since + * many systems do not implement the presence detect side-band + * signal. Instead, they rely on in-band presence to report the + * existence of a hotplugged card. + * + * This is problematic because: + * a) When PERST is asserted in-band presence doesn't work, and + * b) Switches assert PERST as a part of the "slot power down" sequence + * + * To work around the problem we leave the slot physically powered on + * and exit early here. This way when a new card is inserted, the switch + * will raise an interrupt due to the PresDet status changing. + */ + if (surprise_check && slot->surprise_pluggable) { + slot->power_state = val; + if (val == PCI_SLOT_POWER_OFF) + return OPAL_SUCCESS; + + /* + * Some systems have the slot power disabled by default + * so we always perform the power-on step. This is not + * *strictly* required, but it's probably a good idea. 
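+		 *
+		 * Net effect: a power-off request on a surprise-pluggable
+		 * slot only updates the cached state and returns
+		 * OPAL_SUCCESS, while a power-on request falls through to
+		 * the real Slot Control write below.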
+ */ + } + + pci_slot_set_state(slot, PCI_SLOT_STATE_SPOWER_START); + slot->power_state = val; + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, &state); + state &= ~(PCICAP_EXP_SLOTCTL_PWRCTLR | PCICAP_EXP_SLOTCTL_PWRI); + switch (val) { + case PCI_SLOT_POWER_OFF: + state |= (PCICAP_EXP_SLOTCTL_PWRCTLR | (PCIE_INDIC_OFF << 8)); + break; + case PCI_SLOT_POWER_ON: + state |= (PCIE_INDIC_ON << 8); + break; + default: + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + prlog(PR_ERR, PCIE_SLOT_PREFIX + "Invalid power state (0x%x)\n", slot->id, val); + return OPAL_PARAMETER; + } + + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, state); + pci_slot_set_state(slot, PCI_SLOT_STATE_SPOWER_DONE); + + return OPAL_ASYNC_COMPLETION; +} + +static int64_t pcie_slot_set_power_state(struct pci_slot *slot, uint8_t val) +{ + return pcie_slot_set_power_state_ext(slot, val, true); +} + +static int64_t pcie_slot_sm_poll_link(struct pci_slot *slot) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint32_t ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + uint16_t val; + uint8_t presence = 0; + + switch (slot->state) { + case PCI_SLOT_STATE_LINK_START_POLL: + PCIE_SLOT_DBG(slot, "LINK: Start polling\n"); + + /* Link is down for ever without devices attached */ + if (slot->ops.get_presence_state) + slot->ops.get_presence_state(slot, &presence); + if (!presence) { + PCIE_SLOT_DBG(slot, "LINK: No adapter, end polling\n"); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + return OPAL_SUCCESS; + } + + /* Enable the link without check */ + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, &val); + val &= ~PCICAP_EXP_LCTL_LINK_DIS; + pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_LCTL, val); + + /* + * If the link change report isn't supported, we expect + * the link is up and stabilized after one second. + */ + if (!(slot->link_cap & PCICAP_EXP_LCAP_DL_ACT_REP)) { + pci_slot_set_state(slot, + PCI_SLOT_STATE_LINK_DELAY_FINALIZED); + return pci_slot_set_sm_timeout(slot, secs_to_tb(1)); + } + + /* + * Poll the link state if link state change report is + * supported on the link. 
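+		 *
+		 * With slot->retries set to 250 and a 20ms poll interval
+		 * this gives the link roughly 5 seconds to come up before
+		 * we give up.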
+ */ + pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_POLLING); + slot->retries = 250; + return pci_slot_set_sm_timeout(slot, msecs_to_tb(20)); + case PCI_SLOT_STATE_LINK_DELAY_FINALIZED: + PCIE_SLOT_DBG(slot, "LINK: No link report, end polling\n"); + if (slot->ops.prepare_link_change) + slot->ops.prepare_link_change(slot, true); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + return OPAL_SUCCESS; + case PCI_SLOT_STATE_LINK_POLLING: + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_LSTAT, &val); + if (val & PCICAP_EXP_LSTAT_DLLL_ACT) { + PCIE_SLOT_DBG(slot, "LINK: Link is up, end polling\n"); + if (slot->ops.prepare_link_change) + slot->ops.prepare_link_change(slot, true); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + return OPAL_SUCCESS; + } + + /* Check link state again until timeout */ + if (slot->retries-- == 0) { + prlog(PR_ERR, PCIE_SLOT_PREFIX + "LINK: Timeout waiting for up (%04x)\n", + slot->id, val); + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + return OPAL_SUCCESS; + } + + return pci_slot_set_sm_timeout(slot, msecs_to_tb(20)); + default: + prlog(PR_ERR, PCIE_SLOT_PREFIX + "Link: Unexpected slot state %08x\n", + slot->id, slot->state); + } + + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + return OPAL_HARDWARE; +} + +static void pcie_slot_reset(struct pci_slot *slot, bool assert) +{ + struct phb *phb = slot->phb; + struct pci_device *pd = slot->pd; + uint16_t ctl; + + pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &ctl); + if (assert) + ctl |= PCI_CFG_BRCTL_SECONDARY_RESET; + else + ctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, ctl); +} + +static int64_t pcie_slot_sm_hreset(struct pci_slot *slot) +{ + switch (slot->state) { + case PCI_SLOT_STATE_NORMAL: + PCIE_SLOT_DBG(slot, "HRESET: Starts\n"); + if (slot->ops.prepare_link_change) { + PCIE_SLOT_DBG(slot, "HRESET: Prepare for link down\n"); + slot->ops.prepare_link_change(slot, false); + } + /* fall through */ + case PCI_SLOT_STATE_HRESET_START: + PCIE_SLOT_DBG(slot, "HRESET: Assert\n"); + pcie_slot_reset(slot, true); + pci_slot_set_state(slot, PCI_SLOT_STATE_HRESET_HOLD); + return pci_slot_set_sm_timeout(slot, msecs_to_tb(250)); + case PCI_SLOT_STATE_HRESET_HOLD: + PCIE_SLOT_DBG(slot, "HRESET: Deassert\n"); + pcie_slot_reset(slot, false); + pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL); + return pci_slot_set_sm_timeout(slot, msecs_to_tb(1800)); + default: + PCIE_SLOT_DBG(slot, "HRESET: Unexpected slot state %08x\n", + slot->state); + } + + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + return OPAL_HARDWARE; +} + +/* + * Usually, individual platforms need to override the power + * management methods for fundamental reset, but the hot + * reset method is commonly shared. 
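+ *
+ * A platform wanting its own fundamental reset typically keeps the
+ * generic hreset/poll_link and just replaces freset after the slot
+ * has been created, roughly along these lines (my_platform_freset
+ * being a hypothetical platform hook):
+ *
+ *   slot = pcie_slot_create(phb, pd);
+ *   if (slot)
+ *           slot->ops.freset = my_platform_freset;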
+ */ +static int64_t pcie_slot_sm_freset(struct pci_slot *slot) +{ + uint8_t power_state = PCI_SLOT_POWER_ON; + + switch (slot->state) { + case PCI_SLOT_STATE_NORMAL: + PCIE_SLOT_DBG(slot, "FRESET: Starts\n"); + if (slot->ops.prepare_link_change) + slot->ops.prepare_link_change(slot, false); + + /* Retrieve power state */ + if (slot->ops.get_power_state) { + PCIE_SLOT_DBG(slot, "FRESET: Retrieve power state\n"); + slot->ops.get_power_state(slot, &power_state); + } + + /* In power on state, power it off */ + if (power_state == PCI_SLOT_POWER_ON) { + PCIE_SLOT_DBG(slot, "FRESET: Power is on, turn off\n"); + pcie_slot_set_power_state_ext(slot, + PCI_SLOT_POWER_OFF, false); + pci_slot_set_state(slot, + PCI_SLOT_STATE_FRESET_POWER_OFF); + return pci_slot_set_sm_timeout(slot, msecs_to_tb(50)); + } + /* No power state change, */ + /* fallthrough */ + case PCI_SLOT_STATE_FRESET_POWER_OFF: + PCIE_SLOT_DBG(slot, "FRESET: Power is off, turn on\n"); + pcie_slot_set_power_state_ext(slot, PCI_SLOT_POWER_ON, false); + + pci_slot_set_state(slot, PCI_SLOT_STATE_LINK_START_POLL); + return pci_slot_set_sm_timeout(slot, msecs_to_tb(50)); + default: + prlog(PR_ERR, PCIE_SLOT_PREFIX + "FRESET: Unexpected slot state %08x\n", + slot->id, slot->state); + } + + pci_slot_set_state(slot, PCI_SLOT_STATE_NORMAL); + return OPAL_HARDWARE; +} + +struct pci_slot *pcie_slot_create(struct phb *phb, struct pci_device *pd) +{ + struct pci_slot *slot; + uint32_t ecap; + uint16_t slot_ctl; + + /* Allocate PCI slot */ + slot = pci_slot_alloc(phb, pd); + if (!slot) + return NULL; + + /* Cache the link and slot capabilities */ + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_CAPABILITY_REG, + &slot->pcie_cap); + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_LCAP, + &slot->link_cap); + + /* Leave PCI slot capability blank if PCI slot isn't supported */ + if (slot->pcie_cap & PCICAP_EXP_CAP_SLOT) + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCAP, + &slot->slot_cap); + else + slot->slot_cap = 0; + + if (slot->slot_cap & PCICAP_EXP_SLOTCAP_HPLUG_CAP) + slot->pluggable = 1; + + /* Assume the slot is powered on by default */ + slot->power_state = PCI_SLOT_POWER_ON; + if (slot->slot_cap & PCICAP_EXP_SLOTCAP_PWCTRL) { + slot->power_ctl = 1; + + pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCTL, + &slot_ctl); + if (slot_ctl & PCICAP_EXP_SLOTCTL_PWRCTLR) + slot->power_state = PCI_SLOT_POWER_OFF; + } + + if (slot->slot_cap & PCICAP_EXP_SLOTCAP_PWRI) + slot->power_led_ctl = PCI_SLOT_PWR_LED_CTL_KERNEL; + if (slot->slot_cap & PCICAP_EXP_SLOTCAP_ATTNI) + slot->attn_led_ctl = PCI_SLOT_ATTN_LED_CTL_KERNEL; + slot->wired_lanes = ((slot->link_cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4); + + /* The surprise hotplug capability is claimed when it's supported + * in the slot's capability bits or link state change reporting is + * supported in PCIe link capability. It means the surprise hotplug + * relies on presence or link state change events. In order for the + * link state change event to be properly raised during surprise hot + * add/remove, the power supply to the slot should be always on. + * + * For PCI slots that don't claim surprise hotplug capability explicitly. + * Its PDC (Presence Detection Change) isn't reliable. To mark that as + * broken on them. 
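+	 *
+	 * In other words: when surprise hotplug is only inferred from
+	 * Data Link Layer state change reporting (the else-if below)
+	 * rather than from the Hot-Plug Surprise bit, presence detect
+	 * change events are not trusted and the slot is flagged with
+	 * PCI_SLOT_FLAG_BROKEN_PDC.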
+ */ + if (slot->pcie_cap & PCICAP_EXP_CAP_SLOT) { + if (slot->slot_cap & PCICAP_EXP_SLOTCAP_HPLUG_SURP) { + slot->surprise_pluggable = 1; + } else if (slot->link_cap & PCICAP_EXP_LCAP_DL_ACT_REP) { + slot->surprise_pluggable = 1; + + pci_slot_add_flags(slot, PCI_SLOT_FLAG_BROKEN_PDC); + } + } + + /* Standard slot operations */ + slot->ops.get_presence_state = pcie_slot_get_presence_state; + slot->ops.get_link_state = pcie_slot_get_link_state; + slot->ops.get_power_state = pcie_slot_get_power_state; + slot->ops.get_attention_state = pcie_slot_get_attention_state; + slot->ops.get_latch_state = pcie_slot_get_latch_state; + slot->ops.set_power_state = pcie_slot_set_power_state; + slot->ops.set_attention_state = pcie_slot_set_attention_state; + + /* + * State machine (SM) based reset stuff. The poll function is always + * unified for all cases. + */ + slot->ops.poll_link = pcie_slot_sm_poll_link; + slot->ops.hreset = pcie_slot_sm_hreset; + slot->ops.freset = pcie_slot_sm_freset; + + slot->wired_lanes = PCI_SLOT_WIRED_LANES_UNKNOWN; + slot->connector_type = PCI_SLOT_CONNECTOR_PCIE_NS; + slot->card_desc = PCI_SLOT_DESC_NON_STANDARD; + slot->card_mech = PCI_SLOT_MECH_NONE; + slot->power_led_ctl = PCI_SLOT_PWR_LED_CTL_NONE; + slot->attn_led_ctl = PCI_SLOT_ATTN_LED_CTL_NONE; + + return slot; +} + +/* FIXME: this is kind of insane */ +struct pci_slot *pcie_slot_create_dynamic(struct phb *phb, + struct pci_device *pd) +{ + uint32_t ecap, val; + struct pci_slot *slot; + + if (!phb || !pd || pd->slot) + return NULL; + + /* Try to create slot whose details aren't provided by platform. */ + if (pd->dev_type != PCIE_TYPE_SWITCH_DNPORT) + return NULL; + + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + pci_cfg_read32(phb, pd->bdfn, ecap + PCICAP_EXP_SLOTCAP, &val); + if (!(val & PCICAP_EXP_SLOTCAP_HPLUG_CAP)) + return NULL; + + slot = pcie_slot_create(phb, pd); + + /* On superMicro's "p8dnu" platform, we create dynamic PCI slots + * for all downstream ports of PEX9733 that is connected to PHB + * direct slot. The power supply to the PCI slot is lost after + * PCI adapter is removed from it. The power supply can't be + * turned on when the slot is in empty state. The power supply + * isn't turned on automatically when inserting PCI adapter to + * the slot at later point. We set a flag to the slot here, to + * turn on the power supply in (suprise or managed) hot-add path. + * + * We have same issue with PEX8718 as above on "p8dnu" platform. 
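+	 *
+	 * The vdid values checked below pack the IDs as
+	 * (device << 16) | vendor: 0x973310b5 is the PLX PEX9733 and
+	 * 0x871810b5 the PLX PEX8718 (vendor ID 0x10b5).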
+ */ + if (dt_node_is_compatible(dt_root, "supermicro,p8dnu") && slot && + slot->pd && (slot->pd->vdid == 0x973310b5 || + slot->pd->vdid == 0x871810b5)) + pci_slot_add_flags(slot, PCI_SLOT_FLAG_FORCE_POWERON); + + return slot; +} diff --git a/roms/skiboot/core/pel.c b/roms/skiboot/core/pel.c new file mode 100644 index 000000000..ec13e5590 --- /dev/null +++ b/roms/skiboot/core/pel.c @@ -0,0 +1,279 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Platform Error Log (PEL) generation + * + * Copyright 2014-2016 IBM Corp + */ + +#include <string.h> +#include <errorlog.h> +#include <device.h> +#include <fsp.h> +#include <pel.h> +#include <rtc.h> + +/* Create MTMS section for sapphire log */ +static void create_mtms_section(struct errorlog *elog_data, + char *pel_buffer, int *pel_offset) +{ + const struct dt_property *p; + + struct opal_mtms_section *mtms = (struct opal_mtms_section *) + (pel_buffer + *pel_offset); + + mtms->v6header.id = cpu_to_be16(ELOG_SID_MACHINE_TYPE); + mtms->v6header.length = cpu_to_be16(MTMS_SECTION_SIZE); + mtms->v6header.version = OPAL_EXT_HRD_VER; + mtms->v6header.subtype = 0; + mtms->v6header.component_id = cpu_to_be16(elog_data->component_id); + + memset(mtms->model, 0x00, sizeof(mtms->model)); + memcpy(mtms->model, dt_prop_get(dt_root, "model"), OPAL_SYS_MODEL_LEN); + + memset(mtms->serial_no, 0x00, sizeof(mtms->serial_no)); + p = dt_find_property(dt_root, "system-id"); + if (p) + memcpy(mtms->serial_no, p->prop, OPAL_SYS_SERIAL_LEN); + else + memset(mtms->serial_no, 0, OPAL_SYS_SERIAL_LEN); + + *pel_offset += MTMS_SECTION_SIZE; +} + +/* Create extended header section */ +static void create_extended_header_section(struct errorlog *elog_data, + char *pel_buffer, int *pel_offset) +{ + const char *opalmodel = NULL; + const struct dt_property *p; + uint64_t extd_time; + uint32_t extd_date; + + struct opal_extended_header_section *extdhdr = + (struct opal_extended_header_section *) + (pel_buffer + *pel_offset); + + extdhdr->v6header.id = cpu_to_be16(ELOG_SID_EXTENDED_HEADER); + extdhdr->v6header.length = cpu_to_be16(EXTENDED_HEADER_SECTION_SIZE); + extdhdr->v6header.version = OPAL_EXT_HRD_VER; + extdhdr->v6header.subtype = 0; + extdhdr->v6header.component_id = cpu_to_be16(elog_data->component_id); + + memset(extdhdr->model, 0x00, sizeof(extdhdr->model)); + opalmodel = dt_prop_get(dt_root, "model"); + memcpy(extdhdr->model, opalmodel, OPAL_SYS_MODEL_LEN); + + memset(extdhdr->serial_no, 0x00, sizeof(extdhdr->serial_no)); + p = dt_find_property(dt_root, "system-id"); + if (p) + memcpy(extdhdr->serial_no, p->prop, OPAL_SYS_SERIAL_LEN); + else + memset(extdhdr->serial_no, 0, OPAL_SYS_SERIAL_LEN); + + memset(extdhdr->opal_release_version, 0x00, + sizeof(extdhdr->opal_release_version)); + memset(extdhdr->opal_subsys_version, 0x00, + sizeof(extdhdr->opal_subsys_version)); + + rtc_cache_get_datetime(&extd_date, &extd_time); + extdhdr->extended_header_date = cpu_to_be32(extd_date); + extdhdr->extended_header_time = cpu_to_be32(extd_time >> 32); + extdhdr->opal_symid_len = 0; + + *pel_offset += EXTENDED_HEADER_SECTION_SIZE; +} + +/* set src type */ +static void settype(struct opal_src_section *src, uint8_t src_type) +{ + char type[4]; + snprintf(type, sizeof(type), "%02X", src_type); + memcpy(src->srcstring, type, 2); +} + +/* set SRC subsystem type */ +static void setsubsys(struct opal_src_section *src, uint8_t src_subsys) +{ + char subsys[4]; + snprintf(subsys, sizeof(subsys), "%02X", src_subsys); + memcpy(src->srcstring+2, subsys, 2); +} + +/* Ser reason 
code of SRC */ +static void setrefcode(struct opal_src_section *src, uint16_t src_refcode) +{ + char refcode[8]; + snprintf(refcode, sizeof(refcode), "%04X", src_refcode); + memcpy(src->srcstring+4, refcode, 4); +} + +/* Create SRC section of OPAL log */ +static void create_src_section(struct errorlog *elog_data, + char *pel_buffer, int *pel_offset) +{ + struct opal_src_section *src = (struct opal_src_section *) + (pel_buffer + *pel_offset); + + src->v6header.id = cpu_to_be16(ELOG_SID_PRIMARY_SRC); + src->v6header.length = cpu_to_be16(SRC_SECTION_SIZE); + src->v6header.version = OPAL_ELOG_VERSION; + src->v6header.subtype = OPAL_ELOG_SST; + src->v6header.component_id = cpu_to_be16(elog_data->component_id); + + src->version = OPAL_SRC_SEC_VER; + src->flags = 0; + src->wordcount = OPAL_SRC_MAX_WORD_COUNT; + src->srclength = cpu_to_be16(SRC_LENGTH); + settype(src, OPAL_SRC_TYPE_ERROR); + setsubsys(src, OPAL_FAILING_SUBSYSTEM); + setrefcode(src, elog_data->reason_code); + memset(src->hexwords, 0 , (8 * 4)); + src->hexwords[0] = cpu_to_be32(OPAL_SRC_FORMAT); + src->hexwords[4] = cpu_to_be32(elog_data->additional_info[0]); + src->hexwords[5] = cpu_to_be32(elog_data->additional_info[1]); + src->hexwords[6] = cpu_to_be32(elog_data->additional_info[2]); + src->hexwords[7] = cpu_to_be32(elog_data->additional_info[3]); + *pel_offset += SRC_SECTION_SIZE; +} + +/* Create user header section */ +static void create_user_header_section(struct errorlog *elog_data, + char *pel_buffer, int *pel_offset) +{ + struct opal_user_header_section *usrhdr = + (struct opal_user_header_section *) + (pel_buffer + *pel_offset); + + usrhdr->v6header.id = cpu_to_be16(ELOG_SID_USER_HEADER); + usrhdr->v6header.length = cpu_to_be16(USER_HEADER_SECTION_SIZE); + usrhdr->v6header.version = OPAL_ELOG_VERSION; + usrhdr->v6header.subtype = OPAL_ELOG_SST; + usrhdr->v6header.component_id = cpu_to_be16(elog_data->component_id); + + usrhdr->subsystem_id = elog_data->subsystem_id; + usrhdr->event_scope = 0; + usrhdr->event_severity = elog_data->event_severity; + usrhdr->event_type = elog_data->event_subtype; + + if (elog_data->elog_origin == ORG_SAPPHIRE) + usrhdr->action_flags = cpu_to_be16(ERRL_ACTION_REPORT); + else + usrhdr->action_flags = cpu_to_be16(ERRL_ACTION_NONE); + + *pel_offset += USER_HEADER_SECTION_SIZE; +} + +/* Create private header section */ +static void create_private_header_section(struct errorlog *elog_data, + char *pel_buffer, int *pel_offset) +{ + uint64_t ctime; + uint32_t cdate; + struct opal_private_header_section *privhdr = + (struct opal_private_header_section *) + pel_buffer; + + privhdr->v6header.id = cpu_to_be16(ELOG_SID_PRIVATE_HEADER); + privhdr->v6header.length = cpu_to_be16(PRIVATE_HEADER_SECTION_SIZE); + privhdr->v6header.version = OPAL_ELOG_VERSION; + privhdr->v6header.subtype = OPAL_ELOG_SST; + privhdr->v6header.component_id = cpu_to_be16(elog_data->component_id); + privhdr->plid = cpu_to_be32(elog_data->plid); + + rtc_cache_get_datetime(&cdate, &ctime); + privhdr->create_date = cpu_to_be32(cdate); + privhdr->create_time = cpu_to_be32(ctime >> 32); + privhdr->section_count = 5; + + privhdr->creator_subid_hi = 0x00; + privhdr->creator_subid_lo = 0x00; + + if (elog_data->elog_origin == ORG_SAPPHIRE) + privhdr->creator_id = OPAL_CID_SAPPHIRE; + else + privhdr->creator_id = OPAL_CID_POWERNV; + + privhdr->log_entry_id = cpu_to_be32(elog_data->plid); /*entry id is updated by FSP*/ + + *pel_offset += PRIVATE_HEADER_SECTION_SIZE; +} + +static void create_user_defined_section(struct errorlog *elog_data, + 
char *pel_buffer, int *pel_offset) +{ + char *dump = (char *)pel_buffer + *pel_offset; + char *opal_buf = (char *)elog_data->user_data_dump; + struct opal_user_section *usrhdr; + struct elog_user_data_section *opal_usr_data; + struct opal_private_header_section *privhdr = + (struct opal_private_header_section *)pel_buffer; + int i; + + for (i = 0; i < elog_data->user_section_count; i++) { + + usrhdr = (struct opal_user_section *)dump; + opal_usr_data = (struct elog_user_data_section *)opal_buf; + + usrhdr->v6header.id = cpu_to_be16(ELOG_SID_USER_DEFINED); + usrhdr->v6header.length = cpu_to_be16( + sizeof(struct opal_v6_header) + + be16_to_cpu(opal_usr_data->size)); + usrhdr->v6header.version = OPAL_ELOG_VERSION; + usrhdr->v6header.subtype = OPAL_ELOG_SST; + usrhdr->v6header.component_id = cpu_to_be16(elog_data->component_id); + + memcpy(usrhdr->dump, opal_buf, be16_to_cpu(opal_usr_data->size)); + *pel_offset += be16_to_cpu(usrhdr->v6header.length); + dump += be16_to_cpu(usrhdr->v6header.length); + opal_buf += be16_to_cpu(opal_usr_data->size); + privhdr->section_count++; + } +} + +static size_t pel_user_section_size(struct errorlog *elog_data) +{ + int i; + size_t total = 0; + char *opal_buf = (char *)elog_data->user_data_dump; + struct elog_user_data_section *opal_usr_data; + + for (i = 0; i < elog_data->user_section_count; i++) { + u16 s; + + opal_usr_data = (struct elog_user_data_section *)opal_buf; + s = be16_to_cpu(opal_usr_data->size); + total += sizeof(struct opal_v6_header) + s; + opal_buf += s; + } + + return total; +} + +size_t pel_size(struct errorlog *elog_data) +{ + return PEL_MIN_SIZE + pel_user_section_size(elog_data); +} + +/* Converts an OPAL errorlog into a PEL formatted log */ +int create_pel_log(struct errorlog *elog_data, char *pel_buffer, + size_t pel_buffer_size) +{ + int pel_offset = 0; + + if (pel_buffer_size < pel_size(elog_data)) { + prerror("PEL buffer too small to create record\n"); + return 0; + } + + memset(pel_buffer, 0, pel_buffer_size); + + create_private_header_section(elog_data, pel_buffer, &pel_offset); + create_user_header_section(elog_data, pel_buffer, &pel_offset); + create_src_section(elog_data, pel_buffer, &pel_offset); + create_extended_header_section(elog_data, pel_buffer, &pel_offset); + create_mtms_section(elog_data, pel_buffer, &pel_offset); + if (elog_data->user_section_count) + create_user_defined_section(elog_data, pel_buffer, &pel_offset); + + return pel_offset; +} diff --git a/roms/skiboot/core/platform.c b/roms/skiboot/core/platform.c new file mode 100644 index 000000000..320fdea03 --- /dev/null +++ b/roms/skiboot/core/platform.c @@ -0,0 +1,319 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * OPAL Platform abstraction + * + * Some OPAL calls may/may not call into the struct platform that's + * probed during boot. There's also a bunch of platform specific init + * and configuration that's called. + * + * Copyright 2013-2019 IBM Corp. 
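+ *
+ * Platform definitions live in a dedicated linker section (each
+ * platform contributes one via its DECLARE_PLATFORM() definition);
+ * probe_platform() below walks that section between
+ * __platforms_start and __platforms_end, picks the first entry whose
+ * probe() hook returns true, and falls back to the generic platform
+ * otherwise.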
+ */ + +#include <stdlib.h> +#include <skiboot.h> +#include <opal.h> +#include <console.h> +#include <timebase.h> +#include <cpu.h> +#include <chip.h> +#include <xscom.h> +#include <errorlog.h> +#include <bt.h> +#include <nvram.h> +#include <npu2.h> +#include <platforms/astbmc/astbmc.h> + +bool manufacturing_mode = false; +struct platform platform; + +DEFINE_LOG_ENTRY(OPAL_RC_ABNORMAL_REBOOT, OPAL_PLATFORM_ERR_EVT, OPAL_CEC, + OPAL_CEC_HARDWARE, OPAL_ERROR_PANIC, + OPAL_ABNORMAL_POWER_OFF); + +/* + * Various wrappers for platform functions + */ +static int64_t opal_cec_power_down(uint64_t request) +{ + prlog(PR_NOTICE, "OPAL: Shutdown request type 0x%llx...\n", request); + + opal_quiesce(QUIESCE_HOLD, -1); + + console_complete_flush(); + + if (platform.cec_power_down) + return platform.cec_power_down(request); + + return OPAL_SUCCESS; +} +opal_call(OPAL_CEC_POWER_DOWN, opal_cec_power_down, 1); + +static int64_t full_reboot(void) +{ + prlog(PR_NOTICE, "OPAL: Reboot request...\n"); + + console_complete_flush(); + + if (platform.cec_reboot) + return platform.cec_reboot(); + + return OPAL_SUCCESS; +} + +static int64_t opal_cec_reboot(void) +{ + opal_quiesce(QUIESCE_HOLD, -1); + + /* + * Fast-reset was enabled by default for a long time in an attempt to + * make it more stable by exercising it more frequently. This resulted + * in a fair amount of pain due to mis-behaving hardware and confusion + * about what a "reset" is supposed to do exactly. Additionally, + * secure variables require a full reboot to work at all. + * + * Due to all that fast-reset should only be used if it's explicitly + * enabled. It started life as a debug hack and should remain one. + */ + if (nvram_query_eq_safe("fast-reset", "1")) + fast_reboot(); + + return full_reboot(); +} +opal_call(OPAL_CEC_REBOOT, opal_cec_reboot, 0); + +static int64_t opal_cec_reboot2(uint32_t reboot_type, char *diag) +{ + struct errorlog *buf; + + opal_quiesce(QUIESCE_HOLD, -1); + + switch (reboot_type) { + case OPAL_REBOOT_NORMAL: + return opal_cec_reboot(); + case OPAL_REBOOT_PLATFORM_ERROR: + prlog(PR_EMERG, + "OPAL: Reboot requested due to Platform error.\n"); + buf = opal_elog_create(&e_info(OPAL_RC_ABNORMAL_REBOOT), 0); + if (buf) { + log_append_msg(buf, + "OPAL: Reboot requested due to Platform error."); + if (diag) { + /* Add user section "DESC" */ + log_add_section(buf, OPAL_ELOG_SEC_DESC); + log_append_data(buf, diag, strlen(diag)); + } + log_commit(buf); + } else { + prerror("OPAL: failed to log an error\n"); + } + disable_fast_reboot("Reboot due to Platform Error"); + console_complete_flush(); + return xscom_trigger_xstop(); + case OPAL_REBOOT_FULL_IPL: + prlog(PR_NOTICE, "Reboot: Full reboot requested"); + return full_reboot(); + case OPAL_REBOOT_MPIPL: + prlog(PR_NOTICE, "Reboot: OS reported error. Performing MPIPL\n"); + console_complete_flush(); + if (platform.terminate) + platform.terminate("OS reported error. Performing MPIPL\n"); + else + full_reboot(); + for (;;); + break; + case OPAL_REBOOT_FAST: + prlog(PR_NOTICE, "Reboot: Fast reboot requested by OS\n"); + fast_reboot(); + prlog(PR_NOTICE, "Reboot: Fast reboot failed\n"); + return OPAL_UNSUPPORTED; + default: + prlog(PR_NOTICE, "OPAL: Unsupported reboot request %d\n", reboot_type); + return OPAL_UNSUPPORTED; + break; + } + return OPAL_SUCCESS; +} +opal_call(OPAL_CEC_REBOOT2, opal_cec_reboot2, 2); + +static bool generic_platform_probe(void) +{ + if (dt_find_by_path(dt_root, "bmc")) { + /* We appear to have a BMC... 
so let's cross our fingers + * and see if we can do anything! + */ + prlog(PR_ERR, "GENERIC BMC PLATFORM: **GUESSING** that there's " + "*maybe* a BMC we can talk to.\n"); + prlog(PR_ERR, "THIS IS ****UNSUPPORTED****, BRINGUP USE ONLY.\n"); + astbmc_early_init(); + } else { + uart_init(); + } + + return true; +} + +static void generic_platform_init(void) +{ + if (uart_enabled()) + set_opal_console(&uart_opal_con); + + if (dt_find_by_path(dt_root, "bmc")) { + prlog(PR_ERR, "BMC-GUESSWORK: Here be dragons with a taste for human flesh\n"); + astbmc_init(); + } else { + /* Otherwise we go down the ultra-minimal path */ + + /* Enable a BT interface if we find one too */ + bt_init(); + } + + /* Fake a real time clock */ + fake_rtc_init(); +} + +static int64_t generic_cec_power_down(uint64_t request __unused) +{ + return OPAL_UNSUPPORTED; +} + +static int generic_resource_loaded(enum resource_id id, uint32_t subid) +{ + if (dt_find_by_path(dt_root, "bmc")) + return flash_resource_loaded(id, subid); + + return OPAL_EMPTY; +} + +static int generic_start_preload_resource(enum resource_id id, uint32_t subid, + void *buf, size_t *len) +{ + if (dt_find_by_path(dt_root, "bmc")) + return flash_start_preload_resource(id, subid, buf, len); + + return OPAL_EMPTY; +} + +/* These values will work for a ZZ booted using BML */ +static const struct platform_ocapi generic_ocapi = { + .i2c_engine = 1, + .i2c_port = 4, + .i2c_reset_addr = 0x20, + .i2c_reset_brick2 = (1 << 1), + .i2c_reset_brick3 = (1 << 6), + .i2c_reset_brick4 = 0, /* unused */ + .i2c_reset_brick5 = 0, /* unused */ + .i2c_presence_addr = 0x20, + .i2c_presence_brick2 = (1 << 2), /* bottom connector */ + .i2c_presence_brick3 = (1 << 7), /* top connector */ + .i2c_presence_brick4 = 0, /* unused */ + .i2c_presence_brick5 = 0, /* unused */ + .odl_phy_swap = true, +}; + +static struct bmc_platform generic_bmc = { + .name = "generic", +}; + +static struct platform generic_platform = { + .name = "generic", + .bmc = &generic_bmc, + .probe = generic_platform_probe, + .init = generic_platform_init, + .nvram_info = fake_nvram_info, + .nvram_start_read = fake_nvram_start_read, + .nvram_write = fake_nvram_write, + .cec_power_down = generic_cec_power_down, + .start_preload_resource = generic_start_preload_resource, + .resource_loaded = generic_resource_loaded, + .ocapi = &generic_ocapi, + .npu2_device_detect = npu2_i2c_presence_detect, /* Assumes ZZ */ +}; + +const struct bmc_platform *bmc_platform = &generic_bmc; + +void set_bmc_platform(const struct bmc_platform *bmc) +{ + if (bmc) + prlog(PR_NOTICE, "PLAT: Detected BMC platform %s\n", bmc->name); + else + bmc = &generic_bmc; + + bmc_platform = bmc; +} + +void probe_platform(void) +{ + struct platform *platforms = &__platforms_start; + unsigned int i; + + /* Detect Manufacturing mode */ + if (dt_find_property(dt_root, "ibm,manufacturing-mode")) { + /** + * @fwts-label ManufacturingMode + * @fwts-advice You are running in manufacturing mode. + * This mode should only be enabled in a factory during + * manufacturing. 
+ */ + prlog(PR_NOTICE, "PLAT: Manufacturing mode ON\n"); + manufacturing_mode = true; + } + + for (i = 0; &platforms[i] < &__platforms_end; i++) { + if (platforms[i].probe && platforms[i].probe()) { + platform = platforms[i]; + break; + } + } + if (!platform.name) { + platform = generic_platform; + if (platform.probe) + platform.probe(); + } + + prlog(PR_NOTICE, "PLAT: Detected %s platform\n", platform.name); + + set_bmc_platform(platform.bmc); +} + + +int start_preload_resource(enum resource_id id, uint32_t subid, + void *buf, size_t *len) +{ + if (!platform.start_preload_resource) + return OPAL_UNSUPPORTED; + + return platform.start_preload_resource(id, subid, buf, len); +} + +int resource_loaded(enum resource_id id, uint32_t idx) +{ + if (!platform.resource_loaded) + return OPAL_SUCCESS; + + return platform.resource_loaded(id, idx); +} + +int wait_for_resource_loaded(enum resource_id id, uint32_t idx) +{ + int r = resource_loaded(id, idx); + int waited = 0; + + while(r == OPAL_BUSY) { + opal_run_pollers(); + r = resource_loaded(id, idx); + if (r != OPAL_BUSY) + break; + time_wait_ms_nopoll(5); + waited+=5; + } + + prlog(PR_TRACE, "PLATFORM: wait_for_resource_loaded %x/%x %u ms\n", + id, idx, waited); + return r; +} + +void op_display(enum op_severity sev, enum op_module mod, uint16_t code) +{ + if (platform.op_display) + platform.op_display(sev, mod, code); +} diff --git a/roms/skiboot/core/pool.c b/roms/skiboot/core/pool.c new file mode 100644 index 000000000..a0283199a --- /dev/null +++ b/roms/skiboot/core/pool.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * This file provides some functions to manage a pool of pre-allocated + * objects. It also provides a method to reserve a pre-defined number + * of objects for higher priorty requests. The allocations follow the + * following rules: + * + * 1. An allocation will succeed at any priority if there is more than + * the reserved number of objects free. + * 2. Only high priority allocations will succeed when there are less + * than the reserved number of objects free. + * 3. When an allocation is freed it is always added to the high priority + * pool if there are less than the reserved number of allocations + * available. + * + * Copyright 2013-2014 IBM Corp. 
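The reservation rules above are easiest to see from the caller's side. The sketch below uses pool_init(), pool_get() and pool_free_object() exactly as defined in this file; the name of the high-priority enumerator (POOL_HIGH) is an assumption, only POOL_NORMAL is visible in the code below.

/* Illustrative sketch: a pool of 16 request objects, the last 4 held back
 * for high-priority callers (e.g. error paths). */
struct request { int id; };
static struct pool req_pool;

static int request_pool_setup(void)
{
        /* 16 objects total, 4 of them reserved for high-priority users */
        return pool_init(&req_pool, sizeof(struct request), 16, 4);
}

static struct request *get_request(bool critical)
{
        /* Normal callers start failing once only the reserve is left */
        return pool_get(&req_pool, critical ? POOL_HIGH : POOL_NORMAL);
}

static void put_request(struct request *r)
{
        pool_free_object(&req_pool, r);
}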
+ */ + +#include <pool.h> +#include <string.h> +#include <stdlib.h> +#include <ccan/list/list.h> + +void* pool_get(struct pool *pool, enum pool_priority priority) +{ + void *obj; + + if (!pool->free_count || + ((pool->free_count <= pool->reserved) && priority == POOL_NORMAL)) + return NULL; + + pool->free_count--; + obj = (void *) list_pop_(&pool->free_list, 0); + assert(obj); + memset(obj, 0, pool->obj_size); + return obj; +} + +void pool_free_object(struct pool *pool, void *obj) +{ + pool->free_count++; + list_add_tail(&pool->free_list, + (struct list_node *) (obj)); +} + +int pool_init(struct pool *pool, size_t obj_size, int count, int reserved) +{ + int i; + + if (obj_size < sizeof(struct list_node)) + obj_size = sizeof(struct list_node); + + assert(count >= reserved); + pool->buf = malloc(obj_size*count); + if (!pool->buf) + return -1; + + pool->obj_size = obj_size; + pool->free_count = count; + pool->reserved = reserved; + list_head_init(&pool->free_list); + + for(i = 0; i < count; i++) + list_add_tail(&pool->free_list, + (struct list_node *) (pool->buf + obj_size*i)); + + return 0; +} diff --git a/roms/skiboot/core/powercap.c b/roms/skiboot/core/powercap.c new file mode 100644 index 000000000..6ae58eb86 --- /dev/null +++ b/roms/skiboot/core/powercap.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * OPAL calls to get/set power caps + * + * Copyright 2017 IBM Corp. + */ + +#include <powercap.h> + +static int opal_get_powercap(u32 handle, int token __unused, __be32 *__pcap) +{ + if (!__pcap || !opal_addr_valid(__pcap)) + return OPAL_PARAMETER; + + if (powercap_get_class(handle) == POWERCAP_CLASS_OCC) { + u32 pcap; + int rc; + + rc = occ_get_powercap(handle, &pcap); + *__pcap = cpu_to_be32(pcap); + return rc; + } + + return OPAL_UNSUPPORTED; +}; + +opal_call(OPAL_GET_POWERCAP, opal_get_powercap, 3); + +static int opal_set_powercap(u32 handle, int token, u32 pcap) +{ + if (powercap_get_class(handle) == POWERCAP_CLASS_OCC) + return occ_set_powercap(handle, token, pcap); + + return OPAL_UNSUPPORTED; +}; + +opal_call(OPAL_SET_POWERCAP, opal_set_powercap, 3); diff --git a/roms/skiboot/core/psr.c b/roms/skiboot/core/psr.c new file mode 100644 index 000000000..75ccc6617 --- /dev/null +++ b/roms/skiboot/core/psr.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * OPAL calls to get/set Power Shift Ratio (PSR) + * + * i.e. when something has to be throttled, what gets throttled? + * + * Copyright 2017 IBM Corp. 
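powercap.c above and psr.c below are structurally identical: validate the caller's output pointer, dispatch on the handle's class, and return the value big-endian. The helper below is a hypothetical restatement of that shared shape, not skiboot code; it only exists to make the pattern explicit.

/* Hypothetical sketch (not in skiboot): the common shape of the two
 * "get" wrappers, opal_get_powercap() and opal_get_power_shift_ratio(). */
static int opal_get_be32_attr(u32 handle, __be32 *out, bool handled_by_occ,
                              int (*occ_get)(u32 handle, u32 *val))
{
        u32 val;
        int rc;

        if (!out || !opal_addr_valid(out))
                return OPAL_PARAMETER;

        if (!handled_by_occ)
                return OPAL_UNSUPPORTED;

        rc = occ_get(handle, &val);
        *out = cpu_to_be32(val);
        return rc;
}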
+ */ + +#include <psr.h> + +static int opal_get_power_shift_ratio(u32 handle, int token __unused, + __be32 *__ratio) +{ + if (!__ratio || !opal_addr_valid(__ratio)) + return OPAL_PARAMETER; + + if (psr_get_class(handle) == PSR_CLASS_OCC) { + u32 ratio; + int rc; + + rc = occ_get_psr(handle, &ratio); + *__ratio = cpu_to_be32(ratio); + return rc; + } + + return OPAL_UNSUPPORTED; +}; + +opal_call(OPAL_GET_POWER_SHIFT_RATIO, opal_get_power_shift_ratio, 3); + +static int opal_set_power_shift_ratio(u32 handle, int token, + u32 ratio) +{ + if (psr_get_class(handle) == PSR_CLASS_OCC) + return occ_set_psr(handle, token, ratio); + + return OPAL_UNSUPPORTED; +}; + +opal_call(OPAL_SET_POWER_SHIFT_RATIO, opal_set_power_shift_ratio, 3); diff --git a/roms/skiboot/core/relocate.c b/roms/skiboot/core/relocate.c new file mode 100644 index 000000000..6295927e2 --- /dev/null +++ b/roms/skiboot/core/relocate.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Relocate ourselves + * + * WARNING: This code is used to self-relocate, it cannot have any + * global reference nor TOC reference. It's also called before BSS + * is cleared. + * + * Copyright 2013-2015 IBM Corp. + */ + +#include <stdbool.h> +#include <elf.h> + +/* Called from head.S, thus no header. */ +int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela); + +/* Note: This code is simplified according to the assumptions + * that our link address is 0 and we are running at the + * target address already. + */ +int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela) +{ + uint64_t dt_rela = 0; + uint64_t dt_relacount = 0; + unsigned int i; + + /* Look for relocation table */ + for (; dyn->d_tag != DT_NULL; dyn++) { + if (dyn->d_tag == DT_RELA) + dt_rela = dyn->d_val; + else if (dyn->d_tag == DT_RELACOUNT) + dt_relacount = dyn->d_val; + } + + /* If we miss either rela or relacount, bail */ + if (!dt_rela || !dt_relacount) + return -1; + + /* Check if the offset is consistent */ + if ((offset + dt_rela) != (uint64_t)rela) + return -2; + + /* Perform relocations */ + for (i = 0; i < dt_relacount; i++, rela++) { + uint64_t *t; + + if (ELF64_R_TYPE(rela->r_info) != R_PPC64_RELATIVE) + return -3; + t = (uint64_t *)(rela->r_offset + offset); + *t = rela->r_addend + offset; + } + + return 0; +} diff --git a/roms/skiboot/core/rtc.c b/roms/skiboot/core/rtc.c new file mode 100644 index 000000000..3c0dda71e --- /dev/null +++ b/roms/skiboot/core/rtc.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Real Time Clock (RTC) Cache + * + * Copyright 2013-2014 IBM Corp. 
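For relocate() above, each R_PPC64_RELATIVE entry means "the 64-bit word at link-address r_offset must hold link-address r_addend", so at run time the word at (load offset + r_offset) is patched to (load offset + r_addend). The host-side toy below (not firmware code, 64-bit host assumed) mimics one such fixup.

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint64_t image[2] = { 0, 0 };            /* stand-in for the loaded image  */
        uint64_t offset   = (uint64_t)image;     /* run-time base; link base was 0 */
        uint64_t r_offset = 8;                   /* slot 1, in link-address terms  */
        uint64_t r_addend = 0;                   /* should point at slot 0         */

        /* The same store relocate() performs for one RELATIVE entry */
        *(uint64_t *)(r_offset + offset) = r_addend + offset;

        assert(image[1] == (uint64_t)&image[0]); /* slot 1 now points at slot 0 */
        return 0;
}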
+ */ + +#include <skiboot.h> +#include <lock.h> +#include <rtc.h> +#include <timebase.h> + +static struct lock rtc_tod_lock = LOCK_UNLOCKED; + +static struct { + struct tm tm; + unsigned long tb; + bool valid; +} rtc_tod_cache; + +void rtc_cache_update(struct tm *tm) +{ + lock(&rtc_tod_lock); + rtc_tod_cache.tb = mftb(); + rtc_tod_cache.tm = *tm; + rtc_tod_cache.valid = true; + unlock(&rtc_tod_lock); +} + +int rtc_cache_get(struct tm *tm) +{ + unsigned long cache_age_sec; + + lock(&rtc_tod_lock); + + if (!rtc_tod_cache.valid) { + unlock(&rtc_tod_lock); + return -1; + } + + cache_age_sec = tb_to_msecs(mftb() - rtc_tod_cache.tb) / 1000; + *tm = rtc_tod_cache.tm; + unlock(&rtc_tod_lock); + + tm->tm_sec += cache_age_sec; + mktime(tm); + + return 0; +} + +int rtc_cache_get_datetime(uint32_t *year_month_day, + uint64_t *hour_minute_second_millisecond) +{ + struct tm tm; + + if (rtc_cache_get(&tm) < 0) + return -1; + + tm_to_datetime(&tm, year_month_day, hour_minute_second_millisecond); + + return 0; +} diff --git a/roms/skiboot/core/sensor.c b/roms/skiboot/core/sensor.c new file mode 100644 index 000000000..303d867e2 --- /dev/null +++ b/roms/skiboot/core/sensor.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * OPAL Sensor APIs + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <sensor.h> +#include <skiboot.h> +#include <device.h> +#include <opal.h> +#include <dts.h> +#include <lock.h> +#include <occ.h> + +struct dt_node *sensor_node; + +static struct lock async_read_list_lock = LOCK_UNLOCKED; +static LIST_HEAD(async_read_list); + +struct sensor_async_read { + struct list_node link; + __be64 *val; + __be32 *opal_data; + int token; +}; + +static int add_to_async_read_list(int token, __be32 *opal_data, __be64 *val) +{ + struct sensor_async_read *req; + + req = zalloc(sizeof(*req)); + if (!req) + return OPAL_NO_MEM; + + req->token = token; + req->val = val; + req->opal_data = opal_data; + + lock(&async_read_list_lock); + list_add_tail(&async_read_list, &req->link); + unlock(&async_read_list_lock); + + return OPAL_ASYNC_COMPLETION; +} + +void check_sensor_read(int token) +{ + struct sensor_async_read *req = NULL; + + lock(&async_read_list_lock); + if (list_empty(&async_read_list)) + goto out; + + list_for_each(&async_read_list, req, link) { + if (req->token == token) + break; + } + if (!req) + goto out; + + *req->opal_data = cpu_to_be32(be64_to_cpu(*req->val)); + free(req->val); + list_del(&req->link); + free(req); +out: + unlock(&async_read_list_lock); +} + +static s64 opal_sensor_read_64(u32 sensor_hndl, int token, __be64 *data) +{ + s64 rc; + + switch (sensor_get_family(sensor_hndl)) { + case SENSOR_DTS: + rc = dts_sensor_read(sensor_hndl, token, data); + return rc; + + case SENSOR_OCC: + rc = occ_sensor_read(sensor_hndl, data); + return rc; + + default: + break; + } + + if (platform.sensor_read) { + rc = platform.sensor_read(sensor_hndl, token, data); + return rc; + } + + return OPAL_UNSUPPORTED; +} + +static int64_t opal_sensor_read(uint32_t sensor_hndl, int token, + __be32 *data) +{ + __be64 *val; + s64 rc; + + val = zalloc(sizeof(*val)); + if (!val) + return OPAL_NO_MEM; + + rc = opal_sensor_read_64(sensor_hndl, token, val); + if (rc == OPAL_SUCCESS) { + *data = cpu_to_be32(be64_to_cpu(*val)); + free(val); + } else if (rc == OPAL_ASYNC_COMPLETION) { + rc = add_to_async_read_list(token, data, val); + } + + return rc; +} + +static int opal_sensor_group_clear(u32 group_hndl, int token) +{ + switch (sensor_get_family(group_hndl)) { + case SENSOR_OCC: + 
return occ_sensor_group_clear(group_hndl, token); + default: + break; + } + + return OPAL_UNSUPPORTED; +} + +static int opal_sensor_group_enable(u32 group_hndl, int token, bool enable) +{ + switch (sensor_get_family(group_hndl)) { + case SENSOR_OCC: + return occ_sensor_group_enable(group_hndl, token, enable); + default: + break; + } + + return OPAL_UNSUPPORTED; +} +void sensor_init(void) +{ + sensor_node = dt_new(opal_node, "sensors"); + + dt_add_property_string(sensor_node, "compatible", "ibm,opal-sensor"); + dt_add_property_cells(sensor_node, "#address-cells", 1); + dt_add_property_cells(sensor_node, "#size-cells", 0); + + /* Register OPAL interface */ + opal_register(OPAL_SENSOR_READ, opal_sensor_read, 3); + opal_register(OPAL_SENSOR_GROUP_CLEAR, opal_sensor_group_clear, 2); + opal_register(OPAL_SENSOR_READ_U64, opal_sensor_read_64, 3); + opal_register(OPAL_SENSOR_GROUP_ENABLE, opal_sensor_group_enable, 3); +} diff --git a/roms/skiboot/core/stack.c b/roms/skiboot/core/stack.c new file mode 100644 index 000000000..3edf98411 --- /dev/null +++ b/roms/skiboot/core/stack.c @@ -0,0 +1,266 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Create/Print backtraces, check stack usage etc. + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <processor.h> +#include <cpu.h> +#include <stack.h> +#include <mem_region.h> +#include <unistd.h> +#include <lock.h> + +#define STACK_BUF_ENTRIES 60 +static struct bt_entry bt_buf[STACK_BUF_ENTRIES]; + +/* Dumps backtrace to buffer */ +static void __nomcount __backtrace_create(struct bt_entry *entries, + unsigned int max_ents, + struct bt_metadata *metadata, + struct stack_frame *eframe) +{ + unsigned long *fp = (unsigned long *)eframe; + unsigned long top_adj = top_of_ram; + + /* Assume one stack for early backtraces */ + if (top_of_ram == SKIBOOT_BASE + SKIBOOT_SIZE) + top_adj = top_of_ram + STACK_SIZE; + + metadata->ents = 0; + while (max_ents) { + fp = (unsigned long *)fp[0]; + if (!fp || (unsigned long)fp > top_adj) + break; + eframe = (struct stack_frame *)fp; + if (eframe->magic == STACK_INT_MAGIC) { + entries->exception_type = eframe->type; + entries->exception_pc = eframe->pc; + } else { + entries->exception_type = 0; + } + entries->sp = (unsigned long)fp; + entries->pc = fp[2]; + entries++; + metadata->ents++; + max_ents--; + } + + metadata->r1_caller = eframe->gpr[1]; + + if (fp) + metadata->token = eframe->gpr[0]; + else + metadata->token = -1UL; + + metadata->pir = mfspr(SPR_PIR); +} + +void __nomcount backtrace_create(struct bt_entry *entries, + unsigned int max_ents, + struct bt_metadata *metadata) +{ + unsigned long *fp = __builtin_frame_address(0); + struct stack_frame *eframe = (struct stack_frame *)fp; + + __backtrace_create(entries, max_ents, metadata, eframe); +} + +void backtrace_print(struct bt_entry *entries, struct bt_metadata *metadata, + char *out_buf, unsigned int *len, bool symbols) +{ + static char bt_text_buf[4096]; + int i, l = 0, max; + char *buf = out_buf; + unsigned long bottom, top, normal_top, tbot, ttop; + char mark; + + if (!out_buf) { + buf = bt_text_buf; + max = sizeof(bt_text_buf) - 16; + } else + max = *len - 1; + + bottom = cpu_stack_bottom(metadata->pir); + normal_top = cpu_stack_top(metadata->pir); + top = cpu_emergency_stack_top(metadata->pir); + tbot = SKIBOOT_BASE; + ttop = (unsigned long)&_etext; + + l += snprintf(buf, max, "CPU %04lx Backtrace:\n", metadata->pir); + for (i = 0; i < metadata->ents && l < max; i++) { + if (entries->sp < bottom || entries->sp > top) + 
mark = '!'; + else if (entries->sp > normal_top) + mark = 'E'; + else if (entries->pc < tbot || entries->pc > ttop) + mark = '*'; + else + mark = ' '; + l += snprintf(buf + l, max - l, + " S: %016lx R: %016lx %c ", + entries->sp, entries->pc, mark); + if (symbols) + l += snprintf_symbol(buf + l, max - l, entries->pc); + l += snprintf(buf + l, max - l, "\n"); + if (entries->exception_type) { + l += snprintf(buf + l, max - l, + " --- Interrupt 0x%lx at %016lx ---\n", + entries->exception_type, entries->exception_pc); + } + entries++; + } + if (metadata->token <= OPAL_LAST) + l += snprintf(buf + l, max - l, + " --- OPAL call token: 0x%lx caller R1: 0x%016lx ---\n", + metadata->token, metadata->r1_caller); + else if (metadata->token == -1UL) + l += snprintf(buf + l, max - l, " --- OPAL boot ---\n"); + if (!out_buf) + write(stdout->fd, bt_text_buf, l); + buf[l++] = 0; + if (len) + *len = l; +} + +/* + * To ensure that we always get backtrace output we bypass the usual console + * locking paths. The downside is that when multiple threads need to print + * a backtrace they garble each other. To prevent this we use a seperate + * lock to serialise printing of the dumps. + */ +static struct lock bt_lock = LOCK_UNLOCKED; + +void backtrace(void) +{ + struct bt_metadata metadata; + + lock(&bt_lock); + + backtrace_create(bt_buf, STACK_BUF_ENTRIES, &metadata); + backtrace_print(bt_buf, &metadata, NULL, NULL, true); + + unlock(&bt_lock); +} + +void backtrace_r1(uint64_t r1) +{ + struct bt_metadata metadata; + + lock(&bt_lock); + + __backtrace_create(bt_buf, STACK_BUF_ENTRIES, &metadata, (struct stack_frame *)r1); + backtrace_print(bt_buf, &metadata, NULL, NULL, true); + + unlock(&bt_lock); +} + +void __nomcount __stack_chk_fail(void); +void __nomcount __stack_chk_fail(void) +{ + static bool failed_once; + + if (failed_once) + return; + failed_once = true; + prlog(PR_EMERG, "Stack corruption detected !\n"); + abort(); +} + +#ifdef STACK_CHECK_ENABLED + +static int64_t lowest_stack_mark = LONG_MAX; +static struct lock stack_check_lock = LOCK_UNLOCKED; + +void __nomcount __mcount_stack_check(uint64_t sp, uint64_t lr); +void __nomcount __mcount_stack_check(uint64_t sp, uint64_t lr) +{ + struct cpu_thread *c = this_cpu(); + uint64_t base = (uint64_t)c; + uint64_t bot = base + sizeof(struct cpu_thread); + int64_t mark = sp - bot; + uint64_t top = base + NORMAL_STACK_SIZE; + + /* + * Don't check the emergency stack just yet. + */ + if (c->in_opal_call > 1) + return; + + /* + * Don't re-enter on this CPU or don't enter at all if somebody + * has spotted an overflow + */ + if (c->in_mcount) + return; + c->in_mcount = true; + + /* Capture lowest stack for this thread */ + if (mark < c->stack_bot_mark) { + lock(&stack_check_lock); + c->stack_bot_mark = mark; + c->stack_bot_pc = lr; + c->stack_bot_tok = c->current_token; + backtrace_create(c->stack_bot_bt, CPU_BACKTRACE_SIZE, + &c->stack_bot_bt_metadata); + unlock(&stack_check_lock); + + if (mark < STACK_WARNING_GAP) { + prlog(PR_EMERG, "CPU %04x Stack usage danger !" + " pc=%08llx sp=%08llx (gap=%lld) token=%lld\n", + c->pir, lr, sp, mark, c->current_token); + } + } + + /* Stack is within bounds? */ + if (sp >= (bot + STACK_SAFETY_GAP) && sp < top) { + c->in_mcount = false; + return; + } + + prlog(PR_EMERG, "CPU %04x Stack overflow detected !" 
+ " pc=%08llx sp=%08llx (gap=%lld) token=%lld\n", + c->pir, lr, sp, mark, c->current_token); + abort(); +} + +void check_stacks(void) +{ + struct cpu_thread *c, *lowest = NULL; + + /* We should never call that from mcount */ + assert(!this_cpu()->in_mcount); + + /* Mark ourselves "in_mcount" to avoid deadlock on stack + * check lock + */ + this_cpu()->in_mcount = true; + + for_each_cpu(c) { + if (!c->stack_bot_mark || + c->stack_bot_mark >= lowest_stack_mark) + continue; + lock(&stack_check_lock); + if (c->stack_bot_mark < lowest_stack_mark) { + lowest = c; + lowest_stack_mark = c->stack_bot_mark; + } + unlock(&stack_check_lock); + } + if (lowest) { + lock(&bt_lock); + prlog(PR_NOTICE, "CPU %04x lowest stack mark %lld bytes left" + " pc=%08llx token=%lld\n", + lowest->pir, lowest->stack_bot_mark, lowest->stack_bot_pc, + lowest->stack_bot_tok); + backtrace_print(lowest->stack_bot_bt, + &lowest->stack_bot_bt_metadata, + NULL, NULL, true); + unlock(&bt_lock); + } + + this_cpu()->in_mcount = false; +} +#endif /* STACK_CHECK_ENABLED */ diff --git a/roms/skiboot/core/test/Makefile.check b/roms/skiboot/core/test/Makefile.check new file mode 100644 index 000000000..7c347bea2 --- /dev/null +++ b/roms/skiboot/core/test/Makefile.check @@ -0,0 +1,101 @@ +# -*-Makefile-*- +CORE_TEST := \ + core/test/run-bitmap \ + core/test/run-cpufeatures \ + core/test/run-device \ + core/test/run-flash-subpartition \ + core/test/run-flash-firmware-versions \ + core/test/run-mem_region \ + core/test/run-malloc \ + core/test/run-malloc-speed \ + core/test/run-mem_region_init \ + core/test/run-mem_region_next \ + core/test/run-mem_region_release_unused \ + core/test/run-mem_region_release_unused_noalloc \ + core/test/run-mem_region_reservations \ + core/test/run-mem_range_is_reserved \ + core/test/run-nvram-format \ + core/test/run-trace core/test/run-msg \ + core/test/run-pel \ + core/test/run-pool \ + core/test/run-time-utils \ + core/test/run-timebase \ + core/test/run-timer \ + core/test/run-buddy \ + core/test/run-pci-quirk + +HOSTCFLAGS+=-I . -I include -Wno-error=attributes + +CORE_TEST_NOSTUB := core/test/run-console-log +CORE_TEST_NOSTUB += core/test/run-console-log-buf-overrun +CORE_TEST_NOSTUB += core/test/run-console-log-pr_fmt +CORE_TEST_NOSTUB += core/test/run-api-test + +LCOV_EXCLUDE += $(CORE_TEST:%=%.c) core/test/stubs.c +LCOV_EXCLUDE += $(CORE_TEST_NOSTUB:%=%.c) /usr/include/* + +.PHONY : core-check +core-check: $(CORE_TEST:%=%-check) $(CORE_TEST_NOSTUB:%=%-check) + +.PHONY : core-coverage +core-coverage: $(CORE_TEST:%=%-gcov-run) +core-coverage: $(CORE_TEST_NOSTUB:%=%-gcov-run) + +check: core-check +coverage: core-coverage + +$(CORE_TEST:%=%-gcov-run) : %-run: % + $(call QTEST, TEST-COVERAGE ,$< , $<) + +$(CORE_TEST_NOSTUB:%=%-gcov-run) : %-run: % + $(call QTEST, TEST-COVERAGE ,$< , $<) + +$(CORE_TEST:%=%-check) : %-check: % + $(call QTEST, RUN-TEST ,$(VALGRIND) $<, $<) + +$(CORE_TEST_NOSTUB:%=%-check) : %-check: % + $(call QTEST, RUN-TEST ,$(VALGRIND) $<, $<) + +core/test/stubs.o: core/test/stubs.c + $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) -g -c -o $@ $<, $<) + +$(CORE_TEST) : core/test/stubs.o + +$(CORE_TEST) : % : %.c + $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) -O0 -g -I include -I . -I libfdt -o $@ $< core/test/stubs.o, $<) + +$(CORE_TEST_NOSTUB) : % : %.c + $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) -O0 -g -I include -I . -I libfdt -o $@ $< , $<) + +$(CORE_TEST:%=%-gcov): %-gcov : %.c % + $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) $(HOSTGCOVCFLAGS) -I include -I . 
-I libfdt -lgcov -o $@ $< core/test/stubs.o, $<) + +$(CORE_TEST_NOSTUB:%=%-gcov) : %-gcov : %.c % + $(call Q, HOSTCC ,$(HOSTCC) $(HOSTCFLAGS) $(HOSTGCOVCFLAGS) -I include -I . -I libfdt -lgcov -o $@ $< , $<) + +core/test/run-flash-firmware-versions-gcov-run: core/test/run-flash-firmware-versions-inputs-gcov-run + +core/test/run-flash-firmware-versions-inputs-gcov-run: core/test/run-flash-firmware-versions-gcov + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-0 > /dev/null, $< version-0) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-1 > /dev/null, $< version-1) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-2 > /dev/null, $< version-2) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-10 > /dev/null, $< version-10) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-11 > /dev/null, $< version-11) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-16 > /dev/null, $< version-16) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-26 > /dev/null, $< version-26) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-27 > /dev/null, $< version-27) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-29 > /dev/null, $< version-29) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-trunc > /dev/null, $< version-trunc) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-long > /dev/null, $< version-long) + $(call Q, TEST-COVERAGE , ./core/test/run-flash-firmware-versions-gcov core/test/firmware-versions-input/version-nodash > /dev/null, $< version-nodash) + + +-include $(wildcard core/test/*.d) + +clean: core-test-clean + +core-test-clean: + $(RM) -f core/test/*.[od] $(CORE_TEST) $(CORE_TEST:%=%-gcov) + $(RM) -f $(CORE_TEST_NOSTUB) $(CORE_TEST_NOSTUB:%=%-gcov) + $(RM) -f *.gcda *.gcno skiboot.info + $(RM) -rf coverage-report diff --git a/roms/skiboot/core/test/dummy-cpu.h b/roms/skiboot/core/test/dummy-cpu.h new file mode 100644 index 000000000..64fb71bce --- /dev/null +++ b/roms/skiboot/core/test/dummy-cpu.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2018 IBM Corp. + * + * A dummy cpu.h for tests. 
+ * We don't want to include the real skiboot cpu.h, it's PPC-specific + */ + +#ifndef __CPU_H +#define __CPU_H + +#include <stdint.h> +#include <stdbool.h> + +static unsigned int cpu_max_pir = 1; +struct cpu_thread { + unsigned int chip_id; +}; +struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu, + const char *name, + void (*func)(void *data), void *data, + bool no_return); +static inline struct cpu_job *cpu_queue_job(struct cpu_thread *cpu, + const char *name, + void (*func)(void *data), + void *data) +{ + return __cpu_queue_job(cpu, name, func, data, false); +} +void cpu_wait_job(struct cpu_job *job, bool free_it); +void cpu_process_local_jobs(void); +struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id, + const char *name, + void (*func)(void *data), void *data); +#endif /* __CPU_H */ diff --git a/roms/skiboot/core/test/firmware-versions-input/version-0 b/roms/skiboot/core/test/firmware-versions-input/version-0 Binary files differnew file mode 100644 index 000000000..2ab241af5 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-0 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-1 b/roms/skiboot/core/test/firmware-versions-input/version-1 Binary files differnew file mode 100644 index 000000000..746327a8b --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-1 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-10 b/roms/skiboot/core/test/firmware-versions-input/version-10 Binary files differnew file mode 100644 index 000000000..013af6089 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-10 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-11 b/roms/skiboot/core/test/firmware-versions-input/version-11 Binary files differnew file mode 100644 index 000000000..55e835321 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-11 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-16 b/roms/skiboot/core/test/firmware-versions-input/version-16 Binary files differnew file mode 100644 index 000000000..8906af4e9 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-16 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-2 b/roms/skiboot/core/test/firmware-versions-input/version-2 Binary files differnew file mode 100644 index 000000000..f012ffd23 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-2 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-26 b/roms/skiboot/core/test/firmware-versions-input/version-26 Binary files differnew file mode 100644 index 000000000..adfd5bbcf --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-26 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-27 b/roms/skiboot/core/test/firmware-versions-input/version-27 Binary files differnew file mode 100644 index 000000000..d7ade9863 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-27 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-29 b/roms/skiboot/core/test/firmware-versions-input/version-29 Binary files differnew file mode 100644 index 000000000..b1476a3a5 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-29 diff --git a/roms/skiboot/core/test/firmware-versions-input/version-long b/roms/skiboot/core/test/firmware-versions-input/version-long new file mode 100644 index 000000000..f814fa6f4 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-long @@ -0,0 
+1,2 @@ +open-power-whatever-v2.0-10-g1cec21d-dirty + Well, I wonder what a short essay here will mean for parsing everything. I hope it is all okay, but we want to get greater than 80 chars. diff --git a/roms/skiboot/core/test/firmware-versions-input/version-nodash b/roms/skiboot/core/test/firmware-versions-input/version-nodash new file mode 100644 index 000000000..139aa9350 --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-nodash @@ -0,0 +1,2 @@ +no_dashes_in_version + this_is_wrong diff --git a/roms/skiboot/core/test/firmware-versions-input/version-trunc b/roms/skiboot/core/test/firmware-versions-input/version-trunc new file mode 100644 index 000000000..c9c92a01f --- /dev/null +++ b/roms/skiboot/core/test/firmware-versions-input/version-trunc @@ -0,0 +1,2 @@ +open-power-SUPERMICRO-P8DTU-V2.00.GA2-20161028 + op diff --git a/roms/skiboot/core/test/run-api-test.c b/roms/skiboot/core/test/run-api-test.c new file mode 100644 index 000000000..35e8135d4 --- /dev/null +++ b/roms/skiboot/core/test/run-api-test.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2014-2016 IBM Corp. + * + * For now it just validates that addresses passed are sane and test the + * wrapper that validates addresses + * + * Copyright 2016 IBM Corp. + */ + +#include <config.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdarg.h> +#include <compiler.h> +#include <opal-internal.h> + +#define __TEST__ +unsigned long top_of_ram; /* Fake it here */ +int main(void) +{ + unsigned long addr = 0xd000000000000000; + + top_of_ram = 16ULL * 1024 * 1024 * 1024; /* 16 GB */ + assert(opal_addr_valid((void *)addr) == false); + + addr = 0xc000000000000000; + assert(opal_addr_valid((void *)addr) == true); + + addr = 0x0; + assert(opal_addr_valid((void *)addr) == true); + + addr = ~0; + assert(opal_addr_valid((void *)addr) == false); + + addr = top_of_ram + 1; + assert(opal_addr_valid((void *)addr) == false); + return 0; +} diff --git a/roms/skiboot/core/test/run-bitmap.c b/roms/skiboot/core/test/run-bitmap.c new file mode 100644 index 000000000..e474915b8 --- /dev/null +++ b/roms/skiboot/core/test/run-bitmap.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2017 IBM Corp. 
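The asserts in run-api-test.c above pin down what opal_addr_valid() accepts: real addresses below top_of_ram, plus 0xc...-prefixed (kernel linear map) addresses whose real part is below top_of_ram; everything else is rejected. The toy below is only a model consistent with those five asserts, not the real helper (that lives behind <opal-internal.h>, which the test includes).

#include <stdbool.h>

/* Toy model matching the asserts above; NOT skiboot's implementation. */
static bool model_addr_valid(unsigned long addr, unsigned long top_of_ram)
{
        if ((addr >> 60) == 0xc)                 /* linear-map alias */
                addr &= 0x0ffffffffffffffful;
        return addr < top_of_ram;                /* must be backed by RAM */
}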
+ */ + +#include "../bitmap.c" +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> + +int main(void) +{ + bitmap_t *map = malloc(sizeof(bitmap_elem_t)); + int i; + memset(map, 0, sizeof(bitmap_elem_t)); + + assert(BITMAP_ELEMS(16) == (BITMAP_ELEMS(8))); + assert(BITMAP_ELEMS(128) == (BITMAP_ELEMS(64)*2)); + + assert(BITMAP_BYTES(64) == 8); + assert(BITMAP_BYTES(128) == 16); + + assert(BITMAP_BIT(1) == 0x1); + assert(BITMAP_BIT(2) == 0x2); + assert(BITMAP_BIT(3) == 0x3); + assert(BITMAP_BIT(8) == 0x8); + + assert(BITMAP_MASK(0) == 0x1); + assert(BITMAP_MASK(1) == 0x2); + assert(BITMAP_MASK(8) == 0x100); + assert(BITMAP_MASK(9) == 0x200); + + assert(BITMAP_ELEM(1) == 0); + assert(BITMAP_ELEM(128) == BITMAP_ELEMS(128)); + + bitmap_set_bit(*map, 0); + assert(*(unsigned long*)map == 0x1); + assert(bitmap_tst_bit(*map, 0) == true); + bitmap_clr_bit(*map, 0); + assert(*(unsigned long*)map == 0x00); + + bitmap_set_bit(*map, 8); + assert(*(unsigned long*)map == 0x100); + assert(bitmap_tst_bit(*map, 0) == false); + assert(bitmap_tst_bit(*map, 1) == false); + assert(bitmap_tst_bit(*map, 2) == false); + assert(bitmap_tst_bit(*map, 3) == false); + assert(bitmap_tst_bit(*map, 4) == false); + assert(bitmap_tst_bit(*map, 5) == false); + assert(bitmap_tst_bit(*map, 6) == false); + assert(bitmap_tst_bit(*map, 7) == false); + assert(bitmap_tst_bit(*map, 8) == true); + assert(bitmap_tst_bit(*map, 9) == false); + assert(bitmap_tst_bit(*map, 10) == false); + assert(bitmap_tst_bit(*map, 11) == false); + assert(bitmap_tst_bit(*map, 12) == false); + assert(bitmap_tst_bit(*map, 13) == false); + assert(bitmap_tst_bit(*map, 14) == false); + assert(bitmap_tst_bit(*map, 15) == false); + assert(bitmap_find_one_bit(*map, 0, 16) == 8); + bitmap_clr_bit(*map, 8); + assert(bitmap_find_one_bit(*map, 0, 16) == -1); + assert(*(unsigned long*)map == 0x00); + assert(bitmap_tst_bit(*map, 8) == false); + + bitmap_for_each_zero(*map, 7, i) { + bitmap_set_bit(*map, i); + } + + for (i = 0; i < 7; i++) + assert(bitmap_tst_bit(*map, i) == true); + + assert(bitmap_tst_bit(*map, 8) == false); + + + free(map); + + return 0; +} diff --git a/roms/skiboot/core/test/run-buddy.c b/roms/skiboot/core/test/run-buddy.c new file mode 100644 index 000000000..8ae26cb6c --- /dev/null +++ b/roms/skiboot/core/test/run-buddy.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2016-2017 IBM Corp. 
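The macros exercised in run-bitmap.c above are enough for a small static ID allocator; a sketch follows (MAX_IDS and the function names are invented). bitmap_for_each_zero() walks clear bits up to the given limit, exactly as the loop at the end of the test relies on.

/* Illustrative sketch only: allocate small IDs out of a fixed bitmap. */
#define MAX_IDS 64
static bitmap_elem_t used_ids[BITMAP_ELEMS(MAX_IDS)];

static int id_alloc(void)
{
        int i;

        bitmap_for_each_zero(used_ids, MAX_IDS, i) {
                bitmap_set_bit(used_ids, i);
                return i;
        }
        return -1;                               /* all MAX_IDS in use */
}

static void id_free(int i)
{
        bitmap_clr_bit(used_ids, i);
}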
+ */ + +#include <buddy.h> +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> + +static void *zalloc(size_t size) +{ + return calloc(size, 1); +} + +#include "../buddy.c" +#include "../bitmap.c" + +#define BUDDY_ORDER 8 + +int main(void) +{ + struct buddy *b; + int i, a[10]; + + b = buddy_create(BUDDY_ORDER); + assert(b); + + buddy_reserve(b, 127, 0); + buddy_reserve(b, 0, 4); + assert(buddy_reserve(b, 0, 4) == false); + + a[0] = buddy_alloc(b, 0); + assert(a[0] >= 0); + a[1] = buddy_alloc(b, 0); + assert(a[1] >= 0); + a[2] = buddy_alloc(b, 3); + assert(a[2] >= 0); + a[3] = buddy_alloc(b, 4); + assert(a[3] >= 0); + a[4] = buddy_alloc(b, 5); + assert(a[4] >= 0); + a[5] = buddy_alloc(b, 4); + assert(a[5] >= 0); + a[6] = buddy_alloc(b, 3); + assert(a[6] >= 0); + a[7] = buddy_alloc(b, 2); + assert(a[7] >= 0); + a[8] = buddy_alloc(b, 1); + assert(a[8] >= 0); + a[9] = buddy_alloc(b, 8); + assert(a[9] < 0); + + buddy_free(b, a[0], 0); + buddy_free(b, a[8], 1); + buddy_free(b, a[1], 0); + buddy_free(b, a[7], 2); + buddy_free(b, a[2], 3); + buddy_free(b, a[6], 3); + buddy_free(b, a[3], 4); + buddy_free(b, a[5], 4); + buddy_free(b, a[4], 5); + + buddy_free(b, 127, 0); + buddy_free(b, 0, 4); + + for (i = 2; i < buddy_map_size(b); i++) + assert(bitmap_tst_bit(b->map, i)); + assert(!bitmap_tst_bit(b->map, 1)); + + buddy_destroy(b); + return 0; +} diff --git a/roms/skiboot/core/test/run-console-log-buf-overrun.c b/roms/skiboot/core/test/run-console-log-buf-overrun.c new file mode 100644 index 000000000..83774c4c9 --- /dev/null +++ b/roms/skiboot/core/test/run-console-log-buf-overrun.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2015-2016 IBM Corp. + */ + +#include <config.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdarg.h> +#include <compiler.h> + +unsigned long tb_hz = 512000000; + +#define __TEST__ + +#define CHECK_BUF_ASSERT(buf, str) \ + assert(memcmp(buf, str, strlen(str)) == 0) + +#define CHECK_ASSERT(str) \ + CHECK_BUF_ASSERT(console_buffer, str) + +int huge_tb; + +static inline unsigned long mftb(void) +{ + /* + * return huge value for TB that overrun tmp[16] buffer defined + * in print_itoa(). + */ + if (huge_tb) + return 1223372515963611388; + else + return 42; +} + +#include "../../libc/include/stdio.h" +#include "../console-log.c" +#include "../../libc/stdio/snprintf.c" +#include "../../libc/stdio/vsnprintf.c" + +char console_buffer[4096]; +struct debug_descriptor debug_descriptor; + +bool flushed_to_drivers; + +ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count) +{ + flushed_to_drivers = flush_to_drivers; + memcpy(console_buffer, buf, count); + return count; +} + +int main(void) +{ + unsigned long value = 0xffffffffffffffff; + char *ptr = console_buffer; + + debug_descriptor.console_log_levels = 0x75; + + /* Test for huge TB value. */ + huge_tb = 1; + + prlog(PR_EMERG, "Hello World"); + CHECK_ASSERT("[2389399445.123611388,0] Hello World"); + + memset(console_buffer, 0, sizeof(console_buffer)); + + /* Test for normal TB with huge unsigned long value */ + huge_tb = 0; + + prlog(PR_EMERG, "Hello World %lu", value); + CHECK_ASSERT("[ 0.000000042,0] Hello World 18446744073709551615"); + + printf("Hello World %lu", value); + CHECK_ASSERT("[ 0.000000042,5] Hello World 18446744073709551615"); + + /* + * Test string of size > 320 + * + * core/console-log.c:vprlog() uses buffer[320] to print message + * Try printing more than 320 bytes to test stack corruption. 
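An aside on run-buddy.c above: as with any buddy allocator, blocks of 2^order entries come back naturally aligned, must be freed with the same order they were allocated with (hence the paired buddy_alloc()/buddy_free() calls in the test), and the whole space appears to hold 2^BUDDY_ORDER order-0 entries, as the test's use of index 127 with BUDDY_ORDER 8 suggests. A usage sketch with those assumptions:

/* Illustrative sketch: carve an aligned block of 16 IDs from a 256-ID space. */
static void buddy_example(void)
{
        struct buddy *b = buddy_create(8);       /* assumed 2^8 order-0 entries */
        int base;

        assert(b);
        base = buddy_alloc(b, 4);                /* 2^4 = 16 consecutive IDs */
        if (base >= 0) {
                /* ... use IDs base .. base + 15 ... */
                buddy_free(b, base, 4);          /* free with the same order */
        }
        buddy_destroy(b);
}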
+ * You would see Segmentation fault on stack corruption. + */ + prlog(PR_EMERG, "%330s", "Hello World"); + + memset(console_buffer, 0, sizeof(console_buffer)); + + /* + * Test boundary condition. + * + * Print string of exact size 320. We should see string truncated + * with console_buffer[319] == '\0'. + */ + memset(console_buffer, 0, sizeof(console_buffer)); + + prlog(PR_EMERG, "%300s", "Hello World"); + assert(console_buffer[319] == 0); + + /* compare truncated string */ + ptr += 320 - strlen("Hello World"); + CHECK_BUF_ASSERT(ptr, "Hello Worl"); + + return 0; +} diff --git a/roms/skiboot/core/test/run-console-log-pr_fmt.c b/roms/skiboot/core/test/run-console-log-pr_fmt.c new file mode 100644 index 000000000..457de03fb --- /dev/null +++ b/roms/skiboot/core/test/run-console-log-pr_fmt.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2015-2016 IBM Corp. + */ + +#include <config.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdarg.h> + +#define __TEST__ + +unsigned long tb_hz = 512000000; + +static inline unsigned long mftb(void) +{ + return 42; +} + +#define pr_fmt(f) "PREFIX: " f +#include "../../libc/include/stdio.h" +#include "../console-log.c" +#include "../../libc/stdio/snprintf.c" +#include "../../libc/stdio/vsnprintf.c" + +struct debug_descriptor debug_descriptor; + +bool flushed_to_drivers; +char console_buffer[4096]; + +ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count) +{ + flushed_to_drivers = flush_to_drivers; + memcpy(console_buffer, buf, count); + return count; +} + +int main(void) +{ + debug_descriptor.console_log_levels = 0x75; + + prlog(PR_EMERG, "Hello World"); + assert(strcmp(console_buffer, "[ 0.000000042,0] PREFIX: Hello World") == 0); + assert(flushed_to_drivers==true); + + memset(console_buffer, 0, sizeof(console_buffer)); + + // Below log level + prlog(PR_TRACE, "Hello World"); + assert(console_buffer[0] == 0); + + // Should not be flushed to console + prlog(PR_DEBUG, "Hello World"); + assert(strcmp(console_buffer, "[ 0.000000042,7] PREFIX: Hello World") == 0); + assert(flushed_to_drivers==false); + + printf("Hello World"); + assert(strcmp(console_buffer, "[ 0.000000042,5] PREFIX: Hello World") == 0); + assert(flushed_to_drivers==true); + + return 0; +} diff --git a/roms/skiboot/core/test/run-console-log.c b/roms/skiboot/core/test/run-console-log.c new file mode 100644 index 000000000..bec281b6e --- /dev/null +++ b/roms/skiboot/core/test/run-console-log.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2014-2016 IBM Corp. 
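run-console-log-pr_fmt.c above demonstrates the per-file log prefix mechanism: define pr_fmt() before the logging header (console-log.c directly in the test, normally the skiboot header in firmware code) and every prlog()/printf() in that file is prefixed at compile time. A minimal sketch, with an invented subsystem name:

/* Illustrative sketch only: give every log line in this file a prefix. */
#define pr_fmt(fmt) "MYDEV: " fmt

#include <skiboot.h>

static void mydev_hello(void)
{
        /* Emitted with the usual timestamp/level header as "MYDEV: probing" */
        prlog(PR_INFO, "probing\n");
}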
+ */ + +#include <config.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdarg.h> + +#define __TEST__ + +#define _printf printf + +unsigned long tb_hz = 512000000; + +static inline unsigned long mftb(void) +{ + return 42; +} + +int _printf(const char* fmt, ...); + +#include "../console-log.c" + +struct debug_descriptor debug_descriptor; + +bool flushed_to_drivers; +char console_buffer[4096]; + +ssize_t console_write(bool flush_to_drivers, const void *buf, size_t count) +{ + flushed_to_drivers = flush_to_drivers; + memcpy(console_buffer, buf, count); + return count; +} + +int main(void) +{ + debug_descriptor.console_log_levels = 0x75; + + prlog(PR_EMERG, "Hello World"); + assert(strcmp(console_buffer, "[ 0.000000042,0] Hello World") == 0); + assert(flushed_to_drivers==true); + + memset(console_buffer, 0, sizeof(console_buffer)); + + // Below log level + prlog(PR_TRACE, "Hello World"); + assert(console_buffer[0] == 0); + + // Should not be flushed to console + prlog(PR_DEBUG, "Hello World"); + assert(strcmp(console_buffer, "[ 0.000000042,7] Hello World") == 0); + assert(flushed_to_drivers==false); + + printf("Hello World"); + assert(strcmp(console_buffer, "[ 0.000000042,5] Hello World") == 0); + assert(flushed_to_drivers==true); + + return 0; +} diff --git a/roms/skiboot/core/test/run-cpufeatures.c b/roms/skiboot/core/test/run-cpufeatures.c new file mode 100644 index 000000000..bb89b2573 --- /dev/null +++ b/roms/skiboot/core/test/run-cpufeatures.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2019 IBM Corp. + */ + +#include <skiboot.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> + +/* Override this for testing. */ +#define is_rodata(p) fake_is_rodata(p) + +char __rodata_start[16]; +#define __rodata_end (__rodata_start + sizeof(__rodata_start)) + +static inline bool fake_is_rodata(const void *p) +{ + return ((char *)p >= __rodata_start && (char *)p < __rodata_end); +} + +#define zalloc(bytes) calloc((bytes), 1) + +#include "../device.c" +#include <assert.h> +#include "../../test/dt_common.c" + +#define __TEST__ + +static inline unsigned long mfspr(unsigned int spr); + +#include <ccan/str/str.c> + +#include "../cpufeatures.c" + +static unsigned long fake_pvr = PVR_TYPE_P8; + +static inline unsigned long mfspr(unsigned int spr) +{ + assert(spr == SPR_PVR); + return fake_pvr; +} + +int main(void) +{ + struct dt_node *dt_root; + + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, true); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P8E << 16) | 0x100; // P8E DD1.0 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P8E << 16) | 0x200; // P8E DD2.0 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P8 << 16) | 0x100; // P8 DD1.0 + dt_root = 
dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P8 << 16) | 0x200; // P8 DD2.0 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P8NVL << 16) | 0x100; // P8NVL DD1.0 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P9 << 16) | 0x200; // P9 DD2.0 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix")); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P9 << 16) | 0x201; // P9 DD2.1 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix")); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") == 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P9 << 16) | 0x202; // P9 DD2.2 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix")); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") != 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") != 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P9 << 16) | 0x203; // P9 DD2.3 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix")); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") != 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + fake_pvr = (PVR_TYPE_P9P << 16) | 0x100; // P9P DD1.0 + dt_root = dt_new_root(""); + dt_add_cpufeatures(dt_root); + dump_dt(dt_root, 0, false); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/mmu-radix")); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-hypervisor-assist") != 0); + assert(dt_find_by_path(dt_root, "cpus/ibm,powerpc-cpu-features/tm-suspend-xer-so-bug") == 0); + dt_free(dt_root); + + exit(EXIT_SUCCESS); +} diff --git 
a/roms/skiboot/core/test/run-device.c b/roms/skiboot/core/test/run-device.c new file mode 100644 index 000000000..4a12382bb --- /dev/null +++ b/roms/skiboot/core/test/run-device.c @@ -0,0 +1,471 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2012-2018 IBM Corp. + */ + +#include <skiboot.h> +#include <stdlib.h> + +/* Override this for testing. */ +#define is_rodata(p) fake_is_rodata(p) + +char __rodata_start[16]; +#define __rodata_end (__rodata_start + sizeof(__rodata_start)) + +static inline bool fake_is_rodata(const void *p) +{ + return ((char *)p >= __rodata_start && (char *)p < __rodata_end); +} + +#define zalloc(bytes) calloc((bytes), 1) + +#include "../device.c" +#include <assert.h> +#include "../../test/dt_common.c" +const char *prop_to_fix[] = {"something", NULL}; +const char **props_to_fix(struct dt_node *node); + +static void check_path(const struct dt_node *node, const char * expected_path) +{ + char * path; + path = dt_get_path(node); + if (strcmp(path, expected_path) != 0) { + printf("check_path: expected %s, got %s\n", expected_path, path); + } + assert(strcmp(path, expected_path) == 0); + free(path); +} + +/* constructs a random nodes only device tree */ +static void build_tree(int max_depth, int min_depth, struct dt_node *parent) +{ + char name[64]; + int i; + + for (i = 0; i < max_depth; i++) { + struct dt_node *new; + + snprintf(name, sizeof name, "prefix@%.8x", rand()); + + new = dt_new(parent, name); + + if(max_depth > min_depth) + build_tree(max_depth - 1, min_depth, new); + } +} + +static bool is_sorted(const struct dt_node *root) +{ + struct dt_node *end = list_tail(&root->children, struct dt_node, list); + struct dt_node *node; + + dt_for_each_child(root, node) { + struct dt_node *next = + list_entry(node->list.next, struct dt_node, list); + + /* current node must be "less than" the next node */ + if (node != end && dt_cmp_subnodes(node, next) != -1) { + printf("nodes '%s' and '%s' out of order\n", + node->name, next->name); + + return false; + } + + if (!is_sorted(node)) + return false; + } + + return true; +} + +/*handler for phandle fixup test */ +const char **props_to_fix(struct dt_node *node) +{ + const struct dt_property *prop; + + prop = dt_find_property(node, "something"); + if (prop) + return prop_to_fix; + + return NULL; +} + +int main(void) +{ + struct dt_node *root, *other_root, *c1, *c2, *c2_c, *gc1, *gc2, *gc3, *ggc1, *ggc2; + struct dt_node *addrs, *addr1, *addr2; + struct dt_node *i, *subtree, *ev1, *ut1, *ut2; + const struct dt_property *p; + struct dt_property *p2; + unsigned int n; + char *s; + size_t sz; + u32 phandle, ev1_ph, new_prop_ph; + + root = dt_new_root(""); + assert(!list_top(&root->properties, struct dt_property, list)); + check_path(root, "/"); + + c1 = dt_new_check(root, "c1"); + assert(!list_top(&c1->properties, struct dt_property, list)); + check_path(c1, "/c1"); + assert(dt_find_by_name(root, "c1") == c1); + assert(dt_find_by_path(root, "/c1") == c1); + assert(dt_new(root, "c1") == NULL); + + c2 = dt_new(root, "c2"); + c2_c = dt_new_check(root, "c2"); + assert(c2 == c2_c); + assert(!list_top(&c2->properties, struct dt_property, list)); + check_path(c2, "/c2"); + assert(dt_find_by_name(root, "c2") == c2); + assert(dt_find_by_path(root, "/c2") == c2); + + gc1 = dt_new(c1, "gc1"); + assert(!list_top(&gc1->properties, struct dt_property, list)); + check_path(gc1, "/c1/gc1"); + assert(dt_find_by_name(root, "gc1") == gc1); + assert(dt_find_by_path(root, "/c1/gc1") == gc1); + + gc2 = dt_new(c1, "gc2"); + 
assert(!list_top(&gc2->properties, struct dt_property, list)); + check_path(gc2, "/c1/gc2"); + assert(dt_find_by_name(root, "gc2") == gc2); + assert(dt_find_by_path(root, "/c1/gc2") == gc2); + + gc3 = dt_new(c1, "gc3"); + assert(!list_top(&gc3->properties, struct dt_property, list)); + check_path(gc3, "/c1/gc3"); + assert(dt_find_by_name(root, "gc3") == gc3); + assert(dt_find_by_path(root, "/c1/gc3") == gc3); + + ggc1 = dt_new(gc1, "ggc1"); + assert(!list_top(&ggc1->properties, struct dt_property, list)); + check_path(ggc1, "/c1/gc1/ggc1"); + assert(dt_find_by_name(root, "ggc1") == ggc1); + assert(dt_find_by_path(root, "/c1/gc1/ggc1") == ggc1); + + addrs = dt_new(root, "addrs"); + assert(!list_top(&addrs->properties, struct dt_property, list)); + check_path(addrs, "/addrs"); + assert(dt_find_by_name(root, "addrs") == addrs); + assert(dt_find_by_path(root, "/addrs") == addrs); + + addr1 = dt_new_addr(addrs, "addr", 0x1337); + assert(!list_top(&addr1->properties, struct dt_property, list)); + check_path(addr1, "/addrs/addr@1337"); + assert(dt_find_by_name(root, "addr@1337") == addr1); + assert(dt_find_by_name_addr(root, "addr", 0x1337) == addr1); + assert(dt_find_by_path(root, "/addrs/addr@1337") == addr1); + assert(dt_new_addr(addrs, "addr", 0x1337) == NULL); + + addr2 = dt_new_2addr(addrs, "2addr", 0xdead, 0xbeef); + assert(!list_top(&addr2->properties, struct dt_property, list)); + check_path(addr2, "/addrs/2addr@dead,beef"); + assert(dt_find_by_name(root, "2addr@dead,beef") == addr2); + assert(dt_find_by_path(root, "/addrs/2addr@dead,beef") == addr2); + assert(dt_new_2addr(addrs, "2addr", 0xdead, 0xbeef) == NULL); + + /* Test walking the tree, checking and setting values */ + for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) { + assert(!list_top(&i->properties, struct dt_property, list)); + dt_add_property_cells(i, "visited", 1); + } + assert(n == 9); + + for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) { + p = list_top(&i->properties, struct dt_property, list); + assert(strcmp(p->name, "visited") == 0); + assert(p->len == sizeof(u32)); + assert(fdt32_to_cpu(*(u32 *)p->prop) == 1); + } + assert(n == 9); + + /* Test cells */ + dt_add_property_cells(c1, "some-property", 1, 2, 3); + p = dt_find_property(c1, "some-property"); + assert(p); + assert(strcmp(p->name, "some-property") == 0); + assert(p->len == sizeof(u32) * 3); + assert(fdt32_to_cpu(*(u32 *)p->prop) == 1); + assert(dt_prop_get_cell(c1, "some-property", 0) == 1); + assert(fdt32_to_cpu(*((u32 *)p->prop + 1)) == 2); + assert(dt_prop_get_cell(c1, "some-property", 1) == 2); + assert(fdt32_to_cpu(*((u32 *)p->prop + 2)) == 3); + assert(dt_prop_get_cell_def(c1, "some-property", 2, 42) == 3); + + assert(dt_prop_get_cell_def(c1, "not-a-property", 2, 42) == 42); + + /* Test u64s */ + dt_add_property_u64s(c2, "some-property", (2LL << 33), (3LL << 33), (4LL << 33)); + p = dt_find_property(c2, "some-property"); + assert(p); + assert(p->len == sizeof(u64) * 3); + assert(fdt64_to_cpu(*(u64 *)p->prop) == (2LL << 33)); + assert(fdt64_to_cpu(*((u64 *)p->prop + 1)) == (3LL << 33)); + assert(fdt64_to_cpu(*((u64 *)p->prop + 2)) == (4LL << 33)); + + /* Test u32/u64 get defaults */ + assert(dt_prop_get_u32_def(c1, "u32", 42) == 42); + dt_add_property_cells(c1, "u32", 1337); + assert(dt_prop_get_u32_def(c1, "u32", 42) == 1337); + assert(dt_prop_get_u32(c1, "u32") == 1337); + + assert(dt_prop_get_u64_def(c1, "u64", (42LL << 42)) == (42LL << 42)); + dt_add_property_u64s(c1, "u64", (1337LL << 42)); + assert(dt_prop_get_u64_def(c1, 
"u64", (42LL << 42)) == (1337LL << 42)); + assert(dt_prop_get_u64(c1, "u64") == (1337LL << 42)); + + /* Test freeing a single node */ + assert(!list_empty(&gc1->children)); + dt_free(ggc1); + assert(list_empty(&gc1->children)); + + /* Test rodata logic. */ + assert(!is_rodata("hello")); + assert(is_rodata(__rodata_start)); + strcpy(__rodata_start, "name"); + ggc1 = dt_new(root, __rodata_start); + assert(ggc1->name == __rodata_start); + + /* Test string node. */ + dt_add_property_string(ggc1, "somestring", "someval"); + assert(dt_has_node_property(ggc1, "somestring", "someval")); + assert(!dt_has_node_property(ggc1, "somestrin", "someval")); + assert(!dt_has_node_property(ggc1, "somestring", "someva")); + assert(!dt_has_node_property(ggc1, "somestring", "somevale")); + + /* Test nstr, which allows for non-null-terminated inputs */ + dt_add_property_nstr(ggc1, "nstring", "somevalue_long", 7); + assert(dt_has_node_property(ggc1, "nstring", "someval")); + assert(!dt_has_node_property(ggc1, "nstring", "someva")); + assert(!dt_has_node_property(ggc1, "nstring", "somevalue_long")); + + /* Test multiple strings */ + dt_add_property_strings(ggc1, "somestrings", + "These", "are", "strings!"); + p = dt_find_property(ggc1, "somestrings"); + assert(p); + assert(p->len == sizeof(char) * (6 + 4 + 9)); + s = (char *)p->prop; + assert(strcmp(s, "These") == 0); + assert(strlen(s) == 5); + s += 6; + assert(strcmp(s, "are") == 0); + assert(strlen(s) == 3); + s += 4; + assert(strcmp(s, "strings!") == 0); + assert(strlen(s) == 8); + s += 9; + assert(s == (char *)p->prop + p->len); + assert(dt_prop_find_string(p, "These")); + /* dt_prop_find_string is case insensitve */ + assert(dt_prop_find_string(p, "ARE")); + assert(!dt_prop_find_string(p, "integers!")); + /* And always returns false for NULL properties */ + assert(!dt_prop_find_string(NULL, "anything!")); + + /* Test more get/get_def varieties */ + assert(dt_prop_get_def(c1, "does-not-exist", NULL) == NULL); + sz = 0xbad; + assert(dt_prop_get_def_size(c1, "does-not-exist", NULL, &sz) == NULL); + assert(sz == 0); + dt_add_property_string(c1, "another-property", "xyzzy"); + assert(dt_prop_get_def(c1, "another-property", NULL) != NULL); + assert(strcmp(dt_prop_get(c1, "another-property"), "xyzzy") == 0); + n = 0xbad; + assert(dt_prop_get_def_size(c1, "another-property", NULL, &sz) != NULL); + assert(sz == strlen("xyzzy") + 1); + + /* Test resizing property. */ + p = p2 = __dt_find_property(c1, "some-property"); + assert(p); + n = p2->len; + while (p2 == p) { + n *= 2; + dt_resize_property(&p2, n); + } + + assert(dt_find_property(c1, "some-property") == p2); + list_check(&c1->properties, "properties after resizing"); + + dt_del_property(c1, p2); + list_check(&c1->properties, "properties after delete"); + + /* No leaks for valgrind! */ + dt_free(root); + + /* Test compatible and chip id. 
*/ + root = dt_new_root(""); + + c1 = dt_new(root, "chip1"); + dt_add_property_cells(c1, "ibm,chip-id", 0xcafe); + assert(dt_get_chip_id(c1) == 0xcafe); + dt_add_property_strings(c1, "compatible", + "specific-fake-chip", + "generic-fake-chip"); + assert(dt_node_is_compatible(c1, "specific-fake-chip")); + assert(dt_node_is_compatible(c1, "generic-fake-chip")); + + c2 = dt_new(root, "chip2"); + dt_add_property_cells(c2, "ibm,chip-id", 0xbeef); + assert(dt_get_chip_id(c2) == 0xbeef); + dt_add_property_strings(c2, "compatible", + "specific-fake-bus", + "generic-fake-bus"); + + gc1 = dt_new(c1, "coprocessor1"); + dt_add_property_strings(gc1, "compatible", + "specific-fake-coprocessor"); + gc2 = dt_new(gc1, "coprocessor2"); + dt_add_property_strings(gc2, "compatible", + "specific-fake-coprocessor"); + gc3 = dt_new(c1, "coprocessor3"); + dt_add_property_strings(gc3, "compatible", + "specific-fake-coprocessor"); + + + assert(dt_find_compatible_node(root, NULL, "generic-fake-bus") == c2); + assert(dt_find_compatible_node(root, c2, "generic-fake-bus") == NULL); + + /* we can find all compatible nodes */ + assert(dt_find_compatible_node(c1, NULL, "specific-fake-coprocessor") == gc1); + assert(dt_find_compatible_node(c1, gc1, "specific-fake-coprocessor") == gc2); + assert(dt_find_compatible_node(c1, gc2, "specific-fake-coprocessor") == gc3); + assert(dt_find_compatible_node(c1, gc3, "specific-fake-coprocessor") == NULL); + assert(dt_find_compatible_node(root, NULL, "specific-fake-coprocessor") == gc1); + assert(dt_find_compatible_node(root, gc1, "specific-fake-coprocessor") == gc2); + assert(dt_find_compatible_node(root, gc2, "specific-fake-coprocessor") == gc3); + assert(dt_find_compatible_node(root, gc3, "specific-fake-coprocessor") == NULL); + + /* we can find the coprocessor once on the cpu */ + assert(dt_find_compatible_node_on_chip(root, + NULL, + "specific-fake-coprocessor", + 0xcafe) == gc1); + assert(dt_find_compatible_node_on_chip(root, + gc1, + "specific-fake-coprocessor", + 0xcafe) == gc2); + assert(dt_find_compatible_node_on_chip(root, + gc2, + "specific-fake-coprocessor", + 0xcafe) == gc3); + assert(dt_find_compatible_node_on_chip(root, + gc3, + "specific-fake-coprocessor", + 0xcafe) == NULL); + + /* we can't find the coprocessor on the bus */ + assert(dt_find_compatible_node_on_chip(root, + NULL, + "specific-fake-coprocessor", + 0xbeef) == NULL); + + /* Test phandles. We override the automatically generated one. 
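The chain of asserts above also documents the iteration idiom for dt_find_compatible_node(): pass NULL to get the first match under a parent, then pass the previous match back in to get the next, until NULL is returned. Sketch:

/* Illustrative sketch: visit every node compatible with a given string. */
static int count_compatible(struct dt_node *parent, const char *compat)
{
        struct dt_node *n = NULL;
        int count = 0;

        while ((n = dt_find_compatible_node(parent, n, compat)) != NULL)
                count++;

        return count;
}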
*/ + phandle = 0xf00; + dt_add_property(gc3, "phandle", (const void *)&phandle, 4); + assert(last_phandle == 0xf00); + assert(dt_find_by_phandle(root, 0xf00) == gc3); + assert(dt_find_by_phandle(root, 0xf0f) == NULL); + + dt_free(root); + + /* basic sorting */ + root = dt_new_root("rewt"); + dt_new(root, "a@1"); + dt_new(root, "a@2"); + dt_new(root, "a@3"); + dt_new(root, "a@4"); + dt_new(root, "b@4"); + dt_new(root, "c@4"); + + assert(is_sorted(root)); + + /* Now test dt_attach_root */ + other_root = dt_new_root("other_root"); + dt_new(other_root, "d@1"); + + assert(dt_attach_root(root, other_root)); + other_root = dt_new_root("other_root"); + assert(!dt_attach_root(root, other_root)); + dt_free(root); + + /* Test child node sorting */ + root = dt_new_root("test root"); + build_tree(5, 3, root); + + if (!is_sorted(root)) { + dump_dt(root, 1, false); + } + assert(is_sorted(root)); + + dt_free(root); + + /* check dt_translate_address */ + + /* NB: the root bus has two address cells */ + root = dt_new_root(""); + + c1 = dt_new_addr(root, "some-32bit-bus", 0x80000000); + dt_add_property_cells(c1, "#address-cells", 1); + dt_add_property_cells(c1, "#size-cells", 1); + dt_add_property_cells(c1, "ranges", 0x0, 0x8, 0x0, 0x1000); + + gc1 = dt_new_addr(c1, "test", 0x0500); + dt_add_property_cells(gc1, "reg", 0x0500, 0x10); + + assert(dt_translate_address(gc1, 0, NULL) == 0x800000500ul); + + /* try three level translation */ + + gc2 = dt_new_addr(c1, "another-32bit-bus", 0x40000000); + dt_add_property_cells(gc2, "#address-cells", 1); + dt_add_property_cells(gc2, "#size-cells", 1); + dt_add_property_cells(gc2, "ranges", 0x0, 0x600, 0x100, + 0x100, 0x800, 0x100); + + ggc1 = dt_new_addr(gc2, "test", 0x50); + dt_add_property_cells(ggc1, "reg", 0x50, 0x10); + assert(dt_translate_address(ggc1, 0, NULL) == 0x800000650ul); + + /* test multiple ranges work */ + ggc2 = dt_new_addr(gc2, "test", 0x150); + dt_add_property_cells(ggc2, "reg", 0x150, 0x10); + assert(dt_translate_address(ggc2, 0, NULL) == 0x800000850ul); + + /* try 64bit -> 64bit */ + + c2 = dt_new_addr(root, "some-64bit-bus", 0xe00000000); + dt_add_property_cells(c2, "#address-cells", 2); + dt_add_property_cells(c2, "#size-cells", 2); + dt_add_property_cells(c2, "ranges", 0x0, 0x0, 0xe, 0x0, 0x2, 0x0); + + gc2 = dt_new_addr(c2, "test", 0x100000000ul); + dt_add_property_u64s(gc2, "reg", 0x100000000ul, 0x10ul); + assert(dt_translate_address(gc2, 0, NULL) == 0xf00000000ul); + + dt_free(root); + + /* phandle fixup test */ + subtree = dt_new_root("subtree"); + ev1 = dt_new(subtree, "ev@1"); + ev1_ph = ev1->phandle; + dt_new(ev1,"a@1"); + dt_new(ev1,"a@2"); + dt_new(ev1,"a@3"); + ut1 = dt_new(subtree, "ut@1"); + dt_add_property(ut1, "something", (const void *)&ev1->phandle, 4); + ut2 = dt_new(subtree, "ut@2"); + dt_add_property(ut2, "something", (const void *)&ev1->phandle, 4); + + dt_adjust_subtree_phandle(subtree, props_to_fix); + assert(!(ev1->phandle == ev1_ph)); + new_prop_ph = dt_prop_get_u32(ut1, "something"); + assert(!(new_prop_ph == ev1_ph)); + new_prop_ph = dt_prop_get_u32(ut2, "something"); + assert(!(new_prop_ph == ev1_ph)); + dt_free(subtree); + return 0; +} + diff --git a/roms/skiboot/core/test/run-flash-firmware-versions.c b/roms/skiboot/core/test/run-flash-firmware-versions.c new file mode 100644 index 000000000..9f96f5c19 --- /dev/null +++ b/roms/skiboot/core/test/run-flash-firmware-versions.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2018-2019 IBM Corp. 
+ */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <malloc.h> +#include <stdint.h> + + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <assert.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> + + +#include <interrupts.h> +#include <bitutils.h> + +#include <compiler.h> + +/* + * Skiboot malloc stubs + * + * The actual prototypes for these are defined in mem_region-malloc.h, + * but that file also #defines malloc, and friends so we don't pull that in + * directly. + */ + +#define DEFAULT_ALIGN __alignof__(long) + +void *__memalign(size_t blocksize, size_t bytes, const char *location __unused); +void *__memalign(size_t blocksize, size_t bytes, const char *location __unused) +{ + return memalign(blocksize, bytes); +} + +void *__malloc(size_t bytes, const char *location); +void *__malloc(size_t bytes, const char *location) +{ + return __memalign(DEFAULT_ALIGN, bytes, location); +} + +void __free(void *p, const char *location __unused); +void __free(void *p, const char *location __unused) +{ + free(p); +} + +void *__realloc(void *ptr, size_t size, const char *location __unused); +void *__realloc(void *ptr, size_t size, const char *location __unused) +{ + return realloc(ptr, size); +} + +void *__zalloc(size_t bytes, const char *location); +void *__zalloc(size_t bytes, const char *location) +{ + void *p = __malloc(bytes, location); + + if (p) + memset(p, 0, bytes); + return p; +} + +#include <mem_region-malloc.h> + +#include <opal-api.h> + +#include "../../libfdt/fdt.c" +#include "../../libfdt/fdt_ro.c" +#include "../../libfdt/fdt_sw.c" +#include "../../libfdt/fdt_strerror.c" + +#include "../../core/device.c" + +#include "../../libstb/container-utils.h" +#include "../../libstb/container.h" +#include "../../libstb/container.c" + +#include "../flash-firmware-versions.c" +#include <assert.h> + +char __rodata_start[1], __rodata_end[1]; + +const char version[]="Hello world!"; + +enum proc_gen proc_gen = proc_gen_p8; + +static char *loaded_version_buf; +static size_t loaded_version_buf_size; + +#define min(x,y) ((x) < (y) ? x : y) + +int start_preload_resource(enum resource_id id, uint32_t subid, + void *buf, size_t *len) +{ + (void)id; + (void)subid; + (void)buf; + if (loaded_version_buf) { + *len = min(*len, loaded_version_buf_size); + memcpy(buf, loaded_version_buf, *len); + } else { + *len = 0; + } + + return 0; +} + +int wait_for_resource_loaded(enum resource_id id, uint32_t idx) +{ + (void)id; + (void)idx; + return 0; +} + +int main(int argc, char *argv[]) +{ + int fd; + struct stat ver_st; + int r; + + dt_root = dt_new_root(""); + + if (argc > 1) { + fd = open(argv[1], O_RDONLY); + + assert(fd > 0); + r = fstat(fd, &ver_st); + assert(r == 0); + + loaded_version_buf = mmap(NULL, ver_st.st_size, + PROT_READ, MAP_PRIVATE, fd, 0); + assert(loaded_version_buf != (char*)-1); + loaded_version_buf_size = ver_st.st_size; + } + + flash_fw_version_preload(); + + proc_gen = proc_gen_p9; + flash_fw_version_preload(); + flash_dt_add_fw_version(); + + return 0; +} + diff --git a/roms/skiboot/core/test/run-flash-subpartition.c b/roms/skiboot/core/test/run-flash-subpartition.c new file mode 100644 index 000000000..5b6df87f2 --- /dev/null +++ b/roms/skiboot/core/test/run-flash-subpartition.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2016 IBM Corp. 
+ */ + +#include <skiboot.h> +#include <opal-api.h> +#include <stdlib.h> + +#include "../flash-subpartition.c" +#include <assert.h> + +/* This is a straight dump of the CAPP ucode partition header */ +char capp[4096] = {0x43, 0x41, 0x50, 0x50, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x01, 0x00, 0xea, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x8e, 0x50, 0x00, 0x02, 0x00, 0xea, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8e, 0x50, + 0x00, 0x02, 0x00, 0xef, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x8e, 0x50, 0x00, 0x02, 0x01, 0xef, + 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x8e, 0x50, + 0x00, 0x01, 0x00, 0xd3, 0x00, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x8e, 0x50, 0x00, 0x00, 0x00, 0x00 }; + +int main(void) +{ + int rc; + uint32_t part_actual; + uint32_t offset; + uint32_t size; + uint32_t subids[] = { 0x100ea, 0x200ea, 0x200ef, 0x201ef, 0x100d3 }; + + for (int i = 0; i < sizeof(subids)/sizeof(uint32_t); i++) { + offset = 0; + rc = flash_subpart_info(capp, sizeof(capp), 0x24000, + &part_actual, subids[i], + &offset, &size); + printf("\nsubid %x\n", subids[i]); + printf("part_actual %u\n", part_actual); + printf("offset %u\n", offset); + printf("size %u\n", size); + assert (rc == 0); + assert (size == 36432); + assert (offset == 4096); + assert (part_actual == 40960); + } + + return 0; +} diff --git a/roms/skiboot/core/test/run-malloc-speed.c b/roms/skiboot/core/test/run-malloc-speed.c new file mode 100644 index 000000000..39a24f9cb --- /dev/null +++ b/roms/skiboot/core/test/run-malloc-speed.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2018 IBM Corp. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) +#include "dummy-cpu.h" + +#include <stdlib.h> + +/* Use these before we undefine them below. */ +static inline void *real_malloc(size_t size) +{ + return malloc(size); +} + +static inline void real_free(void *p) +{ + return free(p); +} + +#include <skiboot.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../malloc.c" +#include "../mem_region.c" +#include "../device.c" + +#undef malloc +#undef free +#undef realloc + +#include <assert.h> +#include <stdio.h> + +char __rodata_start[1], __rodata_end[1]; +struct dt_node *dt_root; +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +#define TEST_HEAP_ORDER 27 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +#define NUM_ALLOCS 4096 + +int main(void) +{ + uint64_t i, len; + void **p = real_malloc(sizeof(void*)*NUM_ALLOCS); + + assert(p); + + /* Use malloc for the heap, so valgrind can find issues. 
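+ * skiboot_heap normally describes a fixed region of the skiboot memory map; backing it
+ * with a buffer from the host malloc() means every allocation the test makes sits in
+ * memory that valgrind knows about.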
*/ + skiboot_heap.start = (unsigned long)real_malloc(skiboot_heap.len); + + len = skiboot_heap.len / NUM_ALLOCS - sizeof(struct alloc_hdr); + for (i = 0; i < NUM_ALLOCS; i++) { + p[i] = __malloc(len, __location__); + assert(p[i] > region_start(&skiboot_heap)); + assert(p[i] + len <= region_start(&skiboot_heap) + + skiboot_heap.len); + } + assert(mem_check(&skiboot_heap)); + assert(skiboot_heap.free_list_lock.lock_val == 0); + free(region_start(&skiboot_heap)); + real_free(p); + return 0; +} diff --git a/roms/skiboot/core/test/run-malloc.c b/roms/skiboot/core/test/run-malloc.c new file mode 100644 index 000000000..10cc64e86 --- /dev/null +++ b/roms/skiboot/core/test/run-malloc.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2018 IBM Corp. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> + +/* Use these before we undefine them below. */ +static inline void *real_malloc(size_t size) +{ + return malloc(size); +} + +static inline void real_free(void *p) +{ + return free(p); +} + +#undef malloc +#undef free +#undef realloc + +#include <skiboot.h> + +#define is_rodata(p) true + +#include "../mem_region.c" +#include "../malloc.c" +#include "../device.c" + +#include "mem_region-malloc.h" + +#define TEST_HEAP_ORDER 16 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +struct dt_node *dt_root; +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +static bool heap_empty(void) +{ + const struct alloc_hdr *h = region_start(&skiboot_heap); + return h->num_longs == skiboot_heap.len / sizeof(long); +} + +int main(void) +{ + char *test_heap = real_malloc(TEST_HEAP_SIZE); + char *p, *p2, *p3, *p4; + char *pr; + size_t i; + + /* Use malloc for the heap, so valgrind can find issues. */ + skiboot_heap.start = (unsigned long)test_heap; + skiboot_heap.len = TEST_HEAP_SIZE; + + /* Allocations of various sizes. */ + for (i = 0; i < TEST_HEAP_ORDER; i++) { + p = malloc(1ULL << i); + assert(p); + assert(p > (char *)test_heap); + assert(p + (1ULL << i) <= (char *)test_heap + TEST_HEAP_SIZE); + assert(!skiboot_heap.free_list_lock.lock_val); + free(p); + assert(!skiboot_heap.free_list_lock.lock_val); + assert(heap_empty()); + } + + /* Realloc as malloc. */ + skiboot_heap.free_list_lock.lock_val = 0; + p = realloc(NULL, 100); + assert(p); + assert(!skiboot_heap.free_list_lock.lock_val); + + /* Realloc as free. */ + p = realloc(p, 0); + assert(!p); + assert(!skiboot_heap.free_list_lock.lock_val); + assert(heap_empty()); + + /* Realloc longer. */ + p = realloc(NULL, 100); + assert(p); + assert(!skiboot_heap.free_list_lock.lock_val); + p2 = realloc(p, 200); + assert(p2 == p); + assert(!skiboot_heap.free_list_lock.lock_val); + free(p2); + assert(!skiboot_heap.free_list_lock.lock_val); + assert(heap_empty()); + + /* Realloc shorter. */ + skiboot_heap.free_list_lock.lock_val = 0; + p = realloc(NULL, 100); + assert(!skiboot_heap.free_list_lock.lock_val); + assert(p); + p2 = realloc(p, 1); + assert(!skiboot_heap.free_list_lock.lock_val); + assert(p2 == p); + free(p2); + assert(!skiboot_heap.free_list_lock.lock_val); + assert(heap_empty()); + + /* zalloc failure */ + p2 = zalloc(TEST_HEAP_SIZE * 2); + assert(p2 == NULL); + + /* Realloc with move. 
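+ * The big allocation below leaves no room immediately after the 64-byte block, so the
+ * realloc() to 128 bytes cannot extend it in place and ends up moving it into the space
+ * freed earlier; the 'c' marker at offset 63 checks the old contents were copied.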
*/ + p2 = malloc(TEST_HEAP_SIZE - 64 - sizeof(struct alloc_hdr)*2); + memset(p2, 'a', TEST_HEAP_SIZE - 64 - sizeof(struct alloc_hdr)*2); + assert(p2); + p = malloc(64); + memset(p, 'b', 64); + p[63] = 'c'; + assert(p); + free(p2); + + p2 = realloc(p, 128); + assert(p2 != p); + assert(p2[63] == 'c'); + free(p2); + assert(heap_empty()); + assert(!skiboot_heap.free_list_lock.lock_val); + + /* Realloc with failure to allocate new size */ + p2 = malloc(TEST_HEAP_SIZE - sizeof(struct alloc_hdr)*2); + assert(p2); + memset(p2, 'a', TEST_HEAP_SIZE - sizeof(struct alloc_hdr)*2); + p = p2; + p2 = realloc(p, TEST_HEAP_SIZE*2); + assert(p2==NULL); + memset(p, 'b', TEST_HEAP_SIZE - sizeof(struct alloc_hdr)*2); + free(p); + + /* Reproduce bug BZ109128/SW257364 */ + p = malloc(100); + p2 = malloc(100); + p3 = malloc(100); + p4 = malloc(100); + free(p2); + pr = realloc(p,216); + assert(pr); + free(p3); + free(pr); + free(p4); + assert(heap_empty()); + assert(!skiboot_heap.free_list_lock.lock_val); + + real_free(test_heap); + return 0; +} diff --git a/roms/skiboot/core/test/run-mem_range_is_reserved.c b/roms/skiboot/core/test/run-mem_range_is_reserved.c new file mode 100644 index 000000000..9891dbd9a --- /dev/null +++ b/roms/skiboot/core/test/run-mem_range_is_reserved.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2015-2019 IBM Corp. + */ + +#include <config.h> + +/* The lock backtrace structures consume too much room on the skiboot heap */ +#undef DEBUG_LOCKS_BACKTRACE + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> + +static void *real_malloc(size_t size) +{ + return malloc(size); +} + +static void real_free(void *p) +{ + return free(p); +} + +#undef malloc +#undef free +#undef realloc + +#include <skiboot.h> +#include <mem_region-malloc.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" +#include "../malloc.c" + +/* But we need device tree to make copies of names. 
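+ * device.c only takes a copy of a node name when is_rodata() says it is not a read-only
+ * literal; forcing it to false here means every name goes through the strdup() pulled in
+ * just below, so the copies are ordinary heap allocations valgrind can track.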
*/ +#undef is_rodata +#define is_rodata(p) false +#include "../../libc/string/strdup.c" + +#include "../device.c" +#include <assert.h> +#include <stdio.h> + +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val++; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val--; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +#define TEST_HEAP_ORDER 16 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char *name; + + name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0])); + assert(name); + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (long long)start); + + mem = dt_new(dt_root, name); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); + free(name); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +struct test_region { + uint64_t start; + uint64_t end; +}; + +static struct test { + struct test_region regions[3]; + bool reserved; +} tests[] = { + /* empty region set */ + { { { 0 } }, false }, + + /* single exact match */ + { { { 0x1000, 0x2000 }, }, true }, + + /* overlap downwards */ + { { { 0x0fff, 0x2000 }, }, true }, + + /* overlap upwards */ + { { { 0x1000, 0x2001 }, }, true }, + + /* missing first byte */ + { { { 0x1001, 0x2000 }, }, false }, + + /* missing last byte */ + { { { 0x1000, 0x1fff }, }, false }, + + /* two regions, full coverage, split before start of range */ + { { { 0x0500, 0x1000 }, { 0x1000, 0x2500 } }, true }, + + /* two regions, full coverage, split after start of range */ + { { { 0x0500, 0x1001 }, { 0x1001, 0x2500 } }, true }, + + /* two regions, full coverage, split at middle of range */ + { { { 0x0500, 0x1500 }, { 0x1500, 0x2500 } }, true }, + + /* two regions, full coverage, split before end of range */ + { { { 0x0500, 0x1fff }, { 0x1fff, 0x2500 } }, true }, + + /* two regions, full coverage, split after end of range */ + { { { 0x0500, 0x2000 }, { 0x2000, 0x2500 } }, true }, + + /* two regions, missing byte in middle of range */ + { { { 0x0500, 0x14ff }, { 0x1500, 0x2500 } }, false }, + + /* two regions, missing byte after start of range */ + { { { 0x0500, 0x1000 }, { 0x1001, 0x2500 } }, false }, + + /* two regions, missing byte before end of range */ + { { { 0x0500, 0x1fff }, { 0x2000, 0x2500 } }, false }, +}; + +static void run_test(struct test *test) +{ + struct test_region *r; + bool reserved; + + list_head_init(®ions); + + mem_region_init(); + + /* create our reservations */ + for (r = test->regions; r->start; r++) + mem_reserve_fw("r", r->start, r->end - r->start); + + reserved = mem_range_is_reserved(0x1000, 0x1000); + + if (reserved != test->reserved) { + struct mem_region *r; + fprintf(stderr, "test failed; got %s, expected %s\n", + reserved ? "reserved" : "unreserved", + test->reserved ? "reserved" : "unreserved"); + + fprintf(stderr, "reserved regions:\n"); + + list_for_each(®ions, r, list) { + fprintf(stderr, "\t: %08"PRIx64"[%08"PRIx64"] %s\n", + r->start, r->len, r->name); + } + exit(EXIT_FAILURE); + } +} + + +int main(void) +{ + unsigned int i; + void *buf; + + /* Use malloc for the heap, so valgrind can find issues. 
*/ + skiboot_heap.start = (long)real_malloc(TEST_HEAP_SIZE); + skiboot_heap.len = TEST_HEAP_SIZE; + + /* shift the OS reserve area out of the way of our playground */ + skiboot_os_reserve.start = 0x100000; + skiboot_os_reserve.len = 0x1000; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + buf = real_malloc(1024*1024); + add_mem_node((unsigned long)buf, 1024*1024); + + for (i = 0; i < ARRAY_SIZE(tests); i++) + run_test(&tests[i]); + + dt_free(dt_root); + real_free(buf); + real_free((void *)(long)skiboot_heap.start); + return 0; +} diff --git a/roms/skiboot/core/test/run-mem_region.c b/roms/skiboot/core/test/run-mem_region.c new file mode 100644 index 000000000..50da8033c --- /dev/null +++ b/roms/skiboot/core/test/run-mem_region.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2019 IBM Corp. + */ + +#include <config.h> +#include <stdbool.h> +#include <stdint.h> + +/* The lock backtrace structures consume too much room on the skiboot heap */ +#undef DEBUG_LOCKS_BACKTRACE + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> +#include <string.h> + +/* Use these before we override definitions below. */ +static void *real_malloc(size_t size) +{ + return malloc(size); +} + +static inline void real_free(void *p) +{ + return free(p); +} + +#undef malloc +#undef free +#undef realloc + +#include <skiboot.h> + +#define is_rodata(p) true + +#include "../mem_region.c" +#include "../malloc.c" +#include "../device.c" + +#include <assert.h> +#include <stdio.h> + +struct dt_node *dt_root; +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val++; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val--; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +#define TEST_HEAP_ORDER 16 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static bool heap_empty(void) +{ + const struct alloc_hdr *h = region_start(&skiboot_heap); + return h->num_longs == skiboot_heap.len / sizeof(long); +} + +int main(void) +{ + char *test_heap; + void *p, *ptrs[100]; + size_t i; + struct mem_region *r; + + /* Use malloc for the heap, so valgrind can find issues. */ + test_heap = real_malloc(TEST_HEAP_SIZE); + skiboot_heap.start = (unsigned long)test_heap; + skiboot_heap.len = TEST_HEAP_SIZE; + + lock(&skiboot_heap.free_list_lock); + + /* Allocations of various sizes. */ + for (i = 0; i < TEST_HEAP_ORDER; i++) { + p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here"); + assert(p); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "here")); + assert(p > (void *)test_heap); + assert(p + (1ULL << i) <= (void *)test_heap + TEST_HEAP_SIZE); + assert(mem_allocated_size(p) >= 1ULL << i); + mem_free(&skiboot_heap, p, "freed"); + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "freed")); + } + p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here"); + assert(!p); + mem_free(&skiboot_heap, p, "freed"); + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + + /* Allocations of various alignments: use small alloc first. */ + ptrs[0] = mem_alloc(&skiboot_heap, 1, 1, "small"); + for (i = 0; ; i++) { + p = mem_alloc(&skiboot_heap, 1, 1ULL << i, "here"); + assert(mem_check(&skiboot_heap)); + /* We will eventually fail... 
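+ * The requested alignment doubles on every pass, so once it exceeds what the heap can
+ * provide, mem_alloc() returns NULL; the assert below checks we at least reached
+ * TEST_HEAP_ORDER before giving up.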
*/ + if (!p) { + assert(i >= TEST_HEAP_ORDER); + break; + } + assert(p); + assert((long)p % (1ULL << i) == 0); + assert(p > (void *)test_heap); + assert(p + 1 <= (void *)test_heap + TEST_HEAP_SIZE); + mem_free(&skiboot_heap, p, "freed"); + assert(mem_check(&skiboot_heap)); + } + mem_free(&skiboot_heap, ptrs[0], "small freed"); + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + + /* Many little allocations, freed in reverse order. */ + for (i = 0; i < 100; i++) { + ptrs[i] = mem_alloc(&skiboot_heap, sizeof(long), 1, "here"); + assert(ptrs[i]); + assert(ptrs[i] > (void *)test_heap); + assert(ptrs[i] + sizeof(long) + <= (void *)test_heap + TEST_HEAP_SIZE); + assert(mem_check(&skiboot_heap)); + } + mem_dump_free(); + for (i = 0; i < 100; i++) + mem_free(&skiboot_heap, ptrs[100 - 1 - i], "freed"); + + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + + /* Check the prev_free gets updated properly. */ + ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0]"); + ptrs[1] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[1]"); + assert(ptrs[1] > ptrs[0]); + mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free"); + assert(mem_check(&skiboot_heap)); + ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0] again"); + assert(mem_check(&skiboot_heap)); + mem_free(&skiboot_heap, ptrs[1], "ptrs[1] free"); + mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free"); + assert(mem_check(&skiboot_heap)); + assert(heap_empty()); + +#if 0 + printf("Heap map:\n"); + for (i = 0; i < TEST_HEAP_SIZE / sizeof(long); i++) { + printf("%u", test_bit(skiboot_heap.bitmap, i)); + if (i % 64 == 63) + printf("\n"); + else if (i % 8 == 7) + printf(" "); + } +#endif + + /* Simple enlargement, then free */ + p = mem_alloc(&skiboot_heap, 1, 1, "one byte"); + assert(p); + assert(mem_resize(&skiboot_heap, p, 100, "hundred bytes")); + assert(mem_allocated_size(p) >= 100); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "hundred bytes")); + mem_free(&skiboot_heap, p, "freed"); + + /* Simple shrink, then free */ + p = mem_alloc(&skiboot_heap, 100, 1, "100 bytes"); + assert(p); + assert(mem_resize(&skiboot_heap, p, 1, "1 byte")); + assert(mem_allocated_size(p) < 100); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "1 byte")); + mem_free(&skiboot_heap, p, "freed"); + + /* Lots of resizing (enlarge). */ + p = mem_alloc(&skiboot_heap, 1, 1, "one byte"); + assert(p); + for (i = 1; i <= TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i++) { + assert(mem_resize(&skiboot_heap, p, i, "enlarge")); + assert(mem_allocated_size(p) >= i); + assert(mem_check(&skiboot_heap)); + } + + /* Can't make it larger though. */ + assert(!mem_resize(&skiboot_heap, p, i, "enlarge")); + + for (i = TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i > 0; i--) { + assert(mem_resize(&skiboot_heap, p, i, "shrink")); + assert(mem_check(&skiboot_heap)); + } + + mem_free(&skiboot_heap, p, "freed"); + assert(mem_check(&skiboot_heap)); + + unlock(&skiboot_heap.free_list_lock); + + /* lock the regions list */ + lock(&mem_region_lock); + /* Test splitting of a region. */ + r = new_region("base", (unsigned long)test_heap, + TEST_HEAP_SIZE, NULL, REGION_SKIBOOT_HEAP); + assert(add_region(r)); + r = new_region("splitter", (unsigned long)test_heap + TEST_HEAP_SIZE/4, + TEST_HEAP_SIZE/2, NULL, REGION_RESERVED); + assert(add_region(r)); + /* Now we should have *three* regions. 
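+ * The reserved "splitter" covers the middle half of the heap, so the original "base"
+ * region is carved into a leading quarter, the reservation itself and a trailing
+ * quarter; the loop below checks the bounds, name and type of each piece.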
*/ + i = 0; + list_for_each(®ions, r, list) { + if (region_start(r) == test_heap) { + assert(r->len == TEST_HEAP_SIZE/4); + assert(strcmp(r->name, "base") == 0); + assert(r->type == REGION_SKIBOOT_HEAP); + } else if (region_start(r) == test_heap + TEST_HEAP_SIZE / 4) { + assert(r->len == TEST_HEAP_SIZE/2); + assert(strcmp(r->name, "splitter") == 0); + assert(r->type == REGION_RESERVED); + assert(!r->free_list.n.next); + } else if (region_start(r) == test_heap + TEST_HEAP_SIZE/4*3) { + assert(r->len == TEST_HEAP_SIZE/4); + assert(strcmp(r->name, "base") == 0); + assert(r->type == REGION_SKIBOOT_HEAP); + } else + abort(); + assert(mem_check(r)); + i++; + } + mem_dump_free(); + assert(i == 3); + while ((r = list_pop(®ions, struct mem_region, list)) != NULL) { + lock(&skiboot_heap.free_list_lock); + mem_free(&skiboot_heap, r, __location__); + unlock(&skiboot_heap.free_list_lock); + } + unlock(&mem_region_lock); + assert(skiboot_heap.free_list_lock.lock_val == 0); + real_free(test_heap); + return 0; +} diff --git a/roms/skiboot/core/test/run-mem_region_init.c b/roms/skiboot/core/test/run-mem_region_init.c new file mode 100644 index 000000000..e96282de8 --- /dev/null +++ b/roms/skiboot/core/test/run-mem_region_init.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2018 IBM Corp. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> + +/* Use these before we undefine them below. */ +static inline void *real_malloc(size_t size) +{ + return malloc(size); +} + +static inline void real_free(void *p) +{ + return free(p); +} + +#include "../malloc.c" + +#include <skiboot.h> +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" + +/* But we need device tree to make copies of names. */ +#undef is_rodata +#define is_rodata(p) false + +static inline char *skiboot_strdup(const char *str) +{ + char *ret = __malloc(strlen(str) + 1, ""); + return memcpy(ret, str, strlen(str) + 1); +} +#undef strdup +#define strdup skiboot_strdup + +#include "../device.c" + +#include <skiboot.h> + +#include <assert.h> +#include <stdio.h> + +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +/* We actually need a lot of room for the bitmaps! */ +#define TEST_HEAP_ORDER 27 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char *name= (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0])); + + assert(name); + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (unsigned long long)start); + + mem = dt_new(dt_root, name); + assert(mem); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); + free(name); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +int main(void) +{ + uint64_t end; + int builtins; + struct mem_region *r; + char *heap = real_malloc(TEST_HEAP_SIZE); + + /* Use malloc for the heap, so valgrind can find issues. 
*/ + skiboot_heap.start = (unsigned long)heap; + skiboot_heap.len = TEST_HEAP_SIZE; + skiboot_os_reserve.len = 16384; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + /* Make sure we overlap the heap, at least. */ + add_mem_node(0, (uint64_t)(heap + 0x100000000ULL)); + add_mem_node((uint64_t)heap+0x100000000ULL , 0x100000000ULL); + end = (uint64_t)(heap+ 0x100000000ULL + 0x100000000ULL); + + /* Now convert. */ + mem_region_init(); + mem_dump_allocs(); + assert(mem_check(&skiboot_heap)); + + builtins = 0; + list_for_each(®ions, r, list) { + /* Regions must not overlap. */ + struct mem_region *r2, *pre = NULL, *post = NULL; + list_for_each(®ions, r2, list) { + if (r == r2) + continue; + assert(!overlaps(r, r2)); + } + + /* But should have exact neighbours. */ + list_for_each(®ions, r2, list) { + if (r == r2) + continue; + if (r2->start == r->start + r->len) + post = r2; + if (r2->start + r2->len == r->start) + pre = r2; + } + assert(r->start == 0 || pre); + assert(r->start + r->len == end || post); + + if (r == &skiboot_code_and_text || + r == &skiboot_heap || + r == &skiboot_after_heap || + r == &skiboot_cpu_stacks || + r == &skiboot_os_reserve) + builtins++; + else + assert(r->type == REGION_MEMORY); + assert(mem_check(r)); + } + assert(builtins == 5); + + dt_free(dt_root); + + while ((r = list_pop(®ions, struct mem_region, list)) != NULL) { + if (r != &skiboot_code_and_text && + r != &skiboot_heap && + r != &skiboot_after_heap && + r != &skiboot_os_reserve && + r != &skiboot_cpu_stacks) { + free(r); + } + assert(mem_check(&skiboot_heap)); + } + assert(skiboot_heap.free_list_lock.lock_val == 0); + real_free(heap); + return 0; +} diff --git a/roms/skiboot/core/test/run-mem_region_next.c b/roms/skiboot/core/test/run-mem_region_next.c new file mode 100644 index 000000000..4f2f73c55 --- /dev/null +++ b/roms/skiboot/core/test/run-mem_region_next.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2015-2018 IBM Corp. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> +#include <string.h> + +/* Use these before we override definitions below. */ +static void *real_malloc(size_t size) +{ + return malloc(size); +} + +static void real_free(void *p) +{ + return free(p); +} + +#undef malloc +#undef free + +#include <skiboot.h> + +#define is_rodata(p) true + +#include "../mem_region.c" +#include "../malloc.c" +#include "../device.c" + +#include <assert.h> +#include <stdio.h> + +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val++; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val--; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + + +#define TEST_HEAP_ORDER 16 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +int main(void) +{ + struct mem_region *r; + char *test_heap; + + /* Use malloc for the heap, so valgrind can find issues. 
*/ + test_heap = real_malloc(TEST_HEAP_SIZE); + skiboot_heap.start = (unsigned long)test_heap; + skiboot_heap.len = TEST_HEAP_SIZE; + + lock(&mem_region_lock); + + /* empty regions */ + r = mem_region_next(NULL); + assert(!r); + + r = new_region("test.1", 0x1000, 0x1000, NULL, REGION_RESERVED); + assert(add_region(r)); + r = new_region("test.2", 0x2000, 0x1000, NULL, REGION_RESERVED); + assert(add_region(r)); + mem_regions_finalised = true; + + r = mem_region_next(NULL); + assert(r); + assert(r->start == 0x1000); + assert(r->len == 0x1000); + assert(r->type == REGION_RESERVED); + + r = mem_region_next(r); + assert(r); + assert(r->start == 0x2000); + assert(r->len == 0x1000); + assert(r->type == REGION_RESERVED); + + r = mem_region_next(r); + assert(!r); + + unlock(&mem_region_lock); + real_free(test_heap); + + return 0; +} diff --git a/roms/skiboot/core/test/run-mem_region_release_unused.c b/roms/skiboot/core/test/run-mem_region_release_unused.c new file mode 100644 index 000000000..463f54283 --- /dev/null +++ b/roms/skiboot/core/test/run-mem_region_release_unused.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2018 IBM Corp. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> + +static void *__malloc(size_t size, const char *location __attribute__((unused))) +{ + return malloc(size); +} + +static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused))) +{ + return realloc(ptr, size); +} + +static void *__zalloc(size_t size, const char *location __attribute__((unused))) +{ + return calloc(size, 1); +} + +static inline void __free(void *p, const char *location __attribute__((unused))) +{ + return free(p); +} + +#include <skiboot.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" + +/* But we need device tree to make copies of names. */ +#undef is_rodata +#define is_rodata(p) false + +#include "../device.c" +#include <assert.h> +#include <stdio.h> + +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + l->lock_val++; +} + +void unlock(struct lock *l) +{ + l->lock_val--; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +#define TEST_HEAP_ORDER 16 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char *name; + + name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0])); + assert(name); + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (long long)start); + + mem = dt_new(dt_root, name); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); + free(name); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +int main(void) +{ + uint64_t i; + struct mem_region *r, *other = NULL; + void *other_mem; + const char *last; + + /* Use malloc for the heap, so valgrind can find issues. 
*/ + skiboot_heap.start = (unsigned long)malloc(TEST_HEAP_SIZE); + skiboot_heap.len = TEST_HEAP_SIZE; + skiboot_os_reserve.len = 0; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + other_mem = malloc(1024*1024); + add_mem_node((unsigned long)other_mem, 1024*1024); + + /* Now convert. */ + mem_region_init(); + + /* Find our node to allocate from */ + list_for_each(®ions, r, list) { + if (region_start(r) == other_mem) + other = r; + } + /* This could happen if skiboot addresses clashed with our alloc. */ + assert(other); + assert(mem_check(other)); + + /* Allocate 1k from other region. */ + lock(&other->free_list_lock); + mem_alloc(other, 1024, 1, "1k"); + unlock(&other->free_list_lock); + + mem_region_release_unused(); + + assert(mem_check(&skiboot_heap)); + + /* Now we expect it to be split. */ + i = 0; + list_for_each(®ions, r, list) { + assert(mem_check(r)); + i++; + if (r == &skiboot_os_reserve) + continue; + if (r == &skiboot_code_and_text) + continue; + if (r == &skiboot_heap) + continue; + if (r == &skiboot_after_heap) + continue; + if (r == &skiboot_cpu_stacks) + continue; + if (r == other) { + assert(r->type == REGION_MEMORY); + assert(r->len < 1024 * 1024); + } else { + assert(r->type == REGION_OS); + assert(r->start == other->start + other->len); + assert(r->start + r->len == other->start + 1024*1024); + } + } + assert(i == 7); + + last = NULL; + list_for_each(®ions, r, list) { + if (last != r->name && + strncmp(r->name, NODE_REGION_PREFIX, + strlen(NODE_REGION_PREFIX)) == 0) { + /* It's safe to cast away const as this is + * only going to happen in test code */ + free((void*)r->name); + break; + } + last = r->name; + } + + dt_free(dt_root); + free((void *)(long)skiboot_heap.start); + free(other_mem); + return 0; +} diff --git a/roms/skiboot/core/test/run-mem_region_release_unused_noalloc.c b/roms/skiboot/core/test/run-mem_region_release_unused_noalloc.c new file mode 100644 index 000000000..d7adc5a9a --- /dev/null +++ b/roms/skiboot/core/test/run-mem_region_release_unused_noalloc.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2018 IBM Corp. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> + +static void *__malloc(size_t size, const char *location __attribute__((unused))) +{ + return malloc(size); +} + +static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused))) +{ + return realloc(ptr, size); +} + +static void *__zalloc(size_t size, const char *location __attribute__((unused))) +{ + return calloc(size, 1); +} + +static inline void __free(void *p, const char *location __attribute__((unused))) +{ + return free(p); +} + +#include <skiboot.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" + +/* But we need device tree to make copies of names. 
*/ +#undef is_rodata +#define is_rodata(p) false + +#include "../device.c" +#include <assert.h> +#include <stdio.h> + +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + l->lock_val++; +} + +void unlock(struct lock *l) +{ + l->lock_val--; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +#define TEST_HEAP_ORDER 16 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char *name; + + name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0])); + assert(name); + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (long long)start); + + mem = dt_new(dt_root, name); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); + free(name); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +int main(void) +{ + uint64_t i; + struct mem_region *r; + const char *last; + + /* Use malloc for the heap, so valgrind can find issues. */ + skiboot_heap.start = 0; + skiboot_heap.len = TEST_HEAP_SIZE; + skiboot_os_reserve.start = 0; + skiboot_os_reserve.len = 0; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + add_mem_node(0, 0x100000000ULL); + add_mem_node(0x100000000ULL, 0x100000000ULL); + + mem_region_init(); + + mem_region_release_unused(); + + assert(mem_check(&skiboot_heap)); + + /* Now we expect it to be split. */ + i = 0; + list_for_each(®ions, r, list) { + assert(mem_check(r)); + i++; + if (r == &skiboot_os_reserve) + continue; + if (r == &skiboot_code_and_text) + continue; + if (r == &skiboot_heap) + continue; + if (r == &skiboot_after_heap) + continue; + if (r == &skiboot_cpu_stacks) + continue; + + /* the memory nodes should all be available to the OS now */ + assert(r->type == REGION_OS); + } + assert(i == 9); + + last = NULL; + list_for_each(®ions, r, list) { + if (last != r->name && + strncmp(r->name, NODE_REGION_PREFIX, + strlen(NODE_REGION_PREFIX)) == 0) { + /* It's safe to cast away the const as + * this never happens at runtime, + * only in test and only for valgrind + */ + free((void*)r->name); + last = r->name; + } + } + + dt_free(dt_root); + return 0; +} diff --git a/roms/skiboot/core/test/run-mem_region_reservations.c b/roms/skiboot/core/test/run-mem_region_reservations.c new file mode 100644 index 000000000..c24652f41 --- /dev/null +++ b/roms/skiboot/core/test/run-mem_region_reservations.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2018 IBM Corp. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) + +#include "dummy-cpu.h" + +#include <stdlib.h> + +static void *real_malloc(size_t size) +{ + return malloc(size); +} + +static void real_free(void *p) +{ + return free(p); +} + +#undef malloc +#undef free +#undef realloc + +#include <skiboot.h> +#include <mem_region-malloc.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" +#include "../malloc.c" + +/* But we need device tree to make copies of names. 
*/ +#undef is_rodata +#define is_rodata(p) false +#include "../../libc/string/strdup.c" + +#include "../device.c" +#include <assert.h> +#include <stdio.h> + +enum proc_chip_quirks proc_chip_quirks; + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val++; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val--; +} + +bool lock_held_by_me(struct lock *l) +{ + return l->lock_val; +} + +#define TEST_HEAP_ORDER 16 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char *name; + + name = (char*)malloc(sizeof("memory@") + STR_MAX_CHARS(reg[0])); + assert(name); + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (long long)start); + + mem = dt_new(dt_root, name); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); + free(name); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +static struct { + const char *name; + uint64_t addr; + bool found; +} test_regions[] = { + { "test.1", 0x1000, false }, + { "test.2", 0x2000, false }, + { "test.3", 0x4000, false }, +}; + +static void check_property_reservations(void) +{ + const struct dt_property *names, *ranges; + unsigned int i, l; + const char *name; + uint64_t *rangep; + const char *at; + + /* check dt properties */ + names = dt_find_property(dt_root, "reserved-names"); + ranges = dt_find_property(dt_root, "reserved-ranges"); + + assert(names && ranges); + + /* walk through names & ranges properies, ensuring that the test + * regions are all present */ + for (name = names->prop, rangep = (uint64_t *)ranges->prop; + name < names->prop + names->len; + name += l, rangep += 2) { + uint64_t addr; + + addr = dt_get_number(rangep, 2); + l = strlen(name) + 1; + + for (i = 0; i < ARRAY_SIZE(test_regions); i++) { + at = strchr(name, '@'); + if (strncmp(test_regions[i].name, name, + at ? at-name: strlen(name))) + continue; + assert(test_regions[i].addr == addr); + assert(!test_regions[i].found); + test_regions[i].found = true; + } + } + + for (i = 0; i < ARRAY_SIZE(test_regions); i++) { + assert(test_regions[i].found); + test_regions[i].found = false; + } +} + +static void check_node_reservations(void) +{ + struct dt_node *parent, *node; + unsigned int i; + + parent = dt_find_by_name(dt_root, "reserved-memory"); + assert(parent); + + assert(dt_prop_get_cell(parent, "#address-cells", 0) == 2); + assert(dt_prop_get_cell(parent, "#size-cells", 0) == 2); + dt_require_property(parent, "ranges", 0); + + dt_for_each_child(parent, node) { + uint64_t addr, size; + + addr = dt_get_address(node, 0, &size); + + for (i = 0; i < ARRAY_SIZE(test_regions); i++) { + if (strncmp(test_regions[i].name, node->name, + strlen(test_regions[i].name))) + continue; + + assert(!test_regions[i].found); + assert(test_regions[i].addr == addr); + assert(size == 0x1000); + test_regions[i].found = true; + } + } + + for (i = 0; i < ARRAY_SIZE(test_regions); i++) { + assert(test_regions[i].found); + test_regions[i].found = false; + } +} + +int main(void) +{ + struct mem_region *r; + unsigned int i; + void *buf; + + /* Use malloc for the heap, so valgrind can find issues. 
*/ + skiboot_heap.start = (long)real_malloc(TEST_HEAP_SIZE); + skiboot_heap.len = TEST_HEAP_SIZE; + skiboot_os_reserve.len = skiboot_heap.start; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + buf = real_malloc(1024*1024); + add_mem_node((unsigned long)buf, 1024*1024); + + /* add pre-init reservations */ + for (i = 0; i < ARRAY_SIZE(test_regions); i++) + mem_reserve_fw(test_regions[i].name, + test_regions[i].addr, 0x1000); + + /* Now convert. */ + mem_region_init(); + + /* add a post-init reservation */ + mem_reserve_fw("test.4", 0x5000, 0x1000); + + /* release unused */ + mem_region_release_unused(); + + /* and create reservations */ + mem_region_add_dt_reserved(); + + /* ensure we can't create further reservations */ + r = new_region("test.5", 0x5000, 0x1000, NULL, REGION_RESERVED); + assert(!add_region(r)); + + /* check old property-style reservations */ + check_property_reservations(); + + /* and new node-style reservations */ + check_node_reservations(); + + dt_free(dt_root); + real_free(buf); + real_free((void *)(long)skiboot_heap.start); + return 0; +} diff --git a/roms/skiboot/core/test/run-msg.c b/roms/skiboot/core/test/run-msg.c new file mode 100644 index 000000000..3659a12d7 --- /dev/null +++ b/roms/skiboot/core/test/run-msg.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2019 IBM Corp. + */ + +#include <inttypes.h> +#include <stdbool.h> +#include <stddef.h> +#include <assert.h> +#include <errno.h> +#include <stdlib.h> + +static bool zalloc_should_fail = false; +static int zalloc_should_fail_after = 0; + +/* Fake top_of_ram -- needed for API's */ +unsigned long top_of_ram = 0xffffffffffffffffULL; + +static void *zalloc(size_t size) +{ + if (zalloc_should_fail && zalloc_should_fail_after == 0) { + errno = ENOMEM; + return NULL; + } + if (zalloc_should_fail_after > 0) + zalloc_should_fail_after--; + + return calloc(size, 1); +} + +#include "../opal-msg.c" +#include <skiboot.h> + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values) +{ + (void)evt_mask; + (void)evt_values; +} + +static long magic = 8097883813087437089UL; +static void callback(void *data, int status) +{ + assert((status == OPAL_SUCCESS || status == OPAL_PARTIAL)); + assert(*(uint64_t *)data == magic); +} + +static size_t list_count(struct list_head *list) +{ + size_t count = 0; + struct opal_msg_entry *dummy; + + list_for_each(list, dummy, link) + count++; + return count; +} + +int main(void) +{ + struct opal_msg_entry* entry; + int free_size = OPAL_MAX_MSGS; + int nfree = free_size; + int npending = 0; + int r; + static struct opal_msg m; + uint64_t *m_ptr = (uint64_t *)&m; + + zalloc_should_fail = true; + zalloc_should_fail_after = 3; + opal_init_msg(); + + zalloc_should_fail = false; + opal_init_msg(); + + assert(list_count(&msg_pending_list) == npending); + assert(list_count(&msg_free_list) == nfree); + + /* Callback. 
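+ * Queue a message carrying a completion callback and three parameters, then drain it
+ * with opal_get_msg(); each step should move exactly one entry between the pending and
+ * free lists.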
*/ + r = opal_queue_msg(0, &magic, callback, (u64)0, (u64)1, (u64)2); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + assert(m.params[0] == 0); + assert(m.params[1] == 1); + assert(m.params[2] == 2); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == ++nfree); + + /* No params. */ + r = opal_queue_msg(0, NULL, NULL); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == ++nfree); + + /* > 8 params (ARRAY_SIZE(entry->msg.params) */ + r = opal_queue_msg(0, NULL, NULL, 0, 1, 2, 3, 4, 5, 6, 7, 0xBADDA7A); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == OPAL_PARTIAL); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == nfree); + + /* Return OPAL_PARTIAL to callback */ + r = opal_queue_msg(0, &magic, callback, 0, 1, 2, 3, 4, 5, 6, 7, 0xBADDA7A); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == OPAL_PARTIAL); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == nfree); + + /* return OPAL_PARAMETER */ + r = _opal_queue_msg(0, NULL, NULL, OPAL_MSG_SIZE, m_ptr); + assert(r == OPAL_PARAMETER); + + assert(m.params[0] == 0); + assert(m.params[1] == 1); + assert(m.params[2] == 2); + assert(m.params[3] == 3); + assert(m.params[4] == 4); + assert(m.params[5] == 5); + assert(m.params[6] == 6); + assert(m.params[7] == 7); + + /* 8 params (ARRAY_SIZE(entry->msg.params) */ + r = opal_queue_msg(0, NULL, NULL, 0, 10, 20, 30, 40, 50, 60, 70); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == ++nfree); + + assert(m.params[0] == 0); + assert(m.params[1] == 10); + assert(m.params[2] == 20); + assert(m.params[3] == 30); + assert(m.params[4] == 40); + assert(m.params[5] == 50); + assert(m.params[6] == 60); + assert(m.params[7] == 70); + + /* Full list (no free nodes in pending). */ + while (nfree > 0) { + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == 0); + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + } + assert(list_count(&msg_free_list) == 0); + assert(nfree == 0); + assert(npending == OPAL_MAX_MSGS); + + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == 0); + + assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1); + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == nfree); + + /* Make zalloc fail to test error handling. 
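+ * The free list is empty at this point, so queueing another message needs a fresh
+ * allocation; with zalloc forced to fail, the call should return OPAL_RESOURCE and
+ * leave both list counts unchanged.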
*/ + zalloc_should_fail = true; + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == OPAL_RESOURCE); + + assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1); + assert(list_count(&msg_pending_list) == npending); + assert(list_count(&msg_free_list) == nfree); + + /* Empty list (no nodes). */ + while(!list_empty(&msg_pending_list)) { + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + npending--; + nfree++; + } + assert(list_count(&msg_pending_list) == npending); + assert(list_count(&msg_free_list) == nfree); + assert(npending == 0); + assert(nfree == OPAL_MAX_MSGS+1); + + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + /* Request invalid size. */ + r = opal_get_msg(m_ptr, sizeof(m) - 1); + assert(r == OPAL_PARAMETER); + + /* Pass null buffer. */ + r = opal_get_msg(NULL, sizeof(m)); + assert(r == OPAL_PARAMETER); + + /* Get msg when none are pending. */ + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == OPAL_RESOURCE); + +#define test_queue_num(type, val) \ + r = opal_queue_msg(0, NULL, NULL, \ + (type)val, (type)val, (type)val, (type)val, \ + (type)val, (type)val, (type)val, (type)val); \ + assert(r == 0); \ + opal_get_msg(m_ptr, sizeof(m)); \ + assert(r == OPAL_SUCCESS); \ + assert(m.params[0] == (type)val); \ + assert(m.params[1] == (type)val); \ + assert(m.params[2] == (type)val); \ + assert(m.params[3] == (type)val); \ + assert(m.params[4] == (type)val); \ + assert(m.params[5] == (type)val); \ + assert(m.params[6] == (type)val); \ + assert(m.params[7] == (type)val) + + /* Test types of various widths */ + test_queue_num(u64, -1); + test_queue_num(s64, -1); + test_queue_num(u32, -1); + test_queue_num(s32, -1); + test_queue_num(u16, -1); + test_queue_num(s16, -1); + test_queue_num(u8, -1); + test_queue_num(s8, -1); + + /* Clean up the list to keep valgrind happy. */ + while(!list_empty(&msg_free_list)) { + entry = list_pop(&msg_free_list, struct opal_msg_entry, link); + assert(entry); + free(entry); + } + + while(!list_empty(&msg_pending_list)) { + entry = list_pop(&msg_pending_list, struct opal_msg_entry, link); + assert(entry); + free(entry); + } + + return 0; +} diff --git a/roms/skiboot/core/test/run-nvram-format.c b/roms/skiboot/core/test/run-nvram-format.c new file mode 100644 index 000000000..ba286bea3 --- /dev/null +++ b/roms/skiboot/core/test/run-nvram-format.c @@ -0,0 +1,167 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2019 IBM Corp. 
+ */ + +#include <stdlib.h> + +#include "../nvram-format.c" + +bool nvram_wait_for_load(void) +{ + return true; +} + +bool nvram_validate(void) +{ + return true; +} + +bool nvram_has_loaded(void) +{ + return true; +} + +static char *nvram_reset(void *nvram_image, int size) +{ + struct chrp_nvram_hdr *h = nvram_image; + + /* entire partition used by one key */ + assert(nvram_format(nvram_image, size) == 0); + memset((char *) h + sizeof(*h), 0, NVRAM_SIZE_FW_PRIV - sizeof(*h)); + assert(nvram_check(nvram_image, size) == 0); + + return (char *) h + sizeof(*h); +} + +int main(void) +{ + char *nvram_image; + size_t sz; + struct chrp_nvram_hdr *h; + char *data; + const char *result; + + /* 1024 bytes is too small for our NVRAM */ + nvram_image = malloc(1024); + assert(nvram_format(nvram_image, 1024)!=0); + free(nvram_image); + + /* 4096 bytes is too small for our NVRAM */ + nvram_image = malloc(4096); + assert(nvram_format(nvram_image, 4096)!=0); + free(nvram_image); + + /* 64k is too small for our NVRAM */ + nvram_image = malloc(0x10000); + assert(nvram_format(nvram_image, 0x10000)!=0); + free(nvram_image); + + /* 68k is too small for our NVRAM */ + nvram_image = malloc(68*1024); + assert(nvram_format(nvram_image, 68*1024)!=0); + free(nvram_image); + + /* 68k+16 bytes (nvram header) should generate empty free space */ + sz = NVRAM_SIZE_COMMON + NVRAM_SIZE_FW_PRIV + + sizeof(struct chrp_nvram_hdr); + nvram_image = malloc(sz); + assert(nvram_format(nvram_image, sz)==0); + assert(nvram_check(nvram_image, sz)==0); + assert(nvram_image[sz-14]==0); + assert(nvram_image[sz-13]==1); + h = (struct chrp_nvram_hdr*)(&nvram_image[NVRAM_SIZE_COMMON + NVRAM_SIZE_FW_PRIV]); + assert(memcmp(h->name, "wwwwwwwwwwww", 12)==0); + free(nvram_image); + + /* 128k NVRAM check */ + nvram_image = malloc(128*1024); + assert(nvram_format(nvram_image, 128*1024)==0); + assert(nvram_check(nvram_image,128*1024)==0); + + /* Now, we corrupt it */ + nvram_image[0] = 0; + assert(nvram_check(nvram_image,128*1024) != 0); + + /* Does our NUL checking work? */ + assert(nvram_format(nvram_image, 128 * 1024) == 0); + h = (struct chrp_nvram_hdr *) nvram_image; + memset((char *) h + sizeof(*h), 0xFF, be16_to_cpu(h->len) * 16 - sizeof(*h)); + assert(nvram_check(nvram_image, 128 * 1024) != 0); + + assert(nvram_format(nvram_image, 128*1024)==0); + /* corrupt the length of the partition */ + nvram_image[2] = 0; + nvram_image[3] = 0; + assert(nvram_check(nvram_image,128*1024) != 0); + + assert(nvram_format(nvram_image, 128*1024)==0); + /* corrupt the length of the partition */ + nvram_image[2] = 0; + nvram_image[3] = 0; + /* but reset checksum! */ + h = (struct chrp_nvram_hdr*)nvram_image; + h->cksum = chrp_nv_cksum(h); + assert(nvram_check(nvram_image,128*1024) != 0); + + assert(nvram_format(nvram_image, 128*1024)==0); + /* make the length insanely beyond end of nvram */ + nvram_image[2] = 42; + nvram_image[3] = 32; + /* but reset checksum! */ + h = (struct chrp_nvram_hdr*)nvram_image; + h->cksum = chrp_nv_cksum(h); + assert(nvram_check(nvram_image,128*1024) != 0); + + assert(nvram_format(nvram_image, 128*1024)==0); + /* remove skiboot partition */ + nvram_image[12] = '\0'; + /* but reset checksum! */ + h = (struct chrp_nvram_hdr*)nvram_image; + h->cksum = chrp_nv_cksum(h); + assert(nvram_check(nvram_image,128*1024) != 0); + + assert(nvram_format(nvram_image, 128*1024)==0); + /* remove common partition */ + nvram_image[NVRAM_SIZE_FW_PRIV+5] = '\0'; + /* but reset checksum! 
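+ * Recomputing the CHRP header checksum after corrupting the common partition's name
+ * makes sure nvram_check() rejects the image because of the damaged partition itself,
+ * not merely because of a stale checksum.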
*/ + h = (struct chrp_nvram_hdr*)(&nvram_image[NVRAM_SIZE_FW_PRIV]); + h->cksum = chrp_nv_cksum(h); + assert(nvram_check(nvram_image,128*1024) != 0); + + /* test nvram_query() */ + + /* does an empty partition break us? */ + data = nvram_reset(nvram_image, 128*1024); + assert(nvram_query_safe("test") == NULL); + + /* does a zero length key break us? */ + data = nvram_reset(nvram_image, 128*1024); + data[0] = '='; + assert(nvram_query_safe("test") == NULL); + + /* does a missing = break us? */ + data = nvram_reset(nvram_image, 128*1024); + data[0] = 'a'; + assert(nvram_query_safe("test") == NULL); + + /* does an empty value break us? */ + data = nvram_reset(nvram_image, 128*1024); + data[0] = 'a'; + data[1] = '='; + result = nvram_query_safe("a"); + assert(result); + assert(strlen(result) == 0); + + /* do we trip over malformed keys? */ + data = nvram_reset(nvram_image, 128*1024); +#define TEST_1 "a\0a=\0test=test\0" + memcpy(data, TEST_1, sizeof(TEST_1)); + result = nvram_query_safe("test"); + assert(result); + assert(strcmp(result, "test") == 0); + + free(nvram_image); + + return 0; +} diff --git a/roms/skiboot/core/test/run-pci-quirk.c b/roms/skiboot/core/test/run-pci-quirk.c new file mode 100644 index 000000000..fd4d95c10 --- /dev/null +++ b/roms/skiboot/core/test/run-pci-quirk.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2018 IBM Corp + */ + +#include <assert.h> +#include <stdint.h> +#include <compiler.h> +#include <stdbool.h> + +/* Stubs for quirk_astbmc_vga() */ + +struct dt_property; +struct dt_node; + +static struct bmc_platform fake_bmc; +const struct bmc_platform *bmc_platform = &fake_bmc; + +static int ast_sio_is_enabled(void) +{ + return 0; +} + +static uint32_t ast_ahb_readl(uint32_t reg) +{ + return reg; +} + +static struct dt_property *__dt_add_property_cells( + struct dt_node *node __unused, const char *name __unused, + int count __unused, ...) 
+{ + return (void *)0; +} + +struct pci_device; +struct pci_cfg_reg_filter; +typedef int64_t (*pci_cfg_reg_func)(void *dev, + struct pci_cfg_reg_filter *pcrf, + uint32_t offset, uint32_t len, + uint32_t *data, bool write); + + +static struct pci_cfg_reg_filter *pci_add_cfg_reg_filter( + struct pci_device *pd __unused, + uint32_t start __unused, + uint32_t len __unused, + uint32_t flags __unused, + pci_cfg_reg_func func __unused) +{ + return NULL; +} + +#include "../pci-quirk.c" + +struct pci_device test_pd; +int test_fixup_ran; + +static void test_fixup(struct phb *phb __unused, struct pci_device *pd __unused) +{ + assert(PCI_VENDOR_ID(pd->vdid) == 0x1a03); + assert(PCI_DEVICE_ID(pd->vdid) == 0x2000); + test_fixup_ran = 1; +} + +/* Quirks are: {fixup function, vendor ID, (device ID or PCI_ANY_ID)} */ +static const struct pci_quirk test_quirk_table[] = { + /* ASPEED 2400 VGA device */ + { 0x1a03, 0x2000, &test_fixup }, + { 0, 0, NULL } +}; + +#define PCI_COMPOSE_VDID(vendor, device) (((device) << 16) | (vendor)) + +int main(void) +{ + /* Unrecognised vendor and device ID */ + test_pd.vdid = PCI_COMPOSE_VDID(0xabcd, 0xef01); + __pci_handle_quirk(NULL, &test_pd, test_quirk_table); + assert(test_fixup_ran == 0); + + /* Unrecognised vendor ID, matching device ID */ + test_pd.vdid = PCI_COMPOSE_VDID(0xabcd, 0x2000); + __pci_handle_quirk(NULL, &test_pd, test_quirk_table); + assert(test_fixup_ran == 0); + + /* Matching vendor ID, unrecognised device ID */ + test_pd.vdid = PCI_COMPOSE_VDID(0x1a03, 0xef01); + __pci_handle_quirk(NULL, &test_pd, test_quirk_table); + assert(test_fixup_ran == 0); + + /* Matching vendor and device ID */ + test_pd.vdid = PCI_COMPOSE_VDID(0x1a03, 0x2000); + __pci_handle_quirk(NULL, &test_pd, test_quirk_table); + assert(test_fixup_ran == 1); + + return 0; +} diff --git a/roms/skiboot/core/test/run-pel.c b/roms/skiboot/core/test/run-pel.c new file mode 100644 index 000000000..812c8996c --- /dev/null +++ b/roms/skiboot/core/test/run-pel.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Test for our PEL record generation. Currently this doesn't actually + * test that the records we generate are correct, but it at least lets + * us run valgrind over the generation routines to check for buffer + * overflows, etc. + * + * Copyright 2013-2016 IBM Corp. + */ + +#include <skiboot.h> +#include <inttypes.h> +#include <assert.h> +#include <pel.h> +#include <errorlog.h> +#include <device.h> + +#define TEST_ERROR 0x1234 +#define TEST_SUBSYS 0x5678 + +DEFINE_LOG_ENTRY(TEST_ERROR, OPAL_PLATFORM_ERR_EVT, TEST_SUBSYS, + OPAL_PLATFORM_FIRMWARE, OPAL_INFO, + OPAL_NA); + +/* Override this for testing. 
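+ *
+ * (is_rodata() is used by the code included below to tell whether a
+ * pointer falls inside the firmware's read-only image; substituting a
+ * private 16-byte __rodata_start[] array lets that check run in a
+ * host-built unit test without the real linker-provided symbols)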
*/ +#define is_rodata(p) fake_is_rodata(p) + +char __rodata_start[16]; +#define __rodata_end (__rodata_start + sizeof(__rodata_start)) + +static inline bool fake_is_rodata(const void *p) +{ + return ((char *)p >= __rodata_start && (char *)p < __rodata_end); +} + +#define zalloc(bytes) calloc((bytes), 1) + +#include "../device.c" +#include "../pel.c" + +struct dt_node *dt_root = NULL; +char dt_prop[] = "DUMMY DT PROP"; + +int rtc_cache_get_datetime(uint32_t *year_month_day, + uint64_t *hour_minute_second_millisecond) +{ + *year_month_day = 0; + *hour_minute_second_millisecond = 0; + + return 0; +} + +int main(void) +{ + char *pel_buf; + size_t size; + struct errorlog *elog; + struct opal_err_info *opal_err_info = &err_TEST_ERROR; + char *buffer; + struct elog_user_data_section *tmp; + + dt_root = dt_new_root(""); + dt_add_property_string(dt_root, "model", "run-pel-unittest"); + + elog = malloc(sizeof(struct errorlog)); + pel_buf = malloc(PEL_MIN_SIZE + 4); + assert(elog); + assert(pel_buf); + + memset(elog, 0, sizeof(struct errorlog)); + + elog->error_event_type = opal_err_info->err_type; + elog->component_id = opal_err_info->cmp_id; + elog->subsystem_id = opal_err_info->subsystem; + elog->event_severity = opal_err_info->sev; + elog->event_subtype = opal_err_info->event_subtype; + elog->reason_code = opal_err_info->reason_code; + elog->elog_origin = ORG_SAPPHIRE; + + size = pel_size(elog); + + printf("Test buffer too small: "); + assert(0 == create_pel_log(elog, NULL, size - 1)); + + assert(size <= PEL_MIN_SIZE + 4); + assert(size == create_pel_log(elog, pel_buf, size)); + + memset(elog, 0, sizeof(struct errorlog)); + + elog->error_event_type = opal_err_info->err_type; + elog->component_id = opal_err_info->cmp_id; + elog->subsystem_id = opal_err_info->subsystem; + elog->event_severity = opal_err_info->sev; + elog->event_subtype = opal_err_info->event_subtype; + elog->reason_code = opal_err_info->reason_code; + elog->elog_origin = ORG_SAPPHIRE; + + size = pel_size(elog); + pel_buf = realloc(pel_buf, size); + assert(pel_buf); + + buffer = elog->user_data_dump + elog->user_section_size; + tmp = (struct elog_user_data_section *)buffer; + tmp->tag = OPAL_ELOG_SEC_DESC; /* ASCII of DESC */ + tmp->size = size + sizeof(struct elog_user_data_section) - 1; + strcpy(tmp->data_dump, "Hello World!"); + elog->user_section_size += tmp->size; + elog->user_section_count++; + + size = pel_size(elog); + pel_buf = realloc(pel_buf, size); + assert(pel_buf); + + assert(size == create_pel_log(elog, pel_buf, size)); + + free(pel_buf); + free(elog); + + return 0; +} diff --git a/roms/skiboot/core/test/run-pool.c b/roms/skiboot/core/test/run-pool.c new file mode 100644 index 000000000..e1c3843ff --- /dev/null +++ b/roms/skiboot/core/test/run-pool.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2014 IBM Corp + */ + +#include <pool.h> + +#include "../pool.c" + +#define POOL_OBJ_COUNT 10 +#define POOL_RESERVED_COUNT 2 +#define POOL_NORMAL_COUNT (POOL_OBJ_COUNT - POOL_RESERVED_COUNT) + +struct test_object +{ + int a; + int b; + int c; +}; + +int main(void) +{ + int i, count = 0; + struct pool pool; + struct test_object *a[POOL_OBJ_COUNT]; + + assert(!pool_init(&pool, sizeof(struct test_object), POOL_OBJ_COUNT, + POOL_RESERVED_COUNT)); + + a[0] = pool_get(&pool, POOL_NORMAL); + assert(a[0]); + pool_free_object(&pool, a[0]); + + for(i = 0; i < POOL_NORMAL_COUNT; i++) + { + a[i] = pool_get(&pool, POOL_NORMAL); + if (a[i]) + count++; + } + assert(count == POOL_NORMAL_COUNT); 
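+
+	/*
+	 * Reminder of the pool semantics under test: pool_init() set
+	 * aside POOL_RESERVED_COUNT (2) of the POOL_OBJ_COUNT (10)
+	 * objects for POOL_HIGH callers, so only POOL_NORMAL_COUNT (8)
+	 * POOL_NORMAL allocations can succeed, as the asserts below
+	 * demonstrate.
+	 */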
+ + /* Normal pool should be exhausted */ + assert(!pool_get(&pool, POOL_NORMAL)); + + /* Reserved pool should still be available */ + a[POOL_NORMAL_COUNT] = pool_get(&pool, POOL_HIGH); + assert(a[POOL_NORMAL_COUNT]); + a[POOL_NORMAL_COUNT + 1] = pool_get(&pool, POOL_HIGH); + assert(a[POOL_NORMAL_COUNT + 1]); + + pool_free_object(&pool, a[3]); + + /* Should be a free object to get now */ + a[3] = pool_get(&pool, POOL_HIGH); + assert(a[3]); + + /* This exits depending on whether all tests passed */ + return 0; +} diff --git a/roms/skiboot/core/test/run-time-utils.c b/roms/skiboot/core/test/run-time-utils.c new file mode 100644 index 000000000..04723dd61 --- /dev/null +++ b/roms/skiboot/core/test/run-time-utils.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2015-2017 IBM Corp. + */ + +#include <config.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdarg.h> +#include <stdio.h> + +#define __TEST__ + +#include "../time-utils.c" + +int main(void) +{ + struct tm *t = malloc(sizeof(struct tm)); + uint32_t *ymd = malloc(sizeof(uint32_t)); + uint64_t *hms = malloc(sizeof(uint64_t)); + + t->tm_year = 1982; + t->tm_mon = 0; + t->tm_mday = 29; + t->tm_hour = 7; + t->tm_min = 42; + t->tm_sec = 24; + + tm_to_datetime(t, ymd, hms); + + assert(*ymd == 0x19820129); + assert(*hms == 0x742240000000000ULL); + + memset(t, 0, sizeof(struct tm)); + + *ymd = 0x19760412; + + datetime_to_tm(*ymd, *hms, t); + assert(t->tm_year == 1976); + assert(t->tm_mon == 03); + assert(t->tm_mday == 12); + assert(t->tm_hour == 7); + assert(t->tm_min == 42); + assert(t->tm_sec == 24); + + free(t); + free(ymd); + free(hms); + return 0; +} + diff --git a/roms/skiboot/core/test/run-timebase.c b/roms/skiboot/core/test/run-timebase.c new file mode 100644 index 000000000..a613609a0 --- /dev/null +++ b/roms/skiboot/core/test/run-timebase.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2015-2016 IBM Corp. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> + +#define __TEST__ +#include <timebase.h> + +unsigned long tb_hz = 512000000; + +int main(void) +{ + /* This is a fairly solid assumption that the math we're doing + * is based on tb_hz of exactly 512mhz. + * If we do start doing the math on different tb_hz, you probably + * want to go and audit every bit of code that touches tb to + * count/delay things. 
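+ *
+ * As a worked example of the conversions asserted below, with
+ * tb_hz = 512000000:
+ *
+ *	msecs_to_tb(5) = 5 * 512000000 / 1000    = 2560000 ticks
+ *	usecs_to_tb(5) = 5 * 512000000 / 1000000 = 2560 ticks
+ *	tb_to_secs(1024000000) = 1024000000 / 512000000 = 2 seconds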
+ */ + assert(tb_hz == 512000000); + assert(secs_to_tb(1) == tb_hz); + assert(secs_to_tb(2) == 1024000000); + assert(secs_to_tb(10) == 5120000000); + assert(tb_to_secs(512000000) == 1); + assert(tb_to_secs(5120000000) == 10); + assert(tb_to_secs(1024000000) == 2); + + assert(msecs_to_tb(1) == 512000); + assert(msecs_to_tb(100) == 51200000); + assert(msecs_to_tb(5) == 2560000); + assert(tb_to_msecs(512000) == 1); + + assert(usecs_to_tb(5) == 2560); + assert(tb_to_usecs(2560) == 5); + assert(usecs_to_tb(5)*1000 == msecs_to_tb(5)); + assert(tb_to_usecs(512000) == 1000); + + assert(tb_compare(msecs_to_tb(5), usecs_to_tb(5)) == TB_AAFTERB); + assert(tb_compare(msecs_to_tb(5), usecs_to_tb(50000)) == TB_ABEFOREB); + assert(tb_compare(msecs_to_tb(5), usecs_to_tb(5)*1000) == TB_AEQUALB); + + return 0; +} diff --git a/roms/skiboot/core/test/run-timer.c b/roms/skiboot/core/test/run-timer.c new file mode 100644 index 000000000..8f8b20ed3 --- /dev/null +++ b/roms/skiboot/core/test/run-timer.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2014-2018 IBM Corp + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +#define __TEST__ +#include <timer.h> +#include <skiboot.h> + +#define mftb() (stamp) +#define sync() +#define smt_lowest() +#define smt_medium() + +enum proc_gen proc_gen = proc_gen_unknown; + +static uint64_t stamp, last; +struct lock; +static inline void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + (void)l; +} +static inline void unlock(struct lock *l) { (void)l; } + +unsigned long tb_hz = 512000000; + +#include "../timer.c" + +#define NUM_TIMERS 100 + +static struct timer timers[NUM_TIMERS]; +static unsigned int rand_shift, count; + +static void init_rand(void) +{ + unsigned long max = RAND_MAX; + + /* Get something reasonably small */ + while(max > 0x10000) { + rand_shift++; + max >>= 1; + } +} + +static void expiry(struct timer *t, void *data, uint64_t now) +{ + (void)data; + (void)now; + assert(t->target >= last); + count--; +} + +void p8_sbe_update_timer_expiry(uint64_t new_target) +{ + (void)new_target; + /* FIXME: do intersting SLW timer sim */ +} + +void p9_sbe_update_timer_expiry(uint64_t new_target) +{ + (void)new_target; +} + +int main(void) +{ + unsigned int i; + + init_rand(); + for (i = 0; i < NUM_TIMERS; i++) { + init_timer(&timers[i], expiry, NULL); + schedule_timer(&timers[i], random() >> rand_shift); + } + count = NUM_TIMERS; + while(count) { + check_timers(false); + stamp++; + } + return 0; +} diff --git a/roms/skiboot/core/test/run-trace.c b/roms/skiboot/core/test/run-trace.c new file mode 100644 index 000000000..88b090358 --- /dev/null +++ b/roms/skiboot/core/test/run-trace.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2019 IBM Corp. 
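+ *
+ * This host-built test covers the trace buffer machinery: buffer
+ * wrap-around and TRACE_OVERFLOW accounting, TRACE_REPEAT compression
+ * of identical records, interleaved add/read, and (in test_parallel)
+ * concurrent writers implemented as forked children sharing an
+ * mmap'd buffer.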
+ */ + +#include <config.h> +#include <stdlib.h> +#include <assert.h> +#include <sched.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include <skiboot-valgrind.h> + +/* Don't include these: PPC-specific */ +#define __CPU_H +#define __TIME_H +#define __PROCESSOR_H + +#if defined(__i386__) || defined(__x86_64__) +/* This is more than a lwsync, but it'll work */ +static void full_barrier(void) +{ + asm volatile("mfence" : : : "memory"); +} +#define lwsync full_barrier +#elif defined(__powerpc__) || defined(__powerpc64__) +static inline void lwsync(void) +{ + asm volatile("lwsync" : : : "memory"); +} +#else +#error "Define lwsync for this arch" +#endif + +#define zalloc(size) calloc((size), 1) + +struct cpu_thread { + uint32_t pir; + uint32_t chip_id; + struct trace_info *trace; + uint32_t server_no; + bool is_secondary; + struct cpu_thread *primary; +}; +static struct cpu_thread *this_cpu(void); + +#define CPUS 4 + +static struct cpu_thread fake_cpus[CPUS]; + +static inline struct cpu_thread *next_cpu(struct cpu_thread *cpu) +{ + if (cpu == NULL) + return &fake_cpus[0]; + cpu++; + if (cpu == &fake_cpus[CPUS]) + return NULL; + return cpu; +} + +#define first_cpu() next_cpu(NULL) + +#define for_each_cpu(cpu) \ + for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) + +static unsigned long timestamp; +static unsigned long mftb(void) +{ + return timestamp; +} + +static void *local_alloc(unsigned int chip_id, + size_t size, size_t align) +{ + void *p; + + (void)chip_id; + if (posix_memalign(&p, align, size)) + p = NULL; + return p; +} + +struct dt_node; +extern struct dt_node *opal_node; + +#include "../trace.c" + +#include "../external/trace/trace.c" +static struct trace_reader trace_readers[CPUS]; +struct trace_reader *my_trace_reader; +#include "../device.c" + +char __rodata_start[1], __rodata_end[1]; +struct dt_node *opal_node; +struct debug_descriptor debug_descriptor = { + .trace_mask = -1 +}; + +const char *nvram_query_safe(const char *key __unused) +{ + return NULL; +} + +void lock_caller(struct lock *l, const char *caller) +{ + (void)caller; + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +struct cpu_thread *my_fake_cpu; +static struct cpu_thread *this_cpu(void) +{ + return my_fake_cpu; +} + +#include <sys/mman.h> +#define PER_CHILD_TRACES ((RUNNING_ON_VALGRIND) ? (1024*16) : (1024*1024)) + +static void write_trace_entries(int id) +{ + void exit(int); + unsigned int i; + union trace trace; + + timestamp = id; + for (i = 0; i < PER_CHILD_TRACES; i++) { + timestamp = i * CPUS + id; + assert(sizeof(trace.hdr) % 8 == 0); + /* First child never repeats, second repeats once, etc. */ + trace_add(&trace, 3 + ((i / (id + 1)) % 0x40), + sizeof(trace.hdr)); + } + + /* Final entry has special type, so parent knows it's over. */ + trace_add(&trace, 0x70, sizeof(trace.hdr)); + exit(0); +} + +static bool all_done(const bool done[]) +{ + unsigned int i; + + for (i = 0; i < CPUS; i++) + if (!done[i]) + return false; + return true; +} + +static void test_parallel(void) +{ + void *p; + unsigned int cpu; + unsigned int i, counts[CPUS] = { 0 }, overflows[CPUS] = { 0 }; + unsigned int repeats[CPUS] = { 0 }, num_overflows[CPUS] = { 0 }; + bool done[CPUS] = { false }; + size_t len = sizeof(struct trace_info) + TBUF_SZ + sizeof(union trace); + int last = 0; + + /* Use a shared mmap to test actual parallel buffers. 
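+	 *
+	 * Each child gets its own trace_info carved out of one
+	 * MAP_SHARED anonymous mapping, with the total size rounded up
+	 * to a whole number of pages, so the parent can read the
+	 * per-child buffers while the children are still writing them.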
*/ + i = (CPUS*len + getpagesize()-1)&~(getpagesize()-1); + p = mmap(NULL, i, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_SHARED, -1, 0); + + for (i = 0; i < CPUS; i++) { + fake_cpus[i].trace = p + i * len; + fake_cpus[i].trace->tb.buf_size = cpu_to_be64(TBUF_SZ); + fake_cpus[i].trace->tb.max_size = cpu_to_be32(sizeof(union trace)); + fake_cpus[i].is_secondary = false; + memset(&trace_readers[i], 0, sizeof(struct trace_reader)); + trace_readers[i].tb = &fake_cpus[i].trace->tb; + } + + for (i = 0; i < CPUS; i++) { + if (!fork()) { + /* Child. */ + my_fake_cpu = &fake_cpus[i]; + write_trace_entries(i); + } + } + + while (!all_done(done)) { + union trace t; + + for (i = 0; i < CPUS; i++) { + if (trace_get(&t, &trace_readers[(i+last) % CPUS])) + break; + } + + if (i == CPUS) { + sched_yield(); + continue; + } + i = (i + last) % CPUS; + last = i; + + if (t.hdr.type == TRACE_OVERFLOW) { + /* Conveniently, each record is 16 bytes here. */ + assert(be64_to_cpu(t.overflow.bytes_missed) % 16 == 0); + overflows[i] += be64_to_cpu(t.overflow.bytes_missed) / 16; + num_overflows[i]++; + continue; + } + + assert(be16_to_cpu(t.hdr.cpu) < CPUS); + assert(!done[be16_to_cpu(t.hdr.cpu)]); + assert(be64_to_cpu(t.hdr.timestamp) % CPUS == be16_to_cpu(t.hdr.cpu)); + if (t.hdr.type == TRACE_REPEAT) { + assert(t.hdr.len_div_8 * 8 == sizeof(t.repeat)); + assert(be16_to_cpu(t.repeat.num) != 0); + assert(be16_to_cpu(t.repeat.num) <= be16_to_cpu(t.hdr.cpu)); + repeats[be16_to_cpu(t.hdr.cpu)] += be16_to_cpu(t.repeat.num); + } else if (t.hdr.type == 0x70) { + cpu = be16_to_cpu(t.hdr.cpu); + assert(cpu < CPUS); + done[cpu] = true; + } else { + cpu = be16_to_cpu(t.hdr.cpu); + assert(cpu < CPUS); + counts[cpu]++; + } + } + + /* Gather children. */ + for (i = 0; i < CPUS; i++) { + int status; + wait(&status); + } + + for (i = 0; i < CPUS; i++) { + printf("Child %i: %u produced, %u overflows, %llu total\n", i, + counts[i], overflows[i], + (long long)be64_to_cpu(fake_cpus[i].trace->tb.end)); + assert(counts[i] + repeats[i] <= PER_CHILD_TRACES); + } + /* Child 0 never repeats. */ + assert(repeats[0] == 0); + assert(counts[0] + overflows[0] == PER_CHILD_TRACES); + + /* + * FIXME: Other children have some fuzz, since overflows may + * include repeat record we already read. And odd-numbered + * overflows may include more repeat records than normal + * records (they alternate). + */ +} + +int main(void) +{ + union trace minimal; + union trace large; + union trace trace; + unsigned int i, j; + + opal_node = dt_new_root("opal"); + dt_new(dt_new(opal_node, "firmware"), "exports"); + for (i = 0; i < CPUS; i++) { + fake_cpus[i].server_no = i; + fake_cpus[i].pir = i; + fake_cpus[i].is_secondary = (i & 0x1); + fake_cpus[i].primary = &fake_cpus[i & ~0x1]; + } + my_fake_cpu = &fake_cpus[0]; + my_trace_reader = &trace_readers[0]; + init_trace_buffers(); + + for (i = 0; i < CPUS; i++) { + trace_readers[i].tb = &fake_cpus[i].trace->tb; + assert(trace_empty(&trace_readers[i])); + assert(!trace_get(&trace, &trace_readers[i])); + } + + assert(sizeof(trace.hdr) % 8 == 0); + timestamp = 1; + trace_add(&minimal, 100, sizeof(trace.hdr)); + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(be64_to_cpu(trace.hdr.timestamp) == timestamp); + + /* Make it wrap once. */ + for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8) + 1; i++) { + timestamp = i; + trace_add(&minimal, 99 + (i%2), sizeof(trace.hdr)); + } + + assert(trace_get(&trace, my_trace_reader)); + /* First one must be overflow marker. 
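+	 *
+	 * (the loop above pushed the writer exactly one unread record
+	 * past this reader, so the first entry read back is a
+	 * TRACE_OVERFLOW pseudo-record whose bytes_missed equals the
+	 * size of that single lost entry, which is what the asserts
+	 * below check)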
*/ + assert(trace.hdr.type == TRACE_OVERFLOW); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.overflow)); + assert(be64_to_cpu(trace.overflow.bytes_missed) == minimal.hdr.len_div_8 * 8); + + for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8); i++) { + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(be64_to_cpu(trace.hdr.timestamp) == i+1); + assert(trace.hdr.type == 99 + ((i+1)%2)); + } + assert(!trace_get(&trace, my_trace_reader)); + + /* Now put in some weird-length ones, to test overlap. + * Last power of 2, minus 8. */ + for (j = 0; (1 << j) < sizeof(large); j++); + for (i = 0; i < TBUF_SZ; i++) { + timestamp = i; + trace_add(&large, 100 + (i%2), (1 << (j-1))); + } + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.type == TRACE_OVERFLOW); + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.len_div_8 == large.hdr.len_div_8); + i = be64_to_cpu(trace.hdr.timestamp); + while (trace_get(&trace, my_trace_reader)) + assert(be64_to_cpu(trace.hdr.timestamp) == ++i); + + /* Test repeats. */ + for (i = 0; i < 65538; i++) { + timestamp = i; + trace_add(&minimal, 100, sizeof(trace.hdr)); + } + timestamp = i; + trace_add(&minimal, 101, sizeof(trace.hdr)); + timestamp = i+1; + trace_add(&minimal, 101, sizeof(trace.hdr)); + + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.timestamp == 0); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 100); + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(be16_to_cpu(trace.repeat.num) == 65535); + assert(be64_to_cpu(trace.repeat.timestamp) == 65535); + assert(trace_get(&trace, my_trace_reader)); + assert(be64_to_cpu(trace.hdr.timestamp) == 65536); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 100); + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(be16_to_cpu(trace.repeat.num) == 1); + assert(be64_to_cpu(trace.repeat.timestamp) == 65537); + + assert(trace_get(&trace, my_trace_reader)); + assert(be64_to_cpu(trace.hdr.timestamp) == 65538); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 101); + assert(trace_get(&trace, my_trace_reader)); + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(be16_to_cpu(trace.repeat.num) == 1); + assert(be64_to_cpu(trace.repeat.timestamp) == 65539); + + /* Now, test adding repeat while we're reading... 
*/ + timestamp = 0; + trace_add(&minimal, 100, sizeof(trace.hdr)); + assert(trace_get(&trace, my_trace_reader)); + assert(be64_to_cpu(trace.hdr.timestamp) == 0); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 100); + + for (i = 1; i < TBUF_SZ; i++) { + timestamp = i; + trace_add(&minimal, 100, sizeof(trace.hdr)); + assert(trace_get(&trace, my_trace_reader)); + if (i % 65536 == 0) { + assert(trace.hdr.type == 100); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + } else { + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(be16_to_cpu(trace.repeat.num) == 1); + } + assert(be64_to_cpu(trace.repeat.timestamp) == i); + assert(!trace_get(&trace, my_trace_reader)); + } + + for (i = 0; i < CPUS; i++) + if (!fake_cpus[i].is_secondary) + free(fake_cpus[i].trace); + + test_parallel(); + + return 0; +} diff --git a/roms/skiboot/core/test/stubs.c b/roms/skiboot/core/test/stubs.c new file mode 100644 index 000000000..0e97af249 --- /dev/null +++ b/roms/skiboot/core/test/stubs.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Copyright 2013-2019 IBM Corp + */ + +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <stdint.h> + +#include <compiler.h> +#include "../../ccan/list/list.c" + +void _prlog(int log_level __attribute__((unused)), const char* fmt, ...) __attribute__((format (printf, 2, 3))); + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif +#define prlog(l, f, ...) do { _prlog(l, pr_fmt(f), ##__VA_ARGS__); } while(0) + +void _prlog(int log_level __attribute__((unused)), const char* fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); +} + +/* Add any stub functions required for linking here. 
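+ *
+ * Most of these only need to satisfy the linker and must never run, so
+ * they are weak aliases of stub_function(), which simply aborts; e.g.
+ * STUB(fdt_begin_node) later in this file expands to
+ *
+ *	void fdt_begin_node(void)
+ *		__attribute__((weak, alias ("stub_function")));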
*/ +static void stub_function(void) +{ + abort(); +} + +struct cpu_thread; + +struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu, + const char *name, + void (*func)(void *data), void *data, + bool no_return); + +void cpu_wait_job(struct cpu_job *job, bool free_it); +void cpu_process_local_jobs(void); +struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id, + const char *name, + void (*func)(void *data), void *data); + +struct cpu_job *cpu_queue_job_on_node(uint32_t chip_id, + const char *name, + void (*func)(void *data), void *data) +{ + (void)chip_id; + return __cpu_queue_job(NULL, name, func, data, false); +} + +struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu, + const char *name, + void (*func)(void *data), void *data, + bool no_return) +{ + (void)cpu; + (void)name; + (func)(data); + (void)no_return; + return NULL; +} + +void cpu_wait_job(struct cpu_job *job, bool free_it) +{ + (void)job; + (void)free_it; + return; +} + +void cpu_process_local_jobs(void) +{ +} + +#define STUB(fnname) \ + void fnname(void) __attribute__((weak, alias ("stub_function"))) + +STUB(fdt_begin_node); +STUB(fdt_property); +STUB(fdt_end_node); +STUB(fdt_create_with_flags); +STUB(fdt_add_reservemap_entry); +STUB(fdt_finish_reservemap); +STUB(fdt_strerror); +STUB(fdt_check_header); +STUB(fdt_check_node_offset_); +STUB(fdt_next_tag); +STUB(fdt_string); +STUB(fdt_get_name); +STUB(dt_first); +STUB(dt_next); +STUB(dt_has_node_property); +STUB(dt_get_address); +STUB(add_chip_dev_associativity); +STUB(pci_check_clear_freeze); diff --git a/roms/skiboot/core/time-utils.c b/roms/skiboot/core/time-utils.c new file mode 100644 index 000000000..e948654d3 --- /dev/null +++ b/roms/skiboot/core/time-utils.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Converts an OPAL formatted datetime into a struct tm. We ignore microseconds + * as Linux doesn't use them anyway. + * + * | year | month | mday | + * +------------------------------------+ + * | hour | minute | secs | reserved | + * +------------------------------------+ + * | microseconds | + * + * Copyright 2013-2014 IBM Corp. + */ + +#include <time-utils.h> + +void datetime_to_tm(uint32_t y_m_d, uint64_t h_m_s_m, struct tm *tm) +{ + uint32_t x; + + tm->tm_year = bcd_byte(y_m_d, 3) * 100 + bcd_byte(y_m_d, 2); + tm->tm_mon = bcd_byte(y_m_d, 1) - 1; + tm->tm_mday = bcd_byte(y_m_d, 0); + + x = h_m_s_m >> 32; + tm->tm_hour = bcd_byte(x, 3); + tm->tm_min = bcd_byte(x, 2); + tm->tm_sec = bcd_byte(x, 1); +} + +/* + * The OPAL API is defined as returned a u64 of a similar + * format to the FSP message; the 32-bit date field is + * in the format: + * + * | year | month | mday | + * + * ... and the 64-bit time field is in the format + * + * | hour | minutes | secs | millisec | + * | ------------------------------------- + * | millisec | reserved | + * + * We simply ignore the microseconds/milliseconds for now + * as I don't quite understand why the OPAL API defines that + * it needs 6 digits for the milliseconds :-) I suspect the + * doc got that wrong and it's supposed to be micro but + * let's ignore it. + * + * Note that Linux doesn't use nor set the ms field anyway. 
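+ *
+ * Worked example (the same values core/test/run-time-utils.c checks):
+ * 1982-01-29 07:42:24 packs, in BCD, into
+ *
+ *	y_m_d   = 0x19820129
+ *	h_m_s_m = 0x0742240000000000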
+ */ +void tm_to_datetime(struct tm *tm, uint32_t *y_m_d, uint64_t *h_m_s_m) +{ + uint64_t h_m_s; + *y_m_d = int_to_bcd4(tm->tm_year) << 16 | + int_to_bcd2(tm->tm_mon + 1) << 8 | + int_to_bcd2(tm->tm_mday); + + h_m_s = int_to_bcd2(tm->tm_hour) << 24 | + int_to_bcd2(tm->tm_min) << 16 | + int_to_bcd2(tm->tm_sec) << 8; + + *h_m_s_m = h_m_s << 32; +} diff --git a/roms/skiboot/core/timebase.c b/roms/skiboot/core/timebase.c new file mode 100644 index 000000000..451e3710e --- /dev/null +++ b/roms/skiboot/core/timebase.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Wait for things, by waiting for timebase to tick over + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <timebase.h> +#include <opal.h> +#include <cpu.h> +#include <chip.h> +#include <debug_descriptor.h> + +unsigned long tb_hz = 512000000; + +static void time_wait_poll(unsigned long duration) +{ + unsigned long now = mftb(); + unsigned long end = now + duration; + unsigned long period = msecs_to_tb(5); + + if (this_cpu()->tb_invalid) { + /* + * Run pollers to allow some backends to process response. + * + * In TOD failure case where TOD is unrecoverable, running + * pollers allows ipmi backend to deal with ipmi response + * from bmc and helps ipmi_queue_msg_sync() to get un-stuck. + * Thus it avoids linux kernel to hang during panic due to + * TOD failure. + */ + opal_run_pollers(); + cpu_relax(); + return; + } + + while (tb_compare(now, end) != TB_AAFTERB) { + + unsigned long remaining = end - now; + + /* Call pollers periodically but not continually to avoid + * bouncing cachelines due to lock contention. */ + if (remaining >= period) { + opal_run_pollers(); + time_wait_nopoll(period); + } else + time_wait_nopoll(remaining); + + now = mftb(); + } +} + +void time_wait(unsigned long duration) +{ + struct cpu_thread *c = this_cpu(); + + if (!list_empty(&this_cpu()->locks_held)) { + time_wait_nopoll(duration); + return; + } + + if (c != boot_cpu && opal_booting()) + time_wait_nopoll(duration); + else + time_wait_poll(duration); +} + +void time_wait_nopoll(unsigned long duration) +{ + if (this_cpu()->tb_invalid) { + cpu_relax(); + return; + } + + cpu_idle_delay(duration); +} + +void time_wait_ms(unsigned long ms) +{ + time_wait(msecs_to_tb(ms)); +} + +void time_wait_ms_nopoll(unsigned long ms) +{ + time_wait_nopoll(msecs_to_tb(ms)); +} + +void time_wait_us(unsigned long us) +{ + time_wait(usecs_to_tb(us)); +} + +void time_wait_us_nopoll(unsigned long us) +{ + time_wait_nopoll(usecs_to_tb(us)); +} + +unsigned long timespec_to_tb(const struct timespec *ts) +{ + unsigned long ns; + + /* First convert to ns */ + ns = ts->tv_sec * 1000000000ul; + ns += ts->tv_nsec; + + /* + * This is a very rough approximation, it works provided + * we never try to pass too long delays here and the TB + * frequency isn't significantly lower than 512Mhz. 
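+ *
+ * For example, with tb_hz = 512000000 the >> 24 variant turns
+ * 1ms (1000000ns) into 1000000 * (512000000 >> 24) / (1000000000 >> 24)
+ * = 1000000 * 30 / 59 = 508474 ticks instead of the exact 512000,
+ * an error of well under 1%.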
+ * + * We could improve the precision by shifting less bits + * at the expense of capacity or do 128 bit math which + * I'm not eager to do :-) + */ + if (chip_quirk(QUIRK_SLOW_SIM)) + return (ns * (tb_hz >> 16)) / (1000000000ul >> 16); + else + return (ns * (tb_hz >> 24)) / (1000000000ul >> 24); +} + +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + time_wait(timespec_to_tb(req)); + + if (rem) { + rem->tv_sec = 0; + rem->tv_nsec = 0; + } + return 0; +} + +int nanosleep_nopoll(const struct timespec *req, struct timespec *rem) +{ + time_wait_nopoll(timespec_to_tb(req)); + + if (rem) { + rem->tv_sec = 0; + rem->tv_nsec = 0; + } + return 0; +} diff --git a/roms/skiboot/core/timer.c b/roms/skiboot/core/timer.c new file mode 100644 index 000000000..652ffba30 --- /dev/null +++ b/roms/skiboot/core/timer.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * run something, but later. + * + * Timers are run when the SBE timer interrupt triggers (based on us setting + * it) or when the regular heartbeat call from the OS occurs and there's a + * timer that's expired. + * + * Copyright 2014-2019 IBM Corp. + */ + +#include <timer.h> +#include <timebase.h> +#include <lock.h> +#include <fsp.h> +#include <device.h> +#include <opal.h> +#include <sbe-p8.h> +#include <sbe-p9.h> + +#ifdef __TEST__ +#define this_cpu() ((void *)-1) +#define cpu_relax() +#else +#include <cpu.h> +#endif + +/* Heartbeat requested from Linux */ +#define HEARTBEAT_DEFAULT_MS 200 + +static struct lock timer_lock = LOCK_UNLOCKED; +static LIST_HEAD(timer_list); +static LIST_HEAD(timer_poll_list); +static bool timer_in_poll; +static uint64_t timer_poll_gen; + +static inline void update_timer_expiry(uint64_t target) +{ + if (proc_gen < proc_gen_p9) + p8_sbe_update_timer_expiry(target); + else + p9_sbe_update_timer_expiry(target); +} + +void init_timer(struct timer *t, timer_func_t expiry, void *data) +{ + t->link.next = t->link.prev = NULL; + t->target = 0; + t->expiry = expiry; + t->user_data = data; + t->running = NULL; +} + +static void __remove_timer(struct timer *t) +{ + list_del(&t->link); + t->link.next = t->link.prev = NULL; +} + +static void __sync_timer(struct timer *t) +{ + sync(); + + /* Guard against re-entrancy */ + assert(t->running != this_cpu()); + + while (t->running) { + unlock(&timer_lock); + smt_lowest(); + while (t->running) + barrier(); + smt_medium(); + /* Should we call the pollers here ? 
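+		 *
+		 * (we spin with timer_lock dropped so that the CPU
+		 * running the expiry can re-take the lock and clear
+		 * t->running; this is what makes the usual teardown
+		 * pattern safe, e.g. with a hypothetical driver struct:
+		 *
+		 *	cancel_timer(&mydev->timer);
+		 *	free(mydev);
+		 *
+		 * where cancel_timer() waits for any in-flight expiry
+		 * before the user_data is released)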
*/ + lock(&timer_lock); + } +} + +void sync_timer(struct timer *t) +{ + lock(&timer_lock); + __sync_timer(t); + unlock(&timer_lock); +} + +void cancel_timer(struct timer *t) +{ + lock(&timer_lock); + __sync_timer(t); + if (t->link.next) + __remove_timer(t); + unlock(&timer_lock); +} + +void cancel_timer_async(struct timer *t) +{ + lock(&timer_lock); + if (t->link.next) + __remove_timer(t); + unlock(&timer_lock); +} + +static void __schedule_timer_at(struct timer *t, uint64_t when) +{ + struct timer *lt; + + /* If the timer is already scheduled, take it out */ + if (t->link.next) + __remove_timer(t); + + /* Update target */ + t->target = when; + + if (when == TIMER_POLL) { + /* It's a poller, add it to the poller list */ + t->gen = timer_poll_gen; + list_add_tail(&timer_poll_list, &t->link); + } else { + /* It's a real timer, add it in the right spot in the + * ordered timer list + */ + list_for_each(&timer_list, lt, link) { + if (when >= lt->target) + continue; + list_add_before(&timer_list, &t->link, <->link); + goto bail; + } + list_add_tail(&timer_list, &t->link); + } + bail: + /* Pick up the next timer and upddate the SBE HW timer */ + lt = list_top(&timer_list, struct timer, link); + if (lt) { + update_timer_expiry(lt->target); + } +} + +void schedule_timer_at(struct timer *t, uint64_t when) +{ + lock(&timer_lock); + __schedule_timer_at(t, when); + unlock(&timer_lock); +} + +uint64_t schedule_timer(struct timer *t, uint64_t how_long) +{ + uint64_t now = mftb(); + + if (how_long == TIMER_POLL) + schedule_timer_at(t, TIMER_POLL); + else + schedule_timer_at(t, now + how_long); + + return now; +} + +static void __check_poll_timers(uint64_t now) +{ + struct timer *t; + + /* Don't call this from multiple CPUs at once */ + if (timer_in_poll) + return; + timer_in_poll = true; + + /* + * Poll timers might re-enqueue themselves and don't have an + * expiry so we can't do like normal timers and just run until + * we hit a wall. Instead, each timer has a generation count, + * which we set to the current global gen count when we schedule + * it and update when we run it. It will only be considered if + * the generation count is different than the current one. We + * don't try to compare generations being larger or smaller + * because at boot, this can be called quite quickly and I want + * to be safe vs. wraps. + */ + timer_poll_gen++; + for (;;) { + t = list_top(&timer_poll_list, struct timer, link); + + /* Top timer has a different generation than current ? Must + * be older, we are done. + */ + if (!t || t->gen == timer_poll_gen) + break; + + /* Top of list still running, we have to delay handling it, + * let's reprogram the SLW with a small delay. We chose + * arbitrarily 1us. + */ + if (t->running) { + update_timer_expiry(now + usecs_to_tb(1)); + break; + } + + /* Allright, first remove it and mark it running */ + __remove_timer(t); + t->running = this_cpu(); + + /* Now we can unlock and call it's expiry */ + unlock(&timer_lock); + t->expiry(t, t->user_data, now); + + /* Re-lock and mark not running */ + lock(&timer_lock); + t->running = NULL; + } + timer_in_poll = false; +} + +static void __check_timers(uint64_t now) +{ + struct timer *t; + + for (;;) { + t = list_top(&timer_list, struct timer, link); + + /* Top of list not expired ? that's it ... */ + if (!t || t->target > now) + break; + + /* Top of list still running, we have to delay handling + * it. 
For now just skip until the next poll, when we have + * SLW interrupts, we'll probably want to trip another one + * ASAP + */ + if (t->running) + break; + + /* Allright, first remove it and mark it running */ + __remove_timer(t); + t->running = this_cpu(); + + /* Now we can unlock and call it's expiry */ + unlock(&timer_lock); + t->expiry(t, t->user_data, now); + + /* Re-lock and mark not running */ + lock(&timer_lock); + t->running = NULL; + + /* Update time stamp */ + now = mftb(); + } +} + +void check_timers(bool from_interrupt) +{ + uint64_t now = mftb(); + + /* This is the polling variant, the SLW interrupt path, when it + * exists, will use a slight variant of this that doesn't call + * the pollers + */ + + /* Lockless "peek", a bit racy but shouldn't be a problem as + * we are only looking at whether the list is empty + */ + if (list_empty_nocheck(&timer_poll_list) && + list_empty_nocheck(&timer_list)) + return; + + /* Take lock and try again */ + lock(&timer_lock); + if (!from_interrupt) + __check_poll_timers(now); + __check_timers(now); + unlock(&timer_lock); +} + +#ifndef __TEST__ + +void late_init_timers(void) +{ + int heartbeat = HEARTBEAT_DEFAULT_MS; + + /* Add a property requesting the OS to call opal_poll_event() at + * a specified interval in order for us to run our background + * low priority pollers. + * + * If a platform quirk exists, use that, else use the default. + * + * If we have an SBE timer facility, we run this 10 times slower, + * we could possibly completely get rid of it. + * + * We use a value in milliseconds, we don't want this to ever be + * faster than that. + */ + if (platform.heartbeat_time) { + heartbeat = platform.heartbeat_time(); + } else if (p9_sbe_timer_ok()) { + heartbeat = HEARTBEAT_DEFAULT_MS * 10; + } else if (p8_sbe_timer_ok()) { + heartbeat = HEARTBEAT_DEFAULT_MS * 10; + } + + dt_add_property_cells(opal_node, "ibm,heartbeat-ms", heartbeat); +} +#endif diff --git a/roms/skiboot/core/trace.c b/roms/skiboot/core/trace.c new file mode 100644 index 000000000..561bd79e0 --- /dev/null +++ b/roms/skiboot/core/trace.c @@ -0,0 +1,265 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Trace various things into in-memory buffers + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <trace.h> +#include <timebase.h> +#include <lock.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <cpu.h> +#include <device.h> +#include <libfdt.h> +#include <processor.h> +#include <skiboot.h> +#include <opal-api.h> +#include <debug_descriptor.h> +#include <nvram.h> + +#define DEBUG_TRACES + +#define MAX_SIZE sizeof(union trace) + +/* Smaller trace buffer for early booting */ +#define BOOT_TBUF_SZ 65536 +static struct { + struct trace_info trace_info; + char buf[BOOT_TBUF_SZ + MAX_SIZE]; +} boot_tracebuf __section(".data.boot_trace"); + +void init_boot_tracebuf(struct cpu_thread *boot_cpu) +{ + init_lock(&boot_tracebuf.trace_info.lock); + boot_tracebuf.trace_info.tb.buf_size = cpu_to_be64(BOOT_TBUF_SZ); + boot_tracebuf.trace_info.tb.max_size = cpu_to_be32(MAX_SIZE); + + boot_cpu->trace = &boot_tracebuf.trace_info; +} + +static size_t tracebuf_extra(void) +{ + /* We make room for the largest possible record */ + return TBUF_SZ + MAX_SIZE; +} + +/* To avoid bloating each entry, repeats are actually specific entries. + * tb->last points to the last (non-repeat) entry. 
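+ *
+ * A repeat entry is a small trace_repeat record carrying the timestamp
+ * of the most recent duplicate, the length of the record it repeats
+ * and a 16-bit count, so a burst of identical events costs one full
+ * record plus one repeat entry instead of one record per event.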
*/ +static bool handle_repeat(struct tracebuf *tb, const union trace *trace) +{ + struct trace_hdr *prev; + struct trace_repeat *rpt; + u32 len; + + prev = (void *)tb->buf + be64_to_cpu(tb->last) % be64_to_cpu(tb->buf_size); + + if (prev->type != trace->hdr.type + || prev->len_div_8 != trace->hdr.len_div_8 + || prev->cpu != trace->hdr.cpu) + return false; + + len = prev->len_div_8 << 3; + if (memcmp(prev + 1, &trace->hdr + 1, len - sizeof(*prev)) != 0) + return false; + + /* If they've consumed prev entry, don't repeat. */ + if (be64_to_cpu(tb->last) < be64_to_cpu(tb->start)) + return false; + + /* OK, it's a duplicate. Do we already have repeat? */ + if (be64_to_cpu(tb->last) + len != be64_to_cpu(tb->end)) { + u64 pos = be64_to_cpu(tb->last) + len; + /* FIXME: Reader is not protected from seeing this! */ + rpt = (void *)tb->buf + pos % be64_to_cpu(tb->buf_size); + assert(pos + rpt->len_div_8*8 == be64_to_cpu(tb->end)); + assert(rpt->type == TRACE_REPEAT); + + /* If this repeat entry is full, don't repeat. */ + if (be16_to_cpu(rpt->num) == 0xFFFF) + return false; + + rpt->num = cpu_to_be16(be16_to_cpu(rpt->num) + 1); + rpt->timestamp = trace->hdr.timestamp; + return true; + } + + /* + * Generate repeat entry: it's the smallest possible entry, so we + * must have eliminated old entries. + */ + assert(trace->hdr.len_div_8 * 8 >= sizeof(*rpt)); + + rpt = (void *)tb->buf + be64_to_cpu(tb->end) % be64_to_cpu(tb->buf_size); + rpt->timestamp = trace->hdr.timestamp; + rpt->type = TRACE_REPEAT; + rpt->len_div_8 = sizeof(*rpt) >> 3; + rpt->cpu = trace->hdr.cpu; + rpt->prev_len = cpu_to_be16(trace->hdr.len_div_8 << 3); + rpt->num = cpu_to_be16(1); + lwsync(); /* write barrier: complete repeat record before exposing */ + tb->end = cpu_to_be64(be64_to_cpu(tb->end) + sizeof(*rpt)); + return true; +} + +void trace_add(union trace *trace, u8 type, u16 len) +{ + struct trace_info *ti = this_cpu()->trace; + unsigned int tsz; + + trace->hdr.type = type; + trace->hdr.len_div_8 = (len + 7) >> 3; + + tsz = trace->hdr.len_div_8 << 3; + +#ifdef DEBUG_TRACES + assert(tsz >= sizeof(trace->hdr)); + assert(tsz <= sizeof(*trace)); + assert(trace->hdr.type != TRACE_REPEAT); + assert(trace->hdr.type != TRACE_OVERFLOW); +#endif + /* Skip traces not enabled in the debug descriptor */ + if (trace->hdr.type < (8 * sizeof(debug_descriptor.trace_mask)) && + !((1ul << trace->hdr.type) & be64_to_cpu(debug_descriptor.trace_mask))) + return; + + trace->hdr.timestamp = cpu_to_be64(mftb()); + trace->hdr.cpu = cpu_to_be16(this_cpu()->server_no); + + lock(&ti->lock); + + /* Throw away old entries before we overwrite them. */ + while ((be64_to_cpu(ti->tb.start) + be64_to_cpu(ti->tb.buf_size)) + < (be64_to_cpu(ti->tb.end) + tsz)) { + struct trace_hdr *hdr; + + hdr = (void *)ti->tb.buf + + be64_to_cpu(ti->tb.start) % be64_to_cpu(ti->tb.buf_size); + ti->tb.start = cpu_to_be64(be64_to_cpu(ti->tb.start) + + (hdr->len_div_8 << 3)); + } + + /* Must update ->start before we rewrite new entries. */ + lwsync(); /* write barrier */ + + /* Check for duplicates... 
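+	 *
+	 * (handle_repeat() only merges records whose type, length, cpu
+	 * and payload all match the previous entry, and the 16-bit
+	 * repeat count saturates at 0xFFFF; core/test/run-trace.c shows
+	 * the net effect: 65538 identical trace_add() calls become one
+	 * record, a TRACE_REPEAT with num == 65535, a second record and
+	 * a TRACE_REPEAT with num == 1)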
*/ + if (!handle_repeat(&ti->tb, trace)) { + /* This may go off end, and that's why ti->tb.buf is oversize */ + memcpy(ti->tb.buf + be64_to_cpu(ti->tb.end) % be64_to_cpu(ti->tb.buf_size), + trace, tsz); + ti->tb.last = ti->tb.end; + lwsync(); /* write barrier: write entry before exposing */ + ti->tb.end = cpu_to_be64(be64_to_cpu(ti->tb.end) + tsz); + } + unlock(&ti->lock); +} + +void trace_add_dt_props(void) +{ + uint64_t boot_buf_phys = (uint64_t) &boot_tracebuf.trace_info; + struct dt_node *exports, *traces; + unsigned int i; + fdt64_t *prop; + u64 tmask; + char tname[256]; + + exports = dt_find_by_path(opal_node, "firmware/exports"); + if (!exports) + return; + + /* + * nvram hack to put all the trace buffer exports in the exports + * node. This is useful if the kernel doesn't also export subnodes. + */ + if (nvram_query_safe("flat-trace-buf")) + traces = exports; + else + traces = dt_new(exports, "traces"); + + prop = malloc(sizeof(u64) * 2 * be32_to_cpu(debug_descriptor.num_traces)); + + for (i = 0; i < be32_to_cpu(debug_descriptor.num_traces); i++) { + uint64_t addr = be64_to_cpu(debug_descriptor.trace_phys[i]); + uint64_t size = be32_to_cpu(debug_descriptor.trace_size[i]); + uint32_t pir = be16_to_cpu(debug_descriptor.trace_pir[i]); + + prop[i * 2] = cpu_to_fdt64(addr); + prop[i * 2 + 1] = cpu_to_fdt64(size); + + if (addr == boot_buf_phys) + snprintf(tname, sizeof(tname), "boot-%x", pir); + else + snprintf(tname, sizeof(tname), "trace-%x", pir); + + dt_add_property_u64s(traces, tname, addr, size); + } + + dt_add_property(opal_node, "ibm,opal-traces", + prop, sizeof(u64) * 2 * i); + free(prop); + + tmask = (uint64_t)&debug_descriptor.trace_mask; + dt_add_property_u64(opal_node, "ibm,opal-trace-mask", tmask); +} + +static void trace_add_desc(struct trace_info *t, uint64_t size, uint16_t pir) +{ + unsigned int i = be32_to_cpu(debug_descriptor.num_traces); + + if (i >= DEBUG_DESC_MAX_TRACES) { + prerror("TRACE: Debug descriptor trace list full !\n"); + return; + } + + debug_descriptor.num_traces = cpu_to_be32(i + 1); + debug_descriptor.trace_phys[i] = cpu_to_be64((uint64_t)t); + debug_descriptor.trace_tce[i] = 0; /* populated later */ + debug_descriptor.trace_size[i] = cpu_to_be32(size); + debug_descriptor.trace_pir[i] = cpu_to_be16(pir); +} + +/* Allocate trace buffers once we know memory topology */ +void init_trace_buffers(void) +{ + struct cpu_thread *t; + struct trace_info *any = &boot_tracebuf.trace_info; + uint64_t size; + + /* Boot the boot trace in the debug descriptor */ + trace_add_desc(any, sizeof(boot_tracebuf), this_cpu()->pir); + + /* Allocate a trace buffer for each primary cpu. */ + for_each_cpu(t) { + if (t->is_secondary) + continue; + + /* Use a 64K alignment for TCE mapping */ + size = ALIGN_UP(sizeof(*t->trace) + tracebuf_extra(), 0x10000); + t->trace = local_alloc(t->chip_id, size, 0x10000); + if (t->trace) { + any = t->trace; + memset(t->trace, 0, size); + init_lock(&t->trace->lock); + t->trace->tb.max_size = cpu_to_be32(MAX_SIZE); + t->trace->tb.buf_size = cpu_to_be64(TBUF_SZ); + trace_add_desc(any, sizeof(t->trace->tb) + + tracebuf_extra(), t->pir); + } else + prerror("TRACE: cpu 0x%x allocation failed\n", t->pir); + } + + /* In case any allocations failed, share trace buffers. */ + for_each_cpu(t) { + if (!t->is_secondary && !t->trace) + t->trace = any; + } + + /* And copy those to the secondaries. 
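+	 *
+	 * (secondary threads share their primary's buffer, so all
+	 * threads of a core trace into a single per-core buffer;
+	 * individual writers can still be told apart because
+	 * trace_add() stamps each record with the thread's server_no)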
*/ + for_each_cpu(t) { + if (!t->is_secondary) + continue; + t->trace = t->primary->trace; + } +} diff --git a/roms/skiboot/core/utils.c b/roms/skiboot/core/utils.c new file mode 100644 index 000000000..0d2f5e894 --- /dev/null +++ b/roms/skiboot/core/utils.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Misc utility functions + * + * Copyright 2013-2018 IBM Corp. + */ + +#include <skiboot.h> +#include <lock.h> +#include <fsp.h> +#include <platform.h> +#include <processor.h> +#include <cpu.h> +#include <stack.h> + +void __noreturn assert_fail(const char *msg, const char *file, + unsigned int line, const char *function) +{ + static bool in_abort = false; + + (void)function; + if (in_abort) + for (;;) ; + in_abort = true; + + /** + * @fwts-label FailedAssert2 + * @fwts-advice OPAL hit an assert(). During normal usage (even + * testing) we should never hit an assert. There are other code + * paths for controlled shutdown/panic in the event of catastrophic + * errors. + */ + prlog(PR_EMERG, "assert failed at %s:%u: %s\n", file, line, msg); + backtrace(); + + if (platform.terminate) + platform.terminate(msg); + + for (;;) ; +} + +char __attrconst tohex(uint8_t nibble) +{ + static const char __tohex[] = {'0','1','2','3','4','5','6','7','8','9', + 'A','B','C','D','E','F'}; + if (nibble > 0xf) + return '?'; + return __tohex[nibble]; +} + +static unsigned long get_symbol(unsigned long addr, char **sym, char **sym_end) +{ + unsigned long prev = 0, next; + char *psym = NULL, *p = __sym_map_start; + + *sym = *sym_end = NULL; + while(p < __sym_map_end) { + next = strtoul(p, &p, 16) | SKIBOOT_BASE; + if (next > addr && prev <= addr) { + p = psym + 3;; + if (p >= __sym_map_end) + return 0; + *sym = p; + while(p < __sym_map_end && *p != 10) + p++; + *sym_end = p; + return prev; + } + prev = next; + psym = p; + while(p < __sym_map_end && *p != 10) + p++; + p++; + } + return 0; +} + +size_t snprintf_symbol(char *buf, size_t len, uint64_t addr) +{ + unsigned long saddr; + char *sym, *sym_end; + size_t l; + + saddr = get_symbol(addr, &sym, &sym_end); + if (!saddr) + return 0; + + if (len > sym_end - sym) + l = sym_end - sym; + else + l = len - 1; + memcpy(buf, sym, l); + + /* + * This snprintf will insert the terminating NUL even if the + * symbol has used up the entire buffer less 1. + */ + l += snprintf(buf + l, len - l, "+0x%llx", addr - saddr); + + return l; +} diff --git a/roms/skiboot/core/vpd.c b/roms/skiboot/core/vpd.c new file mode 100644 index 000000000..20fe09597 --- /dev/null +++ b/roms/skiboot/core/vpd.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Parse Vital Product Data (VPD) + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <vpd.h> +#include <string.h> +#include <device.h> + +#define CHECK_SPACE(_p, _n, _e) (((_e) - (_p)) >= (_n)) + +/* Low level keyword search in a record. 
Can be used when we + * need to find the next keyword of a given type, for example + * when having multiple MF/SM keyword pairs + */ +const void *vpd_find_keyword(const void *rec, size_t rec_sz, + const char *kw, uint8_t *kw_size) +{ + const uint8_t *p = rec, *end = rec + rec_sz; + + while (CHECK_SPACE(p, 3, end)) { + uint8_t k1 = *(p++); + uint8_t k2 = *(p++); + uint8_t sz = *(p++); + + if (k1 == kw[0] && k2 == kw[1]) { + if (kw_size) + *kw_size = sz; + return p; + } + p += sz; + } + return NULL; +} + +/* vpd_valid - does some basic sanity checks to ensure a VPD blob is + * actually a VPD blob + */ +bool vpd_valid(const void *vvpd, size_t vpd_size) +{ + const uint8_t *vpd = vvpd; + int size, i = 0; + + /* find the record start byte */ + while (i < vpd_size) + if (vpd[i++] == 0x84) + break; + + if (i >= vpd_size) + return false; + + /* next two bytes are the record length, little endian */ + size = 2; + size += vpd[i]; + size += vpd[i + 1] << 8; + + i += size; /* skip to the end marker */ + + if (i >= vpd_size || vpd[i] != 0x78) + return false; + + return true; +} + +/* Locate a record in a VPD blob + * + * Note: This works with VPD LIDs. It will scan until it finds + * the first 0x84, so it will skip all those 0's that the VPD + * LIDs seem to contain + */ +const void *vpd_find_record(const void *vpd, size_t vpd_size, + const char *record, size_t *sz) +{ + const uint8_t *p = vpd, *end = vpd + vpd_size; + bool first_start = true; + size_t rec_sz; + uint8_t namesz = 0; + const char *rec_name; + + if (!vpd) + return NULL; + + while (CHECK_SPACE(p, 4, end)) { + /* Get header byte */ + if (*(p++) != 0x84) { + /* Skip initial crap in VPD LIDs */ + if (first_start) + continue; + break; + } + first_start = false; + rec_sz = *(p++); + rec_sz |= *(p++) << 8; + if (!CHECK_SPACE(p, rec_sz, end)) { + prerror("VPD: Malformed or truncated VPD," + " record size doesn't fit\n"); + return NULL; + } + + /* Find record name */ + rec_name = vpd_find_keyword(p, rec_sz, "RT", &namesz); + if (rec_name && strncmp(record, rec_name, namesz) == 0) { + if (sz) + *sz = rec_sz; + return p; + } + + p += rec_sz; + if (*(p++) != 0x78) { + prerror("VPD: Malformed or truncated VPD," + " missing final 0x78 in record %.4s\n", + rec_name ? rec_name : "????"); + return NULL; + } + } + return NULL; +} + +/* Locate a keyword in a record in a VPD blob + * + * Note: This works with VPD LIDs. It will scan until it finds + * the first 0x84, so it will skip all those 0's that the VPD + * LIDs seem to contain + */ +const void *vpd_find(const void *vpd, size_t vpd_size, + const char *record, const char *keyword, + uint8_t *sz) +{ + size_t rec_sz; + const uint8_t *p; + + p = vpd_find_record(vpd, vpd_size, record, &rec_sz); + if (p) + p = vpd_find_keyword(p, rec_sz, keyword, sz); + return p; +} |