author     2023-10-10 14:33:42 +0000
committer  2023-10-10 14:33:42 +0000
commit     af1a266670d040d2f4083ff309d732d648afba2a (patch)
tree       2fc46203448ddcc6f81546d379abfaeb323575e9 /roms/skiboot/core/opal.c
parent     e02cda008591317b1625707ff8e115a4841aa889 (diff)
Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec
Diffstat (limited to 'roms/skiboot/core/opal.c')
-rw-r--r--   roms/skiboot/core/opal.c   700
1 file changed, 700 insertions, 0 deletions
diff --git a/roms/skiboot/core/opal.c b/roms/skiboot/core/opal.c new file mode 100644 index 000000000..2898a45ce --- /dev/null +++ b/roms/skiboot/core/opal.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later +/* + * Base support for OPAL calls + * + * Copyright 2013-2019 IBM Corp. + */ + +#include <skiboot.h> +#include <opal.h> +#include <stack.h> +#include <lock.h> +#include <fsp.h> +#include <cpu.h> +#include <interrupts.h> +#include <op-panel.h> +#include <device.h> +#include <console.h> +#include <trace.h> +#include <timebase.h> +#include <affinity.h> +#include <opal-msg.h> +#include <timer.h> +#include <elf-abi.h> +#include <errorlog.h> +#include <occ.h> + +/* Pending events to signal via opal_poll_events */ +uint64_t opal_pending_events; + +/* OPAL dispatch table defined in head.S */ +extern const uint64_t opal_branch_table[]; + +/* Number of args expected for each call. */ +static const u8 opal_num_args[OPAL_LAST+1]; + +/* OPAL anchor node */ +struct dt_node *opal_node; + +/* mask of dynamic vs fixed events; opal_allocate_dynamic_event will + * only allocate from this range */ +static const uint64_t opal_dynamic_events_mask = 0xffffffff00000000ul; +static uint64_t opal_dynamic_events; + +extern uint32_t attn_trigger; +extern uint32_t hir_trigger; + + +void opal_table_init(void) +{ + struct opal_table_entry *s = __opal_table_start; + struct opal_table_entry *e = __opal_table_end; + + prlog(PR_DEBUG, "OPAL table: %p .. %p, branch table: %p\n", + s, e, opal_branch_table); + while(s < e) { + ((uint64_t *)opal_branch_table)[s->token] = function_entry_address(s->func); + ((u8 *)opal_num_args)[s->token] = s->nargs; + s++; + } +} + +/* Called from head.S, thus no prototype */ +long opal_bad_token(uint64_t token); + +long opal_bad_token(uint64_t token) +{ + /** + * @fwts-label OPALBadToken + * @fwts-advice OPAL was called with a bad token. On POWER8 and + * earlier, Linux kernels had a bug where they wouldn't check + * if firmware supported particular OPAL calls before making them. + * It is, in fact, harmless for these cases. On systems newer than + * POWER8, this should never happen and indicates a kernel bug + * where OPAL_CHECK_TOKEN isn't being called where it should be. + */ + prlog(PR_ERR, "OPAL: Called with bad token %lld !\n", token); + + return OPAL_PARAMETER; +} + +#ifdef OPAL_TRACE_ENTRY +static void opal_trace_entry(struct stack_frame *eframe __unused) +{ + union trace t; + unsigned nargs, i; + + if (eframe->gpr[0] > OPAL_LAST) + nargs = 0; + else + nargs = opal_num_args[eframe->gpr[0]]; + + t.opal.token = cpu_to_be64(eframe->gpr[0]); + t.opal.lr = cpu_to_be64(eframe->lr); + t.opal.sp = cpu_to_be64(eframe->gpr[1]); + for(i=0; i<nargs; i++) + t.opal.r3_to_11[i] = cpu_to_be64(eframe->gpr[3+i]); + + trace_add(&t, TRACE_OPAL, offsetof(struct trace_opal, r3_to_11[nargs])); +} +#endif + +/* + * opal_quiesce_state is used as a lock. Don't use an actual lock to avoid + * lock busting. + */ +static uint32_t opal_quiesce_state; /* 0 or QUIESCE_HOLD/QUIESCE_REJECT */ +static int32_t opal_quiesce_owner; /* PIR */ +static int32_t opal_quiesce_target; /* -1 or PIR */ + +static int64_t opal_check_token(uint64_t token); + +/* Called from head.S, thus no prototype */ +int64_t opal_entry_check(struct stack_frame *eframe); + +int64_t opal_entry_check(struct stack_frame *eframe) +{ + struct cpu_thread *cpu = this_cpu(); + uint64_t token = eframe->gpr[0]; + + if (cpu->pir != mfspr(SPR_PIR)) { + printf("CPU MISMATCH ! 
PIR=%04lx cpu @%p -> pir=%04x token=%llu\n", + mfspr(SPR_PIR), cpu, cpu->pir, token); + abort(); + } + +#ifdef OPAL_TRACE_ENTRY + opal_trace_entry(eframe); +#endif + + if (!opal_check_token(token)) + return opal_bad_token(token); + + if (!opal_quiesce_state && cpu->in_opal_call > 1) { + disable_fast_reboot("Kernel re-entered OPAL"); + switch (token) { + case OPAL_CONSOLE_READ: + case OPAL_CONSOLE_WRITE: + case OPAL_CONSOLE_WRITE_BUFFER_SPACE: + case OPAL_CONSOLE_FLUSH: + case OPAL_POLL_EVENTS: + case OPAL_CHECK_TOKEN: + case OPAL_CEC_REBOOT: + case OPAL_CEC_REBOOT2: + case OPAL_SIGNAL_SYSTEM_RESET: + break; + default: + printf("CPU ATTEMPT TO RE-ENTER FIRMWARE! PIR=%04lx cpu @%p -> pir=%04x token=%llu\n", + mfspr(SPR_PIR), cpu, cpu->pir, token); + if (cpu->in_opal_call > 2) { + printf("Emergency stack is destroyed, can't continue.\n"); + abort(); + } + return OPAL_INTERNAL_ERROR; + } + } + + cpu->entered_opal_call_at = mftb(); + return OPAL_SUCCESS; +} + +int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe); + +int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe) +{ + struct cpu_thread *cpu = this_cpu(); + uint64_t token = eframe->gpr[0]; + uint64_t now = mftb(); + uint64_t call_time = tb_to_msecs(now - cpu->entered_opal_call_at); + + if (!cpu->in_opal_call) { + disable_fast_reboot("Un-accounted firmware entry"); + printf("CPU UN-ACCOUNTED FIRMWARE ENTRY! PIR=%04lx cpu @%p -> pir=%04x token=%llu retval=%lld\n", + mfspr(SPR_PIR), cpu, cpu->pir, token, retval); + cpu->in_opal_call++; /* avoid exit path underflowing */ + } else { + if (cpu->in_opal_call > 2) { + printf("Emergency stack is destroyed, can't continue.\n"); + abort(); + } + if (!list_empty(&cpu->locks_held)) { + prlog(PR_ERR, "OPAL exiting with locks held, pir=%04x token=%llu retval=%lld\n", + cpu->pir, token, retval); + drop_my_locks(true); + } + } + + if (call_time > 100 && token != OPAL_RESYNC_TIMEBASE) { + prlog((call_time < 1000) ? PR_DEBUG : PR_WARNING, + "Spent %llu msecs in OPAL call %llu!\n", + call_time, token); + } + + cpu->current_token = 0; + + return retval; +} + +int64_t opal_quiesce(uint32_t quiesce_type, int32_t cpu_target) +{ + struct cpu_thread *cpu = this_cpu(); + struct cpu_thread *target = NULL; + struct cpu_thread *c; + uint64_t end; + bool stuck = false; + + if (cpu_target >= 0) { + target = find_cpu_by_server(cpu_target); + if (!target) + return OPAL_PARAMETER; + } else if (cpu_target != -1) { + return OPAL_PARAMETER; + } + + if (quiesce_type == QUIESCE_HOLD || quiesce_type == QUIESCE_REJECT) { + if (cmpxchg32(&opal_quiesce_state, 0, quiesce_type) != 0) { + if (opal_quiesce_owner != cpu->pir) { + /* + * Nested is allowed for now just for + * internal uses, so an error is returned + * for OS callers, but no error message + * printed if we are nested. 
+ */ + printf("opal_quiesce already quiescing\n"); + } + return OPAL_BUSY; + } + opal_quiesce_owner = cpu->pir; + opal_quiesce_target = cpu_target; + } + + if (opal_quiesce_owner != cpu->pir) { + printf("opal_quiesce CPU does not own quiesce state (must call QUIESCE_HOLD or QUIESCE_REJECT)\n"); + return OPAL_BUSY; + } + + /* Okay now we own the quiesce state */ + + if (quiesce_type == QUIESCE_RESUME || + quiesce_type == QUIESCE_RESUME_FAST_REBOOT) { + bust_locks = false; + sync(); /* release barrier vs opal entry */ + if (target) { + target->quiesce_opal_call = 0; + } else { + for_each_cpu(c) { + if (quiesce_type == QUIESCE_RESUME_FAST_REBOOT) + c->in_opal_call = 0; + + if (c == cpu) { + assert(!c->quiesce_opal_call); + continue; + } + c->quiesce_opal_call = 0; + } + } + sync(); + opal_quiesce_state = 0; + return OPAL_SUCCESS; + } + + if (quiesce_type == QUIESCE_LOCK_BREAK) { + if (opal_quiesce_target != -1) { + printf("opal_quiesce has not quiesced all CPUs (must target -1)\n"); + return OPAL_BUSY; + } + bust_locks = true; + return OPAL_SUCCESS; + } + + if (target) { + target->quiesce_opal_call = quiesce_type; + } else { + for_each_cpu(c) { + if (c == cpu) + continue; + c->quiesce_opal_call = quiesce_type; + } + } + + sync(); /* Order stores to quiesce_opal_call vs loads of in_opal_call */ + + end = mftb() + msecs_to_tb(1000); + + smt_lowest(); + if (target) { + while (target->in_opal_call) { + if (tb_compare(mftb(), end) == TB_AAFTERB) { + printf("OPAL quiesce CPU:%04x stuck in OPAL\n", target->pir); + stuck = true; + break; + } + barrier(); + } + } else { + for_each_cpu(c) { + if (c == cpu) + continue; + while (c->in_opal_call) { + if (tb_compare(mftb(), end) == TB_AAFTERB) { + printf("OPAL quiesce CPU:%04x stuck in OPAL\n", c->pir); + stuck = true; + break; + } + barrier(); + } + } + } + smt_medium(); + sync(); /* acquire barrier vs opal entry */ + + if (stuck) { + printf("OPAL quiesce could not kick all CPUs out of OPAL\n"); + return OPAL_PARTIAL; + } + + return OPAL_SUCCESS; +} +opal_call(OPAL_QUIESCE, opal_quiesce, 2); + +void __opal_register(uint64_t token, void *func, unsigned int nargs) +{ + assert(token <= OPAL_LAST); + + ((uint64_t *)opal_branch_table)[token] = function_entry_address(func); + ((u8 *)opal_num_args)[token] = nargs; +} + +/* + * add_opal_firmware_exports_node: adds properties to the device-tree which + * the OS will then change into sysfs nodes. + * The properties must be placed under /ibm,opal/firmware/exports. + * The new sysfs nodes are created under /opal/exports. + * To be correctly exported the properties must contain: + * name + * base memory location (u64) + * size (u64) + */ +static void add_opal_firmware_exports_node(struct dt_node *node) +{ + struct dt_node *exports = dt_new(node, "exports"); + uint64_t sym_start = (uint64_t)__sym_map_start; + uint64_t sym_size = (uint64_t)__sym_map_end - sym_start; + + /* + * These property names will be used by Linux as the user-visible file + * name, so make them meaningful if possible. We use _ as the separator + * here to remain consistent with existing file names in /sys/opal. 
+ */ + dt_add_property_u64s(exports, "symbol_map", sym_start, sym_size); + dt_add_property_u64s(exports, "hdat_map", SPIRA_HEAP_BASE, + SPIRA_HEAP_SIZE); +#ifdef SKIBOOT_GCOV + dt_add_property_u64s(exports, "gcov", SKIBOOT_BASE, + HEAP_BASE - SKIBOOT_BASE); +#endif +} + +static void add_opal_firmware_node(void) +{ + struct dt_node *firmware = dt_new(opal_node, "firmware"); + uint64_t sym_start = (uint64_t)__sym_map_start; + uint64_t sym_size = (uint64_t)__sym_map_end - sym_start; + + dt_add_property_string(firmware, "compatible", "ibm,opal-firmware"); + dt_add_property_string(firmware, "name", "firmware"); + dt_add_property_string(firmware, "version", version); + /* + * As previous OS versions use symbol-map located at + * /ibm,opal/firmware we will keep a copy of symbol-map here + * for backwards compatibility + */ + dt_add_property_u64s(firmware, "symbol-map", sym_start, sym_size); + + add_opal_firmware_exports_node(firmware); +} + +void add_opal_node(void) +{ + uint64_t base, entry, size; + extern uint32_t opal_entry; + extern uint32_t boot_entry; + struct dt_node *opal_event; + + /* XXX TODO: Reorg this. We should create the base OPAL + * node early on, and have the various sub modules populate + * their own entries (console etc...) + * + * The logic of which console backend to use should be + * extracted + */ + + entry = (uint64_t)&opal_entry; + base = SKIBOOT_BASE; + size = (CPU_STACKS_BASE + + (uint64_t)(cpu_max_pir + 1) * STACK_SIZE) - SKIBOOT_BASE; + + opal_node = dt_new_check(dt_root, "ibm,opal"); + dt_add_property_cells(opal_node, "#address-cells", 0); + dt_add_property_cells(opal_node, "#size-cells", 0); + + if (proc_gen < proc_gen_p9) + dt_add_property_strings(opal_node, "compatible", "ibm,opal-v2", + "ibm,opal-v3"); + else + dt_add_property_strings(opal_node, "compatible", "ibm,opal-v3"); + + dt_add_property_cells(opal_node, "opal-msg-async-num", OPAL_MAX_ASYNC_COMP); + dt_add_property_cells(opal_node, "opal-msg-size", OPAL_MSG_SIZE); + dt_add_property_u64(opal_node, "opal-base-address", base); + dt_add_property_u64(opal_node, "opal-entry-address", entry); + dt_add_property_u64(opal_node, "opal-boot-address", (uint64_t)&boot_entry); + dt_add_property_u64(opal_node, "opal-runtime-size", size); + + /* Add irqchip interrupt controller */ + opal_event = dt_new(opal_node, "event"); + dt_add_property_strings(opal_event, "compatible", "ibm,opal-event"); + dt_add_property_cells(opal_event, "#interrupt-cells", 0x1); + dt_add_property(opal_event, "interrupt-controller", NULL, 0); + + add_opal_firmware_node(); + add_associativity_ref_point(); + memcons_add_properties(); +} + +static struct lock evt_lock = LOCK_UNLOCKED; + +void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values) +{ + uint64_t new_evts; + + lock(&evt_lock); + new_evts = (opal_pending_events & ~evt_mask) | evt_values; + if (opal_pending_events != new_evts) { + uint64_t tok; + +#ifdef OPAL_TRACE_EVT_CHG + printf("OPAL: Evt change: 0x%016llx -> 0x%016llx\n", + opal_pending_events, new_evts); +#endif + /* + * If an event gets *set* while we are in a different call chain + * than opal_handle_interrupt() or opal_handle_hmi(), then we + * artificially generate an interrupt (OCC interrupt specifically) + * to ensure that Linux properly broadcast the event change internally + */ + if ((new_evts & ~opal_pending_events) != 0) { + tok = this_cpu()->current_token; + if (tok != OPAL_HANDLE_INTERRUPT && tok != OPAL_HANDLE_HMI) + occ_send_dummy_interrupt(); + } + opal_pending_events = new_evts; + } + unlock(&evt_lock); +} + 
+uint64_t opal_dynamic_event_alloc(void) +{ + uint64_t new_event; + int n; + + lock(&evt_lock); + + /* Create the event mask. This set-bit will be within the event mask + * iff there are free events, or out of the mask if there are no free + * events. If opal_dynamic_events is all ones (ie, all events are + * dynamic, and allocated), then ilog2 will return -1, and we'll have a + * zero mask. + */ + n = ilog2(~opal_dynamic_events); + new_event = 1ull << n; + + /* Ensure we're still within the allocatable dynamic events range */ + if (new_event & opal_dynamic_events_mask) + opal_dynamic_events |= new_event; + else + new_event = 0; + + unlock(&evt_lock); + return new_event; +} + +void opal_dynamic_event_free(uint64_t event) +{ + lock(&evt_lock); + opal_dynamic_events &= ~event; + unlock(&evt_lock); +} + +static uint64_t opal_test_func(uint64_t arg) +{ + printf("OPAL: Test function called with arg 0x%llx\n", arg); + + return 0xfeedf00d; +} +opal_call(OPAL_TEST, opal_test_func, 1); + +struct opal_poll_entry { + struct list_node link; + void (*poller)(void *data); + void *data; +}; + +static struct list_head opal_pollers = LIST_HEAD_INIT(opal_pollers); +static struct lock opal_poll_lock = LOCK_UNLOCKED; + +void opal_add_poller(void (*poller)(void *data), void *data) +{ + struct opal_poll_entry *ent; + + ent = zalloc(sizeof(struct opal_poll_entry)); + assert(ent); + ent->poller = poller; + ent->data = data; + lock(&opal_poll_lock); + list_add_tail(&opal_pollers, &ent->link); + unlock(&opal_poll_lock); +} + +void opal_del_poller(void (*poller)(void *data)) +{ + struct opal_poll_entry *ent; + + /* XXX This is currently unused. To solve various "interesting" + * locking issues, the pollers are run locklessly, so if we were + * to free them, we would have to be careful, using something + * akin to RCU to synchronize with other OPAL entries. For now + * if anybody uses it, print a warning and leak the entry, don't + * free it. + */ + /** + * @fwts-label UnsupportedOPALdelpoller + * @fwts-advice Currently removing a poller is DANGEROUS and + * MUST NOT be done in production firmware. + */ + prlog(PR_ALERT, "WARNING: Unsupported opal_del_poller." + " Interesting locking issues, don't call this.\n"); + + lock(&opal_poll_lock); + list_for_each(&opal_pollers, ent, link) { + if (ent->poller == poller) { + list_del(&ent->link); + /* free(ent); */ + break; + } + } + unlock(&opal_poll_lock); +} + +void opal_run_pollers(void) +{ + static int pollers_with_lock_warnings = 0; + static int poller_recursion = 0; + struct opal_poll_entry *poll_ent; + bool was_in_poller; + + /* Don't re-enter on this CPU, unless it was an OPAL re-entry */ + if (this_cpu()->in_opal_call == 1 && this_cpu()->in_poller) { + + /** + * @fwts-label OPALPollerRecursion + * @fwts-advice Recursion detected in opal_run_pollers(). This + * indicates a bug in OPAL where a poller ended up running + * pollers, which doesn't lead anywhere good. 
+ */ + poller_recursion++; + if (poller_recursion <= 16) { + disable_fast_reboot("Poller recursion detected."); + prlog(PR_ERR, "OPAL: Poller recursion detected.\n"); + backtrace(); + + } + + if (poller_recursion == 16) + prlog(PR_ERR, "OPAL: Squashing future poller recursion warnings (>16).\n"); + + return; + } + was_in_poller = this_cpu()->in_poller; + this_cpu()->in_poller = true; + + if (!list_empty(&this_cpu()->locks_held) && pollers_with_lock_warnings < 64) { + /** + * @fwts-label OPALPollerWithLock + * @fwts-advice opal_run_pollers() was called with a lock + * held, which could lead to deadlock if not excessively + * lucky/careful. + */ + prlog(PR_ERR, "Running pollers with lock held !\n"); + dump_locks_list(); + backtrace(); + pollers_with_lock_warnings++; + if (pollers_with_lock_warnings == 64) { + /** + * @fwts-label OPALPollerWithLock64 + * @fwts-advice Your firmware is buggy, see the 64 + * messages complaining about opal_run_pollers with + * lock held. + */ + prlog(PR_ERR, "opal_run_pollers with lock run 64 " + "times, disabling warning.\n"); + } + } + + /* We run the timers first */ + check_timers(false); + + /* The pollers are run lokelessly, see comment in opal_del_poller */ + list_for_each(&opal_pollers, poll_ent, link) + poll_ent->poller(poll_ent->data); + + /* Disable poller flag */ + this_cpu()->in_poller = was_in_poller; + + /* On debug builds, print max stack usage */ + check_stacks(); +} + +static int64_t opal_poll_events(__be64 *outstanding_event_mask) +{ + + if (!opal_addr_valid(outstanding_event_mask)) + return OPAL_PARAMETER; + + /* Check if we need to trigger an attn for test use */ + if (attn_trigger == 0xdeadbeef) { + prlog(PR_EMERG, "Triggering attn\n"); + assert(false); + } + + opal_run_pollers(); + + if (outstanding_event_mask) + *outstanding_event_mask = cpu_to_be64(opal_pending_events); + + return OPAL_SUCCESS; +} +opal_call(OPAL_POLL_EVENTS, opal_poll_events, 1); + +static int64_t opal_check_token(uint64_t token) +{ + if (token > OPAL_LAST) + return OPAL_TOKEN_ABSENT; + + if (opal_branch_table[token]) + return OPAL_TOKEN_PRESENT; + + return OPAL_TOKEN_ABSENT; +} +opal_call(OPAL_CHECK_TOKEN, opal_check_token, 1); + +struct opal_sync_entry { + struct list_node link; + bool (*notify)(void *data); + void *data; +}; + +static struct list_head opal_syncers = LIST_HEAD_INIT(opal_syncers); + +void opal_add_host_sync_notifier(bool (*notify)(void *data), void *data) +{ + struct opal_sync_entry *ent; + + ent = zalloc(sizeof(struct opal_sync_entry)); + assert(ent); + ent->notify = notify; + ent->data = data; + list_add_tail(&opal_syncers, &ent->link); +} + +/* + * Remove a host sync notifier for given callback and data + */ +void opal_del_host_sync_notifier(bool (*notify)(void *data), void *data) +{ + struct opal_sync_entry *ent; + + list_for_each(&opal_syncers, ent, link) { + if (ent->notify == notify && ent->data == data) { + list_del(&ent->link); + free(ent); + return; + } + } +} + +/* + * OPAL call to handle host kexec'ing scenario + */ +static int64_t opal_sync_host_reboot(void) +{ + struct opal_sync_entry *ent, *nxt; + int ret = OPAL_SUCCESS; + + list_for_each_safe(&opal_syncers, ent, nxt, link) + if (! ent->notify(ent->data)) + ret = OPAL_BUSY_EVENT; + + return ret; +} +opal_call(OPAL_SYNC_HOST_REBOOT, opal_sync_host_reboot, 0); |
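
A note on the dynamic event allocator added above: opal_dynamic_event_alloc() uses ilog2(~opal_dynamic_events) to pick the highest free bit and only keeps the result if it falls inside opal_dynamic_events_mask (the upper 32 bits), so allocation walks down from bit 63 and fails once bits 63..32 are exhausted. The following is a minimal standalone sketch of that bit trick, not part of the patch; the two constants match the file, while ilog2_stub(), event_alloc() and the rest are illustrative stand-ins.

	#include <inttypes.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Constants as in opal.c: dynamic events occupy the top 32 bits. */
	static const uint64_t dynamic_mask = 0xffffffff00000000ull;
	static uint64_t dynamic_events;	/* a set bit means "allocated" */

	/* Stand-in for skiboot's ilog2(): index of the highest set bit (v != 0). */
	static int ilog2_stub(uint64_t v)
	{
		return 63 - __builtin_clzll(v);
	}

	static uint64_t event_alloc(void)
	{
		/* Highest clear bit: starts at bit 63 and walks down as bits fill. */
		uint64_t candidate = 1ull << ilog2_stub(~dynamic_events);

		/* Once bits 63..32 are taken, the candidate drops below the mask. */
		if (!(candidate & dynamic_mask))
			return 0;
		dynamic_events |= candidate;
		return candidate;
	}

	int main(void)
	{
		printf("%016" PRIx64 "\n", event_alloc()); /* 8000000000000000 */
		printf("%016" PRIx64 "\n", event_alloc()); /* 4000000000000000 */
		return 0;
	}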
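The file also defines the call-registration, poller and event-signalling plumbing that other skiboot subsystems hook into. Below is a minimal, hypothetical sketch (again, not part of this patch) of how a driver might use only the interfaces defined above; OPAL_EXAMPLE, the example_* names and the init hook are assumptions for illustration, since real tokens live in opal-api.h.

	#include <skiboot.h>
	#include <opal.h>

	static uint64_t example_event;		/* dynamic event bit, 0 if none free */
	static bool example_work_pending;

	/* Poller invoked from opal_run_pollers(); must be quick and lock-free. */
	static void example_poller(void *data __unused)
	{
		if (example_event && example_work_pending) {
			example_work_pending = false;
			opal_update_pending_evt(example_event, example_event);
		}
	}

	/* Handler dispatched through the branch table filled by opal_table_init(). */
	static int64_t example_opal_call(uint64_t arg)
	{
		prlog(PR_DEBUG, "example: called with arg 0x%llx\n", arg);
		return OPAL_SUCCESS;
	}
	/* OPAL_EXAMPLE is a made-up token for this sketch. */
	opal_call(OPAL_EXAMPLE, example_opal_call, 1);

	/* Hypothetical init hook, called from the platform bring-up path. */
	void example_init(void)
	{
		example_event = opal_dynamic_event_alloc();
		opal_add_poller(example_poller, NULL);
	}

The opal_call() line relies on the macro emitting an entry between __opal_table_start and __opal_table_end, which opal_table_init() copies into the branch table at boot; the poller then runs from opal_run_pollers() on every OPAL_POLL_EVENTS call.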