Diffstat (limited to 'roms/skiboot/hw/fsp/fsp-mem-err.c')
-rw-r--r-- | roms/skiboot/hw/fsp/fsp-mem-err.c | 401 |
1 file changed, 401 insertions, 0 deletions
diff --git a/roms/skiboot/hw/fsp/fsp-mem-err.c b/roms/skiboot/hw/fsp/fsp-mem-err.c
new file mode 100644
index 000000000..2e3e65401
--- /dev/null
+++ b/roms/skiboot/hw/fsp/fsp-mem-err.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Sometimes some memory needs to go and sit in the naughty corner
+ *
+ * Copyright 2013-2019 IBM Corp.
+ */
+
+#define pr_fmt(fmt) "FSPMEMERR: " fmt
+#include <skiboot.h>
+#include <opal.h>
+#include <opal-msg.h>
+#include <lock.h>
+#include <fsp.h>
+#include <errorlog.h>
+
+/* The FSP sends the real address of a 4K memory page. */
+#define MEM_ERR_PAGE_SIZE_4K	(1UL << 12)
+
+/* Maximum number of error events to hold until linux consumes them. */
+#define MERR_MAX_RECORD		1024
+
+struct fsp_mem_err_node {
+	struct list_node list;
+	struct OpalMemoryErrorData data;
+};
+
+static LIST_HEAD(merr_free_list);
+static LIST_HEAD(mem_error_list);
+/*
+ * This lock protects modifications of the merr_free_list and
+ * mem_error_list lists.
+ */
+static struct lock mem_err_lock = LOCK_UNLOCKED;
+
+DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_RES, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR,
+		 OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL,
+		 OPAL_NA);
+
+DEFINE_LOG_ENTRY(OPAL_RC_MEM_ERR_DEALLOC, OPAL_PLATFORM_ERR_EVT, OPAL_MEM_ERR,
+		 OPAL_MISC_SUBSYSTEM, OPAL_PREDICTIVE_ERR_GENERAL,
+		 OPAL_NA);
+
+static bool send_response_to_fsp(u32 cmd_sub_mod)
+{
+	struct fsp_msg *rsp;
+	int rc = -ENOMEM;
+
+	rsp = fsp_mkmsg(cmd_sub_mod, 0);
+	if (rsp)
+		rc = fsp_queue_msg(rsp, fsp_freemsg);
+	if (rc) {
+		fsp_freemsg(rsp);
+		/* XXX Generate error logs */
+		prerror("Error %d queueing FSP memory error reply\n", rc);
+		return false;
+	}
+	return true;
+}
+
+/*
+ * Queue up the memory error message for delivery.
+ *
+ * queue_event_for_delivery gets called from two places:
+ * 1) from queue_mem_err_node when a new fsp mem error is available and
+ * 2) from the completion callback indicating that linux has consumed a
+ *    message.
+ *
+ * TODO:
+ * There is a chance that we may not get a free slot to queue our event
+ * for delivery to linux during either of the above invocations. In that
+ * case we end up holding events until the next fsp memory error comes in.
+ * We need to address this case either here OR fix up the messaging
+ * infrastructure to make sure at least one slot will always be available
+ * per message type.
+ *
+ * XXX: BenH: I changed the msg infrastructure to attempt an allocation
+ *      in that case, at least until we clarify a bit better how
+ *      we want to handle things.
+ */
+static void queue_event_for_delivery(void *data __unused, int status __unused)
+{
+	struct fsp_mem_err_node *entry;
+	uint64_t *merr_data;
+	int rc;
+
+	lock(&mem_err_lock);
+	entry = list_pop(&mem_error_list, struct fsp_mem_err_node, list);
+	unlock(&mem_err_lock);
+
+	if (!entry)
+		return;
+
+	/*
+	 * struct OpalMemoryErrorData is a well-packed structure of
+	 * (4 * 64 bits) size. Hence use a uint64_t pointer to pass the
+	 * entire structure as 4 params of the generic message format.
+	 */
+	merr_data = (uint64_t *)&entry->data;
+
+	/* queue up for delivery */
+	rc = opal_queue_msg(OPAL_MSG_MEM_ERR, NULL, queue_event_for_delivery,
+			    cpu_to_be64(merr_data[0]),
+			    cpu_to_be64(merr_data[1]),
+			    cpu_to_be64(merr_data[2]),
+			    cpu_to_be64(merr_data[3]));
+	lock(&mem_err_lock);
+	if (rc) {
+		/*
+		 * Failed to queue up the event for delivery. No free slot
+		 * available. There is a chance that we are trying to queue
+		 * up multiple events at the same time.
+		 * We may already have at least one event queued up; in
+		 * that case we will be called again through the completion
+		 * callback and should be able to grab an empty slot then.
+		 *
+		 * For now, put this node back on mem_error_list.
+		 */
+		list_add(&mem_error_list, &entry->list);
+	} else
+		list_add(&merr_free_list, &entry->list);
+	unlock(&mem_err_lock);
+}
+
+static int queue_mem_err_node(struct OpalMemoryErrorData *merr_evt)
+{
+	struct fsp_mem_err_node *entry;
+
+	lock(&mem_err_lock);
+	entry = list_pop(&merr_free_list, struct fsp_mem_err_node, list);
+	if (!entry) {
+		printf("Failed to queue up memory error event.\n");
+		unlock(&mem_err_lock);
+		return -ENOMEM;
+	}
+
+	entry->data = *merr_evt;
+	list_add(&mem_error_list, &entry->list);
+	unlock(&mem_err_lock);
+
+	/* Queue up the event for delivery to the OS. */
+	queue_event_for_delivery(NULL, OPAL_SUCCESS);
+	return 0;
+}
+
+/* Check if a memory resilience event for the same address already exists. */
+static bool is_resilience_event_exist(u64 paddr)
+{
+	struct fsp_mem_err_node *entry;
+	struct OpalMemoryErrorData *merr_evt;
+	int found = 0;
+
+	lock(&mem_err_lock);
+	list_for_each(&mem_error_list, entry, list) {
+		merr_evt = &entry->data;
+		if ((merr_evt->type == OPAL_MEM_ERR_TYPE_RESILIENCE) &&
+		    (be64_to_cpu(merr_evt->u.resilience.physical_address_start)
+		     == paddr)) {
+			found = 1;
+			break;
+		}
+	}
+	unlock(&mem_err_lock);
+	return !!found;
+}
+
+/*
+ * Handle a Memory Resilience error message.
+ * Section 28.2 of the Hypervisor to FSP Mailbox Interface Specification.
+ *
+ * The flow for a Memory Resilience Event is:
+ * 1. The PRD component in the FSP gets a recoverable attention from the
+ *    hardware when there is a correctable/uncorrectable memory error and
+ *    a page has to be freed up.
+ * 2. PRD sends a Memory Resilience Command to the hypervisor with the real
+ *    address of the 4K memory page in which the error occurred.
+ * 3. The hypervisor acknowledges with a status immediately. Immediate
+ *    acknowledgment doesn't require the freeing of the page to be
+ *    completed.
+ */
+static bool handle_memory_resilience(u32 cmd_sub_mod, u64 paddr)
+{
+	int rc = 0;
+	struct OpalMemoryErrorData mem_err_evt;
+	struct errorlog *buf;
+
+	memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData));
+	/* Check arguments */
+	if (paddr == 0) {
+		prerror("memory resilience: Invalid real address.\n");
+		return send_response_to_fsp(FSP_RSP_MEM_RES |
+					    FSP_STATUS_GENERIC_ERROR);
+	}
+
+	/* Check if an event already exists for the same address. */
+	if (is_resilience_event_exist(paddr))
+		goto send_response;
+
+	/* Populate an event. */
+	mem_err_evt.version = OpalMemErr_V1;
+	mem_err_evt.type = OPAL_MEM_ERR_TYPE_RESILIENCE;
+
+	switch (cmd_sub_mod) {
+	case FSP_CMD_MEM_RES_CE:
+		/*
+		 * Should we keep a counter for corrected errors in
+		 * sapphire OR let linux (PowerNV) handle it?
+		 *
+		 * For now, send corrected errors to linux and let
+		 * linux handle corrected-error thresholding.
+		 */
+		mem_err_evt.flags |= cpu_to_be16(OPAL_MEM_CORRECTED_ERROR);
+		mem_err_evt.u.resilience.resil_err_type =
+						OPAL_MEM_RESILIENCE_CE;
+		break;
+	case FSP_CMD_MEM_RES_UE:
+		mem_err_evt.u.resilience.resil_err_type =
+						OPAL_MEM_RESILIENCE_UE;
+		break;
+	case FSP_CMD_MEM_RES_UE_SCRB:
+		mem_err_evt.u.resilience.resil_err_type =
+						OPAL_MEM_RESILIENCE_UE_SCRUB;
+		break;
+	}
+	mem_err_evt.u.resilience.physical_address_start = cpu_to_be64(paddr);
+	mem_err_evt.u.resilience.physical_address_end =
+				cpu_to_be64(paddr + MEM_ERR_PAGE_SIZE_4K);
+
+	/* Queue up the event and inform the OS about it. */
+	rc = queue_mem_err_node(&mem_err_evt);
+
+send_response:
+	/* Queue up an OK response to the resilience message itself */
+	if (!rc)
+		return send_response_to_fsp(FSP_RSP_MEM_RES);
+	else {
+		buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_RES), 0);
+		log_append_msg(buf,
+			"OPAL_MEM_ERR: Cannot queue up memory "
+			"resilience error event to the OS");
+		log_add_section(buf, OPAL_ELOG_SEC_DESC);
+		log_append_data(buf, (char *)&mem_err_evt,
+				sizeof(struct OpalMemoryErrorData));
+		log_commit(buf);
+		return false;
+	}
+}
+
+/* Update the existing event entry if a match is found. */
+static bool update_memory_deallocation_event(u64 paddr_start, u64 paddr_end)
+{
+	struct fsp_mem_err_node *entry;
+	struct OpalMemoryErrorData *merr_evt;
+	int found = 0;
+
+	lock(&mem_err_lock);
+	list_for_each(&mem_error_list, entry, list) {
+		merr_evt = &entry->data;
+		if ((merr_evt->type == OPAL_MEM_ERR_TYPE_DYN_DALLOC) &&
+		    (be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_start)
+		     == paddr_start)) {
+			found = 1;
+			if (be64_to_cpu(merr_evt->u.dyn_dealloc.physical_address_end)
+			    < paddr_end)
+				merr_evt->u.dyn_dealloc.physical_address_end =
+						cpu_to_be64(paddr_end);
+			break;
+		}
+	}
+	unlock(&mem_err_lock);
+	return !!found;
+}
+
+/*
+ * Handle a dynamic memory deallocation message.
+ *
+ * When a condition occurs in which we need to do a large-scale memory
+ * deallocation, PRD will send the starting and ending addresses of an
+ * area of memory to the hypervisor. The hypervisor then needs to use this
+ * to deallocate all pages between and including those addresses.
+ */
+static bool handle_memory_deallocation(u64 paddr_start, u64 paddr_end)
+{
+	int rc = 0;
+	u8 err = 0;
+	struct OpalMemoryErrorData mem_err_evt;
+	struct errorlog *buf;
+
+	memset(&mem_err_evt, 0, sizeof(struct OpalMemoryErrorData));
+	/* Check arguments */
+	if ((paddr_start == 0) || (paddr_end == 0)) {
+		prerror("memory deallocation: Invalid "
+			"starting/ending real address.\n");
+		err = FSP_STATUS_GENERIC_ERROR;
+	}
+
+	/* If we had an error, send a response to the fsp and return */
+	if (err)
+		return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC | err);
+
+	/*
+	 * The FSP can send dynamic memory deallocation messages multiple
+	 * times for the same address/address range. Hence check whether we
+	 * already have the same event queued and update it if so.
+	 */
+	if (update_memory_deallocation_event(paddr_start, paddr_end))
+		goto send_response;
+
+	/* Populate a new event. */
+	mem_err_evt.version = OpalMemErr_V1;
+	mem_err_evt.type = OPAL_MEM_ERR_TYPE_DYN_DALLOC;
+	mem_err_evt.u.dyn_dealloc.dyn_err_type = OPAL_MEM_DYNAMIC_DEALLOC;
+	mem_err_evt.u.dyn_dealloc.physical_address_start =
+						cpu_to_be64(paddr_start);
+	mem_err_evt.u.dyn_dealloc.physical_address_end =
+						cpu_to_be64(paddr_end);
+
+	/* Queue up the event and inform the OS about it. */
+	rc = queue_mem_err_node(&mem_err_evt);
+
+send_response:
+	/* Queue up an OK response to the memory deallocation message itself */
+	if (!rc)
+		return send_response_to_fsp(FSP_RSP_MEM_DYN_DEALLOC);
+	else {
+		buf = opal_elog_create(&e_info(OPAL_RC_MEM_ERR_DEALLOC), 0);
+		log_append_msg(buf,
+			"OPAL_MEM_ERR: Cannot queue up memory "
+			"deallocation error event to the OS");
+		log_add_section(buf, OPAL_ELOG_SEC_DESC);
+		log_append_data(buf, (char *)&mem_err_evt,
+				sizeof(struct OpalMemoryErrorData));
+		log_commit(buf);
+		return false;
+	}
+}
+
+/* Receive a memory error message and handle it. */
+static bool fsp_mem_err_msg(u32 cmd_sub_mod, struct fsp_msg *msg)
+{
+	u64 paddr_start, paddr_end;
+
+	printf("Received 0x%08x command\n", cmd_sub_mod);
+	switch (cmd_sub_mod) {
+	case FSP_CMD_MEM_RES_CE:
+	case FSP_CMD_MEM_RES_UE:
+	case FSP_CMD_MEM_RES_UE_SCRB:
+		/*
+		 * We get the memory resilience command from the FSP for
+		 * correctable/uncorrectable/scrub UE errors with the real
+		 * address of the 4K memory page in which the error occurred.
+		 */
+		paddr_start = be64_to_cpu(*((__be64 *)&msg->data.bytes[0]));
+		printf("Got memory resilience error message for "
+		       "paddr=0x%016llx\n", paddr_start);
+		return handle_memory_resilience(cmd_sub_mod, paddr_start);
+	case FSP_CMD_MEM_DYN_DEALLOC:
+		paddr_start = be64_to_cpu(*((__be64 *)&msg->data.bytes[0]));
+		paddr_end = be64_to_cpu(*((__be64 *)&msg->data.bytes[8]));
+		printf("Got dynamic memory deallocation message: "
+		       "paddr_start=0x%016llx, paddr_end=0x%016llx\n",
+		       paddr_start, paddr_end);
+		return handle_memory_deallocation(paddr_start, paddr_end);
+	}
+	return false;
+}
+
+/*
+ * Pre-allocate memory to hold up to MERR_MAX_RECORD memory error events
+ * until linux consumes them.
+ */
+static int init_merr_free_list(uint32_t num_entries)
+{
+	struct fsp_mem_err_node *entry;
+	int i;
+
+	entry = zalloc(sizeof(struct fsp_mem_err_node) * num_entries);
+	if (!entry)
+		return -ENOMEM;
+
+	for (i = 0; i < num_entries; ++i, entry++)
+		list_add_tail(&merr_free_list, &entry->list);
+
+	return 0;
+}
+
+static struct fsp_client fsp_mem_err_client = {
+	.message = fsp_mem_err_msg,
+};
+
+void fsp_memory_err_init(void)
+{
+	int rc;
+
+	printf("Initializing FSP memory error handling.\n");
+	/* If we have an FSP, register for notifications */
+	if (!fsp_present())
+		return;
+
+	/* Pre-allocate memory for MERR_MAX_RECORD records */
+	rc = init_merr_free_list(MERR_MAX_RECORD);
+	if (rc < 0)
+		return;
+
+	fsp_register_client(&fsp_mem_err_client, FSP_MCLASS_MEMORY_ERR);
+}
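
For context on the delivery path above: queue_event_for_delivery() hands struct OpalMemoryErrorData to opal_queue_msg() by casting it to a uint64_t pointer, which is only safe because the structure is well packed and exactly 4 * 64 bits, as its comment notes. Below is a minimal standalone C sketch of that pack/unpack round trip; struct event is an illustrative stand-in, not the real OPAL layout, and the per-word cpu_to_be64() byte swap the driver performs is left out.

	#include <stdint.h>
	#include <string.h>

	/* Stand-in for struct OpalMemoryErrorData: well packed, exactly
	 * 4 * 64 bits, which is the assumption the driver relies on. */
	struct event {
		uint64_t words[4];
	};

	/* Guard the size assumption at compile time. */
	_Static_assert(sizeof(struct event) == 4 * sizeof(uint64_t),
		       "event must pack into exactly four 64-bit words");

	/* Sender side: reinterpret the struct as four u64 message params,
	 * as the driver does with (uint64_t *)&entry->data. */
	static void pack_event(const struct event *evt, uint64_t params[4])
	{
		memcpy(params, evt, sizeof(*evt));
	}

	/* Receiver side: rebuild the struct from the four params. */
	static void unpack_event(const uint64_t params[4], struct event *evt)
	{
		memcpy(evt, params, sizeof(*evt));
	}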
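
Similarly, init_merr_free_list() carves a single zalloc() into MERR_MAX_RECORD list nodes up front, so the error path never has to allocate under pressure. A standalone sketch of the same pre-allocation pattern, assuming a plain singly linked free list (struct node, free_head and init_free_list are illustrative names, not skiboot's list API):

	#include <stdio.h>
	#include <stdlib.h>

	/* Illustrative node; skiboot embeds a struct list_node plus the
	 * struct OpalMemoryErrorData payload instead. */
	struct node {
		struct node *next;
		unsigned long long payload[4];
	};

	static struct node *free_head;

	/* Carve one zero-initialised pool into num_entries free-list
	 * nodes. The pool intentionally lives for the program's
	 * lifetime, as it would in firmware. */
	static int init_free_list(unsigned int num_entries)
	{
		struct node *pool = calloc(num_entries, sizeof(*pool));
		unsigned int i;

		if (!pool)
			return -1;
		for (i = 0; i < num_entries; ++i, pool++) {
			pool->next = free_head;
			free_head = pool;
		}
		return 0;
	}

	int main(void)
	{
		if (init_free_list(1024))	/* mirrors MERR_MAX_RECORD */
			return 1;
		printf("free list ready, head at %p\n", (void *)free_head);
		return 0;
	}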