Diffstat (limited to 'roms/skiboot/core/mem_region.c')
-rw-r--r--   roms/skiboot/core/mem_region.c   1555
1 file changed, 1555 insertions, 0 deletions
diff --git a/roms/skiboot/core/mem_region.c b/roms/skiboot/core/mem_region.c
new file mode 100644
index 000000000..36de2d094
--- /dev/null
+++ b/roms/skiboot/core/mem_region.c
@@ -0,0 +1,1555 @@
+// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+/*
+ * Simple memory allocator
+ *
+ * Copyright 2013-2018 IBM Corp.
+ */
+
+#include <inttypes.h>
+#include <skiboot.h>
+#include <mem-map.h>
+#include <libfdt_env.h>
+#include <lock.h>
+#include <device.h>
+#include <cpu.h>
+#include <chip.h>
+#include <affinity.h>
+#include <types.h>
+#include <mem_region.h>
+#include <mem_region-malloc.h>
+
+/* Memory poisoning on free (if POISON_MEM_REGION set to 1) */
+#ifdef DEBUG
+#define POISON_MEM_REGION 1
+#else
+#define POISON_MEM_REGION 0
+#endif
+#define POISON_MEM_REGION_WITH 0x99
+#define POISON_MEM_REGION_LIMIT (1 * 1024 * 1024 * 1024)
+
+/* Locking: The mem_region_lock protects the regions list from concurrent
+ * updates. Additions to, or removals from, the region list must be done
+ * with this lock held. This is typically done when we're establishing
+ * the memory & reserved regions.
+ *
+ * Each region has a lock (region->free_list_lock) to protect the free list
+ * from concurrent modification. This lock is used when we're allocating
+ * memory out of a specific region.
+ *
+ * If both locks are needed (eg, __local_alloc, where we need to find a region,
+ * then allocate from it), the mem_region_lock must be acquired before (and
+ * released after) the per-region lock.
+ */
+struct lock mem_region_lock = LOCK_UNLOCKED;
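+
+/*
+ * A minimal sketch of the ordering rule above (illustrative only; it
+ * assumes the __location__ helper from skiboot.h):
+ *
+ *   lock(&mem_region_lock);
+ *   region = find_mem_region("ibm,firmware-heap");
+ *   lock(&region->free_list_lock);
+ *   p = mem_alloc(region, size, align, __location__);
+ *   unlock(&region->free_list_lock);
+ *   unlock(&mem_region_lock);
+ *
+ * Taking the two locks in the opposite order can deadlock against
+ * __local_alloc() below.
+ */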
+
+static struct list_head regions = LIST_HEAD_INIT(regions);
+static struct list_head early_reserves = LIST_HEAD_INIT(early_reserves);
+
+static bool mem_region_init_done = false;
+static bool mem_regions_finalised = false;
+
+unsigned long top_of_ram = SKIBOOT_BASE + SKIBOOT_SIZE;
+
+static struct mem_region skiboot_os_reserve = {
+ .name = "ibm,os-reserve",
+ .start = 0,
+ .len = SKIBOOT_BASE,
+ .type = REGION_OS,
+};
+
+struct mem_region skiboot_heap = {
+ .name = "ibm,firmware-heap",
+ .start = HEAP_BASE,
+ .len = HEAP_SIZE,
+ .type = REGION_SKIBOOT_HEAP,
+};
+
+static struct mem_region skiboot_code_and_text = {
+ .name = "ibm,firmware-code",
+ .start = SKIBOOT_BASE,
+ .len = HEAP_BASE - SKIBOOT_BASE,
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_after_heap = {
+ .name = "ibm,firmware-data",
+ .start = HEAP_BASE + HEAP_SIZE,
+ .len = SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_cpu_stacks = {
+ .name = "ibm,firmware-stacks",
+ .start = CPU_STACKS_BASE,
+ .len = 0, /* TBA */
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_mambo_kernel = {
+ .name = "ibm,firmware-mambo-kernel",
+ .start = (unsigned long)KERNEL_LOAD_BASE,
+ .len = KERNEL_LOAD_SIZE,
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_mambo_initramfs = {
+ .name = "ibm,firmware-mambo-initramfs",
+ .start = (unsigned long)INITRAMFS_LOAD_BASE,
+ .len = INITRAMFS_LOAD_SIZE,
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+
+struct alloc_hdr {
+ bool free : 1;
+ bool prev_free : 1;
+ bool printed : 1;
+ unsigned long num_longs : BITS_PER_LONG-3; /* Including header. */
+ const char *location;
+};
+
+struct free_hdr {
+ struct alloc_hdr hdr;
+ struct list_node list;
+ /* ... unsigned long tailer; */
+};
+
+#define ALLOC_HDR_LONGS (sizeof(struct alloc_hdr) / sizeof(long))
+#define ALLOC_MIN_LONGS (sizeof(struct free_hdr) / sizeof(long) + 1)
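+
+/*
+ * Block layout, roughly (a sketch; sizes assume a 64-bit build):
+ *
+ *   allocated: [ alloc_hdr | payload ............... ]
+ *   free:      [ alloc_hdr | list_node | ... | tailer ]
+ *
+ * hdr.num_longs counts the whole block, header included, so an
+ * allocation's payload is (num_longs - ALLOC_HDR_LONGS) longs, and a
+ * free block's tailer lives at ((unsigned long *)f + num_longs - 1),
+ * which is what tailer() below computes.
+ */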
+
+/* Avoid ugly casts. */
+static void *region_start(const struct mem_region *region)
+{
+ return (void *)(unsigned long)region->start;
+}
+
+/* Each free block has a tailer, so we can walk backwards. */
+static unsigned long *tailer(struct free_hdr *f)
+{
+ return (unsigned long *)f + f->hdr.num_longs - 1;
+}
+
+/* This walks forward to the next hdr (or NULL if at the end). */
+static struct alloc_hdr *next_hdr(const struct mem_region *region,
+ const struct alloc_hdr *hdr)
+{
+ void *next;
+
+ next = ((unsigned long *)hdr + hdr->num_longs);
+ if (next >= region_start(region) + region->len)
+ next = NULL;
+ return next;
+}
+
+#if POISON_MEM_REGION == 1
+static void mem_poison(struct free_hdr *f)
+{
+ size_t poison_size = (void*)tailer(f) - (void*)(f+1);
+
+ /* We only poison up to a limit, as otherwise boot is
+ * kinda slow */
+ if (poison_size > POISON_MEM_REGION_LIMIT)
+ poison_size = POISON_MEM_REGION_LIMIT;
+
+ memset(f+1, POISON_MEM_REGION_WITH, poison_size);
+}
+#endif
+
+/* Creates free block covering entire region. */
+static void init_allocatable_region(struct mem_region *region)
+{
+ struct free_hdr *f = region_start(region);
+ assert(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY);
+ f->hdr.num_longs = region->len / sizeof(long);
+ f->hdr.free = true;
+ f->hdr.prev_free = false;
+ *tailer(f) = f->hdr.num_longs;
+ list_head_init(&region->free_list);
+ list_add(&region->free_list, &f->list);
+#if POISON_MEM_REGION == 1
+ mem_poison(f);
+#endif
+}
+
+static void make_free(struct mem_region *region, struct free_hdr *f,
+ const char *location, bool skip_poison)
+{
+ struct alloc_hdr *next;
+
+#if POISON_MEM_REGION == 1
+ if (!skip_poison)
+ mem_poison(f);
+#else
+ (void)skip_poison;
+#endif
+
+ if (f->hdr.prev_free) {
+ struct free_hdr *prev;
+ unsigned long *prev_tailer = (unsigned long *)f - 1;
+
+ assert(*prev_tailer);
+ prev = (void *)((unsigned long *)f - *prev_tailer);
+ assert(prev->hdr.free);
+ assert(!prev->hdr.prev_free);
+
+ /* Expand to cover the one we just freed. */
+ prev->hdr.num_longs += f->hdr.num_longs;
+ f = prev;
+ } else {
+ f->hdr.free = true;
+ f->hdr.location = location;
+ list_add(&region->free_list, &f->list);
+ }
+
+ /* Fix up tailer. */
+ *tailer(f) = f->hdr.num_longs;
+
+ /* If next is free, coalesce it */
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ next->prev_free = true;
+ if (next->free) {
+ struct free_hdr *next_free = (void *)next;
+ list_del_from(&region->free_list, &next_free->list);
+ /* Maximum of one level of recursion */
+ make_free(region, next_free, location, true);
+ }
+ }
+}
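+
+/*
+ * e.g. freeing B in [A free][B allocated][C free]: B first merges
+ * backwards into A via the tailer just before it, then the bounded
+ * recursion above absorbs C, leaving one free block spanning all
+ * three with a single tailer at its end.
+ */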
+
+/* Can we fit this many longs with this alignment in this free block? */
+static bool fits(struct free_hdr *f, size_t longs, size_t align, size_t *offset)
+{
+ *offset = 0;
+
+ while (f->hdr.num_longs >= *offset + longs) {
+ size_t addr;
+
+ addr = (unsigned long)f
+ + (*offset + ALLOC_HDR_LONGS) * sizeof(long);
+ if ((addr & (align - 1)) == 0)
+ return true;
+
+ /* Don't make tiny chunks! */
+ if (*offset == 0)
+ *offset = ALLOC_MIN_LONGS;
+ else
+ (*offset)++;
+ }
+ return false;
+}
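+
+/*
+ * For example, a request with align = 64 against a free block at
+ * address A first tries the payload address
+ * A + ALLOC_HDR_LONGS * sizeof(long); if that misses the 64-byte
+ * boundary, *offset jumps straight to ALLOC_MIN_LONGS (so the skipped
+ * prefix is never too small to free later) and then grows a long at a
+ * time until the payload aligns or the block runs out of space.
+ */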
+
+static void discard_excess(struct mem_region *region,
+ struct alloc_hdr *hdr, size_t alloc_longs,
+ const char *location, bool skip_poison)
+{
+ /* Do we have excess? */
+ if (hdr->num_longs > alloc_longs + ALLOC_MIN_LONGS) {
+ struct free_hdr *post;
+
+ /* Set up post block. */
+ post = (void *)hdr + alloc_longs * sizeof(long);
+ post->hdr.num_longs = hdr->num_longs - alloc_longs;
+ post->hdr.prev_free = false;
+
+ /* Trim our block. */
+ hdr->num_longs = alloc_longs;
+
+ /* This coalesces as required. */
+ make_free(region, post, location, skip_poison);
+ }
+}
+
+static const char *hdr_location(const struct alloc_hdr *hdr)
+{
+ /* Corrupt: step carefully! */
+ if (is_rodata(hdr->location))
+ return hdr->location;
+ return "*CORRUPT*";
+}
+
+static void bad_header(const struct mem_region *region,
+ const struct alloc_hdr *hdr,
+ const char *during,
+ const char *location)
+{
+ /* Corrupt: step carefully! */
+ if (is_rodata(hdr->location))
+ prerror("%p (in %s) %s at %s, previously %s\n",
+ hdr-1, region->name, during, location, hdr->location);
+ else
+ prerror("%p (in %s) %s at %s, previously %p\n",
+ hdr-1, region->name, during, location, hdr->location);
+ abort();
+}
+
+static bool region_is_reservable(struct mem_region *region)
+{
+ return region->type != REGION_OS;
+}
+
+static bool region_is_reserved(struct mem_region *region)
+{
+ return region->type != REGION_OS && region->type != REGION_MEMORY;
+}
+
+void mem_dump_allocs(void)
+{
+ struct mem_region *region;
+ struct alloc_hdr *h, *i;
+
+ /* Walk each allocatable region and summarise its allocations. */
+ prlog(PR_INFO, "Memory regions:\n");
+ list_for_each(&regions, region, list) {
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ continue;
+ prlog(PR_INFO, " 0x%012llx..%012llx : %s\n",
+ (long long)region->start,
+ (long long)(region->start + region->len - 1),
+ region->name);
+ if (region->free_list.n.next == NULL) {
+ prlog(PR_INFO, " no allocs\n");
+ continue;
+ }
+
+ /*
+ * XXX: When dumping the allocation list we coalesce allocations
+ * with the same location and size into a single line. This is
+ * quadratic, but it makes the dump human-readable and the raw
+ * dump sometimes causes the log buffer to wrap.
+ */
+ for (h = region_start(region); h; h = next_hdr(region, h))
+ h->printed = false;
+
+ for (h = region_start(region); h; h = next_hdr(region, h)) {
+ unsigned long bytes;
+ int count = 0;
+
+ if (h->free)
+ continue;
+ if (h->printed)
+ continue;
+
+ for (i = h; i; i = next_hdr(region, i)) {
+ if (i->free)
+ continue;
+ if (i->num_longs != h->num_longs)
+ continue;
+ if (strcmp(i->location, h->location))
+ continue;
+
+ i->printed = true;
+ count++;
+ }
+
+ bytes = h->num_longs * sizeof(long);
+ prlog(PR_NOTICE, " % 8d allocs of 0x%.8lx bytes at %s (total 0x%lx)\n",
+ count, bytes, hdr_location(h), bytes * count);
+ }
+ }
+}
+
+int64_t mem_dump_free(void)
+{
+ struct mem_region *region;
+ struct alloc_hdr *hdr;
+ int64_t total_free;
+ int64_t region_free;
+
+ total_free = 0;
+
+ prlog(PR_INFO, "Free space in HEAP memory regions:\n");
+ list_for_each(&regions, region, list) {
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ continue;
+ region_free = 0;
+
+ if (region->free_list.n.next == NULL) {
+ continue;
+ }
+ for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+ if (!hdr->free)
+ continue;
+
+ region_free += hdr->num_longs * sizeof(long);
+ }
+ prlog(PR_INFO, "Region %s free: %"PRIx64"\n",
+ region->name, region_free);
+ total_free += region_free;
+ }
+
+ prlog(PR_INFO, "Total free: %"PRIu64"\n", total_free);
+
+ return total_free;
+}
+
+static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,
+ const char *location)
+{
+ size_t alloc_longs, offset;
+ struct free_hdr *f;
+ struct alloc_hdr *next;
+
+ /* Align must be power of 2. */
+ assert(!((align - 1) & align));
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ /* Unallocatable region? */
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ return NULL;
+
+ /* First allocation? */
+ if (region->free_list.n.next == NULL)
+ init_allocatable_region(region);
+
+ /* Don't do screwy sizes. */
+ if (size > region->len)
+ return NULL;
+
+ /* Don't do tiny alignments, we deal in long increments. */
+ if (align < sizeof(long))
+ align = sizeof(long);
+
+ /* Convert size to number of longs, too. */
+ alloc_longs = (size + sizeof(long)-1) / sizeof(long) + ALLOC_HDR_LONGS;
+
+ /* Can't be too small for when we free it, either. */
+ if (alloc_longs < ALLOC_MIN_LONGS)
+ alloc_longs = ALLOC_MIN_LONGS;
+
+ /* Walk free list. */
+ list_for_each(&region->free_list, f, list) {
+ /* We may have to skip some to meet alignment. */
+ if (fits(f, alloc_longs, align, &offset))
+ goto found;
+ }
+
+ return NULL;
+
+found:
+ assert(f->hdr.free);
+ assert(!f->hdr.prev_free);
+
+ /* This block is no longer free. */
+ list_del_from(&region->free_list, &f->list);
+ f->hdr.free = false;
+ f->hdr.location = location;
+
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ assert(next->prev_free);
+ next->prev_free = false;
+ }
+
+ if (offset != 0) {
+ struct free_hdr *pre = f;
+
+ f = (void *)f + offset * sizeof(long);
+ assert(f >= pre + 1);
+
+ /* Set up new header. */
+ f->hdr.num_longs = pre->hdr.num_longs - offset;
+ /* f->hdr.prev_free will be set by make_free below. */
+ f->hdr.free = false;
+ f->hdr.location = location;
+
+ /* Fix up old header. */
+ pre->hdr.num_longs = offset;
+ pre->hdr.prev_free = false;
+
+ /* This coalesces as required. */
+ make_free(region, pre, location, true);
+ }
+
+ /* We might be too long; put the rest back. */
+ discard_excess(region, &f->hdr, alloc_longs, location, true);
+
+ /* Clear tailer for debugging */
+ *tailer(f) = 0;
+
+ /* Their pointer is immediately after header. */
+ return &f->hdr + 1;
+}
+
+void *mem_alloc(struct mem_region *region, size_t size, size_t align,
+ const char *location)
+{
+ static bool dumped = false;
+ void *r;
+
+ assert(lock_held_by_me(&region->free_list_lock));
+
+ r = __mem_alloc(region, size, align, location);
+ if (r)
+ return r;
+
+ prerror("mem_alloc(0x%lx, 0x%lx, \"%s\", %s) failed !\n",
+ size, align, location, region->name);
+ if (!dumped) {
+ mem_dump_allocs();
+ dumped = true;
+ }
+
+ return NULL;
+}
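+
+/*
+ * A minimal usage sketch (the caller supplies the locking, per the
+ * assert above):
+ *
+ *   lock(&region->free_list_lock);
+ *   p = mem_alloc(region, 128, 16, __location__);
+ *   ...
+ *   mem_free(region, p, __location__);
+ *   unlock(&region->free_list_lock);
+ *
+ * Most code goes through the malloc()/free() wrappers declared in
+ * mem_region-malloc.h instead, which wrap skiboot_heap's
+ * free_list_lock around calls like these.
+ */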
+
+void mem_free(struct mem_region *region, void *mem, const char *location)
+{
+ struct alloc_hdr *hdr;
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ assert(lock_held_by_me(&region->free_list_lock));
+
+ /* Freeing NULL is always a noop. */
+ if (!mem)
+ return;
+
+ /* Your memory is in the region, right? */
+ assert(mem >= region_start(region) + sizeof(*hdr));
+ assert(mem < region_start(region) + region->len);
+
+ /* Grab header. */
+ hdr = mem - sizeof(*hdr);
+
+ if (hdr->free)
+ bad_header(region, hdr, "re-freed", location);
+
+ make_free(region, (struct free_hdr *)hdr, location, false);
+}
+
+size_t mem_allocated_size(const void *ptr)
+{
+ const struct alloc_hdr *hdr = ptr - sizeof(*hdr);
+ return hdr->num_longs * sizeof(long) - sizeof(struct alloc_hdr);
+}
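+
+/*
+ * e.g. a 100-byte allocation on a 64-bit build with a 16-byte
+ * alloc_hdr: __mem_alloc rounds 100 bytes up to 13 longs and adds
+ * ALLOC_HDR_LONGS (2), so num_longs is 15 and mem_allocated_size()
+ * returns 15 * 8 - 16 = 104 bytes, slightly more than requested.
+ */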
+
+bool mem_resize(struct mem_region *region, void *mem, size_t len,
+ const char *location)
+{
+ struct alloc_hdr *hdr, *next;
+ struct free_hdr *f;
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ assert(lock_held_by_me(&region->free_list_lock));
+
+ /* Get header. */
+ hdr = mem - sizeof(*hdr);
+ if (hdr->free)
+ bad_header(region, hdr, "resize", location);
+
+ /* Convert to number of longs, rounding up; this includes the header. */
+ len = (sizeof(*hdr) + len + sizeof(long) - 1) / sizeof(long);
+
+ /* Can't be too small for when we free it, either. */
+ if (len < ALLOC_MIN_LONGS)
+ len = ALLOC_MIN_LONGS;
+
+ /* Shrinking is simple. */
+ if (len <= hdr->num_longs) {
+ hdr->location = location;
+ discard_excess(region, hdr, len, location, false);
+ return true;
+ }
+
+ /* Check if we can expand. */
+ next = next_hdr(region, hdr);
+ if (!next || !next->free || hdr->num_longs + next->num_longs < len)
+ return false;
+
+ /* OK, it's free and big enough, absorb it. */
+ f = (struct free_hdr *)next;
+ list_del_from(&region->free_list, &f->list);
+ hdr->num_longs += next->num_longs;
+ hdr->location = location;
+
+ /* Update next prev_free */
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ assert(next->prev_free);
+ next->prev_free = false;
+ }
+
+ /* Clear tailer for debugging */
+ *tailer(f) = 0;
+
+ /* Now we might have *too* much. */
+ discard_excess(region, hdr, len, location, true);
+ return true;
+}
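+
+/*
+ * Shrinking always succeeds: the header is rewritten and any excess
+ * beyond ALLOC_MIN_LONGS is split off as a new free block. Growing
+ * succeeds only when the block immediately after is free and large
+ * enough to absorb. Callers typically use the realloc() wrapper from
+ * mem_region-malloc.h rather than calling this directly.
+ */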
+
+bool mem_check(const struct mem_region *region)
+{
+ size_t frees = 0;
+ struct alloc_hdr *hdr, *prev_free = NULL;
+ struct free_hdr *f;
+
+ /* Check it's sanely aligned. */
+ if (region->start % sizeof(long)) {
+ prerror("Region '%s' not sanely aligned (%llx)\n",
+ region->name, (unsigned long long)region->start);
+ return false;
+ }
+ if ((long)region->len % sizeof(long)) {
+ prerror("Region '%s' not sane length (%llu)\n",
+ region->name, (unsigned long long)region->len);
+ return false;
+ }
+
+ /* Not ours to play with, or empty? Don't do anything. */
+ if (!(region->type == REGION_MEMORY ||
+ region->type == REGION_SKIBOOT_HEAP) ||
+ region->free_list.n.next == NULL)
+ return true;
+
+ /* Walk linearly. */
+ for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+ if (hdr->num_longs < ALLOC_MIN_LONGS) {
+ prerror("Region '%s' %s %p (%s) size %zu\n",
+ region->name, hdr->free ? "free" : "alloc",
+ hdr, hdr_location(hdr),
+ hdr->num_longs * sizeof(long));
+ return false;
+ }
+ if ((unsigned long)hdr + hdr->num_longs * sizeof(long) >
+ region->start + region->len) {
+ prerror("Region '%s' %s %p (%s) oversize %zu\n",
+ region->name, hdr->free ? "free" : "alloc",
+ hdr, hdr_location(hdr),
+ hdr->num_longs * sizeof(long));
+ return false;
+ }
+ if (hdr->free) {
+ if (hdr->prev_free || prev_free) {
+ prerror("Region '%s' free %p (%s) has prev_free"
+ " %p (%s) %sset?\n",
+ region->name, hdr, hdr_location(hdr),
+ prev_free,
+ prev_free ? hdr_location(prev_free)
+ : "NULL",
+ hdr->prev_free ? "" : "un");
+ return false;
+ }
+ prev_free = hdr;
+ frees ^= (unsigned long)hdr - region->start;
+ } else {
+ if (hdr->prev_free != (bool)prev_free) {
+ prerror("Region '%s' alloc %p (%s) has"
+ " prev_free %p %sset?\n",
+ region->name, hdr, hdr_location(hdr),
+ prev_free, hdr->prev_free ? "" : "un");
+ return false;
+ }
+ prev_free = NULL;
+ }
+ }
+
+ /* Now walk free list. */
+ list_for_each(&region->free_list, f, list)
+ frees ^= (unsigned long)f - region->start;
+
+ if (frees) {
+ prerror("Region '%s' free list and walk do not match!\n",
+ region->name);
+ return false;
+ }
+ return true;
+}
+
+bool mem_check_all(void)
+{
+ struct mem_region *r;
+
+ list_for_each(&regions, r, list) {
+ if (!mem_check(r))
+ return false;
+ }
+
+ return true;
+}
+
+static struct mem_region *new_region(const char *name,
+ uint64_t start, uint64_t len,
+ struct dt_node *node,
+ enum mem_region_type type)
+{
+ struct mem_region *region;
+
+ region = malloc(sizeof(*region));
+ if (!region)
+ return NULL;
+
+ region->name = name;
+ region->start = start;
+ region->len = len;
+ region->node = node;
+ region->type = type;
+ region->free_list.n.next = NULL;
+ init_lock(&region->free_list_lock);
+
+ return region;
+}
+
+/* We always split regions, so we only have to replace one. */
+static struct mem_region *split_region(struct mem_region *head,
+ uint64_t split_at,
+ enum mem_region_type type)
+{
+ struct mem_region *tail;
+ uint64_t end = head->start + head->len;
+
+ tail = new_region(head->name, split_at, end - split_at,
+ head->node, type);
+ /* Original region becomes head. */
+ if (tail)
+ head->len -= tail->len;
+
+ return tail;
+}
+
+static bool intersects(const struct mem_region *region, uint64_t addr)
+{
+ return addr > region->start &&
+ addr < region->start + region->len;
+}
+
+static bool maybe_split(struct mem_region *r, uint64_t split_at)
+{
+ struct mem_region *tail;
+
+ if (!intersects(r, split_at))
+ return true;
+
+ tail = split_region(r, split_at, r->type);
+ if (!tail)
+ return false;
+
+ /* Tail add is important: we may need to split again! */
+ list_add_after(&regions, &tail->list, &r->list);
+ return true;
+}
+
+static bool overlaps(const struct mem_region *r1, const struct mem_region *r2)
+{
+ return (r1->start + r1->len > r2->start
+ && r1->start < r2->start + r2->len);
+}
+
+static bool contains(const struct mem_region *r1, const struct mem_region *r2)
+{
+ u64 r1_end = r1->start + r1->len;
+ u64 r2_end = r2->start + r2->len;
+
+ return (r1->start <= r2->start && r2_end <= r1_end);
+}
+
+static struct mem_region *get_overlap(const struct mem_region *region)
+{
+ struct mem_region *i;
+
+ list_for_each(&regions, i, list) {
+ if (overlaps(region, i))
+ return i;
+ }
+ return NULL;
+}
+
+static void add_region_to_regions(struct mem_region *region)
+{
+ struct mem_region *r;
+
+ list_for_each(&regions, r, list) {
+ if (r->start < region->start)
+ continue;
+
+ list_add_before(&regions, &region->list, &r->list);
+ return;
+ }
+ list_add_tail(&regions, &region->list);
+}
+
+static bool add_region(struct mem_region *region)
+{
+ struct mem_region *r;
+
+ if (mem_regions_finalised) {
+ prerror("MEM: add_region(%s@0x%"PRIx64") called after finalise!\n",
+ region->name, region->start);
+ return false;
+ }
+
+ /* First split any regions which intersect. */
+ list_for_each(&regions, r, list) {
+ /*
+ * The new region should be fully contained by an existing one.
+ * If it's not then we have a problem where reservations
+ * partially overlap which is probably broken.
+ *
+ * NB: There *might* be situations where this is legitimate,
+ * but the region handling does not currently support this.
+ */
+ if (overlaps(r, region) && !contains(r, region)) {
+ prerror("MEM: Partial overlap detected between regions:\n");
+ prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"] (new)\n",
+ region->name, region->start,
+ region->start + region->len);
+ prerror("MEM: %s [0x%"PRIx64"-0x%"PRIx64"]\n",
+ r->name, r->start, r->start + r->len);
+ return false;
+ }
+
+ if (!maybe_split(r, region->start) ||
+ !maybe_split(r, region->start + region->len))
+ return false;
+ }
+
+ /* Now we have only whole overlaps, if any. */
+ while ((r = get_overlap(region)) != NULL) {
+ assert(r->start == region->start);
+ assert(r->len == region->len);
+ list_del_from(&regions, &r->list);
+ free(r);
+ }
+
+ /* Finally, add in our own region. */
+ add_region_to_regions(region);
+ return true;
+}
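+
+/*
+ * A worked example: reserving [0x2000, 0x3000) out of an existing
+ * region covering [0x0, 0x10000) first splits it into [0x0, 0x2000),
+ * [0x2000, 0x3000) and [0x3000, 0x10000); the middle piece is then an
+ * exact whole overlap, so it is deleted and replaced by the new
+ * reservation.
+ */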
+
+static void mem_reserve(enum mem_region_type type, const char *name,
+ uint64_t start, uint64_t len)
+{
+ struct mem_region *region;
+ bool added = true;
+
+ lock(&mem_region_lock);
+ region = new_region(name, start, len, NULL, type);
+ assert(region);
+
+ if (!mem_region_init_done)
+ list_add(&early_reserves, &region->list);
+ else
+ added = add_region(region);
+
+ assert(added);
+ unlock(&mem_region_lock);
+}
+
+void mem_reserve_fw(const char *name, uint64_t start, uint64_t len)
+{
+ mem_reserve(REGION_FW_RESERVED, name, start, len);
+}
+
+void mem_reserve_hwbuf(const char *name, uint64_t start, uint64_t len)
+{
+ mem_reserve(REGION_RESERVED, name, start, len);
+}
+
+static bool matches_chip_id(const __be32 ids[], size_t num, u32 chip_id)
+{
+ size_t i;
+
+ for (i = 0; i < num; i++)
+ if (be32_to_cpu(ids[i]) == chip_id)
+ return true;
+
+ return false;
+}
+
+void *__local_alloc(unsigned int chip_id, size_t size, size_t align,
+ const char *location)
+{
+ struct mem_region *region;
+ void *p = NULL;
+ bool use_local = true;
+
+ lock(&mem_region_lock);
+
+restart:
+ list_for_each(&regions, region, list) {
+ const struct dt_property *prop;
+ const __be32 *ids;
+
+ if (!(region->type == REGION_SKIBOOT_HEAP ||
+ region->type == REGION_MEMORY))
+ continue;
+
+ /* Don't allocate from normal heap. */
+ if (region == &skiboot_heap)
+ continue;
+
+ /* First pass, only match node local regions */
+ if (use_local) {
+ if (!region->node)
+ continue;
+ prop = dt_find_property(region->node, "ibm,chip-id");
+ ids = (const __be32 *)prop->prop;
+ if (!matches_chip_id(ids, prop->len/sizeof(u32),
+ chip_id))
+ continue;
+ }
+
+ /* Second pass, match anything */
+ lock(&region->free_list_lock);
+ p = mem_alloc(region, size, align, location);
+ unlock(&region->free_list_lock);
+ if (p)
+ break;
+ }
+
+ /*
+ * If we can't allocate the memory block from the expected
+ * node, we bail to any one that can accommodate our request.
+ */
+ if (!p && use_local) {
+ use_local = false;
+ goto restart;
+ }
+
+ unlock(&mem_region_lock);
+
+ return p;
+}
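+
+/*
+ * Callers normally reach this via the local_alloc() wrapper in
+ * mem_region-malloc.h, e.g. to keep a per-chip structure on its own
+ * node's memory:
+ *
+ *   void *p = local_alloc(chip->id, size, align);
+ *
+ * with the fallback pass above kicking in when the local regions are
+ * exhausted.
+ */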
+
+struct mem_region *find_mem_region(const char *name)
+{
+ struct mem_region *region;
+
+ list_for_each(&regions, region, list) {
+ if (streq(region->name, name))
+ return region;
+ }
+ return NULL;
+}
+
+bool mem_range_is_reserved(uint64_t start, uint64_t size)
+{
+ uint64_t end = start + size;
+ struct mem_region *region;
+ struct list_head *search;
+
+ /* We may have the range covered by a number of regions, which could
+ * appear in any order. So, we look for a region that covers the
+ * start address, and bump start up to the end of that region.
+ *
+ * We repeat until we've either bumped past the end of the range,
+ * or we didn't find a matching region.
+ *
+ * This has a worst-case of O(n^2), but n is well bounded by the
+ * small number of reservations.
+ */
+
+ if (!mem_region_init_done)
+ search = &early_reserves;
+ else
+ search = &regions;
+
+ for (;;) {
+ bool found = false;
+
+ list_for_each(search, region, list) {
+ if (!region_is_reserved(region))
+ continue;
+
+ /* does this region overlap the start address, and
+ * have a non-zero size? */
+ if (region->start <= start &&
+ region->start + region->len > start &&
+ region->len) {
+ start = region->start + region->len;
+ found = true;
+ }
+ }
+
+ /* 'end' is the first byte outside of the range */
+ if (start >= end)
+ return true;
+
+ if (!found)
+ break;
+ }
+
+ return false;
+}
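+
+/*
+ * Example: with reservations [0x1000, 0x2000) and [0x2000, 0x5000), a
+ * query with start = 0x1800, size = 0x2000 (end = 0x3800) bumps start
+ * first to 0x2000 and then to 0x5000; that is past end, so the range
+ * counts as reserved even though no single region covers all of it.
+ */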
+
+static void mem_region_parse_reserved_properties(void)
+{
+ const struct dt_property *names, *ranges;
+ struct mem_region *region;
+
+ prlog(PR_DEBUG, "MEM: parsing reserved memory from "
+ "reserved-names/-ranges properties\n");
+
+ names = dt_find_property(dt_root, "reserved-names");
+ ranges = dt_find_property(dt_root, "reserved-ranges");
+ if (names && ranges) {
+ const uint64_t *range;
+ int n, len;
+
+ range = (const void *)ranges->prop;
+
+ for (n = 0; n < names->len; n += len, range += 2) {
+ char *name;
+
+ len = strlen(names->prop + n) + 1;
+ name = strdup(names->prop + n);
+
+ region = new_region(name,
+ dt_get_number(range, 2),
+ dt_get_number(range + 1, 2),
+ NULL, REGION_FW_RESERVED);
+ if (!add_region(region)) {
+ prerror("Couldn't add mem_region %s\n", name);
+ abort();
+ }
+ }
+ } else if (names || ranges) {
+ prerror("Invalid properties: reserved-names=%p "
+ "with reserved-ranges=%p\n",
+ names, ranges);
+ abort();
+ } else {
+ return;
+ }
+}
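+
+/*
+ * The pair of properties parsed above looks roughly like this in the
+ * device tree (names and values illustrative only):
+ *
+ *   reserved-names  = "ibm,hbrt-code-image", "ibm,hbrt-data";
+ *   reserved-ranges = <0x0 0x30000000 0x0 0x100000
+ *                      0x0 0x30100000 0x0 0x40000>;
+ *
+ * i.e. one NUL-terminated name per (address, size) pair of 64-bit
+ * values.
+ */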
+
+static bool mem_region_parse_reserved_nodes(const char *path)
+{
+ struct dt_node *parent, *node;
+
+ parent = dt_find_by_path(dt_root, path);
+ if (!parent)
+ return false;
+
+ prlog(PR_INFO, "MEM: parsing reserved memory from node %s\n", path);
+
+ dt_for_each_child(parent, node) {
+ const struct dt_property *reg;
+ struct mem_region *region;
+ int type;
+
+ reg = dt_find_property(node, "reg");
+ if (!reg) {
+ char *nodepath = dt_get_path(node);
+ prerror("node %s has no reg property, ignoring\n",
+ nodepath);
+ free(nodepath);
+ continue;
+ }
+
+ if (dt_has_node_property(node, "no-map", NULL))
+ type = REGION_RESERVED;
+ else
+ type = REGION_FW_RESERVED;
+
+ region = new_region(strdup(node->name),
+ dt_get_number(reg->prop, 2),
+ dt_get_number(reg->prop + sizeof(u64), 2),
+ node, type);
+ if (!add_region(region)) {
+ char *nodepath = dt_get_path(node);
+ prerror("node %s failed to add_region()\n", nodepath);
+ free(nodepath);
+ }
+ }
+
+ return true;
+}
+
+/* Trawl through device tree, create memory regions from nodes. */
+void mem_region_init(void)
+{
+ struct mem_region *region, *next;
+ struct dt_node *i;
+ bool rc;
+
+ /*
+ * Add associativity properties outside of the lock
+ * to avoid recursive locking caused by allocations
+ * done by add_chip_dev_associativity()
+ */
+ dt_for_each_node(dt_root, i) {
+ if (!dt_has_node_property(i, "device_type", "memory") &&
+ !dt_has_node_property(i, "compatible", "pmem-region"))
+ continue;
+
+ /* Add associativity properties */
+ add_chip_dev_associativity(i);
+ }
+
+ /* Add each memory node. */
+ dt_for_each_node(dt_root, i) {
+ uint64_t start, len;
+ char *rname;
+#define NODE_REGION_PREFIX "ibm,firmware-allocs-"
+
+ if (!dt_has_node_property(i, "device_type", "memory"))
+ continue;
+ rname = zalloc(strlen(i->name) + strlen(NODE_REGION_PREFIX) + 1);
+ assert(rname);
+ strcat(rname, NODE_REGION_PREFIX);
+ strcat(rname, i->name);
+ start = dt_get_address(i, 0, &len);
+ lock(&mem_region_lock);
+ region = new_region(rname, start, len, i, REGION_MEMORY);
+ if (!region) {
+ prerror("MEM: Could not add mem region %s!\n", i->name);
+ abort();
+ }
+ add_region_to_regions(region);
+ if ((start + len) > top_of_ram)
+ top_of_ram = start + len;
+ unlock(&mem_region_lock);
+ }
+
+ /*
+ * This is called after we know the maximum PIR of all CPUs,
+ * so we can dynamically set the stack length.
+ */
+ skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;
+
+ lock(&mem_region_lock);
+
+ /* Now carve out our own reserved areas. */
+ if (!add_region(&skiboot_os_reserve) ||
+ !add_region(&skiboot_code_and_text) ||
+ !add_region(&skiboot_heap) ||
+ !add_region(&skiboot_after_heap) ||
+ !add_region(&skiboot_cpu_stacks)) {
+ prerror("Out of memory adding skiboot reserved areas\n");
+ abort();
+ }
+
+ if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
+ if (!add_region(&skiboot_mambo_kernel) ||
+ !add_region(&skiboot_mambo_initramfs)) {
+ prerror("Out of memory adding mambo payload\n");
+ abort();
+ }
+ }
+
+ /* Add reserved ranges from HDAT */
+ list_for_each_safe(&early_reserves, region, next, list) {
+ bool added;
+
+ list_del(&region->list);
+ added = add_region(region);
+ assert(added);
+ }
+
+ /* Add reserved ranges from the DT */
+ rc = mem_region_parse_reserved_nodes("/reserved-memory");
+ if (!rc)
+ rc = mem_region_parse_reserved_nodes(
+ "/ibm,hostboot/reserved-memory");
+ if (!rc)
+ mem_region_parse_reserved_properties();
+
+ mem_region_init_done = true;
+ unlock(&mem_region_lock);
+}
+
+static uint64_t allocated_length(const struct mem_region *r)
+{
+ struct free_hdr *f, *last = NULL;
+
+ /* No allocations at all? */
+ if (r->free_list.n.next == NULL)
+ return 0;
+
+ /* Find last free block. */
+ list_for_each(&r->free_list, f, list)
+ if (f > last)
+ last = f;
+
+ /* No free blocks? */
+ if (!last)
+ return r->len;
+
+ /* Last free block isn't at end? */
+ if (next_hdr(r, &last->hdr))
+ return r->len;
+ return (unsigned long)last - r->start;
+}
+
+/* Separate out allocated sections into their own region. */
+void mem_region_release_unused(void)
+{
+ struct mem_region *r;
+
+ lock(&mem_region_lock);
+ assert(!mem_regions_finalised);
+
+ prlog(PR_INFO, "Releasing unused memory:\n");
+ list_for_each(&regions, r, list) {
+ uint64_t used_len;
+
+ /* If it's not allocatable, ignore it. */
+ if (!(r->type == REGION_SKIBOOT_HEAP ||
+ r->type == REGION_MEMORY))
+ continue;
+
+ used_len = allocated_length(r);
+
+ prlog(PR_INFO, " %s: %llu/%llu used\n",
+ r->name, (long long)used_len, (long long)r->len);
+
+ /* We keep the skiboot heap. */
+ if (r == &skiboot_heap)
+ continue;
+
+ /* Nothing used? Whole thing is for Linux. */
+ if (used_len == 0)
+ r->type = REGION_OS;
+ /* Partially used? Split region. */
+ else if (used_len != r->len) {
+ struct mem_region *for_linux;
+ struct free_hdr *last = region_start(r) + used_len;
+
+ /* Remove the final free block. */
+ list_del_from(&r->free_list, &last->list);
+
+ for_linux = split_region(r, r->start + used_len,
+ REGION_OS);
+ if (!for_linux) {
+ prerror("OOM splitting mem node %s for linux\n",
+ r->name);
+ abort();
+ }
+ list_add(&regions, &for_linux->list);
+ }
+ }
+ unlock(&mem_region_lock);
+}
+
+static void mem_clear_range(uint64_t s, uint64_t e)
+{
+ uint64_t res_start, res_end;
+
+ /* Skip exception vectors */
+ if (s < EXCEPTION_VECTORS_END)
+ s = EXCEPTION_VECTORS_END;
+
+ /* Skip kernel preload area */
+ res_start = (uint64_t)KERNEL_LOAD_BASE;
+ res_end = res_start + KERNEL_LOAD_SIZE;
+
+ if (s >= res_start && s < res_end)
+ s = res_end;
+ if (e > res_start && e <= res_end)
+ e = res_start;
+ if (e <= s)
+ return;
+ if (s < res_start && e > res_end) {
+ mem_clear_range(s, res_start);
+ mem_clear_range(res_end, e);
+ return;
+ }
+
+ /* Skip initramfs preload area */
+ res_start = (uint64_t)INITRAMFS_LOAD_BASE;
+ res_end = res_start + INITRAMFS_LOAD_SIZE;
+
+ if (s >= res_start && s < res_end)
+ s = res_end;
+ if (e > res_start && e <= res_end)
+ e = res_start;
+ if (e <= s)
+ return;
+ if (s < res_start && e > res_end) {
+ mem_clear_range(s, res_start);
+ mem_clear_range(res_end, e);
+ return;
+ }
+
+ prlog(PR_DEBUG, "Clearing region %llx-%llx\n",
+ (long long)s, (long long)e);
+ memset((void *)s, 0, e - s);
+}
+
+struct mem_region_clear_job_args {
+ char *job_name;
+ uint64_t s, e;
+};
+
+static void mem_region_clear_job(void *data)
+{
+ struct mem_region_clear_job_args *arg = (struct mem_region_clear_job_args*)data;
+ mem_clear_range(arg->s, arg->e);
+}
+
+#define MEM_REGION_CLEAR_JOB_SIZE (16ULL*(1<<30))
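+
+/*
+ * e.g. a 100GB REGION_OS region becomes 1 + (100GB / 16GB) = 7 jobs:
+ * six full 16GB chunks carved off the top of the range plus one final
+ * job for the remaining 4GB tail.
+ */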
+
+static struct cpu_job **mem_clear_jobs;
+static struct mem_region_clear_job_args *mem_clear_job_args;
+static int mem_clear_njobs = 0;
+
+void start_mem_region_clear_unused(void)
+{
+ struct mem_region *r;
+ uint64_t s, l;
+ uint64_t total = 0;
+ uint32_t chip_id;
+ char *path;
+ int i;
+ struct cpu_job **jobs;
+ struct mem_region_clear_job_args *job_args;
+
+ lock(&mem_region_lock);
+ assert(mem_regions_finalised);
+
+ mem_clear_njobs = 0;
+
+ list_for_each(&regions, r, list) {
+ if (!(r->type == REGION_OS))
+ continue;
+ mem_clear_njobs++;
+ /* One job per 16GB */
+ mem_clear_njobs += r->len / MEM_REGION_CLEAR_JOB_SIZE;
+ }
+
+ jobs = malloc(mem_clear_njobs * sizeof(struct cpu_job*));
+ job_args = malloc(mem_clear_njobs * sizeof(struct mem_region_clear_job_args));
+ mem_clear_jobs = jobs;
+ mem_clear_job_args = job_args;
+
+ prlog(PR_NOTICE, "Clearing unused memory:\n");
+ i = 0;
+ list_for_each(&regions, r, list) {
+ /* If it's not unused, ignore it. */
+ if (!(r->type == REGION_OS))
+ continue;
+
+ assert(r != &skiboot_heap);
+
+ s = r->start;
+ l = r->len;
+ while (l > MEM_REGION_CLEAR_JOB_SIZE) {
+ job_args[i].s = s + l - MEM_REGION_CLEAR_JOB_SIZE;
+ job_args[i].e = s + l;
+ l -= MEM_REGION_CLEAR_JOB_SIZE;
+ job_args[i].job_name = malloc(sizeof(char) * 100);
+ total += MEM_REGION_CLEAR_JOB_SIZE;
+ chip_id = __dt_get_chip_id(r->node);
+ if (chip_id == -1)
+ chip_id = 0;
+ path = dt_get_path(r->node);
+ snprintf(job_args[i].job_name, 100,
+ "clear %s, %s 0x%"PRIx64" len: %"PRIx64" on %d",
+ r->name, path,
+ job_args[i].s,
+ (job_args[i].e - job_args[i].s),
+ chip_id);
+ free(path);
+ jobs[i] = cpu_queue_job_on_node(chip_id,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ if (!jobs[i])
+ jobs[i] = cpu_queue_job(NULL,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ assert(jobs[i]);
+ i++;
+ }
+ job_args[i].s = s;
+ job_args[i].e = s + l;
+ job_args[i].job_name = malloc(sizeof(char) * 100);
+ total += l;
+ chip_id = __dt_get_chip_id(r->node);
+ if (chip_id == -1)
+ chip_id = 0;
+ path = dt_get_path(r->node);
+ snprintf(job_args[i].job_name, 100,
+ "clear %s, %s 0x%"PRIx64" len: 0x%"PRIx64" on %d",
+ r->name, path,
+ job_args[i].s,
+ (job_args[i].e - job_args[i].s),
+ chip_id);
+ free(path);
+ jobs[i] = cpu_queue_job_on_node(chip_id,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ if (!jobs[i])
+ jobs[i] = cpu_queue_job(NULL,
+ job_args[i].job_name,
+ mem_region_clear_job,
+ &job_args[i]);
+ assert(jobs[i]);
+ i++;
+ }
+ unlock(&mem_region_lock);
+ cpu_process_local_jobs();
+}
+
+void wait_mem_region_clear_unused(void)
+{
+ uint64_t l;
+ uint64_t total = 0;
+ int i;
+
+ for (i = 0; i < mem_clear_njobs; i++) {
+ total += (mem_clear_job_args[i].e - mem_clear_job_args[i].s);
+ }
+
+ l = 0;
+ for (i = 0; i < mem_clear_njobs; i++) {
+ cpu_wait_job(mem_clear_jobs[i], true);
+ l += (mem_clear_job_args[i].e - mem_clear_job_args[i].s);
+ printf("Clearing memory... %"PRIu64"/%"PRIu64"GB done\n",
+ l>>30, total>>30);
+ free(mem_clear_job_args[i].job_name);
+ }
+ free(mem_clear_jobs);
+ free(mem_clear_job_args);
+}
+
+static void mem_region_add_dt_reserved_node(struct dt_node *parent,
+ struct mem_region *region)
+{
+ char *name, *p;
+
+ /* If a reserved region was established before skiboot, it may be
+ * referenced by a device-tree node with extra data. In that case,
+ * copy the node to /reserved-memory/, unless it's already there.
+ *
+ * We update region->node to the new copy here, as the prd code may
+ * update regions' device-tree nodes, and we want those updates to
+ * apply to the nodes in /reserved-memory/.
+ */
+ if (region->type == REGION_FW_RESERVED && region->node) {
+ if (region->node->parent != parent)
+ region->node = dt_copy(region->node, parent);
+ return;
+ }
+
+ name = strdup(region->name);
+ assert(name);
+
+ /* remove any cell addresses in the region name; we have our own cell
+ * addresses here */
+ p = strchr(name, '@');
+ if (p)
+ *p = '\0';
+
+ region->node = dt_new_addr(parent, name, region->start);
+ assert(region->node);
+ dt_add_property_u64s(region->node, "reg", region->start, region->len);
+
+ /*
+ * This memory is used by hardware and may need special handling. Ask
+ * the host kernel not to map it by default.
+ */
+ if (region->type == REGION_RESERVED)
+ dt_add_property(region->node, "no-map", NULL, 0);
+
+ free(name);
+}
+
+void mem_region_add_dt_reserved(void)
+{
+ int names_len, ranges_len, len;
+ const struct dt_property *prop;
+ struct mem_region *region;
+ void *names, *ranges;
+ struct dt_node *node;
+ fdt64_t *range;
+ char *name;
+
+ names_len = 0;
+ ranges_len = 0;
+
+ /* Finalise the region list, so we know that the regions list won't be
+ * altered after this point. The regions' free lists may change after
+ * we drop the lock, but we don't access those. */
+ lock(&mem_region_lock);
+ mem_regions_finalised = true;
+
+ /* establish top-level reservation node */
+ node = dt_find_by_path(dt_root, "reserved-memory");
+ if (!node) {
+ node = dt_new(dt_root, "reserved-memory");
+ dt_add_property_cells(node, "#address-cells", 2);
+ dt_add_property_cells(node, "#size-cells", 2);
+ dt_add_property(node, "ranges", NULL, 0);
+ }
+
+ prlog(PR_INFO, "Reserved regions:\n");
+
+ /* First pass, create /reserved-memory/ nodes for each reservation,
+ * and calculate the length for the /reserved-names and
+ * /reserved-ranges properties */
+ list_for_each(&regions, region, list) {
+ if (!region_is_reservable(region))
+ continue;
+
+ prlog(PR_INFO, " 0x%012llx..%012llx : %s\n",
+ (long long)region->start,
+ (long long)(region->start + region->len - 1),
+ region->name);
+
+ mem_region_add_dt_reserved_node(node, region);
+
+ /* calculate the size of the properties populated later */
+ names_len += strlen(region->node->name) + 1;
+ ranges_len += 2 * sizeof(uint64_t);
+ }
+
+ name = names = malloc(names_len);
+ range = ranges = malloc(ranges_len);
+
+ /* Second pass: populate the old-style reserved-names and
+ * reserved-regions arrays based on the node data */
+ list_for_each(&regions, region, list) {
+ if (!region_is_reservable(region))
+ continue;
+
+ len = strlen(region->node->name) + 1;
+ memcpy(name, region->node->name, len);
+ name += len;
+
+ range[0] = cpu_to_fdt64(region->start);
+ range[1] = cpu_to_fdt64(region->len);
+ range += 2;
+ }
+ unlock(&mem_region_lock);
+
+ prop = dt_find_property(dt_root, "reserved-names");
+ if (prop)
+ dt_del_property(dt_root, (struct dt_property *)prop);
+
+ prop = dt_find_property(dt_root, "reserved-ranges");
+ if (prop)
+ dt_del_property(dt_root, (struct dt_property *)prop);
+
+ dt_add_property(dt_root, "reserved-names", names, names_len);
+ dt_add_property(dt_root, "reserved-ranges", ranges, ranges_len);
+
+ free(names);
+ free(ranges);
+}
+
+struct mem_region *mem_region_next(struct mem_region *region)
+{
+ struct list_node *node;
+
+ assert(lock_held_by_me(&mem_region_lock));
+
+ node = region ? &region->list : &regions.n;
+
+ if (node->next == &regions.n)
+ return NULL;
+
+ return list_entry(node->next, struct mem_region, list);
+}
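+
+/*
+ * A minimal iteration sketch; starting from NULL yields the first
+ * region, and the caller must hold mem_region_lock throughout:
+ *
+ *   struct mem_region *r = NULL;
+ *
+ *   lock(&mem_region_lock);
+ *   while ((r = mem_region_next(r)) != NULL)
+ *       prlog(PR_DEBUG, "region %s\n", r->name);
+ *   unlock(&mem_region_lock);
+ */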