diff options
author | 2023-10-10 11:40:56 +0000 | |
---|---|---|
committer | 2023-10-10 11:40:56 +0000 | |
commit | e02cda008591317b1625707ff8e115a4841aa889 (patch) | |
tree | aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /hw/mem | |
parent | cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff) |
Introduce Virtio-loopback epsilon release:
The epsilon release introduces a new compatibility layer that makes the
virtio-loopback design work with QEMU and rust-vmm vhost-user backends without
requiring any changes.
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'hw/mem')
-rw-r--r-- | hw/mem/Kconfig | 13 | ||||
-rw-r--r-- | hw/mem/memory-device.c | 346 | ||||
-rw-r--r-- | hw/mem/meson.build | 9 | ||||
-rw-r--r-- | hw/mem/npcm7xx_mc.c | 84 | ||||
-rw-r--r-- | hw/mem/nvdimm.c | 266 | ||||
-rw-r--r-- | hw/mem/pc-dimm.c | 307 | ||||
-rw-r--r-- | hw/mem/sparse-mem.c | 150 | ||||
-rw-r--r-- | hw/mem/trace-events | 8 | ||||
-rw-r--r-- | hw/mem/trace.h | 1 |
9 files changed, 1184 insertions, 0 deletions
diff --git a/hw/mem/Kconfig b/hw/mem/Kconfig new file mode 100644 index 000000000..03dbb3c7d --- /dev/null +++ b/hw/mem/Kconfig @@ -0,0 +1,13 @@ +config DIMM + bool + select MEM_DEVICE + +config MEM_DEVICE + bool + +config NVDIMM + bool + select MEM_DEVICE + +config SPARSE_MEM + bool diff --git a/hw/mem/memory-device.c b/hw/mem/memory-device.c new file mode 100644 index 000000000..d9f830171 --- /dev/null +++ b/hw/mem/memory-device.c @@ -0,0 +1,346 @@ +/* + * Memory Device Interface + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2014 Red Hat Inc + * Copyright (c) 2018 Red Hat Inc + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/mem/memory-device.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "qemu/range.h" +#include "hw/virtio/vhost.h" +#include "sysemu/kvm.h" +#include "trace.h" + +static gint memory_device_addr_sort(gconstpointer a, gconstpointer b) +{ + const MemoryDeviceState *md_a = MEMORY_DEVICE(a); + const MemoryDeviceState *md_b = MEMORY_DEVICE(b); + const MemoryDeviceClass *mdc_a = MEMORY_DEVICE_GET_CLASS(a); + const MemoryDeviceClass *mdc_b = MEMORY_DEVICE_GET_CLASS(b); + const uint64_t addr_a = mdc_a->get_addr(md_a); + const uint64_t addr_b = mdc_b->get_addr(md_b); + + if (addr_a > addr_b) { + return 1; + } else if (addr_a < addr_b) { + return -1; + } + return 0; +} + +static int memory_device_build_list(Object *obj, void *opaque) +{ + GSList **list = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* only realized memory devices matter */ + *list = g_slist_insert_sorted(*list, dev, memory_device_addr_sort); + } + } + + object_child_foreach(obj, memory_device_build_list, opaque); + return 0; +} + +static int memory_device_used_region_size(Object *obj, void *opaque) +{ + uint64_t *size = opaque; + + if (object_dynamic_cast(obj, 
TYPE_MEMORY_DEVICE)) { + const DeviceState *dev = DEVICE(obj); + const MemoryDeviceState *md = MEMORY_DEVICE(obj); + + if (dev->realized) { + *size += memory_device_get_region_size(md, &error_abort); + } + } + + object_child_foreach(obj, memory_device_used_region_size, opaque); + return 0; +} + +static void memory_device_check_addable(MachineState *ms, uint64_t size, + Error **errp) +{ + uint64_t used_region_size = 0; + + /* we will need a new memory slot for kvm and vhost */ + if (kvm_enabled() && !kvm_has_free_slot(ms)) { + error_setg(errp, "hypervisor has no free memory slots left"); + return; + } + if (!vhost_has_free_slot()) { + error_setg(errp, "a used vhost backend has no free memory slots left"); + return; + } + + /* will we exceed the total amount of memory specified */ + memory_device_used_region_size(OBJECT(ms), &used_region_size); + if (used_region_size + size < used_region_size || + used_region_size + size > ms->maxram_size - ms->ram_size) { + error_setg(errp, "not enough space, currently 0x%" PRIx64 + " in use of total space for memory devices 0x" RAM_ADDR_FMT, + used_region_size, ms->maxram_size - ms->ram_size); + return; + } + +} + +static uint64_t memory_device_get_free_addr(MachineState *ms, + const uint64_t *hint, + uint64_t align, uint64_t size, + Error **errp) +{ + Error *err = NULL; + GSList *list = NULL, *item; + Range as, new = range_empty; + + if (!ms->device_memory) { + error_setg(errp, "memory devices (e.g. for memory hotplug) are not " + "supported by the machine"); + return 0; + } + + if (!memory_region_size(&ms->device_memory->mr)) { + error_setg(errp, "memory devices (e.g. 
for memory hotplug) are not " + "enabled, please specify the maxmem option"); + return 0; + } + range_init_nofail(&as, ms->device_memory->base, + memory_region_size(&ms->device_memory->mr)); + + /* start of address space indicates the maximum alignment we expect */ + if (!QEMU_IS_ALIGNED(range_lob(&as), align)) { + warn_report("the alignment (0x%" PRIx64 ") exceeds the expected" + " maximum alignment, memory will get fragmented and not" + " all 'maxmem' might be usable for memory devices.", + align); + } + + memory_device_check_addable(ms, size, &err); + if (err) { + error_propagate(errp, err); + return 0; + } + + if (hint && !QEMU_IS_ALIGNED(*hint, align)) { + error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes", + align); + return 0; + } + + if (!QEMU_IS_ALIGNED(size, align)) { + error_setg(errp, "backend memory size must be multiple of 0x%" + PRIx64, align); + return 0; + } + + if (hint) { + if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) { + error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64 + "], usable range for memory devices [0x%" PRIx64 ":0x%" + PRIx64 "]", *hint, size, range_lob(&as), + range_size(&as)); + return 0; + } + } else { + if (range_init(&new, QEMU_ALIGN_UP(range_lob(&as), align), size)) { + error_setg(errp, "can't add memory device, device too big"); + return 0; + } + } + + /* find address range that will fit new memory device */ + object_child_foreach(OBJECT(ms), memory_device_build_list, &list); + for (item = list; item; item = g_slist_next(item)) { + const MemoryDeviceState *md = item->data; + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(OBJECT(md)); + uint64_t next_addr; + Range tmp; + + range_init_nofail(&tmp, mdc->get_addr(md), + memory_device_get_region_size(md, &error_abort)); + + if (range_overlaps_range(&tmp, &new)) { + if (hint) { + const DeviceState *d = DEVICE(md); + error_setg(errp, "address range conflicts with memory device" + " id='%s'", d->id ? 
d->id : "(unnamed)"); + goto out; + } + + next_addr = QEMU_ALIGN_UP(range_upb(&tmp) + 1, align); + if (!next_addr || range_init(&new, next_addr, range_size(&new))) { + range_make_empty(&new); + break; + } + } else if (range_lob(&tmp) > range_upb(&new)) { + break; + } + } + + if (!range_contains_range(&as, &new)) { + error_setg(errp, "could not find position in guest address space for " + "memory device - memory fragmented due to alignments"); + } +out: + g_slist_free(list); + return range_lob(&new); +} + +MemoryDeviceInfoList *qmp_memory_device_list(void) +{ + GSList *devices = NULL, *item; + MemoryDeviceInfoList *list = NULL, **tail = &list; + + object_child_foreach(qdev_get_machine(), memory_device_build_list, + &devices); + + for (item = devices; item; item = g_slist_next(item)) { + const MemoryDeviceState *md = MEMORY_DEVICE(item->data); + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data); + MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); + + mdc->fill_device_info(md, info); + + QAPI_LIST_APPEND(tail, info); + } + + g_slist_free(devices); + + return list; +} + +static int memory_device_plugged_size(Object *obj, void *opaque) +{ + uint64_t *size = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { + const DeviceState *dev = DEVICE(obj); + const MemoryDeviceState *md = MEMORY_DEVICE(obj); + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj); + + if (dev->realized) { + *size += mdc->get_plugged_size(md, &error_abort); + } + } + + object_child_foreach(obj, memory_device_plugged_size, opaque); + return 0; +} + +uint64_t get_plugged_memory_size(void) +{ + uint64_t size = 0; + + memory_device_plugged_size(qdev_get_machine(), &size); + + return size; +} + +void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, + const uint64_t *legacy_align, Error **errp) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + Error *local_err = NULL; + uint64_t addr, align = 0; + MemoryRegion *mr; + + mr = 
mdc->get_memory_region(md, &local_err); + if (local_err) { + goto out; + } + + if (legacy_align) { + align = *legacy_align; + } else { + if (mdc->get_min_alignment) { + align = mdc->get_min_alignment(md); + } + align = MAX(align, memory_region_get_alignment(mr)); + } + addr = mdc->get_addr(md); + addr = memory_device_get_free_addr(ms, !addr ? NULL : &addr, align, + memory_region_size(mr), &local_err); + if (local_err) { + goto out; + } + mdc->set_addr(md, addr, &local_err); + if (!local_err) { + trace_memory_device_pre_plug(DEVICE(md)->id ? DEVICE(md)->id : "", + addr); + } +out: + error_propagate(errp, local_err); +} + +void memory_device_plug(MemoryDeviceState *md, MachineState *ms) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + const uint64_t addr = mdc->get_addr(md); + MemoryRegion *mr; + + /* + * We expect that a previous call to memory_device_pre_plug() succeeded, so + * it can't fail at this point. + */ + mr = mdc->get_memory_region(md, &error_abort); + g_assert(ms->device_memory); + + memory_region_add_subregion(&ms->device_memory->mr, + addr - ms->device_memory->base, mr); + trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr); +} + +void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + MemoryRegion *mr; + + /* + * We expect that a previous call to memory_device_pre_plug() succeeded, so + * it can't fail at this point. + */ + mr = mdc->get_memory_region(md, &error_abort); + g_assert(ms->device_memory); + + memory_region_del_subregion(&ms->device_memory->mr, mr); + trace_memory_device_unplug(DEVICE(md)->id ? 
DEVICE(md)->id : "", + mdc->get_addr(md)); +} + +uint64_t memory_device_get_region_size(const MemoryDeviceState *md, + Error **errp) +{ + const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); + MemoryRegion *mr; + + /* dropping const here is fine as we don't touch the memory region */ + mr = mdc->get_memory_region((MemoryDeviceState *)md, errp); + if (!mr) { + return 0; + } + + return memory_region_size(mr); +} + +static const TypeInfo memory_device_info = { + .name = TYPE_MEMORY_DEVICE, + .parent = TYPE_INTERFACE, + .class_size = sizeof(MemoryDeviceClass), +}; + +static void memory_device_register_types(void) +{ + type_register_static(&memory_device_info); +} + +type_init(memory_device_register_types) diff --git a/hw/mem/meson.build b/hw/mem/meson.build new file mode 100644 index 000000000..82f86d117 --- /dev/null +++ b/hw/mem/meson.build @@ -0,0 +1,9 @@ +mem_ss = ss.source_set() +mem_ss.add(files('memory-device.c')) +mem_ss.add(when: 'CONFIG_DIMM', if_true: files('pc-dimm.c')) +mem_ss.add(when: 'CONFIG_NPCM7XX', if_true: files('npcm7xx_mc.c')) +mem_ss.add(when: 'CONFIG_NVDIMM', if_true: files('nvdimm.c')) + +softmmu_ss.add_all(when: 'CONFIG_MEM_DEVICE', if_true: mem_ss) + +softmmu_ss.add(when: 'CONFIG_SPARSE_MEM', if_true: files('sparse-mem.c')) diff --git a/hw/mem/npcm7xx_mc.c b/hw/mem/npcm7xx_mc.c new file mode 100644 index 000000000..abc5af562 --- /dev/null +++ b/hw/mem/npcm7xx_mc.c @@ -0,0 +1,84 @@ +/* + * Nuvoton NPCM7xx Memory Controller stub + * + * Copyright 2020 Google LLC + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * for more details. + */ + +#include "qemu/osdep.h" + +#include "hw/mem/npcm7xx_mc.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "qemu/units.h" + +#define NPCM7XX_MC_REGS_SIZE (4 * KiB) + +static uint64_t npcm7xx_mc_read(void *opaque, hwaddr addr, unsigned int size) +{ + /* + * If bits 8..11 @ offset 0 are not zero, the boot block thinks the memory + * controller has already been initialized and will skip DDR training. + */ + if (addr == 0) { + return 0x100; + } + + qemu_log_mask(LOG_UNIMP, "%s: mostly unimplemented\n", __func__); + + return 0; +} + +static void npcm7xx_mc_write(void *opaque, hwaddr addr, uint64_t v, + unsigned int size) +{ + qemu_log_mask(LOG_UNIMP, "%s: mostly unimplemented\n", __func__); +} + +static const MemoryRegionOps npcm7xx_mc_ops = { + .read = npcm7xx_mc_read, + .write = npcm7xx_mc_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false, + }, +}; + +static void npcm7xx_mc_realize(DeviceState *dev, Error **errp) +{ + NPCM7xxMCState *s = NPCM7XX_MC(dev); + + memory_region_init_io(&s->mmio, OBJECT(s), &npcm7xx_mc_ops, s, "regs", + NPCM7XX_MC_REGS_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); +} + +static void npcm7xx_mc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "NPCM7xx Memory Controller stub"; + dc->realize = npcm7xx_mc_realize; +} + +static const TypeInfo npcm7xx_mc_types[] = { + { + .name = TYPE_NPCM7XX_MC, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(NPCM7xxMCState), + .class_init = npcm7xx_mc_class_init, + }, +}; +DEFINE_TYPES(npcm7xx_mc_types); diff --git a/hw/mem/nvdimm.c b/hw/mem/nvdimm.c new file mode 100644 index 000000000..7397b6715 --- /dev/null +++ b/hw/mem/nvdimm.c @@ -0,0 +1,266 @@ +/* + * Non-Volatile Dual In-line Memory Module Virtualization Implementation + * + * Copyright(C) 2015 Intel 
Corporation. + * + * Author: + * Xiao Guangrong <guangrong.xiao@linux.intel.com> + * + * Currently, it only supports PMEM Virtualization. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qemu/pmem.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "hw/mem/nvdimm.h" +#include "hw/qdev-properties.h" +#include "hw/mem/memory-device.h" +#include "sysemu/hostmem.h" + +static void nvdimm_get_label_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + uint64_t value = nvdimm->label_size; + + visit_type_size(v, name, &value, errp); +} + +static void nvdimm_set_label_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + uint64_t value; + + if (nvdimm->nvdimm_mr) { + error_setg(errp, "cannot change property value"); + return; + } + + if (!visit_type_size(v, name, &value, errp)) { + return; + } + if (value < MIN_NAMESPACE_LABEL_SIZE) { + error_setg(errp, "Property '%s.%s' (0x%" PRIx64 ") is required" + " at least 0x%lx", object_get_typename(obj), name, value, + MIN_NAMESPACE_LABEL_SIZE); + return; + } + + nvdimm->label_size = value; +} + +static void nvdimm_get_uuid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + 
NVDIMMDevice *nvdimm = NVDIMM(obj); + char *value = NULL; + + value = qemu_uuid_unparse_strdup(&nvdimm->uuid); + + visit_type_str(v, name, &value, errp); + g_free(value); +} + + +static void nvdimm_set_uuid(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + char *value; + + if (!visit_type_str(v, name, &value, errp)) { + return; + } + + if (qemu_uuid_parse(value, &nvdimm->uuid) != 0) { + error_setg(errp, "Property '%s.%s' has invalid value", + object_get_typename(obj), name); + } + + g_free(value); +} + + +static void nvdimm_init(Object *obj) +{ + object_property_add(obj, NVDIMM_LABEL_SIZE_PROP, "int", + nvdimm_get_label_size, nvdimm_set_label_size, NULL, + NULL); + + object_property_add(obj, NVDIMM_UUID_PROP, "QemuUUID", nvdimm_get_uuid, + nvdimm_set_uuid, NULL, NULL); +} + +static void nvdimm_finalize(Object *obj) +{ + NVDIMMDevice *nvdimm = NVDIMM(obj); + + g_free(nvdimm->nvdimm_mr); +} + +static void nvdimm_prepare_memory_region(NVDIMMDevice *nvdimm, Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(nvdimm); + uint64_t align, pmem_size, size; + MemoryRegion *mr; + + g_assert(!nvdimm->nvdimm_mr); + + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); + return; + } + + mr = host_memory_backend_get_memory(dimm->hostmem); + align = memory_region_get_alignment(mr); + size = memory_region_size(mr); + + pmem_size = size - nvdimm->label_size; + nvdimm->label_data = memory_region_get_ram_ptr(mr) + pmem_size; + pmem_size = QEMU_ALIGN_DOWN(pmem_size, align); + + if (size <= nvdimm->label_size || !pmem_size) { + HostMemoryBackend *hostmem = dimm->hostmem; + + error_setg(errp, "the size of memdev %s (0x%" PRIx64 ") is too " + "small to contain nvdimm label (0x%" PRIx64 ") and " + "aligned PMEM (0x%" PRIx64 ")", + object_get_canonical_path_component(OBJECT(hostmem)), + memory_region_size(mr), nvdimm->label_size, align); + return; + } + + if (!nvdimm->unarmed && 
memory_region_is_rom(mr)) { + HostMemoryBackend *hostmem = dimm->hostmem; + + error_setg(errp, "'unarmed' property must be off since memdev %s " + "is read-only", + object_get_canonical_path_component(OBJECT(hostmem))); + return; + } + + nvdimm->nvdimm_mr = g_new(MemoryRegion, 1); + memory_region_init_alias(nvdimm->nvdimm_mr, OBJECT(dimm), + "nvdimm-memory", mr, 0, pmem_size); + memory_region_set_nonvolatile(nvdimm->nvdimm_mr, true); + nvdimm->nvdimm_mr->align = align; +} + +static MemoryRegion *nvdimm_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(md); + Error *local_err = NULL; + + if (!nvdimm->nvdimm_mr) { + nvdimm_prepare_memory_region(nvdimm, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return NULL; + } + } + return nvdimm->nvdimm_mr; +} + +static void nvdimm_realize(PCDIMMDevice *dimm, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(dimm); + + if (!nvdimm->nvdimm_mr) { + nvdimm_prepare_memory_region(nvdimm, errp); + } +} + +/* + * the caller should check the input parameters before calling + * label read/write functions. 
+ */ +static void nvdimm_validate_rw_label_data(NVDIMMDevice *nvdimm, uint64_t size, + uint64_t offset) +{ + assert((nvdimm->label_size >= size + offset) && (offset + size > offset)); +} + +static void nvdimm_read_label_data(NVDIMMDevice *nvdimm, void *buf, + uint64_t size, uint64_t offset) +{ + nvdimm_validate_rw_label_data(nvdimm, size, offset); + + memcpy(buf, nvdimm->label_data + offset, size); +} + +static void nvdimm_write_label_data(NVDIMMDevice *nvdimm, const void *buf, + uint64_t size, uint64_t offset) +{ + MemoryRegion *mr; + PCDIMMDevice *dimm = PC_DIMM(nvdimm); + bool is_pmem = object_property_get_bool(OBJECT(dimm->hostmem), + "pmem", NULL); + uint64_t backend_offset; + + nvdimm_validate_rw_label_data(nvdimm, size, offset); + + if (!is_pmem) { + memcpy(nvdimm->label_data + offset, buf, size); + } else { + pmem_memcpy_persist(nvdimm->label_data + offset, buf, size); + } + + mr = host_memory_backend_get_memory(dimm->hostmem); + backend_offset = memory_region_size(mr) - nvdimm->label_size + offset; + memory_region_set_dirty(mr, backend_offset, size); +} + +static Property nvdimm_properties[] = { + DEFINE_PROP_BOOL(NVDIMM_UNARMED_PROP, NVDIMMDevice, unarmed, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void nvdimm_class_init(ObjectClass *oc, void *data) +{ + PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); + NVDIMMClass *nvc = NVDIMM_CLASS(oc); + DeviceClass *dc = DEVICE_CLASS(oc); + + ddc->realize = nvdimm_realize; + mdc->get_memory_region = nvdimm_md_get_memory_region; + device_class_set_props(dc, nvdimm_properties); + + nvc->read_label_data = nvdimm_read_label_data; + nvc->write_label_data = nvdimm_write_label_data; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); +} + +static TypeInfo nvdimm_info = { + .name = TYPE_NVDIMM, + .parent = TYPE_PC_DIMM, + .class_size = sizeof(NVDIMMClass), + .class_init = nvdimm_class_init, + .instance_size = sizeof(NVDIMMDevice), + .instance_init = nvdimm_init, + 
.instance_finalize = nvdimm_finalize, +}; + +static void nvdimm_register_types(void) +{ + type_register_static(&nvdimm_info); +} + +type_init(nvdimm_register_types) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c new file mode 100644 index 000000000..48b913aba --- /dev/null +++ b/hw/mem/pc-dimm.c @@ -0,0 +1,307 @@ +/* + * Dimm device for Memory Hotplug + * + * Copyright ProfitBricks GmbH 2012 + * Copyright (C) 2014 Red Hat Inc + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "hw/boards.h" +#include "hw/mem/pc-dimm.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "hw/mem/nvdimm.h" +#include "hw/mem/memory-device.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/module.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "trace.h" + +static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp); + +static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm, Error **errp) +{ + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property must be set"); + return NULL; + } + + return host_memory_backend_get_memory(dimm->hostmem); +} + +void pc_dimm_pre_plug(PCDIMMDevice *dimm, MachineState *machine, + const uint64_t *legacy_align, Error **errp) +{ + Error *local_err = NULL; + int slot; + + slot = 
object_property_get_int(OBJECT(dimm), PC_DIMM_SLOT_PROP, + &error_abort); + if ((slot < 0 || slot >= machine->ram_slots) && + slot != PC_DIMM_UNASSIGNED_SLOT) { + error_setg(errp, + "invalid slot number %d, valid range is [0-%" PRIu64 "]", + slot, machine->ram_slots - 1); + return; + } + + slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot, + machine->ram_slots, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + object_property_set_int(OBJECT(dimm), PC_DIMM_SLOT_PROP, slot, + &error_abort); + trace_mhp_pc_dimm_assigned_slot(slot); + + memory_device_pre_plug(MEMORY_DEVICE(dimm), machine, legacy_align, + errp); +} + +void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine) +{ + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); + + memory_device_plug(MEMORY_DEVICE(dimm), machine); + vmstate_register_ram(vmstate_mr, DEVICE(dimm)); +} + +void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine) +{ + MemoryRegion *vmstate_mr = pc_dimm_get_memory_region(dimm, + &error_abort); + + memory_device_unplug(MEMORY_DEVICE(dimm), machine); + vmstate_unregister_ram(vmstate_mr, DEVICE(dimm)); +} + +static int pc_dimm_slot2bitmap(Object *obj, void *opaque) +{ + unsigned long *bitmap = opaque; + + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { + DeviceState *dev = DEVICE(obj); + if (dev->realized) { /* count only realized DIMMs */ + PCDIMMDevice *d = PC_DIMM(obj); + set_bit(d->slot, bitmap); + } + } + + object_child_foreach(obj, pc_dimm_slot2bitmap, opaque); + return 0; +} + +static int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp) +{ + unsigned long *bitmap; + int slot = 0; + + if (max_slots <= 0) { + error_setg(errp, "no slots where allocated, please specify " + "the 'slots' option"); + return slot; + } + + bitmap = bitmap_new(max_slots); + object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, bitmap); + + /* check if requested slot is not occupied */ + if (hint) 
{ + if (*hint >= max_slots) { + error_setg(errp, "invalid slot# %d, should be less than %d", + *hint, max_slots); + } else if (!test_bit(*hint, bitmap)) { + slot = *hint; + } else { + error_setg(errp, "slot %d is busy", *hint); + } + goto out; + } + + /* search for free slot */ + slot = find_first_zero_bit(bitmap, max_slots); + if (slot == max_slots) { + error_setg(errp, "no free slots available"); + } +out: + g_free(bitmap); + return slot; +} + +static Property pc_dimm_properties[] = { + DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0), + DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0), + DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot, + PC_DIMM_UNASSIGNED_SLOT), + DEFINE_PROP_LINK(PC_DIMM_MEMDEV_PROP, PCDIMMDevice, hostmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pc_dimm_get_size(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + Error *local_err = NULL; + uint64_t value; + + value = memory_device_get_region_size(MEMORY_DEVICE(obj), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + visit_type_uint64(v, name, &value, errp); +} + +static void pc_dimm_init(Object *obj) +{ + object_property_add(obj, PC_DIMM_SIZE_PROP, "uint64", pc_dimm_get_size, + NULL, NULL, NULL); +} + +static void pc_dimm_realize(DeviceState *dev, Error **errp) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm); + MachineState *ms = MACHINE(qdev_get_machine()); + + if (ms->numa_state) { + int nb_numa_nodes = ms->numa_state->num_nodes; + + if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) || + (!nb_numa_nodes && dimm->node)) { + error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %" + PRIu32 "' which exceeds the number of numa nodes: %d", + dimm->node, nb_numa_nodes ? 
nb_numa_nodes : 1); + return; + } + } else if (dimm->node > 0) { + error_setg(errp, "machine doesn't support NUMA"); + return; + } + + if (!dimm->hostmem) { + error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set"); + return; + } else if (host_memory_backend_is_mapped(dimm->hostmem)) { + error_setg(errp, "can't use already busy memdev: %s", + object_get_canonical_path_component(OBJECT(dimm->hostmem))); + return; + } + + if (ddc->realize) { + ddc->realize(dimm, errp); + } + + host_memory_backend_set_mapped(dimm->hostmem, true); +} + +static void pc_dimm_unrealize(DeviceState *dev) +{ + PCDIMMDevice *dimm = PC_DIMM(dev); + + host_memory_backend_set_mapped(dimm->hostmem, false); +} + +static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md) +{ + return object_property_get_uint(OBJECT(md), PC_DIMM_ADDR_PROP, + &error_abort); +} + +static void pc_dimm_md_set_addr(MemoryDeviceState *md, uint64_t addr, + Error **errp) +{ + object_property_set_uint(OBJECT(md), PC_DIMM_ADDR_PROP, addr, errp); +} + +static MemoryRegion *pc_dimm_md_get_memory_region(MemoryDeviceState *md, + Error **errp) +{ + return pc_dimm_get_memory_region(PC_DIMM(md), errp); +} + +static void pc_dimm_md_fill_device_info(const MemoryDeviceState *md, + MemoryDeviceInfo *info) +{ + PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1); + const DeviceClass *dc = DEVICE_GET_CLASS(md); + const PCDIMMDevice *dimm = PC_DIMM(md); + const DeviceState *dev = DEVICE(md); + + if (dev->id) { + di->has_id = true; + di->id = g_strdup(dev->id); + } + di->hotplugged = dev->hotplugged; + di->hotpluggable = dc->hotpluggable; + di->addr = dimm->addr; + di->slot = dimm->slot; + di->node = dimm->node; + di->size = object_property_get_uint(OBJECT(dimm), PC_DIMM_SIZE_PROP, + NULL); + di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem)); + + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + info->u.nvdimm.data = di; + info->type = MEMORY_DEVICE_INFO_KIND_NVDIMM; + } else { + info->u.dimm.data = di; + 
info->type = MEMORY_DEVICE_INFO_KIND_DIMM; + } +} + +static void pc_dimm_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + MemoryDeviceClass *mdc = MEMORY_DEVICE_CLASS(oc); + + dc->realize = pc_dimm_realize; + dc->unrealize = pc_dimm_unrealize; + device_class_set_props(dc, pc_dimm_properties); + dc->desc = "DIMM memory module"; + + mdc->get_addr = pc_dimm_md_get_addr; + mdc->set_addr = pc_dimm_md_set_addr; + /* for a dimm plugged_size == region_size */ + mdc->get_plugged_size = memory_device_get_region_size; + mdc->get_memory_region = pc_dimm_md_get_memory_region; + mdc->fill_device_info = pc_dimm_md_fill_device_info; +} + +static TypeInfo pc_dimm_info = { + .name = TYPE_PC_DIMM, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PCDIMMDevice), + .instance_init = pc_dimm_init, + .class_init = pc_dimm_class_init, + .class_size = sizeof(PCDIMMDeviceClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_MEMORY_DEVICE }, + { } + }, +}; + +static void pc_dimm_register_types(void) +{ + type_register_static(&pc_dimm_info); +} + +type_init(pc_dimm_register_types) diff --git a/hw/mem/sparse-mem.c b/hw/mem/sparse-mem.c new file mode 100644 index 000000000..e6640eb8e --- /dev/null +++ b/hw/mem/sparse-mem.c @@ -0,0 +1,150 @@ +/* + * A sparse memory device. Useful for fuzzing + * + * Copyright Red Hat Inc., 2021 + * + * Authors: + * Alexander Bulekov <alxndr@bu.edu> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" + +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "qapi/error.h" +#include "qemu/units.h" +#include "sysemu/qtest.h" +#include "hw/mem/sparse-mem.h" + +#define SPARSE_MEM(obj) OBJECT_CHECK(SparseMemState, (obj), TYPE_SPARSE_MEM) +#define SPARSE_BLOCK_SIZE 0x1000 + +typedef struct SparseMemState { + SysBusDevice parent_obj; + MemoryRegion mmio; + uint64_t baseaddr; + uint64_t length; + uint64_t size_used; + uint64_t maxsize; + GHashTable *mapped; +} SparseMemState; + +typedef struct sparse_mem_block { + uint8_t data[SPARSE_BLOCK_SIZE]; +} sparse_mem_block; + +static uint64_t sparse_mem_read(void *opaque, hwaddr addr, unsigned int size) +{ + SparseMemState *s = opaque; + uint64_t ret = 0; + size_t pfn = addr / SPARSE_BLOCK_SIZE; + size_t offset = addr % SPARSE_BLOCK_SIZE; + sparse_mem_block *block; + + block = g_hash_table_lookup(s->mapped, (void *)pfn); + if (block) { + assert(offset + size <= sizeof(block->data)); + memcpy(&ret, block->data + offset, size); + } + return ret; +} + +static void sparse_mem_write(void *opaque, hwaddr addr, uint64_t v, + unsigned int size) +{ + SparseMemState *s = opaque; + size_t pfn = addr / SPARSE_BLOCK_SIZE; + size_t offset = addr % SPARSE_BLOCK_SIZE; + sparse_mem_block *block; + + if (!g_hash_table_lookup(s->mapped, (void *)pfn) && + s->size_used + SPARSE_BLOCK_SIZE < s->maxsize && v) { + g_hash_table_insert(s->mapped, (void *)pfn, + g_new0(sparse_mem_block, 1)); + s->size_used += sizeof(block->data); + } + block = g_hash_table_lookup(s->mapped, (void *)pfn); + if (!block) { + return; + } + + assert(offset + size <= sizeof(block->data)); + + memcpy(block->data + offset, &v, size); + +} + +static const MemoryRegionOps sparse_mem_ops = { + .read = sparse_mem_read, + .write = sparse_mem_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 8, + .unaligned = false, + }, +}; + +static Property sparse_mem_properties[] = { + /* The base 
address of the memory */ + DEFINE_PROP_UINT64("baseaddr", SparseMemState, baseaddr, 0x0), + /* The length of the sparse memory region */ + DEFINE_PROP_UINT64("length", SparseMemState, length, UINT64_MAX), + /* Max amount of actual memory that can be used to back the sparse memory */ + DEFINE_PROP_UINT64("maxsize", SparseMemState, maxsize, 10 * MiB), + DEFINE_PROP_END_OF_LIST(), +}; + +MemoryRegion *sparse_mem_init(uint64_t addr, uint64_t length) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_SPARSE_MEM); + qdev_prop_set_uint64(dev, "baseaddr", addr); + qdev_prop_set_uint64(dev, "length", length); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, addr, -10000); + return &SPARSE_MEM(dev)->mmio; +} + +static void sparse_mem_realize(DeviceState *dev, Error **errp) +{ + SparseMemState *s = SPARSE_MEM(dev); + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + + if (!qtest_enabled()) { + error_setg(errp, "sparse_mem device should only be used " + "for testing with QTest"); + return; + } + + assert(s->baseaddr + s->length > s->baseaddr); + + s->mapped = g_hash_table_new(NULL, NULL); + memory_region_init_io(&s->mmio, OBJECT(s), &sparse_mem_ops, s, + "sparse-mem", s->length); + sysbus_init_mmio(sbd, &s->mmio); +} + +static void sparse_mem_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, sparse_mem_properties); + + dc->desc = "Sparse Memory Device"; + dc->realize = sparse_mem_realize; +} + +static const TypeInfo sparse_mem_types[] = { + { + .name = TYPE_SPARSE_MEM, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SparseMemState), + .class_init = sparse_mem_class_init, + }, +}; +DEFINE_TYPES(sparse_mem_types); diff --git a/hw/mem/trace-events b/hw/mem/trace-events new file mode 100644 index 000000000..8b6b02b5b --- /dev/null +++ b/hw/mem/trace-events @@ -0,0 +1,8 @@ +# See docs/devel/tracing.rst for syntax documentation. 
+ +# pc-dimm.c +mhp_pc_dimm_assigned_slot(int slot) "%d" +# memory-device.c +memory_device_pre_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64 +memory_device_plug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64 +memory_device_unplug(const char *id, uint64_t addr) "id=%s addr=0x%"PRIx64 diff --git a/hw/mem/trace.h b/hw/mem/trace.h new file mode 100644 index 000000000..2f2c94540 --- /dev/null +++ b/hw/mem/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_mem.h" |