author    | Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> | 2023-10-10 11:40:56 +0000
committer | Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> | 2023-10-10 11:40:56 +0000
commit    | e02cda008591317b1625707ff8e115a4841aa889 (patch)
tree      | aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /hw/ppc
parent    | cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff)
Introduce Virtio-loopback epsilon release:
The epsilon release introduces a new compatibility layer which makes the
virtio-loopback design work with QEMU and the rust-vmm vhost-user backend
without requiring any changes.
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'hw/ppc')
66 files changed, 40088 insertions, 0 deletions
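
One detail worth calling out before the diff body: in the e500.c hunk below, booke206_page_size_to_tlb() converts a mapping size into a BookE 2.06 MAS1 TSIZE value (the TLB encodes page sizes as 1 KiB << TSIZE), and booke206_initial_map_tsize() then rounds the covering TSIZE up to an even value because e500v2 only supports even TSIZE encodings. A minimal standalone sketch of that computation, with QEMU's clz64() replaced by GCC's __builtin_clzll() and a 28 MiB device-tree end chosen purely for illustration:

#include <assert.h>
#include <stdint.h>

#define KiB 1024ULL
#define MiB (1024 * KiB)

/* Mirror of booke206_page_size_to_tlb() below: TSIZE is log2(size / 1 KiB). */
static uint64_t page_size_to_tlb(uint64_t size)
{
    return 63 - __builtin_clzll(size / KiB);
}

int main(void)
{
    assert(page_size_to_tlb(64 * MiB) == 16);  /* 64 MiB = 1 KiB << 16 */

    uint64_t dt_end = 28 * MiB;                /* hypothetical dt_base + dt_size */
    int ps = page_size_to_tlb(dt_end) + 1;     /* 15: rounded up to cover dt_end */
    if (ps & 1) {
        ps++;                                  /* e500v2: even TSIZE only -> 16 */
    }
    assert(ps == 16);                          /* initial map: 1 KiB << 16 = 64 MiB */
    return 0;
}
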
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig new file mode 100644 index 000000000..400511c6b --- /dev/null +++ b/hw/ppc/Kconfig @@ -0,0 +1,153 @@ +config PSERIES + bool + imply PCI_DEVICES + imply TEST_DEVICES + imply VIRTIO_VGA + imply NVDIMM + select DIMM + select PCI + select SPAPR_VSCSI + select VFIO if LINUX # needed by spapr_pci_vfio.c + select XICS + select XIVE + select MSI_NONBROKEN + select FDT_PPC + select CHRP_NVRAM + select VOF + +config SPAPR_RNG + bool + default y + depends on PSERIES + +config POWERNV + bool + imply PCI_DEVICES + imply TEST_DEVICES + select ISA_IPMI_BT + select IPMI_LOCAL + select ISA_BUS + select MC146818RTC + select XICS + select XIVE + select FDT_PPC + select PCI_POWERNV + +config PPC405 + bool + select M48T59 + select PFLASH_CFI02 + select PPC4XX + select SERIAL + +config PPC440 + bool + imply PCI_DEVICES + imply TEST_DEVICES + imply E1000_PCI + select PCI_EXPRESS + select PPC4XX + select SERIAL + select FDT_PPC + +config PPC4XX + bool + select BITBANG_I2C + select PCI + select PPC_UIC + +config SAM460EX + bool + select PPC405 + select PFLASH_CFI01 + select IDE_SII3112 + select M41T80 + select PPC440 + select SERIAL + select SM501 + select SMBUS_EEPROM + select USB_EHCI_SYSBUS + select USB_OHCI + select FDT_PPC + +config PEGASOS2 + bool + select MV64361 + select VT82C686 + select IDE_VIA + select SMBUS_EEPROM + select VOF +# This should come with VT82C686 + select ACPI_X86 + imply ATI_VGA + +config PREP + bool + imply PCI_DEVICES + imply TEST_DEVICES + select CS4231A + select RAVEN_PCI + select I82378 + select LSI_SCSI_PCI + select M48T59 + select PC87312 + select RS6000_MC + select FW_CFG_PPC + +config RS6000_MC + bool + +config MAC_OLDWORLD + bool + imply PCI_DEVICES + imply SUNGEM + imply TEST_DEVICES + select ADB + select GRACKLE_PCI + select HEATHROW_PIC + select MACIO + select FW_CFG_PPC + +config MAC_NEWWORLD + bool + imply PCI_DEVICES + imply SUNGEM + imply TEST_DEVICES + select ADB + select MACIO + select MACIO_GPIO + select MAC_PMU + select UNIN_PCI + select FW_CFG_PPC + +config E500 + bool + imply AT24C + imply VIRTIO_PCI + select ETSEC + select OPENPIC + select PLATFORM_BUS + select PPCE500_PCI + select SERIAL + select MPC_I2C + select FDT_PPC + select DS1338 + +config VIRTEX + bool + select PPC4XX + select PFLASH_CFI01 + select SERIAL + select XILINX + select XILINX_ETHLITE + select FDT_PPC + +# Only used by 64-bit targets +config FW_CFG_PPC + bool + +config FDT_PPC + bool + +config VOF + bool diff --git a/hw/ppc/e500-ccsr.h b/hw/ppc/e500-ccsr.h new file mode 100644 index 000000000..249c17be3 --- /dev/null +++ b/hw/ppc/e500-ccsr.h @@ -0,0 +1,18 @@ +#ifndef E500_CCSR_H +#define E500_CCSR_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +struct PPCE500CCSRState { + /*< private >*/ + SysBusDevice parent; + /*< public >*/ + + MemoryRegion ccsr_space; +}; + +#define TYPE_CCSR "e500-ccsr" +OBJECT_DECLARE_SIMPLE_TYPE(PPCE500CCSRState, CCSR) + +#endif /* E500_CCSR_H */ diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c new file mode 100644 index 000000000..960e7efcd --- /dev/null +++ b/hw/ppc/e500.c @@ -0,0 +1,1174 @@ +/* + * QEMU PowerPC e500-based platforms + * + * Copyright (C) 2009 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, <yu.liu@freescale.com> + * + * This file is derived from hw/ppc440_bamboo.c, + * the copyright for that material belongs to the original owners. 
+ * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "e500.h" +#include "e500-ccsr.h" +#include "net/net.h" +#include "qemu/config-file.h" +#include "hw/char/serial.h" +#include "hw/pci/pci.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "kvm_ppc.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/openpic.h" +#include "hw/ppc/openpic_kvm.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/loader.h" +#include "elf.h" +#include "hw/sysbus.h" +#include "qemu/host-utils.h" +#include "qemu/option.h" +#include "hw/pci-host/ppce500.h" +#include "qemu/error-report.h" +#include "hw/platform-bus.h" +#include "hw/net/fsl_etsec/etsec.h" +#include "hw/i2c/i2c.h" +#include "hw/irq.h" + +#define EPAPR_MAGIC (0x45504150) +#define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb" +#define DTC_LOAD_PAD 0x1800000 +#define DTC_PAD_MASK 0xFFFFF +#define DTB_MAX_SIZE (8 * MiB) +#define INITRD_LOAD_PAD 0x2000000 +#define INITRD_PAD_MASK 0xFFFFFF + +#define RAM_SIZES_ALIGN (64 * MiB) + +/* TODO: parameterize */ +#define MPC8544_CCSRBAR_SIZE 0x00100000ULL +#define MPC8544_MPIC_REGS_OFFSET 0x40000ULL +#define MPC8544_MSI_REGS_OFFSET 0x41600ULL +#define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL +#define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL +#define MPC8544_PCI_REGS_OFFSET 0x8000ULL +#define MPC8544_PCI_REGS_SIZE 0x1000ULL +#define MPC8544_UTIL_OFFSET 0xe0000ULL +#define MPC8XXX_GPIO_OFFSET 0x000FF000ULL +#define MPC8544_I2C_REGS_OFFSET 0x3000ULL +#define MPC8XXX_GPIO_IRQ 47 +#define MPC8544_I2C_IRQ 43 +#define RTC_REGS_OFFSET 0x68 + +#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000) + +struct boot_info +{ + uint32_t dt_base; + uint32_t dt_size; + uint32_t entry; +}; + +static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot, + int nr_slots, int *len) +{ + int i = 0; + int slot; + int pci_irq; + int host_irq; + int last_slot = first_slot + nr_slots; + uint32_t *pci_map; + + *len = nr_slots * 4 * 7 * sizeof(uint32_t); + pci_map = g_malloc(*len); + + for (slot = first_slot; slot < last_slot; slot++) { + for (pci_irq = 0; pci_irq < 4; pci_irq++) { + pci_map[i++] = cpu_to_be32(slot << 11); + pci_map[i++] = cpu_to_be32(0x0); + pci_map[i++] = cpu_to_be32(0x0); + pci_map[i++] = cpu_to_be32(pci_irq + 1); + pci_map[i++] = cpu_to_be32(mpic); + host_irq = ppce500_pci_map_irq_slot(slot, pci_irq); + pci_map[i++] = cpu_to_be32(host_irq + 1); + pci_map[i++] = cpu_to_be32(0x1); + } + } + + assert((i * sizeof(uint32_t)) == *len); + + return pci_map; +} + +static void dt_serial_create(void *fdt, unsigned long long offset, + const char *soc, const char *mpic, + const char *alias, int idx, bool defcon) +{ + char *ser; + + ser = g_strdup_printf("%s/serial@%llx", soc, offset); + qemu_fdt_add_subnode(fdt, ser); + qemu_fdt_setprop_string(fdt, ser, "device_type", "serial"); + qemu_fdt_setprop_string(fdt, ser, "compatible", "ns16550"); + qemu_fdt_setprop_cells(fdt, ser, "reg", offset, 0x100); + qemu_fdt_setprop_cell(fdt, ser, "cell-index", idx); + qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", PLATFORM_CLK_FREQ_HZ); + qemu_fdt_setprop_cells(fdt, ser, "interrupts", 42, 2); + 
qemu_fdt_setprop_phandle(fdt, ser, "interrupt-parent", mpic); + qemu_fdt_setprop_string(fdt, "/aliases", alias, ser); + + if (defcon) { + /* + * "linux,stdout-path" and "stdout" properties are deprecated by linux + * kernel. New platforms should only use the "stdout-path" property. Set + * the new property and continue using older property to remain + * compatible with the existing firmware. + */ + qemu_fdt_setprop_string(fdt, "/chosen", "linux,stdout-path", ser); + qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", ser); + } + g_free(ser); +} + +static void create_dt_mpc8xxx_gpio(void *fdt, const char *soc, const char *mpic) +{ + hwaddr mmio0 = MPC8XXX_GPIO_OFFSET; + int irq0 = MPC8XXX_GPIO_IRQ; + gchar *node = g_strdup_printf("%s/gpio@%"PRIx64, soc, mmio0); + gchar *poweroff = g_strdup_printf("%s/power-off", soc); + int gpio_ph; + + qemu_fdt_add_subnode(fdt, node); + qemu_fdt_setprop_string(fdt, node, "compatible", "fsl,qoriq-gpio"); + qemu_fdt_setprop_cells(fdt, node, "reg", mmio0, 0x1000); + qemu_fdt_setprop_cells(fdt, node, "interrupts", irq0, 0x2); + qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic); + qemu_fdt_setprop_cells(fdt, node, "#gpio-cells", 2); + qemu_fdt_setprop(fdt, node, "gpio-controller", NULL, 0); + gpio_ph = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_setprop_cell(fdt, node, "phandle", gpio_ph); + qemu_fdt_setprop_cell(fdt, node, "linux,phandle", gpio_ph); + + /* Power Off Pin */ + qemu_fdt_add_subnode(fdt, poweroff); + qemu_fdt_setprop_string(fdt, poweroff, "compatible", "gpio-poweroff"); + qemu_fdt_setprop_cells(fdt, poweroff, "gpios", gpio_ph, 0, 0); + + g_free(node); + g_free(poweroff); +} + +static void dt_rtc_create(void *fdt, const char *i2c, const char *alias) +{ + int offset = RTC_REGS_OFFSET; + + gchar *rtc = g_strdup_printf("%s/rtc@%"PRIx32, i2c, offset); + qemu_fdt_add_subnode(fdt, rtc); + qemu_fdt_setprop_string(fdt, rtc, "compatible", "pericom,pt7c4338"); + qemu_fdt_setprop_cells(fdt, rtc, "reg", offset); + qemu_fdt_setprop_string(fdt, "/aliases", alias, rtc); + + g_free(rtc); +} + +static void dt_i2c_create(void *fdt, const char *soc, const char *mpic, + const char *alias) +{ + hwaddr mmio0 = MPC8544_I2C_REGS_OFFSET; + int irq0 = MPC8544_I2C_IRQ; + + gchar *i2c = g_strdup_printf("%s/i2c@%"PRIx64, soc, mmio0); + qemu_fdt_add_subnode(fdt, i2c); + qemu_fdt_setprop_string(fdt, i2c, "device_type", "i2c"); + qemu_fdt_setprop_string(fdt, i2c, "compatible", "fsl-i2c"); + qemu_fdt_setprop_cells(fdt, i2c, "reg", mmio0, 0x14); + qemu_fdt_setprop_cells(fdt, i2c, "cell-index", 0); + qemu_fdt_setprop_cells(fdt, i2c, "interrupts", irq0, 0x2); + qemu_fdt_setprop_phandle(fdt, i2c, "interrupt-parent", mpic); + qemu_fdt_setprop_string(fdt, "/aliases", alias, i2c); + + g_free(i2c); +} + + +typedef struct PlatformDevtreeData { + void *fdt; + const char *mpic; + int irq_start; + const char *node; + PlatformBusDevice *pbus; +} PlatformDevtreeData; + +static int create_devtree_etsec(SysBusDevice *sbdev, PlatformDevtreeData *data) +{ + eTSEC *etsec = ETSEC_COMMON(sbdev); + PlatformBusDevice *pbus = data->pbus; + hwaddr mmio0 = platform_bus_get_mmio_addr(pbus, sbdev, 0); + int irq0 = platform_bus_get_irqn(pbus, sbdev, 0); + int irq1 = platform_bus_get_irqn(pbus, sbdev, 1); + int irq2 = platform_bus_get_irqn(pbus, sbdev, 2); + gchar *node = g_strdup_printf("/platform/ethernet@%"PRIx64, mmio0); + gchar *group = g_strdup_printf("%s/queue-group", node); + void *fdt = data->fdt; + + assert((int64_t)mmio0 >= 0); + assert(irq0 >= 0); + assert(irq1 >= 0); + assert(irq2 >= 
0); + + qemu_fdt_add_subnode(fdt, node); + qemu_fdt_setprop(fdt, node, "ranges", NULL, 0); + qemu_fdt_setprop_string(fdt, node, "device_type", "network"); + qemu_fdt_setprop_string(fdt, node, "compatible", "fsl,etsec2"); + qemu_fdt_setprop_string(fdt, node, "model", "eTSEC"); + qemu_fdt_setprop(fdt, node, "local-mac-address", etsec->conf.macaddr.a, 6); + qemu_fdt_setprop_cells(fdt, node, "fixed-link", 0, 1, 1000, 0, 0); + qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1); + qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1); + + qemu_fdt_add_subnode(fdt, group); + qemu_fdt_setprop_cells(fdt, group, "reg", mmio0, 0x1000); + qemu_fdt_setprop_cells(fdt, group, "interrupts", + data->irq_start + irq0, 0x2, + data->irq_start + irq1, 0x2, + data->irq_start + irq2, 0x2); + + g_free(node); + g_free(group); + + return 0; +} + +static void sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque) +{ + PlatformDevtreeData *data = opaque; + bool matched = false; + + if (object_dynamic_cast(OBJECT(sbdev), TYPE_ETSEC_COMMON)) { + create_devtree_etsec(sbdev, data); + matched = true; + } + + if (!matched) { + error_report("Device %s is not supported by this machine yet.", + qdev_fw_name(DEVICE(sbdev))); + exit(1); + } +} + +static void platform_bus_create_devtree(PPCE500MachineState *pms, + void *fdt, const char *mpic) +{ + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + gchar *node = g_strdup_printf("/platform@%"PRIx64, pmc->platform_bus_base); + const char platcomp[] = "qemu,platform\0simple-bus"; + uint64_t addr = pmc->platform_bus_base; + uint64_t size = pmc->platform_bus_size; + int irq_start = pmc->platform_bus_first_irq; + + /* Create a /platform node that we can put all devices into */ + + qemu_fdt_add_subnode(fdt, node); + qemu_fdt_setprop(fdt, node, "compatible", platcomp, sizeof(platcomp)); + + /* Our platform bus region is less than 32bit big, so 1 cell is enough for + address and size */ + qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1); + qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1); + qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, size); + + qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic); + + /* Create dt nodes for dynamic devices */ + PlatformDevtreeData data = { + .fdt = fdt, + .mpic = mpic, + .irq_start = irq_start, + .node = node, + .pbus = pms->pbus_dev, + }; + + /* Loop through all dynamic sysbus devices and create nodes for them */ + foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &data); + + g_free(node); +} + +static int ppce500_load_device_tree(PPCE500MachineState *pms, + hwaddr addr, + hwaddr initrd_base, + hwaddr initrd_size, + hwaddr kernel_base, + hwaddr kernel_size, + bool dry_run) +{ + MachineState *machine = MACHINE(pms); + unsigned int smp_cpus = machine->smp.cpus; + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + CPUPPCState *env = first_cpu->env_ptr; + int ret = -1; + uint64_t mem_reg_property[] = { 0, cpu_to_be64(machine->ram_size) }; + int fdt_size; + void *fdt; + uint8_t hypercall[16]; + uint32_t clock_freq = PLATFORM_CLK_FREQ_HZ; + uint32_t tb_freq = PLATFORM_CLK_FREQ_HZ; + int i; + char compatible_sb[] = "fsl,mpc8544-immr\0simple-bus"; + char *soc; + char *mpic; + uint32_t mpic_ph; + uint32_t msi_ph; + char *gutil; + char *pci; + char *msi; + uint32_t *pci_map = NULL; + int len; + uint32_t pci_ranges[14] = + { + 0x2000000, 0x0, pmc->pci_mmio_bus_base, + pmc->pci_mmio_base >> 32, pmc->pci_mmio_base, + 0x0, 0x20000000, + + 0x1000000, 0x0, 0x0, + 
pmc->pci_pio_base >> 32, pmc->pci_pio_base, + 0x0, 0x10000, + }; + const char *dtb_file = machine->dtb; + const char *toplevel_compat = machine->dt_compatible; + + if (dtb_file) { + char *filename; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file); + if (!filename) { + goto out; + } + + fdt = load_device_tree(filename, &fdt_size); + g_free(filename); + if (!fdt) { + goto out; + } + goto done; + } + + fdt = create_device_tree(&fdt_size); + if (fdt == NULL) { + goto out; + } + + /* Manipulate device tree in memory. */ + qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 2); + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 2); + + qemu_fdt_add_subnode(fdt, "/memory"); + qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory"); + qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property, + sizeof(mem_reg_property)); + + qemu_fdt_add_subnode(fdt, "/chosen"); + if (initrd_size) { + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", + initrd_base); + if (ret < 0) { + fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n"); + } + + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + if (ret < 0) { + fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n"); + } + + } + + if (kernel_base != -1ULL) { + qemu_fdt_setprop_cells(fdt, "/chosen", "qemu,boot-kernel", + kernel_base >> 32, kernel_base, + kernel_size >> 32, kernel_size); + } + + ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + machine->kernel_cmdline); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/bootargs\n"); + + if (kvm_enabled()) { + /* Read out host's frequencies */ + clock_freq = kvmppc_get_clockfreq(); + tb_freq = kvmppc_get_tbfreq(); + + /* indicate KVM hypercall interface */ + qemu_fdt_add_subnode(fdt, "/hypervisor"); + qemu_fdt_setprop_string(fdt, "/hypervisor", "compatible", + "linux,kvm"); + kvmppc_get_hypercall(env, hypercall, sizeof(hypercall)); + qemu_fdt_setprop(fdt, "/hypervisor", "hcall-instructions", + hypercall, sizeof(hypercall)); + /* if KVM supports the idle hcall, set property indicating this */ + if (kvmppc_get_hasidle(env)) { + qemu_fdt_setprop(fdt, "/hypervisor", "has-idle", NULL, 0); + } + } + + /* Create CPU nodes */ + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0); + + /* We need to generate the cpu nodes in reverse order, so Linux can pick + the first node as boot node and be happy */ + for (i = smp_cpus - 1; i >= 0; i--) { + CPUState *cpu; + char *cpu_name; + uint64_t cpu_release_addr = pmc->spin_base + (i * 0x20); + + cpu = qemu_get_cpu(i); + if (cpu == NULL) { + continue; + } + env = cpu->env_ptr; + + cpu_name = g_strdup_printf("/cpus/PowerPC,8544@%x", i); + qemu_fdt_add_subnode(fdt, cpu_name); + qemu_fdt_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq); + qemu_fdt_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq); + qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu"); + qemu_fdt_setprop_cell(fdt, cpu_name, "reg", i); + qemu_fdt_setprop_cell(fdt, cpu_name, "d-cache-line-size", + env->dcache_line_size); + qemu_fdt_setprop_cell(fdt, cpu_name, "i-cache-line-size", + env->icache_line_size); + qemu_fdt_setprop_cell(fdt, cpu_name, "d-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cpu_name, "i-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cpu_name, "bus-frequency", 0); + if (cpu->cpu_index) { + qemu_fdt_setprop_string(fdt, cpu_name, "status", "disabled"); + 
qemu_fdt_setprop_string(fdt, cpu_name, "enable-method", + "spin-table"); + qemu_fdt_setprop_u64(fdt, cpu_name, "cpu-release-addr", + cpu_release_addr); + } else { + qemu_fdt_setprop_string(fdt, cpu_name, "status", "okay"); + } + g_free(cpu_name); + } + + qemu_fdt_add_subnode(fdt, "/aliases"); + /* XXX These should go into their respective devices' code */ + soc = g_strdup_printf("/soc@%"PRIx64, pmc->ccsrbar_base); + qemu_fdt_add_subnode(fdt, soc); + qemu_fdt_setprop_string(fdt, soc, "device_type", "soc"); + qemu_fdt_setprop(fdt, soc, "compatible", compatible_sb, + sizeof(compatible_sb)); + qemu_fdt_setprop_cell(fdt, soc, "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, soc, "#size-cells", 1); + qemu_fdt_setprop_cells(fdt, soc, "ranges", 0x0, + pmc->ccsrbar_base >> 32, pmc->ccsrbar_base, + MPC8544_CCSRBAR_SIZE); + /* XXX should contain a reasonable value */ + qemu_fdt_setprop_cell(fdt, soc, "bus-frequency", 0); + + mpic = g_strdup_printf("%s/pic@%llx", soc, MPC8544_MPIC_REGS_OFFSET); + qemu_fdt_add_subnode(fdt, mpic); + qemu_fdt_setprop_string(fdt, mpic, "device_type", "open-pic"); + qemu_fdt_setprop_string(fdt, mpic, "compatible", "fsl,mpic"); + qemu_fdt_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_OFFSET, + 0x40000); + qemu_fdt_setprop_cell(fdt, mpic, "#address-cells", 0); + qemu_fdt_setprop_cell(fdt, mpic, "#interrupt-cells", 2); + mpic_ph = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_setprop_cell(fdt, mpic, "phandle", mpic_ph); + qemu_fdt_setprop_cell(fdt, mpic, "linux,phandle", mpic_ph); + qemu_fdt_setprop(fdt, mpic, "interrupt-controller", NULL, 0); + + /* + * We have to generate ser1 first, because Linux takes the first + * device it finds in the dt as serial output device. And we generate + * devices in reverse order to the dt. + */ + if (serial_hd(1)) { + dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, + soc, mpic, "serial1", 1, false); + } + + if (serial_hd(0)) { + dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, + soc, mpic, "serial0", 0, true); + } + + /* i2c */ + dt_i2c_create(fdt, soc, mpic, "i2c"); + + dt_rtc_create(fdt, "i2c", "rtc"); + + + gutil = g_strdup_printf("%s/global-utilities@%llx", soc, + MPC8544_UTIL_OFFSET); + qemu_fdt_add_subnode(fdt, gutil); + qemu_fdt_setprop_string(fdt, gutil, "compatible", "fsl,mpc8544-guts"); + qemu_fdt_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_OFFSET, 0x1000); + qemu_fdt_setprop(fdt, gutil, "fsl,has-rstcr", NULL, 0); + g_free(gutil); + + msi = g_strdup_printf("/%s/msi@%llx", soc, MPC8544_MSI_REGS_OFFSET); + qemu_fdt_add_subnode(fdt, msi); + qemu_fdt_setprop_string(fdt, msi, "compatible", "fsl,mpic-msi"); + qemu_fdt_setprop_cells(fdt, msi, "reg", MPC8544_MSI_REGS_OFFSET, 0x200); + msi_ph = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_setprop_cells(fdt, msi, "msi-available-ranges", 0x0, 0x100); + qemu_fdt_setprop_phandle(fdt, msi, "interrupt-parent", mpic); + qemu_fdt_setprop_cells(fdt, msi, "interrupts", + 0xe0, 0x0, + 0xe1, 0x0, + 0xe2, 0x0, + 0xe3, 0x0, + 0xe4, 0x0, + 0xe5, 0x0, + 0xe6, 0x0, + 0xe7, 0x0); + qemu_fdt_setprop_cell(fdt, msi, "phandle", msi_ph); + qemu_fdt_setprop_cell(fdt, msi, "linux,phandle", msi_ph); + g_free(msi); + + pci = g_strdup_printf("/pci@%llx", + pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET); + qemu_fdt_add_subnode(fdt, pci); + qemu_fdt_setprop_cell(fdt, pci, "cell-index", 0); + qemu_fdt_setprop_string(fdt, pci, "compatible", "fsl,mpc8540-pci"); + qemu_fdt_setprop_string(fdt, pci, "device_type", "pci"); + qemu_fdt_setprop_cells(fdt, pci, "interrupt-map-mask", 0xf800, 0x0, + 0x0, 0x7); + pci_map = 
pci_map_create(fdt, qemu_fdt_get_phandle(fdt, mpic), + pmc->pci_first_slot, pmc->pci_nr_slots, + &len); + qemu_fdt_setprop(fdt, pci, "interrupt-map", pci_map, len); + qemu_fdt_setprop_phandle(fdt, pci, "interrupt-parent", mpic); + qemu_fdt_setprop_cells(fdt, pci, "interrupts", 24, 2); + qemu_fdt_setprop_cells(fdt, pci, "bus-range", 0, 255); + for (i = 0; i < 14; i++) { + pci_ranges[i] = cpu_to_be32(pci_ranges[i]); + } + qemu_fdt_setprop_cell(fdt, pci, "fsl,msi", msi_ph); + qemu_fdt_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges)); + qemu_fdt_setprop_cells(fdt, pci, "reg", + (pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET) >> 32, + (pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET), + 0, 0x1000); + qemu_fdt_setprop_cell(fdt, pci, "clock-frequency", 66666666); + qemu_fdt_setprop_cell(fdt, pci, "#interrupt-cells", 1); + qemu_fdt_setprop_cell(fdt, pci, "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, pci, "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/aliases", "pci0", pci); + g_free(pci); + + if (pmc->has_mpc8xxx_gpio) { + create_dt_mpc8xxx_gpio(fdt, soc, mpic); + } + g_free(soc); + + if (pms->pbus_dev) { + platform_bus_create_devtree(pms, fdt, mpic); + } + g_free(mpic); + + pmc->fixup_devtree(fdt); + + if (toplevel_compat) { + qemu_fdt_setprop(fdt, "/", "compatible", toplevel_compat, + strlen(toplevel_compat) + 1); + } + +done: + if (!dry_run) { + qemu_fdt_dumpdtb(fdt, fdt_size); + cpu_physical_memory_write(addr, fdt, fdt_size); + } + ret = fdt_size; + g_free(fdt); + +out: + g_free(pci_map); + + return ret; +} + +typedef struct DeviceTreeParams { + PPCE500MachineState *machine; + hwaddr addr; + hwaddr initrd_base; + hwaddr initrd_size; + hwaddr kernel_base; + hwaddr kernel_size; + Notifier notifier; +} DeviceTreeParams; + +static void ppce500_reset_device_tree(void *opaque) +{ + DeviceTreeParams *p = opaque; + ppce500_load_device_tree(p->machine, p->addr, p->initrd_base, + p->initrd_size, p->kernel_base, p->kernel_size, + false); +} + +static void ppce500_init_notify(Notifier *notifier, void *data) +{ + DeviceTreeParams *p = container_of(notifier, DeviceTreeParams, notifier); + ppce500_reset_device_tree(p); +} + +static int ppce500_prep_device_tree(PPCE500MachineState *machine, + hwaddr addr, + hwaddr initrd_base, + hwaddr initrd_size, + hwaddr kernel_base, + hwaddr kernel_size) +{ + DeviceTreeParams *p = g_new(DeviceTreeParams, 1); + p->machine = machine; + p->addr = addr; + p->initrd_base = initrd_base; + p->initrd_size = initrd_size; + p->kernel_base = kernel_base; + p->kernel_size = kernel_size; + + qemu_register_reset(ppce500_reset_device_tree, p); + p->notifier.notify = ppce500_init_notify; + qemu_add_machine_init_done_notifier(&p->notifier); + + /* Issue the device tree loader once, so that we get the size of the blob */ + return ppce500_load_device_tree(machine, addr, initrd_base, initrd_size, + kernel_base, kernel_size, true); +} + +/* Create -kernel TLB entries for BookE. 
*/ +hwaddr booke206_page_size_to_tlb(uint64_t size) +{ + return 63 - clz64(size / KiB); +} + +static int booke206_initial_map_tsize(CPUPPCState *env) +{ + struct boot_info *bi = env->load_info; + hwaddr dt_end; + int ps; + + /* Our initial TLB entry needs to cover everything from 0 to + the device tree top */ + dt_end = bi->dt_base + bi->dt_size; + ps = booke206_page_size_to_tlb(dt_end) + 1; + if (ps & 1) { + /* e500v2 can only do even TLB size bits */ + ps++; + } + return ps; +} + +static uint64_t mmubooke_initial_mapsize(CPUPPCState *env) +{ + int tsize; + + tsize = booke206_initial_map_tsize(env); + return (1ULL << 10 << tsize); +} + +static void mmubooke_create_initial_mapping(CPUPPCState *env) +{ + ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 0); + hwaddr size; + int ps; + + ps = booke206_initial_map_tsize(env); + size = (ps << MAS1_TSIZE_SHIFT); + tlb->mas1 = MAS1_VALID | size; + tlb->mas2 = 0; + tlb->mas7_3 = 0; + tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; + + env->tlb_dirty = true; +} + +static void ppce500_cpu_reset_sec(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + + cpu_reset(cs); + + cs->exception_index = EXCP_HLT; +} + +static void ppce500_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + struct boot_info *bi = env->load_info; + + cpu_reset(cs); + + /* Set initial guest state. */ + cs->halted = 0; + env->gpr[1] = (16 * MiB) - 8; + env->gpr[3] = bi->dt_base; + env->gpr[4] = 0; + env->gpr[5] = 0; + env->gpr[6] = EPAPR_MAGIC; + env->gpr[7] = mmubooke_initial_mapsize(env); + env->gpr[8] = 0; + env->gpr[9] = 0; + env->nip = bi->entry; + mmubooke_create_initial_mapping(env); +} + +static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms, + IrqLines *irqs) +{ + DeviceState *dev; + SysBusDevice *s; + int i, j, k; + MachineState *machine = MACHINE(pms); + unsigned int smp_cpus = machine->smp.cpus; + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + + dev = qdev_new(TYPE_OPENPIC); + object_property_add_child(OBJECT(machine), "pic", OBJECT(dev)); + qdev_prop_set_uint32(dev, "model", pmc->mpic_version); + qdev_prop_set_uint32(dev, "nb_cpus", smp_cpus); + + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + + k = 0; + for (i = 0; i < smp_cpus; i++) { + for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { + sysbus_connect_irq(s, k++, irqs[i].irq[j]); + } + } + + return dev; +} + +static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc, + IrqLines *irqs, Error **errp) +{ + DeviceState *dev; + CPUState *cs; + + dev = qdev_new(TYPE_KVM_OPENPIC); + qdev_prop_set_uint32(dev, "model", pmc->mpic_version); + + if (!sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), errp)) { + object_unparent(OBJECT(dev)); + return NULL; + } + + CPU_FOREACH(cs) { + if (kvm_openpic_connect_vcpu(dev, cs)) { + fprintf(stderr, "%s: failed to connect vcpu to irqchip\n", + __func__); + abort(); + } + } + + return dev; +} + +static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms, + MemoryRegion *ccsr, + IrqLines *irqs) +{ + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + DeviceState *dev = NULL; + SysBusDevice *s; + + if (kvm_enabled()) { + Error *err = NULL; + + if (kvm_kernel_irqchip_allowed()) { + dev = ppce500_init_mpic_kvm(pmc, irqs, &err); + } + if (kvm_kernel_irqchip_required() && !dev) { + error_reportf_err(err, + "kernel_irqchip requested but unavailable: "); + exit(1); + } + } + + if (!dev) { + dev = 
ppce500_init_mpic_qemu(pms, irqs); + } + + s = SYS_BUS_DEVICE(dev); + memory_region_add_subregion(ccsr, MPC8544_MPIC_REGS_OFFSET, + s->mmio[0].memory); + + return dev; +} + +static void ppce500_power_off(void *opaque, int line, int on) +{ + if (on) { + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } +} + +void ppce500_init(MachineState *machine) +{ + MemoryRegion *address_space_mem = get_system_memory(); + PPCE500MachineState *pms = PPCE500_MACHINE(machine); + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(machine); + PCIBus *pci_bus; + CPUPPCState *env = NULL; + uint64_t loadaddr; + hwaddr kernel_base = -1LL; + int kernel_size = 0; + hwaddr dt_base = 0; + hwaddr initrd_base = 0; + int initrd_size = 0; + hwaddr cur_base = 0; + char *filename; + const char *payload_name; + bool kernel_as_payload; + hwaddr bios_entry = 0; + target_long payload_size; + struct boot_info *boot_info; + int dt_size; + int i; + unsigned int smp_cpus = machine->smp.cpus; + /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and + * 4 respectively */ + unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4}; + IrqLines *irqs; + DeviceState *dev, *mpicdev; + CPUPPCState *firstenv = NULL; + MemoryRegion *ccsr_addr_space; + SysBusDevice *s; + PPCE500CCSRState *ccsr; + I2CBus *i2c; + + irqs = g_new0(IrqLines, smp_cpus); + for (i = 0; i < smp_cpus; i++) { + PowerPCCPU *cpu; + CPUState *cs; + qemu_irq *input; + + cpu = POWERPC_CPU(object_new(machine->cpu_type)); + env = &cpu->env; + cs = CPU(cpu); + + if (env->mmu_model != POWERPC_MMU_BOOKE206) { + error_report("MMU model %i not supported by this machine", + env->mmu_model); + exit(1); + } + + /* + * Secondary CPU starts in halted state for now. Needs to change + * when implementing non-kernel boot. + */ + object_property_set_bool(OBJECT(cs), "start-powered-off", i != 0, + &error_fatal); + qdev_realize_and_unref(DEVICE(cs), NULL, &error_fatal); + + if (!firstenv) { + firstenv = env; + } + + input = (qemu_irq *)env->irq_inputs; + irqs[i].irq[OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT]; + irqs[i].irq[OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT]; + env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i; + env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0; + + ppc_booke_timers_init(cpu, PLATFORM_CLK_FREQ_HZ, PPC_TIMER_E500); + + /* Register reset handler */ + if (!i) { + /* Primary CPU */ + struct boot_info *boot_info; + boot_info = g_malloc0(sizeof(struct boot_info)); + qemu_register_reset(ppce500_cpu_reset, cpu); + env->load_info = boot_info; + } else { + /* Secondary CPUs */ + qemu_register_reset(ppce500_cpu_reset_sec, cpu); + } + } + + env = firstenv; + + if (!QEMU_IS_ALIGNED(machine->ram_size, RAM_SIZES_ALIGN)) { + error_report("RAM size must be multiple of %" PRIu64, RAM_SIZES_ALIGN); + exit(EXIT_FAILURE); + } + + /* Register Memory */ + memory_region_add_subregion(address_space_mem, 0, machine->ram); + + dev = qdev_new("e500-ccsr"); + object_property_add_child(qdev_get_machine(), "e500-ccsr", + OBJECT(dev)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + ccsr = CCSR(dev); + ccsr_addr_space = &ccsr->ccsr_space; + memory_region_add_subregion(address_space_mem, pmc->ccsrbar_base, + ccsr_addr_space); + + mpicdev = ppce500_init_mpic(pms, ccsr_addr_space, irqs); + g_free(irqs); + + /* Serial */ + if (serial_hd(0)) { + serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET, + 0, qdev_get_gpio_in(mpicdev, 42), 399193, + serial_hd(0), DEVICE_BIG_ENDIAN); + } + + if (serial_hd(1)) { + 
serial_mm_init(ccsr_addr_space, MPC8544_SERIAL1_REGS_OFFSET, + 0, qdev_get_gpio_in(mpicdev, 42), 399193, + serial_hd(1), DEVICE_BIG_ENDIAN); + } + /* I2C */ + dev = qdev_new("mpc-i2c"); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC8544_I2C_IRQ)); + memory_region_add_subregion(ccsr_addr_space, MPC8544_I2C_REGS_OFFSET, + sysbus_mmio_get_region(s, 0)); + i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c"); + i2c_slave_create_simple(i2c, "ds1338", RTC_REGS_OFFSET); + + + /* General Utility device */ + dev = qdev_new("mpc8544-guts"); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + memory_region_add_subregion(ccsr_addr_space, MPC8544_UTIL_OFFSET, + sysbus_mmio_get_region(s, 0)); + + /* PCI */ + dev = qdev_new("e500-pcihost"); + object_property_add_child(qdev_get_machine(), "pci-host", OBJECT(dev)); + qdev_prop_set_uint32(dev, "first_slot", pmc->pci_first_slot); + qdev_prop_set_uint32(dev, "first_pin_irq", pci_irq_nrs[0]); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + for (i = 0; i < PCI_NUM_PINS; i++) { + sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, pci_irq_nrs[i])); + } + + memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET, + sysbus_mmio_get_region(s, 0)); + + pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); + if (!pci_bus) + printf("couldn't create PCI controller!\n"); + + if (pci_bus) { + /* Register network interfaces. */ + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "virtio-net-pci", NULL); + } + } + + /* Register spinning region */ + sysbus_create_simple("e500-spin", pmc->spin_base, NULL); + + if (pmc->has_mpc8xxx_gpio) { + qemu_irq poweroff_irq; + + dev = qdev_new("mpc8xxx_gpio"); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC8XXX_GPIO_IRQ)); + memory_region_add_subregion(ccsr_addr_space, MPC8XXX_GPIO_OFFSET, + sysbus_mmio_get_region(s, 0)); + + /* Power Off GPIO at Pin 0 */ + poweroff_irq = qemu_allocate_irq(ppce500_power_off, NULL, 0); + qdev_connect_gpio_out(dev, 0, poweroff_irq); + } + + /* Platform Bus Device */ + if (pmc->has_platform_bus) { + dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); + dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); + qdev_prop_set_uint32(dev, "num_irqs", pmc->platform_bus_num_irqs); + qdev_prop_set_uint32(dev, "mmio_size", pmc->platform_bus_size); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + pms->pbus_dev = PLATFORM_BUS_DEVICE(dev); + + s = SYS_BUS_DEVICE(pms->pbus_dev); + for (i = 0; i < pmc->platform_bus_num_irqs; i++) { + int irqn = pmc->platform_bus_first_irq + i; + sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, irqn)); + } + + memory_region_add_subregion(address_space_mem, + pmc->platform_bus_base, + sysbus_mmio_get_region(s, 0)); + } + + /* + * Smart firmware defaults ahead! + * + * We follow the following table to select which payload we execute. + * + * -kernel | -bios | payload + * ---------+-------+--------- + * N | Y | u-boot + * N | N | u-boot + * Y | Y | u-boot + * Y | N | kernel + * + * This ensures backwards compatibility with how we used to expose + * -kernel to users but allows them to run through u-boot as well. 
+ */ + kernel_as_payload = false; + if (machine->firmware == NULL) { + if (machine->kernel_filename) { + payload_name = machine->kernel_filename; + kernel_as_payload = true; + } else { + payload_name = "u-boot.e500"; + } + } else { + payload_name = machine->firmware; + } + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, payload_name); + if (!filename) { + error_report("could not find firmware/kernel file '%s'", payload_name); + exit(1); + } + + payload_size = load_elf(filename, NULL, NULL, NULL, + &bios_entry, &loadaddr, NULL, NULL, + 1, PPC_ELF_MACHINE, 0, 0); + if (payload_size < 0) { + /* + * Hrm. No ELF image? Try a uImage, maybe someone is giving us an + * ePAPR compliant kernel + */ + loadaddr = LOAD_UIMAGE_LOADADDR_INVALID; + payload_size = load_uimage(filename, &bios_entry, &loadaddr, NULL, + NULL, NULL); + if (payload_size < 0) { + error_report("could not load firmware '%s'", filename); + exit(1); + } + } + + g_free(filename); + + if (kernel_as_payload) { + kernel_base = loadaddr; + kernel_size = payload_size; + } + + cur_base = loadaddr + payload_size; + if (cur_base < 32 * MiB) { + /* u-boot occupies memory up to 32MB, so load blobs above */ + cur_base = 32 * MiB; + } + + /* Load bare kernel only if no bios/u-boot has been provided */ + if (machine->kernel_filename && !kernel_as_payload) { + kernel_base = cur_base; + kernel_size = load_image_targphys(machine->kernel_filename, + cur_base, + machine->ram_size - cur_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + + cur_base += kernel_size; + } + + /* Load initrd. */ + if (machine->initrd_filename) { + initrd_base = (cur_base + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK; + initrd_size = load_image_targphys(machine->initrd_filename, initrd_base, + machine->ram_size - initrd_base); + + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + + cur_base = initrd_base + initrd_size; + } + + /* + * Reserve space for dtb behind the kernel image because Linux has a bug + * where it can only handle the dtb if it's within the first 64MB of where + * <kernel> starts. dtb cannot not reach initrd_base because INITRD_LOAD_PAD + * ensures enough space between kernel and initrd. 
+ */ + dt_base = (loadaddr + payload_size + DTC_LOAD_PAD) & ~DTC_PAD_MASK; + if (dt_base + DTB_MAX_SIZE > machine->ram_size) { + error_report("not enough memory for device tree"); + exit(1); + } + + dt_size = ppce500_prep_device_tree(pms, dt_base, + initrd_base, initrd_size, + kernel_base, kernel_size); + if (dt_size < 0) { + error_report("couldn't load device tree"); + exit(1); + } + assert(dt_size < DTB_MAX_SIZE); + + boot_info = env->load_info; + boot_info->entry = bios_entry; + boot_info->dt_base = dt_base; + boot_info->dt_size = dt_size; +} + +static void e500_ccsr_initfn(Object *obj) +{ + PPCE500CCSRState *ccsr = CCSR(obj); + memory_region_init(&ccsr->ccsr_space, obj, "e500-ccsr", + MPC8544_CCSRBAR_SIZE); +} + +static const TypeInfo e500_ccsr_info = { + .name = TYPE_CCSR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PPCE500CCSRState), + .instance_init = e500_ccsr_initfn, +}; + +static const TypeInfo ppce500_info = { + .name = TYPE_PPCE500_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(PPCE500MachineState), + .class_size = sizeof(PPCE500MachineClass), +}; + +static void e500_register_types(void) +{ + type_register_static(&e500_ccsr_info); + type_register_static(&ppce500_info); +} + +type_init(e500_register_types) diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h new file mode 100644 index 000000000..1e5853b03 --- /dev/null +++ b/hw/ppc/e500.h @@ -0,0 +1,49 @@ +#ifndef PPCE500_H +#define PPCE500_H + +#include "hw/boards.h" +#include "hw/platform-bus.h" +#include "qom/object.h" + +struct PPCE500MachineState { + /*< private >*/ + MachineState parent_obj; + + /* points to instance of TYPE_PLATFORM_BUS_DEVICE if + * board supports dynamic sysbus devices + */ + PlatformBusDevice *pbus_dev; +}; + +struct PPCE500MachineClass { + /*< private >*/ + MachineClass parent_class; + + /* required -- must at least add toplevel board compatible */ + void (*fixup_devtree)(void *fdt); + + int pci_first_slot; + int pci_nr_slots; + + int mpic_version; + bool has_mpc8xxx_gpio; + bool has_platform_bus; + hwaddr platform_bus_base; + hwaddr platform_bus_size; + int platform_bus_first_irq; + int platform_bus_num_irqs; + hwaddr ccsrbar_base; + hwaddr pci_pio_base; + hwaddr pci_mmio_base; + hwaddr pci_mmio_bus_base; + hwaddr spin_base; +}; + +void ppce500_init(MachineState *machine); + +hwaddr booke206_page_size_to_tlb(uint64_t size); + +#define TYPE_PPCE500_MACHINE "ppce500-base-machine" +OBJECT_DECLARE_TYPE(PPCE500MachineState, PPCE500MachineClass, PPCE500_MACHINE) + +#endif diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c new file mode 100644 index 000000000..fc911bbb7 --- /dev/null +++ b/hw/ppc/e500plat.c @@ -0,0 +1,122 @@ +/* + * Generic device-tree-driven paravirt PPC e500 platform + * + * Copyright 2012 Freescale Semiconductor, Inc. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "e500.h" +#include "hw/net/fsl_etsec/etsec.h" +#include "sysemu/device_tree.h" +#include "sysemu/kvm.h" +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/ppc/openpic.h" +#include "kvm_ppc.h" + +static void e500plat_fixup_devtree(void *fdt) +{ + const char model[] = "QEMU ppce500"; + const char compatible[] = "fsl,qemu-e500"; + + qemu_fdt_setprop(fdt, "/", "model", model, sizeof(model)); + qemu_fdt_setprop(fdt, "/", "compatible", compatible, + sizeof(compatible)); +} + +static void e500plat_init(MachineState *machine) +{ + PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(machine); + /* Older KVM versions don't support EPR which breaks guests when we announce + MPIC variants that support EPR. Revert to an older one for those */ + if (kvm_enabled() && !kvmppc_has_cap_epr()) { + pmc->mpic_version = OPENPIC_MODEL_FSL_MPIC_20; + } + + ppce500_init(machine); +} + +static void e500plat_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + PPCE500MachineState *pms = PPCE500_MACHINE(hotplug_dev); + + if (pms->pbus_dev) { + MachineClass *mc = MACHINE_GET_CLASS(pms); + + if (device_is_dynamic_sysbus(mc, dev)) { + platform_bus_link_device(pms->pbus_dev, SYS_BUS_DEVICE(dev)); + } + } +} + +static +HotplugHandler *e500plat_machine_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + + if (device_is_dynamic_sysbus(mc, dev)) { + return HOTPLUG_HANDLER(machine); + } + + return NULL; +} + +#define TYPE_E500PLAT_MACHINE MACHINE_TYPE_NAME("ppce500") + +static void e500plat_machine_class_init(ObjectClass *oc, void *data) +{ + PPCE500MachineClass *pmc = PPCE500_MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + MachineClass *mc = MACHINE_CLASS(oc); + + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = e500plat_machine_get_hotpug_handler; + hc->plug = e500plat_machine_device_plug_cb; + + pmc->pci_first_slot = 0x1; + pmc->pci_nr_slots = PCI_SLOT_MAX - 1; + pmc->fixup_devtree = e500plat_fixup_devtree; + pmc->mpic_version = OPENPIC_MODEL_FSL_MPIC_42; + pmc->has_mpc8xxx_gpio = true; + pmc->has_platform_bus = true; + pmc->platform_bus_base = 0xf00000000ULL; + pmc->platform_bus_size = 128 * MiB; + pmc->platform_bus_first_irq = 5; + pmc->platform_bus_num_irqs = 10; + pmc->ccsrbar_base = 0xFE0000000ULL; + pmc->pci_pio_base = 0xFE1000000ULL; + pmc->pci_mmio_base = 0xC00000000ULL; + pmc->pci_mmio_bus_base = 0xE0000000ULL; + pmc->spin_base = 0xFEF000000ULL; + + mc->desc = "generic paravirt e500 platform"; + mc->init = e500plat_init; + mc->max_cpus = 32; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30"); + mc->default_ram_id = "mpc8544ds.ram"; + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ETSEC_COMMON); + } + +static const TypeInfo e500plat_info = { + .name = TYPE_E500PLAT_MACHINE, + .parent = TYPE_PPCE500_MACHINE, + .class_init = e500plat_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void e500plat_register_types(void) +{ + type_register_static(&e500plat_info); +} +type_init(e500plat_register_types) diff --git a/hw/ppc/fdt.c b/hw/ppc/fdt.c new file mode 100644 index 000000000..0828ad725 --- /dev/null +++ b/hw/ppc/fdt.c @@ -0,0 +1,49 @@ +/* + * QEMU PowerPC helper routines for the device tree. + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. 
See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" +#include "target/ppc/mmu-hash64.h" + +#include "hw/ppc/fdt.h" + +#if defined(TARGET_PPC64) +size_t ppc_create_page_sizes_prop(PowerPCCPU *cpu, uint32_t *prop, + size_t maxsize) +{ + size_t maxcells = maxsize / sizeof(uint32_t); + int i, j, count; + uint32_t *p = prop; + + for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { + PPCHash64SegmentPageSizes *sps = &cpu->hash64_opts->sps[i]; + + if (!sps->page_shift) { + break; + } + for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) { + if (sps->enc[count].page_shift == 0) { + break; + } + } + if ((p - prop) >= (maxcells - 3 - count * 2)) { + break; + } + *(p++) = cpu_to_be32(sps->page_shift); + *(p++) = cpu_to_be32(sps->slb_enc); + *(p++) = cpu_to_be32(count); + for (j = 0; j < count; j++) { + *(p++) = cpu_to_be32(sps->enc[j].page_shift); + *(p++) = cpu_to_be32(sps->enc[j].pte_enc); + } + } + + return (p - prop) * sizeof(uint32_t); +} +#endif diff --git a/hw/ppc/fw_cfg.c b/hw/ppc/fw_cfg.c new file mode 100644 index 000000000..a88b5c4bd --- /dev/null +++ b/hw/ppc/fw_cfg.c @@ -0,0 +1,45 @@ +/* + * fw_cfg helpers (PPC specific) + * + * Copyright (c) 2019 Red Hat, Inc. + * + * Author: + * Philippe Mathieu-Daudé <philmd@redhat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/ppc/ppc.h" +#include "hw/nvram/fw_cfg.h" + +const char *fw_cfg_arch_key_name(uint16_t key) +{ + static const struct { + uint16_t key; + const char *name; + } fw_cfg_arch_wellknown_keys[] = { + {FW_CFG_PPC_WIDTH, "width"}, + {FW_CFG_PPC_HEIGHT, "height"}, + {FW_CFG_PPC_DEPTH, "depth"}, + {FW_CFG_PPC_TBFREQ, "tbfreq"}, + {FW_CFG_PPC_CLOCKFREQ, "clockfreq"}, + {FW_CFG_PPC_IS_KVM, "is_kvm"}, + {FW_CFG_PPC_KVM_HC, "kvm_hc"}, + {FW_CFG_PPC_KVM_PID, "pid"}, + {FW_CFG_PPC_NVRAM_ADDR, "nvram_addr"}, + {FW_CFG_PPC_BUSFREQ, "busfreq"}, + {FW_CFG_PPC_NVRAM_FLAT, "nvram_flat"}, + {FW_CFG_PPC_VIACONFIG, "viaconfig"}, + }; + + for (size_t i = 0; i < ARRAY_SIZE(fw_cfg_arch_wellknown_keys); i++) { + if (fw_cfg_arch_wellknown_keys[i].key == key) { + return fw_cfg_arch_wellknown_keys[i].name; + } + } + return NULL; +} diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h new file mode 100644 index 000000000..22c840807 --- /dev/null +++ b/hw/ppc/mac.h @@ -0,0 +1,108 @@ +/* + * QEMU PowerMac emulation shared definitions and prototypes + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef PPC_MAC_H +#define PPC_MAC_H + +#include "qemu/units.h" +#include "exec/memory.h" +#include "hw/boards.h" +#include "hw/sysbus.h" +#include "hw/input/adb.h" +#include "hw/misc/mos6522.h" +#include "hw/pci/pci_host.h" +#include "hw/pci-host/uninorth.h" +#include "qom/object.h" + +/* SMP is not enabled, for now */ +#define MAX_CPUS 1 + +#define NVRAM_SIZE 0x2000 +#define PROM_FILENAME "openbios-ppc" + +#define KERNEL_LOAD_ADDR 0x01000000 +#define KERNEL_GAP 0x00100000 + +#define ESCC_CLOCK 3686400 + +/* Old World IRQs */ +#define OLDWORLD_CUDA_IRQ 0x12 +#define OLDWORLD_ESCCB_IRQ 0x10 +#define OLDWORLD_ESCCA_IRQ 0xf +#define OLDWORLD_IDE0_IRQ 0xd +#define OLDWORLD_IDE0_DMA_IRQ 0x2 +#define OLDWORLD_IDE1_IRQ 0xe +#define OLDWORLD_IDE1_DMA_IRQ 0x3 + +/* New World IRQs */ +#define NEWWORLD_CUDA_IRQ 0x19 +#define NEWWORLD_PMU_IRQ 0x19 +#define NEWWORLD_ESCCB_IRQ 0x24 +#define NEWWORLD_ESCCA_IRQ 0x25 +#define NEWWORLD_IDE0_IRQ 0xd +#define NEWWORLD_IDE0_DMA_IRQ 0x2 +#define NEWWORLD_IDE1_IRQ 0xe +#define NEWWORLD_IDE1_DMA_IRQ 0x3 +#define NEWWORLD_EXTING_GPIO1 0x2f +#define NEWWORLD_EXTING_GPIO9 0x37 + +/* Core99 machine */ +#define TYPE_CORE99_MACHINE MACHINE_TYPE_NAME("mac99") +typedef struct Core99MachineState Core99MachineState; +DECLARE_INSTANCE_CHECKER(Core99MachineState, CORE99_MACHINE, + TYPE_CORE99_MACHINE) + +#define CORE99_VIA_CONFIG_CUDA 0x0 +#define CORE99_VIA_CONFIG_PMU 0x1 +#define CORE99_VIA_CONFIG_PMU_ADB 0x2 + +struct Core99MachineState { + /*< private >*/ + MachineState parent; + + uint8_t via_config; +}; + +/* Grackle PCI */ +#define TYPE_GRACKLE_PCI_HOST_BRIDGE "grackle-pcihost" + +/* Mac NVRAM */ +#define TYPE_MACIO_NVRAM "macio-nvram" +OBJECT_DECLARE_SIMPLE_TYPE(MacIONVRAMState, MACIO_NVRAM) + +struct MacIONVRAMState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t size; + uint32_t it_shift; + + MemoryRegion mem; + uint8_t *data; +}; + +void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len); +#endif /* PPC_MAC_H */ diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c new file mode 100644 index 000000000..7bb7ac399 --- /dev/null +++ b/hw/ppc/mac_newworld.c @@ -0,0 +1,663 @@ +/* + * QEMU PowerPC CHRP (currently NewWorld PowerMac) hardware System Emulator + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * PCI bus layout on a real G5 (U3 based): + * + * 0000:f0:0b.0 Host bridge [0600]: Apple Computer Inc. U3 AGP [106b:004b] + * 0000:f0:10.0 VGA compatible controller [0300]: ATI Technologies Inc RV350 AP [Radeon 9600] [1002:4150] + * 0001:00:00.0 Host bridge [0600]: Apple Computer Inc. CPC945 HT Bridge [106b:004a] + * 0001:00:01.0 PCI bridge [0604]: Advanced Micro Devices [AMD] AMD-8131 PCI-X Bridge [1022:7450] (rev 12) + * 0001:00:02.0 PCI bridge [0604]: Advanced Micro Devices [AMD] AMD-8131 PCI-X Bridge [1022:7450] (rev 12) + * 0001:00:03.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0045] + * 0001:00:04.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0046] + * 0001:00:05.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0047] + * 0001:00:06.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0048] + * 0001:00:07.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0049] + * 0001:01:07.0 Class [ff00]: Apple Computer Inc. K2 KeyLargo Mac/IO [106b:0041] (rev 20) + * 0001:01:08.0 USB Controller [0c03]: Apple Computer Inc. K2 KeyLargo USB [106b:0040] + * 0001:01:09.0 USB Controller [0c03]: Apple Computer Inc. K2 KeyLargo USB [106b:0040] + * 0001:02:0b.0 USB Controller [0c03]: NEC Corporation USB [1033:0035] (rev 43) + * 0001:02:0b.1 USB Controller [0c03]: NEC Corporation USB [1033:0035] (rev 43) + * 0001:02:0b.2 USB Controller [0c03]: NEC Corporation USB 2.0 [1033:00e0] (rev 04) + * 0001:03:0d.0 Class [ff00]: Apple Computer Inc. K2 ATA/100 [106b:0043] + * 0001:03:0e.0 FireWire (IEEE 1394) [0c00]: Apple Computer Inc. K2 FireWire [106b:0042] + * 0001:04:0f.0 Ethernet controller [0200]: Apple Computer Inc. 
K2 GMAC (Sun GEM) [106b:004c] + * 0001:05:0c.0 IDE interface [0101]: Broadcom K2 SATA [1166:0240] + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qapi/error.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/mac.h" +#include "hw/input/adb.h" +#include "hw/ppc/mac_dbdma.h" +#include "hw/pci/pci.h" +#include "net/net.h" +#include "sysemu/sysemu.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/char/escc.h" +#include "hw/misc/macio/macio.h" +#include "hw/ppc/openpic.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "kvm_ppc.h" +#include "hw/usb.h" +#include "hw/sysbus.h" +#include "trace.h" + +#define MAX_IDE_BUS 2 +#define CFG_ADDR 0xf0000510 +#define TBFREQ (100UL * 1000UL * 1000UL) +#define CLOCKFREQ (900UL * 1000UL * 1000UL) +#define BUSFREQ (100UL * 1000UL * 1000UL) + +#define NDRV_VGA_FILENAME "qemu_vga.ndrv" + +#define PROM_BASE 0xfff00000 +#define PROM_SIZE (1 * MiB) + +static void fw_cfg_boot_set(void *opaque, const char *boot_device, + Error **errp) +{ + fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); +} + +static uint64_t translate_kernel_address(void *opaque, uint64_t addr) +{ + return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR; +} + +static void ppc_core99_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); + /* 970 CPUs want to get their initial IP as part of their boot protocol */ + cpu->env.nip = PROM_BASE + 0x100; +} + +/* PowerPC Mac99 hardware initialisation */ +static void ppc_core99_init(MachineState *machine) +{ + ram_addr_t ram_size = machine->ram_size; + const char *bios_name = machine->firmware ?: PROM_FILENAME; + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + const char *boot_device = machine->boot_order; + Core99MachineState *core99_machine = CORE99_MACHINE(machine); + PowerPCCPU *cpu = NULL; + CPUPPCState *env = NULL; + char *filename; + IrqLines *openpic_irqs; + int linux_boot, i, j, k; + MemoryRegion *bios = g_new(MemoryRegion, 1); + hwaddr kernel_base, initrd_base, cmdline_base = 0; + long kernel_size, initrd_size; + UNINHostState *uninorth_pci; + PCIBus *pci_bus; + PCIDevice *macio; + ESCCState *escc; + bool has_pmu, has_adb; + MACIOIDEState *macio_ide; + BusState *adb_bus; + MacIONVRAMState *nvr; + int bios_size; + int ppc_boot_device; + DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; + void *fw_cfg; + int machine_arch; + SysBusDevice *s; + DeviceState *dev, *pic_dev; + DeviceState *uninorth_internal_dev = NULL, *uninorth_agp_dev = NULL; + hwaddr nvram_addr = 0xFFF04000; + uint64_t tbfreq; + unsigned int smp_cpus = machine->smp.cpus; + + linux_boot = (kernel_filename != NULL); + + /* init CPUs */ + for (i = 0; i < smp_cpus; i++) { + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + + /* Set time-base frequency to 100 Mhz */ + cpu_ppc_tb_init(env, TBFREQ); + qemu_register_reset(ppc_core99_reset, cpu); + } + + /* allocate RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware ROM */ + memory_region_init_rom(bios, NULL, "ppc_core99.bios", PROM_SIZE, + &error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_BASE, bios); 
+ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + /* Load OpenBIOS (ELF) */ + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + + if (bios_size <= 0) { + /* or load binary ROM image */ + bios_size = load_image_targphys(filename, PROM_BASE, PROM_SIZE); + } + g_free(filename); + } else { + bios_size = -1; + } + if (bios_size < 0 || bios_size > PROM_SIZE) { + error_report("could not load PowerPC bios '%s'", bios_name); + exit(1); + } + + if (linux_boot) { + int bswap_needed; + +#ifdef BSWAP_NEEDED + bswap_needed = 1; +#else + bswap_needed = 0; +#endif + kernel_base = KERNEL_LOAD_ADDR; + + kernel_size = load_elf(kernel_filename, NULL, + translate_kernel_address, NULL, NULL, NULL, + NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + if (kernel_size < 0) + kernel_size = load_aout(kernel_filename, kernel_base, + ram_size - kernel_base, bswap_needed, + TARGET_PAGE_SIZE); + if (kernel_size < 0) + kernel_size = load_image_targphys(kernel_filename, + kernel_base, + ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + /* load initrd */ + if (initrd_filename) { + initrd_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP); + initrd_size = load_image_targphys(initrd_filename, initrd_base, + ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + cmdline_base = TARGET_PAGE_ALIGN(initrd_base + initrd_size); + } else { + initrd_base = 0; + initrd_size = 0; + cmdline_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP); + } + ppc_boot_device = 'm'; + } else { + kernel_base = 0; + kernel_size = 0; + initrd_base = 0; + initrd_size = 0; + ppc_boot_device = '\0'; + /* We consider that NewWorld PowerMac never have any floppy drive + * For now, OHW cannot boot from the network. + */ + for (i = 0; boot_device[i] != '\0'; i++) { + if (boot_device[i] >= 'c' && boot_device[i] <= 'f') { + ppc_boot_device = boot_device[i]; + break; + } + } + if (ppc_boot_device == '\0') { + error_report("No valid boot device for Mac99 machine"); + exit(1); + } + } + + /* UniN init */ + dev = qdev_new(TYPE_UNI_NORTH); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + memory_region_add_subregion(get_system_memory(), 0xf8000000, + sysbus_mmio_get_region(s, 0)); + + openpic_irqs = g_new0(IrqLines, smp_cpus); + for (i = 0; i < smp_cpus; i++) { + /* Mac99 IRQ connection between OpenPIC outputs pins + * and PowerPC input pins + */ + switch (PPC_INPUT(env)) { + case PPC_FLAGS_INPUT_6xx: + openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_MCP]; + /* Not connected ? */ + openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL; + /* Check this */ + openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_HRESET]; + break; +#if defined(TARGET_PPC64) + case PPC_FLAGS_INPUT_970: + openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_MCP]; + /* Not connected ? 
*/ + openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL; + /* Check this */ + openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_HRESET]; + break; +#endif /* defined(TARGET_PPC64) */ + default: + error_report("Bus model not supported on mac99 machine"); + exit(1); + } + } + + if (PPC_INPUT(env) == PPC_FLAGS_INPUT_970) { + /* 970 gets a U3 bus */ + /* Uninorth AGP bus */ + dev = qdev_new(TYPE_U3_AGP_HOST_BRIDGE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + uninorth_pci = U3_AGP_HOST_BRIDGE(dev); + s = SYS_BUS_DEVICE(dev); + /* PCI hole */ + memory_region_add_subregion(get_system_memory(), 0x80000000ULL, + sysbus_mmio_get_region(s, 2)); + /* Register 8 MB of ISA IO space */ + memory_region_add_subregion(get_system_memory(), 0xf2000000, + sysbus_mmio_get_region(s, 3)); + sysbus_mmio_map(s, 0, 0xf0800000); + sysbus_mmio_map(s, 1, 0xf0c00000); + + machine_arch = ARCH_MAC99_U3; + } else { + /* Use values found on a real PowerMac */ + /* Uninorth AGP bus */ + uninorth_agp_dev = qdev_new(TYPE_UNI_NORTH_AGP_HOST_BRIDGE); + s = SYS_BUS_DEVICE(uninorth_agp_dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, 0xf0800000); + sysbus_mmio_map(s, 1, 0xf0c00000); + + /* Uninorth internal bus */ + uninorth_internal_dev = qdev_new( + TYPE_UNI_NORTH_INTERNAL_PCI_HOST_BRIDGE); + s = SYS_BUS_DEVICE(uninorth_internal_dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, 0xf4800000); + sysbus_mmio_map(s, 1, 0xf4c00000); + + /* Uninorth main bus */ + dev = qdev_new(TYPE_UNI_NORTH_PCI_HOST_BRIDGE); + qdev_prop_set_uint32(dev, "ofw-addr", 0xf2000000); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + uninorth_pci = UNI_NORTH_PCI_HOST_BRIDGE(dev); + s = SYS_BUS_DEVICE(dev); + /* PCI hole */ + memory_region_add_subregion(get_system_memory(), 0x80000000ULL, + sysbus_mmio_get_region(s, 2)); + /* Register 8 MB of ISA IO space */ + memory_region_add_subregion(get_system_memory(), 0xf2000000, + sysbus_mmio_get_region(s, 3)); + sysbus_mmio_map(s, 0, 0xf2800000); + sysbus_mmio_map(s, 1, 0xf2c00000); + + machine_arch = ARCH_MAC99; + } + + machine->usb |= defaults_enabled() && !machine->usb_disabled; + has_pmu = (core99_machine->via_config != CORE99_VIA_CONFIG_CUDA); + has_adb = (core99_machine->via_config == CORE99_VIA_CONFIG_CUDA || + core99_machine->via_config == CORE99_VIA_CONFIG_PMU_ADB); + + /* Timebase Frequency */ + if (kvm_enabled()) { + tbfreq = kvmppc_get_tbfreq(); + } else { + tbfreq = TBFREQ; + } + + /* init basic PC hardware */ + pci_bus = PCI_HOST_BRIDGE(uninorth_pci)->bus; + + /* MacIO */ + macio = pci_new(-1, TYPE_NEWWORLD_MACIO); + dev = DEVICE(macio); + qdev_prop_set_uint64(dev, "frequency", tbfreq); + qdev_prop_set_bit(dev, "has-pmu", has_pmu); + qdev_prop_set_bit(dev, "has-adb", has_adb); + + escc = ESCC(object_resolve_path_component(OBJECT(macio), "escc")); + qdev_prop_set_chr(DEVICE(escc), "chrA", serial_hd(0)); + qdev_prop_set_chr(DEVICE(escc), "chrB", serial_hd(1)); + + pci_realize_and_unref(macio, pci_bus, &error_fatal); + + pic_dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pic")); + for (i = 0; i < 4; i++) { + qdev_connect_gpio_out(DEVICE(uninorth_pci), i, + qdev_get_gpio_in(pic_dev, 0x1b + i)); + } + + /* TODO: additional PCI buses only wired up for 32-bit machines */ + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_970) { + /* Uninorth AGP bus */ + for (i = 0; i < 4; i++) { + qdev_connect_gpio_out(uninorth_agp_dev, i, + qdev_get_gpio_in(pic_dev, 0x1b + i)); + } + + /* Uninorth 
internal bus */ + for (i = 0; i < 4; i++) { + qdev_connect_gpio_out(uninorth_internal_dev, i, + qdev_get_gpio_in(pic_dev, 0x1b + i)); + } + } + + /* OpenPIC */ + s = SYS_BUS_DEVICE(pic_dev); + k = 0; + for (i = 0; i < smp_cpus; i++) { + for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { + sysbus_connect_irq(s, k++, openpic_irqs[i].irq[j]); + } + } + g_free(openpic_irqs); + + /* We only emulate 2 out of 3 IDE controllers for now */ + ide_drive_get(hd, ARRAY_SIZE(hd)); + + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[0]")); + macio_ide_init_drives(macio_ide, hd); + + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[1]")); + macio_ide_init_drives(macio_ide, &hd[MAX_IDE_DEVS]); + + if (has_adb) { + if (has_pmu) { + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pmu")); + } else { + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda")); + } + + adb_bus = qdev_get_child_bus(dev, "adb.0"); + dev = qdev_new(TYPE_ADB_KEYBOARD); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + + dev = qdev_new(TYPE_ADB_MOUSE); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + } + + if (machine->usb) { + pci_create_simple(pci_bus, -1, "pci-ohci"); + + /* U3 needs to use USB for input because Linux doesn't support via-cuda + on PPC64 */ + if (!has_adb || machine_arch == ARCH_MAC99_U3) { + USBBus *usb_bus = usb_bus_find(-1); + + usb_create_simple(usb_bus, "usb-kbd"); + usb_create_simple(usb_bus, "usb-mouse"); + } + } + + pci_vga_init(pci_bus); + + if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) { + graphic_depth = 15; + } + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "sungem", NULL); + } + + /* The NewWorld NVRAM is not located in the MacIO device */ + if (kvm_enabled() && qemu_real_host_page_size > 4096) { + /* We can't combine read-write and read-only in a single page, so + move the NVRAM out of ROM again for KVM */ + nvram_addr = 0xFFE00000; + } + dev = qdev_new(TYPE_MACIO_NVRAM); + qdev_prop_set_uint32(dev, "size", 0x2000); + qdev_prop_set_uint32(dev, "it_shift", 1); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, nvram_addr); + nvr = MACIO_NVRAM(dev); + pmac_format_nvram_partition(nvr, 0x2000); + /* No PCI init: the BIOS will do it */ + + dev = qdev_new(TYPE_FW_CFG_MEM); + fw_cfg = FW_CFG(dev); + qdev_prop_set_uint32(dev, "data_width", 1); + qdev_prop_set_bit(dev, "dma_enabled", false); + object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG, + OBJECT(fw_cfg)); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, CFG_ADDR); + sysbus_mmio_map(s, 1, CFG_ADDR + 2); + + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + if (kernel_cmdline) { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, cmdline_base); + pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, kernel_cmdline); + } else { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, 0); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ppc_boot_device); + + fw_cfg_add_i16(fw_cfg, 
FW_CFG_PPC_WIDTH, graphic_width); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_VIACONFIG, core99_machine->via_config); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); + if (kvm_enabled()) { + uint8_t *hypercall; + + hypercall = g_malloc(16); + kvmppc_get_hypercall(env, hypercall, 16); + fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq); + /* Mac OS X requires a "known good" clock-frequency value; pass it one. */ + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr); + + /* MacOS NDRV VGA driver */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME); + if (filename) { + gchar *ndrv_file; + gsize ndrv_size; + + if (g_file_get_contents(filename, &ndrv_file, &ndrv_size, NULL)) { + fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size); + } + g_free(filename); + } + + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); +} + +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. + */ +static char *core99_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ + PCIDevice *pci; + MACIOIDEState *macio_ide; + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-newworld")) { + pci = PCI_DEVICE(dev); + return g_strdup_printf("mac-io@%x", PCI_SLOT(pci->devfn)); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-ide")) { + macio_ide = MACIO_IDE(dev); + return g_strdup_printf("ata-3@%x", macio_ide->addr); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-hd")) { + return g_strdup("disk"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-cd")) { + return g_strdup("cdrom"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "virtio-blk-device")) { + return g_strdup("disk"); + } + + return NULL; +} +static int core99_kvm_type(MachineState *machine, const char *arg) +{ + /* Always force PR KVM */ + return 2; +} + +static void core99_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); + + mc->desc = "Mac99 based PowerMAC"; + mc->init = ppc_core99_init; + mc->block_default_type = IF_IDE; + mc->max_cpus = MAX_CPUS; + mc->default_boot_order = "cd"; + mc->default_display = "std"; + mc->kvm_type = core99_kvm_type; +#ifdef TARGET_PPC64 + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("970fx_v3.1"); +#else + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9"); +#endif + mc->default_ram_id = "ppc_core99.ram"; + mc->ignore_boot_device_suffixes = true; + fwc->get_dev_path = core99_fw_dev_path; +} + +static char *core99_get_via_config(Object *obj, Error **errp) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + switch (cms->via_config) { + default: + case CORE99_VIA_CONFIG_CUDA: + return g_strdup("cuda"); + + case CORE99_VIA_CONFIG_PMU: + return g_strdup("pmu"); + + case CORE99_VIA_CONFIG_PMU_ADB: + return g_strdup("pmu-adb"); + } +} + +static void core99_set_via_config(Object *obj, const char *value, Error **errp) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + if (!strcmp(value, "cuda")) { + cms->via_config = CORE99_VIA_CONFIG_CUDA; + } else if (!strcmp(value, "pmu")) { + cms->via_config = CORE99_VIA_CONFIG_PMU; + } else if 
(!strcmp(value, "pmu-adb")) { + cms->via_config = CORE99_VIA_CONFIG_PMU_ADB; + } else { + error_setg(errp, "Invalid via value"); + error_append_hint(errp, "Valid values are cuda, pmu, pmu-adb.\n"); + } +} + +static void core99_instance_init(Object *obj) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + /* Default via_config is CORE99_VIA_CONFIG_CUDA */ + cms->via_config = CORE99_VIA_CONFIG_CUDA; + object_property_add_str(obj, "via", core99_get_via_config, + core99_set_via_config); + object_property_set_description(obj, "via", + "Set VIA configuration. " + "Valid values are cuda, pmu and pmu-adb"); + + return; +} + +static const TypeInfo core99_machine_info = { + .name = MACHINE_TYPE_NAME("mac99"), + .parent = TYPE_MACHINE, + .class_init = core99_machine_class_init, + .instance_init = core99_instance_init, + .instance_size = sizeof(Core99MachineState), + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, +}; + +static void mac_machine_register_types(void) +{ + type_register_static(&core99_machine_info); +} + +type_init(mac_machine_register_types) diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c new file mode 100644 index 000000000..de2be960e --- /dev/null +++ b/hw/ppc/mac_oldworld.c @@ -0,0 +1,455 @@ + +/* + * QEMU OldWorld PowerMac (currently ~G3 Beige) hardware System Emulator + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "mac.h" +#include "hw/input/adb.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "hw/isa/isa.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/char/escc.h" +#include "hw/misc/macio/macio.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "kvm_ppc.h" + +#define MAX_IDE_BUS 2 +#define CFG_ADDR 0xf0000510 +#define TBFREQ 16600000UL +#define CLOCKFREQ 266000000UL +#define BUSFREQ 66000000UL + +#define NDRV_VGA_FILENAME "qemu_vga.ndrv" + +#define GRACKLE_BASE 0xfec00000 +#define PROM_BASE 0xffc00000 +#define PROM_SIZE (4 * MiB) + +static void fw_cfg_boot_set(void *opaque, const char *boot_device, + Error **errp) +{ + fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); +} + +static uint64_t translate_kernel_address(void *opaque, uint64_t addr) +{ + return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR; +} + +static void ppc_heathrow_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); +} + +static void ppc_heathrow_init(MachineState *machine) +{ + ram_addr_t ram_size = machine->ram_size; + const char *bios_name = machine->firmware ?: PROM_FILENAME; + const char *boot_device = machine->boot_order; + PowerPCCPU *cpu = NULL; + CPUPPCState *env = NULL; + char *filename; + int i; + MemoryRegion *bios = g_new(MemoryRegion, 1); + uint32_t kernel_base, initrd_base, cmdline_base = 0; + int32_t kernel_size, initrd_size; + PCIBus *pci_bus; + PCIDevice *macio; + MACIOIDEState *macio_ide; + ESCCState *escc; + SysBusDevice *s; + DeviceState *dev, *pic_dev, *grackle_dev; + BusState *adb_bus; + uint64_t bios_addr; + int bios_size; + unsigned int smp_cpus = machine->smp.cpus; + uint16_t ppc_boot_device; + DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; + void *fw_cfg; + uint64_t tbfreq; + + /* init CPUs */ + for (i = 0; i < smp_cpus; i++) { + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + + /* Set time-base frequency to 16.6 Mhz */ + cpu_ppc_tb_init(env, TBFREQ); + qemu_register_reset(ppc_heathrow_reset, cpu); + } + + /* allocate RAM */ + if (ram_size > 2047 * MiB) { + error_report("Too much memory for this machine: %" PRId64 " MB, " + "maximum 2047 MB", ram_size / MiB); + exit(1); + } + + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware ROM */ + memory_region_init_rom(bios, NULL, "ppc_heathrow.bios", PROM_SIZE, + &error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_BASE, bios); + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + /* Load OpenBIOS (ELF) */ + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, &bios_addr, + NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + /* Unfortunately, load_elf sign-extends reading elf32 */ + bios_addr = (uint32_t)bios_addr; + + if (bios_size <= 0) { + /* or if could not load ELF try loading a binary ROM image */ + bios_size = load_image_targphys(filename, PROM_BASE, PROM_SIZE); + bios_addr = PROM_BASE; + } + g_free(filename); + } else { + bios_size = -1; + } + if (bios_size < 0 || bios_addr - PROM_BASE + bios_size > PROM_SIZE) { + error_report("could not load PowerPC bios '%s'", bios_name); + exit(1); + } + + if (machine->kernel_filename) { + int 
bswap_needed;
+
+#ifdef BSWAP_NEEDED
+        bswap_needed = 1;
+#else
+        bswap_needed = 0;
+#endif
+        kernel_base = KERNEL_LOAD_ADDR;
+        kernel_size = load_elf(machine->kernel_filename, NULL,
+                               translate_kernel_address, NULL, NULL, NULL,
+                               NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0);
+        if (kernel_size < 0)
+            kernel_size = load_aout(machine->kernel_filename, kernel_base,
+                                    ram_size - kernel_base, bswap_needed,
+                                    TARGET_PAGE_SIZE);
+        if (kernel_size < 0)
+            kernel_size = load_image_targphys(machine->kernel_filename,
+                                              kernel_base,
+                                              ram_size - kernel_base);
+        if (kernel_size < 0) {
+            error_report("could not load kernel '%s'",
+                         machine->kernel_filename);
+            exit(1);
+        }
+        /* load initrd */
+        if (machine->initrd_filename) {
+            initrd_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size +
+                                            KERNEL_GAP);
+            initrd_size = load_image_targphys(machine->initrd_filename,
+                                              initrd_base,
+                                              ram_size - initrd_base);
+            if (initrd_size < 0) {
+                error_report("could not load initial ram disk '%s'",
+                             machine->initrd_filename);
+                exit(1);
+            }
+            cmdline_base = TARGET_PAGE_ALIGN(initrd_base + initrd_size);
+        } else {
+            initrd_base = 0;
+            initrd_size = 0;
+            cmdline_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP);
+        }
+        ppc_boot_device = 'm';
+    } else {
+        kernel_base = 0;
+        kernel_size = 0;
+        initrd_base = 0;
+        initrd_size = 0;
+        ppc_boot_device = '\0';
+        for (i = 0; boot_device[i] != '\0'; i++) {
+            /* TOFIX: for now, the second IDE channel is not properly
+             * used by OHW. Mac floppy disks are not emulated.
+             * For now, OHW cannot boot from the network.
+             */
+#if 0
+            if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
+                ppc_boot_device = boot_device[i];
+                break;
+            }
+#else
+            if (boot_device[i] >= 'c' && boot_device[i] <= 'd') {
+                ppc_boot_device = boot_device[i];
+                break;
+            }
+#endif
+        }
+        if (ppc_boot_device == '\0') {
+            error_report("No valid boot device for G3 Beige machine");
+            exit(1);
+        }
+    }
+
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
+    /* Grackle PCI host bridge */
+    grackle_dev = qdev_new(TYPE_GRACKLE_PCI_HOST_BRIDGE);
+    qdev_prop_set_uint32(grackle_dev, "ofw-addr", 0x80000000);
+    s = SYS_BUS_DEVICE(grackle_dev);
+    sysbus_realize_and_unref(s, &error_fatal);
+
+    sysbus_mmio_map(s, 0, GRACKLE_BASE);
+    sysbus_mmio_map(s, 1, GRACKLE_BASE + 0x200000);
+    /* PCI hole */
+    memory_region_add_subregion(get_system_memory(), 0x80000000ULL,
+                                sysbus_mmio_get_region(s, 2));
+    /* Register 2 MB of ISA IO space */
+    memory_region_add_subregion(get_system_memory(), 0xfe000000,
+                                sysbus_mmio_get_region(s, 3));
+
+    pci_bus = PCI_HOST_BRIDGE(grackle_dev)->bus;
+
+    /* MacIO */
+    macio = pci_new(PCI_DEVFN(16, 0), TYPE_OLDWORLD_MACIO);
+    dev = DEVICE(macio);
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
+
+    escc = ESCC(object_resolve_path_component(OBJECT(macio), "escc"));
+    qdev_prop_set_chr(DEVICE(escc), "chrA", serial_hd(0));
+    qdev_prop_set_chr(DEVICE(escc), "chrB", serial_hd(1));
+
+    pci_realize_and_unref(macio, pci_bus, &error_fatal);
+
+    pic_dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pic"));
+    for (i = 0; i < 4; i++) {
+        qdev_connect_gpio_out(grackle_dev, i,
+                              qdev_get_gpio_in(pic_dev, 0x15 + i));
+    }
+
+    /* Connect the heathrow PIC outputs to the 6xx bus */
+    for (i = 0; i < smp_cpus; i++) {
+        switch (PPC_INPUT(env)) {
+        case PPC_FLAGS_INPUT_6xx:
+            /* XXX: we register only 1 output pin for heathrow PIC */
+            qdev_connect_gpio_out(pic_dev, 0,
+                ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]);
+            break;
+        default:
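+            /*
+             * Editor's note: only 6xx-style CPU input pins are wired to the
+             * Heathrow PIC above; the switch has no other case, so any
+             * other core configuration falls through to this error path
+             * at startup.
+             */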
error_report("Bus model not supported on OldWorld Mac machine"); + exit(1); + } + } + + pci_vga_init(pci_bus); + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); + } + + /* MacIO IDE */ + ide_drive_get(hd, ARRAY_SIZE(hd)); + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[0]")); + macio_ide_init_drives(macio_ide, hd); + + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[1]")); + macio_ide_init_drives(macio_ide, &hd[MAX_IDE_DEVS]); + + /* MacIO CUDA/ADB */ + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda")); + adb_bus = qdev_get_child_bus(dev, "adb.0"); + dev = qdev_new(TYPE_ADB_KEYBOARD); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + dev = qdev_new(TYPE_ADB_MOUSE); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + + if (machine_usb(machine)) { + pci_create_simple(pci_bus, -1, "pci-ohci"); + } + + if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) + graphic_depth = 15; + + /* No PCI init: the BIOS will do it */ + + dev = qdev_new(TYPE_FW_CFG_MEM); + fw_cfg = FW_CFG(dev); + qdev_prop_set_uint32(dev, "data_width", 1); + qdev_prop_set_bit(dev, "dma_enabled", false); + object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG, + OBJECT(fw_cfg)); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, CFG_ADDR); + sysbus_mmio_map(s, 1, CFG_ADDR + 2); + + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + if (machine->kernel_cmdline) { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, cmdline_base); + pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, + machine->kernel_cmdline); + } else { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, 0); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ppc_boot_device); + + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_WIDTH, graphic_width); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); + if (kvm_enabled()) { + uint8_t *hypercall; + + hypercall = g_malloc(16); + kvmppc_get_hypercall(env, hypercall, 16); + fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq); + /* Mac OS X requires a "known good" clock-frequency value; pass it one. */ + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); + + /* MacOS NDRV VGA driver */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME); + if (filename) { + gchar *ndrv_file; + gsize ndrv_size; + + if (g_file_get_contents(filename, &ndrv_file, &ndrv_size, NULL)) { + fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size); + } + g_free(filename); + } + + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); +} + +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. 
+ */ +static char *heathrow_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ + PCIDevice *pci; + MACIOIDEState *macio_ide; + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-oldworld")) { + pci = PCI_DEVICE(dev); + return g_strdup_printf("mac-io@%x", PCI_SLOT(pci->devfn)); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-ide")) { + macio_ide = MACIO_IDE(dev); + return g_strdup_printf("ata-3@%x", macio_ide->addr); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-hd")) { + return g_strdup("disk"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-cd")) { + return g_strdup("cdrom"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "virtio-blk-device")) { + return g_strdup("disk"); + } + + return NULL; +} + +static int heathrow_kvm_type(MachineState *machine, const char *arg) +{ + /* Always force PR KVM */ + return 2; +} + +static void heathrow_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); + + mc->desc = "Heathrow based PowerMAC"; + mc->init = ppc_heathrow_init; + mc->block_default_type = IF_IDE; + mc->max_cpus = MAX_CPUS; +#ifndef TARGET_PPC64 + mc->is_default = true; +#endif + /* TOFIX "cad" when Mac floppy is implemented */ + mc->default_boot_order = "cd"; + mc->kvm_type = heathrow_kvm_type; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("750_v3.1"); + mc->default_display = "std"; + mc->ignore_boot_device_suffixes = true; + mc->default_ram_id = "ppc_heathrow.ram"; + fwc->get_dev_path = heathrow_fw_dev_path; +} + +static const TypeInfo ppc_heathrow_machine_info = { + .name = MACHINE_TYPE_NAME("g3beige"), + .parent = TYPE_MACHINE, + .class_init = heathrow_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, +}; + +static void ppc_heathrow_register_types(void) +{ + type_register_static(&ppc_heathrow_machine_info); +} + +type_init(ppc_heathrow_register_types); diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build new file mode 100644 index 000000000..aa4c8e6a2 --- /dev/null +++ b/hw/ppc/meson.build @@ -0,0 +1,90 @@ +ppc_ss = ss.source_set() +ppc_ss.add(files( + 'ppc.c', + 'ppc_booke.c', +)) +ppc_ss.add(when: 'CONFIG_FDT_PPC', if_true: [files( + 'fdt.c', +), fdt]) +ppc_ss.add(when: 'CONFIG_FW_CFG_PPC', if_true: files('fw_cfg.c')) + +# IBM pSeries (sPAPR) +ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( + 'spapr.c', + 'spapr_caps.c', + 'spapr_vio.c', + 'spapr_events.c', + 'spapr_hcall.c', + 'spapr_iommu.c', + 'spapr_rtas.c', + 'spapr_pci.c', + 'spapr_rtc.c', + 'spapr_drc.c', + 'spapr_cpu_core.c', + 'spapr_ovec.c', + 'spapr_irq.c', + 'spapr_tpm_proxy.c', + 'spapr_nvdimm.c', + 'spapr_rtas_ddw.c', + 'spapr_numa.c', + 'pef.c', +)) +ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files( + 'spapr_softmmu.c', +)) +ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c')) +ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files( + 'spapr_pci_vfio.c', + 'spapr_pci_nvlink2.c' +)) + +# IBM PowerNV +ppc_ss.add(when: 'CONFIG_POWERNV', if_true: files( + 'pnv.c', + 'pnv_xscom.c', + 'pnv_core.c', + 'pnv_lpc.c', + 'pnv_psi.c', + 'pnv_occ.c', + 'pnv_bmc.c', + 'pnv_homer.c', + 'pnv_pnor.c', +)) +# PowerPC 4xx boards +ppc_ss.add(when: 'CONFIG_PPC405', if_true: files( + 'ppc405_boards.c', + 'ppc405_uc.c')) +ppc_ss.add(when: 'CONFIG_PPC440', if_true: files( + 'ppc440_bamboo.c', + 'ppc440_pcix.c', 'ppc440_uc.c')) +ppc_ss.add(when: 'CONFIG_PPC4XX', if_true: files( + 'ppc4xx_pci.c', + 
'ppc4xx_devs.c')) +ppc_ss.add(when: 'CONFIG_SAM460EX', if_true: files('sam460ex.c')) +# PReP +ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep.c')) +ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep_systemio.c')) +ppc_ss.add(when: 'CONFIG_RS6000_MC', if_true: files('rs6000_mc.c')) +# OldWorld PowerMac +ppc_ss.add(when: 'CONFIG_MAC_OLDWORLD', if_true: files('mac_oldworld.c')) +# NewWorld PowerMac +ppc_ss.add(when: 'CONFIG_MAC_NEWWORLD', if_true: files('mac_newworld.c')) +# e500 +ppc_ss.add(when: 'CONFIG_E500', if_true: files( + 'e500.c', + 'mpc8544ds.c', + 'e500plat.c' +)) +ppc_ss.add(when: 'CONFIG_E500', if_true: files( + 'mpc8544_guts.c', + 'ppce500_spin.c' +)) +# PowerPC 440 Xilinx ML507 reference board. +ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c')) +# Pegasos2 +ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c')) + +ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c')) +ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c')) + +hw_arch += {'ppc': ppc_ss} diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c new file mode 100644 index 000000000..e8d2d51c2 --- /dev/null +++ b/hw/ppc/mpc8544_guts.c @@ -0,0 +1,142 @@ +/* + * QEMU PowerPC MPC8544 global util pseudo-device + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Alexander Graf, <alex@csgraf.de> + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ***************************************************************** + * + * The documentation for this device is noted in the MPC8544 documentation, + * file name "MPC8544ERM.pdf". You can easily find it on the web. 
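+ * (Editor's note: only a small slice of the GUTS block is modelled below:
+ * reads of the PVR/SVR registers are answered from the current CPU's
+ * SPRs, a write of the RSTCR reset bit requests a system reset, and any
+ * other register access is logged to stderr and otherwise ignored.)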
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "sysemu/runstate.h" +#include "cpu.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define MPC8544_GUTS_MMIO_SIZE 0x1000 +#define MPC8544_GUTS_RSTCR_RESET 0x02 + +#define MPC8544_GUTS_ADDR_PORPLLSR 0x00 +#define MPC8544_GUTS_ADDR_PORBMSR 0x04 +#define MPC8544_GUTS_ADDR_PORIMPSCR 0x08 +#define MPC8544_GUTS_ADDR_PORDEVSR 0x0C +#define MPC8544_GUTS_ADDR_PORDBGMSR 0x10 +#define MPC8544_GUTS_ADDR_PORDEVSR2 0x14 +#define MPC8544_GUTS_ADDR_GPPORCR 0x20 +#define MPC8544_GUTS_ADDR_GPIOCR 0x30 +#define MPC8544_GUTS_ADDR_GPOUTDR 0x40 +#define MPC8544_GUTS_ADDR_GPINDR 0x50 +#define MPC8544_GUTS_ADDR_PMUXCR 0x60 +#define MPC8544_GUTS_ADDR_DEVDISR 0x70 +#define MPC8544_GUTS_ADDR_POWMGTCSR 0x80 +#define MPC8544_GUTS_ADDR_MCPSUMR 0x90 +#define MPC8544_GUTS_ADDR_RSTRSCR 0x94 +#define MPC8544_GUTS_ADDR_PVR 0xA0 +#define MPC8544_GUTS_ADDR_SVR 0xA4 +#define MPC8544_GUTS_ADDR_RSTCR 0xB0 +#define MPC8544_GUTS_ADDR_IOVSELSR 0xC0 +#define MPC8544_GUTS_ADDR_DDRCSR 0xB20 +#define MPC8544_GUTS_ADDR_DDRCDR 0xB24 +#define MPC8544_GUTS_ADDR_DDRCLKDR 0xB28 +#define MPC8544_GUTS_ADDR_CLKOCR 0xE00 +#define MPC8544_GUTS_ADDR_SRDS1CR1 0xF04 +#define MPC8544_GUTS_ADDR_SRDS2CR1 0xF10 +#define MPC8544_GUTS_ADDR_SRDS2CR3 0xF18 + +#define TYPE_MPC8544_GUTS "mpc8544-guts" +OBJECT_DECLARE_SIMPLE_TYPE(GutsState, MPC8544_GUTS) + +struct GutsState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; +}; + + +static uint64_t mpc8544_guts_read(void *opaque, hwaddr addr, + unsigned size) +{ + uint32_t value = 0; + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); + CPUPPCState *env = &cpu->env; + + addr &= MPC8544_GUTS_MMIO_SIZE - 1; + switch (addr) { + case MPC8544_GUTS_ADDR_PVR: + value = env->spr[SPR_PVR]; + break; + case MPC8544_GUTS_ADDR_SVR: + value = env->spr[SPR_E500_SVR]; + break; + default: + fprintf(stderr, "guts: Unknown register read: %x\n", (int)addr); + break; + } + + return value; +} + +static void mpc8544_guts_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + addr &= MPC8544_GUTS_MMIO_SIZE - 1; + + switch (addr) { + case MPC8544_GUTS_ADDR_RSTCR: + if (value & MPC8544_GUTS_RSTCR_RESET) { + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + break; + default: + fprintf(stderr, "guts: Unknown register write: %x = %x\n", + (int)addr, (unsigned)value); + break; + } +} + +static const MemoryRegionOps mpc8544_guts_ops = { + .read = mpc8544_guts_read, + .write = mpc8544_guts_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void mpc8544_guts_initfn(Object *obj) +{ + SysBusDevice *d = SYS_BUS_DEVICE(obj); + GutsState *s = MPC8544_GUTS(obj); + + memory_region_init_io(&s->iomem, OBJECT(s), &mpc8544_guts_ops, s, + "mpc8544.guts", MPC8544_GUTS_MMIO_SIZE); + sysbus_init_mmio(d, &s->iomem); +} + +static const TypeInfo mpc8544_guts_info = { + .name = TYPE_MPC8544_GUTS, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(GutsState), + .instance_init = mpc8544_guts_initfn, +}; + +static void mpc8544_guts_register_types(void) +{ + type_register_static(&mpc8544_guts_info); +} + +type_init(mpc8544_guts_register_types) diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c new file mode 100644 index 000000000..81177505f --- /dev/null +++ b/hw/ppc/mpc8544ds.c @@ -0,0 +1,74 @@ +/* + * Support for the PPC e500-based mpc8544ds board + * + * Copyright 2012 Freescale Semiconductor, Inc. 
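+ * (Editor's note: the MPC8544DS wrapper below mostly parameterises the
+ * generic e500 machine: CCSR at 0xE0000000, PCI MMIO at 0xC0000000 and an
+ * FSL MPIC 2.0; the MMIO window starting at 0xC0000000 is presumably why
+ * RAM is capped at 3 GiB in mpc8544ds_init().)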
+ * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "e500.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/openpic.h" +#include "qemu/error-report.h" +#include "cpu.h" + +static void mpc8544ds_fixup_devtree(void *fdt) +{ + const char model[] = "MPC8544DS"; + const char compatible[] = "MPC8544DS\0MPC85xxDS"; + + qemu_fdt_setprop(fdt, "/", "model", model, sizeof(model)); + qemu_fdt_setprop(fdt, "/", "compatible", compatible, + sizeof(compatible)); +} + +static void mpc8544ds_init(MachineState *machine) +{ + if (machine->ram_size > 0xc0000000) { + error_report("The MPC8544DS board only supports up to 3GB of RAM"); + exit(1); + } + + ppce500_init(machine); +} + +static void e500plat_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PPCE500MachineClass *pmc = PPCE500_MACHINE_CLASS(oc); + + pmc->pci_first_slot = 0x11; + pmc->pci_nr_slots = 2; + pmc->fixup_devtree = mpc8544ds_fixup_devtree; + pmc->mpic_version = OPENPIC_MODEL_FSL_MPIC_20; + pmc->ccsrbar_base = 0xE0000000ULL; + pmc->pci_mmio_base = 0xC0000000ULL; + pmc->pci_mmio_bus_base = 0xC0000000ULL; + pmc->pci_pio_base = 0xE1000000ULL; + pmc->spin_base = 0xEF000000ULL; + + mc->desc = "mpc8544ds"; + mc->init = mpc8544ds_init; + mc->max_cpus = 15; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30"); + mc->default_ram_id = "mpc8544ds.ram"; +} + +#define TYPE_MPC8544DS_MACHINE MACHINE_TYPE_NAME("mpc8544ds") + +static const TypeInfo mpc8544ds_info = { + .name = TYPE_MPC8544DS_MACHINE, + .parent = TYPE_PPCE500_MACHINE, + .class_init = e500plat_machine_class_init, +}; + +static void mpc8544ds_register_types(void) +{ + type_register_static(&mpc8544ds_info); +} + +type_init(mpc8544ds_register_types) diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c new file mode 100644 index 000000000..cc44d5e33 --- /dev/null +++ b/hw/ppc/pef.c @@ -0,0 +1,142 @@ +/* + * PEF (Protected Execution Facility) for POWER support + * + * Copyright Red Hat. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "sysemu/kvm.h" +#include "migration/blocker.h" +#include "exec/confidential-guest-support.h" +#include "hw/ppc/pef.h" + +#define TYPE_PEF_GUEST "pef-guest" +OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) + +typedef struct PefGuest PefGuest; +typedef struct PefGuestClass PefGuestClass; + +struct PefGuestClass { + ConfidentialGuestSupportClass parent_class; +}; + +/** + * PefGuest: + * + * The PefGuest object is used for creating and managing a PEF + * guest. 
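+ * (Editor's note: PEF depends on a POWER "ultravisor" being present;
+ * kvmppc_svm_init() below probes the KVM_CAP_PPC_SECURE_GUEST capability
+ * and fails cleanly when it is absent, and reset goes through the
+ * KVM_PPC_SVM_OFF ioctl to move the guest back out of secure mode.)
+ *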
+ * + * # $QEMU \ + * -object pef-guest,id=pef0 \ + * -machine ...,confidential-guest-support=pef0 + */ +struct PefGuest { + ConfidentialGuestSupport parent_obj; +}; + +static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ +#ifdef CONFIG_KVM + static Error *pef_mig_blocker; + + if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_SECURE_GUEST)) { + error_setg(errp, + "KVM implementation does not support Secure VMs (is an ultravisor running?)"); + return -1; + } else { + int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); + + if (ret < 0) { + error_setg(errp, + "Error enabling PEF with KVM"); + return -1; + } + } + + /* add migration blocker */ + error_setg(&pef_mig_blocker, "PEF: Migration is not implemented"); + /* NB: This can fail if --only-migratable is used */ + migrate_add_blocker(pef_mig_blocker, &error_fatal); + + cgs->ready = true; + + return 0; +#else + g_assert_not_reached(); +#endif +} + +/* + * Don't set error if KVM_PPC_SVM_OFF ioctl is invoked on kernels + * that don't support this ioctl. + */ +static int kvmppc_svm_off(Error **errp) +{ +#ifdef CONFIG_KVM + int rc; + + rc = kvm_vm_ioctl(KVM_STATE(current_accel()), KVM_PPC_SVM_OFF); + if (rc && rc != -ENOTTY) { + error_setg_errno(errp, -rc, "KVM_PPC_SVM_OFF ioctl failed"); + return rc; + } + return 0; +#else + g_assert_not_reached(); +#endif +} + +int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; + } + + if (!kvm_enabled()) { + error_setg(errp, "PEF requires KVM"); + return -1; + } + + return kvmppc_svm_init(cgs, errp); +} + +int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) +{ + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; + } + + /* + * If we don't have KVM we should never have been able to + * initialize PEF, so we should never get this far + */ + assert(kvm_enabled()); + + return kvmppc_svm_off(errp); +} + +OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, + pef_guest, + PEF_GUEST, + CONFIDENTIAL_GUEST_SUPPORT, + { TYPE_USER_CREATABLE }, + { NULL }) + +static void pef_guest_class_init(ObjectClass *oc, void *data) +{ +} + +static void pef_guest_init(Object *obj) +{ +} + +static void pef_guest_finalize(Object *obj) +{ +} diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c new file mode 100644 index 000000000..298e6b93e --- /dev/null +++ b/hw/ppc/pegasos2.c @@ -0,0 +1,952 @@ +/* + * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator + * + * Copyright (c) 2018-2021 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. 
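+ * (Editor's note: the board model pairs a Marvell MV64361 "Discovery II"
+ * system controller with a VIA VT8231 south bridge. When no -bios image
+ * is supplied, the built-in Virtual Open Firmware (VOF) is used instead,
+ * and pegasos2_machine_reset() programs the chipset registers that real
+ * board firmware would normally initialise.)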
+ * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "hw/ppc/ppc.h" +#include "hw/sysbus.h" +#include "hw/pci/pci_host.h" +#include "hw/irq.h" +#include "hw/pci-host/mv64361.h" +#include "hw/isa/vt82c686.h" +#include "hw/ide/pci.h" +#include "hw/i2c/smbus_eeprom.h" +#include "hw/qdev-properties.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/qtest.h" +#include "hw/boards.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "exec/address-spaces.h" +#include "qom/qom-qobject.h" +#include "qapi/qmp/qdict.h" +#include "trace.h" +#include "qemu/datadir.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/vof.h" + +#include <libfdt.h> + +#define PROM_FILENAME "vof.bin" +#define PROM_ADDR 0xfff00000 +#define PROM_SIZE 0x80000 + +#define KVMPPC_HCALL_BASE 0xf000 +#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0) +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) + +#define H_SUCCESS 0 +#define H_PRIVILEGE -3 /* Caller not privileged */ +#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ + +#define BUS_FREQ_HZ 133333333 + +#define PCI0_CFG_ADDR 0xcf8 +#define PCI0_MEM_BASE 0xc0000000 +#define PCI0_MEM_SIZE 0x20000000 +#define PCI0_IO_BASE 0xf8000000 +#define PCI0_IO_SIZE 0x10000 + +#define PCI1_CFG_ADDR 0xc78 +#define PCI1_MEM_BASE 0x80000000 +#define PCI1_MEM_SIZE 0x40000000 +#define PCI1_IO_BASE 0xfe000000 +#define PCI1_IO_SIZE 0x10000 + +#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2") +OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE) + +struct Pegasos2MachineState { + MachineState parent_obj; + PowerPCCPU *cpu; + DeviceState *mv; + Vof *vof; + void *fdt_blob; + uint64_t kernel_addr; + uint64_t kernel_entry; + uint64_t kernel_size; +}; + +static void *build_fdt(MachineState *machine, int *fdt_size); + +static void pegasos2_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine); + + cpu_reset(CPU(cpu)); + cpu->env.spr[SPR_HID1] = 7ULL << 28; + if (pm->vof) { + cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20; + cpu->env.nip = 0x100; + } +} + +static void pegasos2_init(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + CPUPPCState *env; + MemoryRegion *rom = g_new(MemoryRegion, 1); + PCIBus *pci_bus; + PCIDevice *dev; + I2CBus *i2c_bus; + const char *fwname = machine->firmware ?: PROM_FILENAME; + char *filename; + int sz; + uint8_t *spd_data; + + /* init CPU */ + pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &pm->cpu->env; + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { + error_report("Incompatible CPU, only 6xx bus supported"); + exit(1); + } + + /* Set time-base frequency */ + cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4); + qemu_register_reset(pegasos2_cpu_reset, pm->cpu); + + /* RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, fwname); + if (!filename) { + error_report("Could not find firmware '%s'", fwname); + exit(1); + } + if (!machine->firmware && !pm->vof) { + pm->vof = g_malloc0(sizeof(*pm->vof)); + } + memory_region_init_rom(rom, NULL, "pegasos2.rom", 
PROM_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom); + sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE); + } + if (sz <= 0 || sz > PROM_SIZE) { + error_report("Could not load firmware '%s'", filename); + exit(1); + } + g_free(filename); + if (pm->vof) { + pm->vof->fw_size = sz; + } + + /* Marvell Discovery II system controller */ + pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT])); + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + + /* VIA VT8231 South Bridge (multifunction PCI device) */ + /* VT8231 function 0: PCI-to-ISA Bridge */ + dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true, + TYPE_VT8231_ISA); + qdev_connect_gpio_out(DEVICE(dev), 0, + qdev_get_gpio_in_named(pm->mv, "gpp", 31)); + + /* VT8231 function 1: IDE Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide"); + pci_ide_create_devs(dev); + + /* VT8231 function 2-3: USB Ports */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 2), "vt82c686b-usb-uhci"); + pci_create_simple(pci_bus, PCI_DEVFN(12, 3), "vt82c686b-usb-uhci"); + + /* VT8231 function 4: Power Management Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 4), TYPE_VT8231_PM); + i2c_bus = I2C_BUS(qdev_get_child_bus(DEVICE(dev), "i2c")); + spd_data = spd_data_generate(DDR, machine->ram_size); + smbus_eeprom_init_one(i2c_bus, 0x57, spd_data); + + /* VT8231 function 5-6: AC97 Audio & Modem */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 5), TYPE_VIA_AC97); + pci_create_simple(pci_bus, PCI_DEVFN(12, 6), TYPE_VIA_MC97); + + /* other PC hardware */ + pci_vga_init(pci_bus); + + if (machine->kernel_filename) { + sz = load_elf(machine->kernel_filename, NULL, NULL, NULL, + &pm->kernel_entry, &pm->kernel_addr, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + error_report("Could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + pm->kernel_size = sz; + if (!pm->vof) { + warn_report("Option -kernel may be ineffective with -bios."); + } + } else if (pm->vof && !qtest_enabled()) { + warn_report("Using Virtual OpenFirmware but no -kernel option."); + } + + if (!pm->vof && machine->kernel_cmdline && machine->kernel_cmdline[0]) { + warn_report("Option -append may be ineffective with -bios."); + } +} + +static uint32_t pegasos2_mv_reg_read(Pegasos2MachineState *pm, + uint32_t addr, uint32_t len) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + uint64_t val = 0xffffffffULL; + memory_region_dispatch_read(r, addr, &val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); + return val; +} + +static void pegasos2_mv_reg_write(Pegasos2MachineState *pm, uint32_t addr, + uint32_t len, uint32_t val) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + memory_region_dispatch_write(r, addr, val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); +} + +static uint32_t pegasos2_pci_config_read(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len) +{ + hwaddr pcicfg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR; + uint64_t val = 0xffffffffULL; + + if (len <= 4) { + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + val = pegasos2_mv_reg_read(pm, pcicfg + 4, len); + } + return val; +} + +static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len, uint32_t val) +{ + hwaddr pcicfg = bus ? 
PCI1_CFG_ADDR : PCI0_CFG_ADDR; + + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); +} + +static void pegasos2_machine_reset(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + void *fdt; + uint64_t d[2]; + int sz; + + qemu_devices_reset(); + if (!pm->vof) { + return; /* Firmware should set up machine so nothing to do */ + } + + /* Otherwise, set up devices that board firmware would normally do */ + pegasos2_mv_reg_write(pm, 0, 4, 0x28020ff); + pegasos2_mv_reg_write(pm, 0x278, 4, 0xa31fc); + pegasos2_mv_reg_write(pm, 0xf300, 4, 0x11ff0400); + pegasos2_mv_reg_write(pm, 0xf10c, 4, 0x80000000); + pegasos2_mv_reg_write(pm, 0x1c, 4, 0x8000000); + pegasos2_pci_config_write(pm, 0, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + pegasos2_pci_config_write(pm, 1, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x50, 1, 0x2); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_INTERRUPT_LINE, 2, 0x109); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_CLASS_PROG, 1, 0xf); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x40, 1, 0xb); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x50, 4, 0x17171717); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_COMMAND, 2, 0x87); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 2) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 3) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x48, 4, 0xf00); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x40, 4, 0x558020); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x90, 4, 0xd00); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 5) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 6) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + /* Device tree and VOF set up */ + vof_init(pm->vof, machine->ram_size, &error_fatal); + if (vof_claim(pm->vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE) == -1) { + error_report("Memory allocation for stack failed"); + exit(1); + } + if (pm->kernel_size && + vof_claim(pm->vof, pm->kernel_addr, pm->kernel_size, 0) == -1) { + error_report("Memory for kernel is in use"); + exit(1); + } + fdt = build_fdt(machine, &sz); + /* FIXME: VOF assumes entry is same as load address */ + d[0] = cpu_to_be64(pm->kernel_entry); + d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr)); + qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d)); + + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + g_free(pm->fdt_blob); + pm->fdt_blob = fdt; + + vof_build_dt(fdt, pm->vof); + vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe"); + pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); +} + +enum pegasos2_rtas_tokens { + RTAS_RESTART_RTAS = 0, + RTAS_NVRAM_FETCH = 1, + RTAS_NVRAM_STORE = 2, + RTAS_GET_TIME_OF_DAY = 3, + RTAS_SET_TIME_OF_DAY = 4, + RTAS_EVENT_SCAN = 6, + RTAS_CHECK_EXCEPTION = 7, + RTAS_READ_PCI_CONFIG = 8, + RTAS_WRITE_PCI_CONFIG = 9, + RTAS_DISPLAY_CHARACTER = 10, + RTAS_SET_INDICATOR = 11, + RTAS_POWER_OFF = 17, + 
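+    /*
+     * Editor's note: of these tokens only get-time-of-day,
+     * read/write-pci-config, display-character and power-off get real
+     * handling in pegasos2_rtas() below; anything else is logged as
+     * unimplemented and reported back to the guest as success.
+     */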
RTAS_SUSPEND = 18, + RTAS_HIBERNATE = 19, + RTAS_SYSTEM_REBOOT = 20, +}; + +static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, + target_ulong args_real) +{ + AddressSpace *as = CPU(cpu)->as; + uint32_t token = ldl_be_phys(as, args_real); + uint32_t nargs = ldl_be_phys(as, args_real + 4); + uint32_t nrets = ldl_be_phys(as, args_real + 8); + uint32_t args = args_real + 12; + uint32_t rets = args_real + 12 + nargs * 4; + + if (nrets < 1) { + qemu_log_mask(LOG_GUEST_ERROR, "Too few return values in RTAS call\n"); + return H_PARAMETER; + } + switch (token) { + case RTAS_GET_TIME_OF_DAY: + { + QObject *qo = object_property_get_qobject(qdev_get_machine(), + "rtc-time", &error_fatal); + QDict *qd = qobject_to(QDict, qo); + + if (nargs != 0 || nrets != 8 || !qd) { + stl_be_phys(as, rets, -1); + qobject_unref(qo); + return H_PARAMETER; + } + + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, qdict_get_int(qd, "tm_year") + 1900); + stl_be_phys(as, rets + 8, qdict_get_int(qd, "tm_mon") + 1); + stl_be_phys(as, rets + 12, qdict_get_int(qd, "tm_mday")); + stl_be_phys(as, rets + 16, qdict_get_int(qd, "tm_hour")); + stl_be_phys(as, rets + 20, qdict_get_int(qd, "tm_min")); + stl_be_phys(as, rets + 24, qdict_get_int(qd, "tm_sec")); + stl_be_phys(as, rets + 28, 0); + qobject_unref(qo); + return H_SUCCESS; + } + case RTAS_READ_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 2 || nrets != 2) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = pegasos2_pci_config_read(pm, !(addr >> 24), + addr & 0x0fffffff, len); + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, val); + return H_SUCCESS; + } + case RTAS_WRITE_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 3 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = ldl_be_phys(as, args + 8); + pegasos2_pci_config_write(pm, !(addr >> 24), + addr & 0x0fffffff, len, val); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + case RTAS_DISPLAY_CHARACTER: + if (nargs != 1 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_log_mask(LOG_UNIMP, "%c", ldl_be_phys(as, args)); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + case RTAS_POWER_OFF: + { + if (nargs != 2 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + default: + qemu_log_mask(LOG_UNIMP, "Unknown RTAS token %u (args=%u, rets=%u)\n", + token, nargs, nrets); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } +} + +static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp); + CPUPPCState *env = &cpu->env; + + /* The TCG path should also be holding the BQL at this point */ + g_assert(qemu_mutex_iothread_locked()); + + if (msr_pr) { + qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n"); + env->gpr[3] = H_PRIVILEGE; + } else if (env->gpr[3] == KVMPPC_H_RTAS) { + env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]); + } else if (env->gpr[3] == KVMPPC_H_VOF_CLIENT) { + int ret = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob, + env->gpr[4]); + env->gpr[3] = (ret ? 
H_PARAMETER : H_SUCCESS);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "Unsupported hypercall " TARGET_FMT_lx
+                      "\n", env->gpr[3]);
+        env->gpr[3] = -1;
+    }
+}
+
+static void vhyp_nop(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+}
+
+static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
+{
+    return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1];
+}
+
+static bool pegasos2_setprop(MachineState *ms, const char *path,
+                             const char *propname, void *val, int vallen)
+{
+    return true;
+}
+
+static void pegasos2_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+    VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc);
+
+    mc->desc = "Genesi/bPlan Pegasos II";
+    mc->init = pegasos2_init;
+    mc->reset = pegasos2_machine_reset;
+    mc->block_default_type = IF_IDE;
+    mc->default_boot_order = "cd";
+    mc->default_display = "std";
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9");
+    mc->default_ram_id = "pegasos2.ram";
+    mc->default_ram_size = 512 * MiB;
+
+    vhc->hypercall = pegasos2_hypercall;
+    vhc->cpu_exec_enter = vhyp_nop;
+    vhc->cpu_exec_exit = vhyp_nop;
+    vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr;
+
+    vmc->setprop = pegasos2_setprop;
+}
+
+static const TypeInfo pegasos2_machine_info = {
+    .name = TYPE_PEGASOS2_MACHINE,
+    .parent = TYPE_MACHINE,
+    .class_init = pegasos2_machine_class_init,
+    .instance_size = sizeof(Pegasos2MachineState),
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_PPC_VIRTUAL_HYPERVISOR },
+        { TYPE_VOF_MACHINE_IF },
+        { }
+    },
+};
+
+static void pegasos2_machine_register_types(void)
+{
+    type_register_static(&pegasos2_machine_info);
+}
+
+type_init(pegasos2_machine_register_types)
+
+/* FDT creation for passing to firmware */
+
+typedef struct {
+    void *fdt;
+    const char *path;
+} FDTInfo;
+
+/* We do everything in reverse order so it comes out right in the tree */
+
+static void dt_ide(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "spi");
+}
+
+static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 0);
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 1);
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb");
+}
+
+static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+    GString *name = g_string_sized_new(64);
+    uint32_t cells[3];
+
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1);
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2);
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa");
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa");
+
+    /* additional devices */
+    g_string_printf(name, "%s/lpt@i3bc", fi->path);
+    qemu_fdt_add_subnode(fi->fdt, name->str);
+    qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+    cells[0] = cpu_to_be32(7);
+    cells[1] = 0;
+    qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+                     cells, 2 * sizeof(cells[0]));
+    cells[0] = cpu_to_be32(1);
+    cells[1] = cpu_to_be32(0x3bc);
+    cells[2] = cpu_to_be32(8);
+    qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+    qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt");
+    qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt");
+
+    g_string_printf(name, "%s/fdc@i3f0", fi->path);
+    qemu_fdt_add_subnode(fi->fdt, name->str);
+    qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+    cells[0] = cpu_to_be32(6);
+    cells[1] =
0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x3f0); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc"); + + g_string_printf(name, "%s/timer@i40", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x40); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer"); + + g_string_printf(name, "%s/rtc@i70", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc"); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(8); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x70); + cells[2] = cpu_to_be32(2); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc"); + + g_string_printf(name, "%s/keyboard@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + cells[0] = cpu_to_be32(1); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard"); + + g_string_printf(name, "%s/8042@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1); + qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", ""); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", ""); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042"); + + g_string_printf(name, "%s/serial@i2f8", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(3); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x2f8); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial"); + + g_string_free(name, TRUE); +} + +static struct { + const char *id; + const char *name; + void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi); +} device_map[] = { + { 
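+    /* Editor's note: each key is the "pciVVVV,DDDD" name derived from the
+       device's PCI vendor/device IDs (0x11ab is the Marvell host bridge,
+       0x1106 the VIA VT8231 functions); a match supplies the firmware node
+       name and an optional FDT fixup callback, unmatched devices keep the
+       raw pciVVVV,DDDD name. */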
"pci11ab,6460", "host", NULL }, + { "pci1106,8231", "isa", dt_isa }, + { "pci1106,571", "ide", dt_ide }, + { "pci1106,3044", "firewire", NULL }, + { "pci1106,3038", "usb", dt_usb }, + { "pci1106,8235", "other", NULL }, + { "pci1106,3058", "sound", NULL }, + { NULL, NULL } +}; + +static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) +{ + FDTInfo *fi = opaque; + GString *node = g_string_new(NULL); + uint32_t cells[(PCI_NUM_REGIONS + 1) * 5]; + int i, j; + const char *name = NULL; + g_autofree const gchar *pn = g_strdup_printf("pci%x,%x", + pci_get_word(&d->config[PCI_VENDOR_ID]), + pci_get_word(&d->config[PCI_DEVICE_ID])); + + for (i = 0; device_map[i].id; i++) { + if (!strcmp(pn, device_map[i].id)) { + name = device_map[i].name; + break; + } + } + g_string_printf(node, "%s/%s@%x", fi->path, (name ?: pn), + PCI_SLOT(d->devfn)); + if (PCI_FUNC(d->devfn)) { + g_string_append_printf(node, ",%x", PCI_FUNC(d->devfn)); + } + + qemu_fdt_add_subnode(fi->fdt, node->str); + if (device_map[i].dtf) { + FDTInfo cfi = { fi->fdt, node->str }; + device_map[i].dtf(bus, d, &cfi); + } + cells[0] = cpu_to_be32(d->devfn << 8); + cells[1] = 0; + cells[2] = 0; + cells[3] = 0; + cells[4] = 0; + j = 5; + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4)); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + cells[j] |= cpu_to_be32(1 << 24); + } else { + cells[j] |= cpu_to_be32(2 << 24); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) { + cells[j] |= cpu_to_be32(4 << 28); + } + } + cells[j + 1] = 0; + cells[j + 2] = 0; + cells[j + 3] = cpu_to_be32(d->io_regions[i].size >> 32); + cells[j + 4] = cpu_to_be32(d->io_regions[i].size); + j += 5; + } + qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn); + if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) { + qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts", + pci_get_byte(&d->config[PCI_INTERRUPT_PIN])); + } + /* Pegasos2 firmware has subsystem-id amd subsystem-vendor-id swapped */ + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_VENDOR_ID])); + cells[0] = pci_get_long(&d->config[PCI_CLASS_REVISION]); + qemu_fdt_setprop_cell(fi->fdt, node->str, "class-code", cells[0] >> 8); + qemu_fdt_setprop_cell(fi->fdt, node->str, "revision-id", cells[0] & 0xff); + qemu_fdt_setprop_cell(fi->fdt, node->str, "device-id", + pci_get_word(&d->config[PCI_DEVICE_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "vendor-id", + pci_get_word(&d->config[PCI_VENDOR_ID])); + + g_string_free(node, TRUE); +} + +static void *build_fdt(MachineState *machine, int *fdt_size) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + PowerPCCPU *cpu = pm->cpu; + PCIBus *pci_bus; + FDTInfo fi; + uint32_t cells[16]; + void *fdt = create_device_tree(fdt_size); + + fi.fdt = fdt; + + /* root node */ + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description", + "Pegasos CHRP PowerPC System"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH"); + qemu_fdt_setprop_string(fdt, "/", "revision", "2B"); + qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp"); + 
qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1); + qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2"); + + /* pci@c0000000 */ + qemu_fdt_add_subnode(fdt, "/pci@c0000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1); + cells[0] = cpu_to_be32(PCI0_MEM_BASE); + cells[1] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI0_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI0_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI0_MEM_BASE); + cells[9] = cpu_to_be32(PCI0_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci"); + + fi.path = "/pci@c0000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 0); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + /* pci@80000000 */ + qemu_fdt_add_subnode(fdt, "/pci@80000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0); + cells[0] = cpu_to_be32(PCI1_MEM_BASE); + cells[1] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge", + 0xf1000cb4); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI1_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI1_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI1_MEM_BASE); + cells[9] = cpu_to_be32(PCI1_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci"); + + fi.path = "/pci@80000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + qemu_fdt_add_subnode(fdt, "/failsafe"); + qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial"); + qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe"); + + qemu_fdt_add_subnode(fdt, "/rtas"); + qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT); + qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE); + qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND); + qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR); + qemu_fdt_setprop_cell(fdt, "/rtas", "display-character", + RTAS_DISPLAY_CHARACTER); + qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config", + RTAS_WRITE_PCI_CONFIG); + qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config", + 
RTAS_READ_PCI_CONFIG); + /* Pegasos2 firmware misspells check-exception and guests use that */ + qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption", + RTAS_CHECK_EXCEPTION); + qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day", + RTAS_SET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day", + RTAS_GET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH); + qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1); + + /* cpus */ + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0); + qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus"); + + /* FIXME Get CPU name from CPU object */ + const char *cp = "/cpus/PowerPC,G4"; + qemu_fdt_add_subnode(fdt, cp); + qemu_fdt_setprop_cell(fdt, cp, "l2cr", 0); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-block-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-line-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-block-size", + cpu->env.icache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size", + cpu->env.icache_line_size); + if (cpu->env.id_tlbs) { + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_string(fdt, cp, "tlb-split", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "tlb-size", cpu->env.nb_tlb); + qemu_fdt_setprop_string(fdt, cp, "state", "running"); + if (cpu->env.insns_flags & PPC_ALTIVEC) { + qemu_fdt_setprop_string(fdt, cp, "altivec", ""); + qemu_fdt_setprop_string(fdt, cp, "data-streams", ""); + } + /* + * FIXME What flags do data-streams, external-control and + * performance-monitor depend on? 
+ */ + qemu_fdt_setprop_string(fdt, cp, "external-control", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FSQRT) { + qemu_fdt_setprop_string(fdt, cp, "general-purpose", ""); + } + qemu_fdt_setprop_string(fdt, cp, "performance-monitor", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FRES) { + qemu_fdt_setprop_string(fdt, cp, "graphics", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4); + qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency", + cpu->env.tb_env->tb_freq); + qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ); + qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5); + qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu"); + qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1); + + /* memory */ + qemu_fdt_add_subnode(fdt, "/memory@0"); + cells[0] = 0; + cells[1] = cpu_to_be32(machine->ram_size); + qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory"); + qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory"); + + qemu_fdt_add_subnode(fdt, "/chosen"); + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + machine->kernel_cmdline ?: ""); + qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen"); + + qemu_fdt_add_subnode(fdt, "/openprom"); + qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1"); + + return fdt; +} diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c new file mode 100644 index 000000000..71e45515f --- /dev/null +++ b/hw/ppc/pnv.c @@ -0,0 +1,2132 @@ +/* + * QEMU PowerPC PowerNV machine model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "sysemu/qtest.h" +#include "sysemu/sysemu.h" +#include "sysemu/numa.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/cpus.h" +#include "sysemu/device_tree.h" +#include "sysemu/hw_accel.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" +#include "hw/loader.h" +#include "hw/nmi.h" +#include "qapi/visitor.h" +#include "monitor/monitor.h" +#include "hw/intc/intc.h" +#include "hw/ipmi/ipmi.h" +#include "target/ppc/mmu-hash64.h" +#include "hw/pci/msi.h" + +#include "hw/ppc/xics.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/pnv_pnor.h" + +#include "hw/isa/isa.h" +#include "hw/char/serial.h" +#include "hw/rtc/mc146818rtc.h" + +#include <libfdt.h> + +#define FDT_MAX_SIZE (1 * MiB) + +#define FW_FILE_NAME "skiboot.lid" +#define FW_LOAD_ADDR 0x0 +#define FW_MAX_SIZE (16 * MiB) + +#define KERNEL_LOAD_ADDR 0x20000000 +#define KERNEL_MAX_SIZE (128 * MiB) +#define INITRD_LOAD_ADDR 0x28000000 +#define INITRD_MAX_SIZE (128 * MiB) + +static const char *pnv_chip_core_typename(const PnvChip *o) +{ + const char *chip_type = object_class_get_name(object_get_class(OBJECT(o))); + int len = strlen(chip_type) - strlen(PNV_CHIP_TYPE_SUFFIX); + char *s = g_strdup_printf(PNV_CORE_TYPE_NAME("%.*s"), len, chip_type); + const char *core_type = object_class_get_name(object_class_by_name(s)); + g_free(s); + return core_type; +} + +/* + * On Power Systems E880 (POWER8), the max cpus (threads) should be : + * 4 * 4 sockets * 12 cores * 8 threads = 1536 + * Let's make it 2^11 + */ +#define MAX_CPUS 2048 + +/* + * Memory nodes are created by hostboot, one for each range of memory + * that has a different "affinity". In practice, it means one range + * per chip. + */ +static void pnv_dt_memory(void *fdt, int chip_id, hwaddr start, hwaddr size) +{ + char *mem_name; + uint64_t mem_reg_property[2]; + int off; + + mem_reg_property[0] = cpu_to_be64(start); + mem_reg_property[1] = cpu_to_be64(size); + + mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start); + off = fdt_add_subnode(fdt, 0, mem_name); + g_free(mem_name); + + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id))); +} + +static int get_cpus_node(void *fdt) +{ + int cpus_offset = fdt_path_offset(fdt, "/cpus"); + + if (cpus_offset < 0) { + cpus_offset = fdt_add_subnode(fdt, 0, "cpus"); + if (cpus_offset) { + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); + } + } + _FDT(cpus_offset); + return cpus_offset; +} + +/* + * The PowerNV cores (and threads) need to use real HW ids and not an + * incremental index like it has been done on other platforms. This HW + * id is stored in the CPU PIR, it is used to create cpu nodes in the + * device tree, used in XSCOM to address cores and in interrupt + * servers. 
+ */ +static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) +{ + PowerPCCPU *cpu = pc->threads[0]; + CPUState *cs = CPU(cpu); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int smt_threads = CPU_CORE(pc)->nr_threads; + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + uint32_t servers_prop[smt_threads]; + int i; + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = PNV_TIMEBASE_FREQ; + uint32_t cpufreq = 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + const uint8_t pa_features[] = { 24, 0, + 0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; + int offset; + char *nodename; + int cpus_offset = get_cpus_node(fdt); + + nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir); + offset = fdt_add_subnode(fdt, cpus_offset, nodename); + _FDT(offset); + g_free(nodename); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id))); + + _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + warn_report("Unknown L1 dcache size for cpu"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + warn_report("Unknown L1 icache size for cpu"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", + cpu->hash64_opts->slb_size))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (ppc_has_spr(cpu, SPR_PURR)) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* + * Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available + */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 
2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* + * Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available + */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", + pa_features, sizeof(pa_features)))); + + /* Build interrupt servers properties */ + for (i = 0; i < smt_threads; i++) { + servers_prop[i] = cpu_to_be32(pc->pir + i); + } + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)))); +} + +static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir, + uint32_t nr_threads) +{ + uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12); + char *name; + const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp"; + uint32_t irange[2], i, rsize; + uint64_t *reg; + int offset; + + irange[0] = cpu_to_be32(pir); + irange[1] = cpu_to_be32(nr_threads); + + rsize = sizeof(uint64_t) * 2 * nr_threads; + reg = g_malloc(rsize); + for (i = 0; i < nr_threads; i++) { + reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000)); + reg[i * 2 + 1] = cpu_to_be64(0x1000); + } + + name = g_strdup_printf("interrupt-controller@%"PRIX64, addr); + offset = fdt_add_subnode(fdt, 0, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); + _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", + "PowerPC-External-Interrupt-Presentation"))); + _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0))); + _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges", + irange, sizeof(irange)))); + _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0))); + g_free(reg); +} + +static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt) +{ + static const char compat[] = "ibm,power8-xscom\0ibm,xscom"; + int i; + + pnv_dt_xscom(chip, fdt, 0, + cpu_to_be64(PNV_XSCOM_BASE(chip)), + cpu_to_be64(PNV_XSCOM_SIZE), + compat, sizeof(compat)); + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + + pnv_dt_core(chip, pnv_core, fdt); + + /* Interrupt Control Presenters (ICP). One per core. 
*/ + pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads); + } + + if (chip->ram_size) { + pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size); + } +} + +static void pnv_chip_power9_dt_populate(PnvChip *chip, void *fdt) +{ + static const char compat[] = "ibm,power9-xscom\0ibm,xscom"; + int i; + + pnv_dt_xscom(chip, fdt, 0, + cpu_to_be64(PNV9_XSCOM_BASE(chip)), + cpu_to_be64(PNV9_XSCOM_SIZE), + compat, sizeof(compat)); + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + + pnv_dt_core(chip, pnv_core, fdt); + } + + if (chip->ram_size) { + pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size); + } + + pnv_dt_lpc(chip, fdt, 0, PNV9_LPCM_BASE(chip), PNV9_LPCM_SIZE); +} + +static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt) +{ + static const char compat[] = "ibm,power10-xscom\0ibm,xscom"; + int i; + + pnv_dt_xscom(chip, fdt, 0, + cpu_to_be64(PNV10_XSCOM_BASE(chip)), + cpu_to_be64(PNV10_XSCOM_SIZE), + compat, sizeof(compat)); + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + + pnv_dt_core(chip, pnv_core, fdt); + } + + if (chip->ram_size) { + pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size); + } + + pnv_dt_lpc(chip, fdt, 0, PNV10_LPCM_BASE(chip), PNV10_LPCM_SIZE); +} + +static void pnv_dt_rtc(ISADevice *d, void *fdt, int lpc_off) +{ + uint32_t io_base = d->ioport_id; + uint32_t io_regs[] = { + cpu_to_be32(1), + cpu_to_be32(io_base), + cpu_to_be32(2) + }; + char *name; + int node; + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop_string(fdt, node, "compatible", "pnpPNP,b00"))); +} + +static void pnv_dt_serial(ISADevice *d, void *fdt, int lpc_off) +{ + const char compatible[] = "ns16550\0pnpPNP,501"; + uint32_t io_base = d->ioport_id; + uint32_t io_regs[] = { + cpu_to_be32(1), + cpu_to_be32(io_base), + cpu_to_be32(8) + }; + char *name; + int node; + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop(fdt, node, "compatible", compatible, + sizeof(compatible)))); + + _FDT((fdt_setprop_cell(fdt, node, "clock-frequency", 1843200))); + _FDT((fdt_setprop_cell(fdt, node, "current-speed", 115200))); + _FDT((fdt_setprop_cell(fdt, node, "interrupts", d->isairq[0]))); + _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent", + fdt_get_phandle(fdt, lpc_off)))); + + /* This is needed by Linux */ + _FDT((fdt_setprop_string(fdt, node, "device_type", "serial"))); +} + +static void pnv_dt_ipmi_bt(ISADevice *d, void *fdt, int lpc_off) +{ + const char compatible[] = "bt\0ipmi-bt"; + uint32_t io_base; + uint32_t io_regs[] = { + cpu_to_be32(1), + 0, /* 'io_base' retrieved from the 'ioport' property of 'isa-ipmi-bt' */ + cpu_to_be32(3) + }; + uint32_t irq; + char *name; + int node; + + io_base = object_property_get_int(OBJECT(d), "ioport", &error_fatal); + io_regs[1] = cpu_to_be32(io_base); + + irq = object_property_get_int(OBJECT(d), "irq", &error_fatal); + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop(fdt, node, "compatible", 
compatible, + sizeof(compatible)))); + + /* Mark it as reserved to avoid Linux trying to claim it */ + _FDT((fdt_setprop_string(fdt, node, "status", "reserved"))); + _FDT((fdt_setprop_cell(fdt, node, "interrupts", irq))); + _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent", + fdt_get_phandle(fdt, lpc_off)))); +} + +typedef struct ForeachPopulateArgs { + void *fdt; + int offset; +} ForeachPopulateArgs; + +static int pnv_dt_isa_device(DeviceState *dev, void *opaque) +{ + ForeachPopulateArgs *args = opaque; + ISADevice *d = ISA_DEVICE(dev); + + if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) { + pnv_dt_rtc(d, args->fdt, args->offset); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL)) { + pnv_dt_serial(d, args->fdt, args->offset); + } else if (object_dynamic_cast(OBJECT(dev), "isa-ipmi-bt")) { + pnv_dt_ipmi_bt(d, args->fdt, args->offset); + } else { + error_report("unknown isa device %s@i%x", qdev_fw_name(dev), + d->ioport_id); + } + + return 0; +} + +/* + * The default LPC bus of a multichip system is on chip 0. It's + * recognized by the firmware (skiboot) using a "primary" property. + */ +static void pnv_dt_isa(PnvMachineState *pnv, void *fdt) +{ + int isa_offset = fdt_path_offset(fdt, pnv->chips[0]->dt_isa_nodename); + ForeachPopulateArgs args = { + .fdt = fdt, + .offset = isa_offset, + }; + uint32_t phandle; + + _FDT((fdt_setprop(fdt, isa_offset, "primary", NULL, 0))); + + phandle = qemu_fdt_alloc_phandle(fdt); + assert(phandle > 0); + _FDT((fdt_setprop_cell(fdt, isa_offset, "phandle", phandle))); + + /* + * ISA devices are not necessarily parented to the ISA bus so we + * can not use object_child_foreach() + */ + qbus_walk_children(BUS(pnv->isa_bus), pnv_dt_isa_device, NULL, NULL, NULL, + &args); +} + +static void pnv_dt_power_mgt(PnvMachineState *pnv, void *fdt) +{ + int off; + + off = fdt_add_subnode(fdt, 0, "ibm,opal"); + off = fdt_add_subnode(fdt, off, "power-mgt"); + + _FDT(fdt_setprop_cell(fdt, off, "ibm,enabled-stop-levels", 0xc0000000)); +} + +static void *pnv_dt_create(MachineState *machine) +{ + PnvMachineClass *pmc = PNV_MACHINE_GET_CLASS(machine); + PnvMachineState *pnv = PNV_MACHINE(machine); + void *fdt; + char *buf; + int off; + int i; + + fdt = g_malloc0(FDT_MAX_SIZE); + _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); + + /* /qemu node */ + _FDT((fdt_add_subnode(fdt, 0, "qemu"))); + + /* Root node */ + _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2))); + _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2))); + _FDT((fdt_setprop_string(fdt, 0, "model", + "IBM PowerNV (emulated by qemu)"))); + _FDT((fdt_setprop(fdt, 0, "compatible", pmc->compat, pmc->compat_size))); + + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf))); + if (qemu_uuid_set) { + _FDT((fdt_property_string(fdt, "system-id", buf))); + } + g_free(buf); + + off = fdt_add_subnode(fdt, 0, "chosen"); + if (machine->kernel_cmdline) { + _FDT((fdt_setprop_string(fdt, off, "bootargs", + machine->kernel_cmdline))); + } + + if (pnv->initrd_size) { + uint32_t start_prop = cpu_to_be32(pnv->initrd_base); + uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size); + + _FDT((fdt_setprop(fdt, off, "linux,initrd-start", + &start_prop, sizeof(start_prop)))); + _FDT((fdt_setprop(fdt, off, "linux,initrd-end", + &end_prop, sizeof(end_prop)))); + } + + /* Populate device tree for each chip */ + for (i = 0; i < pnv->num_chips; i++) { + PNV_CHIP_GET_CLASS(pnv->chips[i])->dt_populate(pnv->chips[i], fdt); + } + + /* Populate ISA devices on chip 0 
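+     * (pnv_dt_isa() walks the ISA bus with qbus_walk_children() and lets
+     * pnv_dt_isa_device() above emit the rtc, serial and ipmi-bt nodes)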
*/ + pnv_dt_isa(pnv, fdt); + + if (pnv->bmc) { + pnv_dt_bmc_sensors(pnv->bmc, fdt); + } + + /* Create an extra node for power management on machines that support it */ + if (pmc->dt_power_mgt) { + pmc->dt_power_mgt(pnv, fdt); + } + + return fdt; +} + +static void pnv_powerdown_notify(Notifier *n, void *opaque) +{ + PnvMachineState *pnv = container_of(n, PnvMachineState, powerdown_notifier); + + if (pnv->bmc) { + pnv_bmc_powerdown(pnv->bmc); + } +} + +static void pnv_reset(MachineState *machine) +{ + PnvMachineState *pnv = PNV_MACHINE(machine); + IPMIBmc *bmc; + void *fdt; + + qemu_devices_reset(); + + /* + * The machine should provide by default an internal BMC simulator. + * If not, try to use the BMC device that was provided on the command + * line. + */ + bmc = pnv_bmc_find(&error_fatal); + if (!pnv->bmc) { + if (!bmc) { + if (!qtest_enabled()) { + warn_report("machine has no BMC device. Use '-device " + "ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10' " + "to define one"); + } + } else { + pnv_bmc_set_pnor(bmc, pnv->pnor); + pnv->bmc = bmc; + } + } + + fdt = pnv_dt_create(machine); + + /* Pack resulting tree */ + _FDT((fdt_pack(fdt))); + + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt)); + + g_free(fdt); +} + +static ISABus *pnv_chip_power8_isa_create(PnvChip *chip, Error **errp) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + return pnv_lpc_isa_create(&chip8->lpc, true, errp); +} + +static ISABus *pnv_chip_power8nvl_isa_create(PnvChip *chip, Error **errp) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + return pnv_lpc_isa_create(&chip8->lpc, false, errp); +} + +static ISABus *pnv_chip_power9_isa_create(PnvChip *chip, Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + return pnv_lpc_isa_create(&chip9->lpc, false, errp); +} + +static ISABus *pnv_chip_power10_isa_create(PnvChip *chip, Error **errp) +{ + Pnv10Chip *chip10 = PNV10_CHIP(chip); + return pnv_lpc_isa_create(&chip10->lpc, false, errp); +} + +static ISABus *pnv_isa_create(PnvChip *chip, Error **errp) +{ + return PNV_CHIP_GET_CLASS(chip)->isa_create(chip, errp); +} + +static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + int i; + + ics_pic_print_info(&chip8->psi.ics, mon); + for (i = 0; i < chip->num_phbs; i++) { + pnv_phb3_msi_pic_print_info(&chip8->phbs[i].msis, mon); + ics_pic_print_info(&chip8->phbs[i].lsis, mon); + } +} + +static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + int i, j; + + pnv_xive_pic_print_info(&chip9->xive, mon); + pnv_psi_pic_print_info(&chip9->psi, mon); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + for (j = 0; j < pec->num_stacks; j++) { + pnv_phb4_pic_print_info(&pec->stacks[j].phb, mon); + } + } +} + +static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip, + uint32_t core_id) +{ + return PNV_XSCOM_EX_BASE(core_id); +} + +static uint64_t pnv_chip_power9_xscom_core_base(PnvChip *chip, + uint32_t core_id) +{ + return PNV9_XSCOM_EC_BASE(core_id); +} + +static uint64_t pnv_chip_power10_xscom_core_base(PnvChip *chip, + uint32_t core_id) +{ + return PNV10_XSCOM_EC_BASE(core_id); +} + +static bool pnv_match_cpu(const char *default_type, const char *cpu_type) +{ + PowerPCCPUClass *ppc_default = + POWERPC_CPU_CLASS(object_class_by_name(default_type)); + PowerPCCPUClass *ppc = + POWERPC_CPU_CLASS(object_class_by_name(cpu_type)); + + return ppc_default->pvr_match(ppc_default, 
ppc->pvr); +} + +static void pnv_ipmi_bt_init(ISABus *bus, IPMIBmc *bmc, uint32_t irq) +{ + ISADevice *dev = isa_new("isa-ipmi-bt"); + + object_property_set_link(OBJECT(dev), "bmc", OBJECT(bmc), &error_fatal); + object_property_set_int(OBJECT(dev), "irq", irq, &error_fatal); + isa_realize_and_unref(dev, bus, &error_fatal); +} + +static void pnv_chip_power10_pic_print_info(PnvChip *chip, Monitor *mon) +{ + Pnv10Chip *chip10 = PNV10_CHIP(chip); + + pnv_psi_pic_print_info(&chip10->psi, mon); +} + +/* Always give the first 1GB to chip 0 else we won't boot */ +static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id) +{ + MachineState *machine = MACHINE(pnv); + uint64_t ram_per_chip; + + assert(machine->ram_size >= 1 * GiB); + + ram_per_chip = machine->ram_size / pnv->num_chips; + if (ram_per_chip >= 1 * GiB) { + return QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); + } + + assert(pnv->num_chips > 1); + + ram_per_chip = (machine->ram_size - 1 * GiB) / (pnv->num_chips - 1); + return chip_id == 0 ? 1 * GiB : QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); +} + +static void pnv_init(MachineState *machine) +{ + const char *bios_name = machine->firmware ?: FW_FILE_NAME; + PnvMachineState *pnv = PNV_MACHINE(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + char *fw_filename; + long fw_size; + uint64_t chip_ram_start = 0; + int i; + char *chip_typename; + DriveInfo *pnor = drive_get(IF_MTD, 0, 0); + DeviceState *dev; + + /* allocate RAM */ + if (machine->ram_size < mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be bigger than %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* + * Create our simple PNOR device + */ + dev = qdev_new(TYPE_PNV_PNOR); + if (pnor) { + qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(pnor)); + } + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + pnv->pnor = PNV_PNOR(dev); + + /* load skiboot firmware */ + fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!fw_filename) { + error_report("Could not find OPAL firmware '%s'", bios_name); + exit(1); + } + + fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE); + if (fw_size < 0) { + error_report("Could not load OPAL firmware '%s'", fw_filename); + exit(1); + } + g_free(fw_filename); + + /* load kernel */ + if (machine->kernel_filename) { + long kernel_size; + + kernel_size = load_image_targphys(machine->kernel_filename, + KERNEL_LOAD_ADDR, KERNEL_MAX_SIZE); + if (kernel_size < 0) { + error_report("Could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + } + + /* load initrd */ + if (machine->initrd_filename) { + pnv->initrd_base = INITRD_LOAD_ADDR; + pnv->initrd_size = load_image_targphys(machine->initrd_filename, + pnv->initrd_base, INITRD_MAX_SIZE); + if (pnv->initrd_size < 0) { + error_report("Could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + } + + /* MSIs are supported on this platform */ + msi_nonbroken = true; + + /* + * Check compatibility of the specified CPU with the machine + * default. 
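+     * For example (editor's illustration): "-machine powernv9 -cpu
+     * power8_v2.0" is rejected here, since the POWER9 class's
+     * pvr_match() does not accept a POWER8 PVR.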
+ */ + if (!pnv_match_cpu(mc->default_cpu_type, machine->cpu_type)) { + error_report("invalid CPU model '%s' for %s machine", + machine->cpu_type, mc->name); + exit(1); + } + + /* Create the processor chips */ + i = strlen(machine->cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX); + chip_typename = g_strdup_printf(PNV_CHIP_TYPE_NAME("%.*s"), + i, machine->cpu_type); + if (!object_class_by_name(chip_typename)) { + error_report("invalid chip model '%.*s' for %s machine", + i, machine->cpu_type, mc->name); + exit(1); + } + + pnv->num_chips = + machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads); + /* + * TODO: should we decide on how many chips we can create based + * on #cores and Venice vs. Murano vs. Naples chip type etc. + */ + if (!is_power_of_2(pnv->num_chips) || pnv->num_chips > 16) { + error_report("invalid number of chips: '%d'", pnv->num_chips); + error_printf( + "Try '-smp sockets=N'. Valid values are: 1, 2, 4, 8 and 16.\n"); + exit(1); + } + + pnv->chips = g_new0(PnvChip *, pnv->num_chips); + for (i = 0; i < pnv->num_chips; i++) { + char chip_name[32]; + Object *chip = OBJECT(qdev_new(chip_typename)); + uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i); + + pnv->chips[i] = PNV_CHIP(chip); + + /* Distribute RAM among the chips */ + object_property_set_int(chip, "ram-start", chip_ram_start, + &error_fatal); + object_property_set_int(chip, "ram-size", chip_ram_size, + &error_fatal); + chip_ram_start += chip_ram_size; + + snprintf(chip_name, sizeof(chip_name), "chip[%d]", i); + object_property_add_child(OBJECT(pnv), chip_name, chip); + object_property_set_int(chip, "chip-id", i, &error_fatal); + object_property_set_int(chip, "nr-cores", machine->smp.cores, + &error_fatal); + object_property_set_int(chip, "nr-threads", machine->smp.threads, + &error_fatal); + /* + * The POWER8 machine uses the XICS interrupt interface. + * Propagate the XICS fabric to the chip and its controllers. + */ + if (object_dynamic_cast(OBJECT(pnv), TYPE_XICS_FABRIC)) { + object_property_set_link(chip, "xics", OBJECT(pnv), &error_abort); + } + if (object_dynamic_cast(OBJECT(pnv), TYPE_XIVE_FABRIC)) { + object_property_set_link(chip, "xive-fabric", OBJECT(pnv), + &error_abort); + } + sysbus_realize_and_unref(SYS_BUS_DEVICE(chip), &error_fatal); + } + g_free(chip_typename); + + /* Instantiate ISA bus on chip 0 */ + pnv->isa_bus = pnv_isa_create(pnv->chips[0], &error_fatal); + + /* Create serial port */ + serial_hds_isa_init(pnv->isa_bus, 0, MAX_ISA_SERIAL_PORTS); + + /* Create an RTC ISA device too */ + mc146818_rtc_init(pnv->isa_bus, 2000, NULL); + + /* + * Create the machine BMC simulator and the IPMI BT device for + * communication with the BMC + */ + if (defaults_enabled()) { + pnv->bmc = pnv_bmc_create(pnv->pnor); + pnv_ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10); + } + + /* + * The PNOR is mapped on the LPC FW address space by the BMC. + * Since we cannot reach the remote BMC machine with LPC memops, + * always map it for now. 
+ */ + memory_region_add_subregion(pnv->chips[0]->fw_mr, PNOR_SPI_OFFSET, + &pnv->pnor->mmio); + + /* + * OpenPOWER systems use an IPMI SEL Event message to notify the + * host to power down + */ + pnv->powerdown_notifier.notify = pnv_powerdown_notify; + qemu_register_powerdown_notifier(&pnv->powerdown_notifier); +} + +/* + * 0:21 Reserved - Read as zeros + * 22:24 Chip ID + * 25:28 Core number + * 29:31 Thread ID + */ +static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 7) | (core_id << 3); +} + +static void pnv_chip_power8_intc_create(PnvChip *chip, PowerPCCPU *cpu, + Error **errp) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + Error *local_err = NULL; + Object *obj; + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + obj = icp_create(OBJECT(cpu), TYPE_PNV_ICP, chip8->xics, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_cpu->intc = obj; +} + + +static void pnv_chip_power8_intc_reset(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + icp_reset(ICP(pnv_cpu->intc)); +} + +static void pnv_chip_power8_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + icp_destroy(ICP(pnv_cpu->intc)); + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power8_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, + Monitor *mon) +{ + icp_pic_print_info(ICP(pnv_cpu_state(cpu)->intc), mon); +} + +/* + * 0:48 Reserved - Read as zeroes + * 49:52 Node ID + * 53:55 Chip ID + * 56 Reserved - Read as zero + * 57:61 Core number + * 62:63 Thread ID + * + * We only care about the lower bits. uint32_t is fine for the moment. + */ +static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 8) | (core_id << 2); +} + +static uint32_t pnv_chip_core_pir_p10(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 8) | (core_id << 2); +} + +static void pnv_chip_power9_intc_create(PnvChip *chip, PowerPCCPU *cpu, + Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + Error *local_err = NULL; + Object *obj; + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + /* + * The core creates its interrupt presenter but the XIVE interrupt + * controller object is initialized afterwards. Hopefully, it's + * only used at runtime. 
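+     * (chip9->xive itself is only realized further down, in
+     * pnv_chip_power9_realize(), after the cores and their presenters
+     * have been created)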
+ */ + obj = xive_tctx_create(OBJECT(cpu), XIVE_PRESENTER(&chip9->xive), + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_cpu->intc = obj; +} + +static void pnv_chip_power9_intc_reset(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + xive_tctx_reset(XIVE_TCTX(pnv_cpu->intc)); +} + +static void pnv_chip_power9_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + xive_tctx_destroy(XIVE_TCTX(pnv_cpu->intc)); + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power9_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, + Monitor *mon) +{ + xive_tctx_pic_print_info(XIVE_TCTX(pnv_cpu_state(cpu)->intc), mon); +} + +static void pnv_chip_power10_intc_create(PnvChip *chip, PowerPCCPU *cpu, + Error **errp) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + /* Will be defined when the interrupt controller is */ + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power10_intc_reset(PnvChip *chip, PowerPCCPU *cpu) +{ + ; +} + +static void pnv_chip_power10_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, + Monitor *mon) +{ +} + +/* + * Allowed core identifiers on a POWER8 Processor Chip : + * + * <EX0 reserved> + * EX1 - Venice only + * EX2 - Venice only + * EX3 - Venice only + * EX4 + * EX5 + * EX6 + * <EX7,8 reserved> <reserved> + * EX9 - Venice only + * EX10 - Venice only + * EX11 - Venice only + * EX12 + * EX13 + * EX14 + * <EX15 reserved> + */ +#define POWER8E_CORE_MASK (0x7070ull) +#define POWER8_CORE_MASK (0x7e7eull) + +/* + * POWER9 has 24 cores, ids starting at 0x0 + */ +#define POWER9_CORE_MASK (0xffffffffffffffull) + + +#define POWER10_CORE_MASK (0xffffffffffffffull) + +static void pnv_chip_power8_instance_init(Object *obj) +{ + PnvChip *chip = PNV_CHIP(obj); + Pnv8Chip *chip8 = PNV8_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; + + object_property_add_link(obj, "xics", TYPE_XICS_FABRIC, + (Object **)&chip8->xics, + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); + + object_initialize_child(obj, "psi", &chip8->psi, TYPE_PNV8_PSI); + + object_initialize_child(obj, "lpc", &chip8->lpc, TYPE_PNV8_LPC); + + object_initialize_child(obj, "occ", &chip8->occ, TYPE_PNV8_OCC); + + object_initialize_child(obj, "homer", &chip8->homer, TYPE_PNV8_HOMER); + + for (i = 0; i < pcc->num_phbs; i++) { + object_initialize_child(obj, "phb[*]", &chip8->phbs[i], TYPE_PNV_PHB3); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; +} + +static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) + { + PnvChip *chip = PNV_CHIP(chip8); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + int i, j; + char *name; + + name = g_strdup_printf("icp-%x", chip->chip_id); + memory_region_init(&chip8->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip8->icp_mmio); + g_free(name); + + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip)); + + /* Map the ICP registers for each thread */ + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + int core_hwid = CPU_CORE(pnv_core)->core_id; + + for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) { + uint32_t pir = pcc->core_pir(chip, core_hwid) + j; + PnvICPState *icp = PNV_ICP(xics_icp_get(chip8->xics, pir)); + + memory_region_add_subregion(&chip8->icp_mmio, pir << 12, + &icp->mmio); + 
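+            /*
+             * Worked example (editor's illustration): with the P8 PIR
+             * layout (chip_id << 7 | core_id << 3 | thread), chip 0,
+             * core 1, thread 0 has PIR 0x8, so its ICP page lands at
+             * PNV_ICP_BASE(chip) + (0x8 << 12): one 4 KiB page per
+             * thread, matching the 0x1000 stride that pnv_dt_icp()
+             * advertises in the "reg" property.
+             */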
} + } +} + +static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); + PnvChip *chip = PNV_CHIP(dev); + Pnv8Chip *chip8 = PNV8_CHIP(dev); + Pnv8Psi *psi8 = &chip8->psi; + Error *local_err = NULL; + int i; + + assert(chip8->xics); + + /* XSCOM bridge is first */ + pnv_xscom_realize(chip, PNV_XSCOM_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip)); + + pcc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Processor Service Interface (PSI) Host Bridge */ + object_property_set_int(OBJECT(&chip8->psi), "bar", PNV_PSIHB_BASE(chip), + &error_fatal); + object_property_set_link(OBJECT(&chip8->psi), ICS_PROP_XICS, + OBJECT(chip8->xics), &error_abort); + if (!qdev_realize(DEVICE(&chip8->psi), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, + &PNV_PSI(psi8)->xscom_regs); + + /* Create LPC controller */ + object_property_set_link(OBJECT(&chip8->lpc), "psi", OBJECT(&chip8->psi), + &error_abort); + qdev_realize(DEVICE(&chip8->lpc), NULL, &error_fatal); + pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip8->lpc.xscom_regs); + + chip->fw_mr = &chip8->lpc.isa_fw; + chip->dt_isa_nodename = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x", + (uint64_t) PNV_XSCOM_BASE(chip), + PNV_XSCOM_LPC_BASE); + + /* + * Interrupt Management Area. This is the memory region holding + * all the Interrupt Control Presenter (ICP) registers + */ + pnv_chip_icp_realize(chip8, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Create the simplified OCC model */ + object_property_set_link(OBJECT(&chip8->occ), "psi", OBJECT(&chip8->psi), + &error_abort); + if (!qdev_realize(DEVICE(&chip8->occ), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip8->occ.xscom_regs); + + /* OCC SRAM model */ + memory_region_add_subregion(get_system_memory(), PNV_OCC_SENSOR_BASE(chip), + &chip8->occ.sram_regs); + + /* HOMER */ + object_property_set_link(OBJECT(&chip8->homer), "chip", OBJECT(chip), + &error_abort); + if (!qdev_realize(DEVICE(&chip8->homer), NULL, errp)) { + return; + } + /* Homer Xscom region */ + pnv_xscom_add_subregion(chip, PNV_XSCOM_PBA_BASE, &chip8->homer.pba_regs); + + /* Homer mmio region */ + memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip), + &chip8->homer.regs); + + /* PHB3 controllers */ + for (i = 0; i < chip->num_phbs; i++) { + PnvPHB3 *phb = &chip8->phbs[i]; + PnvPBCQState *pbcq = &phb->pbcq; + + object_property_set_int(OBJECT(phb), "index", i, &error_fatal); + object_property_set_int(OBJECT(phb), "chip-id", chip->chip_id, + &error_fatal); + if (!sysbus_realize(SYS_BUS_DEVICE(phb), errp)) { + return; + } + + /* Populate the XSCOM address space. 
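+         * (one 0x400-sized slot per PHB for the PBCQ nest and pci rings,
+         * and a 0x40-sized slot per PHB for the spci ring, as the offsets
+         * below show)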
*/ + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_nest_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_pci_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id, + &pbcq->xscom_spci_regs); + } +} + +static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + addr &= (PNV_XSCOM_SIZE - 1); + return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf); +} + +static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ + k->cores_mask = POWER8E_CORE_MASK; + k->num_phbs = 3; + k->core_pir = pnv_chip_core_pir_p8; + k->intc_create = pnv_chip_power8_intc_create; + k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; + k->intc_print_info = pnv_chip_power8_intc_print_info; + k->isa_create = pnv_chip_power8_isa_create; + k->dt_populate = pnv_chip_power8_dt_populate; + k->pic_print_info = pnv_chip_power8_pic_print_info; + k->xscom_core_base = pnv_chip_power8_xscom_core_base; + k->xscom_pcba = pnv_chip_power8_xscom_pcba; + dc->desc = "PowerNV Chip POWER8E"; + + device_class_set_parent_realize(dc, pnv_chip_power8_realize, + &k->parent_realize); +} + +static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ + k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; + k->core_pir = pnv_chip_core_pir_p8; + k->intc_create = pnv_chip_power8_intc_create; + k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; + k->intc_print_info = pnv_chip_power8_intc_print_info; + k->isa_create = pnv_chip_power8_isa_create; + k->dt_populate = pnv_chip_power8_dt_populate; + k->pic_print_info = pnv_chip_power8_pic_print_info; + k->xscom_core_base = pnv_chip_power8_xscom_core_base; + k->xscom_pcba = pnv_chip_power8_xscom_pcba; + dc->desc = "PowerNV Chip POWER8"; + + device_class_set_parent_realize(dc, pnv_chip_power8_realize, + &k->parent_realize); +} + +static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ + k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; + k->core_pir = pnv_chip_core_pir_p8; + k->intc_create = pnv_chip_power8_intc_create; + k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; + k->intc_print_info = pnv_chip_power8_intc_print_info; + k->isa_create = pnv_chip_power8nvl_isa_create; + k->dt_populate = pnv_chip_power8_dt_populate; + k->pic_print_info = pnv_chip_power8_pic_print_info; + k->xscom_core_base = pnv_chip_power8_xscom_core_base; + k->xscom_pcba = pnv_chip_power8_xscom_pcba; + dc->desc = "PowerNV Chip POWER8NVL"; + + device_class_set_parent_realize(dc, pnv_chip_power8_realize, + &k->parent_realize); +} + +static void pnv_chip_power9_instance_init(Object *obj) +{ + PnvChip *chip = PNV_CHIP(obj); + Pnv9Chip *chip9 = PNV9_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; + + object_initialize_child(obj, "xive", &chip9->xive, TYPE_PNV_XIVE); + object_property_add_alias(obj, "xive-fabric", OBJECT(&chip9->xive), + 
"xive-fabric"); + + object_initialize_child(obj, "psi", &chip9->psi, TYPE_PNV9_PSI); + + object_initialize_child(obj, "lpc", &chip9->lpc, TYPE_PNV9_LPC); + + object_initialize_child(obj, "occ", &chip9->occ, TYPE_PNV9_OCC); + + object_initialize_child(obj, "homer", &chip9->homer, TYPE_PNV9_HOMER); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + object_initialize_child(obj, "pec[*]", &chip9->pecs[i], + TYPE_PNV_PHB4_PEC); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; +} + +static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) +{ + PnvChip *chip = PNV_CHIP(chip9); + int i; + + chip9->nr_quads = DIV_ROUND_UP(chip->nr_cores, 4); + chip9->quads = g_new0(PnvQuad, chip9->nr_quads); + + for (i = 0; i < chip9->nr_quads; i++) { + char eq_name[32]; + PnvQuad *eq = &chip9->quads[i]; + PnvCore *pnv_core = chip->cores[i * 4]; + int core_id = CPU_CORE(pnv_core)->core_id; + + snprintf(eq_name, sizeof(eq_name), "eq[%d]", core_id); + object_initialize_child_with_props(OBJECT(chip), eq_name, eq, + sizeof(*eq), TYPE_PNV_QUAD, + &error_fatal, NULL); + + object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal); + qdev_realize(DEVICE(eq), NULL, &error_fatal); + + pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id), + &eq->xscom_regs); + } +} + +static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + int i, j; + int phb_id = 0; + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); + uint32_t pec_nest_base; + uint32_t pec_pci_base; + + object_property_set_int(OBJECT(pec), "index", i, &error_fatal); + /* + * PEC0 -> 1 stack + * PEC1 -> 2 stacks + * PEC2 -> 3 stacks + */ + object_property_set_int(OBJECT(pec), "num-stacks", i + 1, + &error_fatal); + object_property_set_int(OBJECT(pec), "chip-id", chip->chip_id, + &error_fatal); + object_property_set_link(OBJECT(pec), "system-memory", + OBJECT(get_system_memory()), &error_abort); + if (!qdev_realize(DEVICE(pec), NULL, errp)) { + return; + } + + pec_nest_base = pecc->xscom_nest_base(pec); + pec_pci_base = pecc->xscom_pci_base(pec); + + pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr); + pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr); + + for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs; + j++, phb_id++) { + PnvPhb4PecStack *stack = &pec->stacks[j]; + Object *obj = OBJECT(&stack->phb); + + object_property_set_int(obj, "index", phb_id, &error_fatal); + object_property_set_int(obj, "chip-id", chip->chip_id, + &error_fatal); + object_property_set_int(obj, "version", PNV_PHB4_VERSION, + &error_fatal); + object_property_set_int(obj, "device-id", PNV_PHB4_DEVICE_ID, + &error_fatal); + object_property_set_link(obj, "stack", OBJECT(stack), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(obj), errp)) { + return; + } + + /* Populate the XSCOM address space. 
*/ + pnv_xscom_add_subregion(chip, + pec_nest_base + 0x40 * (stack->stack_no + 1), + &stack->nest_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + 0x40 * (stack->stack_no + 1), + &stack->pci_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 + + 0x40 * stack->stack_no, + &stack->phb_regs_mr); + } + } +} + +static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); + Pnv9Chip *chip9 = PNV9_CHIP(dev); + PnvChip *chip = PNV_CHIP(dev); + Pnv9Psi *psi9 = &chip9->psi; + Error *local_err = NULL; + + /* XSCOM bridge is first */ + pnv_xscom_realize(chip, PNV9_XSCOM_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV9_XSCOM_BASE(chip)); + + pcc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_chip_quad_realize(chip9, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* XIVE interrupt controller (POWER9) */ + object_property_set_int(OBJECT(&chip9->xive), "ic-bar", + PNV9_XIVE_IC_BASE(chip), &error_fatal); + object_property_set_int(OBJECT(&chip9->xive), "vc-bar", + PNV9_XIVE_VC_BASE(chip), &error_fatal); + object_property_set_int(OBJECT(&chip9->xive), "pc-bar", + PNV9_XIVE_PC_BASE(chip), &error_fatal); + object_property_set_int(OBJECT(&chip9->xive), "tm-bar", + PNV9_XIVE_TM_BASE(chip), &error_fatal); + object_property_set_link(OBJECT(&chip9->xive), "chip", OBJECT(chip), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&chip9->xive), errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV9_XSCOM_XIVE_BASE, + &chip9->xive.xscom_regs); + + /* Processor Service Interface (PSI) Host Bridge */ + object_property_set_int(OBJECT(&chip9->psi), "bar", PNV9_PSIHB_BASE(chip), + &error_fatal); + if (!qdev_realize(DEVICE(&chip9->psi), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV9_XSCOM_PSIHB_BASE, + &PNV_PSI(psi9)->xscom_regs); + + /* LPC */ + object_property_set_link(OBJECT(&chip9->lpc), "psi", OBJECT(&chip9->psi), + &error_abort); + if (!qdev_realize(DEVICE(&chip9->lpc), NULL, errp)) { + return; + } + memory_region_add_subregion(get_system_memory(), PNV9_LPCM_BASE(chip), + &chip9->lpc.xscom_regs); + + chip->fw_mr = &chip9->lpc.isa_fw; + chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0", + (uint64_t) PNV9_LPCM_BASE(chip)); + + /* Create the simplified OCC model */ + object_property_set_link(OBJECT(&chip9->occ), "psi", OBJECT(&chip9->psi), + &error_abort); + if (!qdev_realize(DEVICE(&chip9->occ), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV9_XSCOM_OCC_BASE, &chip9->occ.xscom_regs); + + /* OCC SRAM model */ + memory_region_add_subregion(get_system_memory(), PNV9_OCC_SENSOR_BASE(chip), + &chip9->occ.sram_regs); + + /* HOMER */ + object_property_set_link(OBJECT(&chip9->homer), "chip", OBJECT(chip), + &error_abort); + if (!qdev_realize(DEVICE(&chip9->homer), NULL, errp)) { + return; + } + /* Homer Xscom region */ + pnv_xscom_add_subregion(chip, PNV9_XSCOM_PBA_BASE, &chip9->homer.pba_regs); + + /* Homer mmio region */ + memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip), + &chip9->homer.regs); + + /* PHBs */ + pnv_chip_power9_phb_realize(chip, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + addr &= (PNV9_XSCOM_SIZE - 1); + return addr >> 3; 
+} + +static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */ + k->cores_mask = POWER9_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p9; + k->intc_create = pnv_chip_power9_intc_create; + k->intc_reset = pnv_chip_power9_intc_reset; + k->intc_destroy = pnv_chip_power9_intc_destroy; + k->intc_print_info = pnv_chip_power9_intc_print_info; + k->isa_create = pnv_chip_power9_isa_create; + k->dt_populate = pnv_chip_power9_dt_populate; + k->pic_print_info = pnv_chip_power9_pic_print_info; + k->xscom_core_base = pnv_chip_power9_xscom_core_base; + k->xscom_pcba = pnv_chip_power9_xscom_pcba; + dc->desc = "PowerNV Chip POWER9"; + k->num_phbs = 6; + + device_class_set_parent_realize(dc, pnv_chip_power9_realize, + &k->parent_realize); +} + +static void pnv_chip_power10_instance_init(Object *obj) +{ + Pnv10Chip *chip10 = PNV10_CHIP(obj); + + object_initialize_child(obj, "psi", &chip10->psi, TYPE_PNV10_PSI); + object_initialize_child(obj, "lpc", &chip10->lpc, TYPE_PNV10_LPC); +} + +static void pnv_chip_power10_realize(DeviceState *dev, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); + PnvChip *chip = PNV_CHIP(dev); + Pnv10Chip *chip10 = PNV10_CHIP(dev); + Error *local_err = NULL; + + /* XSCOM bridge is first */ + pnv_xscom_realize(chip, PNV10_XSCOM_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV10_XSCOM_BASE(chip)); + + pcc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Processor Service Interface (PSI) Host Bridge */ + object_property_set_int(OBJECT(&chip10->psi), "bar", + PNV10_PSIHB_BASE(chip), &error_fatal); + if (!qdev_realize(DEVICE(&chip10->psi), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV10_XSCOM_PSIHB_BASE, + &PNV_PSI(&chip10->psi)->xscom_regs); + + /* LPC */ + object_property_set_link(OBJECT(&chip10->lpc), "psi", + OBJECT(&chip10->psi), &error_abort); + if (!qdev_realize(DEVICE(&chip10->lpc), NULL, errp)) { + return; + } + memory_region_add_subregion(get_system_memory(), PNV10_LPCM_BASE(chip), + &chip10->lpc.xscom_regs); + + chip->fw_mr = &chip10->lpc.isa_fw; + chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0", + (uint64_t) PNV10_LPCM_BASE(chip)); +} + +static uint32_t pnv_chip_power10_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + addr &= (PNV10_XSCOM_SIZE - 1); + return addr >> 3; +} + +static void pnv_chip_power10_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x120da04900008000ull; /* P10 DD1.0 (with NX) */ + k->cores_mask = POWER10_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p10; + k->intc_create = pnv_chip_power10_intc_create; + k->intc_reset = pnv_chip_power10_intc_reset; + k->intc_destroy = pnv_chip_power10_intc_destroy; + k->intc_print_info = pnv_chip_power10_intc_print_info; + k->isa_create = pnv_chip_power10_isa_create; + k->dt_populate = pnv_chip_power10_dt_populate; + k->pic_print_info = pnv_chip_power10_pic_print_info; + k->xscom_core_base = pnv_chip_power10_xscom_core_base; + k->xscom_pcba = pnv_chip_power10_xscom_pcba; + dc->desc = "PowerNV Chip POWER10"; + + device_class_set_parent_realize(dc, pnv_chip_power10_realize, + &k->parent_realize); +} + +static void pnv_chip_core_sanitize(PnvChip *chip, Error 
**errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + int cores_max; + + /* + * No custom mask for this chip, let's use the default one from + * the chip class + */ + if (!chip->cores_mask) { + chip->cores_mask = pcc->cores_mask; + } + + /* filter out alien core ids; some are reserved */ + if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) { + error_setg(errp, "invalid core mask for chip 0x%"PRIx64, + chip->cores_mask); + return; + } + chip->cores_mask &= pcc->cores_mask; + + /* now that we have a sane layout, let's check the number of cores */ + cores_max = ctpop64(chip->cores_mask); + if (chip->nr_cores > cores_max) { + error_setg(errp, "too many cores for chip, limit is %d", + cores_max); + return; + } +} + +static void pnv_chip_core_realize(PnvChip *chip, Error **errp) +{ + Error *error = NULL; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + const char *typename = pnv_chip_core_typename(chip); + int i, core_hwid; + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + + if (!object_class_by_name(typename)) { + error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename); + return; + } + + /* Cores */ + pnv_chip_core_sanitize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } + + chip->cores = g_new0(PnvCore *, chip->nr_cores); + + for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8) + && (i < chip->nr_cores); core_hwid++) { + char core_name[32]; + PnvCore *pnv_core; + uint64_t xscom_core_base; + + if (!(chip->cores_mask & (1ull << core_hwid))) { + continue; + } + + pnv_core = PNV_CORE(object_new(typename)); + + snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid); + object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core)); + chip->cores[i] = pnv_core; + object_property_set_int(OBJECT(pnv_core), "nr-threads", + chip->nr_threads, &error_fatal); + object_property_set_int(OBJECT(pnv_core), CPU_CORE_PROP_CORE_ID, + core_hwid, &error_fatal); + object_property_set_int(OBJECT(pnv_core), "pir", + pcc->core_pir(chip, core_hwid), &error_fatal); + object_property_set_int(OBJECT(pnv_core), "hrmor", pnv->fw_load_addr, + &error_fatal); + object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip), + &error_abort); + qdev_realize(DEVICE(pnv_core), NULL, &error_fatal); + + /* Each core has an XSCOM MMIO region */ + xscom_core_base = pcc->xscom_core_base(chip, core_hwid); + + pnv_xscom_add_subregion(chip, xscom_core_base, + &pnv_core->xscom_regs); + i++; + } +} + +static void pnv_chip_realize(DeviceState *dev, Error **errp) +{ + PnvChip *chip = PNV_CHIP(dev); + Error *error = NULL; + + /* Cores */ + pnv_chip_core_realize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } +} + +static Property pnv_chip_properties[] = { + DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0), + DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0), + DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0), + DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1), + DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), + DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1), + DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_chip_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_CPU, dc->categories); + dc->realize = pnv_chip_realize; + device_class_set_props(dc, pnv_chip_properties); + dc->desc = "PowerNV Chip"; +} + +PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip,
uint32_t pir) +{ + int i, j; + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pc = chip->cores[i]; + CPUCore *cc = CPU_CORE(pc); + + for (j = 0; j < cc->nr_threads; j++) { + if (ppc_cpu_pir(pc->threads[j]) == pir) { + return pc->threads[j]; + } + } + } + return NULL; +} + +static ICSState *pnv_ics_get(XICSFabric *xi, int irq) +{ + PnvMachineState *pnv = PNV_MACHINE(xi); + int i, j; + + for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; + Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); + + if (ics_valid_irq(&chip8->psi.ics, irq)) { + return &chip8->psi.ics; + } + for (j = 0; j < chip->num_phbs; j++) { + if (ics_valid_irq(&chip8->phbs[j].lsis, irq)) { + return &chip8->phbs[j].lsis; + } + if (ics_valid_irq(ICS(&chip8->phbs[j].msis), irq)) { + return ICS(&chip8->phbs[j].msis); + } + } + } + return NULL; +} + +static void pnv_ics_resend(XICSFabric *xi) +{ + PnvMachineState *pnv = PNV_MACHINE(xi); + int i, j; + + for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; + Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); + + ics_resend(&chip8->psi.ics); + for (j = 0; j < chip->num_phbs; j++) { + ics_resend(&chip8->phbs[j].lsis); + ics_resend(ICS(&chip8->phbs[j].msis)); + } + } +} + +static ICPState *pnv_icp_get(XICSFabric *xi, int pir) +{ + PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir); + + return cpu ? ICP(pnv_cpu_state(cpu)->intc) : NULL; +} + +static void pnv_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + int i; + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + /* XXX: loop on each chip/core/thread instead of CPU_FOREACH() */ + PNV_CHIP_GET_CLASS(pnv->chips[0])->intc_print_info(pnv->chips[0], cpu, + mon); + } + + for (i = 0; i < pnv->num_chips; i++) { + PNV_CHIP_GET_CLASS(pnv->chips[i])->pic_print_info(pnv->chips[i], mon); + } +} + +static int pnv_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv, + XiveTCTXMatch *match) +{ + PnvMachineState *pnv = PNV_MACHINE(xfb); + int total_count = 0; + int i; + + for (i = 0; i < pnv->num_chips; i++) { + Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]); + XivePresenter *xptr = XIVE_PRESENTER(&chip9->xive); + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); + int count; + + count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore, + priority, logic_serv, match); + + if (count < 0) { + return count; + } + + total_count += count; + } + + return total_count; +} + +static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); + PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc); + static const char compat[] = "qemu,powernv8\0qemu,powernv\0ibm,powernv"; + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER8"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + + xic->icp_get = pnv_icp_get; + xic->ics_get = pnv_ics_get; + xic->ics_resend = pnv_ics_resend; + + pmc->compat = compat; + pmc->compat_size = sizeof(compat); +} + +static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc); + PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc); + static const char compat[] = "qemu,powernv9\0ibm,powernv"; + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER9"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0"); + xfc->match_nvt = pnv_match_nvt; + + 
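/* Resolve the generic "powernv" machine name to this POWER9 machine */ + 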
mc->alias = "powernv"; + + pmc->compat = compat; + pmc->compat_size = sizeof(compat); + pmc->dt_power_mgt = pnv_dt_power_mgt; +} + +static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc); + static const char compat[] = "qemu,powernv10\0ibm,powernv"; + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER10"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0"); + + pmc->compat = compat; + pmc->compat_size = sizeof(compat); + pmc->dt_power_mgt = pnv_dt_power_mgt; +} + +static bool pnv_machine_get_hb(Object *obj, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + return !!pnv->fw_load_addr; +} + +static void pnv_machine_set_hb(Object *obj, bool value, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + if (value) { + pnv->fw_load_addr = 0x8000000; + } +} + +static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + cpu_synchronize_state(cs); + ppc_cpu_do_system_reset(cs); + if (env->spr[SPR_SRR1] & SRR1_WAKESTATE) { + /* + * Power-save wakeups, as indicated by non-zero SRR1[46:47] put the + * wakeup reason in SRR1[42:45], system reset is indicated with 0b0100 + * (PPC_BIT(43)). + */ + if (!(env->spr[SPR_SRR1] & SRR1_WAKERESET)) { + warn_report("ppc_cpu_do_system_reset does not set system reset wakeup reason"); + env->spr[SPR_SRR1] |= SRR1_WAKERESET; + } + } else { + /* + * For non-powersave system resets, SRR1[42:45] are defined to be + * implementation-dependent. The POWER9 User Manual specifies that + * an external (SCOM driven, which may come from a BMC nmi command or + * another CPU requesting a NMI IPI) system reset exception should be + * 0b0010 (PPC_BIT(44)). 
+ */ + env->spr[SPR_SRR1] |= SRR1_WAKESCOM; + } +} + +static void pnv_nmi(NMIState *n, int cpu_index, Error **errp) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + async_run_on_cpu(cs, pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL); + } +} + +static void pnv_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); + NMIClass *nc = NMI_CLASS(oc); + + mc->desc = "IBM PowerNV (Non-Virtualized)"; + mc->init = pnv_init; + mc->reset = pnv_reset; + mc->max_cpus = MAX_CPUS; + /* Pnv provides an AHCI device for storage */ + mc->block_default_type = IF_IDE; + mc->no_parallel = 1; + mc->default_boot_order = NULL; + /* + * RAM defaults to less than 2048 MiB for 32-bit hosts, and large + * enough to fit the maximum initrd size at its load address + */ + mc->default_ram_size = 1 * GiB; + mc->default_ram_id = "pnv.ram"; + ispc->print_info = pnv_pic_print_info; + nc->nmi_monitor_handler = pnv_nmi; + + object_class_property_add_bool(oc, "hb-mode", + pnv_machine_get_hb, pnv_machine_set_hb); + object_class_property_set_description(oc, "hb-mode", + "Use a hostboot-like boot loader"); +} + +#define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \ + { \ + .name = type, \ + .class_init = class_initfn, \ + .parent = TYPE_PNV8_CHIP, \ + } + +#define DEFINE_PNV9_CHIP_TYPE(type, class_initfn) \ + { \ + .name = type, \ + .class_init = class_initfn, \ + .parent = TYPE_PNV9_CHIP, \ + } + +#define DEFINE_PNV10_CHIP_TYPE(type, class_initfn) \ + { \ + .name = type, \ + .class_init = class_initfn, \ + .parent = TYPE_PNV10_CHIP, \ + } + +static const TypeInfo types[] = { + { + .name = MACHINE_TYPE_NAME("powernv10"), + .parent = TYPE_PNV_MACHINE, + .class_init = pnv_machine_power10_class_init, + }, + { + .name = MACHINE_TYPE_NAME("powernv9"), + .parent = TYPE_PNV_MACHINE, + .class_init = pnv_machine_power9_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XIVE_FABRIC }, + { }, + }, + }, + { + .name = MACHINE_TYPE_NAME("powernv8"), + .parent = TYPE_PNV_MACHINE, + .class_init = pnv_machine_power8_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XICS_FABRIC }, + { }, + }, + }, + { + .name = TYPE_PNV_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(PnvMachineState), + .class_init = pnv_machine_class_init, + .class_size = sizeof(PnvMachineClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { TYPE_NMI }, + { }, + }, + }, + { + .name = TYPE_PNV_CHIP, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = pnv_chip_class_init, + .instance_size = sizeof(PnvChip), + .class_size = sizeof(PnvChipClass), + .abstract = true, + }, + + /* + * P10 chip and variants + */ + { + .name = TYPE_PNV10_CHIP, + .parent = TYPE_PNV_CHIP, + .instance_init = pnv_chip_power10_instance_init, + .instance_size = sizeof(Pnv10Chip), + }, + DEFINE_PNV10_CHIP_TYPE(TYPE_PNV_CHIP_POWER10, pnv_chip_power10_class_init), + + /* + * P9 chip and variants + */ + { + .name = TYPE_PNV9_CHIP, + .parent = TYPE_PNV_CHIP, + .instance_init = pnv_chip_power9_instance_init, + .instance_size = sizeof(Pnv9Chip), + }, + DEFINE_PNV9_CHIP_TYPE(TYPE_PNV_CHIP_POWER9, pnv_chip_power9_class_init), + + /* + * P8 chip and variants + */ + { + .name = TYPE_PNV8_CHIP, + .parent = TYPE_PNV_CHIP, + .instance_init = pnv_chip_power8_instance_init, + .instance_size = sizeof(Pnv8Chip), + }, + DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8, pnv_chip_power8_class_init), + DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8E, pnv_chip_power8e_class_init), + 
DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8NVL, + pnv_chip_power8nvl_class_init), +}; + +DEFINE_TYPES(types) diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c new file mode 100644 index 000000000..75a22ce50 --- /dev/null +++ b/hw/ppc/pnv_bmc.c @@ -0,0 +1,313 @@ +/* + * QEMU PowerNV, BMC related functions + * + * Copyright (c) 2016-2017, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "target/ppc/cpu.h" +#include "qemu/log.h" +#include "hw/ipmi/ipmi.h" +#include "hw/ppc/fdt.h" + +#include "hw/ppc/pnv.h" + +#include <libfdt.h> + +/* TODO: include definition in ipmi.h */ +#define IPMI_SDR_FULL_TYPE 1 + +/* + * OEM SEL Event data packet sent by BMC in response to a Read Event + * Message Buffer command + */ +typedef struct OemSel { + /* SEL header */ + uint8_t id[2]; + uint8_t type; + uint8_t timestamp[4]; + uint8_t manuf_id[3]; + + /* OEM SEL data (6 bytes) follows */ + uint8_t netfun; + uint8_t cmd; + uint8_t data[4]; +} OemSel; + +#define SOFT_OFF 0x00 +#define SOFT_REBOOT 0x01 + +static bool pnv_bmc_is_simulator(IPMIBmc *bmc) +{ + return object_dynamic_cast(OBJECT(bmc), TYPE_IPMI_BMC_SIMULATOR); +} + +static void pnv_gen_oem_sel(IPMIBmc *bmc, uint8_t reboot) +{ + /* IPMI SEL events are 16 bytes long */ + OemSel sel = { + .id = { 0x55, 0x55 }, + .type = 0xC0, /* OEM */ + .manuf_id = { 0x0, 0x0, 0x0 }, + .timestamp = { 0x0, 0x0, 0x0, 0x0 }, + .netfun = 0x3A, /* IBM */ + .cmd = 0x04, /* AMI OEM SEL Power Notification */ + .data = { reboot, 0xFF, 0xFF, 0xFF }, + }; + + ipmi_bmc_gen_event(bmc, (uint8_t *) &sel, 0 /* do not log the event */); +} + +void pnv_bmc_powerdown(IPMIBmc *bmc) +{ + pnv_gen_oem_sel(bmc, SOFT_OFF); +} + +void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt) +{ + int offset; + int i; + const struct ipmi_sdr_compact *sdr; + uint16_t nextrec; + + if (!pnv_bmc_is_simulator(bmc)) { + return; + } + + offset = fdt_add_subnode(fdt, 0, "bmc"); + _FDT(offset); + + _FDT((fdt_setprop_string(fdt, offset, "name", "bmc"))); + offset = fdt_add_subnode(fdt, offset, "sensors"); + _FDT(offset); + + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); + + for (i = 0; !ipmi_bmc_sdr_find(bmc, i, &sdr, &nextrec); i++) { + int off; + char *name; + + if (sdr->header.rec_type != IPMI_SDR_COMPACT_TYPE && + sdr->header.rec_type != IPMI_SDR_FULL_TYPE) { + continue; + } + + name = g_strdup_printf("sensor@%x", sdr->sensor_owner_number); + off = fdt_add_subnode(fdt, offset, name); + _FDT(off); + g_free(name); + + _FDT((fdt_setprop_cell(fdt, off, "reg", sdr->sensor_owner_number))); + _FDT((fdt_setprop_string(fdt, off, "name", "sensor"))); + _FDT((fdt_setprop_string(fdt, off, "compatible", "ibm,ipmi-sensor"))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-reading-type", + sdr->reading_type))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-entity-id", + sdr->entity_id))); + _FDT((fdt_setprop_cell(fdt,
off, "ipmi-entity-instance", + sdr->entity_instance))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-type", + sdr->sensor_type))); + } +} + +/* + * HIOMAP protocol handler + */ +#define HIOMAP_C_RESET 1 +#define HIOMAP_C_GET_INFO 2 +#define HIOMAP_C_GET_FLASH_INFO 3 +#define HIOMAP_C_CREATE_READ_WINDOW 4 +#define HIOMAP_C_CLOSE_WINDOW 5 +#define HIOMAP_C_CREATE_WRITE_WINDOW 6 +#define HIOMAP_C_MARK_DIRTY 7 +#define HIOMAP_C_FLUSH 8 +#define HIOMAP_C_ACK 9 +#define HIOMAP_C_ERASE 10 +#define HIOMAP_C_DEVICE_NAME 11 +#define HIOMAP_C_LOCK 12 + +#define BLOCK_SHIFT 12 /* 4K */ + +static uint16_t bytes_to_blocks(uint32_t bytes) +{ + return bytes >> BLOCK_SHIFT; +} + +static uint32_t blocks_to_bytes(uint16_t blocks) +{ + return blocks << BLOCK_SHIFT; +} + +static int hiomap_erase(PnvPnor *pnor, uint32_t offset, uint32_t size) +{ + MemTxResult result; + int i; + + for (i = 0; i < size / 4; i++) { + result = memory_region_dispatch_write(&pnor->mmio, offset + i * 4, + 0xFFFFFFFF, MO_32, + MEMTXATTRS_UNSPECIFIED); + if (result != MEMTX_OK) { + return -1; + } + } + return 0; +} + +static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp) +{ + PnvPnor *pnor = PNV_PNOR(object_property_get_link(OBJECT(ibs), "pnor", + &error_abort)); + uint32_t pnor_size = pnor->size; + uint32_t pnor_addr = PNOR_SPI_OFFSET; + bool readonly = false; + + rsp_buffer_push(rsp, cmd[2]); + rsp_buffer_push(rsp, cmd[3]); + + switch (cmd[2]) { + case HIOMAP_C_MARK_DIRTY: + case HIOMAP_C_FLUSH: + case HIOMAP_C_ACK: + break; + + case HIOMAP_C_ERASE: + if (hiomap_erase(pnor, blocks_to_bytes(cmd[5] << 8 | cmd[4]), + blocks_to_bytes(cmd[7] << 8 | cmd[6]))) { + rsp_buffer_set_error(rsp, IPMI_CC_UNSPECIFIED); + } + break; + + case HIOMAP_C_GET_INFO: + rsp_buffer_push(rsp, 2); /* Version 2 */ + rsp_buffer_push(rsp, BLOCK_SHIFT); /* block size */ + rsp_buffer_push(rsp, 0); /* Timeout */ + rsp_buffer_push(rsp, 0); /* Timeout */ + break; + + case HIOMAP_C_GET_FLASH_INFO: + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) & 0xFF); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) >> 8); + rsp_buffer_push(rsp, 0x01); /* erase size */ + rsp_buffer_push(rsp, 0x00); /* erase size */ + break; + + case HIOMAP_C_CREATE_READ_WINDOW: + readonly = true; + /* Fall through */ + + case HIOMAP_C_CREATE_WRITE_WINDOW: + memory_region_set_readonly(&pnor->mmio, readonly); + memory_region_set_enabled(&pnor->mmio, true); + + rsp_buffer_push(rsp, bytes_to_blocks(pnor_addr) & 0xFF); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_addr) >> 8); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) & 0xFF); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) >> 8); + rsp_buffer_push(rsp, 0x00); /* offset */ + rsp_buffer_push(rsp, 0x00); /* offset */ + break; + + case HIOMAP_C_CLOSE_WINDOW: + memory_region_set_enabled(&pnor->mmio, false); + break; + + case HIOMAP_C_DEVICE_NAME: + case HIOMAP_C_RESET: + case HIOMAP_C_LOCK: + default: + qemu_log_mask(LOG_GUEST_ERROR, "HIOMAP: unknown command %02X\n", cmd[2]); + break; + } +} + +#define HIOMAP 0x5a + +static const IPMICmdHandler hiomap_cmds[] = { + [HIOMAP] = { hiomap_cmd, 3 }, +}; + +static const IPMINetfn hiomap_netfn = { + .cmd_nums = ARRAY_SIZE(hiomap_cmds), + .cmd_handlers = hiomap_cmds +}; + + +void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor) +{ + if (!pnv_bmc_is_simulator(bmc)) { + return; + } + + object_ref(OBJECT(pnor)); + object_property_add_const_link(OBJECT(bmc), "pnor", OBJECT(pnor)); + + /* Install the HIOMAP protocol handlers to access the PNOR */ + 
ipmi_sim_register_netfn(IPMI_BMC_SIMULATOR(bmc), IPMI_NETFN_OEM, + &hiomap_netfn); +} + +/* + * Instantiate the machine BMC. PowerNV uses the QEMU internal + * simulator but it could also be external. + */ +IPMIBmc *pnv_bmc_create(PnvPnor *pnor) +{ + Object *obj; + + obj = object_new(TYPE_IPMI_BMC_SIMULATOR); + qdev_realize(DEVICE(obj), NULL, &error_fatal); + pnv_bmc_set_pnor(IPMI_BMC(obj), pnor); + + return IPMI_BMC(obj); +} + +typedef struct ForeachArgs { + const char *name; + Object *obj; +} ForeachArgs; + +static int bmc_find(Object *child, void *opaque) +{ + ForeachArgs *args = opaque; + + if (object_dynamic_cast(child, args->name)) { + if (args->obj) { + return 1; + } + args->obj = child; + } + return 0; +} + +IPMIBmc *pnv_bmc_find(Error **errp) +{ + ForeachArgs args = { TYPE_IPMI_BMC, NULL }; + int ret; + + ret = object_child_foreach_recursive(object_get_root(), bmc_find, &args); + if (ret) { + error_setg(errp, "machine should have only one BMC device. " + "Use '-nodefaults'"); + return NULL; + } + + return args.obj ? IPMI_BMC(args.obj) : NULL; +} diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c new file mode 100644 index 000000000..19e8eb885 --- /dev/null +++ b/hw/ppc/pnv_core.c @@ -0,0 +1,441 @@ +/* + * QEMU PowerPC PowerNV CPU Core model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "sysemu/reset.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/xics.h" +#include "hw/qdev-properties.h" +#include "helper_regs.h" + +static const char *pnv_core_cpu_typename(PnvCore *pc) +{ + const char *core_type = object_class_get_name(object_get_class(OBJECT(pc))); + int len = strlen(core_type) - strlen(PNV_CORE_TYPE_SUFFIX); + char *s = g_strdup_printf(POWERPC_CPU_TYPE_NAME("%.*s"), len, core_type); + const char *cpu_type = object_class_get_name(object_class_by_name(s)); + g_free(s); + return cpu_type; +} + +static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); + + cpu_reset(cs); + + /* + * The skiboot firmware elects a primary thread to initialize the + * system, and it can be any thread.
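+ * + * Whichever thread is elected, each one is reset below to the same + * entry state: r3 holds the device tree address and NIP points at + * the 0x10 entry offset.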
+ */ + env->gpr[3] = PNV_FDT_ADDR; + env->nip = 0x10; + env->msr |= MSR_HVB; /* Hypervisor mode */ + env->spr[SPR_HRMOR] = pc->hrmor; + hreg_compute_hflags(env); + + pcc->intc_reset(pc->chip, cpu); +} + +/* + * These values are read by the PowerNV HW monitors under Linux + */ +#define PNV_XSCOM_EX_DTS_RESULT0 0x50000 +#define PNV_XSCOM_EX_DTS_RESULT1 0x50001 + +static uint64_t pnv_core_power8_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = 0; + + /* The result should be 38 C */ + switch (offset) { + case PNV_XSCOM_EX_DTS_RESULT0: + val = 0x26f024f023f0000ull; + break; + case PNV_XSCOM_EX_DTS_RESULT1: + val = 0x24f000000000000ull; + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n", + addr); + } + + return val; +} + +static void pnv_core_power8_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx "\n", + addr); +} + +static const MemoryRegionOps pnv_core_power8_xscom_ops = { + .read = pnv_core_power8_xscom_read, + .write = pnv_core_power8_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + + +/* + * POWER9 core controls + */ +#define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP 0xf010d +#define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR 0xf010a + +static uint64_t pnv_core_power9_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = 0; + + /* The result should be 38 C */ + switch (offset) { + case PNV_XSCOM_EX_DTS_RESULT0: + val = 0x26f024f023f0000ull; + break; + case PNV_XSCOM_EX_DTS_RESULT1: + val = 0x24f000000000000ull; + break; + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP: + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR: + val = 0x0; + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n", + addr); + } + + return val; +} + +static void pnv_core_power9_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + uint32_t offset = addr >> 3; + + switch (offset) { + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP: + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR: + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx "\n", + addr); + } +} + +static const MemoryRegionOps pnv_core_power9_xscom_ops = { + .read = pnv_core_power9_xscom_read, + .write = pnv_core_power9_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) +{ + CPUPPCState *env = &cpu->env; + int core_pir; + int thread_index = 0; /* TODO: TCG supports only one thread */ + ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; + Error *local_err = NULL; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return; + } + + pcc->intc_create(pc->chip, cpu, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort); + + /* + * The PIR of a thread is the core PIR + the thread index. We will + * need to find a way to get the thread index when TCG supports + * more than 1. We could use the object name ? 
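+ * + * With a single thread, the PIR set below is simply the core PIR.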
+ */ + pir->default_value = core_pir + thread_index; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); +} + +static void pnv_core_reset(void *dev) +{ + CPUCore *cc = CPU_CORE(dev); + PnvCore *pc = PNV_CORE(dev); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + pnv_core_cpu_reset(pc, pc->threads[i]); + } +} + +static void pnv_core_realize(DeviceState *dev, Error **errp) +{ + PnvCore *pc = PNV_CORE(OBJECT(dev)); + PnvCoreClass *pcc = PNV_CORE_GET_CLASS(pc); + CPUCore *cc = CPU_CORE(OBJECT(dev)); + const char *typename = pnv_core_cpu_typename(pc); + Error *local_err = NULL; + void *obj; + int i, j; + char name[32]; + + assert(pc->chip); + + pc->threads = g_new(PowerPCCPU *, cc->nr_threads); + for (i = 0; i < cc->nr_threads; i++) { + PowerPCCPU *cpu; + + obj = object_new(typename); + cpu = POWERPC_CPU(obj); + + pc->threads[i] = POWERPC_CPU(obj); + + snprintf(name, sizeof(name), "thread[%d]", i); + object_property_add_child(OBJECT(pc), name, obj); + + cpu->machine_data = g_new0(PnvCPUState, 1); + + object_unref(obj); + } + + for (j = 0; j < cc->nr_threads; j++) { + pnv_core_cpu_realize(pc, pc->threads[j], &local_err); + if (local_err) { + goto err; + } + } + + snprintf(name, sizeof(name), "xscom-core.%d", cc->core_id); + /* TODO: check PNV_XSCOM_EX_SIZE for p10 */ + pnv_xscom_region_init(&pc->xscom_regs, OBJECT(dev), pcc->xscom_ops, + pc, name, PNV_XSCOM_EX_SIZE); + + qemu_register_reset(pnv_core_reset, pc); + return; + +err: + while (--i >= 0) { + obj = OBJECT(pc->threads[i]); + object_unparent(obj); + } + g_free(pc->threads); + error_propagate(errp, local_err); +} + +static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); + + pcc->intc_destroy(pc->chip, cpu); + cpu_remove_sync(CPU(cpu)); + cpu->machine_data = NULL; + g_free(pnv_cpu); + object_unparent(OBJECT(cpu)); +} + +static void pnv_core_unrealize(DeviceState *dev) +{ + PnvCore *pc = PNV_CORE(dev); + CPUCore *cc = CPU_CORE(dev); + int i; + + qemu_unregister_reset(pnv_core_reset, pc); + + for (i = 0; i < cc->nr_threads; i++) { + pnv_core_cpu_unrealize(pc, pc->threads[i]); + } + g_free(pc->threads); +} + +static Property pnv_core_properties[] = { + DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), + DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0), + DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_core_power8_class_init(ObjectClass *oc, void *data) +{ + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + pcc->xscom_ops = &pnv_core_power8_xscom_ops; +} + +static void pnv_core_power9_class_init(ObjectClass *oc, void *data) +{ + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + pcc->xscom_ops = &pnv_core_power9_xscom_ops; +} + +static void pnv_core_power10_class_init(ObjectClass *oc, void *data) +{ + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + /* TODO: Use the P9 XSCOMs for now on P10 */ + pcc->xscom_ops = &pnv_core_power9_xscom_ops; +} + +static void pnv_core_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = pnv_core_realize; + dc->unrealize = pnv_core_unrealize; + device_class_set_props(dc, pnv_core_properties); + dc->user_creatable = false; +} + +#define DEFINE_PNV_CORE_TYPE(family, cpu_model) \ + { \ + .parent = TYPE_PNV_CORE, \ + .name = PNV_CORE_TYPE_NAME(cpu_model), \ + .class_init = pnv_core_##family##_class_init, \ + } + +static const TypeInfo pnv_core_infos[] = { + { + .name = 
TYPE_PNV_CORE, + .parent = TYPE_CPU_CORE, + .instance_size = sizeof(PnvCore), + .class_size = sizeof(PnvCoreClass), + .class_init = pnv_core_class_init, + .abstract = true, + }, + DEFINE_PNV_CORE_TYPE(power8, "power8e_v2.1"), + DEFINE_PNV_CORE_TYPE(power8, "power8_v2.0"), + DEFINE_PNV_CORE_TYPE(power8, "power8nvl_v1.0"), + DEFINE_PNV_CORE_TYPE(power9, "power9_v2.0"), + DEFINE_PNV_CORE_TYPE(power10, "power10_v2.0"), +}; + +DEFINE_TYPES(pnv_core_infos) + +/* + * POWER9 Quads + */ + +#define P9X_EX_NCU_SPEC_BAR 0x11010 + +static uint64_t pnv_quad_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = -1; + + switch (offset) { + case P9X_EX_NCU_SPEC_BAR: + case P9X_EX_NCU_SPEC_BAR + 0x400: /* Second EX */ + val = 0; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: reading @0x%08x\n", __func__, + offset); + } + + return val; +} + +static void pnv_quad_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + uint32_t offset = addr >> 3; + + switch (offset) { + case P9X_EX_NCU_SPEC_BAR: + case P9X_EX_NCU_SPEC_BAR + 0x400: /* Second EX */ + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: writing @0x%08x\n", __func__, + offset); + } +} + +static const MemoryRegionOps pnv_quad_xscom_ops = { + .read = pnv_quad_xscom_read, + .write = pnv_quad_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_quad_realize(DeviceState *dev, Error **errp) +{ + PnvQuad *eq = PNV_QUAD(dev); + char name[32]; + + snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id); + pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops, + eq, name, PNV9_XSCOM_EQ_SIZE); +} + +static Property pnv_quad_properties[] = { + DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_quad_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = pnv_quad_realize; + device_class_set_props(dc, pnv_quad_properties); + dc->user_creatable = false; +} + +static const TypeInfo pnv_quad_info = { + .name = TYPE_PNV_QUAD, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvQuad), + .class_init = pnv_quad_class_init, +}; + +static void pnv_core_register_types(void) +{ + type_register_static(&pnv_quad_info); +} + +type_init(pnv_core_register_types) diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c new file mode 100644 index 000000000..9a262629b --- /dev/null +++ b/hw/ppc/pnv_homer.c @@ -0,0 +1,382 @@ +/* + * QEMU PowerPC PowerNV Emulation of a few HOMER related registers + * + * Copyright (c) 2019, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "exec/hwaddr.h" +#include "exec/memory.h" +#include "sysemu/cpus.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_homer.h" +#include "hw/ppc/pnv_xscom.h" + + +static bool core_max_array(PnvHomer *homer, hwaddr addr) +{ + int i; + PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer); + + for (i = 0; i <= homer->chip->nr_cores; i++) { + if (addr == (hmrc->core_max_base + i)) { + return true; + } + } + return false; +} + +/* P8 Pstate table */ + +#define PNV8_OCC_PSTATE_VERSION 0x1f8001 +#define PNV8_OCC_PSTATE_MIN 0x1f8003 +#define PNV8_OCC_PSTATE_VALID 0x1f8000 +#define PNV8_OCC_PSTATE_THROTTLE 0x1f8002 +#define PNV8_OCC_PSTATE_NOM 0x1f8004 +#define PNV8_OCC_PSTATE_TURBO 0x1f8005 +#define PNV8_OCC_PSTATE_ULTRA_TURBO 0x1f8006 +#define PNV8_OCC_PSTATE_DATA 0x1f8008 +#define PNV8_OCC_PSTATE_ID_ZERO 0x1f8010 +#define PNV8_OCC_PSTATE_ID_ONE 0x1f8018 +#define PNV8_OCC_PSTATE_ID_TWO 0x1f8020 +#define PNV8_OCC_VDD_VOLTAGE_IDENTIFIER 0x1f8012 +#define PNV8_OCC_VCS_VOLTAGE_IDENTIFIER 0x1f8013 +#define PNV8_OCC_PSTATE_ZERO_FREQUENCY 0x1f8014 +#define PNV8_OCC_PSTATE_ONE_FREQUENCY 0x1f801c +#define PNV8_OCC_PSTATE_TWO_FREQUENCY 0x1f8024 +#define PNV8_CORE_MAX_BASE 0x1f8810 + + +static uint64_t pnv_power8_homer_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + + switch (addr) { + case PNV8_OCC_PSTATE_VERSION: + case PNV8_OCC_PSTATE_MIN: + case PNV8_OCC_PSTATE_ID_ZERO: + return 0; + case PNV8_OCC_PSTATE_VALID: + case PNV8_OCC_PSTATE_THROTTLE: + case PNV8_OCC_PSTATE_NOM: + case PNV8_OCC_PSTATE_TURBO: + case PNV8_OCC_PSTATE_ID_ONE: + case PNV8_OCC_VDD_VOLTAGE_IDENTIFIER: + case PNV8_OCC_VCS_VOLTAGE_IDENTIFIER: + return 1; + case PNV8_OCC_PSTATE_ULTRA_TURBO: + case PNV8_OCC_PSTATE_ID_TWO: + return 2; + case PNV8_OCC_PSTATE_DATA: + return 0x1000000000000000; + /* P8 frequency for 0, 1, and 2 pstates */ + case PNV8_OCC_PSTATE_ZERO_FREQUENCY: + case PNV8_OCC_PSTATE_ONE_FREQUENCY: + case PNV8_OCC_PSTATE_TWO_FREQUENCY: + return 3000; + } + /* pstate table core max array */ + if (core_max_array(homer, addr)) { + return 1; + } + return 0; +} + +static void pnv_power8_homer_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + /* callback function defined to homer write */ + return; +} + +static const MemoryRegionOps pnv_power8_homer_ops = { + .read = pnv_power8_homer_read, + .write = pnv_power8_homer_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +/* P8 PBA BARs */ +#define PBA_BAR0 0x00 +#define PBA_BAR1 0x01 +#define PBA_BAR2 0x02 +#define PBA_BAR3 0x03 +#define PBA_BARMASK0 0x04 +#define PBA_BARMASK1 0x05 +#define PBA_BARMASK2 0x06 +#define PBA_BARMASK3 0x07 + +static uint64_t pnv_homer_power8_pba_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + PnvChip *chip = homer->chip; + uint32_t reg = addr >> 3; + uint64_t val = 0; + + switch (reg) { + case PBA_BAR0: + val = PNV_HOMER_BASE(chip); + break; + case PBA_BARMASK0: /* P8 homer region mask */ + val = (PNV_HOMER_SIZE - 1) & 0x300000; + break; + case PBA_BAR3: /* P8 occ common area */ + val = PNV_OCC_COMMON_AREA_BASE; + break; + case PBA_BARMASK3: /* P8 occ common area mask */ + val = (PNV_OCC_COMMON_AREA_SIZE - 1) & 0x700000; + break; + default: + qemu_log_mask(LOG_UNIMP, "PBA: read to unimplemented 
register: Ox%" + HWADDR_PRIx "\n", addr >> 3); + } + return val; +} + +static void pnv_homer_power8_pba_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "PBA: write to unimplemented register: Ox%" + HWADDR_PRIx "\n", addr >> 3); +} + +static const MemoryRegionOps pnv_homer_power8_pba_ops = { + .read = pnv_homer_power8_pba_read, + .write = pnv_homer_power8_pba_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_homer_power8_class_init(ObjectClass *klass, void *data) +{ + PnvHomerClass *homer = PNV_HOMER_CLASS(klass); + + homer->pba_size = PNV_XSCOM_PBA_SIZE; + homer->pba_ops = &pnv_homer_power8_pba_ops; + homer->homer_size = PNV_HOMER_SIZE; + homer->homer_ops = &pnv_power8_homer_ops; + homer->core_max_base = PNV8_CORE_MAX_BASE; +} + +static const TypeInfo pnv_homer_power8_type_info = { + .name = TYPE_PNV8_HOMER, + .parent = TYPE_PNV_HOMER, + .instance_size = sizeof(PnvHomer), + .class_init = pnv_homer_power8_class_init, +}; + +/* P9 Pstate table */ + +#define PNV9_OCC_PSTATE_ID_ZERO 0xe2018 +#define PNV9_OCC_PSTATE_ID_ONE 0xe2020 +#define PNV9_OCC_PSTATE_ID_TWO 0xe2028 +#define PNV9_OCC_PSTATE_DATA 0xe2000 +#define PNV9_OCC_PSTATE_DATA_AREA 0xe2008 +#define PNV9_OCC_PSTATE_MIN 0xe2003 +#define PNV9_OCC_PSTATE_NOM 0xe2004 +#define PNV9_OCC_PSTATE_TURBO 0xe2005 +#define PNV9_OCC_PSTATE_ULTRA_TURBO 0xe2818 +#define PNV9_OCC_MAX_PSTATE_ULTRA_TURBO 0xe2006 +#define PNV9_OCC_PSTATE_MAJOR_VERSION 0xe2001 +#define PNV9_OCC_OPAL_RUNTIME_DATA 0xe2b85 +#define PNV9_CHIP_HOMER_IMAGE_POINTER 0x200008 +#define PNV9_CHIP_HOMER_BASE 0x0 +#define PNV9_OCC_PSTATE_ZERO_FREQUENCY 0xe201c +#define PNV9_OCC_PSTATE_ONE_FREQUENCY 0xe2024 +#define PNV9_OCC_PSTATE_TWO_FREQUENCY 0xe202c +#define PNV9_OCC_ROLE_MASTER_OR_SLAVE 0xe2002 +#define PNV9_CORE_MAX_BASE 0xe2819 + + +static uint64_t pnv_power9_homer_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + + switch (addr) { + case PNV9_OCC_MAX_PSTATE_ULTRA_TURBO: + case PNV9_OCC_PSTATE_ID_ZERO: + return 0; + case PNV9_OCC_PSTATE_DATA: + case PNV9_OCC_ROLE_MASTER_OR_SLAVE: + case PNV9_OCC_PSTATE_NOM: + case PNV9_OCC_PSTATE_TURBO: + case PNV9_OCC_PSTATE_ID_ONE: + case PNV9_OCC_PSTATE_ULTRA_TURBO: + case PNV9_OCC_OPAL_RUNTIME_DATA: + return 1; + case PNV9_OCC_PSTATE_MIN: + case PNV9_OCC_PSTATE_ID_TWO: + return 2; + + /* 3000 khz frequency for 0, 1, and 2 pstates */ + case PNV9_OCC_PSTATE_ZERO_FREQUENCY: + case PNV9_OCC_PSTATE_ONE_FREQUENCY: + case PNV9_OCC_PSTATE_TWO_FREQUENCY: + return 3000; + case PNV9_OCC_PSTATE_MAJOR_VERSION: + return 0x90; + case PNV9_CHIP_HOMER_BASE: + case PNV9_OCC_PSTATE_DATA_AREA: + case PNV9_CHIP_HOMER_IMAGE_POINTER: + return 0x1000000000000000; + } + /* pstate table core max array */ + if (core_max_array(homer, addr)) { + return 1; + } + return 0; +} + +static void pnv_power9_homer_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + /* callback function defined to homer write */ + return; +} + +static const MemoryRegionOps pnv_power9_homer_ops = { + .read = pnv_power9_homer_read, + .write = pnv_power9_homer_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_homer_power9_pba_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + PnvChip *chip 
= homer->chip; + uint32_t reg = addr >> 3; + uint64_t val = 0; + + switch (reg) { + case PBA_BAR0: + val = PNV9_HOMER_BASE(chip); + break; + case PBA_BARMASK0: /* P9 homer region mask */ + val = (PNV9_HOMER_SIZE - 1) & 0x300000; + break; + case PBA_BAR2: /* P9 occ common area */ + val = PNV9_OCC_COMMON_AREA_BASE; + break; + case PBA_BARMASK2: /* P9 occ common area size */ + val = (PNV9_OCC_COMMON_AREA_SIZE - 1) & 0x700000; + break; + default: + qemu_log_mask(LOG_UNIMP, "PBA: read from unimplemented register: 0x%" + HWADDR_PRIx "\n", addr >> 3); + } + return val; +} + +static void pnv_homer_power9_pba_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "PBA: write to unimplemented register: 0x%" + HWADDR_PRIx "\n", addr >> 3); +} + +static const MemoryRegionOps pnv_homer_power9_pba_ops = { + .read = pnv_homer_power9_pba_read, + .write = pnv_homer_power9_pba_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_homer_power9_class_init(ObjectClass *klass, void *data) +{ + PnvHomerClass *homer = PNV_HOMER_CLASS(klass); + + homer->pba_size = PNV9_XSCOM_PBA_SIZE; + homer->pba_ops = &pnv_homer_power9_pba_ops; + homer->homer_size = PNV9_HOMER_SIZE; + homer->homer_ops = &pnv_power9_homer_ops; + homer->core_max_base = PNV9_CORE_MAX_BASE; +} + +static const TypeInfo pnv_homer_power9_type_info = { + .name = TYPE_PNV9_HOMER, + .parent = TYPE_PNV_HOMER, + .instance_size = sizeof(PnvHomer), + .class_init = pnv_homer_power9_class_init, +}; + +static void pnv_homer_realize(DeviceState *dev, Error **errp) +{ + PnvHomer *homer = PNV_HOMER(dev); + PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer); + + assert(homer->chip); + + pnv_xscom_region_init(&homer->pba_regs, OBJECT(dev), hmrc->pba_ops, + homer, "xscom-pba", hmrc->pba_size); + + /* homer region */ + memory_region_init_io(&homer->regs, OBJECT(dev), + hmrc->homer_ops, homer, "homer-main-memory", + hmrc->homer_size); +} + +static Property pnv_homer_properties[] = { + DEFINE_PROP_LINK("chip", PnvHomer, chip, TYPE_PNV_CHIP, PnvChip *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_homer_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_homer_realize; + dc->desc = "PowerNV HOMER Memory"; + device_class_set_props(dc, pnv_homer_properties); + dc->user_creatable = false; +} + +static const TypeInfo pnv_homer_type_info = { + .name = TYPE_PNV_HOMER, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvHomer), + .class_init = pnv_homer_class_init, + .class_size = sizeof(PnvHomerClass), + .abstract = true, +}; + +static void pnv_homer_register_types(void) +{ + type_register_static(&pnv_homer_type_info); + type_register_static(&pnv_homer_power8_type_info); + type_register_static(&pnv_homer_power9_type_info); +} + +type_init(pnv_homer_register_types); diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c new file mode 100644 index 000000000..bcbca3db9 --- /dev/null +++ b/hw/ppc/pnv_lpc.c @@ -0,0 +1,853 @@ +/* + * QEMU PowerPC PowerNV LPC controller + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/irq.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_lpc.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/fdt.h" + +#include <libfdt.h> + +enum { + ECCB_CTL = 0, + ECCB_RESET = 1, + ECCB_STAT = 2, + ECCB_DATA = 3, +}; + +/* OPB Master LS registers */ +#define OPB_MASTER_LS_ROUTE0 0x8 +#define OPB_MASTER_LS_ROUTE1 0xC +#define OPB_MASTER_LS_IRQ_STAT 0x50 +#define OPB_MASTER_IRQ_LPC 0x00000800 +#define OPB_MASTER_LS_IRQ_MASK 0x54 +#define OPB_MASTER_LS_IRQ_POL 0x58 +#define OPB_MASTER_LS_IRQ_INPUT 0x5c + +/* LPC HC registers */ +#define LPC_HC_FW_SEG_IDSEL 0x24 +#define LPC_HC_FW_RD_ACC_SIZE 0x28 +#define LPC_HC_FW_RD_1B 0x00000000 +#define LPC_HC_FW_RD_2B 0x01000000 +#define LPC_HC_FW_RD_4B 0x02000000 +#define LPC_HC_FW_RD_16B 0x04000000 +#define LPC_HC_FW_RD_128B 0x07000000 +#define LPC_HC_IRQSER_CTRL 0x30 +#define LPC_HC_IRQSER_EN 0x80000000 +#define LPC_HC_IRQSER_QMODE 0x40000000 +#define LPC_HC_IRQSER_START_MASK 0x03000000 +#define LPC_HC_IRQSER_START_4CLK 0x00000000 +#define LPC_HC_IRQSER_START_6CLK 0x01000000 +#define LPC_HC_IRQSER_START_8CLK 0x02000000 +#define LPC_HC_IRQMASK 0x34 /* same bit defs as LPC_HC_IRQSTAT */ +#define LPC_HC_IRQSTAT 0x38 +#define LPC_HC_IRQ_SERIRQ0 0x80000000 /* all bits down to ... 
*/ +#define LPC_HC_IRQ_SERIRQ16 0x00008000 /* IRQ16=IOCHK#, IRQ2=SMI# */ +#define LPC_HC_IRQ_SERIRQ_ALL 0xffff8000 +#define LPC_HC_IRQ_LRESET 0x00000400 +#define LPC_HC_IRQ_SYNC_ABNORM_ERR 0x00000080 +#define LPC_HC_IRQ_SYNC_NORESP_ERR 0x00000040 +#define LPC_HC_IRQ_SYNC_NORM_ERR 0x00000020 +#define LPC_HC_IRQ_SYNC_TIMEOUT_ERR 0x00000010 +#define LPC_HC_IRQ_SYNC_TARG_TAR_ERR 0x00000008 +#define LPC_HC_IRQ_SYNC_BM_TAR_ERR 0x00000004 +#define LPC_HC_IRQ_SYNC_BM0_REQ 0x00000002 +#define LPC_HC_IRQ_SYNC_BM1_REQ 0x00000001 +#define LPC_HC_ERROR_ADDRESS 0x40 + +#define LPC_OPB_SIZE 0x100000000ull + +#define ISA_IO_SIZE 0x00010000 +#define ISA_MEM_SIZE 0x10000000 +#define ISA_FW_SIZE 0x10000000 +#define LPC_IO_OPB_ADDR 0xd0010000 +#define LPC_IO_OPB_SIZE 0x00010000 +#define LPC_MEM_OPB_ADDR 0xe0000000 +#define LPC_MEM_OPB_SIZE 0x10000000 +#define LPC_FW_OPB_ADDR 0xf0000000 +#define LPC_FW_OPB_SIZE 0x10000000 + +#define LPC_OPB_REGS_OPB_ADDR 0xc0010000 +#define LPC_OPB_REGS_OPB_SIZE 0x00000060 +#define LPC_OPB_REGS_OPBA_ADDR 0xc0011000 +#define LPC_OPB_REGS_OPBA_SIZE 0x00000008 +#define LPC_HC_REGS_OPB_ADDR 0xc0012000 +#define LPC_HC_REGS_OPB_SIZE 0x00000100 + +static int pnv_lpc_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset) +{ + const char compat[] = "ibm,power8-lpc\0ibm,lpc"; + char *name; + int offset; + uint32_t lpc_pcba = PNV_XSCOM_LPC_BASE; + uint32_t reg[] = { + cpu_to_be32(lpc_pcba), + cpu_to_be32(PNV_XSCOM_LPC_SIZE) + }; + + name = g_strdup_printf("isa@%x", lpc_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); + return 0; +} + +/* POWER9 only */ +int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset, uint64_t lpcm_addr, + uint64_t lpcm_size) +{ + const char compat[] = "ibm,power9-lpcm-opb\0simple-bus"; + const char lpc_compat[] = "ibm,power9-lpc\0ibm,lpc"; + char *name; + int offset, lpcm_offset; + uint32_t opb_ranges[8] = { 0, + cpu_to_be32(lpcm_addr >> 32), + cpu_to_be32((uint32_t)lpcm_addr), + cpu_to_be32(lpcm_size / 2), + cpu_to_be32(lpcm_size / 2), + cpu_to_be32(lpcm_addr >> 32), + cpu_to_be32(lpcm_size / 2), + cpu_to_be32(lpcm_size / 2), + }; + uint32_t opb_reg[4] = { cpu_to_be32(lpcm_addr >> 32), + cpu_to_be32((uint32_t)lpcm_addr), + cpu_to_be32(lpcm_size >> 32), + cpu_to_be32((uint32_t)lpcm_size), + }; + uint32_t lpc_ranges[12] = { 0, 0, + cpu_to_be32(LPC_MEM_OPB_ADDR), + cpu_to_be32(LPC_MEM_OPB_SIZE), + cpu_to_be32(1), 0, + cpu_to_be32(LPC_IO_OPB_ADDR), + cpu_to_be32(LPC_IO_OPB_SIZE), + cpu_to_be32(3), 0, + cpu_to_be32(LPC_FW_OPB_ADDR), + cpu_to_be32(LPC_FW_OPB_SIZE), + }; + uint32_t reg[2]; + + /* + * OPB bus + */ + name = g_strdup_printf("lpcm-opb@%"PRIx64, lpcm_addr); + lpcm_offset = fdt_add_subnode(fdt, root_offset, name); + _FDT(lpcm_offset); + g_free(name); + + _FDT((fdt_setprop(fdt, lpcm_offset, "reg", opb_reg, sizeof(opb_reg)))); + _FDT((fdt_setprop_cell(fdt, lpcm_offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, lpcm_offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, lpcm_offset, "compatible", compat, sizeof(compat)))); + _FDT((fdt_setprop_cell(fdt, lpcm_offset, "ibm,chip-id", chip->chip_id))); + _FDT((fdt_setprop(fdt, lpcm_offset, "ranges", opb_ranges, + sizeof(opb_ranges)))); + + /* + * OPB Master registers + */ + name = 
g_strdup_printf("opb-master@%x", LPC_OPB_REGS_OPB_ADDR); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + + reg[0] = cpu_to_be32(LPC_OPB_REGS_OPB_ADDR); + reg[1] = cpu_to_be32(LPC_OPB_REGS_OPB_SIZE); + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_string(fdt, offset, "compatible", + "ibm,power9-lpcm-opb-master"))); + + /* + * OPB arbitrer registers + */ + name = g_strdup_printf("opb-arbitrer@%x", LPC_OPB_REGS_OPBA_ADDR); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + + reg[0] = cpu_to_be32(LPC_OPB_REGS_OPBA_ADDR); + reg[1] = cpu_to_be32(LPC_OPB_REGS_OPBA_SIZE); + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_string(fdt, offset, "compatible", + "ibm,power9-lpcm-opb-arbiter"))); + + /* + * LPC Host Controller registers + */ + name = g_strdup_printf("lpc-controller@%x", LPC_HC_REGS_OPB_ADDR); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + + reg[0] = cpu_to_be32(LPC_HC_REGS_OPB_ADDR); + reg[1] = cpu_to_be32(LPC_HC_REGS_OPB_SIZE); + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_string(fdt, offset, "compatible", + "ibm,power9-lpc-controller"))); + + name = g_strdup_printf("lpc@0"); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, offset, "compatible", lpc_compat, + sizeof(lpc_compat)))); + _FDT((fdt_setprop(fdt, offset, "ranges", lpc_ranges, + sizeof(lpc_ranges)))); + + return 0; +} + +/* + * These read/write handlers of the OPB address space should be common + * with the P9 LPC Controller which uses direct MMIOs. + * + * TODO: rework to use address_space_stq() and address_space_ldq() + * instead. + */ +static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + /* XXX Handle access size limits and FW read caching here */ + return !address_space_read(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz); +} + +static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + /* XXX Handle access size limits here */ + return !address_space_write(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz); +} + +#define ECCB_CTL_READ PPC_BIT(15) +#define ECCB_CTL_SZ_LSH (63 - 7) +#define ECCB_CTL_SZ_MASK PPC_BITMASK(4, 7) +#define ECCB_CTL_ADDR_MASK PPC_BITMASK(32, 63) + +#define ECCB_STAT_OP_DONE PPC_BIT(52) +#define ECCB_STAT_OP_ERR PPC_BIT(52) +#define ECCB_STAT_RD_DATA_LSH (63 - 37) +#define ECCB_STAT_RD_DATA_MASK (0xffffffff << ECCB_STAT_RD_DATA_LSH) + +static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd) +{ + /* XXX Check for magic bits at the top, addr size etc... 
*/ + unsigned int sz = (cmd & ECCB_CTL_SZ_MASK) >> ECCB_CTL_SZ_LSH; + uint32_t opb_addr = cmd & ECCB_CTL_ADDR_MASK; + uint8_t data[8]; + bool success; + + if (sz > sizeof(data)) { + qemu_log_mask(LOG_GUEST_ERROR, + "ECCB: invalid operation at @0x%08x size %d\n", opb_addr, sz); + return; + } + + if (cmd & ECCB_CTL_READ) { + success = opb_read(lpc, opb_addr, data, sz); + if (success) { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (((uint64_t)data[0]) << 24 | + ((uint64_t)data[1]) << 16 | + ((uint64_t)data[2]) << 8 | + ((uint64_t)data[3])) << ECCB_STAT_RD_DATA_LSH; + } else { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (0xffffffffull << ECCB_STAT_RD_DATA_LSH); + } + } else { + data[0] = lpc->eccb_data_reg >> 24; + data[1] = lpc->eccb_data_reg >> 16; + data[2] = lpc->eccb_data_reg >> 8; + data[3] = lpc->eccb_data_reg; + + success = opb_write(lpc, opb_addr, data, sz); + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE; + } + /* XXX Which error bit (if any) to signal OPB error ? */ +} + +static uint64_t pnv_lpc_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset & 3) { + case ECCB_CTL: + case ECCB_RESET: + val = 0; + break; + case ECCB_STAT: + val = lpc->eccb_stat_reg; + lpc->eccb_stat_reg = 0; + break; + case ECCB_DATA: + val = ((uint64_t)lpc->eccb_data_reg) << 32; + break; + } + return val; +} + +static void pnv_lpc_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + + switch (offset & 3) { + case ECCB_CTL: + pnv_lpc_do_eccb(lpc, val); + break; + case ECCB_RESET: + /* XXXX */ + break; + case ECCB_STAT: + break; + case ECCB_DATA: + lpc->eccb_data_reg = val >> 32; + break; + } +} + +static const MemoryRegionOps pnv_lpc_xscom_ops = { + .read = pnv_lpc_xscom_read, + .write = pnv_lpc_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_lpc_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint64_t val = 0; + uint32_t opb_addr = addr & ECCB_CTL_ADDR_MASK; + MemTxResult result; + + switch (size) { + case 4: + val = address_space_ldl(&lpc->opb_as, opb_addr, MEMTXATTRS_UNSPECIFIED, + &result); + break; + case 1: + val = address_space_ldub(&lpc->opb_as, opb_addr, MEMTXATTRS_UNSPECIFIED, + &result); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "OPB read failed at @0x%" + HWADDR_PRIx " invalid size %d\n", addr, size); + return 0; + } + + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "OPB read failed at @0x%" + HWADDR_PRIx "\n", addr); + } + + return val; +} + +static void pnv_lpc_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t opb_addr = addr & ECCB_CTL_ADDR_MASK; + MemTxResult result; + + switch (size) { + case 4: + address_space_stl(&lpc->opb_as, opb_addr, val, MEMTXATTRS_UNSPECIFIED, + &result); + break; + case 1: + address_space_stb(&lpc->opb_as, opb_addr, val, MEMTXATTRS_UNSPECIFIED, + &result); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "OPB write failed at @0x%" + HWADDR_PRIx " invalid size %d\n", addr, size); + return; + } + + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "OPB write failed at @0x%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps pnv_lpc_mmio_ops = { + 
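/* Only 1- and 4-byte accesses are implemented by the handlers above */ + 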
.read = pnv_lpc_mmio_read, + .write = pnv_lpc_mmio_write, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + }, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_lpc_eval_irqs(PnvLpcController *lpc) +{ + bool lpc_to_opb_irq = false; + PnvLpcClass *plc = PNV_LPC_GET_CLASS(lpc); + + /* Update LPC controller to OPB line */ + if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) { + uint32_t irqs; + + irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask; + lpc_to_opb_irq = (irqs != 0); + } + + /* We don't honor the polarity register, it's pointless and unused + * anyway + */ + if (lpc_to_opb_irq) { + lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC; + } else { + lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC; + } + + /* Update OPB internal latch */ + lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask; + + /* Reflect the interrupt */ + pnv_psi_irq_set(lpc->psi, plc->psi_irq, lpc->opb_irq_stat != 0); +} + +static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + val = lpc->lpc_hc_fw_seg_idsel; + break; + case LPC_HC_FW_RD_ACC_SIZE: + val = lpc->lpc_hc_fw_rd_acc_size; + break; + case LPC_HC_IRQSER_CTRL: + val = lpc->lpc_hc_irqser_ctrl; + break; + case LPC_HC_IRQMASK: + val = lpc->lpc_hc_irqmask; + break; + case LPC_HC_IRQSTAT: + val = lpc->lpc_hc_irqstat; + break; + case LPC_HC_ERROR_ADDRESS: + val = lpc->lpc_hc_error_addr; + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%" + HWADDR_PRIx "\n", addr); + } + return val; +} + +static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + PnvLpcController *lpc = opaque; + + /* XXX Filter out reserved bits */ + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + /* XXX Actually figure out how that works as this impacts + * memory regions/aliases + */ + lpc->lpc_hc_fw_seg_idsel = val; + break; + case LPC_HC_FW_RD_ACC_SIZE: + lpc->lpc_hc_fw_rd_acc_size = val; + break; + case LPC_HC_IRQSER_CTRL: + lpc->lpc_hc_irqser_ctrl = val; + pnv_lpc_eval_irqs(lpc); + break; + case LPC_HC_IRQMASK: + lpc->lpc_hc_irqmask = val; + pnv_lpc_eval_irqs(lpc); + break; + case LPC_HC_IRQSTAT: + lpc->lpc_hc_irqstat &= ~val; + pnv_lpc_eval_irqs(lpc); + break; + case LPC_HC_ERROR_ADDRESS: + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps lpc_hc_ops = { + .read = lpc_hc_read, + .write = lpc_hc_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case OPB_MASTER_LS_ROUTE0: /* TODO */ + val = lpc->opb_irq_route0; + break; + case OPB_MASTER_LS_ROUTE1: /* TODO */ + val = lpc->opb_irq_route1; + break; + case OPB_MASTER_LS_IRQ_STAT: + val = lpc->opb_irq_stat; + break; + case OPB_MASTER_LS_IRQ_MASK: + val = lpc->opb_irq_mask; + break; + case OPB_MASTER_LS_IRQ_POL: + val = lpc->opb_irq_pol; + break; + case OPB_MASTER_LS_IRQ_INPUT: + val = lpc->opb_irq_input; + break; + default: + qemu_log_mask(LOG_UNIMP, "OPBM: read on unimplemented register: 0x%" + HWADDR_PRIx "\n", addr); + } + + return val; +} + +static void opb_master_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + 
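/* + * OPB_MASTER_LS_IRQ_STAT is write-one-to-clear below; mask and + * polarity updates re-evaluate the output via pnv_lpc_eval_irqs(). + */ + 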
PnvLpcController *lpc = opaque; + + switch (addr) { + case OPB_MASTER_LS_ROUTE0: /* TODO */ + lpc->opb_irq_route0 = val; + break; + case OPB_MASTER_LS_ROUTE1: /* TODO */ + lpc->opb_irq_route1 = val; + break; + case OPB_MASTER_LS_IRQ_STAT: + lpc->opb_irq_stat &= ~val; + pnv_lpc_eval_irqs(lpc); + break; + case OPB_MASTER_LS_IRQ_MASK: + lpc->opb_irq_mask = val; + pnv_lpc_eval_irqs(lpc); + break; + case OPB_MASTER_LS_IRQ_POL: + lpc->opb_irq_pol = val; + pnv_lpc_eval_irqs(lpc); + break; + case OPB_MASTER_LS_IRQ_INPUT: + /* Read only */ + break; + default: + qemu_log_mask(LOG_UNIMP, "OPBM: write on unimplemented register: 0x%" + HWADDR_PRIx " val=0x%08"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps opb_master_ops = { + .read = opb_master_read, + .write = opb_master_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void pnv_lpc_power8_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev); + Error *local_err = NULL; + + plc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* P8 uses a XSCOM region for LPC registers */ + pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(lpc), + &pnv_lpc_xscom_ops, lpc, "xscom-lpc", + PNV_XSCOM_LPC_SIZE); +} + +static void pnv_lpc_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + PnvLpcClass *plc = PNV_LPC_CLASS(klass); + + dc->desc = "PowerNV LPC Controller POWER8"; + + xdc->dt_xscom = pnv_lpc_dt_xscom; + + plc->psi_irq = PSIHB_IRQ_LPC_I2C; + + device_class_set_parent_realize(dc, pnv_lpc_power8_realize, + &plc->parent_realize); +} + +static const TypeInfo pnv_lpc_power8_info = { + .name = TYPE_PNV8_LPC, + .parent = TYPE_PNV_LPC, + .class_init = pnv_lpc_power8_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_lpc_power9_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev); + Error *local_err = NULL; + + plc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* P9 uses a MMIO region */ + memory_region_init_io(&lpc->xscom_regs, OBJECT(lpc), &pnv_lpc_mmio_ops, + lpc, "lpcm", PNV9_LPCM_SIZE); +} + +static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvLpcClass *plc = PNV_LPC_CLASS(klass); + + dc->desc = "PowerNV LPC Controller POWER9"; + + plc->psi_irq = PSIHB9_IRQ_LPCHC; + + device_class_set_parent_realize(dc, pnv_lpc_power9_realize, + &plc->parent_realize); +} + +static const TypeInfo pnv_lpc_power9_info = { + .name = TYPE_PNV9_LPC, + .parent = TYPE_PNV_LPC, + .class_init = pnv_lpc_power9_class_init, +}; + +static void pnv_lpc_power10_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "PowerNV LPC Controller POWER10"; +} + +static const TypeInfo pnv_lpc_power10_info = { + .name = TYPE_PNV10_LPC, + .parent = TYPE_PNV9_LPC, + .class_init = pnv_lpc_power10_class_init, +}; + +static void pnv_lpc_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + + assert(lpc->psi); + + /* Reg inits */ + lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B; + + /* Create 
address space and backing MR for the OPB bus */ + memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", 0x100000000ull); + address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb"); + + /* Create ISA IO and Mem space regions which are the root of + * the ISA bus (ie, ISA address spaces). We don't create a + * separate one for FW which we alias to memory. + */ + memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE); + memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE); + memory_region_init(&lpc->isa_fw, OBJECT(dev), "isa-fw", ISA_FW_SIZE); + + /* Create windows from the OPB space to the ISA space */ + memory_region_init_alias(&lpc->opb_isa_io, OBJECT(dev), "lpc-isa-io", + &lpc->isa_io, 0, LPC_IO_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_IO_OPB_ADDR, + &lpc->opb_isa_io); + memory_region_init_alias(&lpc->opb_isa_mem, OBJECT(dev), "lpc-isa-mem", + &lpc->isa_mem, 0, LPC_MEM_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_MEM_OPB_ADDR, + &lpc->opb_isa_mem); + memory_region_init_alias(&lpc->opb_isa_fw, OBJECT(dev), "lpc-isa-fw", + &lpc->isa_fw, 0, LPC_FW_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_FW_OPB_ADDR, + &lpc->opb_isa_fw); + + /* Create MMIO regions for LPC HC and OPB registers */ + memory_region_init_io(&lpc->opb_master_regs, OBJECT(dev), &opb_master_ops, + lpc, "lpc-opb-master", LPC_OPB_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_OPB_REGS_OPB_ADDR, + &lpc->opb_master_regs); + memory_region_init_io(&lpc->lpc_hc_regs, OBJECT(dev), &lpc_hc_ops, lpc, + "lpc-hc", LPC_HC_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR, + &lpc->lpc_hc_regs); +} + +static Property pnv_lpc_properties[] = { + DEFINE_PROP_LINK("psi", PnvLpcController, psi, TYPE_PNV_PSI, PnvPsi *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_lpc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_lpc_realize; + dc->desc = "PowerNV LPC Controller"; + device_class_set_props(dc, pnv_lpc_properties); + dc->user_creatable = false; +} + +static const TypeInfo pnv_lpc_info = { + .name = TYPE_PNV_LPC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvLpcController), + .class_init = pnv_lpc_class_init, + .class_size = sizeof(PnvLpcClass), + .abstract = true, +}; + +static void pnv_lpc_register_types(void) +{ + type_register_static(&pnv_lpc_info); + type_register_static(&pnv_lpc_power8_info); + type_register_static(&pnv_lpc_power9_info); + type_register_static(&pnv_lpc_power10_info); +} + +type_init(pnv_lpc_register_types) + +/* If we don't use the built-in LPC interrupt deserializer, we need + * to provide a set of qirqs for the ISA bus or things will go bad. + * + * Most machines using pre-Naples chips (without said deserializer) + * have a CPLD that will collect the SerIRQ and shoot them as a + * single level interrupt to the P8 chip. So let's setup a hook + * for doing just that. 
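 *
 * Editor's illustration (not part of the patch), assuming the CPLD
 * handler below: with ISA IRQ 4 and IRQ 10 both asserted,
 * cpld_irqstate becomes (1u << 4) | (1u << 10) = 0x410 and the single
 * PSIHB_IRQ_EXTERNAL line is raised; it only drops once every source
 * deasserts and cpld_irqstate returns to 0.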
+ */ +static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) +{ + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + uint32_t old_state = pnv->cpld_irqstate; + PnvLpcController *lpc = PNV_LPC(opaque); + + if (level) { + pnv->cpld_irqstate |= 1u << n; + } else { + pnv->cpld_irqstate &= ~(1u << n); + } + + if (pnv->cpld_irqstate != old_state) { + pnv_psi_irq_set(lpc->psi, PSIHB_IRQ_EXTERNAL, pnv->cpld_irqstate != 0); + } +} + +static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + + /* The Naples HW latches the 1 levels, clearing is done by SW */ + if (level) { + lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n; + pnv_lpc_eval_irqs(lpc); + } +} + +ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp) +{ + Error *local_err = NULL; + ISABus *isa_bus; + qemu_irq *irqs; + qemu_irq_handler handler; + + /* let isa_bus_new() create its own bridge on SysBus otherwise + * devices specified on the command line won't find the bus and + * will fail to create. + */ + isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return NULL; + } + + /* Not all variants have a working serial irq decoder. If not, + * handling of LPC interrupts becomes a platform issue (some + * platforms have a CPLD to do it). + */ + if (use_cpld) { + handler = pnv_lpc_isa_irq_handler_cpld; + } else { + handler = pnv_lpc_isa_irq_handler; + } + + irqs = qemu_allocate_irqs(handler, lpc, ISA_NUM_IRQS); + + isa_bus_irqs(isa_bus, irqs); + + return isa_bus; +} diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c new file mode 100644 index 000000000..5a716c256 --- /dev/null +++ b/hw/ppc/pnv_occ.c @@ -0,0 +1,302 @@ +/* + * QEMU PowerPC PowerNV Emulation of a few OCC related registers + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/pnv_occ.h" + +#define OCB_OCI_OCCMISC 0x4020 +#define OCB_OCI_OCCMISC_AND 0x4021 +#define OCB_OCI_OCCMISC_OR 0x4022 + +/* OCC sensors */ +#define OCC_SENSOR_DATA_BLOCK_OFFSET 0x580000 +#define OCC_SENSOR_DATA_VALID 0x580001 +#define OCC_SENSOR_DATA_VERSION 0x580002 +#define OCC_SENSOR_DATA_READING_VERSION 0x580004 +#define OCC_SENSOR_DATA_NR_SENSORS 0x580008 +#define OCC_SENSOR_DATA_NAMES_OFFSET 0x580010 +#define OCC_SENSOR_DATA_READING_PING_OFFSET 0x580014 +#define OCC_SENSOR_DATA_READING_PONG_OFFSET 0x58000c +#define OCC_SENSOR_DATA_NAME_LENGTH 0x58000d +#define OCC_SENSOR_NAME_STRUCTURE_TYPE 0x580023 +#define OCC_SENSOR_LOC_CORE 0x580022 +#define OCC_SENSOR_LOC_GPU 0x580020 +#define OCC_SENSOR_TYPE_POWER 0x580003 +#define OCC_SENSOR_NAME 0x580005 +#define HWMON_SENSORS_MASK 0x58001e +#define SLW_IMAGE_BASE 0x0 + +static void pnv_occ_set_misc(PnvOCC *occ, uint64_t val) +{ + bool irq_state; + PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ); + + val &= 0xffff000000000000ull; + + occ->occmisc = val; + irq_state = !!(val >> 63); + pnv_psi_irq_set(occ->psi, poc->psi_irq, irq_state); +} + +static uint64_t pnv_occ_power8_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvOCC *occ = PNV_OCC(opaque); + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset) { + case OCB_OCI_OCCMISC: + val = occ->occmisc; + break; + default: + qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr >> 3); + } + return val; +} + +static void pnv_occ_power8_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvOCC *occ = PNV_OCC(opaque); + uint32_t offset = addr >> 3; + + switch (offset) { + case OCB_OCI_OCCMISC_AND: + pnv_occ_set_misc(occ, occ->occmisc & val); + break; + case OCB_OCI_OCCMISC_OR: + pnv_occ_set_misc(occ, occ->occmisc | val); + break; + case OCB_OCI_OCCMISC: + pnv_occ_set_misc(occ, val); + break; + default: + qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr >> 3); + } +} + +static uint64_t pnv_occ_common_area_read(void *opaque, hwaddr addr, + unsigned width) +{ + switch (addr) { + /* + * occ-sensor sanity check that asserts the sensor + * header block + */ + case OCC_SENSOR_DATA_BLOCK_OFFSET: + case OCC_SENSOR_DATA_VALID: + case OCC_SENSOR_DATA_VERSION: + case OCC_SENSOR_DATA_READING_VERSION: + case OCC_SENSOR_DATA_NR_SENSORS: + case OCC_SENSOR_DATA_NAMES_OFFSET: + case OCC_SENSOR_DATA_READING_PING_OFFSET: + case OCC_SENSOR_DATA_READING_PONG_OFFSET: + case OCC_SENSOR_NAME_STRUCTURE_TYPE: + return 1; + case OCC_SENSOR_DATA_NAME_LENGTH: + return 0x30; + case OCC_SENSOR_LOC_CORE: + return 0x0040; + case OCC_SENSOR_TYPE_POWER: + return 0x0080; + case OCC_SENSOR_NAME: + return 0x1000; + case HWMON_SENSORS_MASK: + case OCC_SENSOR_LOC_GPU: + return 0x8e00; + case SLW_IMAGE_BASE: + return 0x1000000000000000; + } + return 0; +} + +static void pnv_occ_common_area_write(void *opaque, hwaddr addr, + uint64_t val, unsigned width) +{ + /* callback function defined to occ common area write */ + return; +} + +static const MemoryRegionOps pnv_occ_power8_xscom_ops = { + .read = pnv_occ_power8_xscom_read, + .write = pnv_occ_power8_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = 
+
+const MemoryRegionOps pnv_occ_sram_ops = {
+    .read = pnv_occ_common_area_read,
+    .write = pnv_occ_common_area_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_occ_power8_class_init(ObjectClass *klass, void *data)
+{
+    PnvOCCClass *poc = PNV_OCC_CLASS(klass);
+
+    poc->xscom_size = PNV_XSCOM_OCC_SIZE;
+    poc->xscom_ops = &pnv_occ_power8_xscom_ops;
+    poc->psi_irq = PSIHB_IRQ_OCC;
+}
+
+static const TypeInfo pnv_occ_power8_type_info = {
+    .name = TYPE_PNV8_OCC,
+    .parent = TYPE_PNV_OCC,
+    .instance_size = sizeof(PnvOCC),
+    .class_init = pnv_occ_power8_class_init,
+};
+
+#define P9_OCB_OCI_OCCMISC 0x6080
+#define P9_OCB_OCI_OCCMISC_CLEAR 0x6081
+#define P9_OCB_OCI_OCCMISC_OR 0x6082
+
+
+static uint64_t pnv_occ_power9_xscom_read(void *opaque, hwaddr addr,
+                                          unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+    uint64_t val = 0;
+
+    switch (offset) {
+    case P9_OCB_OCI_OCCMISC:
+        val = occ->occmisc;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+    return val;
+}
+
+static void pnv_occ_power9_xscom_write(void *opaque, hwaddr addr,
+                                       uint64_t val, unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+
+    switch (offset) {
+    case P9_OCB_OCI_OCCMISC_CLEAR:
+        pnv_occ_set_misc(occ, 0);
+        break;
+    case P9_OCB_OCI_OCCMISC_OR:
+        pnv_occ_set_misc(occ, occ->occmisc | val);
+        break;
+    case P9_OCB_OCI_OCCMISC:
+        pnv_occ_set_misc(occ, val);
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+}
+
+static const MemoryRegionOps pnv_occ_power9_xscom_ops = {
+    .read = pnv_occ_power9_xscom_read,
+    .write = pnv_occ_power9_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_occ_power9_class_init(ObjectClass *klass, void *data)
+{
+    PnvOCCClass *poc = PNV_OCC_CLASS(klass);
+
+    poc->xscom_size = PNV9_XSCOM_OCC_SIZE;
+    poc->xscom_ops = &pnv_occ_power9_xscom_ops;
+    poc->psi_irq = PSIHB9_IRQ_OCC;
+}
+
+static const TypeInfo pnv_occ_power9_type_info = {
+    .name = TYPE_PNV9_OCC,
+    .parent = TYPE_PNV_OCC,
+    .instance_size = sizeof(PnvOCC),
+    .class_init = pnv_occ_power9_class_init,
+};
+
+static void pnv_occ_realize(DeviceState *dev, Error **errp)
+{
+    PnvOCC *occ = PNV_OCC(dev);
+    PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+
+    assert(occ->psi);
+
+    occ->occmisc = 0;
+
+    /* XScom region for OCC registers */
+    pnv_xscom_region_init(&occ->xscom_regs, OBJECT(dev), poc->xscom_ops,
+                          occ, "xscom-occ", poc->xscom_size);
+
+    /* OCC common area mmio region for OCC SRAM registers */
+    memory_region_init_io(&occ->sram_regs, OBJECT(dev), &pnv_occ_sram_ops,
+                          occ, "occ-common-area",
+                          PNV_OCC_SENSOR_DATA_BLOCK_SIZE);
+}
+
+static Property pnv_occ_properties[] = {
+    DEFINE_PROP_LINK("psi", PnvOCC, psi, TYPE_PNV_PSI, PnvPsi *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_occ_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = pnv_occ_realize;
+    dc->desc = "PowerNV OCC Controller";
+    device_class_set_props(dc, pnv_occ_properties);
+    dc->user_creatable = false;
+}
+
+static const TypeInfo pnv_occ_type_info = {
+    .name = TYPE_PNV_OCC,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(PnvOCC),
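/*
 * (Editor's sketch, not part of the patch: TYPE_PNV_OCC is abstract,
 * so a chip model instantiates one of the concrete subclasses and
 * wires the mandatory "psi" link before realizing it, roughly:
 *
 *     object_initialize_child(OBJECT(chip), "occ", &chip->occ,
 *                             TYPE_PNV9_OCC);
 *     object_property_set_link(OBJECT(&chip->occ), "psi",
 *                              OBJECT(&chip->psi), &error_abort);
 *     qdev_realize(DEVICE(&chip->occ), NULL, &error_fatal);
 *
 * The field names on the chip side are illustrative.)
 */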
.class_init = pnv_occ_class_init, + .class_size = sizeof(PnvOCCClass), + .abstract = true, +}; + +static void pnv_occ_register_types(void) +{ + type_register_static(&pnv_occ_type_info); + type_register_static(&pnv_occ_power8_type_info); + type_register_static(&pnv_occ_power9_type_info); +} + +type_init(pnv_occ_register_types); diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c new file mode 100644 index 000000000..83ecccca2 --- /dev/null +++ b/hw/ppc/pnv_pnor.c @@ -0,0 +1,141 @@ +/* + * QEMU PowerNV PNOR simple model + * + * Copyright (c) 2015-2019, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/units.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "hw/loader.h" +#include "hw/ppc/pnv_pnor.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" + +static uint64_t pnv_pnor_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPnor *s = PNV_PNOR(opaque); + uint64_t ret = 0; + int i; + + for (i = 0; i < size; i++) { + ret |= (uint64_t) s->storage[addr + i] << (8 * (size - i - 1)); + } + + return ret; +} + +static void pnv_pnor_update(PnvPnor *s, int offset, int size) +{ + int offset_end; + int ret; + + if (!s->blk || !blk_is_writable(s->blk)) { + return; + } + + offset_end = offset + size; + offset = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE); + offset_end = QEMU_ALIGN_UP(offset_end, BDRV_SECTOR_SIZE); + + ret = blk_pwrite(s->blk, offset, s->storage + offset, + offset_end - offset, 0); + if (ret < 0) { + error_report("Could not update PNOR offset=0x%" PRIx32" : %s", offset, + strerror(-ret)); + } +} + +static void pnv_pnor_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + PnvPnor *s = PNV_PNOR(opaque); + int i; + + for (i = 0; i < size; i++) { + s->storage[addr + i] = (data >> (8 * (size - i - 1))) & 0xFF; + } + pnv_pnor_update(s, addr, size); +} + +/* + * TODO: Check endianness: skiboot is BIG, Aspeed AHB is LITTLE, flash + * is BIG. + */ +static const MemoryRegionOps pnv_pnor_ops = { + .read = pnv_pnor_read, + .write = pnv_pnor_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +static void pnv_pnor_realize(DeviceState *dev, Error **errp) +{ + PnvPnor *s = PNV_PNOR(dev); + int ret; + + if (s->blk) { + uint64_t perm = BLK_PERM_CONSISTENT_READ | + (blk_supports_write_perm(s->blk) ? 
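/*
 * (Editor's aside: pnv_pnor_update() above rounds the dirty range out
 * to BDRV_SECTOR_SIZE, so e.g. a 4-byte write at offset 0x1234 flushes
 * the whole 0x1200-0x1400 span of s->storage back to the block
 * backend.)
 */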
BLK_PERM_WRITE : 0); + ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + + s->size = blk_getlength(s->blk); + if (s->size <= 0) { + error_setg(errp, "failed to get flash size"); + return; + } + + s->storage = blk_blockalign(s->blk, s->size); + + if (blk_pread(s->blk, 0, s->storage, s->size) != s->size) { + error_setg(errp, "failed to read the initial flash content"); + return; + } + } else { + s->storage = blk_blockalign(NULL, s->size); + memset(s->storage, 0xFF, s->size); + } + + memory_region_init_io(&s->mmio, OBJECT(s), &pnv_pnor_ops, s, + TYPE_PNV_PNOR, s->size); +} + +static Property pnv_pnor_properties[] = { + DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB), + DEFINE_PROP_DRIVE("drive", PnvPnor, blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_pnor_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_pnor_realize; + device_class_set_props(dc, pnv_pnor_properties); +} + +static const TypeInfo pnv_pnor_info = { + .name = TYPE_PNV_PNOR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PnvPnor), + .class_init = pnv_pnor_class_init, +}; + +static void pnv_pnor_register_types(void) +{ + type_register_static(&pnv_pnor_info); +} + +type_init(pnv_pnor_register_types) diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c new file mode 100644 index 000000000..cd9a2c595 --- /dev/null +++ b/hw/ppc/pnv_psi.c @@ -0,0 +1,967 @@ +/* + * QEMU PowerPC PowerNV Processor Service Interface (PSI) model + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "target/ppc/cpu.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "sysemu/reset.h" +#include "qapi/error.h" +#include "monitor/monitor.h" + + +#include "hw/ppc/fdt.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv_psi.h" + +#include <libfdt.h> + +#define PSIHB_XSCOM_FIR_RW 0x00 +#define PSIHB_XSCOM_FIR_AND 0x01 +#define PSIHB_XSCOM_FIR_OR 0x02 +#define PSIHB_XSCOM_FIRMASK_RW 0x03 +#define PSIHB_XSCOM_FIRMASK_AND 0x04 +#define PSIHB_XSCOM_FIRMASK_OR 0x05 +#define PSIHB_XSCOM_FIRACT0 0x06 +#define PSIHB_XSCOM_FIRACT1 0x07 + +/* Host Bridge Base Address Register */ +#define PSIHB_XSCOM_BAR 0x0a +#define PSIHB_BAR_EN 0x0000000000000001ull + +/* FSP Base Address Register */ +#define PSIHB_XSCOM_FSPBAR 0x0b + +/* PSI Host Bridge Control/Status Register */ +#define PSIHB_XSCOM_CR 0x0e +#define PSIHB_CR_FSP_CMD_ENABLE 0x8000000000000000ull +#define PSIHB_CR_FSP_MMIO_ENABLE 0x4000000000000000ull +#define PSIHB_CR_FSP_IRQ_ENABLE 0x1000000000000000ull +#define PSIHB_CR_FSP_ERR_RSP_ENABLE 0x0800000000000000ull +#define PSIHB_CR_PSI_LINK_ENABLE 0x0400000000000000ull +#define PSIHB_CR_FSP_RESET 0x0200000000000000ull +#define PSIHB_CR_PSIHB_RESET 0x0100000000000000ull +#define PSIHB_CR_PSI_IRQ 0x0000800000000000ull +#define PSIHB_CR_FSP_IRQ 0x0000400000000000ull +#define PSIHB_CR_FSP_LINK_ACTIVE 0x0000200000000000ull +#define PSIHB_CR_IRQ_CMD_EXPECT 0x0000010000000000ull + /* and more ... */ + +/* PSIHB Status / Error Mask Register */ +#define PSIHB_XSCOM_SEMR 0x0f + +/* XIVR, to signal interrupts to the CEC firmware. more XIVR below. */ +#define PSIHB_XSCOM_XIVR_FSP 0x10 +#define PSIHB_XIVR_SERVER_SH 40 +#define PSIHB_XIVR_SERVER_MSK (0xffffull << PSIHB_XIVR_SERVER_SH) +#define PSIHB_XIVR_PRIO_SH 32 +#define PSIHB_XIVR_PRIO_MSK (0xffull << PSIHB_XIVR_PRIO_SH) +#define PSIHB_XIVR_SRC_SH 29 +#define PSIHB_XIVR_SRC_MSK (0x7ull << PSIHB_XIVR_SRC_SH) +#define PSIHB_XIVR_PENDING 0x01000000ull + +/* PSI Host Bridge Set Control/ Status Register */ +#define PSIHB_XSCOM_SCR 0x12 + +/* PSI Host Bridge Clear Control/ Status Register */ +#define PSIHB_XSCOM_CCR 0x13 + +/* DMA Upper Address Register */ +#define PSIHB_XSCOM_DMA_UPADD 0x14 + +/* Interrupt Status */ +#define PSIHB_XSCOM_IRQ_STAT 0x15 +#define PSIHB_IRQ_STAT_OCC 0x0000001000000000ull +#define PSIHB_IRQ_STAT_FSI 0x0000000800000000ull +#define PSIHB_IRQ_STAT_LPCI2C 0x0000000400000000ull +#define PSIHB_IRQ_STAT_LOCERR 0x0000000200000000ull +#define PSIHB_IRQ_STAT_EXT 0x0000000100000000ull + +/* remaining XIVR */ +#define PSIHB_XSCOM_XIVR_OCC 0x16 +#define PSIHB_XSCOM_XIVR_FSI 0x17 +#define PSIHB_XSCOM_XIVR_LPCI2C 0x18 +#define PSIHB_XSCOM_XIVR_LOCERR 0x19 +#define PSIHB_XSCOM_XIVR_EXT 0x1a + +/* Interrupt Requester Source Compare Register */ +#define PSIHB_XSCOM_IRSN 0x1b +#define PSIHB_IRSN_COMP_SH 45 +#define PSIHB_IRSN_COMP_MSK (0x7ffffull << PSIHB_IRSN_COMP_SH) +#define PSIHB_IRSN_IRQ_MUX 0x0000000800000000ull +#define PSIHB_IRSN_IRQ_RESET 0x0000000400000000ull +#define PSIHB_IRSN_DOWNSTREAM_EN 0x0000000200000000ull +#define PSIHB_IRSN_UPSTREAM_EN 0x0000000100000000ull +#define PSIHB_IRSN_COMPMASK_SH 13 +#define PSIHB_IRSN_COMPMASK_MSK (0x7ffffull << PSIHB_IRSN_COMPMASK_SH) + +#define PSIHB_BAR_MASK 0x0003fffffff00000ull +#define PSIHB_FSPBAR_MASK 0x0003ffff00000000ull + +#define PSIHB9_BAR_MASK 0x00fffffffff00000ull +#define PSIHB9_FSPBAR_MASK 0x00ffffff00000000ull + +#define PSIHB_REG(addr) (((addr) >> 3) + 
PSIHB_XSCOM_BAR) + +static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar) +{ + PnvPsiClass *ppc = PNV_PSI_GET_CLASS(psi); + MemoryRegion *sysmem = get_system_memory(); + uint64_t old = psi->regs[PSIHB_XSCOM_BAR]; + + psi->regs[PSIHB_XSCOM_BAR] = bar & (ppc->bar_mask | PSIHB_BAR_EN); + + /* Update MR, always remove it first */ + if (old & PSIHB_BAR_EN) { + memory_region_del_subregion(sysmem, &psi->regs_mr); + } + + /* Then add it back if needed */ + if (bar & PSIHB_BAR_EN) { + uint64_t addr = bar & ppc->bar_mask; + memory_region_add_subregion(sysmem, addr, &psi->regs_mr); + } +} + +static void pnv_psi_update_fsp_mr(PnvPsi *psi) +{ + /* TODO: Update FSP MR if/when we support FSP BAR */ +} + +static void pnv_psi_set_cr(PnvPsi *psi, uint64_t cr) +{ + uint64_t old = psi->regs[PSIHB_XSCOM_CR]; + + psi->regs[PSIHB_XSCOM_CR] = cr; + + /* Check some bit changes */ + if ((old ^ psi->regs[PSIHB_XSCOM_CR]) & PSIHB_CR_FSP_MMIO_ENABLE) { + pnv_psi_update_fsp_mr(psi); + } +} + +static void pnv_psi_set_irsn(PnvPsi *psi, uint64_t val) +{ + ICSState *ics = &PNV8_PSI(psi)->ics; + + /* In this model we ignore the up/down enable bits for now + * as SW doesn't use them (other than setting them at boot). + * We ignore IRQ_MUX, its meaning isn't clear and we don't use + * it and finally we ignore reset (XXX fix that ?) + */ + psi->regs[PSIHB_XSCOM_IRSN] = val & (PSIHB_IRSN_COMP_MSK | + PSIHB_IRSN_IRQ_MUX | + PSIHB_IRSN_IRQ_RESET | + PSIHB_IRSN_DOWNSTREAM_EN | + PSIHB_IRSN_UPSTREAM_EN); + + /* We ignore the compare mask as well, our ICS emulation is too + * simplistic to make any use if it, and we extract the offset + * from the compare value + */ + ics->offset = (val & PSIHB_IRSN_COMP_MSK) >> PSIHB_IRSN_COMP_SH; +} + +/* + * FSP and PSI interrupts are muxed under the same number. + */ +static const uint32_t xivr_regs[] = { + [PSIHB_IRQ_PSI] = PSIHB_XSCOM_XIVR_FSP, + [PSIHB_IRQ_FSP] = PSIHB_XSCOM_XIVR_FSP, + [PSIHB_IRQ_OCC] = PSIHB_XSCOM_XIVR_OCC, + [PSIHB_IRQ_FSI] = PSIHB_XSCOM_XIVR_FSI, + [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_XIVR_LPCI2C, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_XIVR_LOCERR, + [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_XIVR_EXT, +}; + +static const uint32_t stat_regs[] = { + [PSIHB_IRQ_PSI] = PSIHB_XSCOM_CR, + [PSIHB_IRQ_FSP] = PSIHB_XSCOM_CR, + [PSIHB_IRQ_OCC] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_FSI] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_IRQ_STAT, +}; + +static const uint64_t stat_bits[] = { + [PSIHB_IRQ_PSI] = PSIHB_CR_PSI_IRQ, + [PSIHB_IRQ_FSP] = PSIHB_CR_FSP_IRQ, + [PSIHB_IRQ_OCC] = PSIHB_IRQ_STAT_OCC, + [PSIHB_IRQ_FSI] = PSIHB_IRQ_STAT_FSI, + [PSIHB_IRQ_LPC_I2C] = PSIHB_IRQ_STAT_LPCI2C, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_IRQ_STAT_LOCERR, + [PSIHB_IRQ_EXTERNAL] = PSIHB_IRQ_STAT_EXT, +}; + +void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state) +{ + PNV_PSI_GET_CLASS(psi)->irq_set(psi, irq, state); +} + +static void pnv_psi_power8_irq_set(PnvPsi *psi, int irq, bool state) +{ + uint32_t xivr_reg; + uint32_t stat_reg; + uint32_t src; + bool masked; + + if (irq > PSIHB_IRQ_EXTERNAL) { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq); + return; + } + + xivr_reg = xivr_regs[irq]; + stat_reg = stat_regs[irq]; + + src = (psi->regs[xivr_reg] & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH; + if (state) { + psi->regs[stat_reg] |= stat_bits[irq]; + /* TODO: optimization, check mask here. 
That means + * re-evaluating when unmasking + */ + qemu_irq_raise(psi->qirqs[src]); + } else { + psi->regs[stat_reg] &= ~stat_bits[irq]; + + /* FSP and PSI are muxed so don't lower if either is still set */ + if (stat_reg != PSIHB_XSCOM_CR || + !(psi->regs[stat_reg] & (PSIHB_CR_PSI_IRQ | PSIHB_CR_FSP_IRQ))) { + qemu_irq_lower(psi->qirqs[src]); + } else { + state = true; + } + } + + /* Note about the emulation of the pending bit: This isn't + * entirely correct. The pending bit should be cleared when the + * EOI has been received. However, we don't have callbacks on EOI + * (especially not under KVM) so no way to emulate that properly, + * so instead we just set that bit as the logical "output" of the + * XIVR (ie pending & !masked) + * + * CLG: We could define a new ICS object with a custom eoi() + * handler to clear the pending bit. But I am not sure this would + * be useful for the software anyhow. + */ + masked = (psi->regs[xivr_reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK; + if (state && !masked) { + psi->regs[xivr_reg] |= PSIHB_XIVR_PENDING; + } else { + psi->regs[xivr_reg] &= ~PSIHB_XIVR_PENDING; + } +} + +static void pnv_psi_set_xivr(PnvPsi *psi, uint32_t reg, uint64_t val) +{ + ICSState *ics = &PNV8_PSI(psi)->ics; + uint16_t server; + uint8_t prio; + uint8_t src; + + psi->regs[reg] = (psi->regs[reg] & PSIHB_XIVR_PENDING) | + (val & (PSIHB_XIVR_SERVER_MSK | + PSIHB_XIVR_PRIO_MSK | + PSIHB_XIVR_SRC_MSK)); + val = psi->regs[reg]; + server = (val & PSIHB_XIVR_SERVER_MSK) >> PSIHB_XIVR_SERVER_SH; + prio = (val & PSIHB_XIVR_PRIO_MSK) >> PSIHB_XIVR_PRIO_SH; + src = (val & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH; + + if (src >= PSI_NUM_INTERRUPTS) { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", src); + return; + } + + /* Remove pending bit if the IRQ is masked */ + if ((psi->regs[reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK) { + psi->regs[reg] &= ~PSIHB_XIVR_PENDING; + } + + /* The low order 2 bits are the link pointer (Type II interrupts). + * Shift back to get a valid IRQ server. + */ + server >>= 2; + + /* Now because of source remapping, weird things can happen + * if you change the source number dynamically, our simple ICS + * doesn't deal with remapping. So we just poke a different + * ICS entry based on what source number was written. This will + * do for now but a more accurate implementation would instead + * use a fixed server/prio and a remapper of the generated irq. 
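 *
 * (Editor's illustration: if software rewrote XIVR_OCC changing SRC
 * from 1 to 2, the ics_write_xive() call below would update ICS entry
 * 2 and leave entry 1 with its stale server/priority; a remapper
 * would instead redirect source 1's triggers. Since firmware programs
 * the sources once at boot, the shortcut holds up in practice.)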
+ */ + ics_write_xive(ics, src, server, prio, prio); +} + +static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio) +{ + uint64_t val = 0xffffffffffffffffull; + + switch (offset) { + case PSIHB_XSCOM_FIR_RW: + case PSIHB_XSCOM_FIRACT0: + case PSIHB_XSCOM_FIRACT1: + case PSIHB_XSCOM_BAR: + case PSIHB_XSCOM_FSPBAR: + case PSIHB_XSCOM_CR: + case PSIHB_XSCOM_XIVR_FSP: + case PSIHB_XSCOM_XIVR_OCC: + case PSIHB_XSCOM_XIVR_FSI: + case PSIHB_XSCOM_XIVR_LPCI2C: + case PSIHB_XSCOM_XIVR_LOCERR: + case PSIHB_XSCOM_XIVR_EXT: + case PSIHB_XSCOM_IRQ_STAT: + case PSIHB_XSCOM_SEMR: + case PSIHB_XSCOM_DMA_UPADD: + case PSIHB_XSCOM_IRSN: + val = psi->regs[offset]; + break; + default: + qemu_log_mask(LOG_UNIMP, "PSI: read at 0x%" PRIx32 "\n", offset); + } + return val; +} + +static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val, + bool mmio) +{ + switch (offset) { + case PSIHB_XSCOM_FIR_RW: + case PSIHB_XSCOM_FIRACT0: + case PSIHB_XSCOM_FIRACT1: + case PSIHB_XSCOM_SEMR: + case PSIHB_XSCOM_DMA_UPADD: + psi->regs[offset] = val; + break; + case PSIHB_XSCOM_FIR_OR: + psi->regs[PSIHB_XSCOM_FIR_RW] |= val; + break; + case PSIHB_XSCOM_FIR_AND: + psi->regs[PSIHB_XSCOM_FIR_RW] &= val; + break; + case PSIHB_XSCOM_BAR: + /* Only XSCOM can write this one */ + if (!mmio) { + pnv_psi_set_bar(psi, val); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of BAR\n"); + } + break; + case PSIHB_XSCOM_FSPBAR: + psi->regs[PSIHB_XSCOM_FSPBAR] = val & PSIHB_FSPBAR_MASK; + pnv_psi_update_fsp_mr(psi); + break; + case PSIHB_XSCOM_CR: + pnv_psi_set_cr(psi, val); + break; + case PSIHB_XSCOM_SCR: + pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] | val); + break; + case PSIHB_XSCOM_CCR: + pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] & ~val); + break; + case PSIHB_XSCOM_XIVR_FSP: + case PSIHB_XSCOM_XIVR_OCC: + case PSIHB_XSCOM_XIVR_FSI: + case PSIHB_XSCOM_XIVR_LPCI2C: + case PSIHB_XSCOM_XIVR_LOCERR: + case PSIHB_XSCOM_XIVR_EXT: + pnv_psi_set_xivr(psi, offset, val); + break; + case PSIHB_XSCOM_IRQ_STAT: + /* Read only */ + qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of IRQ_STAT\n"); + break; + case PSIHB_XSCOM_IRSN: + pnv_psi_set_irsn(psi, val); + break; + default: + qemu_log_mask(LOG_UNIMP, "PSI: write at 0x%" PRIx32 "\n", offset); + } +} + +/* + * The values of the registers when accessed through the MMIO region + * follow the relation : xscom = (mmio + 0x50) >> 3 + */ +static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + return pnv_psi_reg_read(opaque, PSIHB_REG(addr), true); +} + +static void pnv_psi_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + pnv_psi_reg_write(opaque, PSIHB_REG(addr), val, true); +} + +static const MemoryRegionOps psi_mmio_ops = { + .read = pnv_psi_mmio_read, + .write = pnv_psi_mmio_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + }, +}; + +static uint64_t pnv_psi_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + return pnv_psi_reg_read(opaque, addr >> 3, false); +} + +static void pnv_psi_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + pnv_psi_reg_write(opaque, addr >> 3, val, false); +} + +static const MemoryRegionOps pnv_psi_xscom_ops = { + .read = pnv_psi_xscom_read, + .write = pnv_psi_xscom_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size 
= 8, + } +}; + +static void pnv_psi_reset(DeviceState *dev) +{ + PnvPsi *psi = PNV_PSI(dev); + + memset(psi->regs, 0x0, sizeof(psi->regs)); + + psi->regs[PSIHB_XSCOM_BAR] = psi->bar | PSIHB_BAR_EN; +} + +static void pnv_psi_reset_handler(void *dev) +{ + device_cold_reset(DEVICE(dev)); +} + +static void pnv_psi_realize(DeviceState *dev, Error **errp) +{ + PnvPsi *psi = PNV_PSI(dev); + + /* Default BAR for MMIO region */ + pnv_psi_set_bar(psi, psi->bar | PSIHB_BAR_EN); + + qemu_register_reset(pnv_psi_reset_handler, dev); +} + +static void pnv_psi_power8_instance_init(Object *obj) +{ + Pnv8Psi *psi8 = PNV8_PSI(obj); + + object_initialize_child(obj, "ics-psi", &psi8->ics, TYPE_ICS); + object_property_add_alias(obj, ICS_PROP_XICS, OBJECT(&psi8->ics), + ICS_PROP_XICS); +} + +static const uint8_t irq_to_xivr[] = { + PSIHB_XSCOM_XIVR_FSP, + PSIHB_XSCOM_XIVR_OCC, + PSIHB_XSCOM_XIVR_FSI, + PSIHB_XSCOM_XIVR_LPCI2C, + PSIHB_XSCOM_XIVR_LOCERR, + PSIHB_XSCOM_XIVR_EXT, +}; + +static void pnv_psi_power8_realize(DeviceState *dev, Error **errp) +{ + PnvPsi *psi = PNV_PSI(dev); + ICSState *ics = &PNV8_PSI(psi)->ics; + unsigned int i; + + /* Create PSI interrupt control source */ + if (!object_property_set_int(OBJECT(ics), "nr-irqs", PSI_NUM_INTERRUPTS, + errp)) { + return; + } + if (!qdev_realize(DEVICE(ics), NULL, errp)) { + return; + } + + for (i = 0; i < ics->nr_irqs; i++) { + ics_set_irq_type(ics, i, true); + } + + psi->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs); + + /* XSCOM region for PSI registers */ + pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_xscom_ops, + psi, "xscom-psi", PNV_XSCOM_PSIHB_SIZE); + + /* Initialize MMIO region */ + memory_region_init_io(&psi->regs_mr, OBJECT(dev), &psi_mmio_ops, psi, + "psihb", PNV_PSIHB_SIZE); + + /* Default sources in XIVR */ + for (i = 0; i < PSI_NUM_INTERRUPTS; i++) { + uint8_t xivr = irq_to_xivr[i]; + psi->regs[xivr] = PSIHB_XIVR_PRIO_MSK | + ((uint64_t) i << PSIHB_XIVR_SRC_SH); + } + + pnv_psi_realize(dev, errp); +} + +static int pnv_psi_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset) +{ + PnvPsiClass *ppc = PNV_PSI_GET_CLASS(dev); + char *name; + int offset; + uint32_t reg[] = { + cpu_to_be32(ppc->xscom_pcba), + cpu_to_be32(ppc->xscom_size) + }; + + name = g_strdup_printf("psihb@%x", ppc->xscom_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT(fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))); + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", 2)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", 1)); + _FDT(fdt_setprop(fdt, offset, "compatible", ppc->compat, + ppc->compat_size)); + return 0; +} + +static Property pnv_psi_properties[] = { + DEFINE_PROP_UINT64("bar", PnvPsi, bar, 0), + DEFINE_PROP_UINT64("fsp-bar", PnvPsi, fsp_bar, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_psi_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvPsiClass *ppc = PNV_PSI_CLASS(klass); + static const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x"; + + dc->desc = "PowerNV PSI Controller POWER8"; + dc->realize = pnv_psi_power8_realize; + + ppc->xscom_pcba = PNV_XSCOM_PSIHB_BASE; + ppc->xscom_size = PNV_XSCOM_PSIHB_SIZE; + ppc->bar_mask = PSIHB_BAR_MASK; + ppc->irq_set = pnv_psi_power8_irq_set; + ppc->compat = compat; + ppc->compat_size = sizeof(compat); +} + +static const TypeInfo pnv_psi_power8_info = { + .name = TYPE_PNV8_PSI, + .parent = TYPE_PNV_PSI, + .instance_size = sizeof(Pnv8Psi), + .instance_init = 
pnv_psi_power8_instance_init, + .class_init = pnv_psi_power8_class_init, +}; + + +/* Common registers */ + +#define PSIHB9_CR 0x20 +#define PSIHB9_SEMR 0x28 + +/* P9 registers */ + +#define PSIHB9_INTERRUPT_CONTROL 0x58 +#define PSIHB9_IRQ_METHOD PPC_BIT(0) +#define PSIHB9_IRQ_RESET PPC_BIT(1) +#define PSIHB9_ESB_CI_BASE 0x60 +#define PSIHB9_ESB_CI_64K PPC_BIT(1) +#define PSIHB9_ESB_CI_ADDR_MASK PPC_BITMASK(8, 47) +#define PSIHB9_ESB_CI_VALID PPC_BIT(63) +#define PSIHB9_ESB_NOTIF_ADDR 0x68 +#define PSIHB9_ESB_NOTIF_ADDR_MASK PPC_BITMASK(8, 60) +#define PSIHB9_ESB_NOTIF_VALID PPC_BIT(63) +#define PSIHB9_IVT_OFFSET 0x70 +#define PSIHB9_IVT_OFF_SHIFT 32 + +#define PSIHB9_IRQ_LEVEL 0x78 /* assertion */ +#define PSIHB9_IRQ_LEVEL_PSI PPC_BIT(0) +#define PSIHB9_IRQ_LEVEL_OCC PPC_BIT(1) +#define PSIHB9_IRQ_LEVEL_FSI PPC_BIT(2) +#define PSIHB9_IRQ_LEVEL_LPCHC PPC_BIT(3) +#define PSIHB9_IRQ_LEVEL_LOCAL_ERR PPC_BIT(4) +#define PSIHB9_IRQ_LEVEL_GLOBAL_ERR PPC_BIT(5) +#define PSIHB9_IRQ_LEVEL_TPM PPC_BIT(6) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ1 PPC_BIT(7) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ2 PPC_BIT(8) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ3 PPC_BIT(9) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ4 PPC_BIT(10) +#define PSIHB9_IRQ_LEVEL_SBE_I2C PPC_BIT(11) +#define PSIHB9_IRQ_LEVEL_DIO PPC_BIT(12) +#define PSIHB9_IRQ_LEVEL_PSU PPC_BIT(13) +#define PSIHB9_IRQ_LEVEL_I2C_C PPC_BIT(14) +#define PSIHB9_IRQ_LEVEL_I2C_D PPC_BIT(15) +#define PSIHB9_IRQ_LEVEL_I2C_E PPC_BIT(16) +#define PSIHB9_IRQ_LEVEL_SBE PPC_BIT(19) + +#define PSIHB9_IRQ_STAT 0x80 /* P bit */ +#define PSIHB9_IRQ_STAT_PSI PPC_BIT(0) +#define PSIHB9_IRQ_STAT_OCC PPC_BIT(1) +#define PSIHB9_IRQ_STAT_FSI PPC_BIT(2) +#define PSIHB9_IRQ_STAT_LPCHC PPC_BIT(3) +#define PSIHB9_IRQ_STAT_LOCAL_ERR PPC_BIT(4) +#define PSIHB9_IRQ_STAT_GLOBAL_ERR PPC_BIT(5) +#define PSIHB9_IRQ_STAT_TPM PPC_BIT(6) +#define PSIHB9_IRQ_STAT_LPC_SIRQ1 PPC_BIT(7) +#define PSIHB9_IRQ_STAT_LPC_SIRQ2 PPC_BIT(8) +#define PSIHB9_IRQ_STAT_LPC_SIRQ3 PPC_BIT(9) +#define PSIHB9_IRQ_STAT_LPC_SIRQ4 PPC_BIT(10) +#define PSIHB9_IRQ_STAT_SBE_I2C PPC_BIT(11) +#define PSIHB9_IRQ_STAT_DIO PPC_BIT(12) +#define PSIHB9_IRQ_STAT_PSU PPC_BIT(13) + +static void pnv_psi_notify(XiveNotifier *xf, uint32_t srcno) +{ + PnvPsi *psi = PNV_PSI(xf); + uint64_t notif_port = psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)]; + bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID; + uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID; + + uint32_t offset = + (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT); + uint64_t data = XIVE_TRIGGER_PQ | offset | srcno; + MemTxResult result; + + if (!valid) { + return; + } + + address_space_stq_be(&address_space_memory, notify_addr, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: trigger failed @%" + HWADDR_PRIx "\n", __func__, notif_port); + return; + } +} + +static uint64_t pnv_psi_p9_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPsi *psi = PNV_PSI(opaque); + uint32_t reg = PSIHB_REG(addr); + uint64_t val = -1; + + switch (addr) { + case PSIHB9_CR: + case PSIHB9_SEMR: + /* FSP stuff */ + case PSIHB9_INTERRUPT_CONTROL: + case PSIHB9_ESB_CI_BASE: + case PSIHB9_ESB_NOTIF_ADDR: + case PSIHB9_IVT_OFFSET: + val = psi->regs[reg]; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "PSI: read at 0x%" PRIx64 "\n", addr); + } + + return val; +} + +static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPsi *psi = PNV_PSI(opaque); + Pnv9Psi *psi9 = PNV9_PSI(psi); + 
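/*
 * Editor's sketch (not part of the patch): pnv_psi_notify() above is
 * the XIVE trigger path. For an IVT offset O and source S it stores
 * the 64-bit datum XIVE_TRIGGER_PQ | O | S to the notify port, e.g.
 * with O = 0x10 and S = 3 (LPCHC):
 *
 *     uint64_t data = XIVE_TRIGGER_PQ | 0x10 | 3;    // 0x13 + PQ flag
 *     address_space_stq_be(&address_space_memory, notify_addr, data,
 *                          MEMTXATTRS_UNSPECIFIED, &result);
 *
 * Note also that PSIHB_REG(addr) = (addr >> 3) + PSIHB_XSCOM_BAR is
 * exactly the xscom = (mmio + 0x50) >> 3 relation quoted earlier,
 * since PSIHB_XSCOM_BAR is 0x0a and 0x0a * 8 = 0x50.
 */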
uint32_t reg = PSIHB_REG(addr);
+    MemoryRegion *sysmem = get_system_memory();
+
+    switch (addr) {
+    case PSIHB9_CR:
+    case PSIHB9_SEMR:
+        /* FSP stuff */
+        break;
+    case PSIHB9_INTERRUPT_CONTROL:
+        if (val & PSIHB9_IRQ_RESET) {
+            device_cold_reset(DEVICE(&psi9->source));
+        }
+        psi->regs[reg] = val;
+        break;
+
+    case PSIHB9_ESB_CI_BASE:
+        if (!(val & PSIHB9_ESB_CI_VALID)) {
+            if (psi->regs[reg] & PSIHB9_ESB_CI_VALID) {
+                memory_region_del_subregion(sysmem, &psi9->source.esb_mmio);
+            }
+        } else {
+            if (!(psi->regs[reg] & PSIHB9_ESB_CI_VALID)) {
+                memory_region_add_subregion(sysmem,
+                                            val & ~PSIHB9_ESB_CI_VALID,
+                                            &psi9->source.esb_mmio);
+            }
+        }
+        psi->regs[reg] = val;
+        break;
+
+    case PSIHB9_ESB_NOTIF_ADDR:
+        psi->regs[reg] = val;
+        break;
+    case PSIHB9_IVT_OFFSET:
+        psi->regs[reg] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: write at 0x%" PRIx64 "\n", addr);
+    }
+}
+
+static const MemoryRegionOps pnv_psi_p9_mmio_ops = {
+    .read = pnv_psi_p9_mmio_read,
+    .write = pnv_psi_p9_mmio_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+static uint64_t pnv_psi_p9_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    /* No reads are expected */
+    qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom read at 0x%" PRIx64 "\n", addr);
+    return -1;
+}
+
+static void pnv_psi_p9_xscom_write(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size)
+{
+    PnvPsi *psi = PNV_PSI(opaque);
+
+    /* XSCOM is only used to set the PSIHB MMIO region */
+    switch (addr >> 3) {
+    case PSIHB_XSCOM_BAR:
+        pnv_psi_set_bar(psi, val);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom write at 0x%" PRIx64 "\n",
+                      addr);
+    }
+}
+
+static const MemoryRegionOps pnv_psi_p9_xscom_ops = {
+    .read = pnv_psi_p9_xscom_read,
+    .write = pnv_psi_p9_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    }
+};
+
+static void pnv_psi_power9_irq_set(PnvPsi *psi, int irq, bool state)
+{
+    uint64_t irq_method = psi->regs[PSIHB_REG(PSIHB9_INTERRUPT_CONTROL)];
+
+    if (irq > PSIHB9_NUM_IRQS) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
+        return;
+    }
+
+    if (irq_method & PSIHB9_IRQ_METHOD) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: LSI IRQ method not supported\n");
+        return;
+    }
+
+    /* Update LSI levels */
+    if (state) {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] |= PPC_BIT(irq);
+    } else {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] &= ~PPC_BIT(irq);
+    }
+
+    qemu_set_irq(psi->qirqs[irq], state);
+}
+
+static void pnv_psi_power9_reset(DeviceState *dev)
+{
+    Pnv9Psi *psi = PNV9_PSI(dev);
+
+    pnv_psi_reset(dev);
+
+    if (memory_region_is_mapped(&psi->source.esb_mmio)) {
+        memory_region_del_subregion(get_system_memory(), &psi->source.esb_mmio);
+    }
+}
+
+static void pnv_psi_power9_instance_init(Object *obj)
+{
+    Pnv9Psi *psi = PNV9_PSI(obj);
+
+    object_initialize_child(obj, "source", &psi->source, TYPE_XIVE_SOURCE);
+}
+
+static void pnv_psi_power9_realize(DeviceState *dev, Error **errp)
+{
+    PnvPsi *psi = PNV_PSI(dev);
+    XiveSource *xsrc = &PNV9_PSI(psi)->source;
+    int i;
+
+    /* This is the only device with 4k ESB pages */
+    object_property_set_int(OBJECT(xsrc), "shift", XIVE_ESB_4K, &error_fatal);
+    object_property_set_int(OBJECT(xsrc), "nr-irqs", PSIHB9_NUM_IRQS,
+                            &error_fatal);
+    object_property_set_link(OBJECT(xsrc), "xive", OBJECT(psi), &error_abort);
+
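/*
 * Editor's sketch (not part of the patch): the ESB pages set up above
 * only become guest-visible once firmware writes a valid CI base,
 * which the PSIHB9_ESB_CI_BASE case of pnv_psi_p9_mmio_write() maps
 * into system memory as source.esb_mmio. With a hypothetical
 * psihb_write() MMIO accessor:
 *
 *     psihb_write(PSIHB9_ESB_CI_BASE, esb_base | PSIHB9_ESB_CI_VALID);
 *     psihb_write(PSIHB9_ESB_CI_BASE, 0);   // clearing VALID unmaps
 */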
if (!qdev_realize(DEVICE(xsrc), NULL, errp)) { + return; + } + + for (i = 0; i < xsrc->nr_irqs; i++) { + xive_source_irq_set_lsi(xsrc, i); + } + + psi->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs); + + /* XSCOM region for PSI registers */ + pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_p9_xscom_ops, + psi, "xscom-psi", PNV9_XSCOM_PSIHB_SIZE); + + /* MMIO region for PSI registers */ + memory_region_init_io(&psi->regs_mr, OBJECT(dev), &pnv_psi_p9_mmio_ops, psi, + "psihb", PNV9_PSIHB_SIZE); + + pnv_psi_realize(dev, errp); +} + +static void pnv_psi_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvPsiClass *ppc = PNV_PSI_CLASS(klass); + XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass); + static const char compat[] = "ibm,power9-psihb-x\0ibm,psihb-x"; + + dc->desc = "PowerNV PSI Controller POWER9"; + dc->realize = pnv_psi_power9_realize; + dc->reset = pnv_psi_power9_reset; + + ppc->xscom_pcba = PNV9_XSCOM_PSIHB_BASE; + ppc->xscom_size = PNV9_XSCOM_PSIHB_SIZE; + ppc->bar_mask = PSIHB9_BAR_MASK; + ppc->irq_set = pnv_psi_power9_irq_set; + ppc->compat = compat; + ppc->compat_size = sizeof(compat); + + xfc->notify = pnv_psi_notify; +} + +static const TypeInfo pnv_psi_power9_info = { + .name = TYPE_PNV9_PSI, + .parent = TYPE_PNV_PSI, + .instance_size = sizeof(Pnv9Psi), + .instance_init = pnv_psi_power9_instance_init, + .class_init = pnv_psi_power9_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XIVE_NOTIFIER }, + { }, + }, +}; + +static void pnv_psi_power10_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvPsiClass *ppc = PNV_PSI_CLASS(klass); + static const char compat[] = "ibm,power10-psihb-x\0ibm,psihb-x"; + + dc->desc = "PowerNV PSI Controller POWER10"; + + ppc->xscom_pcba = PNV10_XSCOM_PSIHB_BASE; + ppc->xscom_size = PNV10_XSCOM_PSIHB_SIZE; + ppc->compat = compat; + ppc->compat_size = sizeof(compat); +} + +static const TypeInfo pnv_psi_power10_info = { + .name = TYPE_PNV10_PSI, + .parent = TYPE_PNV9_PSI, + .class_init = pnv_psi_power10_class_init, +}; + +static void pnv_psi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xdc->dt_xscom = pnv_psi_dt_xscom; + + dc->desc = "PowerNV PSI Controller"; + device_class_set_props(dc, pnv_psi_properties); + dc->reset = pnv_psi_reset; + dc->user_creatable = false; +} + +static const TypeInfo pnv_psi_info = { + .name = TYPE_PNV_PSI, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPsi), + .class_init = pnv_psi_class_init, + .class_size = sizeof(PnvPsiClass), + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_psi_register_types(void) +{ + type_register_static(&pnv_psi_info); + type_register_static(&pnv_psi_power8_info); + type_register_static(&pnv_psi_power9_info); + type_register_static(&pnv_psi_power10_info); +} + +type_init(pnv_psi_register_types); + +void pnv_psi_pic_print_info(Pnv9Psi *psi9, Monitor *mon) +{ + PnvPsi *psi = PNV_PSI(psi9); + + uint32_t offset = + (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT); + + monitor_printf(mon, "PSIHB Source %08x .. 
%08x\n", + offset, offset + psi9->source.nr_irqs - 1); + xive_source_pic_print_info(&psi9->source, offset, mon); +} diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c new file mode 100644 index 000000000..9ce018dbc --- /dev/null +++ b/hw/ppc/pnv_xscom.c @@ -0,0 +1,324 @@ +/* + * QEMU PowerPC PowerNV XSCOM bus + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "sysemu/hw_accel.h" +#include "target/ppc/cpu.h" +#include "hw/sysbus.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" + +#include <libfdt.h> + +/* PRD registers */ +#define PRD_P8_IPOLL_REG_MASK 0x01020013 +#define PRD_P8_IPOLL_REG_STATUS 0x01020014 +#define PRD_P9_IPOLL_REG_MASK 0x000F0033 +#define PRD_P9_IPOLL_REG_STATUS 0x000F0034 + +static void xscom_complete(CPUState *cs, uint64_t hmer_bits) +{ + /* + * TODO: When the read/write comes from the monitor, NULL is + * passed for the cpu, and no CPU completion is generated. + */ + if (cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + /* + * TODO: Need a CPU helper to set HMER, also handle generation + * of HMIs + */ + cpu_synchronize_state(cs); + env->spr[SPR_HMER] |= hmer_bits; + } +} + +static uint32_t pnv_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + return PNV_CHIP_GET_CLASS(chip)->xscom_pcba(chip, addr); +} + +static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba) +{ + switch (pcba) { + case 0xf000f: + return PNV_CHIP_GET_CLASS(chip)->chip_cfam_id; + case 0x18002: /* ECID2 */ + return 0; + + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c03: /* PIBAM FIR MASK */ + + /* PRD registers */ + case PRD_P8_IPOLL_REG_MASK: + case PRD_P8_IPOLL_REG_STATUS: + case PRD_P9_IPOLL_REG_MASK: + case PRD_P9_IPOLL_REG_STATUS: + + /* P9 xscom reset */ + case 0x0090018: /* Receive status reg */ + case 0x0090012: /* log register */ + case 0x0090013: /* error register */ + + /* P8 xscom reset */ + case 0x2020007: /* ADU stuff, log register */ + case 0x2020009: /* ADU stuff, error register */ + case 0x202000f: /* ADU stuff, receive status register*/ + return 0; + case 0x2013f01: /* PBA stuff */ + case 0x2013f05: /* PBA stuff */ + return 0; + case 0x2013028: /* CAPP stuff */ + case 0x201302a: /* CAPP stuff */ + case 0x2013801: /* CAPP stuff */ + case 0x2013802: /* CAPP stuff */ + + /* P9 CAPP regs */ + case 0x2010841: + case 0x2010842: + case 0x201082a: + case 0x2010828: + case 0x4010841: + case 0x4010842: + case 0x401082a: + case 0x4010828: + return 0; + default: + return -1; + } +} + +static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val) +{ + /* We ignore writes to these */ + switch (pcba) { + case 0xf000f: /* chip id is RO */ + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c01: /* PIBAM FIR */ + case 0x1010c02: /* PIBAM FIR */ + case 0x1010c03: /* 
PIBAM FIR MASK */ + case 0x1010c04: /* PIBAM FIR MASK */ + case 0x1010c05: /* PIBAM FIR MASK */ + /* P9 xscom reset */ + case 0x0090018: /* Receive status reg */ + case 0x0090012: /* log register */ + case 0x0090013: /* error register */ + + /* P8 xscom reset */ + case 0x2020007: /* ADU stuff, log register */ + case 0x2020009: /* ADU stuff, error register */ + case 0x202000f: /* ADU stuff, receive status register*/ + + case 0x2013028: /* CAPP stuff */ + case 0x201302a: /* CAPP stuff */ + case 0x2013801: /* CAPP stuff */ + case 0x2013802: /* CAPP stuff */ + + /* P9 CAPP regs */ + case 0x2010841: + case 0x2010842: + case 0x201082a: + case 0x2010828: + case 0x4010841: + case 0x4010842: + case 0x401082a: + case 0x4010828: + + /* P8 PRD registers */ + case PRD_P8_IPOLL_REG_MASK: + case PRD_P8_IPOLL_REG_STATUS: + case PRD_P9_IPOLL_REG_MASK: + case PRD_P9_IPOLL_REG_STATUS: + return true; + default: + return false; + } +} + +static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + uint64_t val = 0; + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + val = xscom_read_default(chip, pcba); + if (val != -1) { + goto complete; + } + + val = address_space_ldq(&chip->xscom_as, (uint64_t) pcba << 3, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM read failed at @0x%" + HWADDR_PRIx " pcba=0x%08x\n", addr, pcba); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return 0; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); + return val; +} + +static void xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + if (xscom_write_default(chip, pcba, val)) { + goto complete; + } + + address_space_stq(&chip->xscom_as, (uint64_t) pcba << 3, val, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM write failed at @0x%" + HWADDR_PRIx " pcba=0x%08x data=0x%" PRIx64 "\n", + addr, pcba, val); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); +} + +const MemoryRegionOps pnv_xscom_ops = { + .read = xscom_read, + .write = xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +void pnv_xscom_realize(PnvChip *chip, uint64_t size, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(chip); + char *name; + + name = g_strdup_printf("xscom-%x", chip->chip_id); + memory_region_init_io(&chip->xscom_mmio, OBJECT(chip), &pnv_xscom_ops, + chip, name, size); + sysbus_init_mmio(sbd, &chip->xscom_mmio); + + memory_region_init(&chip->xscom, OBJECT(chip), name, size); + address_space_init(&chip->xscom_as, &chip->xscom, name); + g_free(name); +} + +static const TypeInfo pnv_xscom_interface_info = { + .name = TYPE_PNV_XSCOM_INTERFACE, + .parent = TYPE_INTERFACE, + .class_size = sizeof(PnvXScomInterfaceClass), +}; + +static void pnv_xscom_register_types(void) +{ + type_register_static(&pnv_xscom_interface_info); +} + +type_init(pnv_xscom_register_types) + +typedef struct ForeachPopulateArgs { + void *fdt; + int xscom_offset; +} ForeachPopulateArgs; + +static int xscom_dt_child(Object *child, void *opaque) +{ + if 
(object_dynamic_cast(child, TYPE_PNV_XSCOM_INTERFACE)) { + ForeachPopulateArgs *args = opaque; + PnvXScomInterface *xd = PNV_XSCOM_INTERFACE(child); + PnvXScomInterfaceClass *xc = PNV_XSCOM_INTERFACE_GET_CLASS(xd); + + /* + * Only "realized" devices should be configured in the DT + */ + if (xc->dt_xscom && DEVICE(child)->realized) { + _FDT((xc->dt_xscom(xd, args->fdt, args->xscom_offset))); + } + } + return 0; +} + +int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, + uint64_t xscom_base, uint64_t xscom_size, + const char *compat, int compat_size) +{ + uint64_t reg[] = { xscom_base, xscom_size }; + int xscom_offset; + ForeachPopulateArgs args; + char *name; + + name = g_strdup_printf("xscom@%" PRIx64, be64_to_cpu(reg[0])); + xscom_offset = fdt_add_subnode(fdt, root_offset, name); + _FDT(xscom_offset); + g_free(name); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id))); + /* + * On P10, the xscom bus id has been deprecated and the chip id is + * calculated from the "Primary topology table index". See skiboot. + */ + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index", + chip->chip_id))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat, compat_size))); + _FDT((fdt_setprop(fdt, xscom_offset, "scom-controller", NULL, 0))); + + args.fdt = fdt; + args.xscom_offset = xscom_offset; + + /* + * Loop on the whole object hierarchy to catch all + * PnvXScomInterface objects which can lie a bit deeper than the + * first layer. + */ + object_child_foreach_recursive(OBJECT(chip), xscom_dt_child, &args); + return 0; +} + +void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, MemoryRegion *mr) +{ + memory_region_add_subregion(&chip->xscom, offset << 3, mr); +} + +void pnv_xscom_region_init(MemoryRegion *mr, + Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size) +{ + memory_region_init_io(mr, owner, ops, opaque, name, size << 3); +} diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c new file mode 100644 index 000000000..e8127599c --- /dev/null +++ b/hw/ppc/ppc.c @@ -0,0 +1,1465 @@ +/* + * QEMU generic PowerPC hardware System Emulator + * + * Copyright (c) 2003-2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc_e500.h" +#include "qemu/timer.h" +#include "sysemu/cpus.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "sysemu/runstate.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" +#include "trace.h" + +static void cpu_ppc_tb_stop (CPUPPCState *env); +static void cpu_ppc_tb_start (CPUPPCState *env); + +void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + unsigned int old_pending; + bool locked = false; + + /* We may already have the BQL if coming from the reset path */ + if (!qemu_mutex_iothread_locked()) { + locked = true; + qemu_mutex_lock_iothread(); + } + + old_pending = env->pending_interrupts; + + if (level) { + env->pending_interrupts |= 1 << n_IRQ; + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + env->pending_interrupts &= ~(1 << n_IRQ); + if (env->pending_interrupts == 0) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + } + + if (old_pending != env->pending_interrupts) { + kvmppc_set_interrupt(cpu, n_IRQ, level); + } + + + trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts, + CPU(cpu)->interrupt_request); + + if (locked) { + qemu_mutex_unlock_iothread(); + } +} + +/* PowerPC 6xx / 7xx internal IRQ controller */ +static void ppc6xx_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + CPUState *cs = CPU(cpu); + + switch (pin) { + case PPC6xx_INPUT_TBEN: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("time base", level); + if (level) { + cpu_ppc_tb_start(env); + } else { + cpu_ppc_tb_stop(env); + } + break; + case PPC6xx_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPC6xx_INPUT_SMI: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("SMI IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level); + break; + case PPC6xx_INPUT_MCP: + /* Negative edge sensitive */ + /* XXX: TODO: actual reaction may depends on HID0 status + * 603/604/740/750: check HID0[EMCP] + */ + if (cur_level == 1 && level == 0) { + trace_ppc_irq_set_state("machine check", 1); + ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); + } + break; + case PPC6xx_INPUT_CKSTP_IN: + /* Level sensitive - active low */ + /* XXX: TODO: relay the signal to CKSTP_OUT pin */ + /* XXX: Note that the only way to restart the CPU is to reset it */ + if (level) { + trace_ppc_irq_cpu("stop"); + cs->halted = 1; + } + break; + case PPC6xx_INPUT_HRESET: + /* Level sensitive - active low */ + if (level) { + trace_ppc_irq_reset("CPU"); + cpu_interrupt(cs, CPU_INTERRUPT_RESET); + } + break; + case PPC6xx_INPUT_SRESET: + trace_ppc_irq_set_state("RESET IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppc6xx_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc6xx_set_irq, cpu, + PPC6xx_INPUT_NB); +} + +#if defined(TARGET_PPC64) +/* PowerPC 970 internal 
IRQ controller */ +static void ppc970_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + CPUState *cs = CPU(cpu); + + switch (pin) { + case PPC970_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPC970_INPUT_THINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("SMI IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level); + break; + case PPC970_INPUT_MCP: + /* Negative edge sensitive */ + /* XXX: TODO: actual reaction may depends on HID0 status + * 603/604/740/750: check HID0[EMCP] + */ + if (cur_level == 1 && level == 0) { + trace_ppc_irq_set_state("machine check", 1); + ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); + } + break; + case PPC970_INPUT_CKSTP: + /* Level sensitive - active low */ + /* XXX: TODO: relay the signal to CKSTP_OUT pin */ + if (level) { + trace_ppc_irq_cpu("stop"); + cs->halted = 1; + } else { + trace_ppc_irq_cpu("restart"); + cs->halted = 0; + qemu_cpu_kick(cs); + } + break; + case PPC970_INPUT_HRESET: + /* Level sensitive - active low */ + if (level) { + cpu_interrupt(cs, CPU_INTERRUPT_RESET); + } + break; + case PPC970_INPUT_SRESET: + trace_ppc_irq_set_state("RESET IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); + break; + case PPC970_INPUT_TBEN: + trace_ppc_irq_set_state("TBEN IRQ", level); + /* XXX: TODO */ + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppc970_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc970_set_irq, cpu, + PPC970_INPUT_NB); +} + +/* POWER7 internal IRQ controller */ +static void power7_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + + trace_ppc_irq_set(&cpu->env, pin, level); + + switch (pin) { + case POWER7_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + default: + g_assert_not_reached(); + } +} + +void ppcPOWER7_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&power7_set_irq, cpu, + POWER7_INPUT_NB); +} + +/* POWER9 internal IRQ controller */ +static void power9_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + + trace_ppc_irq_set(&cpu->env, pin, level); + + switch (pin) { + case POWER9_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case POWER9_INPUT_HINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("HV external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level); + break; + default: + g_assert_not_reached(); + return; + } +} + +void ppcPOWER9_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&power9_set_irq, cpu, + POWER9_INPUT_NB); +} +#endif /* defined(TARGET_PPC64) */ + +void ppc40x_core_reset(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + target_ulong dbsr; + + qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC core\n"); + 
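/*
 * Editor's sketch, not part of the original patch: every per-pin
 * handler in this file filters out spurious events with the same
 * irq_input_state test. A standalone form of that check (the
 * "example_" name is hypothetical; assumes <stdint.h>/<stdbool.h>),
 * disabled with #if 0 so it has no effect on the build:
 */
#if 0
static bool example_pin_transition(uint32_t irq_input_state, int pin, int level)
{
    int cur_level = (irq_input_state >> pin) & 1;
    /* act only on real 1 -> 0 and 0 -> non-zero transitions */
    return (cur_level == 1 && level == 0) || (cur_level == 0 && level != 0);
}
#endif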
cpu_interrupt(CPU(cpu), CPU_INTERRUPT_RESET); + dbsr = env->spr[SPR_40x_DBSR]; + dbsr &= ~0x00000300; + dbsr |= 0x00000100; + env->spr[SPR_40x_DBSR] = dbsr; +} + +void ppc40x_chip_reset(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + target_ulong dbsr; + + qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC chip\n"); + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_RESET); + /* XXX: TODO reset all internal peripherals */ + dbsr = env->spr[SPR_40x_DBSR]; + dbsr &= ~0x00000300; + dbsr |= 0x00000200; + env->spr[SPR_40x_DBSR] = dbsr; +} + +void ppc40x_system_reset(PowerPCCPU *cpu) +{ + qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC system\n"); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); +} + +void store_40x_dbcr0(CPUPPCState *env, uint32_t val) +{ + PowerPCCPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + + switch ((val >> 28) & 0x3) { + case 0x0: + /* No action */ + break; + case 0x1: + /* Core reset */ + ppc40x_core_reset(cpu); + break; + case 0x2: + /* Chip reset */ + ppc40x_chip_reset(cpu); + break; + case 0x3: + /* System reset */ + ppc40x_system_reset(cpu); + break; + } + + qemu_mutex_unlock_iothread(); +} + +/* PowerPC 40x internal IRQ controller */ +static void ppc40x_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + CPUState *cs = CPU(cpu); + + switch (pin) { + case PPC40x_INPUT_RESET_SYS: + if (level) { + trace_ppc_irq_reset("system"); + ppc40x_system_reset(cpu); + } + break; + case PPC40x_INPUT_RESET_CHIP: + if (level) { + trace_ppc_irq_reset("chip"); + ppc40x_chip_reset(cpu); + } + break; + case PPC40x_INPUT_RESET_CORE: + /* XXX: TODO: update DBSR[MRR] */ + if (level) { + trace_ppc_irq_reset("core"); + ppc40x_core_reset(cpu); + } + break; + case PPC40x_INPUT_CINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("critical IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); + break; + case PPC40x_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPC40x_INPUT_HALT: + /* Level sensitive - active low */ + if (level) { + trace_ppc_irq_cpu("stop"); + cs->halted = 1; + } else { + trace_ppc_irq_cpu("restart"); + cs->halted = 0; + qemu_cpu_kick(cs); + } + break; + case PPC40x_INPUT_DEBUG: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("debug pin", level); + ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppc40x_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc40x_set_irq, + cpu, PPC40x_INPUT_NB); +} + +/* PowerPC E500 internal IRQ controller */ +static void ppce500_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + switch (pin) { + case PPCE500_INPUT_MCK: + if (level) { + trace_ppc_irq_reset("system"); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + 
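/*
 * Editor's sketch, not part of the original patch: store_40x_dbcr0()
 * above dispatches on the two reset-request bits of DBCR0, i.e.
 * (val >> 28) & 0x3. A standalone decoder (hypothetical "example_"
 * names), disabled with #if 0:
 */
#if 0
enum example_40x_reset {
    EX_RST_NONE   = 0x0,
    EX_RST_CORE   = 0x1,
    EX_RST_CHIP   = 0x2,
    EX_RST_SYSTEM = 0x3,
};

static enum example_40x_reset example_dbcr0_reset(uint32_t dbcr0)
{
    return (enum example_40x_reset)((dbcr0 >> 28) & 0x3);
}
#endif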
break; + case PPCE500_INPUT_RESET_CORE: + if (level) { + trace_ppc_irq_reset("core"); + ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level); + } + break; + case PPCE500_INPUT_CINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("critical IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); + break; + case PPCE500_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("core IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPCE500_INPUT_DEBUG: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("debug pin", level); + ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppce500_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppce500_set_irq, + cpu, PPCE500_INPUT_NB); +} + +/* Enable or Disable the E500 EPR capability */ +void ppce500_set_mpic_proxy(bool enabled) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + cpu->env.mpic_proxy = enabled; + if (kvm_enabled()) { + kvmppc_set_mpic_proxy(cpu, enabled); + } + } +} + +/*****************************************************************************/ +/* PowerPC time base and decrementer emulation */ + +uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset) +{ + /* TB time in tb periods */ + return muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND) + tb_offset; +} + +uint64_t cpu_ppc_load_tbl (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + if (kvm_enabled()) { + return env->spr[SPR_TBL]; + } + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + trace_ppc_tb_load(tb); + + return tb; +} + +static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + trace_ppc_tb_load(tb); + + return tb >> 32; +} + +uint32_t cpu_ppc_load_tbu (CPUPPCState *env) +{ + if (kvm_enabled()) { + return env->spr[SPR_TBU]; + } + + return _cpu_ppc_load_tbu(env); +} + +static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, + int64_t *tb_offsetp, uint64_t value) +{ + *tb_offsetp = value - + muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND); + + trace_ppc_tb_store(value, *tb_offsetp); +} + +void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + tb &= 0xFFFFFFFF00000000ULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->tb_offset, tb | (uint64_t)value); +} + +static inline void _cpu_ppc_store_tbu(CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + tb &= 0x00000000FFFFFFFFULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->tb_offset, ((uint64_t)value << 32) | tb); +} + +void cpu_ppc_store_tbu (CPUPPCState *env, uint32_t value) +{ + _cpu_ppc_store_tbu(env, value); +} + +uint64_t cpu_ppc_load_atbl (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + trace_ppc_tb_load(tb); + + 
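/*
 * Editor's sketch, not part of the original patch: all the TB/ATB
 * load and store helpers around here reduce to the same two
 * conversions around a per-clock offset, using muldiv64() from
 * "qemu/host-utils.h" (a * b / c with a 128-bit intermediate).
 * Hypothetical "example_" names, disabled with #if 0:
 */
#if 0
static uint64_t example_load_ticks(uint64_t ns, uint32_t freq, int64_t off)
{
    return muldiv64(ns, freq, NANOSECONDS_PER_SECOND) + off;
}

static int64_t example_offset_for(uint64_t want, uint64_t ns, uint32_t freq)
{
    /* choose the offset so that example_load_ticks() returns "want" */
    return want - muldiv64(ns, freq, NANOSECONDS_PER_SECOND);
}
#endif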
return tb; +} + +uint32_t cpu_ppc_load_atbu (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + trace_ppc_tb_load(tb); + + return tb >> 32; +} + +void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + tb &= 0xFFFFFFFF00000000ULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->atb_offset, tb | (uint64_t)value); +} + +void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + tb &= 0x00000000FFFFFFFFULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->atb_offset, ((uint64_t)value << 32) | tb); +} + +uint64_t cpu_ppc_load_vtb(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + + return cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->vtb_offset); +} + +void cpu_ppc_store_vtb(CPUPPCState *env, uint64_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->vtb_offset, value); +} + +void cpu_ppc_store_tbu40(CPUPPCState *env, uint64_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->tb_offset); + tb &= 0xFFFFFFUL; + tb |= (value & ~0xFFFFFFUL); + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->tb_offset, tb); +} + +static void cpu_ppc_tb_stop (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb, atb, vmclk; + + /* If the time base is already frozen, do nothing */ + if (tb_env->tb_freq != 0) { + vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + /* Get the time base */ + tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->tb_offset); + /* Get the alternate time base */ + atb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->atb_offset); + /* Store the time base value (ie compute the current offset) */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, tb); + /* Store the alternate time base value (compute the current offset) */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset, atb); + /* Set the time base frequency to zero */ + tb_env->tb_freq = 0; + /* Now, the time bases are frozen to tb_offset / atb_offset value */ + } +} + +static void cpu_ppc_tb_start (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb, atb, vmclk; + + /* If the time base is not frozen, do nothing */ + if (tb_env->tb_freq == 0) { + vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + /* Get the time base from tb_offset */ + tb = tb_env->tb_offset; + /* Get the alternate time base from atb_offset */ + atb = tb_env->atb_offset; + /* Restore the tb frequency from the decrementer frequency */ + tb_env->tb_freq = tb_env->decr_freq; + /* Store the time base value */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, tb); + /* Store the alternate time base value */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset, atb); + } +} + +bool ppc_decr_clear_on_delivery(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + int flags = PPC_DECR_UNDERFLOW_TRIGGERED | PPC_DECR_UNDERFLOW_LEVEL; + return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED); +} + +static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) +{ + 
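/*
 * (Editor's note, not part of the original patch: on Book E parts the
 * decrementer stops at zero, hence the PPC_TIMER_BOOKE branch below
 * clamps to 0, while classic parts keep counting down into negative
 * values after the underflow.)
 */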
ppc_tb_t *tb_env = env->tb_env; + int64_t decr, diff; + + diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (diff >= 0) { + decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); + } else if (tb_env->flags & PPC_TIMER_BOOKE) { + decr = 0; + } else { + decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); + } + trace_ppc_decr_load(decr); + + return decr; +} + +target_ulong cpu_ppc_load_decr(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t decr; + + if (kvm_enabled()) { + return env->spr[SPR_DECR]; + } + + decr = _cpu_ppc_load_decr(env, tb_env->decr_next); + + /* + * If the large decrementer is enabled then the decrementer is sign extended + * to 64 bits, otherwise it is a 32 bit value. + */ + if (env->spr[SPR_LPCR] & LPCR_LD) { + return decr; + } + return (uint32_t) decr; +} + +target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) +{ + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + ppc_tb_t *tb_env = env->tb_env; + uint64_t hdecr; + + hdecr = _cpu_ppc_load_decr(env, tb_env->hdecr_next); + + /* + * If we have a large decrementer (POWER9 or later) then hdecr is sign + * extended to 64 bits, otherwise it is 32 bits. + */ + if (pcc->lrg_decr_bits > 32) { + return hdecr; + } + return (uint32_t) hdecr; +} + +uint64_t cpu_ppc_load_purr (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + + return cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->purr_offset); +} + +/* When the decrementer expires, + * all we need to do is generate or queue a CPU exception + */ +static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu) +{ + /* Raise it */ + trace_ppc_decr_excp("raise"); + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1); +} + +static inline void cpu_ppc_decr_lower(PowerPCCPU *cpu) +{ + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 0); +} + +static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* Raise it */ + trace_ppc_decr_excp("raise HV"); + + /* The architecture specifies that we don't deliver HDEC + * interrupts in a PM state. Not only do they not cause a + * wakeup, they also get effectively discarded. + */ + if (!env->resume_as_sreset) { + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1); + } +} + +static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu) +{ + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0); +} + +static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, + QEMUTimer *timer, + void (*raise_excp)(void *), + void (*lower_excp)(PowerPCCPU *), + target_ulong decr, target_ulong value, + int nr_bits) +{ + CPUPPCState *env = &cpu->env; + ppc_tb_t *tb_env = env->tb_env; + uint64_t now, next; + int64_t signed_value; + int64_t signed_decr; + + /* Truncate value to decr_width and sign extend for simplicity */ + signed_value = sextract64(value, 0, nr_bits); + signed_decr = sextract64(decr, 0, nr_bits); + + trace_ppc_decr_store(nr_bits, decr, value); + + if (kvm_enabled()) { + /* KVM handles decrementer exceptions, we don't need our own timer */ + return; + } + + /* + * Going from 2 -> 1, 1 -> 0 or 0 -> -1 is the event to generate a DEC + * interrupt. + * + * If we get a really small DEC value, we can assume that by the time we + * handled it we should inject an interrupt already. + * + * On MSB level based DEC implementations the MSB always means the interrupt + * is pending, so raise it on those. + * + * On MSB edge based DEC implementations the MSB going from 0 -> 1 triggers + * an edge interrupt, so raise it here too.
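 *
 * (Editor's worked example, not part of the original patch: with a
 * 32-bit DECR, storing 0x80000000 makes signed_value negative; on an
 * edge-triggered implementation where the previous signed_decr was
 * still non-negative, this takes the "raise now" path below instead
 * of arming the timer roughly four seconds out, assuming a 512 MHz
 * decrementer frequency.)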
+ */ + if ((value < 3) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0 + && signed_decr >= 0)) { + (*raise_excp)(cpu); + return; + } + + /* On MSB level based systems a 0 for the MSB stops interrupt delivery */ + if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { + (*lower_excp)(cpu); + } + + /* Calculate the next timer event */ + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq); + *nextp = next; + + /* Adjust timer */ + timer_mod(timer, next); +} + +static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr, + target_ulong value, int nr_bits) +{ + ppc_tb_t *tb_env = cpu->env.tb_env; + + __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer, + tb_env->decr_timer->cb, &cpu_ppc_decr_lower, decr, + value, nr_bits); +} + +void cpu_ppc_store_decr(CPUPPCState *env, target_ulong value) +{ + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + int nr_bits = 32; + + if (env->spr[SPR_LPCR] & LPCR_LD) { + nr_bits = pcc->lrg_decr_bits; + } + + _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits); +} + +static void cpu_ppc_decr_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_ppc_decr_excp(cpu); +} + +static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr, + target_ulong value, int nr_bits) +{ + ppc_tb_t *tb_env = cpu->env.tb_env; + + if (tb_env->hdecr_timer != NULL) { + __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer, + tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower, + hdecr, value, nr_bits); + } +} + +void cpu_ppc_store_hdecr(CPUPPCState *env, target_ulong value) +{ + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, + pcc->lrg_decr_bits); +} + +static void cpu_ppc_hdecr_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_ppc_hdecr_excp(cpu); +} + +void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->purr_offset, value); +} + +static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq) +{ + CPUPPCState *env = opaque; + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + /* There is a bug in Linux 2.4 kernels: + * if a decrementer exception is pending when it enables msr_ee at startup, + * it's not ready to handle it... 
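 *
 * (Editor's note, not part of the original patch: this is why the
 * calls just below arm DECR and HDECR with 0xFFFFFFFF, the longest
 * possible countdown, instead of leaving a near-expired value in
 * place when the frequency is set.)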
+ */ + _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); + _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); + cpu_ppc_store_purr(env, 0x0000000000000000ULL); +} + +static void timebase_save(PPCTimebase *tb) +{ + uint64_t ticks = cpu_get_host_ticks(); + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + if (!first_ppc_cpu->env.tb_env) { + error_report("No timebase object"); + return; + } + + /* not used anymore, we keep it for compatibility */ + tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); + /* + * tb_offset is only expected to be changed by QEMU so + * there is no need to update it from KVM here + */ + tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset; + + tb->runstate_paused = + runstate_check(RUN_STATE_PAUSED) || runstate_check(RUN_STATE_SAVE_VM); +} + +static void timebase_load(PPCTimebase *tb) +{ + CPUState *cpu; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + int64_t tb_off_adj, tb_off; + unsigned long freq; + + if (!first_ppc_cpu->env.tb_env) { + error_report("No timebase object"); + return; + } + + freq = first_ppc_cpu->env.tb_env->tb_freq; + + tb_off_adj = tb->guest_timebase - cpu_get_host_ticks(); + + tb_off = first_ppc_cpu->env.tb_env->tb_offset; + trace_ppc_tb_adjust(tb_off, tb_off_adj, tb_off_adj - tb_off, + (tb_off_adj - tb_off) / freq); + + /* Set new offset to all CPUs */ + CPU_FOREACH(cpu) { + PowerPCCPU *pcpu = POWERPC_CPU(cpu); + pcpu->env.tb_env->tb_offset = tb_off_adj; + kvmppc_set_reg_tb_offset(pcpu, pcpu->env.tb_env->tb_offset); + } +} + +void cpu_ppc_clock_vm_state_change(void *opaque, bool running, + RunState state) +{ + PPCTimebase *tb = opaque; + + if (running) { + timebase_load(tb); + } else { + timebase_save(tb); + } +} + +/* + * When migrating a running guest, read the clock just + * before migration, so that the guest clock counts + * during the events between: + * + * * vm_stop() + * * + * * pre_save() + * + * This reduces clock difference on migration from 5s + * to 0.1s (when max_downtime == 5s), because sending the + * final pages of memory (which happens between vm_stop() + * and pre_save()) takes max_downtime. 
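 *
 * (Editor's note, not part of the original patch: timebase_save()
 * above normally runs from cpu_ppc_clock_vm_state_change() when the
 * VM stops; timebase_pre_save() below refreshes the snapshot only
 * when the guest was still running, so a paused guest or savevm
 * keeps the value captured at pause time.)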
+ */ +static int timebase_pre_save(void *opaque) +{ + PPCTimebase *tb = opaque; + + /* guest_timebase won't be overridden in case of paused guest or savevm */ + if (!tb->runstate_paused) { + timebase_save(tb); + } + + return 0; +} + +const VMStateDescription vmstate_ppc_timebase = { + .name = "timebase", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .pre_save = timebase_pre_save, + .fields = (VMStateField []) { + VMSTATE_UINT64(guest_timebase, PPCTimebase), + VMSTATE_INT64(time_of_the_day_ns, PPCTimebase), + VMSTATE_END_OF_LIST() + }, +}; + +/* Set up (once) timebase frequency (in Hz) */ +clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env; + + tb_env = g_malloc0(sizeof(ppc_tb_t)); + env->tb_env = tb_env; + tb_env->flags = PPC_DECR_UNDERFLOW_TRIGGERED; + if (is_book3s_arch2x(env)) { + /* All Book3S 64bit CPUs implement level based DEC logic */ + tb_env->flags |= PPC_DECR_UNDERFLOW_LEVEL; + } + /* Create new timer */ + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu); + if (env->has_hv_mode) { + tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb, + cpu); + } else { + tb_env->hdecr_timer = NULL; + } + cpu_ppc_set_tb_clk(env, freq); + + return &cpu_ppc_set_tb_clk; +} + +/* Specific helpers for POWER & PowerPC 601 RTC */ +void cpu_ppc601_store_rtcu (CPUPPCState *env, uint32_t value) +{ + _cpu_ppc_store_tbu(env, value); +} + +uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env) +{ + return _cpu_ppc_load_tbu(env); +} + +void cpu_ppc601_store_rtcl (CPUPPCState *env, uint32_t value) +{ + cpu_ppc_store_tbl(env, value & 0x3FFFFF80); +} + +uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env) +{ + return cpu_ppc_load_tbl(env) & 0x3FFFFF80; +} + +/*****************************************************************************/ +/* PowerPC 40x timers */ + +/* PIT, FIT & WDT */ +typedef struct ppc40x_timer_t ppc40x_timer_t; +struct ppc40x_timer_t { + uint64_t pit_reload; /* PIT auto-reload value */ + uint64_t fit_next; /* Tick for next FIT interrupt */ + QEMUTimer *fit_timer; + uint64_t wdt_next; /* Tick for next WDT interrupt */ + QEMUTimer *wdt_timer; + + /* 405 have the PIT, 440 have a DECR. 
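 *
 * (Editor's note, not part of the original patch: because of this
 * difference the exception raised on PIT expiry is not hard-coded;
 * it is kept in decr_excp below and supplied by the board through
 * ppc_40x_timers_init(), so cpu_4xx_pit_cb() raises the right one.)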
*/ + unsigned int decr_excp; +}; + +/* Fixed interval timer */ +static void cpu_4xx_fit_cb (void *opaque) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + uint64_t now, next; + + env = opaque; + cpu = env_archcpu(env); + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + switch ((env->spr[SPR_40x_TCR] >> 24) & 0x3) { + case 0: + next = 1 << 9; + break; + case 1: + next = 1 << 13; + break; + case 2: + next = 1 << 17; + break; + case 3: + next = 1 << 21; + break; + default: + /* Cannot occur, but makes gcc happy */ + return; + } + next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->tb_freq); + if (next == now) + next++; + timer_mod(ppc40x_timer->fit_timer, next); + env->spr[SPR_40x_TSR] |= 1 << 26; + if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) { + ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1); + } + trace_ppc4xx_fit((int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), + env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); +} + +/* Programmable interval timer */ +static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) +{ + ppc40x_timer_t *ppc40x_timer; + uint64_t now, next; + + ppc40x_timer = tb_env->opaque; + if (ppc40x_timer->pit_reload <= 1 || + !((env->spr[SPR_40x_TCR] >> 26) & 0x1) || + (is_excp && !((env->spr[SPR_40x_TCR] >> 22) & 0x1))) { + /* Stop PIT */ + trace_ppc4xx_pit_stop(); + timer_del(tb_env->decr_timer); + } else { + trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + next = now + muldiv64(ppc40x_timer->pit_reload, + NANOSECONDS_PER_SECOND, tb_env->decr_freq); + if (is_excp) + next += tb_env->decr_next - now; + if (next == now) + next++; + timer_mod(tb_env->decr_timer, next); + tb_env->decr_next = next; + } +} + +static void cpu_4xx_pit_cb (void *opaque) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + + env = opaque; + cpu = env_archcpu(env); + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + env->spr[SPR_40x_TSR] |= 1 << 27; + if ((env->spr[SPR_40x_TCR] >> 26) & 0x1) { + ppc_set_irq(cpu, ppc40x_timer->decr_excp, 1); + } + start_stop_pit(env, tb_env, 1); + trace_ppc4xx_pit((int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), + (int)((env->spr[SPR_40x_TCR] >> 26) & 0x1), + env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR], + ppc40x_timer->pit_reload); +} + +/* Watchdog timer */ +static void cpu_4xx_wdt_cb (void *opaque) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + uint64_t now, next; + + env = opaque; + cpu = env_archcpu(env); + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + switch ((env->spr[SPR_40x_TCR] >> 30) & 0x3) { + case 0: + next = 1 << 17; + break; + case 1: + next = 1 << 21; + break; + case 2: + next = 1 << 25; + break; + case 3: + next = 1 << 29; + break; + default: + /* Cannot occur, but makes gcc happy */ + return; + } + next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq); + if (next == now) + next++; + trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) { + case 0x0: + case 0x1: + timer_mod(ppc40x_timer->wdt_timer, next); + ppc40x_timer->wdt_next = next; + env->spr[SPR_40x_TSR] |= 1U << 31; + break; + case 0x2: + timer_mod(ppc40x_timer->wdt_timer, next); + ppc40x_timer->wdt_next = next; + env->spr[SPR_40x_TSR] |= 1 << 30; + if ((env->spr[SPR_40x_TCR] >> 27) & 0x1) { + ppc_set_irq(cpu, 
PPC_INTERRUPT_WDT, 1); + } + break; + case 0x3: + env->spr[SPR_40x_TSR] &= ~0x30000000; + env->spr[SPR_40x_TSR] |= env->spr[SPR_40x_TCR] & 0x30000000; + switch ((env->spr[SPR_40x_TCR] >> 28) & 0x3) { + case 0x0: + /* No reset */ + break; + case 0x1: /* Core reset */ + ppc40x_core_reset(cpu); + break; + case 0x2: /* Chip reset */ + ppc40x_chip_reset(cpu); + break; + case 0x3: /* System reset */ + ppc40x_system_reset(cpu); + break; + } + } +} + +void store_40x_pit (CPUPPCState *env, target_ulong val) +{ + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + trace_ppc40x_store_pit(val); + ppc40x_timer->pit_reload = val; + start_stop_pit(env, tb_env, 0); +} + +target_ulong load_40x_pit (CPUPPCState *env) +{ + return cpu_ppc_load_decr(env); +} + +static void ppc_40x_set_tb_clk (void *opaque, uint32_t freq) +{ + CPUPPCState *env = opaque; + ppc_tb_t *tb_env = env->tb_env; + + trace_ppc40x_set_tb_clk(freq); + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + /* XXX: we should also update all timers */ +} + +clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq, + unsigned int decr_excp) +{ + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + + tb_env = g_malloc0(sizeof(ppc_tb_t)); + env->tb_env = tb_env; + tb_env->flags = PPC_DECR_UNDERFLOW_TRIGGERED; + ppc40x_timer = g_malloc0(sizeof(ppc40x_timer_t)); + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + tb_env->opaque = ppc40x_timer; + trace_ppc40x_timers_init(freq); + if (ppc40x_timer != NULL) { + /* We use decr timer for PIT */ + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_pit_cb, env); + ppc40x_timer->fit_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_fit_cb, env); + ppc40x_timer->wdt_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_wdt_cb, env); + ppc40x_timer->decr_excp = decr_excp; + } + + return &ppc_40x_set_tb_clk; +} + +/*****************************************************************************/ +/* Embedded PowerPC Device Control Registers */ +typedef struct ppc_dcrn_t ppc_dcrn_t; +struct ppc_dcrn_t { + dcr_read_cb dcr_read; + dcr_write_cb dcr_write; + void *opaque; +}; + +/* XXX: on 460, DCR addresses are 32 bits wide, + * using DCRIPR to get the 22 upper bits of the DCR address + */ +#define DCRN_NB 1024 +struct ppc_dcr_t { + ppc_dcrn_t dcrn[DCRN_NB]; + int (*read_error)(int dcrn); + int (*write_error)(int dcrn); +}; + +int ppc_dcr_read (ppc_dcr_t *dcr_env, int dcrn, uint32_t *valp) +{ + ppc_dcrn_t *dcr; + + if (dcrn < 0 || dcrn >= DCRN_NB) + goto error; + dcr = &dcr_env->dcrn[dcrn]; + if (dcr->dcr_read == NULL) + goto error; + *valp = (*dcr->dcr_read)(dcr->opaque, dcrn); + + return 0; + + error: + if (dcr_env->read_error != NULL) + return (*dcr_env->read_error)(dcrn); + + return -1; +} + +int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val) +{ + ppc_dcrn_t *dcr; + + if (dcrn < 0 || dcrn >= DCRN_NB) + goto error; + dcr = &dcr_env->dcrn[dcrn]; + if (dcr->dcr_write == NULL) + goto error; + (*dcr->dcr_write)(dcr->opaque, dcrn, val); + + return 0; + + error: + if (dcr_env->write_error != NULL) + return (*dcr_env->write_error)(dcrn); + + return -1; +} + +int ppc_dcr_register (CPUPPCState *env, int dcrn, void *opaque, + dcr_read_cb dcr_read, dcr_write_cb dcr_write) +{ + ppc_dcr_t *dcr_env; + ppc_dcrn_t *dcr; + + dcr_env = env->dcr_env; + if (dcr_env == NULL) + return -1; + if (dcrn < 0 || dcrn >= DCRN_NB) + return -1; + dcr = &dcr_env->dcrn[dcrn]; + if (dcr->opaque != NULL || + dcr->dcr_read != NULL || + dcr->dcr_write != 
NULL) + return -1; + dcr->opaque = opaque; + dcr->dcr_read = dcr_read; + dcr->dcr_write = dcr_write; + + return 0; +} + +int ppc_dcr_init (CPUPPCState *env, int (*read_error)(int dcrn), + int (*write_error)(int dcrn)) +{ + ppc_dcr_t *dcr_env; + + dcr_env = g_malloc0(sizeof(ppc_dcr_t)); + dcr_env->read_error = read_error; + dcr_env->write_error = write_error; + env->dcr_env = dcr_env; + + return 0; +} + +/*****************************************************************************/ + +int ppc_cpu_pir(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + return env->spr_cb[SPR_PIR].default_value; +} + +PowerPCCPU *ppc_get_vcpu_by_pir(int pir) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + if (ppc_cpu_pir(cpu) == pir) { + return cpu; + } + } + + return NULL; +} + +void ppc_irq_reset(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_input_state = 0; + kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); +} diff --git a/hw/ppc/ppc405.h b/hw/ppc/ppc405.h new file mode 100644 index 000000000..c58f73988 --- /dev/null +++ b/hw/ppc/ppc405.h @@ -0,0 +1,72 @@ +/* + * QEMU PowerPC 405 shared definitions + * + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef PPC405_H +#define PPC405_H + +#include "hw/ppc/ppc4xx.h" + +/* Bootinfo as set-up by u-boot */ +typedef struct ppc4xx_bd_info_t ppc4xx_bd_info_t; +struct ppc4xx_bd_info_t { + uint32_t bi_memstart; + uint32_t bi_memsize; + uint32_t bi_flashstart; + uint32_t bi_flashsize; + uint32_t bi_flashoffset; /* 0x10 */ + uint32_t bi_sramstart; + uint32_t bi_sramsize; + uint32_t bi_bootflags; + uint32_t bi_ipaddr; /* 0x20 */ + uint8_t bi_enetaddr[6]; + uint16_t bi_ethspeed; + uint32_t bi_intfreq; + uint32_t bi_busfreq; /* 0x30 */ + uint32_t bi_baudrate; + uint8_t bi_s_version[4]; + uint8_t bi_r_version[32]; + uint32_t bi_procfreq; + uint32_t bi_plb_busfreq; + uint32_t bi_pci_busfreq; + uint8_t bi_pci_enetaddr[6]; + uint32_t bi_pci_enetaddr2[6]; + uint32_t bi_opbfreq; + uint32_t bi_iic_fast[2]; +}; + +/* PowerPC 405 core */ +ram_addr_t ppc405_set_bootinfo (CPUPPCState *env, ppc4xx_bd_info_t *bd, + uint32_t flags); + +void ppc4xx_plb_init(CPUPPCState *env); +void ppc405_ebc_init(CPUPPCState *env); + +CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem, + MemoryRegion ram_memories[2], + hwaddr ram_bases[2], + hwaddr ram_sizes[2], + uint32_t sysclk, DeviceState **uicdev, + int do_init); + +#endif /* PPC405_H */ diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c new file mode 100644 index 000000000..972a7a4a3 --- /dev/null +++ b/hw/ppc/ppc405_boards.c @@ -0,0 +1,564 @@ +/* + * QEMU PowerPC 405 evaluation boards emulation + * + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "ppc405.h" +#include "hw/rtc/m48t59.h" +#include "hw/block/flash.h" +#include "sysemu/qtest.h" +#include "sysemu/reset.h" +#include "sysemu/block-backend.h" +#include "hw/boards.h" +#include "qemu/error-report.h" +#include "hw/loader.h" +#include "qemu/cutils.h" + +#define BIOS_FILENAME "ppc405_rom.bin" +#define BIOS_SIZE (2 * MiB) + +#define KERNEL_LOAD_ADDR 0x00000000 +#define INITRD_LOAD_ADDR 0x01800000 + +#define USE_FLASH_BIOS + +/*****************************************************************************/ +/* PPC405EP reference board (IBM) */ +/* Standalone board with: + * - PowerPC 405EP CPU + * - SDRAM (0x00000000) + * - Flash (0xFFF80000) + * - SRAM (0xFFF00000) + * - NVRAM (0xF0000000) + * - FPGA (0xF0300000) + */ +typedef struct ref405ep_fpga_t ref405ep_fpga_t; +struct ref405ep_fpga_t { + uint8_t reg0; + uint8_t reg1; +}; + +static uint64_t ref405ep_fpga_readb(void *opaque, hwaddr addr, unsigned size) +{ + ref405ep_fpga_t *fpga; + uint32_t ret; + + fpga = opaque; + switch (addr) { + case 0x0: + ret = fpga->reg0; + break; + case 0x1: + ret = fpga->reg1; + break; + default: + ret = 0; + break; + } + + return ret; +} + +static void ref405ep_fpga_writeb(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + ref405ep_fpga_t *fpga; + + fpga = opaque; + switch (addr) { + case 0x0: + /* Read only */ + break; + case 0x1: + fpga->reg1 = value; + break; + default: + break; + } +} + +static const MemoryRegionOps ref405ep_fpga_ops = { + .read = ref405ep_fpga_readb, + .write = ref405ep_fpga_writeb, + .impl.min_access_size = 1, + .impl.max_access_size = 1, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void ref405ep_fpga_reset (void *opaque) +{ + ref405ep_fpga_t *fpga; + + fpga = opaque; + fpga->reg0 = 0x00; + fpga->reg1 = 0x0F; +} + +static void ref405ep_fpga_init(MemoryRegion *sysmem, uint32_t base) +{ + ref405ep_fpga_t *fpga; + MemoryRegion *fpga_memory = g_new(MemoryRegion, 1); + + fpga = g_malloc0(sizeof(ref405ep_fpga_t)); + memory_region_init_io(fpga_memory, NULL, &ref405ep_fpga_ops, fpga, + "fpga", 0x00000100); + memory_region_add_subregion(sysmem, base, fpga_memory); + qemu_register_reset(&ref405ep_fpga_reset, fpga); +} + +static void ref405ep_init(MachineState *machine) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + const char *bios_name = machine->firmware ?: BIOS_FILENAME; + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + char *filename; + ppc4xx_bd_info_t bd; + CPUPPCState *env; + DeviceState *dev; + SysBusDevice *s; + MemoryRegion *bios; + MemoryRegion *sram = g_new(MemoryRegion, 1); + ram_addr_t bdloc; + MemoryRegion *ram_memories = g_new(MemoryRegion, 2); + hwaddr ram_bases[2], ram_sizes[2]; + target_ulong sram_size; + long bios_size; + //int phy_addr = 0; + //static int phy_addr = 1; + target_ulong kernel_base, initrd_base; + long kernel_size, initrd_size; + int linux_boot; + int len; + DriveInfo *dinfo; + MemoryRegion *sysmem = get_system_memory(); + DeviceState *uicdev; + + if (machine->ram_size != mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + 
g_free(sz); + exit(EXIT_FAILURE); + } + + /* XXX: fix this */ + memory_region_init_alias(&ram_memories[0], NULL, "ef405ep.ram.alias", + machine->ram, 0, machine->ram_size); + ram_bases[0] = 0; + ram_sizes[0] = machine->ram_size; + memory_region_init(&ram_memories[1], NULL, "ef405ep.ram1", 0); + ram_bases[1] = 0x00000000; + ram_sizes[1] = 0x00000000; + env = ppc405ep_init(sysmem, ram_memories, ram_bases, ram_sizes, + 33333333, &uicdev, kernel_filename == NULL ? 0 : 1); + /* allocate SRAM */ + sram_size = 512 * KiB; + memory_region_init_ram(sram, NULL, "ef405ep.sram", sram_size, + &error_fatal); + memory_region_add_subregion(sysmem, 0xFFF00000, sram); + /* allocate and load BIOS */ +#ifdef USE_FLASH_BIOS + dinfo = drive_get(IF_PFLASH, 0, 0); + if (dinfo) { + bios_size = 8 * MiB; + pflash_cfi02_register((uint32_t)(-bios_size), + "ef405ep.bios", bios_size, + blk_by_legacy_dinfo(dinfo), + 64 * KiB, 1, + 2, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA, + 1); + } else +#endif + { + bios = g_new(MemoryRegion, 1); + memory_region_init_rom(bios, NULL, "ef405ep.bios", BIOS_SIZE, + &error_fatal); + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = load_image_size(filename, + memory_region_get_ram_ptr(bios), + BIOS_SIZE); + g_free(filename); + if (bios_size < 0) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } + bios_size = (bios_size + 0xfff) & ~0xfff; + memory_region_add_subregion(sysmem, (uint32_t)(-bios_size), bios); + } else if (!qtest_enabled() || kernel_filename != NULL) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } else { + /* Avoid an uninitialized variable warning */ + bios_size = -1; + } + } + /* Register FPGA */ + ref405ep_fpga_init(sysmem, 0xF0300000); + /* Register NVRAM */ + dev = qdev_new("sysbus-m48t08"); + qdev_prop_set_int32(dev, "base-year", 1968); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, 0xF0000000); + /* Load kernel */ + linux_boot = (kernel_filename != NULL); + if (linux_boot) { + memset(&bd, 0, sizeof(bd)); + bd.bi_memstart = 0x00000000; + bd.bi_memsize = machine->ram_size; + bd.bi_flashstart = -bios_size; + bd.bi_flashsize = -bios_size; + bd.bi_flashoffset = 0; + bd.bi_sramstart = 0xFFF00000; + bd.bi_sramsize = sram_size; + bd.bi_bootflags = 0; + bd.bi_intfreq = 133333333; + bd.bi_busfreq = 33333333; + bd.bi_baudrate = 115200; + bd.bi_s_version[0] = 'Q'; + bd.bi_s_version[1] = 'M'; + bd.bi_s_version[2] = 'U'; + bd.bi_s_version[3] = '\0'; + bd.bi_r_version[0] = 'Q'; + bd.bi_r_version[1] = 'E'; + bd.bi_r_version[2] = 'M'; + bd.bi_r_version[3] = 'U'; + bd.bi_r_version[4] = '\0'; + bd.bi_procfreq = 133333333; + bd.bi_plb_busfreq = 33333333; + bd.bi_pci_busfreq = 33333333; + bd.bi_opbfreq = 33333333; + bdloc = ppc405_set_bootinfo(env, &bd, 0x00000001); + env->gpr[3] = bdloc; + kernel_base = KERNEL_LOAD_ADDR; + /* now we can load the kernel */ + kernel_size = load_image_targphys(kernel_filename, kernel_base, + machine->ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + printf("Load kernel size %ld at " TARGET_FMT_lx, + kernel_size, kernel_base); + /* load initrd */ + if (initrd_filename) { + initrd_base = INITRD_LOAD_ADDR; + initrd_size = load_image_targphys(initrd_filename, initrd_base, + machine->ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + } else { 
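/*
 * Editor's sketch, not part of the original patch: the command-line
 * placement a little further down reserves space under the bd_info
 * block by rounding the string length up to a 256-byte multiple,
 * bdloc -= ((len + 255) & ~255). Standalone form of that arithmetic
 * (hypothetical "example_" name), disabled with #if 0:
 */
#if 0
static unsigned long example_round_up_256(unsigned long len)
{
    return (len + 255UL) & ~255UL;   /* 1 -> 256, 256 -> 256, 257 -> 512 */
}
#endif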
+ initrd_base = 0; + initrd_size = 0; + } + env->gpr[4] = initrd_base; + env->gpr[5] = initrd_size; + if (kernel_cmdline != NULL) { + len = strlen(kernel_cmdline); + bdloc -= ((len + 255) & ~255); + cpu_physical_memory_write(bdloc, kernel_cmdline, len + 1); + env->gpr[6] = bdloc; + env->gpr[7] = bdloc + len; + } else { + env->gpr[6] = 0; + env->gpr[7] = 0; + } + env->nip = KERNEL_LOAD_ADDR; + } else { + kernel_base = 0; + kernel_size = 0; + initrd_base = 0; + initrd_size = 0; + bdloc = 0; + } +} + +static void ref405ep_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "ref405ep"; + mc->init = ref405ep_init; + mc->default_ram_size = 0x08000000; + mc->default_ram_id = "ef405ep.ram"; +} + +static const TypeInfo ref405ep_type = { + .name = MACHINE_TYPE_NAME("ref405ep"), + .parent = TYPE_MACHINE, + .class_init = ref405ep_class_init, +}; + +/*****************************************************************************/ +/* AMCC Taihu evaluation board */ +/* - PowerPC 405EP processor + * - SDRAM 128 MB at 0x00000000 + * - Boot flash 2 MB at 0xFFE00000 + * - Application flash 32 MB at 0xFC000000 + * - 2 serial ports + * - 2 ethernet PHY + * - 1 USB 1.1 device 0x50000000 + * - 1 LCD display 0x50100000 + * - 1 CPLD 0x50100000 + * - 1 I2C EEPROM + * - 1 I2C thermal sensor + * - a set of LEDs + * - bit-bang SPI port using GPIOs + * - 1 EBC interface connector 0 0x50200000 + * - 1 cardbus controller + expansion slot. + * - 1 PCI expansion slot. + */ +typedef struct taihu_cpld_t taihu_cpld_t; +struct taihu_cpld_t { + uint8_t reg0; + uint8_t reg1; +}; + +static uint64_t taihu_cpld_read(void *opaque, hwaddr addr, unsigned size) +{ + taihu_cpld_t *cpld; + uint32_t ret; + + cpld = opaque; + switch (addr) { + case 0x0: + ret = cpld->reg0; + break; + case 0x1: + ret = cpld->reg1; + break; + default: + ret = 0; + break; + } + + return ret; +} + +static void taihu_cpld_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + taihu_cpld_t *cpld; + + cpld = opaque; + switch (addr) { + case 0x0: + /* Read only */ + break; + case 0x1: + cpld->reg1 = value; + break; + default: + break; + } +} + +static const MemoryRegionOps taihu_cpld_ops = { + .read = taihu_cpld_read, + .write = taihu_cpld_write, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void taihu_cpld_reset (void *opaque) +{ + taihu_cpld_t *cpld; + + cpld = opaque; + cpld->reg0 = 0x01; + cpld->reg1 = 0x80; +} + +static void taihu_cpld_init(MemoryRegion *sysmem, uint32_t base) +{ + taihu_cpld_t *cpld; + MemoryRegion *cpld_memory = g_new(MemoryRegion, 1); + + cpld = g_malloc0(sizeof(taihu_cpld_t)); + memory_region_init_io(cpld_memory, NULL, &taihu_cpld_ops, cpld, "cpld", 0x100); + memory_region_add_subregion(sysmem, base, cpld_memory); + qemu_register_reset(&taihu_cpld_reset, cpld); +} + +static void taihu_405ep_init(MachineState *machine) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + const char *bios_name = machine->firmware ?: BIOS_FILENAME; + const char *kernel_filename = machine->kernel_filename; + const char *initrd_filename = machine->initrd_filename; + char *filename; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *bios; + MemoryRegion *ram_memories = g_new(MemoryRegion, 2); + hwaddr ram_bases[2], ram_sizes[2]; + long bios_size; + target_ulong kernel_base, initrd_base; + long kernel_size, initrd_size; + int linux_boot; + int fl_idx; + DriveInfo *dinfo; + DeviceState *uicdev; + + if (machine->ram_size 
!= mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } + + ram_bases[0] = 0; + ram_sizes[0] = 0x04000000; + memory_region_init_alias(&ram_memories[0], NULL, + "taihu_405ep.ram-0", machine->ram, ram_bases[0], + ram_sizes[0]); + ram_bases[1] = 0x04000000; + ram_sizes[1] = 0x04000000; + memory_region_init_alias(&ram_memories[1], NULL, + "taihu_405ep.ram-1", machine->ram, ram_bases[1], + ram_sizes[1]); + ppc405ep_init(sysmem, ram_memories, ram_bases, ram_sizes, + 33333333, &uicdev, kernel_filename == NULL ? 0 : 1); + /* allocate and load BIOS */ + fl_idx = 0; +#if defined(USE_FLASH_BIOS) + dinfo = drive_get(IF_PFLASH, 0, fl_idx); + if (dinfo) { + bios_size = 2 * MiB; + pflash_cfi02_register(0xFFE00000, + "taihu_405ep.bios", bios_size, + blk_by_legacy_dinfo(dinfo), + 64 * KiB, 1, + 4, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA, + 1); + fl_idx++; + } else +#endif + { + bios = g_new(MemoryRegion, 1); + memory_region_init_rom(bios, NULL, "taihu_405ep.bios", BIOS_SIZE, + &error_fatal); + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = load_image_size(filename, + memory_region_get_ram_ptr(bios), + BIOS_SIZE); + g_free(filename); + if (bios_size < 0) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } + bios_size = (bios_size + 0xfff) & ~0xfff; + memory_region_add_subregion(sysmem, (uint32_t)(-bios_size), bios); + } else if (!qtest_enabled()) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } + } + /* Register Linux flash */ + dinfo = drive_get(IF_PFLASH, 0, fl_idx); + if (dinfo) { + bios_size = 32 * MiB; + pflash_cfi02_register(0xfc000000, "taihu_405ep.flash", bios_size, + blk_by_legacy_dinfo(dinfo), + 64 * KiB, 1, + 4, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA, + 1); + fl_idx++; + } + /* Register CPLD & LCD display */ + taihu_cpld_init(sysmem, 0x50100000); + /* Load kernel */ + linux_boot = (kernel_filename != NULL); + if (linux_boot) { + kernel_base = KERNEL_LOAD_ADDR; + /* now we can load the kernel */ + kernel_size = load_image_targphys(kernel_filename, kernel_base, + machine->ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + /* load initrd */ + if (initrd_filename) { + initrd_base = INITRD_LOAD_ADDR; + initrd_size = load_image_targphys(initrd_filename, initrd_base, + machine->ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + } else { + initrd_base = 0; + initrd_size = 0; + } + } else { + kernel_base = 0; + kernel_size = 0; + initrd_base = 0; + initrd_size = 0; + } +} + +static void taihu_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "taihu"; + mc->init = taihu_405ep_init; + mc->default_ram_size = 0x08000000; + mc->default_ram_id = "taihu_405ep.ram"; +} + +static const TypeInfo taihu_type = { + .name = MACHINE_TYPE_NAME("taihu"), + .parent = TYPE_MACHINE, + .class_init = taihu_class_init, +}; + +static void ppc405_machine_init(void) +{ + type_register_static(&ref405ep_type); + type_register_static(&taihu_type); +} + +type_init(ppc405_machine_init) diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c new file mode 100644 index 000000000..e632c408b --- /dev/null +++ b/hw/ppc/ppc405_uc.c @@ -0,0 +1,1547 @@ +/* + * QEMU PowerPC 405 embedded processors emulation + *
+ * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/i2c/ppc4xx_i2c.h" +#include "hw/irq.h" +#include "ppc405.h" +#include "hw/char/serial.h" +#include "qemu/timer.h" +#include "sysemu/reset.h" +#include "sysemu/sysemu.h" +#include "exec/address-spaces.h" +#include "hw/intc/ppc-uic.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" + +//#define DEBUG_OPBA +//#define DEBUG_SDRAM +//#define DEBUG_GPIO +//#define DEBUG_SERIAL +//#define DEBUG_OCM +//#define DEBUG_GPT +//#define DEBUG_CLOCKS +//#define DEBUG_CLOCKS_LL + +ram_addr_t ppc405_set_bootinfo (CPUPPCState *env, ppc4xx_bd_info_t *bd, + uint32_t flags) +{ + CPUState *cs = env_cpu(env); + ram_addr_t bdloc; + int i, n; + + /* We put the bd structure at the top of memory */ + if (bd->bi_memsize >= 0x01000000UL) + bdloc = 0x01000000UL - sizeof(struct ppc4xx_bd_info_t); + else + bdloc = bd->bi_memsize - sizeof(struct ppc4xx_bd_info_t); + stl_be_phys(cs->as, bdloc + 0x00, bd->bi_memstart); + stl_be_phys(cs->as, bdloc + 0x04, bd->bi_memsize); + stl_be_phys(cs->as, bdloc + 0x08, bd->bi_flashstart); + stl_be_phys(cs->as, bdloc + 0x0C, bd->bi_flashsize); + stl_be_phys(cs->as, bdloc + 0x10, bd->bi_flashoffset); + stl_be_phys(cs->as, bdloc + 0x14, bd->bi_sramstart); + stl_be_phys(cs->as, bdloc + 0x18, bd->bi_sramsize); + stl_be_phys(cs->as, bdloc + 0x1C, bd->bi_bootflags); + stl_be_phys(cs->as, bdloc + 0x20, bd->bi_ipaddr); + for (i = 0; i < 6; i++) { + stb_phys(cs->as, bdloc + 0x24 + i, bd->bi_enetaddr[i]); + } + stw_be_phys(cs->as, bdloc + 0x2A, bd->bi_ethspeed); + stl_be_phys(cs->as, bdloc + 0x2C, bd->bi_intfreq); + stl_be_phys(cs->as, bdloc + 0x30, bd->bi_busfreq); + stl_be_phys(cs->as, bdloc + 0x34, bd->bi_baudrate); + for (i = 0; i < 4; i++) { + stb_phys(cs->as, bdloc + 0x38 + i, bd->bi_s_version[i]); + } + for (i = 0; i < 32; i++) { + stb_phys(cs->as, bdloc + 0x3C + i, bd->bi_r_version[i]); + } + stl_be_phys(cs->as, bdloc + 0x5C, bd->bi_plb_busfreq); + stl_be_phys(cs->as, bdloc + 0x60, bd->bi_pci_busfreq); + for (i = 0; i < 6; i++) { + stb_phys(cs->as, bdloc + 0x64 + i, bd->bi_pci_enetaddr[i]); + } + n = 0x6A; + if (flags & 0x00000001) { + for (i = 0; i < 6; i++) + stb_phys(cs->as, bdloc + n++, bd->bi_pci_enetaddr2[i]); + } + stl_be_phys(cs->as, bdloc + n, bd->bi_opbfreq); + n += 4; + for (i = 0; i < 2; i++) { + stl_be_phys(cs->as, bdloc + 
n, bd->bi_iic_fast[i]); + n += 4; + } + + return bdloc; +} + +/*****************************************************************************/ +/* Shared peripherals */ + +/*****************************************************************************/ +/* Peripheral local bus arbitrer */ +enum { + PLB3A0_ACR = 0x077, + PLB4A0_ACR = 0x081, + PLB0_BESR = 0x084, + PLB0_BEAR = 0x086, + PLB0_ACR = 0x087, + PLB4A1_ACR = 0x089, +}; + +typedef struct ppc4xx_plb_t ppc4xx_plb_t; +struct ppc4xx_plb_t { + uint32_t acr; + uint32_t bear; + uint32_t besr; +}; + +static uint32_t dcr_read_plb (void *opaque, int dcrn) +{ + ppc4xx_plb_t *plb; + uint32_t ret; + + plb = opaque; + switch (dcrn) { + case PLB0_ACR: + ret = plb->acr; + break; + case PLB0_BEAR: + ret = plb->bear; + break; + case PLB0_BESR: + ret = plb->besr; + break; + default: + /* Avoid gcc warning */ + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_plb (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_plb_t *plb; + + plb = opaque; + switch (dcrn) { + case PLB0_ACR: + /* We don't care about the actual parameters written as + * we don't manage any priorities on the bus + */ + plb->acr = val & 0xF8000000; + break; + case PLB0_BEAR: + /* Read only */ + break; + case PLB0_BESR: + /* Write-clear */ + plb->besr &= ~val; + break; + } +} + +static void ppc4xx_plb_reset (void *opaque) +{ + ppc4xx_plb_t *plb; + + plb = opaque; + plb->acr = 0x00000000; + plb->bear = 0x00000000; + plb->besr = 0x00000000; +} + +void ppc4xx_plb_init(CPUPPCState *env) +{ + ppc4xx_plb_t *plb; + + plb = g_malloc0(sizeof(ppc4xx_plb_t)); + ppc_dcr_register(env, PLB3A0_ACR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB4A0_ACR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB0_ACR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB0_BEAR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB0_BESR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB4A1_ACR, plb, &dcr_read_plb, &dcr_write_plb); + qemu_register_reset(ppc4xx_plb_reset, plb); +} + +/*****************************************************************************/ +/* PLB to OPB bridge */ +enum { + POB0_BESR0 = 0x0A0, + POB0_BESR1 = 0x0A2, + POB0_BEAR = 0x0A4, +}; + +typedef struct ppc4xx_pob_t ppc4xx_pob_t; +struct ppc4xx_pob_t { + uint32_t bear; + uint32_t besr0; + uint32_t besr1; +}; + +static uint32_t dcr_read_pob (void *opaque, int dcrn) +{ + ppc4xx_pob_t *pob; + uint32_t ret; + + pob = opaque; + switch (dcrn) { + case POB0_BEAR: + ret = pob->bear; + break; + case POB0_BESR0: + ret = pob->besr0; + break; + case POB0_BESR1: + ret = pob->besr1; + break; + default: + /* Avoid gcc warning */ + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_pob (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_pob_t *pob; + + pob = opaque; + switch (dcrn) { + case POB0_BEAR: + /* Read only */ + break; + case POB0_BESR0: + /* Write-clear */ + pob->besr0 &= ~val; + break; + case POB0_BESR1: + /* Write-clear */ + pob->besr1 &= ~val; + break; + } +} + +static void ppc4xx_pob_reset (void *opaque) +{ + ppc4xx_pob_t *pob; + + pob = opaque; + /* No error */ + pob->bear = 0x00000000; + pob->besr0 = 0x0000000; + pob->besr1 = 0x0000000; +} + +static void ppc4xx_pob_init(CPUPPCState *env) +{ + ppc4xx_pob_t *pob; + + pob = g_malloc0(sizeof(ppc4xx_pob_t)); + ppc_dcr_register(env, POB0_BEAR, pob, &dcr_read_pob, &dcr_write_pob); + ppc_dcr_register(env, POB0_BESR0, pob, &dcr_read_pob, &dcr_write_pob); + ppc_dcr_register(env, POB0_BESR1, pob, 
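+    /*
+     * The BESR registers here and in the PLB arbiter above are
+     * write-one-to-clear (reg &= ~val). Sketch of the guest-visible
+     * acknowledge sequence, calling the handlers directly:
+     *
+     *     uint32_t besr = dcr_read_plb(plb, PLB0_BESR); // fetch error bits
+     *     dcr_write_plb(plb, PLB0_BESR, besr);          // write back: cleared
+     */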
&dcr_read_pob, &dcr_write_pob); + qemu_register_reset(ppc4xx_pob_reset, pob); +} + +/*****************************************************************************/ +/* OPB arbitrer */ +typedef struct ppc4xx_opba_t ppc4xx_opba_t; +struct ppc4xx_opba_t { + MemoryRegion io; + uint8_t cr; + uint8_t pr; +}; + +static uint64_t opba_readb(void *opaque, hwaddr addr, unsigned size) +{ + ppc4xx_opba_t *opba; + uint32_t ret; + +#ifdef DEBUG_OPBA + printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr); +#endif + opba = opaque; + switch (addr) { + case 0x00: + ret = opba->cr; + break; + case 0x01: + ret = opba->pr; + break; + default: + ret = 0x00; + break; + } + + return ret; +} + +static void opba_writeb(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + ppc4xx_opba_t *opba; + +#ifdef DEBUG_OPBA + printf("%s: addr " TARGET_FMT_plx " val %08" PRIx32 "\n", __func__, addr, + value); +#endif + opba = opaque; + switch (addr) { + case 0x00: + opba->cr = value & 0xF8; + break; + case 0x01: + opba->pr = value & 0xFF; + break; + default: + break; + } +} +static const MemoryRegionOps opba_ops = { + .read = opba_readb, + .write = opba_writeb, + .impl.min_access_size = 1, + .impl.max_access_size = 1, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void ppc4xx_opba_reset (void *opaque) +{ + ppc4xx_opba_t *opba; + + opba = opaque; + opba->cr = 0x00; /* No dynamic priorities - park disabled */ + opba->pr = 0x11; +} + +static void ppc4xx_opba_init(hwaddr base) +{ + ppc4xx_opba_t *opba; + + opba = g_malloc0(sizeof(ppc4xx_opba_t)); +#ifdef DEBUG_OPBA + printf("%s: offset " TARGET_FMT_plx "\n", __func__, base); +#endif + memory_region_init_io(&opba->io, NULL, &opba_ops, opba, "opba", 0x002); + memory_region_add_subregion(get_system_memory(), base, &opba->io); + qemu_register_reset(ppc4xx_opba_reset, opba); +} + +/*****************************************************************************/ +/* Code decompression controller */ +/* XXX: TODO */ + +/*****************************************************************************/ +/* Peripheral controller */ +typedef struct ppc4xx_ebc_t ppc4xx_ebc_t; +struct ppc4xx_ebc_t { + uint32_t addr; + uint32_t bcr[8]; + uint32_t bap[8]; + uint32_t bear; + uint32_t besr0; + uint32_t besr1; + uint32_t cfg; +}; + +enum { + EBC0_CFGADDR = 0x012, + EBC0_CFGDATA = 0x013, +}; + +static uint32_t dcr_read_ebc (void *opaque, int dcrn) +{ + ppc4xx_ebc_t *ebc; + uint32_t ret; + + ebc = opaque; + switch (dcrn) { + case EBC0_CFGADDR: + ret = ebc->addr; + break; + case EBC0_CFGDATA: + switch (ebc->addr) { + case 0x00: /* B0CR */ + ret = ebc->bcr[0]; + break; + case 0x01: /* B1CR */ + ret = ebc->bcr[1]; + break; + case 0x02: /* B2CR */ + ret = ebc->bcr[2]; + break; + case 0x03: /* B3CR */ + ret = ebc->bcr[3]; + break; + case 0x04: /* B4CR */ + ret = ebc->bcr[4]; + break; + case 0x05: /* B5CR */ + ret = ebc->bcr[5]; + break; + case 0x06: /* B6CR */ + ret = ebc->bcr[6]; + break; + case 0x07: /* B7CR */ + ret = ebc->bcr[7]; + break; + case 0x10: /* B0AP */ + ret = ebc->bap[0]; + break; + case 0x11: /* B1AP */ + ret = ebc->bap[1]; + break; + case 0x12: /* B2AP */ + ret = ebc->bap[2]; + break; + case 0x13: /* B3AP */ + ret = ebc->bap[3]; + break; + case 0x14: /* B4AP */ + ret = ebc->bap[4]; + break; + case 0x15: /* B5AP */ + ret = ebc->bap[5]; + break; + case 0x16: /* B6AP */ + ret = ebc->bap[6]; + break; + case 0x17: /* B7AP */ + ret = ebc->bap[7]; + break; + case 0x20: /* BEAR */ + ret = ebc->bear; + break; + case 0x21: /* 
BESR0 */ + ret = ebc->besr0; + break; + case 0x22: /* BESR1 */ + ret = ebc->besr1; + break; + case 0x23: /* CFG */ + ret = ebc->cfg; + break; + default: + ret = 0x00000000; + break; + } + break; + default: + ret = 0x00000000; + break; + } + + return ret; +} + +static void dcr_write_ebc (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_ebc_t *ebc; + + ebc = opaque; + switch (dcrn) { + case EBC0_CFGADDR: + ebc->addr = val; + break; + case EBC0_CFGDATA: + switch (ebc->addr) { + case 0x00: /* B0CR */ + break; + case 0x01: /* B1CR */ + break; + case 0x02: /* B2CR */ + break; + case 0x03: /* B3CR */ + break; + case 0x04: /* B4CR */ + break; + case 0x05: /* B5CR */ + break; + case 0x06: /* B6CR */ + break; + case 0x07: /* B7CR */ + break; + case 0x10: /* B0AP */ + break; + case 0x11: /* B1AP */ + break; + case 0x12: /* B2AP */ + break; + case 0x13: /* B3AP */ + break; + case 0x14: /* B4AP */ + break; + case 0x15: /* B5AP */ + break; + case 0x16: /* B6AP */ + break; + case 0x17: /* B7AP */ + break; + case 0x20: /* BEAR */ + break; + case 0x21: /* BESR0 */ + break; + case 0x22: /* BESR1 */ + break; + case 0x23: /* CFG */ + break; + default: + break; + } + break; + default: + break; + } +} + +static void ebc_reset (void *opaque) +{ + ppc4xx_ebc_t *ebc; + int i; + + ebc = opaque; + ebc->addr = 0x00000000; + ebc->bap[0] = 0x7F8FFE80; + ebc->bcr[0] = 0xFFE28000; + for (i = 0; i < 8; i++) { + ebc->bap[i] = 0x00000000; + ebc->bcr[i] = 0x00000000; + } + ebc->besr0 = 0x00000000; + ebc->besr1 = 0x00000000; + ebc->cfg = 0x80400000; +} + +void ppc405_ebc_init(CPUPPCState *env) +{ + ppc4xx_ebc_t *ebc; + + ebc = g_malloc0(sizeof(ppc4xx_ebc_t)); + qemu_register_reset(&ebc_reset, ebc); + ppc_dcr_register(env, EBC0_CFGADDR, + ebc, &dcr_read_ebc, &dcr_write_ebc); + ppc_dcr_register(env, EBC0_CFGDATA, + ebc, &dcr_read_ebc, &dcr_write_ebc); +} + +/*****************************************************************************/ +/* DMA controller */ +enum { + DMA0_CR0 = 0x100, + DMA0_CT0 = 0x101, + DMA0_DA0 = 0x102, + DMA0_SA0 = 0x103, + DMA0_SG0 = 0x104, + DMA0_CR1 = 0x108, + DMA0_CT1 = 0x109, + DMA0_DA1 = 0x10A, + DMA0_SA1 = 0x10B, + DMA0_SG1 = 0x10C, + DMA0_CR2 = 0x110, + DMA0_CT2 = 0x111, + DMA0_DA2 = 0x112, + DMA0_SA2 = 0x113, + DMA0_SG2 = 0x114, + DMA0_CR3 = 0x118, + DMA0_CT3 = 0x119, + DMA0_DA3 = 0x11A, + DMA0_SA3 = 0x11B, + DMA0_SG3 = 0x11C, + DMA0_SR = 0x120, + DMA0_SGC = 0x123, + DMA0_SLP = 0x125, + DMA0_POL = 0x126, +}; + +typedef struct ppc405_dma_t ppc405_dma_t; +struct ppc405_dma_t { + qemu_irq irqs[4]; + uint32_t cr[4]; + uint32_t ct[4]; + uint32_t da[4]; + uint32_t sa[4]; + uint32_t sg[4]; + uint32_t sr; + uint32_t sgc; + uint32_t slp; + uint32_t pol; +}; + +static uint32_t dcr_read_dma (void *opaque, int dcrn) +{ + return 0; +} + +static void dcr_write_dma (void *opaque, int dcrn, uint32_t val) +{ +} + +static void ppc405_dma_reset (void *opaque) +{ + ppc405_dma_t *dma; + int i; + + dma = opaque; + for (i = 0; i < 4; i++) { + dma->cr[i] = 0x00000000; + dma->ct[i] = 0x00000000; + dma->da[i] = 0x00000000; + dma->sa[i] = 0x00000000; + dma->sg[i] = 0x00000000; + } + dma->sr = 0x00000000; + dma->sgc = 0x00000000; + dma->slp = 0x7C000000; + dma->pol = 0x00000000; +} + +static void ppc405_dma_init(CPUPPCState *env, qemu_irq irqs[4]) +{ + ppc405_dma_t *dma; + + dma = g_malloc0(sizeof(ppc405_dma_t)); + memcpy(dma->irqs, irqs, 4 * sizeof(qemu_irq)); + qemu_register_reset(&ppc405_dma_reset, dma); + ppc_dcr_register(env, DMA0_CR0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT0, + dma, 
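+    /*
+     * The EBC above is addressed indirectly: EBC0_CFGADDR selects an
+     * internal register (B0CR..B7CR, B0AP..B7AP, BEAR, BESR0/1, CFG)
+     * and EBC0_CFGDATA carries the data, so reading B0CR takes two DCR
+     * accesses. Sketch, calling the handlers directly:
+     *
+     *     dcr_write_ebc(ebc, EBC0_CFGADDR, 0x00);           // select B0CR
+     *     uint32_t b0cr = dcr_read_ebc(ebc, EBC0_CFGDATA);  // == ebc->bcr[0]
+     */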
&dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CR1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CR2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CR3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SR, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SGC, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SLP, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_POL, + dma, &dcr_read_dma, &dcr_write_dma); +} + +/*****************************************************************************/ +/* GPIO */ +typedef struct ppc405_gpio_t ppc405_gpio_t; +struct ppc405_gpio_t { + MemoryRegion io; + uint32_t or; + uint32_t tcr; + uint32_t osrh; + uint32_t osrl; + uint32_t tsrh; + uint32_t tsrl; + uint32_t odr; + uint32_t ir; + uint32_t rr1; + uint32_t isr1h; + uint32_t isr1l; +}; + +static uint64_t ppc405_gpio_read(void *opaque, hwaddr addr, unsigned size) +{ +#ifdef DEBUG_GPIO + printf("%s: addr " TARGET_FMT_plx " size %d\n", __func__, addr, size); +#endif + + return 0; +} + +static void ppc405_gpio_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ +#ifdef DEBUG_GPIO + printf("%s: addr " TARGET_FMT_plx " size %d val %08" PRIx32 "\n", + __func__, addr, size, value); +#endif +} + +static const MemoryRegionOps ppc405_gpio_ops = { + .read = ppc405_gpio_read, + .write = ppc405_gpio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void ppc405_gpio_reset (void *opaque) +{ +} + +static void ppc405_gpio_init(hwaddr base) +{ + ppc405_gpio_t *gpio; + + gpio = g_malloc0(sizeof(ppc405_gpio_t)); +#ifdef DEBUG_GPIO + printf("%s: offset " TARGET_FMT_plx "\n", __func__, base); +#endif + memory_region_init_io(&gpio->io, NULL, &ppc405_gpio_ops, gpio, "pgio", 0x038); + memory_region_add_subregion(get_system_memory(), base, &gpio->io); + qemu_register_reset(&ppc405_gpio_reset, gpio); +} + +/*****************************************************************************/ +/* On Chip Memory */ +enum { + OCM0_ISARC = 0x018, + OCM0_ISACNTL = 0x019, + OCM0_DSARC = 0x01A, + OCM0_DSACNTL = 0x01B, +}; + +typedef struct ppc405_ocm_t ppc405_ocm_t; +struct ppc405_ocm_t { + MemoryRegion ram; + MemoryRegion isarc_ram; + MemoryRegion dsarc_ram; + uint32_t isarc; + uint32_t isacntl; + uint32_t dsarc; + uint32_t dsacntl; +}; + +static void ocm_update_mappings (ppc405_ocm_t *ocm, + uint32_t isarc, 
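+    /*
+     * The twenty per-channel DMA registrations in ppc405_dma_init()
+     * above are mechanical: each of the four channels owns five
+     * consecutive DCRs (CRx, CTx, DAx, SAx, SGx) based at
+     * 0x100/0x108/0x110/0x118. An equivalent loop form (a sketch, not
+     * what this patch does) would be:
+     *
+     *     static const int chan[4] = { DMA0_CR0, DMA0_CR1,
+     *                                  DMA0_CR2, DMA0_CR3 };
+     *     for (i = 0; i < 4; i++) {
+     *         for (j = 0; j < 5; j++) {
+     *             ppc_dcr_register(env, chan[i] + j, dma,
+     *                              &dcr_read_dma, &dcr_write_dma);
+     *         }
+     *     }
+     */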
uint32_t isacntl, + uint32_t dsarc, uint32_t dsacntl) +{ +#ifdef DEBUG_OCM + printf("OCM update ISA %08" PRIx32 " %08" PRIx32 " (%08" PRIx32 + " %08" PRIx32 ") DSA %08" PRIx32 " %08" PRIx32 + " (%08" PRIx32 " %08" PRIx32 ")\n", + isarc, isacntl, dsarc, dsacntl, + ocm->isarc, ocm->isacntl, ocm->dsarc, ocm->dsacntl); +#endif + if (ocm->isarc != isarc || + (ocm->isacntl & 0x80000000) != (isacntl & 0x80000000)) { + if (ocm->isacntl & 0x80000000) { + /* Unmap previously assigned memory region */ + printf("OCM unmap ISA %08" PRIx32 "\n", ocm->isarc); + memory_region_del_subregion(get_system_memory(), &ocm->isarc_ram); + } + if (isacntl & 0x80000000) { + /* Map new instruction memory region */ +#ifdef DEBUG_OCM + printf("OCM map ISA %08" PRIx32 "\n", isarc); +#endif + memory_region_add_subregion(get_system_memory(), isarc, + &ocm->isarc_ram); + } + } + if (ocm->dsarc != dsarc || + (ocm->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) { + if (ocm->dsacntl & 0x80000000) { + /* Beware not to unmap the region we just mapped */ + if (!(isacntl & 0x80000000) || ocm->dsarc != isarc) { + /* Unmap previously assigned memory region */ +#ifdef DEBUG_OCM + printf("OCM unmap DSA %08" PRIx32 "\n", ocm->dsarc); +#endif + memory_region_del_subregion(get_system_memory(), + &ocm->dsarc_ram); + } + } + if (dsacntl & 0x80000000) { + /* Beware not to remap the region we just mapped */ + if (!(isacntl & 0x80000000) || dsarc != isarc) { + /* Map new data memory region */ +#ifdef DEBUG_OCM + printf("OCM map DSA %08" PRIx32 "\n", dsarc); +#endif + memory_region_add_subregion(get_system_memory(), dsarc, + &ocm->dsarc_ram); + } + } + } +} + +static uint32_t dcr_read_ocm (void *opaque, int dcrn) +{ + ppc405_ocm_t *ocm; + uint32_t ret; + + ocm = opaque; + switch (dcrn) { + case OCM0_ISARC: + ret = ocm->isarc; + break; + case OCM0_ISACNTL: + ret = ocm->isacntl; + break; + case OCM0_DSARC: + ret = ocm->dsarc; + break; + case OCM0_DSACNTL: + ret = ocm->dsacntl; + break; + default: + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_ocm (void *opaque, int dcrn, uint32_t val) +{ + ppc405_ocm_t *ocm; + uint32_t isarc, dsarc, isacntl, dsacntl; + + ocm = opaque; + isarc = ocm->isarc; + dsarc = ocm->dsarc; + isacntl = ocm->isacntl; + dsacntl = ocm->dsacntl; + switch (dcrn) { + case OCM0_ISARC: + isarc = val & 0xFC000000; + break; + case OCM0_ISACNTL: + isacntl = val & 0xC0000000; + break; + case OCM0_DSARC: + isarc = val & 0xFC000000; + break; + case OCM0_DSACNTL: + isacntl = val & 0xC0000000; + break; + } + ocm_update_mappings(ocm, isarc, isacntl, dsarc, dsacntl); + ocm->isarc = isarc; + ocm->dsarc = dsarc; + ocm->isacntl = isacntl; + ocm->dsacntl = dsacntl; +} + +static void ocm_reset (void *opaque) +{ + ppc405_ocm_t *ocm; + uint32_t isarc, dsarc, isacntl, dsacntl; + + ocm = opaque; + isarc = 0x00000000; + isacntl = 0x00000000; + dsarc = 0x00000000; + dsacntl = 0x00000000; + ocm_update_mappings(ocm, isarc, isacntl, dsarc, dsacntl); + ocm->isarc = isarc; + ocm->dsarc = dsarc; + ocm->isacntl = isacntl; + ocm->dsacntl = dsacntl; +} + +static void ppc405_ocm_init(CPUPPCState *env) +{ + ppc405_ocm_t *ocm; + + ocm = g_malloc0(sizeof(ppc405_ocm_t)); + /* XXX: Size is 4096 or 0x04000000 */ + memory_region_init_ram(&ocm->isarc_ram, NULL, "ppc405.ocm", 4 * KiB, + &error_fatal); + memory_region_init_alias(&ocm->dsarc_ram, NULL, "ppc405.dsarc", + &ocm->isarc_ram, 0, 4 * KiB); + qemu_register_reset(&ocm_reset, ocm); + ppc_dcr_register(env, OCM0_ISARC, + ocm, &dcr_read_ocm, &dcr_write_ocm); + ppc_dcr_register(env, OCM0_ISACNTL, 
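+    /*
+     * Note on dcr_write_ocm() above: the OCM0_DSARC and OCM0_DSACNTL
+     * cases assign to isarc/isacntl, so dsarc and dsacntl can never
+     * change after reset; the intended form is presumably:
+     *
+     *     case OCM0_DSARC:
+     *         dsarc = val & 0xFC000000;
+     *         break;
+     *     case OCM0_DSACNTL:
+     *         dsacntl = val & 0xC0000000;
+     *         break;
+     */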
+ ocm, &dcr_read_ocm, &dcr_write_ocm); + ppc_dcr_register(env, OCM0_DSARC, + ocm, &dcr_read_ocm, &dcr_write_ocm); + ppc_dcr_register(env, OCM0_DSACNTL, + ocm, &dcr_read_ocm, &dcr_write_ocm); +} + +/*****************************************************************************/ +/* General purpose timers */ +typedef struct ppc4xx_gpt_t ppc4xx_gpt_t; +struct ppc4xx_gpt_t { + MemoryRegion iomem; + int64_t tb_offset; + uint32_t tb_freq; + QEMUTimer *timer; + qemu_irq irqs[5]; + uint32_t oe; + uint32_t ol; + uint32_t im; + uint32_t is; + uint32_t ie; + uint32_t comp[5]; + uint32_t mask[5]; +}; + +static int ppc4xx_gpt_compare (ppc4xx_gpt_t *gpt, int n) +{ + /* XXX: TODO */ + return 0; +} + +static void ppc4xx_gpt_set_output (ppc4xx_gpt_t *gpt, int n, int level) +{ + /* XXX: TODO */ +} + +static void ppc4xx_gpt_set_outputs (ppc4xx_gpt_t *gpt) +{ + uint32_t mask; + int i; + + mask = 0x80000000; + for (i = 0; i < 5; i++) { + if (gpt->oe & mask) { + /* Output is enabled */ + if (ppc4xx_gpt_compare(gpt, i)) { + /* Comparison is OK */ + ppc4xx_gpt_set_output(gpt, i, gpt->ol & mask); + } else { + /* Comparison is KO */ + ppc4xx_gpt_set_output(gpt, i, gpt->ol & mask ? 0 : 1); + } + } + mask = mask >> 1; + } +} + +static void ppc4xx_gpt_set_irqs (ppc4xx_gpt_t *gpt) +{ + uint32_t mask; + int i; + + mask = 0x00008000; + for (i = 0; i < 5; i++) { + if (gpt->is & gpt->im & mask) + qemu_irq_raise(gpt->irqs[i]); + else + qemu_irq_lower(gpt->irqs[i]); + mask = mask >> 1; + } +} + +static void ppc4xx_gpt_compute_timer (ppc4xx_gpt_t *gpt) +{ + /* XXX: TODO */ +} + +static uint64_t ppc4xx_gpt_read(void *opaque, hwaddr addr, unsigned size) +{ + ppc4xx_gpt_t *gpt; + uint32_t ret; + int idx; + +#ifdef DEBUG_GPT + printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr); +#endif + gpt = opaque; + switch (addr) { + case 0x00: + /* Time base counter */ + ret = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + gpt->tb_offset, + gpt->tb_freq, NANOSECONDS_PER_SECOND); + break; + case 0x10: + /* Output enable */ + ret = gpt->oe; + break; + case 0x14: + /* Output level */ + ret = gpt->ol; + break; + case 0x18: + /* Interrupt mask */ + ret = gpt->im; + break; + case 0x1C: + case 0x20: + /* Interrupt status */ + ret = gpt->is; + break; + case 0x24: + /* Interrupt enable */ + ret = gpt->ie; + break; + case 0x80 ... 0x90: + /* Compare timer */ + idx = (addr - 0x80) >> 2; + ret = gpt->comp[idx]; + break; + case 0xC0 ... 
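+    /*
+     * Register-window decode used by ppc4xx_gpt_read() and _write():
+     * the five compare registers sit at 0x80, 0x84, ..., 0x90 and the
+     * five masks at 0xC0..0xD0, so idx = (addr - base) >> 2 is the
+     * timer index 0..4; e.g. addr 0x88 gives (0x88 - 0x80) >> 2 == 2,
+     * i.e. comp[2].
+     */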
0xD0: + /* Compare mask */ + idx = (addr - 0xC0) >> 2; + ret = gpt->mask[idx]; + break; + default: + ret = -1; + break; + } + + return ret; +} + +static void ppc4xx_gpt_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + ppc4xx_gpt_t *gpt; + int idx; + +#ifdef DEBUG_I2C + printf("%s: addr " TARGET_FMT_plx " val %08" PRIx32 "\n", __func__, addr, + value); +#endif + gpt = opaque; + switch (addr) { + case 0x00: + /* Time base counter */ + gpt->tb_offset = muldiv64(value, NANOSECONDS_PER_SECOND, gpt->tb_freq) + - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + ppc4xx_gpt_compute_timer(gpt); + break; + case 0x10: + /* Output enable */ + gpt->oe = value & 0xF8000000; + ppc4xx_gpt_set_outputs(gpt); + break; + case 0x14: + /* Output level */ + gpt->ol = value & 0xF8000000; + ppc4xx_gpt_set_outputs(gpt); + break; + case 0x18: + /* Interrupt mask */ + gpt->im = value & 0x0000F800; + break; + case 0x1C: + /* Interrupt status set */ + gpt->is |= value & 0x0000F800; + ppc4xx_gpt_set_irqs(gpt); + break; + case 0x20: + /* Interrupt status clear */ + gpt->is &= ~(value & 0x0000F800); + ppc4xx_gpt_set_irqs(gpt); + break; + case 0x24: + /* Interrupt enable */ + gpt->ie = value & 0x0000F800; + ppc4xx_gpt_set_irqs(gpt); + break; + case 0x80 ... 0x90: + /* Compare timer */ + idx = (addr - 0x80) >> 2; + gpt->comp[idx] = value & 0xF8000000; + ppc4xx_gpt_compute_timer(gpt); + break; + case 0xC0 ... 0xD0: + /* Compare mask */ + idx = (addr - 0xC0) >> 2; + gpt->mask[idx] = value & 0xF8000000; + ppc4xx_gpt_compute_timer(gpt); + break; + } +} + +static const MemoryRegionOps gpt_ops = { + .read = ppc4xx_gpt_read, + .write = ppc4xx_gpt_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void ppc4xx_gpt_cb (void *opaque) +{ + ppc4xx_gpt_t *gpt; + + gpt = opaque; + ppc4xx_gpt_set_irqs(gpt); + ppc4xx_gpt_set_outputs(gpt); + ppc4xx_gpt_compute_timer(gpt); +} + +static void ppc4xx_gpt_reset (void *opaque) +{ + ppc4xx_gpt_t *gpt; + int i; + + gpt = opaque; + timer_del(gpt->timer); + gpt->oe = 0x00000000; + gpt->ol = 0x00000000; + gpt->im = 0x00000000; + gpt->is = 0x00000000; + gpt->ie = 0x00000000; + for (i = 0; i < 5; i++) { + gpt->comp[i] = 0x00000000; + gpt->mask[i] = 0x00000000; + } +} + +static void ppc4xx_gpt_init(hwaddr base, qemu_irq irqs[5]) +{ + ppc4xx_gpt_t *gpt; + int i; + + gpt = g_malloc0(sizeof(ppc4xx_gpt_t)); + for (i = 0; i < 5; i++) { + gpt->irqs[i] = irqs[i]; + } + gpt->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &ppc4xx_gpt_cb, gpt); +#ifdef DEBUG_GPT + printf("%s: offset " TARGET_FMT_plx "\n", __func__, base); +#endif + memory_region_init_io(&gpt->iomem, NULL, &gpt_ops, gpt, "gpt", 0x0d4); + memory_region_add_subregion(get_system_memory(), base, &gpt->iomem); + qemu_register_reset(ppc4xx_gpt_reset, gpt); +} + +/*****************************************************************************/ +/* PowerPC 405EP */ +/* CPU control */ +enum { + PPC405EP_CPC0_PLLMR0 = 0x0F0, + PPC405EP_CPC0_BOOT = 0x0F1, + PPC405EP_CPC0_EPCTL = 0x0F3, + PPC405EP_CPC0_PLLMR1 = 0x0F4, + PPC405EP_CPC0_UCR = 0x0F5, + PPC405EP_CPC0_SRR = 0x0F6, + PPC405EP_CPC0_JTAGID = 0x0F7, + PPC405EP_CPC0_PCI = 0x0F9, +#if 0 + PPC405EP_CPC0_ER = xxx, + PPC405EP_CPC0_FR = xxx, + PPC405EP_CPC0_SR = xxx, +#endif +}; + +enum { + PPC405EP_CPU_CLK = 0, + PPC405EP_PLB_CLK = 1, + PPC405EP_OPB_CLK = 2, + PPC405EP_EBC_CLK = 3, + PPC405EP_MAL_CLK = 4, + PPC405EP_PCI_CLK = 5, + PPC405EP_UART0_CLK = 6, + PPC405EP_UART1_CLK = 7, + PPC405EP_CLK_NB = 8, +}; + +typedef struct 
ppc405ep_cpc_t ppc405ep_cpc_t; +struct ppc405ep_cpc_t { + uint32_t sysclk; + clk_setup_t clk_setup[PPC405EP_CLK_NB]; + uint32_t boot; + uint32_t epctl; + uint32_t pllmr[2]; + uint32_t ucr; + uint32_t srr; + uint32_t jtagid; + uint32_t pci; + /* Clock and power management */ + uint32_t er; + uint32_t fr; + uint32_t sr; +}; + +static void ppc405ep_compute_clocks (ppc405ep_cpc_t *cpc) +{ + uint32_t CPU_clk, PLB_clk, OPB_clk, EBC_clk, MAL_clk, PCI_clk; + uint32_t UART0_clk, UART1_clk; + uint64_t VCO_out, PLL_out; + int M, D; + + VCO_out = 0; + if ((cpc->pllmr[1] & 0x80000000) && !(cpc->pllmr[1] & 0x40000000)) { + M = (((cpc->pllmr[1] >> 20) - 1) & 0xF) + 1; /* FBMUL */ +#ifdef DEBUG_CLOCKS_LL + printf("FBMUL %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 20) & 0xF, M); +#endif + D = 8 - ((cpc->pllmr[1] >> 16) & 0x7); /* FWDA */ +#ifdef DEBUG_CLOCKS_LL + printf("FWDA %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 16) & 0x7, D); +#endif + VCO_out = (uint64_t)cpc->sysclk * M * D; + if (VCO_out < 500000000UL || VCO_out > 1000000000UL) { + /* Error - unlock the PLL */ + printf("VCO out of range %" PRIu64 "\n", VCO_out); +#if 0 + cpc->pllmr[1] &= ~0x80000000; + goto pll_bypass; +#endif + } + PLL_out = VCO_out / D; + /* Pretend the PLL is locked */ + cpc->boot |= 0x00000001; + } else { +#if 0 + pll_bypass: +#endif + PLL_out = cpc->sysclk; + if (cpc->pllmr[1] & 0x40000000) { + /* Pretend the PLL is not locked */ + cpc->boot &= ~0x00000001; + } + } + /* Now, compute all other clocks */ + D = ((cpc->pllmr[0] >> 20) & 0x3) + 1; /* CCDV */ +#ifdef DEBUG_CLOCKS_LL + printf("CCDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 20) & 0x3, D); +#endif + CPU_clk = PLL_out / D; + D = ((cpc->pllmr[0] >> 16) & 0x3) + 1; /* CBDV */ +#ifdef DEBUG_CLOCKS_LL + printf("CBDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 16) & 0x3, D); +#endif + PLB_clk = CPU_clk / D; + D = ((cpc->pllmr[0] >> 12) & 0x3) + 1; /* OPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("OPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 12) & 0x3, D); +#endif + OPB_clk = PLB_clk / D; + D = ((cpc->pllmr[0] >> 8) & 0x3) + 2; /* EPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("EPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 8) & 0x3, D); +#endif + EBC_clk = PLB_clk / D; + D = ((cpc->pllmr[0] >> 4) & 0x3) + 1; /* MPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("MPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 4) & 0x3, D); +#endif + MAL_clk = PLB_clk / D; + D = (cpc->pllmr[0] & 0x3) + 1; /* PPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("PPDV %01" PRIx32 " %d\n", cpc->pllmr[0] & 0x3, D); +#endif + PCI_clk = PLB_clk / D; + D = ((cpc->ucr - 1) & 0x7F) + 1; /* U0DIV */ +#ifdef DEBUG_CLOCKS_LL + printf("U0DIV %01" PRIx32 " %d\n", cpc->ucr & 0x7F, D); +#endif + UART0_clk = PLL_out / D; + D = (((cpc->ucr >> 8) - 1) & 0x7F) + 1; /* U1DIV */ +#ifdef DEBUG_CLOCKS_LL + printf("U1DIV %01" PRIx32 " %d\n", (cpc->ucr >> 8) & 0x7F, D); +#endif + UART1_clk = PLL_out / D; +#ifdef DEBUG_CLOCKS + printf("Setup PPC405EP clocks - sysclk %" PRIu32 " VCO %" PRIu64 + " PLL out %" PRIu64 " Hz\n", cpc->sysclk, VCO_out, PLL_out); + printf("CPU %" PRIu32 " PLB %" PRIu32 " OPB %" PRIu32 " EBC %" PRIu32 + " MAL %" PRIu32 " PCI %" PRIu32 " UART0 %" PRIu32 + " UART1 %" PRIu32 "\n", + CPU_clk, PLB_clk, OPB_clk, EBC_clk, MAL_clk, PCI_clk, + UART0_clk, UART1_clk); +#endif + /* Setup CPU clocks */ + clk_setup(&cpc->clk_setup[PPC405EP_CPU_CLK], CPU_clk); + /* Setup PLB clock */ + clk_setup(&cpc->clk_setup[PPC405EP_PLB_CLK], PLB_clk); + /* Setup OPB clock */ + clk_setup(&cpc->clk_setup[PPC405EP_OPB_CLK], OPB_clk); + /* Setup external clock */ + 
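+    /*
+     * Worked example of the derivation above, with illustrative numbers
+     * only: sysclk = 33333333 Hz, FBMUL M = 8 and FWDA D = 2 give
+     * VCO_out = 33333333 * 8 * 2 = 533 MHz (inside the 500 MHz..1 GHz
+     * window) and PLL_out = VCO_out / 2 = 267 MHz, i.e. sysclk * M.
+     * CCDV = 1 then yields a 267 MHz CPU clock and CBDV = 2 a 133 MHz
+     * PLB clock, and so on down the divider chain.
+     */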
clk_setup(&cpc->clk_setup[PPC405EP_EBC_CLK], EBC_clk); + /* Setup MAL clock */ + clk_setup(&cpc->clk_setup[PPC405EP_MAL_CLK], MAL_clk); + /* Setup PCI clock */ + clk_setup(&cpc->clk_setup[PPC405EP_PCI_CLK], PCI_clk); + /* Setup UART0 clock */ + clk_setup(&cpc->clk_setup[PPC405EP_UART0_CLK], UART0_clk); + /* Setup UART1 clock */ + clk_setup(&cpc->clk_setup[PPC405EP_UART1_CLK], UART1_clk); +} + +static uint32_t dcr_read_epcpc (void *opaque, int dcrn) +{ + ppc405ep_cpc_t *cpc; + uint32_t ret; + + cpc = opaque; + switch (dcrn) { + case PPC405EP_CPC0_BOOT: + ret = cpc->boot; + break; + case PPC405EP_CPC0_EPCTL: + ret = cpc->epctl; + break; + case PPC405EP_CPC0_PLLMR0: + ret = cpc->pllmr[0]; + break; + case PPC405EP_CPC0_PLLMR1: + ret = cpc->pllmr[1]; + break; + case PPC405EP_CPC0_UCR: + ret = cpc->ucr; + break; + case PPC405EP_CPC0_SRR: + ret = cpc->srr; + break; + case PPC405EP_CPC0_JTAGID: + ret = cpc->jtagid; + break; + case PPC405EP_CPC0_PCI: + ret = cpc->pci; + break; + default: + /* Avoid gcc warning */ + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_epcpc (void *opaque, int dcrn, uint32_t val) +{ + ppc405ep_cpc_t *cpc; + + cpc = opaque; + switch (dcrn) { + case PPC405EP_CPC0_BOOT: + /* Read-only register */ + break; + case PPC405EP_CPC0_EPCTL: + /* Don't care for now */ + cpc->epctl = val & 0xC00000F3; + break; + case PPC405EP_CPC0_PLLMR0: + cpc->pllmr[0] = val & 0x00633333; + ppc405ep_compute_clocks(cpc); + break; + case PPC405EP_CPC0_PLLMR1: + cpc->pllmr[1] = val & 0xC0F73FFF; + ppc405ep_compute_clocks(cpc); + break; + case PPC405EP_CPC0_UCR: + /* UART control - don't care for now */ + cpc->ucr = val & 0x003F7F7F; + break; + case PPC405EP_CPC0_SRR: + cpc->srr = val; + break; + case PPC405EP_CPC0_JTAGID: + /* Read-only */ + break; + case PPC405EP_CPC0_PCI: + cpc->pci = val; + break; + } +} + +static void ppc405ep_cpc_reset (void *opaque) +{ + ppc405ep_cpc_t *cpc = opaque; + + cpc->boot = 0x00000010; /* Boot from PCI - IIC EEPROM disabled */ + cpc->epctl = 0x00000000; + cpc->pllmr[0] = 0x00011010; + cpc->pllmr[1] = 0x40000000; + cpc->ucr = 0x00000000; + cpc->srr = 0x00040000; + cpc->pci = 0x00000000; + cpc->er = 0x00000000; + cpc->fr = 0x00000000; + cpc->sr = 0x00000000; + ppc405ep_compute_clocks(cpc); +} + +/* XXX: sysclk should be between 25 and 100 MHz */ +static void ppc405ep_cpc_init (CPUPPCState *env, clk_setup_t clk_setup[8], + uint32_t sysclk) +{ + ppc405ep_cpc_t *cpc; + + cpc = g_malloc0(sizeof(ppc405ep_cpc_t)); + memcpy(cpc->clk_setup, clk_setup, + PPC405EP_CLK_NB * sizeof(clk_setup_t)); + cpc->jtagid = 0x20267049; + cpc->sysclk = sysclk; + qemu_register_reset(&ppc405ep_cpc_reset, cpc); + ppc_dcr_register(env, PPC405EP_CPC0_BOOT, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_EPCTL, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_PLLMR0, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_PLLMR1, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_UCR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_SRR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_JTAGID, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_PCI, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); +#if 0 + ppc_dcr_register(env, PPC405EP_CPC0_ER, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_FR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + 
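+    /*
+     * Guest-visible contract of the model above: firmware typically
+     * polls bit 0 of CPC0_BOOT for PLL lock before trusting the derived
+     * clocks, and ppc405ep_compute_clocks() simply forces that bit when
+     * the PLL is enabled and in range. Sketch, using the handler:
+     *
+     *     if (dcr_read_epcpc(cpc, PPC405EP_CPC0_BOOT) & 1) {
+     *         // PLL reports locked; the computed clock tree applies
+     *     }
+     */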
ppc_dcr_register(env, PPC405EP_CPC0_SR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); +#endif +} + +CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem, + MemoryRegion ram_memories[2], + hwaddr ram_bases[2], + hwaddr ram_sizes[2], + uint32_t sysclk, DeviceState **uicdevp, + int do_init) +{ + clk_setup_t clk_setup[PPC405EP_CLK_NB], tlb_clk_setup; + qemu_irq dma_irqs[4], gpt_irqs[5], mal_irqs[4]; + PowerPCCPU *cpu; + CPUPPCState *env; + DeviceState *uicdev; + SysBusDevice *uicsbd; + + memset(clk_setup, 0, sizeof(clk_setup)); + /* init CPUs */ + cpu = ppc4xx_init(POWERPC_CPU_TYPE_NAME("405ep"), + &clk_setup[PPC405EP_CPU_CLK], + &tlb_clk_setup, sysclk); + env = &cpu->env; + clk_setup[PPC405EP_CPU_CLK].cb = tlb_clk_setup.cb; + clk_setup[PPC405EP_CPU_CLK].opaque = tlb_clk_setup.opaque; + /* Internal devices init */ + /* Memory mapped devices registers */ + /* PLB arbitrer */ + ppc4xx_plb_init(env); + /* PLB to OPB bridge */ + ppc4xx_pob_init(env); + /* OBP arbitrer */ + ppc4xx_opba_init(0xef600600); + /* Initialize timers */ + ppc_booke_timers_init(cpu, sysclk, 0); + /* Universal interrupt controller */ + uicdev = qdev_new(TYPE_PPC_UIC); + uicsbd = SYS_BUS_DEVICE(uicdev); + + object_property_set_link(OBJECT(uicdev), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(uicsbd, &error_fatal); + + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + + *uicdevp = uicdev; + + /* SDRAM controller */ + /* XXX 405EP has no ECC interrupt */ + ppc4xx_sdram_init(env, qdev_get_gpio_in(uicdev, 17), 2, ram_memories, + ram_bases, ram_sizes, do_init); + /* External bus controller */ + ppc405_ebc_init(env); + /* DMA controller */ + dma_irqs[0] = qdev_get_gpio_in(uicdev, 5); + dma_irqs[1] = qdev_get_gpio_in(uicdev, 6); + dma_irqs[2] = qdev_get_gpio_in(uicdev, 7); + dma_irqs[3] = qdev_get_gpio_in(uicdev, 8); + ppc405_dma_init(env, dma_irqs); + /* IIC controller */ + sysbus_create_simple(TYPE_PPC4xx_I2C, 0xef600500, + qdev_get_gpio_in(uicdev, 2)); + /* GPIO */ + ppc405_gpio_init(0xef600700); + /* Serial ports */ + if (serial_hd(0) != NULL) { + serial_mm_init(address_space_mem, 0xef600300, 0, + qdev_get_gpio_in(uicdev, 0), + PPC_SERIAL_MM_BAUDBASE, serial_hd(0), + DEVICE_BIG_ENDIAN); + } + if (serial_hd(1) != NULL) { + serial_mm_init(address_space_mem, 0xef600400, 0, + qdev_get_gpio_in(uicdev, 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(1), + DEVICE_BIG_ENDIAN); + } + /* OCM */ + ppc405_ocm_init(env); + /* GPT */ + gpt_irqs[0] = qdev_get_gpio_in(uicdev, 19); + gpt_irqs[1] = qdev_get_gpio_in(uicdev, 20); + gpt_irqs[2] = qdev_get_gpio_in(uicdev, 21); + gpt_irqs[3] = qdev_get_gpio_in(uicdev, 22); + gpt_irqs[4] = qdev_get_gpio_in(uicdev, 23); + ppc4xx_gpt_init(0xef600000, gpt_irqs); + /* PCI */ + /* Uses UIC IRQs 3, 16, 18 */ + /* MAL */ + mal_irqs[0] = qdev_get_gpio_in(uicdev, 11); + mal_irqs[1] = qdev_get_gpio_in(uicdev, 12); + mal_irqs[2] = qdev_get_gpio_in(uicdev, 13); + mal_irqs[3] = qdev_get_gpio_in(uicdev, 14); + ppc4xx_mal_init(env, 4, 2, mal_irqs); + /* Ethernet */ + /* Uses UIC IRQs 9, 15, 17 */ + /* CPU control */ + ppc405ep_cpc_init(env, clk_setup, sysclk); + + return env; +} diff --git a/hw/ppc/ppc440.h b/hw/ppc/ppc440.h new file mode 100644 index 000000000..7cef93612 --- /dev/null +++ b/hw/ppc/ppc440.h @@ -0,0 +1,27 @@ +/* + * QEMU PowerPC 440 shared definitions + * + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2018 BALATON Zoltan + * 
+ * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#ifndef PPC440_H +#define PPC440_H + +#include "hw/ppc/ppc.h" + +void ppc4xx_l2sram_init(CPUPPCState *env); +void ppc4xx_cpr_init(CPUPPCState *env); +void ppc4xx_sdr_init(CPUPPCState *env); +void ppc440_sdram_init(CPUPPCState *env, int nbanks, + MemoryRegion *ram_memories, + hwaddr *ram_bases, hwaddr *ram_sizes, + int do_init); +void ppc4xx_ahb_init(CPUPPCState *env); +void ppc4xx_dma_init(CPUPPCState *env, int dcr_base); +void ppc460ex_pcie_init(CPUPPCState *env); + +#endif /* PPC440_H */ diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c new file mode 100644 index 000000000..7fb620b9a --- /dev/null +++ b/hw/ppc/ppc440_bamboo.c @@ -0,0 +1,307 @@ +/* + * QEMU PowerPC 440 Bamboo board emulation + * + * Copyright 2007 IBM Corporation. + * Authors: + * Jerone Young <jyoung5@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + * Hollis Blanchard <hollisb@us.ibm.com> + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/error-report.h" +#include "net/net.h" +#include "hw/pci/pci.h" +#include "hw/boards.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "sysemu/device_tree.h" +#include "hw/loader.h" +#include "elf.h" +#include "hw/char/serial.h" +#include "hw/ppc/ppc.h" +#include "ppc405.h" +#include "sysemu/sysemu.h" +#include "sysemu/reset.h" +#include "hw/sysbus.h" +#include "hw/intc/ppc-uic.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" + +#define BINARY_DEVICE_TREE_FILE "bamboo.dtb" + +/* from u-boot */ +#define KERNEL_ADDR 0x1000000 +#define FDT_ADDR 0x1800000 +#define RAMDISK_ADDR 0x1900000 + +#define PPC440EP_PCI_CONFIG 0xeec00000 +#define PPC440EP_PCI_INTACK 0xeed00000 +#define PPC440EP_PCI_SPECIAL 0xeed00000 +#define PPC440EP_PCI_REGS 0xef400000 +#define PPC440EP_PCI_IO 0xe8000000 +#define PPC440EP_PCI_IOLEN 0x00010000 + +#define PPC440EP_SDRAM_NR_BANKS 4 + +static const ram_addr_t ppc440ep_sdram_bank_sizes[] = { + 256 * MiB, 128 * MiB, 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 0 +}; + +static hwaddr entry; + +static int bamboo_load_device_tree(hwaddr addr, + uint32_t ramsize, + hwaddr initrd_base, + hwaddr initrd_size, + const char *kernel_cmdline) +{ + int ret = -1; + uint32_t mem_reg_property[] = { 0, 0, cpu_to_be32(ramsize) }; + char *filename; + int fdt_size; + void *fdt; + uint32_t tb_freq = 400000000; + uint32_t clock_freq = 400000000; + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE); + if (!filename) { + return -1; + } + fdt = load_device_tree(filename, &fdt_size); + g_free(filename); + if (fdt == NULL) { + return -1; + } + + /* Manipulate device tree in memory. 
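+ * The calls below patch the prebuilt bamboo.dtb in host memory before
+ * it is copied into guest RAM: qemu_fdt_setprop() stores a raw
+ * property, qemu_fdt_setprop_cell() a single 32-bit cell and
+ * qemu_fdt_setprop_string() a NUL-terminated string. A further, purely
+ * hypothetical property would follow the same pattern:
+ *
+ *     qemu_fdt_setprop_cell(fdt, "/chosen", "qemu,example-cell", 1);
+ *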
*/ + + ret = qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property, + sizeof(mem_reg_property)); + if (ret < 0) + fprintf(stderr, "couldn't set /memory/reg\n"); + + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", + initrd_base); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n"); + + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n"); + + ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + kernel_cmdline); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/bootargs\n"); + + /* Copy data from the host device tree into the guest. Since the guest can + * directly access the timebase without host involvement, we must expose + * the correct frequencies. */ + if (kvm_enabled()) { + tb_freq = kvmppc_get_tbfreq(); + clock_freq = kvmppc_get_clockfreq(); + } + + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency", + clock_freq); + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency", + tb_freq); + + rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr); + g_free(fdt); + return 0; +} + +/* Create reset TLB entries for BookE, spanning the 32bit addr space. */ +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0x80000000 */ + tlb->EPN = va & TARGET_PAGE_MASK; + tlb->RPN = pa & TARGET_PAGE_MASK; + tlb->PID = 0; + + tlb = &env->tlb.tlbe[1]; + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0xffffffff */ + tlb->EPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->RPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->PID = 0; +} + +static void main_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + cpu_reset(CPU(cpu)); + env->gpr[1] = (16 * MiB) - 8; + env->gpr[3] = FDT_ADDR; + env->nip = entry; + + /* Create a mapping for the kernel. 
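+ * mmubooke_create_initial_mapping() (defined above, called just below)
+ * installs two TLB entries, each spanning 1U << 31 bytes with
+ * EPN == RPN, so together they identity-map the entire 32-bit address
+ * space (2 x 2 GiB = 4 GiB) until the kernel installs its own
+ * translations.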
*/ + mmubooke_create_initial_mapping(env, 0, 0); +} + +static void bamboo_init(MachineState *machine) +{ + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 }; + MemoryRegion *address_space_mem = get_system_memory(); + MemoryRegion *isa = g_new(MemoryRegion, 1); + MemoryRegion *ram_memories = g_new(MemoryRegion, PPC440EP_SDRAM_NR_BANKS); + hwaddr ram_bases[PPC440EP_SDRAM_NR_BANKS]; + hwaddr ram_sizes[PPC440EP_SDRAM_NR_BANKS]; + PCIBus *pcibus; + PowerPCCPU *cpu; + CPUPPCState *env; + target_long initrd_size = 0; + DeviceState *dev; + DeviceState *uicdev; + SysBusDevice *uicsbd; + int success; + int i; + + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + + if (env->mmu_model != POWERPC_MMU_BOOKE) { + error_report("MMU model %i not supported by this machine", + env->mmu_model); + exit(1); + } + + qemu_register_reset(main_cpu_reset, cpu); + ppc_booke_timers_init(cpu, 400000000, 0); + ppc_dcr_init(env, NULL, NULL); + + /* interrupt controller */ + uicdev = qdev_new(TYPE_PPC_UIC); + uicsbd = SYS_BUS_DEVICE(uicdev); + + object_property_set_link(OBJECT(uicdev), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(uicsbd, &error_fatal); + + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + + /* SDRAM controller */ + memset(ram_bases, 0, sizeof(ram_bases)); + memset(ram_sizes, 0, sizeof(ram_sizes)); + ppc4xx_sdram_banks(machine->ram, PPC440EP_SDRAM_NR_BANKS, ram_memories, + ram_bases, ram_sizes, ppc440ep_sdram_bank_sizes); + /* XXX 440EP's ECC interrupts are on UIC1, but we've only created UIC0. */ + ppc4xx_sdram_init(env, + qdev_get_gpio_in(uicdev, 14), + PPC440EP_SDRAM_NR_BANKS, ram_memories, + ram_bases, ram_sizes, 1); + + /* PCI */ + dev = sysbus_create_varargs(TYPE_PPC4xx_PCI_HOST_BRIDGE, + PPC440EP_PCI_CONFIG, + qdev_get_gpio_in(uicdev, pci_irq_nrs[0]), + qdev_get_gpio_in(uicdev, pci_irq_nrs[1]), + qdev_get_gpio_in(uicdev, pci_irq_nrs[2]), + qdev_get_gpio_in(uicdev, pci_irq_nrs[3]), + NULL); + pcibus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); + if (!pcibus) { + error_report("couldn't create PCI controller"); + exit(1); + } + + memory_region_init_alias(isa, NULL, "isa_mmio", + get_system_io(), 0, PPC440EP_PCI_IOLEN); + memory_region_add_subregion(get_system_memory(), PPC440EP_PCI_IO, isa); + + if (serial_hd(0) != NULL) { + serial_mm_init(address_space_mem, 0xef600300, 0, + qdev_get_gpio_in(uicdev, 0), + PPC_SERIAL_MM_BAUDBASE, serial_hd(0), + DEVICE_BIG_ENDIAN); + } + if (serial_hd(1) != NULL) { + serial_mm_init(address_space_mem, 0xef600400, 0, + qdev_get_gpio_in(uicdev, 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(1), + DEVICE_BIG_ENDIAN); + } + + if (pcibus) { + /* Register network interfaces. */ + for (i = 0; i < nb_nics; i++) { + /* There are no PCI NICs on the Bamboo board, but there are + * PCI slots, so we can pick whatever default model we want. */ + pci_nic_init_nofail(&nd_table[i], pcibus, "e1000", NULL); + } + } + + /* Load kernel. 
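+ * load_uimage() is tried first; if the image is not a U-Boot uImage
+ * the code falls back to load_elf() and takes the entry point from
+ * whichever loader succeeded (a raw-binary fallback is still an XXX
+ * below). Failure of both is fatal.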
*/ + if (kernel_filename) { + hwaddr loadaddr = LOAD_UIMAGE_LOADADDR_INVALID; + success = load_uimage(kernel_filename, &entry, &loadaddr, NULL, + NULL, NULL); + if (success < 0) { + uint64_t elf_entry; + success = load_elf(kernel_filename, NULL, NULL, NULL, &elf_entry, + NULL, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + entry = elf_entry; + } + /* XXX try again as binary */ + if (success < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + } + + /* Load initrd. */ + if (initrd_filename) { + initrd_size = load_image_targphys(initrd_filename, RAMDISK_ADDR, + machine->ram_size - RAMDISK_ADDR); + + if (initrd_size < 0) { + error_report("could not load ram disk '%s' at %x", + initrd_filename, RAMDISK_ADDR); + exit(1); + } + } + + /* If we're loading a kernel directly, we must load the device tree too. */ + if (kernel_filename) { + if (bamboo_load_device_tree(FDT_ADDR, machine->ram_size, RAMDISK_ADDR, + initrd_size, kernel_cmdline) < 0) { + error_report("couldn't load device tree"); + exit(1); + } + } +} + +static void bamboo_machine_init(MachineClass *mc) +{ + mc->desc = "bamboo"; + mc->init = bamboo_init; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("440epb"); + mc->default_ram_id = "ppc4xx.sdram"; +} + +DEFINE_MACHINE("bamboo", bamboo_machine_init) diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c new file mode 100644 index 000000000..788d25514 --- /dev/null +++ b/hw/ppc/ppc440_pcix.c @@ -0,0 +1,538 @@ +/* + * Emulation of the ibm,plb-pcix PCI controller + * This is found in some 440 SoCs e.g. the 460EX. + * + * Copyright (c) 2016-2018 BALATON Zoltan + * + * Derived from ppc4xx_pci.c and pci-host/ppce500.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "trace.h" +#include "qom/object.h" + +struct PLBOutMap { + uint64_t la; + uint64_t pcia; + uint32_t sa; + MemoryRegion mr; +}; + +struct PLBInMap { + uint64_t sa; + uint64_t la; + MemoryRegion mr; +}; + +#define TYPE_PPC440_PCIX_HOST_BRIDGE "ppc440-pcix-host" +OBJECT_DECLARE_SIMPLE_TYPE(PPC440PCIXState, PPC440_PCIX_HOST_BRIDGE) + +#define PPC440_PCIX_NR_POMS 3 +#define PPC440_PCIX_NR_PIMS 3 + +struct PPC440PCIXState { + PCIHostState parent_obj; + + PCIDevice *dev; + struct PLBOutMap pom[PPC440_PCIX_NR_POMS]; + struct PLBInMap pim[PPC440_PCIX_NR_PIMS]; + uint32_t sts; + qemu_irq irq; + AddressSpace bm_as; + MemoryRegion bm; + + MemoryRegion container; + MemoryRegion iomem; + MemoryRegion busmem; +}; + +#define PPC440_REG_BASE 0x80000 +#define PPC440_REG_SIZE 0xff + +#define PCIC0_CFGADDR 0x0 +#define PCIC0_CFGDATA 0x4 + +#define PCIX0_POM0LAL 0x68 +#define PCIX0_POM0LAH 0x6c +#define PCIX0_POM0SA 0x70 +#define PCIX0_POM0PCIAL 0x74 +#define PCIX0_POM0PCIAH 0x78 +#define PCIX0_POM1LAL 0x7c +#define PCIX0_POM1LAH 0x80 +#define PCIX0_POM1SA 0x84 +#define PCIX0_POM1PCIAL 0x88 +#define PCIX0_POM1PCIAH 0x8c +#define PCIX0_POM2SA 0x90 + +#define PCIX0_PIM0SAL 0x98 +#define PCIX0_PIM0LAL 0x9c +#define PCIX0_PIM0LAH 0xa0 +#define PCIX0_PIM1SA 0xa4 +#define PCIX0_PIM1LAL 0xa8 +#define PCIX0_PIM1LAH 0xac +#define PCIX0_PIM2SAL 0xb0 +#define PCIX0_PIM2LAL 0xb4 +#define PCIX0_PIM2LAH 0xb8 +#define PCIX0_PIM0SAH 0xf8 +#define PCIX0_PIM2SAH 0xfc + +#define PCIX0_STS 0xe0 + +#define PCI_ALL_SIZE (PPC440_REG_BASE + PPC440_REG_SIZE) + +static void ppc440_pcix_clear_region(MemoryRegion *parent, + MemoryRegion *mem) +{ + if (memory_region_is_mapped(mem)) { + memory_region_del_subregion(parent, mem); + object_unparent(OBJECT(mem)); + } +} + +/* DMA mapping */ +static void ppc440_pcix_update_pim(PPC440PCIXState *s, int idx) +{ + MemoryRegion *mem = &s->pim[idx].mr; + char *name; + uint64_t size; + + /* Before we modify anything, unmap and destroy the region */ + ppc440_pcix_clear_region(&s->bm, mem); + + if (!(s->pim[idx].sa & 1)) { + /* Not enabled, nothing to do */ + return; + } + + name = g_strdup_printf("PCI Inbound Window %d", idx); + size = ~(s->pim[idx].sa & ~7ULL) + 1; + memory_region_init_alias(mem, OBJECT(s), name, get_system_memory(), + s->pim[idx].la, size); + memory_region_add_subregion_overlap(&s->bm, 0, mem, -1); + g_free(name); + + trace_ppc440_pcix_update_pim(idx, size, s->pim[idx].la); +} + +/* BAR mapping */ +static void ppc440_pcix_update_pom(PPC440PCIXState *s, int idx) +{ + MemoryRegion *mem = &s->pom[idx].mr; + MemoryRegion *address_space_mem = get_system_memory(); + char *name; + uint32_t size; + + /* Before we modify anything, unmap and destroy the region */ + ppc440_pcix_clear_region(address_space_mem, mem); + + if (!(s->pom[idx].sa & 1)) { + /* Not enabled, nothing to do */ + return; + } + + name = g_strdup_printf("PCI Outbound Window %d", idx); + size = ~(s->pom[idx].sa & 0xfffffffe) + 1; + if (!size) { + size = 0xffffffff; + } + memory_region_init_alias(mem, OBJECT(s), name, &s->busmem, + s->pom[idx].pcia, size); + memory_region_add_subregion(address_space_mem, s->pom[idx].la, mem); + g_free(name); + + trace_ppc440_pcix_update_pom(idx, size, s->pom[idx].la, s->pom[idx].pcia); +} + +static void ppc440_pcix_reg_write4(void *opaque, hwaddr 
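+/*
+ * Window sizing above: ppc440_pcix_update_pim() decodes the inbound
+ * size as ~(sa & ~7ULL) + 1, so a guest programming PIM0SAH:PIM0SAL =
+ * 0xffffffff:00000001 (enable bit set) gets ~0xffffffff00000000 + 1 =
+ * 4 GiB of system memory aliased at pim[].la. The outbound path is the
+ * same arithmetic in 32 bits, with size forced to 0xffffffff when the
+ * computation wraps to zero.
+ */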
addr, + uint64_t val, unsigned size) +{ + struct PPC440PCIXState *s = opaque; + + trace_ppc440_pcix_reg_write(addr, val, size); + switch (addr) { + case PCI_VENDOR_ID ... PCI_MAX_LAT: + stl_le_p(s->dev->config + addr, val); + break; + + case PCIX0_POM0LAL: + s->pom[0].la &= 0xffffffff00000000ULL; + s->pom[0].la |= val; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0LAH: + s->pom[0].la &= 0xffffffffULL; + s->pom[0].la |= val << 32; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0SA: + s->pom[0].sa = val; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0PCIAL: + s->pom[0].pcia &= 0xffffffff00000000ULL; + s->pom[0].pcia |= val; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0PCIAH: + s->pom[0].pcia &= 0xffffffffULL; + s->pom[0].pcia |= val << 32; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM1LAL: + s->pom[1].la &= 0xffffffff00000000ULL; + s->pom[1].la |= val; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1LAH: + s->pom[1].la &= 0xffffffffULL; + s->pom[1].la |= val << 32; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1SA: + s->pom[1].sa = val; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1PCIAL: + s->pom[1].pcia &= 0xffffffff00000000ULL; + s->pom[1].pcia |= val; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1PCIAH: + s->pom[1].pcia &= 0xffffffffULL; + s->pom[1].pcia |= val << 32; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM2SA: + s->pom[2].sa = val; + break; + + case PCIX0_PIM0SAL: + s->pim[0].sa &= 0xffffffff00000000ULL; + s->pim[0].sa |= val; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM0LAL: + s->pim[0].la &= 0xffffffff00000000ULL; + s->pim[0].la |= val; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM0LAH: + s->pim[0].la &= 0xffffffffULL; + s->pim[0].la |= val << 32; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM1SA: + s->pim[1].sa = val; + ppc440_pcix_update_pim(s, 1); + break; + case PCIX0_PIM1LAL: + s->pim[1].la &= 0xffffffff00000000ULL; + s->pim[1].la |= val; + ppc440_pcix_update_pim(s, 1); + break; + case PCIX0_PIM1LAH: + s->pim[1].la &= 0xffffffffULL; + s->pim[1].la |= val << 32; + ppc440_pcix_update_pim(s, 1); + break; + case PCIX0_PIM2SAL: + s->pim[2].sa &= 0xffffffff00000000ULL; + s->pim[2].sa |= val; + ppc440_pcix_update_pim(s, 2); + break; + case PCIX0_PIM2LAL: + s->pim[2].la &= 0xffffffff00000000ULL; + s->pim[2].la |= val; + ppc440_pcix_update_pim(s, 2); + break; + case PCIX0_PIM2LAH: + s->pim[2].la &= 0xffffffffULL; + s->pim[2].la |= val << 32; + ppc440_pcix_update_pim(s, 2); + break; + + case PCIX0_STS: + s->sts = val; + break; + + case PCIX0_PIM0SAH: + s->pim[0].sa &= 0xffffffffULL; + s->pim[0].sa |= val << 32; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM2SAH: + s->pim[2].sa &= 0xffffffffULL; + s->pim[2].sa |= val << 32; + ppc440_pcix_update_pim(s, 2); + break; + + default: + qemu_log_mask(LOG_UNIMP, + "%s: unhandled PCI internal register 0x%"HWADDR_PRIx"\n", + __func__, addr); + break; + } +} + +static uint64_t ppc440_pcix_reg_read4(void *opaque, hwaddr addr, + unsigned size) +{ + struct PPC440PCIXState *s = opaque; + uint32_t val; + + switch (addr) { + case PCI_VENDOR_ID ... 
PCI_MAX_LAT: + val = ldl_le_p(s->dev->config + addr); + break; + + case PCIX0_POM0LAL: + val = s->pom[0].la; + break; + case PCIX0_POM0LAH: + val = s->pom[0].la >> 32; + break; + case PCIX0_POM0SA: + val = s->pom[0].sa; + break; + case PCIX0_POM0PCIAL: + val = s->pom[0].pcia; + break; + case PCIX0_POM0PCIAH: + val = s->pom[0].pcia >> 32; + break; + case PCIX0_POM1LAL: + val = s->pom[1].la; + break; + case PCIX0_POM1LAH: + val = s->pom[1].la >> 32; + break; + case PCIX0_POM1SA: + val = s->pom[1].sa; + break; + case PCIX0_POM1PCIAL: + val = s->pom[1].pcia; + break; + case PCIX0_POM1PCIAH: + val = s->pom[1].pcia >> 32; + break; + case PCIX0_POM2SA: + val = s->pom[2].sa; + break; + + case PCIX0_PIM0SAL: + val = s->pim[0].sa; + break; + case PCIX0_PIM0LAL: + val = s->pim[0].la; + break; + case PCIX0_PIM0LAH: + val = s->pim[0].la >> 32; + break; + case PCIX0_PIM1SA: + val = s->pim[1].sa; + break; + case PCIX0_PIM1LAL: + val = s->pim[1].la; + break; + case PCIX0_PIM1LAH: + val = s->pim[1].la >> 32; + break; + case PCIX0_PIM2SAL: + val = s->pim[2].sa; + break; + case PCIX0_PIM2LAL: + val = s->pim[2].la; + break; + case PCIX0_PIM2LAH: + val = s->pim[2].la >> 32; + break; + + case PCIX0_STS: + val = s->sts; + break; + + case PCIX0_PIM0SAH: + val = s->pim[0].sa >> 32; + break; + case PCIX0_PIM2SAH: + val = s->pim[2].sa >> 32; + break; + + default: + qemu_log_mask(LOG_UNIMP, + "%s: invalid PCI internal register 0x%" HWADDR_PRIx "\n", + __func__, addr); + val = 0; + } + + trace_ppc440_pcix_reg_read(addr, val); + return val; +} + +static const MemoryRegionOps pci_reg_ops = { + .read = ppc440_pcix_reg_read4, + .write = ppc440_pcix_reg_write4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void ppc440_pcix_reset(DeviceState *dev) +{ + struct PPC440PCIXState *s = PPC440_PCIX_HOST_BRIDGE(dev); + int i; + + for (i = 0; i < PPC440_PCIX_NR_POMS; i++) { + ppc440_pcix_clear_region(get_system_memory(), &s->pom[i].mr); + } + for (i = 0; i < PPC440_PCIX_NR_PIMS; i++) { + ppc440_pcix_clear_region(&s->bm, &s->pim[i].mr); + } + memset(s->pom, 0, sizeof(s->pom)); + memset(s->pim, 0, sizeof(s->pim)); + for (i = 0; i < PPC440_PCIX_NR_PIMS; i++) { + s->pim[i].sa = 0xffffffff00000000ULL; + } + s->sts = 0; +} + +/* + * All four IRQ[ABCD] pins from all slots are tied to a single board + * IRQ, so our mapping function here maps everything to IRQ 0. + * The code in pci_change_irq_level() tracks the number of times + * the mapped IRQ is asserted and deasserted, so if multiple devices + * assert an IRQ at the same time the behaviour is correct. + * + * This may need further refactoring for boards that use multiple IRQ lines. 
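+ * For comparison, a board that routed the four pins separately would
+ * use the conventional swizzle here instead, something like
+ * (irq_num + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS.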
+ */ +static int ppc440_pcix_map_irq(PCIDevice *pci_dev, int irq_num) +{ + trace_ppc440_pcix_map_irq(pci_dev->devfn, irq_num, 0); + return 0; +} + +static void ppc440_pcix_set_irq(void *opaque, int irq_num, int level) +{ + qemu_irq *pci_irq = opaque; + + trace_ppc440_pcix_set_irq(irq_num); + if (irq_num < 0) { + error_report("%s: PCI irq %d", __func__, irq_num); + return; + } + qemu_set_irq(*pci_irq, level); +} + +static AddressSpace *ppc440_pcix_set_iommu(PCIBus *b, void *opaque, int devfn) +{ + PPC440PCIXState *s = opaque; + + return &s->bm_as; +} + +/* + * Some guests on sam460ex write all kinds of garbage here such as + * missing enable bit and low bits set and still expect this to work + * (apparently it does on real hardware because these boot there) so + * we have to override these ops here and fix it up + */ +static void pci_host_config_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + + if (addr != 0 || len != 4) { + return; + } + s->config_reg = (val & 0xfffffffcULL) | (1UL << 31); +} + +static uint64_t pci_host_config_read(void *opaque, hwaddr addr, + unsigned len) +{ + PCIHostState *s = opaque; + uint32_t val = s->config_reg; + + return val; +} + +const MemoryRegionOps ppc440_pcix_host_conf_ops = { + .read = pci_host_config_read, + .write = pci_host_config_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void ppc440_pcix_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + PPC440PCIXState *s; + PCIHostState *h; + + h = PCI_HOST_BRIDGE(dev); + s = PPC440_PCIX_HOST_BRIDGE(dev); + + sysbus_init_irq(sbd, &s->irq); + memory_region_init(&s->busmem, OBJECT(dev), "pci bus memory", UINT64_MAX); + h->bus = pci_register_root_bus(dev, NULL, ppc440_pcix_set_irq, + ppc440_pcix_map_irq, &s->irq, &s->busmem, + get_system_io(), PCI_DEVFN(0, 0), 1, TYPE_PCI_BUS); + + s->dev = pci_create_simple(h->bus, PCI_DEVFN(0, 0), "ppc4xx-host-bridge"); + + memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX); + memory_region_add_subregion(&s->bm, 0x0, &s->busmem); + address_space_init(&s->bm_as, &s->bm, "pci-bm"); + pci_setup_iommu(h->bus, ppc440_pcix_set_iommu, s); + + memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE); + memory_region_init_io(&h->conf_mem, OBJECT(s), &ppc440_pcix_host_conf_ops, + h, "pci-conf-idx", 4); + memory_region_init_io(&h->data_mem, OBJECT(s), &pci_host_data_le_ops, + h, "pci-conf-data", 4); + memory_region_init_io(&s->iomem, OBJECT(s), &pci_reg_ops, s, + "pci.reg", PPC440_REG_SIZE); + memory_region_add_subregion(&s->container, PCIC0_CFGADDR, &h->conf_mem); + memory_region_add_subregion(&s->container, PCIC0_CFGDATA, &h->data_mem); + memory_region_add_subregion(&s->container, PPC440_REG_BASE, &s->iomem); + sysbus_init_mmio(sbd, &s->container); +} + +static void ppc440_pcix_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = ppc440_pcix_realize; + dc->reset = ppc440_pcix_reset; +} + +static const TypeInfo ppc440_pcix_info = { + .name = TYPE_PPC440_PCIX_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(PPC440PCIXState), + .class_init = ppc440_pcix_class_init, +}; + +static void ppc440_pcix_register_types(void) +{ + type_register_static(&ppc440_pcix_info); +} + +type_init(ppc440_pcix_register_types) diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c new file mode 100644 index 000000000..993e3ba95 --- /dev/null +++ b/hw/ppc/ppc440_uc.c @@ -0,0 +1,1377 @@ +/* + * QEMU PowerPC 440 
embedded processors emulation + * + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2019 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/irq.h" +#include "exec/memory.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/pci/pci.h" +#include "sysemu/block-backend.h" +#include "sysemu/reset.h" +#include "ppc440.h" +#include "qom/object.h" + +/*****************************************************************************/ +/* L2 Cache as SRAM */ +/* FIXME:fix names */ +enum { + DCR_L2CACHE_BASE = 0x30, + DCR_L2CACHE_CFG = DCR_L2CACHE_BASE, + DCR_L2CACHE_CMD, + DCR_L2CACHE_ADDR, + DCR_L2CACHE_DATA, + DCR_L2CACHE_STAT, + DCR_L2CACHE_CVER, + DCR_L2CACHE_SNP0, + DCR_L2CACHE_SNP1, + DCR_L2CACHE_END = DCR_L2CACHE_SNP1, +}; + +/* base is 460ex-specific, cf. U-Boot, ppc4xx-isram.h */ +enum { + DCR_ISRAM0_BASE = 0x20, + DCR_ISRAM0_SB0CR = DCR_ISRAM0_BASE, + DCR_ISRAM0_SB1CR, + DCR_ISRAM0_SB2CR, + DCR_ISRAM0_SB3CR, + DCR_ISRAM0_BEAR, + DCR_ISRAM0_BESR0, + DCR_ISRAM0_BESR1, + DCR_ISRAM0_PMEG, + DCR_ISRAM0_CID, + DCR_ISRAM0_REVID, + DCR_ISRAM0_DPC, + DCR_ISRAM0_END = DCR_ISRAM0_DPC +}; + +enum { + DCR_ISRAM1_BASE = 0xb0, + DCR_ISRAM1_SB0CR = DCR_ISRAM1_BASE, + /* single bank */ + DCR_ISRAM1_BEAR = DCR_ISRAM1_BASE + 0x04, + DCR_ISRAM1_BESR0, + DCR_ISRAM1_BESR1, + DCR_ISRAM1_PMEG, + DCR_ISRAM1_CID, + DCR_ISRAM1_REVID, + DCR_ISRAM1_DPC, + DCR_ISRAM1_END = DCR_ISRAM1_DPC +}; + +typedef struct ppc4xx_l2sram_t { + MemoryRegion bank[4]; + uint32_t l2cache[8]; + uint32_t isram0[11]; +} ppc4xx_l2sram_t; + +#ifdef MAP_L2SRAM +static void l2sram_update_mappings(ppc4xx_l2sram_t *l2sram, + uint32_t isarc, uint32_t isacntl, + uint32_t dsarc, uint32_t dsacntl) +{ + if (l2sram->isarc != isarc || + (l2sram->isacntl & 0x80000000) != (isacntl & 0x80000000)) { + if (l2sram->isacntl & 0x80000000) { + /* Unmap previously assigned memory region */ + memory_region_del_subregion(get_system_memory(), + &l2sram->isarc_ram); + } + if (isacntl & 0x80000000) { + /* Map new instruction memory region */ + memory_region_add_subregion(get_system_memory(), isarc, + &l2sram->isarc_ram); + } + } + if (l2sram->dsarc != dsarc || + (l2sram->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) { + if (l2sram->dsacntl & 0x80000000) { + /* Beware not to unmap the region we just mapped */ + if (!(isacntl & 0x80000000) || l2sram->dsarc != isarc) { + /* Unmap previously assigned memory region */ + memory_region_del_subregion(get_system_memory(), + &l2sram->dsarc_ram); + } + } + if (dsacntl & 0x80000000) { + /* Beware not to remap the region we just mapped */ + if (!(isacntl & 0x80000000) || dsarc != isarc) { + /* Map new data memory region */ + memory_region_add_subregion(get_system_memory(), dsarc, + &l2sram->dsarc_ram); + } + } + } +} +#endif + +static uint32_t dcr_read_l2sram(void *opaque, int dcrn) +{ + ppc4xx_l2sram_t *l2sram = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case DCR_L2CACHE_CFG: + case DCR_L2CACHE_CMD: + case DCR_L2CACHE_ADDR: + case DCR_L2CACHE_DATA: + case DCR_L2CACHE_STAT: + case DCR_L2CACHE_CVER: + case DCR_L2CACHE_SNP0: + case DCR_L2CACHE_SNP1: + ret = l2sram->l2cache[dcrn - DCR_L2CACHE_BASE]; + break; + + case DCR_ISRAM0_SB0CR: + case DCR_ISRAM0_SB1CR: + case DCR_ISRAM0_SB2CR: + case DCR_ISRAM0_SB3CR: + case DCR_ISRAM0_BEAR: + case DCR_ISRAM0_BESR0: + case 
DCR_ISRAM0_BESR1: + case DCR_ISRAM0_PMEG: + case DCR_ISRAM0_CID: + case DCR_ISRAM0_REVID: + case DCR_ISRAM0_DPC: + ret = l2sram->isram0[dcrn - DCR_ISRAM0_BASE]; + break; + + default: + break; + } + + return ret; +} + +static void dcr_write_l2sram(void *opaque, int dcrn, uint32_t val) +{ + /*ppc4xx_l2sram_t *l2sram = opaque;*/ + /* FIXME: Actually handle L2 cache mapping */ + + switch (dcrn) { + case DCR_L2CACHE_CFG: + case DCR_L2CACHE_CMD: + case DCR_L2CACHE_ADDR: + case DCR_L2CACHE_DATA: + case DCR_L2CACHE_STAT: + case DCR_L2CACHE_CVER: + case DCR_L2CACHE_SNP0: + case DCR_L2CACHE_SNP1: + /*l2sram->l2cache[dcrn - DCR_L2CACHE_BASE] = val;*/ + break; + + case DCR_ISRAM0_SB0CR: + case DCR_ISRAM0_SB1CR: + case DCR_ISRAM0_SB2CR: + case DCR_ISRAM0_SB3CR: + case DCR_ISRAM0_BEAR: + case DCR_ISRAM0_BESR0: + case DCR_ISRAM0_BESR1: + case DCR_ISRAM0_PMEG: + case DCR_ISRAM0_CID: + case DCR_ISRAM0_REVID: + case DCR_ISRAM0_DPC: + /*l2sram->isram0[dcrn - DCR_L2CACHE_BASE] = val;*/ + break; + + case DCR_ISRAM1_SB0CR: + case DCR_ISRAM1_BEAR: + case DCR_ISRAM1_BESR0: + case DCR_ISRAM1_BESR1: + case DCR_ISRAM1_PMEG: + case DCR_ISRAM1_CID: + case DCR_ISRAM1_REVID: + case DCR_ISRAM1_DPC: + /*l2sram->isram1[dcrn - DCR_L2CACHE_BASE] = val;*/ + break; + } + /*l2sram_update_mappings(l2sram, isarc, isacntl, dsarc, dsacntl);*/ +} + +static void l2sram_reset(void *opaque) +{ + ppc4xx_l2sram_t *l2sram = opaque; + + memset(l2sram->l2cache, 0, sizeof(l2sram->l2cache)); + l2sram->l2cache[DCR_L2CACHE_STAT - DCR_L2CACHE_BASE] = 0x80000000; + memset(l2sram->isram0, 0, sizeof(l2sram->isram0)); + /*l2sram_update_mappings(l2sram, isarc, isacntl, dsarc, dsacntl);*/ +} + +void ppc4xx_l2sram_init(CPUPPCState *env) +{ + ppc4xx_l2sram_t *l2sram; + + l2sram = g_malloc0(sizeof(*l2sram)); + /* XXX: Size is 4*64kB for 460ex, cf. 
U-Boot, ppc4xx-isram.h */ + memory_region_init_ram(&l2sram->bank[0], NULL, "ppc4xx.l2sram_bank0", + 64 * KiB, &error_abort); + memory_region_init_ram(&l2sram->bank[1], NULL, "ppc4xx.l2sram_bank1", + 64 * KiB, &error_abort); + memory_region_init_ram(&l2sram->bank[2], NULL, "ppc4xx.l2sram_bank2", + 64 * KiB, &error_abort); + memory_region_init_ram(&l2sram->bank[3], NULL, "ppc4xx.l2sram_bank3", + 64 * KiB, &error_abort); + qemu_register_reset(&l2sram_reset, l2sram); + ppc_dcr_register(env, DCR_L2CACHE_CFG, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_CMD, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_ADDR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_DATA, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_STAT, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_CVER, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_SNP0, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_SNP1, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + + ppc_dcr_register(env, DCR_ISRAM0_SB0CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_SB1CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_SB2CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_SB3CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_PMEG, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_DPC, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + + ppc_dcr_register(env, DCR_ISRAM1_SB0CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM1_PMEG, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM1_DPC, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); +} + +/*****************************************************************************/ +/* Clocking Power on Reset */ +enum { + CPR0_CFGADDR = 0xC, + CPR0_CFGDATA = 0xD, + + CPR0_PLLD = 0x060, + CPR0_PLBED = 0x080, + CPR0_OPBD = 0x0C0, + CPR0_PERD = 0x0E0, + CPR0_AHBD = 0x100, +}; + +typedef struct ppc4xx_cpr_t { + uint32_t addr; +} ppc4xx_cpr_t; + +static uint32_t dcr_read_cpr(void *opaque, int dcrn) +{ + ppc4xx_cpr_t *cpr = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case CPR0_CFGADDR: + ret = cpr->addr; + break; + case CPR0_CFGDATA: + switch (cpr->addr) { + case CPR0_PLLD: + ret = (0xb5 << 24) | (1 << 16) | (9 << 8); + break; + case CPR0_PLBED: + ret = (5 << 24); + break; + case CPR0_OPBD: + ret = (2 << 24); + break; + case CPR0_PERD: + case CPR0_AHBD: + ret = (1 << 24); + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +static void dcr_write_cpr(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_cpr_t *cpr = opaque; + + switch (dcrn) { + case CPR0_CFGADDR: + cpr->addr = val; + break; + case CPR0_CFGDATA: + break; + default: + break; + } +} + +static void ppc4xx_cpr_reset(void *opaque) +{ + ppc4xx_cpr_t *cpr = opaque; + + cpr->addr = 0; +} + +void ppc4xx_cpr_init(CPUPPCState *env) +{ + ppc4xx_cpr_t *cpr; + + cpr = g_malloc0(sizeof(*cpr)); + ppc_dcr_register(env, CPR0_CFGADDR, cpr, &dcr_read_cpr, &dcr_write_cpr); + ppc_dcr_register(env, CPR0_CFGDATA, cpr, &dcr_read_cpr, &dcr_write_cpr); + qemu_register_reset(ppc4xx_cpr_reset, cpr); +} + 
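+/*
+ * Note on the access pattern modelled above: the CPR (like the SDR and
+ * SDRAM controllers below) is reached through an address/data pair of
+ * DCRs. A guest first writes the target register offset to CPR0_CFGADDR
+ * and then accesses CPR0_CFGDATA, e.g. to read the PLB divider:
+ *
+ *     mtdcr(CPR0_CFGADDR, CPR0_PLBED);
+ *     val = mfdcr(CPR0_CFGDATA);    /* dcr_read_cpr() returns 5 << 24 */
+ *
+ * (Illustrative sketch only: mtdcr/mfdcr denote the PowerPC DCR access
+ * instructions, not helpers defined in this file.)
+ */
+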
+/*****************************************************************************/ +/* System DCRs */ +typedef struct ppc4xx_sdr_t ppc4xx_sdr_t; +struct ppc4xx_sdr_t { + uint32_t addr; +}; + +enum { + SDR0_CFGADDR = 0x00e, + SDR0_CFGDATA, + SDR0_STRP0 = 0x020, + SDR0_STRP1, + SDR0_102 = 0x66, + SDR0_103, + SDR0_128 = 0x80, + SDR0_ECID3 = 0x083, + SDR0_DDR0 = 0x0e1, + SDR0_USB0 = 0x320, +}; + +enum { + PESDR0_LOOP = 0x303, + PESDR0_RCSSET, + PESDR0_RCSSTS, + PESDR0_RSTSTA = 0x310, + PESDR1_LOOP = 0x343, + PESDR1_RCSSET, + PESDR1_RCSSTS, + PESDR1_RSTSTA = 0x365, +}; + +#define SDR0_DDR0_DDRM_ENCODE(n) ((((unsigned long)(n)) & 0x03) << 29) +#define SDR0_DDR0_DDRM_DDR1 0x20000000 +#define SDR0_DDR0_DDRM_DDR2 0x40000000 + +static uint32_t dcr_read_sdr(void *opaque, int dcrn) +{ + ppc4xx_sdr_t *sdr = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case SDR0_CFGADDR: + ret = sdr->addr; + break; + case SDR0_CFGDATA: + switch (sdr->addr) { + case SDR0_STRP0: + ret = (0xb5 << 8) | (1 << 4) | 9; + break; + case SDR0_STRP1: + ret = (5 << 29) | (2 << 26) | (1 << 24); + break; + case SDR0_ECID3: + ret = 1 << 20; /* No Security/Kasumi support */ + break; + case SDR0_DDR0: + ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1; + break; + case PESDR0_RCSSET: + case PESDR1_RCSSET: + ret = (1 << 24) | (1 << 16); + break; + case PESDR0_RCSSTS: + case PESDR1_RCSSTS: + ret = (1 << 16) | (1 << 12); + break; + case PESDR0_RSTSTA: + case PESDR1_RSTSTA: + ret = 1; + break; + case PESDR0_LOOP: + case PESDR1_LOOP: + ret = 1 << 12; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +static void dcr_write_sdr(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_sdr_t *sdr = opaque; + + switch (dcrn) { + case SDR0_CFGADDR: + sdr->addr = val; + break; + case SDR0_CFGDATA: + switch (sdr->addr) { + case 0x00: /* B0CR */ + break; + default: + break; + } + break; + default: + break; + } +} + +static void sdr_reset(void *opaque) +{ + ppc4xx_sdr_t *sdr = opaque; + + sdr->addr = 0; +} + +void ppc4xx_sdr_init(CPUPPCState *env) +{ + ppc4xx_sdr_t *sdr; + + sdr = g_malloc0(sizeof(*sdr)); + qemu_register_reset(&sdr_reset, sdr); + ppc_dcr_register(env, SDR0_CFGADDR, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_CFGDATA, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_102, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_103, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_128, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_USB0, + sdr, &dcr_read_sdr, &dcr_write_sdr); +} + +/*****************************************************************************/ +/* SDRAM controller */ +typedef struct ppc440_sdram_t { + uint32_t addr; + int nbanks; + MemoryRegion containers[4]; /* used for clipping */ + MemoryRegion *ram_memories; + hwaddr ram_bases[4]; + hwaddr ram_sizes[4]; + uint32_t bcr[4]; +} ppc440_sdram_t; + +enum { + SDRAM0_CFGADDR = 0x10, + SDRAM0_CFGDATA, + SDRAM_R0BAS = 0x40, + SDRAM_R1BAS, + SDRAM_R2BAS, + SDRAM_R3BAS, + SDRAM_CONF1HB = 0x45, + SDRAM_PLBADDULL = 0x4a, + SDRAM_CONF1LL = 0x4b, + SDRAM_CONFPATHB = 0x4f, + SDRAM_PLBADDUHB = 0x50, +}; + +static uint32_t sdram_bcr(hwaddr ram_base, hwaddr ram_size) +{ + uint32_t bcr; + + switch (ram_size) { + case (8 * MiB): + bcr = 0xffc0; + break; + case (16 * MiB): + bcr = 0xff80; + break; + case (32 * MiB): + bcr = 0xff00; + break; + case (64 * MiB): + bcr = 0xfe00; + break; + case (128 * MiB): + bcr = 0xfc00; + break; + case (256 * MiB): + bcr = 
0xf800; + break; + case (512 * MiB): + bcr = 0xf000; + break; + case (1 * GiB): + bcr = 0xe000; + break; + case (2 * GiB): + bcr = 0xc000; + break; + case (4 * GiB): + bcr = 0x8000; + break; + default: + error_report("invalid RAM size " TARGET_FMT_plx, ram_size); + return 0; + } + bcr |= ram_base >> 2 & 0xffe00000; + bcr |= 1; + + return bcr; +} + +static inline hwaddr sdram_base(uint32_t bcr) +{ + return (bcr & 0xffe00000) << 2; +} + +static uint64_t sdram_size(uint32_t bcr) +{ + uint64_t size; + int sh; + + sh = 1024 - ((bcr >> 6) & 0x3ff); + size = 8 * MiB * sh; + + return size; +} + +static void sdram_set_bcr(ppc440_sdram_t *sdram, int i, + uint32_t bcr, int enabled) +{ + if (sdram->bcr[i] & 1) { + /* First unmap RAM if enabled */ + memory_region_del_subregion(get_system_memory(), + &sdram->containers[i]); + memory_region_del_subregion(&sdram->containers[i], + &sdram->ram_memories[i]); + object_unparent(OBJECT(&sdram->containers[i])); + } + sdram->bcr[i] = bcr & 0xffe0ffc1; + if (enabled && (bcr & 1)) { + memory_region_init(&sdram->containers[i], NULL, "sdram-containers", + sdram_size(bcr)); + memory_region_add_subregion(&sdram->containers[i], 0, + &sdram->ram_memories[i]); + memory_region_add_subregion(get_system_memory(), + sdram_base(bcr), + &sdram->containers[i]); + } +} + +static void sdram_map_bcr(ppc440_sdram_t *sdram) +{ + int i; + + for (i = 0; i < sdram->nbanks; i++) { + if (sdram->ram_sizes[i] != 0) { + sdram_set_bcr(sdram, i, sdram_bcr(sdram->ram_bases[i], + sdram->ram_sizes[i]), 1); + } else { + sdram_set_bcr(sdram, i, 0, 0); + } + } +} + +static uint32_t dcr_read_sdram(void *opaque, int dcrn) +{ + ppc440_sdram_t *sdram = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case SDRAM_R0BAS: + case SDRAM_R1BAS: + case SDRAM_R2BAS: + case SDRAM_R3BAS: + if (sdram->ram_sizes[dcrn - SDRAM_R0BAS]) { + ret = sdram_bcr(sdram->ram_bases[dcrn - SDRAM_R0BAS], + sdram->ram_sizes[dcrn - SDRAM_R0BAS]); + } + break; + case SDRAM_CONF1HB: + case SDRAM_CONF1LL: + case SDRAM_CONFPATHB: + case SDRAM_PLBADDULL: + case SDRAM_PLBADDUHB: + break; + case SDRAM0_CFGADDR: + ret = sdram->addr; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x14: /* SDRAM_MCSTAT (405EX) */ + case 0x1F: + ret = 0x80000000; + break; + case 0x21: /* SDRAM_MCOPT2 */ + ret = 0x08000000; + break; + case 0x40: /* SDRAM_MB0CF */ + ret = 0x00008001; + break; + case 0x7A: /* SDRAM_DLCR */ + ret = 0x02000000; + break; + case 0xE1: /* SDR0_DDR0 */ + ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +static void dcr_write_sdram(void *opaque, int dcrn, uint32_t val) +{ + ppc440_sdram_t *sdram = opaque; + + switch (dcrn) { + case SDRAM_R0BAS: + case SDRAM_R1BAS: + case SDRAM_R2BAS: + case SDRAM_R3BAS: + case SDRAM_CONF1HB: + case SDRAM_CONF1LL: + case SDRAM_CONFPATHB: + case SDRAM_PLBADDULL: + case SDRAM_PLBADDUHB: + break; + case SDRAM0_CFGADDR: + sdram->addr = val; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x00: /* B0CR */ + break; + default: + break; + } + break; + default: + break; + } +} + +static void sdram_reset(void *opaque) +{ + ppc440_sdram_t *sdram = opaque; + + sdram->addr = 0; +} + +void ppc440_sdram_init(CPUPPCState *env, int nbanks, + MemoryRegion *ram_memories, + hwaddr *ram_bases, hwaddr *ram_sizes, + int do_init) +{ + ppc440_sdram_t *sdram; + + sdram = g_malloc0(sizeof(*sdram)); + sdram->nbanks = nbanks; + sdram->ram_memories = ram_memories; + memcpy(sdram->ram_bases, ram_bases, nbanks * 
sizeof(hwaddr)); + memcpy(sdram->ram_sizes, ram_sizes, nbanks * sizeof(hwaddr)); + qemu_register_reset(&sdram_reset, sdram); + ppc_dcr_register(env, SDRAM0_CFGADDR, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM0_CFGDATA, + sdram, &dcr_read_sdram, &dcr_write_sdram); + if (do_init) { + sdram_map_bcr(sdram); + } + + ppc_dcr_register(env, SDRAM_R0BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_R1BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_R2BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_R3BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_CONF1HB, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_PLBADDULL, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_CONF1LL, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_CONFPATHB, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_PLBADDUHB, + sdram, &dcr_read_sdram, &dcr_write_sdram); +} + +/*****************************************************************************/ +/* PLB to AHB bridge */ +enum { + AHB_TOP = 0xA4, + AHB_BOT = 0xA5, +}; + +typedef struct ppc4xx_ahb_t { + uint32_t top; + uint32_t bot; +} ppc4xx_ahb_t; + +static uint32_t dcr_read_ahb(void *opaque, int dcrn) +{ + ppc4xx_ahb_t *ahb = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case AHB_TOP: + ret = ahb->top; + break; + case AHB_BOT: + ret = ahb->bot; + break; + default: + break; + } + + return ret; +} + +static void dcr_write_ahb(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_ahb_t *ahb = opaque; + + switch (dcrn) { + case AHB_TOP: + ahb->top = val; + break; + case AHB_BOT: + ahb->bot = val; + break; + } +} + +static void ppc4xx_ahb_reset(void *opaque) +{ + ppc4xx_ahb_t *ahb = opaque; + + /* No error */ + ahb->top = 0; + ahb->bot = 0; +} + +void ppc4xx_ahb_init(CPUPPCState *env) +{ + ppc4xx_ahb_t *ahb; + + ahb = g_malloc0(sizeof(*ahb)); + ppc_dcr_register(env, AHB_TOP, ahb, &dcr_read_ahb, &dcr_write_ahb); + ppc_dcr_register(env, AHB_BOT, ahb, &dcr_read_ahb, &dcr_write_ahb); + qemu_register_reset(ppc4xx_ahb_reset, ahb); +} + +/*****************************************************************************/ +/* DMA controller */ + +#define DMA0_CR_CE (1 << 31) +#define DMA0_CR_PW (1 << 26 | 1 << 25) +#define DMA0_CR_DAI (1 << 24) +#define DMA0_CR_SAI (1 << 23) +#define DMA0_CR_DEC (1 << 2) + +enum { + DMA0_CR = 0x00, + DMA0_CT, + DMA0_SAH, + DMA0_SAL, + DMA0_DAH, + DMA0_DAL, + DMA0_SGH, + DMA0_SGL, + + DMA0_SR = 0x20, + DMA0_SGC = 0x23, + DMA0_SLP = 0x25, + DMA0_POL = 0x26, +}; + +typedef struct { + uint32_t cr; + uint32_t ct; + uint64_t sa; + uint64_t da; + uint64_t sg; +} PPC4xxDmaChnl; + +typedef struct { + int base; + PPC4xxDmaChnl ch[4]; + uint32_t sr; +} PPC4xxDmaState; + +static uint32_t dcr_read_dma(void *opaque, int dcrn) +{ + PPC4xxDmaState *dma = opaque; + uint32_t val = 0; + int addr = dcrn - dma->base; + int chnl = addr / 8; + + switch (addr) { + case 0x00 ... 
0x1f: + switch (addr % 8) { + case DMA0_CR: + val = dma->ch[chnl].cr; + break; + case DMA0_CT: + val = dma->ch[chnl].ct; + break; + case DMA0_SAH: + val = dma->ch[chnl].sa >> 32; + break; + case DMA0_SAL: + val = dma->ch[chnl].sa; + break; + case DMA0_DAH: + val = dma->ch[chnl].da >> 32; + break; + case DMA0_DAL: + val = dma->ch[chnl].da; + break; + case DMA0_SGH: + val = dma->ch[chnl].sg >> 32; + break; + case DMA0_SGL: + val = dma->ch[chnl].sg; + break; + } + break; + case DMA0_SR: + val = dma->sr; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n", + __func__, dcrn, chnl, addr); + } + + return val; +} + +static void dcr_write_dma(void *opaque, int dcrn, uint32_t val) +{ + PPC4xxDmaState *dma = opaque; + int addr = dcrn - dma->base; + int chnl = addr / 8; + + switch (addr) { + case 0x00 ... 0x1f: + switch (addr % 8) { + case DMA0_CR: + dma->ch[chnl].cr = val; + if (val & DMA0_CR_CE) { + int count = dma->ch[chnl].ct & 0xffff; + + if (count) { + int width, i, sidx, didx; + uint8_t *rptr, *wptr; + hwaddr rlen, wlen; + + sidx = didx = 0; + width = 1 << ((val & DMA0_CR_PW) >> 25); + rptr = cpu_physical_memory_map(dma->ch[chnl].sa, &rlen, + false); + wptr = cpu_physical_memory_map(dma->ch[chnl].da, &wlen, + true); + if (rptr && wptr) { + if (!(val & DMA0_CR_DEC) && + val & DMA0_CR_SAI && val & DMA0_CR_DAI) { + /* optimise common case */ + memmove(wptr, rptr, count * width); + sidx = didx = count * width; + } else { + /* do it the slow way */ + for (sidx = didx = i = 0; i < count; i++) { + uint64_t v = ldn_le_p(rptr + sidx, width); + stn_le_p(wptr + didx, width, v); + if (val & DMA0_CR_SAI) { + sidx += width; + } + if (val & DMA0_CR_DAI) { + didx += width; + } + } + } + } + if (wptr) { + cpu_physical_memory_unmap(wptr, wlen, 1, didx); + } + if (rptr) { + cpu_physical_memory_unmap(rptr, rlen, 0, sidx); + } + } + } + break; + case DMA0_CT: + dma->ch[chnl].ct = val; + break; + case DMA0_SAH: + dma->ch[chnl].sa &= 0xffffffffULL; + dma->ch[chnl].sa |= (uint64_t)val << 32; + break; + case DMA0_SAL: + dma->ch[chnl].sa &= 0xffffffff00000000ULL; + dma->ch[chnl].sa |= val; + break; + case DMA0_DAH: + dma->ch[chnl].da &= 0xffffffffULL; + dma->ch[chnl].da |= (uint64_t)val << 32; + break; + case DMA0_DAL: + dma->ch[chnl].da &= 0xffffffff00000000ULL; + dma->ch[chnl].da |= val; + break; + case DMA0_SGH: + dma->ch[chnl].sg &= 0xffffffffULL; + dma->ch[chnl].sg |= (uint64_t)val << 32; + break; + case DMA0_SGL: + dma->ch[chnl].sg &= 0xffffffff00000000ULL; + dma->ch[chnl].sg |= val; + break; + } + break; + case DMA0_SR: + dma->sr &= ~val; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n", + __func__, dcrn, chnl, addr); + } +} + +static void ppc4xx_dma_reset(void *opaque) +{ + PPC4xxDmaState *dma = opaque; + int dma_base = dma->base; + + memset(dma, 0, sizeof(*dma)); + dma->base = dma_base; +} + +void ppc4xx_dma_init(CPUPPCState *env, int dcr_base) +{ + PPC4xxDmaState *dma; + int i; + + dma = g_malloc0(sizeof(*dma)); + dma->base = dcr_base; + qemu_register_reset(&ppc4xx_dma_reset, dma); + for (i = 0; i < 4; i++) { + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_CR, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_CT, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SAH, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SAL, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_DAH, + dma, 
&dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_DAL,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SGH,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SGL,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+    }
+    ppc_dcr_register(env, dcr_base + DMA0_SR,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_SGC,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_SLP,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_POL,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+}
+
+/*****************************************************************************/
+/* PCI Express controller */
+/* FIXME: This is not complete and does not work, only implemented partially
+ * to allow firmware and guests to find an empty bus. Cards should use PCI.
+ */
+#include "hw/pci/pcie_host.h"
+
+#define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
+OBJECT_DECLARE_SIMPLE_TYPE(PPC460EXPCIEState, PPC460EX_PCIE_HOST)
+
+struct PPC460EXPCIEState {
+    PCIExpressHost host;
+
+    MemoryRegion iomem;
+    qemu_irq irq[4];
+    int32_t dcrn_base;
+
+    uint64_t cfg_base;
+    uint32_t cfg_mask;
+    uint64_t msg_base;
+    uint32_t msg_mask;
+    uint64_t omr1_base;
+    uint64_t omr1_mask;
+    uint64_t omr2_base;
+    uint64_t omr2_mask;
+    uint64_t omr3_base;
+    uint64_t omr3_mask;
+    uint64_t reg_base;
+    uint32_t reg_mask;
+    uint32_t special;
+    uint32_t cfg;
+};
+
+#define DCRN_PCIE0_BASE 0x100
+#define DCRN_PCIE1_BASE 0x120
+
+enum {
+    PEGPL_CFGBAH = 0x0,
+    PEGPL_CFGBAL,
+    PEGPL_CFGMSK,
+    PEGPL_MSGBAH,
+    PEGPL_MSGBAL,
+    PEGPL_MSGMSK,
+    PEGPL_OMR1BAH,
+    PEGPL_OMR1BAL,
+    PEGPL_OMR1MSKH,
+    PEGPL_OMR1MSKL,
+    PEGPL_OMR2BAH,
+    PEGPL_OMR2BAL,
+    PEGPL_OMR2MSKH,
+    PEGPL_OMR2MSKL,
+    PEGPL_OMR3BAH,
+    PEGPL_OMR3BAL,
+    PEGPL_OMR3MSKH,
+    PEGPL_OMR3MSKL,
+    PEGPL_REGBAH,
+    PEGPL_REGBAL,
+    PEGPL_REGMSK,
+    PEGPL_SPECIAL,
+    PEGPL_CFG,
+};
+
+static uint32_t dcr_read_pcie(void *opaque, int dcrn)
+{
+    PPC460EXPCIEState *state = opaque;
+    uint32_t ret = 0;
+
+    switch (dcrn - state->dcrn_base) {
+    case PEGPL_CFGBAH:
+        ret = state->cfg_base >> 32;
+        break;
+    case PEGPL_CFGBAL:
+        ret = state->cfg_base;
+        break;
+    case PEGPL_CFGMSK:
+        ret = state->cfg_mask;
+        break;
+    case PEGPL_MSGBAH:
+        ret = state->msg_base >> 32;
+        break;
+    case PEGPL_MSGBAL:
+        ret = state->msg_base;
+        break;
+    case PEGPL_MSGMSK:
+        ret = state->msg_mask;
+        break;
+    case PEGPL_OMR1BAH:
+        ret = state->omr1_base >> 32;
+        break;
+    case PEGPL_OMR1BAL:
+        ret = state->omr1_base;
+        break;
+    case PEGPL_OMR1MSKH:
+        ret = state->omr1_mask >> 32;
+        break;
+    case PEGPL_OMR1MSKL:
+        ret = state->omr1_mask;
+        break;
+    case PEGPL_OMR2BAH:
+        ret = state->omr2_base >> 32;
+        break;
+    case PEGPL_OMR2BAL:
+        ret = state->omr2_base;
+        break;
+    case PEGPL_OMR2MSKH:
+        ret = state->omr2_mask >> 32;
+        break;
+    case PEGPL_OMR2MSKL:
+        ret = state->omr2_mask;
+        break;
+    case PEGPL_OMR3BAH:
+        ret = state->omr3_base >> 32;
+        break;
+    case PEGPL_OMR3BAL:
+        ret = state->omr3_base;
+        break;
+    case PEGPL_OMR3MSKH:
+        ret = state->omr3_mask >> 32;
+        break;
+    case PEGPL_OMR3MSKL:
+        ret = state->omr3_mask;
+        break;
+    case PEGPL_REGBAH:
+        ret = state->reg_base >> 32;
+        break;
+    case PEGPL_REGBAL:
+        ret = state->reg_base;
+        break;
+    case PEGPL_REGMSK:
+        ret = state->reg_mask;
+        break;
+    case PEGPL_SPECIAL:
+        ret = state->special;
+        break;
+    case PEGPL_CFG:
+        ret = state->cfg;
+        break;
+    }
+
+    return ret;
+}
+
+static void dcr_write_pcie(void *opaque, int dcrn, uint32_t val)
+{
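+    /*
+     * The BAH/BAL cases below latch the two 32-bit halves into 64-bit
+     * base registers; the only case with a side effect beyond storing
+     * the value is PEGPL_CFGMSK, which resizes the MMCFG window through
+     * pcie_host_mmcfg_update().
+     */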
PPC460EXPCIEState *s = opaque; + uint64_t size; + + switch (dcrn - s->dcrn_base) { + case PEGPL_CFGBAH: + s->cfg_base = ((uint64_t)val << 32) | (s->cfg_base & 0xffffffff); + break; + case PEGPL_CFGBAL: + s->cfg_base = (s->cfg_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_CFGMSK: + s->cfg_mask = val; + size = ~(val & 0xfffffffe) + 1; + pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size); + break; + case PEGPL_MSGBAH: + s->msg_base = ((uint64_t)val << 32) | (s->msg_base & 0xffffffff); + break; + case PEGPL_MSGBAL: + s->msg_base = (s->msg_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_MSGMSK: + s->msg_mask = val; + break; + case PEGPL_OMR1BAH: + s->omr1_base = ((uint64_t)val << 32) | (s->omr1_base & 0xffffffff); + break; + case PEGPL_OMR1BAL: + s->omr1_base = (s->omr1_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR1MSKH: + s->omr1_mask = ((uint64_t)val << 32) | (s->omr1_mask & 0xffffffff); + break; + case PEGPL_OMR1MSKL: + s->omr1_mask = (s->omr1_mask & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR2BAH: + s->omr2_base = ((uint64_t)val << 32) | (s->omr2_base & 0xffffffff); + break; + case PEGPL_OMR2BAL: + s->omr2_base = (s->omr2_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR2MSKH: + s->omr2_mask = ((uint64_t)val << 32) | (s->omr2_mask & 0xffffffff); + break; + case PEGPL_OMR2MSKL: + s->omr2_mask = (s->omr2_mask & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR3BAH: + s->omr3_base = ((uint64_t)val << 32) | (s->omr3_base & 0xffffffff); + break; + case PEGPL_OMR3BAL: + s->omr3_base = (s->omr3_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR3MSKH: + s->omr3_mask = ((uint64_t)val << 32) | (s->omr3_mask & 0xffffffff); + break; + case PEGPL_OMR3MSKL: + s->omr3_mask = (s->omr3_mask & 0xffffffff00000000ULL) | val; + break; + case PEGPL_REGBAH: + s->reg_base = ((uint64_t)val << 32) | (s->reg_base & 0xffffffff); + break; + case PEGPL_REGBAL: + s->reg_base = (s->reg_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_REGMSK: + s->reg_mask = val; + /* FIXME: how is size encoded? */ + size = (val == 0x7001 ? 
4096 : ~(val & 0xfffffffe) + 1); + break; + case PEGPL_SPECIAL: + s->special = val; + break; + case PEGPL_CFG: + s->cfg = val; + break; + } +} + +static void ppc460ex_set_irq(void *opaque, int irq_num, int level) +{ + PPC460EXPCIEState *s = opaque; + qemu_set_irq(s->irq[irq_num], level); +} + +static void ppc460ex_pcie_realize(DeviceState *dev, Error **errp) +{ + PPC460EXPCIEState *s = PPC460EX_PCIE_HOST(dev); + PCIHostState *pci = PCI_HOST_BRIDGE(dev); + int i, id; + char buf[16]; + + switch (s->dcrn_base) { + case DCRN_PCIE0_BASE: + id = 0; + break; + case DCRN_PCIE1_BASE: + id = 1; + break; + default: + error_setg(errp, "invalid PCIe DCRN base"); + return; + } + snprintf(buf, sizeof(buf), "pcie%d-io", id); + memory_region_init(&s->iomem, OBJECT(s), buf, UINT64_MAX); + for (i = 0; i < 4; i++) { + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]); + } + snprintf(buf, sizeof(buf), "pcie.%d", id); + pci->bus = pci_register_root_bus(DEVICE(s), buf, ppc460ex_set_irq, + pci_swizzle_map_irq_fn, s, &s->iomem, + get_system_io(), 0, 4, TYPE_PCIE_BUS); +} + +static Property ppc460ex_pcie_props[] = { + DEFINE_PROP_INT32("dcrn-base", PPC460EXPCIEState, dcrn_base, -1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ppc460ex_pcie_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->realize = ppc460ex_pcie_realize; + device_class_set_props(dc, ppc460ex_pcie_props); + dc->hotpluggable = false; +} + +static const TypeInfo ppc460ex_pcie_host_info = { + .name = TYPE_PPC460EX_PCIE_HOST, + .parent = TYPE_PCIE_HOST_BRIDGE, + .instance_size = sizeof(PPC460EXPCIEState), + .class_init = ppc460ex_pcie_class_init, +}; + +static void ppc460ex_pcie_register(void) +{ + type_register_static(&ppc460ex_pcie_host_info); +} + +type_init(ppc460ex_pcie_register) + +static void ppc460ex_pcie_register_dcrs(PPC460EXPCIEState *s, CPUPPCState *env) +{ + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGMSK, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGMSK, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3MSKH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + 
PEGPL_OMR3MSKL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_REGMSK, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_SPECIAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFG, s, + &dcr_read_pcie, &dcr_write_pcie); +} + +void ppc460ex_pcie_init(CPUPPCState *env) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_PPC460EX_PCIE_HOST); + qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE0_BASE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), env); + + dev = qdev_new(TYPE_PPC460EX_PCIE_HOST); + qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE1_BASE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), env); +} diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c new file mode 100644 index 000000000..980c48944 --- /dev/null +++ b/hw/ppc/ppc4xx_devs.c @@ -0,0 +1,715 @@ +/* + * QEMU PowerPC 4xx embedded processors shared devices emulation + * + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "sysemu/reset.h" +#include "cpu.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "hw/intc/ppc-uic.h" +#include "hw/qdev-properties.h" +#include "qemu/log.h" +#include "exec/address-spaces.h" +#include "qemu/error-report.h" +#include "qapi/error.h" + +/*#define DEBUG_UIC*/ + +#ifdef DEBUG_UIC +# define LOG_UIC(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__) +#else +# define LOG_UIC(...) 
do { } while (0) +#endif + +static void ppc4xx_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); +} + +/*****************************************************************************/ +/* Generic PowerPC 4xx processor instantiation */ +PowerPCCPU *ppc4xx_init(const char *cpu_type, + clk_setup_t *cpu_clk, clk_setup_t *tb_clk, + uint32_t sysclk) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + + /* init CPUs */ + cpu = POWERPC_CPU(cpu_create(cpu_type)); + env = &cpu->env; + + cpu_clk->cb = NULL; /* We don't care about CPU clock frequency changes */ + cpu_clk->opaque = env; + /* Set time-base frequency to sysclk */ + tb_clk->cb = ppc_40x_timers_init(env, sysclk, PPC_INTERRUPT_PIT); + tb_clk->opaque = env; + ppc_dcr_init(env, NULL, NULL); + /* Register qemu callbacks */ + qemu_register_reset(ppc4xx_reset, cpu); + + return cpu; +} + +/*****************************************************************************/ +/* SDRAM controller */ +typedef struct ppc4xx_sdram_t ppc4xx_sdram_t; +struct ppc4xx_sdram_t { + uint32_t addr; + int nbanks; + MemoryRegion containers[4]; /* used for clipping */ + MemoryRegion *ram_memories; + hwaddr ram_bases[4]; + hwaddr ram_sizes[4]; + uint32_t besr0; + uint32_t besr1; + uint32_t bear; + uint32_t cfg; + uint32_t status; + uint32_t rtr; + uint32_t pmit; + uint32_t bcr[4]; + uint32_t tr; + uint32_t ecccfg; + uint32_t eccesr; + qemu_irq irq; +}; + +enum { + SDRAM0_CFGADDR = 0x010, + SDRAM0_CFGDATA = 0x011, +}; + +/* XXX: TOFIX: some patches have made this code become inconsistent: + * there are type inconsistencies, mixing hwaddr, target_ulong + * and uint32_t + */ +static uint32_t sdram_bcr (hwaddr ram_base, + hwaddr ram_size) +{ + uint32_t bcr; + + switch (ram_size) { + case 4 * MiB: + bcr = 0x00000000; + break; + case 8 * MiB: + bcr = 0x00020000; + break; + case 16 * MiB: + bcr = 0x00040000; + break; + case 32 * MiB: + bcr = 0x00060000; + break; + case 64 * MiB: + bcr = 0x00080000; + break; + case 128 * MiB: + bcr = 0x000A0000; + break; + case 256 * MiB: + bcr = 0x000C0000; + break; + default: + printf("%s: invalid RAM size " TARGET_FMT_plx "\n", __func__, + ram_size); + return 0x00000000; + } + bcr |= ram_base & 0xFF800000; + bcr |= 1; + + return bcr; +} + +static inline hwaddr sdram_base(uint32_t bcr) +{ + return bcr & 0xFF800000; +} + +static target_ulong sdram_size (uint32_t bcr) +{ + target_ulong size; + int sh; + + sh = (bcr >> 17) & 0x7; + if (sh == 7) + size = -1; + else + size = (4 * MiB) << sh; + + return size; +} + +static void sdram_set_bcr(ppc4xx_sdram_t *sdram, int i, + uint32_t bcr, int enabled) +{ + if (sdram->bcr[i] & 0x00000001) { + /* Unmap RAM */ +#ifdef DEBUG_SDRAM + printf("%s: unmap RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n", + __func__, sdram_base(sdram->bcr[i]), sdram_size(sdram->bcr[i])); +#endif + memory_region_del_subregion(get_system_memory(), + &sdram->containers[i]); + memory_region_del_subregion(&sdram->containers[i], + &sdram->ram_memories[i]); + object_unparent(OBJECT(&sdram->containers[i])); + } + sdram->bcr[i] = bcr & 0xFFDEE001; + if (enabled && (bcr & 0x00000001)) { +#ifdef DEBUG_SDRAM + printf("%s: Map RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n", + __func__, sdram_base(bcr), sdram_size(bcr)); +#endif + memory_region_init(&sdram->containers[i], NULL, "sdram-containers", + sdram_size(bcr)); + memory_region_add_subregion(&sdram->containers[i], 0, + &sdram->ram_memories[i]); + memory_region_add_subregion(get_system_memory(), + sdram_base(bcr), + &sdram->containers[i]); + } +} + +static void 
sdram_map_bcr (ppc4xx_sdram_t *sdram) +{ + int i; + + for (i = 0; i < sdram->nbanks; i++) { + if (sdram->ram_sizes[i] != 0) { + sdram_set_bcr(sdram, i, sdram_bcr(sdram->ram_bases[i], + sdram->ram_sizes[i]), 1); + } else { + sdram_set_bcr(sdram, i, 0x00000000, 0); + } + } +} + +static void sdram_unmap_bcr (ppc4xx_sdram_t *sdram) +{ + int i; + + for (i = 0; i < sdram->nbanks; i++) { +#ifdef DEBUG_SDRAM + printf("%s: Unmap RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n", + __func__, sdram_base(sdram->bcr[i]), sdram_size(sdram->bcr[i])); +#endif + memory_region_del_subregion(get_system_memory(), + &sdram->ram_memories[i]); + } +} + +static uint32_t dcr_read_sdram (void *opaque, int dcrn) +{ + ppc4xx_sdram_t *sdram; + uint32_t ret; + + sdram = opaque; + switch (dcrn) { + case SDRAM0_CFGADDR: + ret = sdram->addr; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x00: /* SDRAM_BESR0 */ + ret = sdram->besr0; + break; + case 0x08: /* SDRAM_BESR1 */ + ret = sdram->besr1; + break; + case 0x10: /* SDRAM_BEAR */ + ret = sdram->bear; + break; + case 0x20: /* SDRAM_CFG */ + ret = sdram->cfg; + break; + case 0x24: /* SDRAM_STATUS */ + ret = sdram->status; + break; + case 0x30: /* SDRAM_RTR */ + ret = sdram->rtr; + break; + case 0x34: /* SDRAM_PMIT */ + ret = sdram->pmit; + break; + case 0x40: /* SDRAM_B0CR */ + ret = sdram->bcr[0]; + break; + case 0x44: /* SDRAM_B1CR */ + ret = sdram->bcr[1]; + break; + case 0x48: /* SDRAM_B2CR */ + ret = sdram->bcr[2]; + break; + case 0x4C: /* SDRAM_B3CR */ + ret = sdram->bcr[3]; + break; + case 0x80: /* SDRAM_TR */ + ret = -1; /* ? */ + break; + case 0x94: /* SDRAM_ECCCFG */ + ret = sdram->ecccfg; + break; + case 0x98: /* SDRAM_ECCESR */ + ret = sdram->eccesr; + break; + default: /* Error */ + ret = -1; + break; + } + break; + default: + /* Avoid gcc warning */ + ret = 0x00000000; + break; + } + + return ret; +} + +static void dcr_write_sdram (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_sdram_t *sdram; + + sdram = opaque; + switch (dcrn) { + case SDRAM0_CFGADDR: + sdram->addr = val; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x00: /* SDRAM_BESR0 */ + sdram->besr0 &= ~val; + break; + case 0x08: /* SDRAM_BESR1 */ + sdram->besr1 &= ~val; + break; + case 0x10: /* SDRAM_BEAR */ + sdram->bear = val; + break; + case 0x20: /* SDRAM_CFG */ + val &= 0xFFE00000; + if (!(sdram->cfg & 0x80000000) && (val & 0x80000000)) { +#ifdef DEBUG_SDRAM + printf("%s: enable SDRAM controller\n", __func__); +#endif + /* validate all RAM mappings */ + sdram_map_bcr(sdram); + sdram->status &= ~0x80000000; + } else if ((sdram->cfg & 0x80000000) && !(val & 0x80000000)) { +#ifdef DEBUG_SDRAM + printf("%s: disable SDRAM controller\n", __func__); +#endif + /* invalidate all RAM mappings */ + sdram_unmap_bcr(sdram); + sdram->status |= 0x80000000; + } + if (!(sdram->cfg & 0x40000000) && (val & 0x40000000)) + sdram->status |= 0x40000000; + else if ((sdram->cfg & 0x40000000) && !(val & 0x40000000)) + sdram->status &= ~0x40000000; + sdram->cfg = val; + break; + case 0x24: /* SDRAM_STATUS */ + /* Read-only register */ + break; + case 0x30: /* SDRAM_RTR */ + sdram->rtr = val & 0x3FF80000; + break; + case 0x34: /* SDRAM_PMIT */ + sdram->pmit = (val & 0xF8000000) | 0x07C00000; + break; + case 0x40: /* SDRAM_B0CR */ + sdram_set_bcr(sdram, 0, val, sdram->cfg & 0x80000000); + break; + case 0x44: /* SDRAM_B1CR */ + sdram_set_bcr(sdram, 1, val, sdram->cfg & 0x80000000); + break; + case 0x48: /* SDRAM_B2CR */ + sdram_set_bcr(sdram, 2, val, sdram->cfg & 0x80000000); + break; + 
case 0x4C: /* SDRAM_B3CR */ + sdram_set_bcr(sdram, 3, val, sdram->cfg & 0x80000000); + break; + case 0x80: /* SDRAM_TR */ + sdram->tr = val & 0x018FC01F; + break; + case 0x94: /* SDRAM_ECCCFG */ + sdram->ecccfg = val & 0x00F00000; + break; + case 0x98: /* SDRAM_ECCESR */ + val &= 0xFFF0F000; + if (sdram->eccesr == 0 && val != 0) + qemu_irq_raise(sdram->irq); + else if (sdram->eccesr != 0 && val == 0) + qemu_irq_lower(sdram->irq); + sdram->eccesr = val; + break; + default: /* Error */ + break; + } + break; + } +} + +static void sdram_reset (void *opaque) +{ + ppc4xx_sdram_t *sdram; + + sdram = opaque; + sdram->addr = 0x00000000; + sdram->bear = 0x00000000; + sdram->besr0 = 0x00000000; /* No error */ + sdram->besr1 = 0x00000000; /* No error */ + sdram->cfg = 0x00000000; + sdram->ecccfg = 0x00000000; /* No ECC */ + sdram->eccesr = 0x00000000; /* No error */ + sdram->pmit = 0x07C00000; + sdram->rtr = 0x05F00000; + sdram->tr = 0x00854009; + /* We pre-initialize RAM banks */ + sdram->status = 0x00000000; + sdram->cfg = 0x00800000; +} + +void ppc4xx_sdram_init (CPUPPCState *env, qemu_irq irq, int nbanks, + MemoryRegion *ram_memories, + hwaddr *ram_bases, + hwaddr *ram_sizes, + int do_init) +{ + ppc4xx_sdram_t *sdram; + + sdram = g_malloc0(sizeof(ppc4xx_sdram_t)); + sdram->irq = irq; + sdram->nbanks = nbanks; + sdram->ram_memories = ram_memories; + memset(sdram->ram_bases, 0, 4 * sizeof(hwaddr)); + memcpy(sdram->ram_bases, ram_bases, + nbanks * sizeof(hwaddr)); + memset(sdram->ram_sizes, 0, 4 * sizeof(hwaddr)); + memcpy(sdram->ram_sizes, ram_sizes, + nbanks * sizeof(hwaddr)); + qemu_register_reset(&sdram_reset, sdram); + ppc_dcr_register(env, SDRAM0_CFGADDR, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM0_CFGDATA, + sdram, &dcr_read_sdram, &dcr_write_sdram); + if (do_init) + sdram_map_bcr(sdram); +} + +/* + * Split RAM between SDRAM banks. + * + * sdram_bank_sizes[] must be in descending order, that is sizes[i] > sizes[i+1] + * and must be 0-terminated. + * + * The 4xx SDRAM controller supports a small number of banks, and each bank + * must be one of a small set of sizes. The number of banks and the supported + * sizes varies by SoC. + */ +void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks, + MemoryRegion ram_memories[], + hwaddr ram_bases[], hwaddr ram_sizes[], + const ram_addr_t sdram_bank_sizes[]) +{ + ram_addr_t size_left = memory_region_size(ram); + ram_addr_t base = 0; + ram_addr_t bank_size; + int i; + int j; + + for (i = 0; i < nr_banks; i++) { + for (j = 0; sdram_bank_sizes[j] != 0; j++) { + bank_size = sdram_bank_sizes[j]; + if (bank_size <= size_left) { + char name[32]; + + ram_bases[i] = base; + ram_sizes[i] = bank_size; + base += bank_size; + size_left -= bank_size; + snprintf(name, sizeof(name), "ppc4xx.sdram%d", i); + memory_region_init_alias(&ram_memories[i], NULL, name, ram, + ram_bases[i], ram_sizes[i]); + break; + } + } + if (!size_left) { + /* No need to use the remaining banks. */ + break; + } + } + + if (size_left) { + ram_addr_t used_size = memory_region_size(ram) - size_left; + GString *s = g_string_new(NULL); + + for (i = 0; sdram_bank_sizes[i]; i++) { + g_string_append_printf(s, "%" PRIi64 "%s", + sdram_bank_sizes[i] / MiB, + sdram_bank_sizes[i + 1] ? ", " : ""); + } + error_report("at most %d bank%s of %s MiB each supported", + nr_banks, nr_banks == 1 ? "" : "s", s->str); + error_printf("Possible valid RAM size: %" PRIi64 " MiB \n", + used_size ? 
used_size / MiB : sdram_bank_sizes[i - 1] / MiB); + + g_string_free(s, true); + exit(EXIT_FAILURE); + } +} + +/*****************************************************************************/ +/* MAL */ + +enum { + MAL0_CFG = 0x180, + MAL0_ESR = 0x181, + MAL0_IER = 0x182, + MAL0_TXCASR = 0x184, + MAL0_TXCARR = 0x185, + MAL0_TXEOBISR = 0x186, + MAL0_TXDEIR = 0x187, + MAL0_RXCASR = 0x190, + MAL0_RXCARR = 0x191, + MAL0_RXEOBISR = 0x192, + MAL0_RXDEIR = 0x193, + MAL0_TXCTP0R = 0x1A0, + MAL0_RXCTP0R = 0x1C0, + MAL0_RCBS0 = 0x1E0, + MAL0_RCBS1 = 0x1E1, +}; + +typedef struct ppc4xx_mal_t ppc4xx_mal_t; +struct ppc4xx_mal_t { + qemu_irq irqs[4]; + uint32_t cfg; + uint32_t esr; + uint32_t ier; + uint32_t txcasr; + uint32_t txcarr; + uint32_t txeobisr; + uint32_t txdeir; + uint32_t rxcasr; + uint32_t rxcarr; + uint32_t rxeobisr; + uint32_t rxdeir; + uint32_t *txctpr; + uint32_t *rxctpr; + uint32_t *rcbs; + uint8_t txcnum; + uint8_t rxcnum; +}; + +static void ppc4xx_mal_reset(void *opaque) +{ + ppc4xx_mal_t *mal; + + mal = opaque; + mal->cfg = 0x0007C000; + mal->esr = 0x00000000; + mal->ier = 0x00000000; + mal->rxcasr = 0x00000000; + mal->rxdeir = 0x00000000; + mal->rxeobisr = 0x00000000; + mal->txcasr = 0x00000000; + mal->txdeir = 0x00000000; + mal->txeobisr = 0x00000000; +} + +static uint32_t dcr_read_mal(void *opaque, int dcrn) +{ + ppc4xx_mal_t *mal; + uint32_t ret; + + mal = opaque; + switch (dcrn) { + case MAL0_CFG: + ret = mal->cfg; + break; + case MAL0_ESR: + ret = mal->esr; + break; + case MAL0_IER: + ret = mal->ier; + break; + case MAL0_TXCASR: + ret = mal->txcasr; + break; + case MAL0_TXCARR: + ret = mal->txcarr; + break; + case MAL0_TXEOBISR: + ret = mal->txeobisr; + break; + case MAL0_TXDEIR: + ret = mal->txdeir; + break; + case MAL0_RXCASR: + ret = mal->rxcasr; + break; + case MAL0_RXCARR: + ret = mal->rxcarr; + break; + case MAL0_RXEOBISR: + ret = mal->rxeobisr; + break; + case MAL0_RXDEIR: + ret = mal->rxdeir; + break; + default: + ret = 0; + break; + } + if (dcrn >= MAL0_TXCTP0R && dcrn < MAL0_TXCTP0R + mal->txcnum) { + ret = mal->txctpr[dcrn - MAL0_TXCTP0R]; + } + if (dcrn >= MAL0_RXCTP0R && dcrn < MAL0_RXCTP0R + mal->rxcnum) { + ret = mal->rxctpr[dcrn - MAL0_RXCTP0R]; + } + if (dcrn >= MAL0_RCBS0 && dcrn < MAL0_RCBS0 + mal->rxcnum) { + ret = mal->rcbs[dcrn - MAL0_RCBS0]; + } + + return ret; +} + +static void dcr_write_mal(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_mal_t *mal; + + mal = opaque; + switch (dcrn) { + case MAL0_CFG: + if (val & 0x80000000) { + ppc4xx_mal_reset(mal); + } + mal->cfg = val & 0x00FFC087; + break; + case MAL0_ESR: + /* Read/clear */ + mal->esr &= ~val; + break; + case MAL0_IER: + mal->ier = val & 0x0000001F; + break; + case MAL0_TXCASR: + mal->txcasr = val & 0xF0000000; + break; + case MAL0_TXCARR: + mal->txcarr = val & 0xF0000000; + break; + case MAL0_TXEOBISR: + /* Read/clear */ + mal->txeobisr &= ~val; + break; + case MAL0_TXDEIR: + /* Read/clear */ + mal->txdeir &= ~val; + break; + case MAL0_RXCASR: + mal->rxcasr = val & 0xC0000000; + break; + case MAL0_RXCARR: + mal->rxcarr = val & 0xC0000000; + break; + case MAL0_RXEOBISR: + /* Read/clear */ + mal->rxeobisr &= ~val; + break; + case MAL0_RXDEIR: + /* Read/clear */ + mal->rxdeir &= ~val; + break; + } + if (dcrn >= MAL0_TXCTP0R && dcrn < MAL0_TXCTP0R + mal->txcnum) { + mal->txctpr[dcrn - MAL0_TXCTP0R] = val; + } + if (dcrn >= MAL0_RXCTP0R && dcrn < MAL0_RXCTP0R + mal->rxcnum) { + mal->rxctpr[dcrn - MAL0_RXCTP0R] = val; + } + if (dcrn >= MAL0_RCBS0 && dcrn < MAL0_RCBS0 + mal->rxcnum) { + mal->rcbs[dcrn 
- MAL0_RCBS0] = val & 0x000000FF; + } +} + +void ppc4xx_mal_init(CPUPPCState *env, uint8_t txcnum, uint8_t rxcnum, + qemu_irq irqs[4]) +{ + ppc4xx_mal_t *mal; + int i; + + assert(txcnum <= 32 && rxcnum <= 32); + mal = g_malloc0(sizeof(*mal)); + mal->txcnum = txcnum; + mal->rxcnum = rxcnum; + mal->txctpr = g_new0(uint32_t, txcnum); + mal->rxctpr = g_new0(uint32_t, rxcnum); + mal->rcbs = g_new0(uint32_t, rxcnum); + for (i = 0; i < 4; i++) { + mal->irqs[i] = irqs[i]; + } + qemu_register_reset(&ppc4xx_mal_reset, mal); + ppc_dcr_register(env, MAL0_CFG, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_ESR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_IER, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXCASR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXCARR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXEOBISR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXDEIR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXCASR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXCARR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXEOBISR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXDEIR, + mal, &dcr_read_mal, &dcr_write_mal); + for (i = 0; i < txcnum; i++) { + ppc_dcr_register(env, MAL0_TXCTP0R + i, + mal, &dcr_read_mal, &dcr_write_mal); + } + for (i = 0; i < rxcnum; i++) { + ppc_dcr_register(env, MAL0_RXCTP0R + i, + mal, &dcr_read_mal, &dcr_write_mal); + } + for (i = 0; i < rxcnum; i++) { + ppc_dcr_register(env, MAL0_RCBS0 + i, + mal, &dcr_read_mal, &dcr_write_mal); + } +} diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c new file mode 100644 index 000000000..304a29349 --- /dev/null +++ b/hw/ppc/ppc4xx_pci.c @@ -0,0 +1,389 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +/* This file implements emulation of the 32-bit PCI controller found in some + * 4xx SoCs, such as the 440EP. 
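+ * The model is register-level only: the PMM and PTM window registers are
+ * stored (and migrated) but no address translation is set up for them.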
*/ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "migration/vmstate.h" +#include "qemu/module.h" +#include "sysemu/reset.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "trace.h" +#include "qom/object.h" + +struct PCIMasterMap { + uint32_t la; + uint32_t ma; + uint32_t pcila; + uint32_t pciha; +}; + +struct PCITargetMap { + uint32_t ms; + uint32_t la; +}; + +OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxPCIState, PPC4xx_PCI_HOST_BRIDGE) + +#define PPC4xx_PCI_NR_PMMS 3 +#define PPC4xx_PCI_NR_PTMS 2 + +#define PPC4xx_PCI_NUM_DEVS 5 + +struct PPC4xxPCIState { + PCIHostState parent_obj; + + struct PCIMasterMap pmm[PPC4xx_PCI_NR_PMMS]; + struct PCITargetMap ptm[PPC4xx_PCI_NR_PTMS]; + qemu_irq irq[PPC4xx_PCI_NUM_DEVS]; + + MemoryRegion container; + MemoryRegion iomem; +}; + +#define PCIC0_CFGADDR 0x0 +#define PCIC0_CFGDATA 0x4 + +/* PLB Memory Map (PMM) registers specify which PLB addresses are translated to + * PCI accesses. */ +#define PCIL0_PMM0LA 0x0 +#define PCIL0_PMM0MA 0x4 +#define PCIL0_PMM0PCILA 0x8 +#define PCIL0_PMM0PCIHA 0xc +#define PCIL0_PMM1LA 0x10 +#define PCIL0_PMM1MA 0x14 +#define PCIL0_PMM1PCILA 0x18 +#define PCIL0_PMM1PCIHA 0x1c +#define PCIL0_PMM2LA 0x20 +#define PCIL0_PMM2MA 0x24 +#define PCIL0_PMM2PCILA 0x28 +#define PCIL0_PMM2PCIHA 0x2c + +/* PCI Target Map (PTM) registers specify which PCI addresses are translated to + * PLB accesses. */ +#define PCIL0_PTM1MS 0x30 +#define PCIL0_PTM1LA 0x34 +#define PCIL0_PTM2MS 0x38 +#define PCIL0_PTM2LA 0x3c +#define PCI_REG_BASE 0x800000 +#define PCI_REG_SIZE 0x40 + +#define PCI_ALL_SIZE (PCI_REG_BASE + PCI_REG_SIZE) + +static void ppc4xx_pci_reg_write4(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + struct PPC4xxPCIState *pci = opaque; + + /* We ignore all target attempts at PCI configuration, effectively + * assuming a bidirectional 1:1 mapping of PLB and PCI space. 
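+     *
+     * For illustration (values are a sketch, not taken from this patch):
+     * a guest opening a 256 MiB master window at PLB address 0x80000000,
+     * mapped 1:1 onto PCI, would program roughly
+     *
+     *     PMM0LA    = 0x80000000   (PLB local address)
+     *     PMM0MA    = 0xf0000001   (address mask with the enable bit set)
+     *     PMM0PCILA = 0x80000000   (PCI address, low half)
+     *     PMM0PCIHA = 0x00000000   (PCI address, high half)
+     *
+     * and this model would merely record the four values in pci->pmm[0].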
*/ + + switch (offset) { + case PCIL0_PMM0LA: + pci->pmm[0].la = value; + break; + case PCIL0_PMM0MA: + pci->pmm[0].ma = value; + break; + case PCIL0_PMM0PCIHA: + pci->pmm[0].pciha = value; + break; + case PCIL0_PMM0PCILA: + pci->pmm[0].pcila = value; + break; + + case PCIL0_PMM1LA: + pci->pmm[1].la = value; + break; + case PCIL0_PMM1MA: + pci->pmm[1].ma = value; + break; + case PCIL0_PMM1PCIHA: + pci->pmm[1].pciha = value; + break; + case PCIL0_PMM1PCILA: + pci->pmm[1].pcila = value; + break; + + case PCIL0_PMM2LA: + pci->pmm[2].la = value; + break; + case PCIL0_PMM2MA: + pci->pmm[2].ma = value; + break; + case PCIL0_PMM2PCIHA: + pci->pmm[2].pciha = value; + break; + case PCIL0_PMM2PCILA: + pci->pmm[2].pcila = value; + break; + + case PCIL0_PTM1MS: + pci->ptm[0].ms = value; + break; + case PCIL0_PTM1LA: + pci->ptm[0].la = value; + break; + case PCIL0_PTM2MS: + pci->ptm[1].ms = value; + break; + case PCIL0_PTM2LA: + pci->ptm[1].la = value; + break; + + default: + printf("%s: unhandled PCI internal register 0x%lx\n", __func__, + (unsigned long)offset); + break; + } +} + +static uint64_t ppc4xx_pci_reg_read4(void *opaque, hwaddr offset, + unsigned size) +{ + struct PPC4xxPCIState *pci = opaque; + uint32_t value; + + switch (offset) { + case PCIL0_PMM0LA: + value = pci->pmm[0].la; + break; + case PCIL0_PMM0MA: + value = pci->pmm[0].ma; + break; + case PCIL0_PMM0PCIHA: + value = pci->pmm[0].pciha; + break; + case PCIL0_PMM0PCILA: + value = pci->pmm[0].pcila; + break; + + case PCIL0_PMM1LA: + value = pci->pmm[1].la; + break; + case PCIL0_PMM1MA: + value = pci->pmm[1].ma; + break; + case PCIL0_PMM1PCIHA: + value = pci->pmm[1].pciha; + break; + case PCIL0_PMM1PCILA: + value = pci->pmm[1].pcila; + break; + + case PCIL0_PMM2LA: + value = pci->pmm[2].la; + break; + case PCIL0_PMM2MA: + value = pci->pmm[2].ma; + break; + case PCIL0_PMM2PCIHA: + value = pci->pmm[2].pciha; + break; + case PCIL0_PMM2PCILA: + value = pci->pmm[2].pcila; + break; + + case PCIL0_PTM1MS: + value = pci->ptm[0].ms; + break; + case PCIL0_PTM1LA: + value = pci->ptm[0].la; + break; + case PCIL0_PTM2MS: + value = pci->ptm[1].ms; + break; + case PCIL0_PTM2LA: + value = pci->ptm[1].la; + break; + + default: + printf("%s: invalid PCI internal register 0x%lx\n", __func__, + (unsigned long)offset); + value = 0; + } + + return value; +} + +static const MemoryRegionOps pci_reg_ops = { + .read = ppc4xx_pci_reg_read4, + .write = ppc4xx_pci_reg_write4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void ppc4xx_pci_reset(void *opaque) +{ + struct PPC4xxPCIState *pci = opaque; + + memset(pci->pmm, 0, sizeof(pci->pmm)); + memset(pci->ptm, 0, sizeof(pci->ptm)); +} + +/* On Bamboo, all pins from each slot are tied to a single board IRQ. This + * may need further refactoring for other boards. */ +static int ppc4xx_pci_map_irq(PCIDevice *pci_dev, int irq_num) +{ + int slot = PCI_SLOT(pci_dev->devfn); + + trace_ppc4xx_pci_map_irq(pci_dev->devfn, irq_num, slot); + + return slot > 0 ? 
slot - 1 : PPC4xx_PCI_NUM_DEVS - 1; +} + +static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) +{ + qemu_irq *pci_irqs = opaque; + + trace_ppc4xx_pci_set_irq(irq_num); + assert(irq_num >= 0 && irq_num < PPC4xx_PCI_NUM_DEVS); + qemu_set_irq(pci_irqs[irq_num], level); +} + +static const VMStateDescription vmstate_pci_master_map = { + .name = "pci_master_map", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(la, struct PCIMasterMap), + VMSTATE_UINT32(ma, struct PCIMasterMap), + VMSTATE_UINT32(pcila, struct PCIMasterMap), + VMSTATE_UINT32(pciha, struct PCIMasterMap), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_pci_target_map = { + .name = "pci_target_map", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(ms, struct PCITargetMap), + VMSTATE_UINT32(la, struct PCITargetMap), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_ppc4xx_pci = { + .name = "ppc4xx_pci", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_ARRAY(pmm, PPC4xxPCIState, PPC4xx_PCI_NR_PMMS, 1, + vmstate_pci_master_map, + struct PCIMasterMap), + VMSTATE_STRUCT_ARRAY(ptm, PPC4xxPCIState, PPC4xx_PCI_NR_PTMS, 1, + vmstate_pci_target_map, + struct PCITargetMap), + VMSTATE_END_OF_LIST() + } +}; + +/* XXX Interrupt acknowledge cycles not supported. */ +static void ppc4xx_pcihost_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + PPC4xxPCIState *s; + PCIHostState *h; + PCIBus *b; + int i; + + h = PCI_HOST_BRIDGE(dev); + s = PPC4xx_PCI_HOST_BRIDGE(dev); + + for (i = 0; i < ARRAY_SIZE(s->irq); i++) { + sysbus_init_irq(sbd, &s->irq[i]); + } + + b = pci_register_root_bus(dev, NULL, ppc4xx_pci_set_irq, + ppc4xx_pci_map_irq, s->irq, get_system_memory(), + get_system_io(), 0, ARRAY_SIZE(s->irq), + TYPE_PCI_BUS); + h->bus = b; + + pci_create_simple(b, 0, "ppc4xx-host-bridge"); + + /* XXX split into 2 memory regions, one for config space, one for regs */ + memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE); + memory_region_init_io(&h->conf_mem, OBJECT(s), &pci_host_conf_le_ops, h, + "pci-conf-idx", 4); + memory_region_init_io(&h->data_mem, OBJECT(s), &pci_host_data_le_ops, h, + "pci-conf-data", 4); + memory_region_init_io(&s->iomem, OBJECT(s), &pci_reg_ops, s, + "pci.reg", PCI_REG_SIZE); + memory_region_add_subregion(&s->container, PCIC0_CFGADDR, &h->conf_mem); + memory_region_add_subregion(&s->container, PCIC0_CFGDATA, &h->data_mem); + memory_region_add_subregion(&s->container, PCI_REG_BASE, &s->iomem); + sysbus_init_mmio(sbd, &s->container); + qemu_register_reset(ppc4xx_pci_reset, s); +} + +static void ppc4xx_host_bridge_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "Host bridge"; + k->vendor_id = PCI_VENDOR_ID_IBM; + k->device_id = PCI_DEVICE_ID_IBM_440GX; + k->class_id = PCI_CLASS_BRIDGE_OTHER; + /* + * PCI-facing part of the host bridge, not usable without the + * host-facing part, which can't be device_add'ed, yet. 
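+     * It is created internally by ppc4xx_pcihost_realize() via
+     * pci_create_simple(), which is why user_creatable is cleared here.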
+ */ + dc->user_creatable = false; +} + +static const TypeInfo ppc4xx_host_bridge_info = { + .name = "ppc4xx-host-bridge", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = ppc4xx_host_bridge_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +static void ppc4xx_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = ppc4xx_pcihost_realize; + dc->vmsd = &vmstate_ppc4xx_pci; +} + +static const TypeInfo ppc4xx_pcihost_info = { + .name = TYPE_PPC4xx_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(PPC4xxPCIState), + .class_init = ppc4xx_pcihost_class_init, +}; + +static void ppc4xx_pci_register_types(void) +{ + type_register_static(&ppc4xx_pcihost_info); + type_register_static(&ppc4xx_host_bridge_info); +} + +type_init(ppc4xx_pci_register_types) diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c new file mode 100644 index 000000000..10b643861 --- /dev/null +++ b/hw/ppc/ppc_booke.c @@ -0,0 +1,369 @@ +/* + * QEMU PowerPC Booke hardware System Emulator + * + * Copyright (c) 2011 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "hw/ppc/ppc.h" +#include "qemu/timer.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "hw/loader.h" +#include "kvm_ppc.h" + + +/* Timer Control Register */ + +#define TCR_WP_SHIFT 30 /* Watchdog Timer Period */ +#define TCR_WP_MASK (0x3U << TCR_WP_SHIFT) +#define TCR_WRC_SHIFT 28 /* Watchdog Timer Reset Control */ +#define TCR_WRC_MASK (0x3U << TCR_WRC_SHIFT) +#define TCR_WIE (1U << 27) /* Watchdog Timer Interrupt Enable */ +#define TCR_DIE (1U << 26) /* Decrementer Interrupt Enable */ +#define TCR_FP_SHIFT 24 /* Fixed-Interval Timer Period */ +#define TCR_FP_MASK (0x3U << TCR_FP_SHIFT) +#define TCR_FIE (1U << 23) /* Fixed-Interval Timer Interrupt Enable */ +#define TCR_ARE (1U << 22) /* Auto-Reload Enable */ + +/* Timer Control Register (e500 specific fields) */ + +#define TCR_E500_FPEXT_SHIFT 13 /* Fixed-Interval Timer Period Extension */ +#define TCR_E500_FPEXT_MASK (0xf << TCR_E500_FPEXT_SHIFT) +#define TCR_E500_WPEXT_SHIFT 17 /* Watchdog Timer Period Extension */ +#define TCR_E500_WPEXT_MASK (0xf << TCR_E500_WPEXT_SHIFT) + +/* Timer Status Register */ + +#define TSR_FIS (1U << 26) /* Fixed-Interval Timer Interrupt Status */ +#define TSR_DIS (1U << 27) /* Decrementer Interrupt Status */ +#define TSR_WRS_SHIFT 28 /* Watchdog Timer Reset Status */ +#define TSR_WRS_MASK (0x3U << TSR_WRS_SHIFT) +#define TSR_WIS (1U << 30) /* Watchdog Timer Interrupt Status */ +#define TSR_ENW (1U << 31) /* Enable Next Watchdog Timer */ + +typedef struct booke_timer_t booke_timer_t; +struct booke_timer_t { + + uint64_t fit_next; + QEMUTimer *fit_timer; + + uint64_t wdt_next; + QEMUTimer *wdt_timer; + + uint32_t flags; +}; + +static void booke_update_irq(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, + (env->spr[SPR_BOOKE_TSR] & TSR_DIS + && env->spr[SPR_BOOKE_TCR] & TCR_DIE)); + + ppc_set_irq(cpu, PPC_INTERRUPT_WDT, + (env->spr[SPR_BOOKE_TSR] & TSR_WIS + && env->spr[SPR_BOOKE_TCR] & TCR_WIE)); + + ppc_set_irq(cpu, PPC_INTERRUPT_FIT, + (env->spr[SPR_BOOKE_TSR] & TSR_FIS + && env->spr[SPR_BOOKE_TCR] & TCR_FIE)); +} + +/* Return the location of the bit of time base at which the FIT will raise an + interrupt */ +static uint8_t booke_get_fit_target(CPUPPCState *env, ppc_tb_t *tb_env) +{ + uint8_t fp = (env->spr[SPR_BOOKE_TCR] & TCR_FP_MASK) >> TCR_FP_SHIFT; + + if (tb_env->flags & PPC_TIMER_E500) { + /* e500 Fixed-interval timer period extension */ + uint32_t fpext = (env->spr[SPR_BOOKE_TCR] & TCR_E500_FPEXT_MASK) + >> TCR_E500_FPEXT_SHIFT; + fp = 63 - (fp | fpext << 2); + } else { + fp = env->fit_period[fp]; + } + + return fp; +} + +/* Return the location of the bit of time base at which the WDT will raise an + interrupt */ +static uint8_t booke_get_wdt_target(CPUPPCState *env, ppc_tb_t *tb_env) +{ + uint8_t wp = (env->spr[SPR_BOOKE_TCR] & TCR_WP_MASK) >> TCR_WP_SHIFT; + + if (tb_env->flags & PPC_TIMER_E500) { + /* e500 Watchdog timer period extension */ + uint32_t wpext = (env->spr[SPR_BOOKE_TCR] & TCR_E500_WPEXT_MASK) + >> TCR_E500_WPEXT_SHIFT; + wp = 63 - (wp | wpext << 2); + } else { + wp = env->wdt_period[wp]; + } + + return wp; +} + +static void booke_update_fixed_timer(CPUPPCState *env, + uint8_t target_bit, + uint64_t *next, + QEMUTimer *timer, + int tsr_bit) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t delta_tick, ticks = 0; + uint64_t tb; + uint64_t period; + uint64_t now; + + if (!(env->spr[SPR_BOOKE_TSR] & tsr_bit)) { + /* + * Don't arm the timer again when the guest has 
the current + * interrupt still pending. Wait for it to ack it. + */ + return; + } + + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + tb = cpu_ppc_get_tb(tb_env, now, tb_env->tb_offset); + period = 1ULL << target_bit; + delta_tick = period - (tb & (period - 1)); + + /* the timer triggers only when the selected bit toggles from 0 to 1 */ + if (tb & period) { + ticks = period; + } + + if (ticks + delta_tick < ticks) { + /* Overflow, so assume the biggest number we can express. */ + ticks = UINT64_MAX; + } else { + ticks += delta_tick; + } + + *next = now + muldiv64(ticks, NANOSECONDS_PER_SECOND, tb_env->tb_freq); + if ((*next < now) || (*next > INT64_MAX)) { + /* Overflow, so assume the biggest number the qemu timer supports. */ + *next = INT64_MAX; + } + + /* XXX: If expire time is now. We can't run the callback because we don't + * have access to it. So we just set the timer one nanosecond later. + */ + + if (*next == now) { + (*next)++; + } else { + /* + * There's no point to fake any granularity that's more fine grained + * than milliseconds. Anything beyond that just overloads the system. + */ + *next = MAX(*next, now + SCALE_MS); + } + + /* Fire the next timer */ + timer_mod(timer, *next); +} + +static void booke_decr_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + env->spr[SPR_BOOKE_TSR] |= TSR_DIS; + booke_update_irq(cpu); + + if (env->spr[SPR_BOOKE_TCR] & TCR_ARE) { + /* Do not reload 0, it is already there. It would just trigger + * the timer again and lead to infinite loop */ + if (env->spr[SPR_BOOKE_DECAR] != 0) { + /* Auto Reload */ + cpu_ppc_store_decr(env, env->spr[SPR_BOOKE_DECAR]); + } + } +} + +static void booke_fit_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + ppc_tb_t *tb_env; + booke_timer_t *booke_timer; + + tb_env = env->tb_env; + booke_timer = tb_env->opaque; + env->spr[SPR_BOOKE_TSR] |= TSR_FIS; + + booke_update_irq(cpu); + + booke_update_fixed_timer(env, + booke_get_fit_target(env, tb_env), + &booke_timer->fit_next, + booke_timer->fit_timer, + TSR_FIS); +} + +static void booke_wdt_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + ppc_tb_t *tb_env; + booke_timer_t *booke_timer; + + tb_env = env->tb_env; + booke_timer = tb_env->opaque; + + /* TODO: There's lots of complicated stuff to do here */ + + booke_update_irq(cpu); + + booke_update_fixed_timer(env, + booke_get_wdt_target(env, tb_env), + &booke_timer->wdt_next, + booke_timer->wdt_timer, + TSR_WIS); +} + +void store_booke_tsr(CPUPPCState *env, target_ulong val) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + booke_timer_t *booke_timer = tb_env->opaque; + + env->spr[SPR_BOOKE_TSR] &= ~val; + kvmppc_clear_tsr_bits(cpu, val); + + if (val & TSR_FIS) { + booke_update_fixed_timer(env, + booke_get_fit_target(env, tb_env), + &booke_timer->fit_next, + booke_timer->fit_timer, + TSR_FIS); + } + + if (val & TSR_WIS) { + booke_update_fixed_timer(env, + booke_get_wdt_target(env, tb_env), + &booke_timer->wdt_next, + booke_timer->wdt_timer, + TSR_WIS); + } + + booke_update_irq(cpu); +} + +void store_booke_tcr(CPUPPCState *env, target_ulong val) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + booke_timer_t *booke_timer = tb_env->opaque; + + env->spr[SPR_BOOKE_TCR] = val; + kvmppc_set_tcr(cpu); + + booke_update_irq(cpu); + + booke_update_fixed_timer(env, + booke_get_fit_target(env, tb_env), + &booke_timer->fit_next, + booke_timer->fit_timer, + TSR_FIS); + + 
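+    /*
+     * TCR also holds the watchdog period bits, so a TCR write can move
+     * the watchdog deadline as well; rearm that timer too.
+     */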
booke_update_fixed_timer(env, + booke_get_wdt_target(env, tb_env), + &booke_timer->wdt_next, + booke_timer->wdt_timer, + TSR_WIS); +} + +static void ppc_booke_timer_reset_handle(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + store_booke_tcr(env, 0); + store_booke_tsr(env, -1); +} + +/* + * This function will be called whenever the CPU state changes. + * CPU states are defined "typedef enum RunState". + * Regarding timer, When CPU state changes to running after debug halt + * or similar cases which takes time then in between final watchdog + * expiry happenes. This will cause exit to QEMU and configured watchdog + * action will be taken. To avoid this we always clear the watchdog state when + * state changes to running. + */ +static void cpu_state_change_handler(void *opaque, bool running, RunState state) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + if (!running) { + return; + } + + /* + * Clear watchdog interrupt condition by clearing TSR. + */ + store_booke_tsr(env, TSR_ENW | TSR_WIS | TSR_WRS_MASK); +} + +void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags) +{ + ppc_tb_t *tb_env; + booke_timer_t *booke_timer; + int ret = 0; + + tb_env = g_malloc0(sizeof(ppc_tb_t)); + booke_timer = g_malloc0(sizeof(booke_timer_t)); + + cpu->env.tb_env = tb_env; + tb_env->flags = flags | PPC_TIMER_BOOKE | PPC_DECR_ZERO_TRIGGERED; + + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + tb_env->opaque = booke_timer; + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_decr_cb, cpu); + + booke_timer->fit_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_fit_cb, cpu); + booke_timer->wdt_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_wdt_cb, cpu); + + ret = kvmppc_booke_watchdog_enable(cpu); + + if (ret) { + /* TODO: Start the QEMU emulated watchdog if not running on KVM. + * Also start the QEMU emulated watchdog if KVM does not support + * emulated watchdog or somehow it is not enabled (supported but + * not enabled is though some bug and requires debugging :)). + */ + } + + qemu_add_vm_change_state_handler(cpu_state_change_handler, cpu); + + qemu_register_reset(ppc_booke_timer_reset_handle, cpu); +} diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c new file mode 100644 index 000000000..d57b19979 --- /dev/null +++ b/hw/ppc/ppce500_spin.c @@ -0,0 +1,209 @@ +/* + * QEMU PowerPC e500v2 ePAPR spinning code + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Alexander Graf, <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + * This code is not really a device, but models an interface that usually + * firmware takes care of. It's used when QEMU plays the role of firmware. 
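+ *
+ * Each secondary CPU spins on its SpinInfo entry below: once the guest
+ * writes a release address with bit 0 cleared, the CPU is kicked and
+ * resumes at that address with r3 taken from the entry, as described
+ * in the ePAPR specification referenced below.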
+ * + * Specification: + * + * https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf + * + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qemu/units.h" +#include "hw/hw.h" +#include "hw/sysbus.h" +#include "sysemu/hw_accel.h" +#include "e500.h" +#include "qom/object.h" + +#define MAX_CPUS 32 + +typedef struct spin_info { + uint64_t addr; + uint64_t r3; + uint32_t resv; + uint32_t pir; + uint64_t reserved; +} QEMU_PACKED SpinInfo; + +#define TYPE_E500_SPIN "e500-spin" +OBJECT_DECLARE_SIMPLE_TYPE(SpinState, E500_SPIN) + +struct SpinState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + SpinInfo spin[MAX_CPUS]; +}; + +static void spin_reset(DeviceState *dev) +{ + SpinState *s = E500_SPIN(dev); + int i; + + for (i = 0; i < MAX_CPUS; i++) { + SpinInfo *info = &s->spin[i]; + + stl_p(&info->pir, i); + stq_p(&info->r3, i); + stq_p(&info->addr, 1); + } +} + +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa, + hwaddr len) +{ + ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 1); + hwaddr size; + + size = (booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT); + tlb->mas1 = MAS1_VALID | size; + tlb->mas2 = (va & TARGET_PAGE_MASK) | MAS2_M; + tlb->mas7_3 = pa & TARGET_PAGE_MASK; + tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; + env->tlb_dirty = true; +} + +static void spin_kick(CPUState *cs, run_on_cpu_data data) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + SpinInfo *curspin = data.host_ptr; + hwaddr map_size = 64 * MiB; + hwaddr map_start; + + cpu_synchronize_state(cs); + stl_p(&curspin->pir, env->spr[SPR_BOOKE_PIR]); + env->nip = ldq_p(&curspin->addr) & (map_size - 1); + env->gpr[3] = ldq_p(&curspin->r3); + env->gpr[4] = 0; + env->gpr[5] = 0; + env->gpr[6] = 0; + env->gpr[7] = map_size; + env->gpr[8] = 0; + env->gpr[9] = 0; + + map_start = ldq_p(&curspin->addr) & ~(map_size - 1); + mmubooke_create_initial_mapping(env, 0, map_start, map_size); + + cs->halted = 0; + cs->exception_index = -1; + cs->stopped = false; + qemu_cpu_kick(cs); +} + +static void spin_write(void *opaque, hwaddr addr, uint64_t value, + unsigned len) +{ + SpinState *s = opaque; + int env_idx = addr / sizeof(SpinInfo); + CPUState *cpu; + SpinInfo *curspin = &s->spin[env_idx]; + uint8_t *curspin_p = (uint8_t*)curspin; + + cpu = qemu_get_cpu(env_idx); + if (cpu == NULL) { + /* Unknown CPU */ + return; + } + + if (cpu->cpu_index == 0) { + /* primary CPU doesn't spin */ + return; + } + + curspin_p = &curspin_p[addr % sizeof(SpinInfo)]; + switch (len) { + case 1: + stb_p(curspin_p, value); + break; + case 2: + stw_p(curspin_p, value); + break; + case 4: + stl_p(curspin_p, value); + break; + } + + if (!(ldq_p(&curspin->addr) & 1)) { + /* run CPU */ + run_on_cpu(cpu, spin_kick, RUN_ON_CPU_HOST_PTR(curspin)); + } +} + +static uint64_t spin_read(void *opaque, hwaddr addr, unsigned len) +{ + SpinState *s = opaque; + uint8_t *spin_p = &((uint8_t*)s->spin)[addr]; + + switch (len) { + case 1: + return ldub_p(spin_p); + case 2: + return lduw_p(spin_p); + case 4: + return ldl_p(spin_p); + default: + hw_error("ppce500: unexpected %s with len = %u", __func__, len); + } +} + +static const MemoryRegionOps spin_rw_ops = { + .read = spin_read, + .write = spin_write, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void ppce500_spin_initfn(Object *obj) +{ + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + SpinState *s = E500_SPIN(dev); + + memory_region_init_io(&s->iomem, obj, &spin_rw_ops, s, + "e500 spin pv device", 
sizeof(SpinInfo) * MAX_CPUS); + sysbus_init_mmio(dev, &s->iomem); +} + +static void ppce500_spin_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = spin_reset; +} + +static const TypeInfo ppce500_spin_info = { + .name = TYPE_E500_SPIN, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SpinState), + .instance_init = ppce500_spin_initfn, + .class_init = ppce500_spin_class_init, +}; + +static void ppce500_spin_register_types(void) +{ + type_register_static(&ppce500_spin_info); +} + +type_init(ppce500_spin_register_types) diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c new file mode 100644 index 000000000..25a2e86b4 --- /dev/null +++ b/hw/ppc/prep.c @@ -0,0 +1,440 @@ +/* + * QEMU PPC PREP hardware System Emulator + * + * Copyright (c) 2003-2007 Jocelyn Mayer + * Copyright (c) 2017 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/rtc/m48t59.h" +#include "hw/char/serial.h" +#include "hw/block/fdc.h" +#include "net/net.h" +#include "hw/isa/isa.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/ppc/ppc.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "hw/loader.h" +#include "hw/rtc/mc146818rtc.h" +#include "hw/isa/pc87312.h" +#include "hw/qdev-properties.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "trace.h" +#include "elf.h" +#include "qemu/units.h" +#include "kvm_ppc.h" + +/* SMP is not enabled, for now */ +#define MAX_CPUS 1 + +#define MAX_IDE_BUS 2 + +#define CFG_ADDR 0xf0000510 + +#define KERNEL_LOAD_ADDR 0x01000000 +#define INITRD_LOAD_ADDR 0x01800000 + +#define NVRAM_SIZE 0x2000 + +static void fw_cfg_boot_set(void *opaque, const char *boot_device, + Error **errp) +{ + fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); +} + +static void ppc_prep_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); +} + + +/*****************************************************************************/ +/* NVRAM helpers */ +static inline uint32_t nvram_read(Nvram *nvram, uint32_t addr) +{ + NvramClass *k = NVRAM_GET_CLASS(nvram); + return (k->read)(nvram, addr); +} + +static inline void nvram_write(Nvram *nvram, uint32_t addr, uint32_t val) +{ + NvramClass *k = NVRAM_GET_CLASS(nvram); + (k->write)(nvram, addr, val); +} + +static void NVRAM_set_byte(Nvram *nvram, uint32_t addr, uint8_t value) +{ + nvram_write(nvram, addr, value); +} + +static uint8_t NVRAM_get_byte(Nvram *nvram, uint32_t addr) +{ + return nvram_read(nvram, addr); +} + +static void NVRAM_set_word(Nvram *nvram, uint32_t addr, uint16_t value) +{ + nvram_write(nvram, addr, value >> 8); + nvram_write(nvram, addr + 1, value & 0xFF); +} + +static uint16_t NVRAM_get_word(Nvram *nvram, uint32_t addr) +{ + uint16_t tmp; + + tmp = nvram_read(nvram, addr) << 8; + tmp |= nvram_read(nvram, addr + 1); + + return tmp; +} + +static void NVRAM_set_lword(Nvram *nvram, uint32_t addr, uint32_t value) +{ + nvram_write(nvram, addr, value >> 24); + nvram_write(nvram, addr + 1, (value >> 16) & 0xFF); + nvram_write(nvram, addr + 2, (value >> 8) & 0xFF); + nvram_write(nvram, addr + 3, value & 0xFF); +} + +static void NVRAM_set_string(Nvram *nvram, uint32_t addr, const char *str, + uint32_t max) +{ + int i; + + for (i = 0; i < max && str[i] != '\0'; i++) { + nvram_write(nvram, addr + i, str[i]); + } + nvram_write(nvram, addr + i, str[i]); + nvram_write(nvram, addr + max - 1, '\0'); +} + +static uint16_t NVRAM_crc_update (uint16_t prev, uint16_t value) +{ + uint16_t tmp; + uint16_t pd, pd1, pd2; + + tmp = prev >> 8; + pd = prev ^ value; + pd1 = pd & 0x000F; + pd2 = ((pd >> 4) & 0x000F) ^ pd1; + tmp ^= (pd1 << 3) | (pd1 << 8); + tmp ^= pd2 | (pd2 << 7) | (pd2 << 12); + + return tmp; +} + +static uint16_t NVRAM_compute_crc (Nvram *nvram, uint32_t start, uint32_t count) +{ + uint32_t i; + uint16_t crc = 0xFFFF; + int odd; + + odd = count & 1; + count &= ~1; + for (i = 0; i != count; i++) { + crc = NVRAM_crc_update(crc, NVRAM_get_word(nvram, start + i)); + } + if (odd) { + crc = NVRAM_crc_update(crc, NVRAM_get_byte(nvram, start + i) << 8); + } + + return crc; +} + +#define CMDLINE_ADDR 0x017ff000 + +static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size, + const char *arch, + uint32_t RAM_size, int boot_device, + uint32_t kernel_image, uint32_t kernel_size, + const char *cmdline, + uint32_t 
initrd_image, uint32_t initrd_size, + uint32_t NVRAM_image, + int width, int height, int depth) +{ + uint16_t crc; + + /* Set parameters for Open Hack'Ware BIOS */ + NVRAM_set_string(nvram, 0x00, "QEMU_BIOS", 16); + NVRAM_set_lword(nvram, 0x10, 0x00000002); /* structure v2 */ + NVRAM_set_word(nvram, 0x14, NVRAM_size); + NVRAM_set_string(nvram, 0x20, arch, 16); + NVRAM_set_lword(nvram, 0x30, RAM_size); + NVRAM_set_byte(nvram, 0x34, boot_device); + NVRAM_set_lword(nvram, 0x38, kernel_image); + NVRAM_set_lword(nvram, 0x3C, kernel_size); + if (cmdline) { + /* XXX: put the cmdline in NVRAM too ? */ + pstrcpy_targphys("cmdline", CMDLINE_ADDR, RAM_size - CMDLINE_ADDR, + cmdline); + NVRAM_set_lword(nvram, 0x40, CMDLINE_ADDR); + NVRAM_set_lword(nvram, 0x44, strlen(cmdline)); + } else { + NVRAM_set_lword(nvram, 0x40, 0); + NVRAM_set_lword(nvram, 0x44, 0); + } + NVRAM_set_lword(nvram, 0x48, initrd_image); + NVRAM_set_lword(nvram, 0x4C, initrd_size); + NVRAM_set_lword(nvram, 0x50, NVRAM_image); + + NVRAM_set_word(nvram, 0x54, width); + NVRAM_set_word(nvram, 0x56, height); + NVRAM_set_word(nvram, 0x58, depth); + crc = NVRAM_compute_crc(nvram, 0x00, 0xF8); + NVRAM_set_word(nvram, 0xFC, crc); + + return 0; +} + +static int prep_set_cmos_checksum(DeviceState *dev, void *opaque) +{ + uint16_t checksum = *(uint16_t *)opaque; + ISADevice *rtc; + + if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) { + rtc = ISA_DEVICE(dev); + rtc_set_memory(rtc, 0x2e, checksum & 0xff); + rtc_set_memory(rtc, 0x3e, checksum & 0xff); + rtc_set_memory(rtc, 0x2f, checksum >> 8); + rtc_set_memory(rtc, 0x3f, checksum >> 8); + + object_property_add_alias(qdev_get_machine(), "rtc-time", OBJECT(rtc), + "date"); + } + return 0; +} + +static void ibm_40p_init(MachineState *machine) +{ + const char *bios_name = machine->firmware ?: "openbios-ppc"; + CPUPPCState *env = NULL; + uint16_t cmos_checksum; + PowerPCCPU *cpu; + DeviceState *dev, *i82378_dev; + SysBusDevice *pcihost, *s; + Nvram *m48t59 = NULL; + PCIBus *pci_bus; + ISADevice *isa_dev; + ISABus *isa_bus; + void *fw_cfg; + int i; + uint32_t kernel_base = 0, initrd_base = 0; + long kernel_size = 0, initrd_size = 0; + char boot_device; + + /* init CPU */ + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { + error_report("only 6xx bus is supported on this machine"); + exit(1); + } + + if (env->flags & POWERPC_FLAG_RTC_CLK) { + /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */ + cpu_ppc_tb_init(env, 7812500UL); + } else { + /* Set time-base frequency to 100 Mhz */ + cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL); + } + qemu_register_reset(ppc_prep_reset, cpu); + + /* PCI host */ + dev = qdev_new("raven-pcihost"); + qdev_prop_set_string(dev, "bios-name", bios_name); + qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE); + pcihost = SYS_BUS_DEVICE(dev); + object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev)); + sysbus_realize_and_unref(pcihost, &error_fatal); + pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0")); + if (!pci_bus) { + error_report("could not create PCI host controller"); + exit(1); + } + + /* PCI -> ISA bridge */ + i82378_dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(11, 0), "i82378")); + qdev_connect_gpio_out(i82378_dev, 0, + cpu->env.irq_inputs[PPC6xx_INPUT_INT]); + sysbus_connect_irq(pcihost, 0, qdev_get_gpio_in(i82378_dev, 15)); + isa_bus = ISA_BUS(qdev_get_child_bus(i82378_dev, "isa.0")); + + /* Memory controller */ + isa_dev = isa_new("rs6000-mc"); + dev = 
DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "ram-size", machine->ram_size); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + /* RTC */ + isa_dev = isa_new(TYPE_MC146818_RTC); + dev = DEVICE(isa_dev); + qdev_prop_set_int32(dev, "base_year", 1900); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + /* initialize CMOS checksums */ + cmos_checksum = 0x6aa9; + qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL, + &cmos_checksum); + + /* add some more devices */ + if (defaults_enabled()) { + m48t59 = NVRAM(isa_create_simple(isa_bus, "isa-m48t59")); + + isa_dev = isa_new("cs4231a"); + dev = DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "iobase", 0x830); + qdev_prop_set_uint32(dev, "irq", 10); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + isa_dev = isa_new("pc87312"); + dev = DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "config", 12); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + isa_dev = isa_new("prep-systemio"); + dev = DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "ibm-planar-id", 0xfc); + qdev_prop_set_uint32(dev, "equipment", 0xc0); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(1, 0), + "lsi53c810")); + lsi53c8xx_handle_legacy_cmdline(dev); + qdev_connect_gpio_out(dev, 0, qdev_get_gpio_in(i82378_dev, 13)); + + /* XXX: s3-trio at PCI_DEVFN(2, 0) */ + pci_vga_init(pci_bus); + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "pcnet", + i == 0 ? "3" : NULL); + } + } + + /* Prepare firmware configuration for OpenBIOS */ + dev = qdev_new(TYPE_FW_CFG_MEM); + fw_cfg = FW_CFG(dev); + qdev_prop_set_uint32(dev, "data_width", 1); + qdev_prop_set_bit(dev, "dma_enabled", false); + object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG, + OBJECT(fw_cfg)); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, CFG_ADDR); + sysbus_mmio_map(s, 1, CFG_ADDR + 2); + + if (machine->kernel_filename) { + /* load kernel */ + kernel_base = KERNEL_LOAD_ADDR; + kernel_size = load_image_targphys(machine->kernel_filename, + kernel_base, + machine->ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + /* load initrd */ + if (machine->initrd_filename) { + initrd_base = INITRD_LOAD_ADDR; + initrd_size = load_image_targphys(machine->initrd_filename, + initrd_base, + machine->ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + } + if (machine->kernel_cmdline && *machine->kernel_cmdline) { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, CMDLINE_ADDR); + pstrcpy_targphys("cmdline", CMDLINE_ADDR, TARGET_PAGE_SIZE, + machine->kernel_cmdline); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, + machine->kernel_cmdline); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, + strlen(machine->kernel_cmdline) + 1); + } + boot_device = 'm'; + } else { + boot_device = machine->boot_order[0]; + } + + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_PREP); + + 
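+    /* Expose the display geometry to the firmware. */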
fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_WIDTH, graphic_width); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); + if (kvm_enabled()) { + uint8_t *hypercall; + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq()); + hypercall = g_malloc(16); + kvmppc_get_hypercall(env, hypercall, 16); + fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); + } else { + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); + } + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device); + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); + + /* Prepare firmware configuration for Open Hack'Ware */ + if (m48t59) { + PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", machine->ram_size, + boot_device, + kernel_base, kernel_size, + machine->kernel_cmdline, + initrd_base, initrd_size, + /* XXX: need an option to load a NVRAM image */ + 0, + graphic_width, graphic_height, graphic_depth); + } +} + +static void ibm_40p_machine_init(MachineClass *mc) +{ + mc->desc = "IBM RS/6000 7020 (40p)", + mc->init = ibm_40p_init; + mc->max_cpus = 1; + mc->default_ram_size = 128 * MiB; + mc->block_default_type = IF_SCSI; + mc->default_boot_order = "c"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("604"); + mc->default_display = "std"; +} + +DEFINE_MACHINE("40p", ibm_40p_machine_init) diff --git a/hw/ppc/prep_systemio.c b/hw/ppc/prep_systemio.c new file mode 100644 index 000000000..b2bd78324 --- /dev/null +++ b/hw/ppc/prep_systemio.c @@ -0,0 +1,315 @@ +/* + * QEMU PReP System I/O emulation + * + * Copyright (c) 2017 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/irq.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "exec/address-spaces.h" +#include "qom/object.h" +#include "qemu/error-report.h" /* for error_report() */ +#include "qemu/module.h" +#include "sysemu/runstate.h" +#include "cpu.h" +#include "trace.h" + +#define TYPE_PREP_SYSTEMIO "prep-systemio" +OBJECT_DECLARE_SIMPLE_TYPE(PrepSystemIoState, PREP_SYSTEMIO) + +/* Bit as defined in PowerPC Reference Plaform v1.1, sect. 6.1.5, p. 
132 */ +#define PREP_BIT(n) (1 << (7 - (n))) + +struct PrepSystemIoState { + ISADevice parent_obj; + MemoryRegion ppc_parity_mem; + + qemu_irq non_contiguous_io_map_irq; + uint8_t sreset; /* 0x0092 */ + uint8_t equipment; /* 0x080c */ + uint8_t system_control; /* 0x081c */ + uint8_t iomap_type; /* 0x0850 */ + uint8_t ibm_planar_id; /* 0x0852 */ + qemu_irq softreset_irq; + PortioList portio; +}; + +/* PORT 0092 -- Special Port 92 (Read/Write) */ + +enum { + PORT0092_SOFTRESET = PREP_BIT(7), + PORT0092_LE_MODE = PREP_BIT(6), +}; + +static void prep_port0092_write(void *opaque, uint32_t addr, uint32_t val) +{ + PrepSystemIoState *s = opaque; + + trace_prep_systemio_write(addr, val); + + s->sreset = val & PORT0092_SOFTRESET; + qemu_set_irq(s->softreset_irq, s->sreset); + + if ((val & PORT0092_LE_MODE) != 0) { + /* XXX Not supported yet */ + error_report("little-endian mode not supported"); + vm_stop(RUN_STATE_PAUSED); + } else { + /* Nothing to do */ + } +} + +static uint32_t prep_port0092_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->sreset); + return s->sreset; +} + +/* PORT 0808 -- Hardfile Light Register (Write Only) */ + +enum { + PORT0808_HARDFILE_LIGHT_ON = PREP_BIT(7), +}; + +static void prep_port0808_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0810 -- Password Protect 1 Register (Write Only) */ + +/* reset by port 0x4D in the SIO */ +static void prep_port0810_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0812 -- Password Protect 2 Register (Write Only) */ + +/* reset by port 0x4D in the SIO */ +static void prep_port0812_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0814 -- L2 Invalidate Register (Write Only) */ + +static void prep_port0814_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0818 -- Reserved for Keylock (Read Only) */ + +enum { + PORT0818_KEYLOCK_SIGNAL_HIGH = PREP_BIT(7), +}; + +static uint32_t prep_port0818_read(void *opaque, uint32_t addr) +{ + uint32_t val = 0; + trace_prep_systemio_read(addr, val); + return val; +} + +/* PORT 080C -- Equipment */ + +enum { + PORT080C_SCSIFUSE = PREP_BIT(1), + PORT080C_L2_COPYBACK = PREP_BIT(4), + PORT080C_L2_256 = PREP_BIT(5), + PORT080C_UPGRADE_CPU = PREP_BIT(6), + PORT080C_L2 = PREP_BIT(7), +}; + +static uint32_t prep_port080c_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->equipment); + return s->equipment; +} + +/* PORT 081C -- System Control Register (Read/Write) */ + +enum { + PORT081C_FLOPPY_MOTOR_INHIBIT = PREP_BIT(3), + PORT081C_MASK_TEA = PREP_BIT(2), + PORT081C_L2_UPDATE_INHIBIT = PREP_BIT(1), + PORT081C_L2_CACHEMISS_INHIBIT = PREP_BIT(0), +}; + +static void prep_port081c_write(void *opaque, uint32_t addr, uint32_t val) +{ + static const uint8_t mask = PORT081C_FLOPPY_MOTOR_INHIBIT | + PORT081C_MASK_TEA | + PORT081C_L2_UPDATE_INHIBIT | + PORT081C_L2_CACHEMISS_INHIBIT; + PrepSystemIoState *s = opaque; + trace_prep_systemio_write(addr, val); + s->system_control = val & mask; +} + +static uint32_t prep_port081c_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->system_control); + return s->system_control; +} + +/* System Board Identification */ + +static uint32_t prep_port0852_read(void *opaque, uint32_t addr) +{ + 
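+    /* Reports the board identification byte; the 40p machine above sets
+     * the "ibm-planar-id" property to 0xfc. */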
PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->ibm_planar_id); + return s->ibm_planar_id; +} + +/* PORT 0850 -- I/O Map Type Register (Read/Write) */ + +enum { + PORT0850_IOMAP_NONCONTIGUOUS = PREP_BIT(7), +}; + +static uint32_t prep_port0850_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->iomap_type); + return s->iomap_type; +} + +static void prep_port0850_write(void *opaque, uint32_t addr, uint32_t val) +{ + PrepSystemIoState *s = opaque; + + trace_prep_systemio_write(addr, val); + qemu_set_irq(s->non_contiguous_io_map_irq, + val & PORT0850_IOMAP_NONCONTIGUOUS); + s->iomap_type = val & PORT0850_IOMAP_NONCONTIGUOUS; +} + +static const MemoryRegionPortio ppc_io800_port_list[] = { + { 0x092, 1, 1, .read = prep_port0092_read, + .write = prep_port0092_write, }, + { 0x808, 1, 1, .write = prep_port0808_write, }, + { 0x80c, 1, 1, .read = prep_port080c_read, }, + { 0x810, 1, 1, .write = prep_port0810_write, }, + { 0x812, 1, 1, .write = prep_port0812_write, }, + { 0x814, 1, 1, .write = prep_port0814_write, }, + { 0x818, 1, 1, .read = prep_port0818_read }, + { 0x81c, 1, 1, .read = prep_port081c_read, + .write = prep_port081c_write, }, + { 0x850, 1, 1, .read = prep_port0850_read, + .write = prep_port0850_write, }, + { 0x852, 1, 1, .read = prep_port0852_read, }, + PORTIO_END_OF_LIST() +}; + +static uint64_t ppc_parity_error_readl(void *opaque, hwaddr addr, + unsigned int size) +{ + uint32_t val = 0; + trace_prep_systemio_read((unsigned int)addr, val); + return val; +} + +static void ppc_parity_error_writel(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__); +} + +static const MemoryRegionOps ppc_parity_error_ops = { + .read = ppc_parity_error_readl, + .write = ppc_parity_error_writel, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void prep_systemio_realize(DeviceState *dev, Error **errp) +{ + ISADevice *isa = ISA_DEVICE(dev); + PrepSystemIoState *s = PREP_SYSTEMIO(dev); + PowerPCCPU *cpu; + + qdev_init_gpio_out(dev, &s->non_contiguous_io_map_irq, 1); + s->iomap_type = PORT0850_IOMAP_NONCONTIGUOUS; + qemu_set_irq(s->non_contiguous_io_map_irq, + s->iomap_type & PORT0850_IOMAP_NONCONTIGUOUS); + cpu = POWERPC_CPU(first_cpu); + s->softreset_irq = cpu->env.irq_inputs[PPC6xx_INPUT_HRESET]; + + isa_register_portio_list(isa, &s->portio, 0x0, ppc_io800_port_list, s, + "systemio800"); + + memory_region_init_io(&s->ppc_parity_mem, OBJECT(dev), + &ppc_parity_error_ops, s, "ppc-parity", 0x4); + memory_region_add_subregion(get_system_memory(), 0xbfffeff0, + &s->ppc_parity_mem); +} + +static const VMStateDescription vmstate_prep_systemio = { + .name = "prep_systemio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(sreset, PrepSystemIoState), + VMSTATE_UINT8(system_control, PrepSystemIoState), + VMSTATE_UINT8(iomap_type, PrepSystemIoState), + VMSTATE_END_OF_LIST() + }, +}; + +static Property prep_systemio_properties[] = { + DEFINE_PROP_UINT8("ibm-planar-id", PrepSystemIoState, ibm_planar_id, 0), + DEFINE_PROP_UINT8("equipment", PrepSystemIoState, equipment, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void prep_systemio_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = prep_systemio_realize; + dc->vmsd = &vmstate_prep_systemio; + device_class_set_props(dc, prep_systemio_properties); +} + +static TypeInfo prep_systemio800_info = 
{ + .name = TYPE_PREP_SYSTEMIO, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(PrepSystemIoState), + .class_init = prep_systemio_class_initfn, +}; + +static void prep_systemio_register_types(void) +{ + type_register_static(&prep_systemio800_info); +} + +type_init(prep_systemio_register_types) diff --git a/hw/ppc/rs6000_mc.c b/hw/ppc/rs6000_mc.c new file mode 100644 index 000000000..c0bc212e9 --- /dev/null +++ b/hw/ppc/rs6000_mc.c @@ -0,0 +1,238 @@ +/* + * QEMU RS/6000 memory controller + * + * Copyright (c) 2017 Hervé Poussineau + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "exec/address-spaces.h" +#include "qapi/error.h" +#include "trace.h" +#include "qom/object.h" + +#define TYPE_RS6000MC "rs6000-mc" +OBJECT_DECLARE_SIMPLE_TYPE(RS6000MCState, RS6000MC) + +struct RS6000MCState { + ISADevice parent_obj; + /* see US patent 5,684,979 for details (expired 2001-11-04) */ + uint32_t ram_size; + bool autoconfigure; + MemoryRegion simm[6]; + unsigned int simm_size[6]; + uint32_t end_address[8]; + uint8_t port0820_index; + PortioList portio; +}; + +/* P0RT 0803 -- SIMM ID Register (32/8 MB) (Read Only) */ + +static uint32_t rs6000mc_port0803_read(void *opaque, uint32_t addr) +{ + RS6000MCState *s = opaque; + uint32_t val = 0; + int socket; + + /* (1 << socket) indicates 32 MB SIMM at given socket */ + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket] == 32) { + val |= (1 << socket); + } + } + + trace_rs6000mc_id_read(addr, val); + return val; +} + +/* PORT 0804 -- SIMM Presence Register (Read Only) */ + +static uint32_t rs6000mc_port0804_read(void *opaque, uint32_t addr) +{ + RS6000MCState *s = opaque; + uint32_t val = 0xff; + int socket; + + /* (1 << socket) indicates SIMM absence at given socket */ + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket]) { + val &= ~(1 << socket); + } + } + s->port0820_index = 0; + + trace_rs6000mc_presence_read(addr, val); + return val; +} + +/* Memory Controller Size Programming Register */ + +static uint32_t rs6000mc_port0820_read(void *opaque, uint32_t addr) +{ + RS6000MCState *s = opaque; + uint32_t val = s->end_address[s->port0820_index] & 0x1f; + s->port0820_index = (s->port0820_index + 1) & 7; + trace_rs6000mc_size_read(addr, val); + return val; +} + +static void rs6000mc_port0820_write(void *opaque, uint32_t addr, uint32_t val) +{ + RS6000MCState *s = opaque; + uint8_t socket = val >> 5; + uint32_t end_address = val & 0x1f; + + trace_rs6000mc_size_write(addr, val); + s->end_address[socket] = end_address; + if (socket > 0 && socket < 7) { + if (s->simm_size[socket - 1]) { + uint32_t size; + uint32_t start_address = 0; + if (socket > 1) { + start_address = s->end_address[socket - 1]; + } + + size = end_address - start_address; + 
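+            /*
+             * Addresses are programmed in 8 MiB granules (note the
+             * "* 8 * MiB" scaling below); a zero size disables the
+             * SIMM window.
+             */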
memory_region_set_enabled(&s->simm[socket - 1], size != 0); + memory_region_set_address(&s->simm[socket - 1], + start_address * 8 * MiB); + } + } +} + +/* Read Memory Parity Error */ + +enum { + PORT0841_NO_ERROR_DETECTED = 0x01, +}; + +static uint32_t rs6000mc_port0841_read(void *opaque, uint32_t addr) +{ + uint32_t val = PORT0841_NO_ERROR_DETECTED; + trace_rs6000mc_parity_read(addr, val); + return val; +} + +static const MemoryRegionPortio rs6000mc_port_list[] = { + { 0x803, 1, 1, .read = rs6000mc_port0803_read }, + { 0x804, 1, 1, .read = rs6000mc_port0804_read }, + { 0x820, 1, 1, .read = rs6000mc_port0820_read, + .write = rs6000mc_port0820_write, }, + { 0x841, 1, 1, .read = rs6000mc_port0841_read }, + PORTIO_END_OF_LIST() +}; + +static void rs6000mc_realize(DeviceState *dev, Error **errp) +{ + RS6000MCState *s = RS6000MC(dev); + int socket = 0; + unsigned int ram_size = s->ram_size / MiB; + Error *local_err = NULL; + + while (socket < 6) { + if (ram_size >= 64) { + s->simm_size[socket] = 32; + s->simm_size[socket + 1] = 32; + ram_size -= 64; + } else if (ram_size >= 16) { + s->simm_size[socket] = 8; + s->simm_size[socket + 1] = 8; + ram_size -= 16; + } else { + /* Not enough memory */ + break; + } + socket += 2; + } + + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket]) { + char name[] = "simm.?"; + name[5] = socket + '0'; + memory_region_init_ram(&s->simm[socket], OBJECT(dev), name, + s->simm_size[socket] * MiB, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + memory_region_add_subregion_overlap(get_system_memory(), 0, + &s->simm[socket], socket); + } + } + if (ram_size) { + /* unable to push all requested RAM in SIMMs */ + error_setg(errp, "RAM size incompatible with this board. " + "Try again with something else, like %" PRId64 " MB", + s->ram_size / MiB - ram_size); + return; + } + + if (s->autoconfigure) { + uint32_t start_address = 0; + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket]) { + memory_region_set_enabled(&s->simm[socket], true); + memory_region_set_address(&s->simm[socket], start_address); + start_address += memory_region_size(&s->simm[socket]); + } + } + } + + isa_register_portio_list(ISA_DEVICE(dev), &s->portio, 0x0, + rs6000mc_port_list, s, "rs6000mc"); +} + +static const VMStateDescription vmstate_rs6000mc = { + .name = "rs6000-mc", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(port0820_index, RS6000MCState), + VMSTATE_END_OF_LIST() + }, +}; + +static Property rs6000mc_properties[] = { + DEFINE_PROP_UINT32("ram-size", RS6000MCState, ram_size, 0), + DEFINE_PROP_BOOL("auto-configure", RS6000MCState, autoconfigure, true), + DEFINE_PROP_END_OF_LIST() +}; + +static void rs6000mc_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = rs6000mc_realize; + dc->vmsd = &vmstate_rs6000mc; + device_class_set_props(dc, rs6000mc_properties); +} + +static const TypeInfo rs6000mc_info = { + .name = TYPE_RS6000MC, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(RS6000MCState), + .class_init = rs6000mc_class_initfn, +}; + +static void rs6000mc_types(void) +{ + type_register_static(&rs6000mc_info); +} + +type_init(rs6000mc_types) diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c new file mode 100644 index 000000000..0737234d6 --- /dev/null +++ b/hw/ppc/sam460ex.c @@ -0,0 +1,516 @@ +/* + * QEMU aCube Sam460ex board emulation + * + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2019 BALATON Zoltan + 
* + * This file is derived from hw/ppc440_bamboo.c, + * the copyright for that material belongs to the original owners. + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "sysemu/device_tree.h" +#include "sysemu/block-backend.h" +#include "hw/loader.h" +#include "elf.h" +#include "exec/memory.h" +#include "ppc440.h" +#include "ppc405.h" +#include "hw/block/flash.h" +#include "sysemu/sysemu.h" +#include "sysemu/reset.h" +#include "hw/sysbus.h" +#include "hw/char/serial.h" +#include "hw/i2c/ppc4xx_i2c.h" +#include "hw/i2c/smbus_eeprom.h" +#include "hw/usb/hcd-ehci.h" +#include "hw/ppc/fdt.h" +#include "hw/qdev-properties.h" +#include "hw/intc/ppc-uic.h" + +#include <libfdt.h> + +#define BINARY_DEVICE_TREE_FILE "canyonlands.dtb" +#define UBOOT_FILENAME "u-boot-sam460-20100605.bin" +/* to extract the official U-Boot bin from the updater: */ +/* dd bs=1 skip=$(($(stat -c '%s' updater/updater-460) - 0x80000)) \ + if=updater/updater-460 of=u-boot-sam460-20100605.bin */ + +/* from Sam460 U-Boot include/configs/Sam460ex.h */ +#define FLASH_BASE 0xfff00000 +#define FLASH_BASE_H 0x4 +#define FLASH_SIZE (1 * MiB) +#define UBOOT_LOAD_BASE 0xfff80000 +#define UBOOT_SIZE 0x00080000 +#define UBOOT_ENTRY 0xfffffffc + +/* from U-Boot */ +#define EPAPR_MAGIC (0x45504150) +#define KERNEL_ADDR 0x1000000 +#define FDT_ADDR 0x1800000 +#define RAMDISK_ADDR 0x1900000 + +/* Sam460ex IRQ MAP: + IRQ0 = ETH_INT + IRQ1 = FPGA_INT + IRQ2 = PCI_INT (PCIA, PCIB, PCIC, PCIB) + IRQ3 = FPGA_INT2 + IRQ11 = RTC_INT + IRQ12 = SM502_INT +*/ + +#define CPU_FREQ 1150000000 +#define PLB_FREQ 230000000 +#define OPB_FREQ 115000000 +#define EBC_FREQ 115000000 +#define UART_FREQ 11059200 +#define SDRAM_NR_BANKS 4 + +/* The SoC could also handle 4 GiB but firmware does not work with that. */ +/* Maybe it overflows a signed 32 bit number somewhere? */ +static const ram_addr_t ppc460ex_sdram_bank_sizes[] = { + 2 * GiB, 1 * GiB, 512 * MiB, 256 * MiB, 128 * MiB, 64 * MiB, + 32 * MiB, 0 +}; + +struct boot_info { + uint32_t dt_base; + uint32_t dt_size; + uint32_t entry; +}; + +static int sam460ex_load_uboot(void) +{ + /* + * This first creates 1MiB of flash memory mapped at the end of + * the 32-bit address space (0xFFF00000..0xFFFFFFFF). + * + * If_PFLASH unit 0 is defined, the flash memory is initialized + * from that block backend. + * + * Else, it's initialized to zero. And then 512KiB of ROM get + * mapped on top of its second half (0xFFF80000..0xFFFFFFFF), + * initialized from u-boot-sam460-20100605.bin. + * + * This doesn't smell right. + * + * The physical hardware appears to have 512KiB flash memory. + * + * TODO Figure out what we really need here, and clean this up. + */ + + DriveInfo *dinfo; + + dinfo = drive_get(IF_PFLASH, 0, 0); + if (!pflash_cfi01_register(FLASH_BASE | ((hwaddr)FLASH_BASE_H << 32), + "sam460ex.flash", FLASH_SIZE, + dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, + 64 * KiB, 1, 0x89, 0x18, 0x0000, 0x0, 1)) { + error_report("Error registering flash memory"); + /* XXX: return an error instead? 
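+         * The caller already checks for a negative return value.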
*/ + exit(1); + } + + if (!dinfo) { + /*error_report("No flash image given with the 'pflash' parameter," + " using default u-boot image");*/ + rom_add_file_fixed(UBOOT_FILENAME, + UBOOT_LOAD_BASE | ((hwaddr)FLASH_BASE_H << 32), + -1); + } + + return 0; +} + +static int sam460ex_load_device_tree(hwaddr addr, + uint32_t ramsize, + hwaddr initrd_base, + hwaddr initrd_size, + const char *kernel_cmdline) +{ + uint32_t mem_reg_property[] = { 0, 0, cpu_to_be32(ramsize) }; + char *filename; + int fdt_size; + void *fdt; + uint32_t tb_freq = CPU_FREQ; + uint32_t clock_freq = CPU_FREQ; + int offset; + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE); + if (!filename) { + error_report("Couldn't find dtb file `%s'", BINARY_DEVICE_TREE_FILE); + exit(1); + } + fdt = load_device_tree(filename, &fdt_size); + if (!fdt) { + error_report("Couldn't load dtb file `%s'", filename); + g_free(filename); + exit(1); + } + g_free(filename); + + /* Manipulate device tree in memory. */ + + qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property, + sizeof(mem_reg_property)); + + /* default FDT doesn't have a /chosen node... */ + qemu_fdt_add_subnode(fdt, "/chosen"); + + qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", initrd_base); + + qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", kernel_cmdline); + + /* Copy data from the host device tree into the guest. Since the guest can + * directly access the timebase without host involvement, we must expose + * the correct frequencies. */ + if (kvm_enabled()) { + tb_freq = kvmppc_get_tbfreq(); + clock_freq = kvmppc_get_clockfreq(); + } + + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency", + clock_freq); + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency", + tb_freq); + + /* Remove cpm node if it exists (it is not emulated) */ + offset = fdt_path_offset(fdt, "/cpm"); + if (offset >= 0) { + _FDT(fdt_nop_node(fdt, offset)); + } + + /* set serial port clocks */ + offset = fdt_node_offset_by_compatible(fdt, -1, "ns16550"); + while (offset >= 0) { + _FDT(fdt_setprop_cell(fdt, offset, "clock-frequency", UART_FREQ)); + offset = fdt_node_offset_by_compatible(fdt, offset, "ns16550"); + } + + /* some more clocks */ + qemu_fdt_setprop_cell(fdt, "/plb", "clock-frequency", + PLB_FREQ); + qemu_fdt_setprop_cell(fdt, "/plb/opb", "clock-frequency", + OPB_FREQ); + qemu_fdt_setprop_cell(fdt, "/plb/opb/ebc", "clock-frequency", + EBC_FREQ); + + rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr); + g_free(fdt); + + return fdt_size; +} + +/* Create reset TLB entries for BookE, mapping only the flash memory. */ +static void mmubooke_create_initial_mapping_uboot(CPUPPCState *env) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + /* on reset the flash is mapped by a shadow TLB, + * but since we don't implement them we need to use + * the same values U-Boot will use to avoid a fault. + */ + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 0x10000000; /* up to 0xffffffff */ + tlb->EPN = 0xf0000000 & TARGET_PAGE_MASK; + tlb->RPN = (0xf0000000 & TARGET_PAGE_MASK) | 0x4; + tlb->PID = 0; +} + +/* Create reset TLB entries for BookE, spanning the 32bit addr space. 
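+ * Only the low 2 GiB are actually covered (tlb->size below is 1 << 31);
+ * the kernel is assumed to install its own mappings early on.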
*/ +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1 << 31; /* up to 0x80000000 */ + tlb->EPN = va & TARGET_PAGE_MASK; + tlb->RPN = pa & TARGET_PAGE_MASK; + tlb->PID = 0; +} + +static void main_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + struct boot_info *bi = env->load_info; + + cpu_reset(CPU(cpu)); + + /* either we have a kernel to boot or we jump to U-Boot */ + if (bi->entry != UBOOT_ENTRY) { + env->gpr[1] = (16 * MiB) - 8; + env->gpr[3] = FDT_ADDR; + env->nip = bi->entry; + + /* Create a mapping for the kernel. */ + mmubooke_create_initial_mapping(env, 0, 0); + env->gpr[6] = tswap32(EPAPR_MAGIC); + env->gpr[7] = (16 * MiB) - 8; /* bi->ima_size; */ + + } else { + env->nip = UBOOT_ENTRY; + mmubooke_create_initial_mapping_uboot(env); + } +} + +static void sam460ex_init(MachineState *machine) +{ + MemoryRegion *address_space_mem = get_system_memory(); + MemoryRegion *isa = g_new(MemoryRegion, 1); + MemoryRegion *ram_memories = g_new(MemoryRegion, SDRAM_NR_BANKS); + hwaddr ram_bases[SDRAM_NR_BANKS] = {0}; + hwaddr ram_sizes[SDRAM_NR_BANKS] = {0}; + MemoryRegion *l2cache_ram = g_new(MemoryRegion, 1); + DeviceState *uic[4]; + qemu_irq mal_irqs[4]; + int i; + PCIBus *pci_bus; + PowerPCCPU *cpu; + CPUPPCState *env; + I2CBus *i2c; + hwaddr entry = UBOOT_ENTRY; + target_long initrd_size = 0; + DeviceState *dev; + SysBusDevice *sbdev; + struct boot_info *boot_info; + uint8_t *spd_data; + int success; + + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + if (env->mmu_model != POWERPC_MMU_BOOKE) { + error_report("Only MMU model BookE is supported by this machine."); + exit(1); + } + + qemu_register_reset(main_cpu_reset, cpu); + boot_info = g_malloc0(sizeof(*boot_info)); + env->load_info = boot_info; + + ppc_booke_timers_init(cpu, CPU_FREQ, 0); + ppc_dcr_init(env, NULL, NULL); + + /* PLB arbitrer */ + ppc4xx_plb_init(env); + + /* interrupt controllers */ + for (i = 0; i < ARRAY_SIZE(uic); i++) { + SysBusDevice *sbd; + /* + * UICs 1, 2 and 3 are cascaded through UIC 0. + * input_ints[n] is the interrupt number on UIC 0 which + * the INT output of UIC n is connected to. The CINT output + * of UIC n connects to input_ints[n] + 1. + * The entry in input_ints[] for UIC 0 is ignored, because UIC 0's + * INT and CINT outputs are connected to the CPU. + */ + const int input_ints[] = { -1, 30, 10, 16 }; + + uic[i] = qdev_new(TYPE_PPC_UIC); + sbd = SYS_BUS_DEVICE(uic[i]); + + qdev_prop_set_uint32(uic[i], "dcr-base", 0xc0 + i * 0x10); + object_property_set_link(OBJECT(uic[i]), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(sbd, &error_fatal); + + if (i == 0) { + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + } else { + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_INT, + qdev_get_gpio_in(uic[0], input_ints[i])); + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_CINT, + qdev_get_gpio_in(uic[0], input_ints[i] + 1)); + } + } + + /* SDRAM controller */ + /* put all RAM on first bank because board has one slot + * and firmware only checks that */ + ppc4xx_sdram_banks(machine->ram, 1, ram_memories, ram_bases, ram_sizes, + ppc460ex_sdram_bank_sizes); + + /* FIXME: does 460EX have ECC interrupts? 
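+     * Nothing is connected in this model either way.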
*/ + ppc440_sdram_init(env, SDRAM_NR_BANKS, ram_memories, + ram_bases, ram_sizes, 1); + + /* IIC controllers and devices */ + dev = sysbus_create_simple(TYPE_PPC4xx_I2C, 0x4ef600700, + qdev_get_gpio_in(uic[0], 2)); + i2c = PPC4xx_I2C(dev)->bus; + /* SPD EEPROM on RAM module */ + spd_data = spd_data_generate(ram_sizes[0] < 128 * MiB ? DDR : DDR2, + ram_sizes[0]); + spd_data[20] = 4; /* SO-DIMM module */ + smbus_eeprom_init_one(i2c, 0x50, spd_data); + /* RTC */ + i2c_slave_create_simple(i2c, "m41t80", 0x68); + + dev = sysbus_create_simple(TYPE_PPC4xx_I2C, 0x4ef600800, + qdev_get_gpio_in(uic[0], 3)); + + /* External bus controller */ + ppc405_ebc_init(env); + + /* CPR */ + ppc4xx_cpr_init(env); + + /* PLB to AHB bridge */ + ppc4xx_ahb_init(env); + + /* System DCRs */ + ppc4xx_sdr_init(env); + + /* MAL */ + for (i = 0; i < ARRAY_SIZE(mal_irqs); i++) { + mal_irqs[0] = qdev_get_gpio_in(uic[2], 3 + i); + } + ppc4xx_mal_init(env, 4, 16, mal_irqs); + + /* DMA */ + ppc4xx_dma_init(env, 0x200); + + /* 256K of L2 cache as memory */ + ppc4xx_l2sram_init(env); + /* FIXME: remove this after fixing l2sram mapping in ppc440_uc.c? */ + memory_region_init_ram(l2cache_ram, NULL, "ppc440.l2cache_ram", 256 * KiB, + &error_abort); + memory_region_add_subregion(address_space_mem, 0x400000000LL, l2cache_ram); + + /* USB */ + sysbus_create_simple(TYPE_PPC4xx_EHCI, 0x4bffd0400, + qdev_get_gpio_in(uic[2], 29)); + dev = qdev_new("sysbus-ohci"); + qdev_prop_set_string(dev, "masterbus", "usb-bus.0"); + qdev_prop_set_uint32(dev, "num-ports", 6); + sbdev = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sbdev, &error_fatal); + sysbus_mmio_map(sbdev, 0, 0x4bffd0000); + sysbus_connect_irq(sbdev, 0, qdev_get_gpio_in(uic[2], 30)); + usb_create_simple(usb_bus_find(-1), "usb-kbd"); + usb_create_simple(usb_bus_find(-1), "usb-mouse"); + + /* PCI bus */ + ppc460ex_pcie_init(env); + /* All PCI irqs are connected to the same UIC pin (cf. UBoot source) */ + dev = sysbus_create_simple("ppc440-pcix-host", 0xc0ec00000, + qdev_get_gpio_in(uic[1], 0)); + pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0")); + + memory_region_init_alias(isa, NULL, "isa_mmio", get_system_io(), + 0, 0x10000); + memory_region_add_subregion(get_system_memory(), 0xc08000000, isa); + + /* PCI devices */ + pci_create_simple(pci_bus, PCI_DEVFN(6, 0), "sm501"); + /* SoC has a single SATA port but we don't emulate that yet + * However, firmware and usual clients have driver for SiI311x + * so add one for convenience by default */ + if (defaults_enabled()) { + pci_create_simple(pci_bus, -1, "sii3112"); + } + + /* SoC has 4 UARTs + * but board has only one wired and two are present in fdt */ + if (serial_hd(0) != NULL) { + serial_mm_init(address_space_mem, 0x4ef600300, 0, + qdev_get_gpio_in(uic[1], 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(0), + DEVICE_BIG_ENDIAN); + } + if (serial_hd(1) != NULL) { + serial_mm_init(address_space_mem, 0x4ef600400, 0, + qdev_get_gpio_in(uic[0], 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(1), + DEVICE_BIG_ENDIAN); + } + + /* Load U-Boot image. */ + if (!machine->kernel_filename) { + success = sam460ex_load_uboot(); + if (success < 0) { + error_report("could not load firmware"); + exit(1); + } + } + + /* Load kernel. 
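+     * A U-Boot uImage is tried first, then a plain ELF; a raw binary
+     * fallback is still missing (see the XXX below).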
*/ + if (machine->kernel_filename) { + hwaddr loadaddr = LOAD_UIMAGE_LOADADDR_INVALID; + success = load_uimage(machine->kernel_filename, &entry, &loadaddr, + NULL, NULL, NULL); + if (success < 0) { + uint64_t elf_entry; + + success = load_elf(machine->kernel_filename, NULL, NULL, NULL, + &elf_entry, NULL, NULL, NULL, + 1, PPC_ELF_MACHINE, 0, 0); + entry = elf_entry; + } + /* XXX try again as binary */ + if (success < 0) { + error_report("could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + } + + /* Load initrd. */ + if (machine->initrd_filename) { + initrd_size = load_image_targphys(machine->initrd_filename, + RAMDISK_ADDR, + machine->ram_size - RAMDISK_ADDR); + if (initrd_size < 0) { + error_report("could not load ram disk '%s' at %x", + machine->initrd_filename, RAMDISK_ADDR); + exit(1); + } + } + + /* If we're loading a kernel directly, we must load the device tree too. */ + if (machine->kernel_filename) { + int dt_size; + + dt_size = sam460ex_load_device_tree(FDT_ADDR, machine->ram_size, + RAMDISK_ADDR, initrd_size, + machine->kernel_cmdline); + + boot_info->dt_base = FDT_ADDR; + boot_info->dt_size = dt_size; + } + + boot_info->entry = entry; +} + +static void sam460ex_machine_init(MachineClass *mc) +{ + mc->desc = "aCube Sam460ex"; + mc->init = sam460ex_init; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("460exb"); + mc->default_ram_size = 512 * MiB; + mc->default_ram_id = "ppc4xx.sdram"; +} + +DEFINE_MACHINE("sam460ex", sam460ex_machine_init) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c new file mode 100644 index 000000000..3b5fd749b --- /dev/null +++ b/hw/ppc/spapr.c @@ -0,0 +1,5136 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * Copyright (c) 2010 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qapi/error.h" +#include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" +#include "qapi/visitor.h" +#include "sysemu/sysemu.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "sysemu/qtest.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "qemu/log.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "net/net.h" +#include "sysemu/device_tree.h" +#include "sysemu/cpus.h" +#include "sysemu/hw_accel.h" +#include "kvm_ppc.h" +#include "migration/misc.h" +#include "migration/qemu-file-types.h" +#include "migration/global_state.h" +#include "migration/register.h" +#include "migration/blocker.h" +#include "mmu-hash64.h" +#include "mmu-book3s-v3.h" +#include "cpu-models.h" +#include "hw/core/cpu.h" + +#include "hw/ppc/ppc.h" +#include "hw/loader.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/qdev-properties.h" +#include "hw/pci-host/spapr.h" +#include "hw/pci/msi.h" + +#include "hw/pci/pci.h" +#include "hw/scsi/scsi.h" +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/vhost-scsi-common.h" + +#include "exec/ram_addr.h" +#include "hw/usb.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "hw/nmi.h" +#include "hw/intc/intc.h" + +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/mem/memory-device.h" +#include "hw/ppc/spapr_tpm_proxy.h" +#include "hw/ppc/spapr_nvdimm.h" +#include "hw/ppc/spapr_numa.h" +#include "hw/ppc/pef.h" + +#include "monitor/monitor.h" + +#include <libfdt.h> + +/* SLOF memory layout: + * + * SLOF raw image loaded at 0, copies its romfs right below the flat + * device-tree, then position SLOF itself 31M below that + * + * So we set FW_OVERHEAD to 40MB which should account for all of that + * and more + * + * We load our kernel at 4M, leaving space for SLOF initial image + */ +#define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */ +#define FW_MAX_SIZE 0x400000 +#define FW_FILE_NAME "slof.bin" +#define FW_FILE_NAME_VOF "vof.bin" +#define FW_OVERHEAD 0x2800000 +#define KERNEL_LOAD_ADDR FW_MAX_SIZE + +#define MIN_RMA_SLOF (128 * MiB) + +#define PHANDLE_INTC 0x00001111 + +/* These two functions implement the VCPU id numbering: one to compute them + * all and one to identify thread 0 of a VCORE. Any change to the first one + * is likely to have an impact on the second one, so let's keep them close. + */ +static int spapr_vcpu_id(SpaprMachineState *spapr, int cpu_index) +{ + MachineState *ms = MACHINE(spapr); + unsigned int smp_threads = ms->smp.threads; + + assert(spapr->vsmt); + return + (cpu_index / smp_threads) * spapr->vsmt + cpu_index % smp_threads; +} +static bool spapr_is_thread0_in_vcore(SpaprMachineState *spapr, + PowerPCCPU *cpu) +{ + assert(spapr->vsmt); + return spapr_get_vcpu_id(cpu) % spapr->vsmt == 0; +} + +static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque) +{ + /* Dummy entries correspond to unused ICPState objects in older QEMUs, + * and newer QEMUs don't even have them. In both cases, we don't want + * to send anything on the wire. 
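
[Editor's note — illustrative sketch, not part of this patch] The VCPU id numbering above deserves a worked example: ids are allocated in strides of vsmt per virtual core while QEMU's cpu_index is dense in strides of smp.threads, so thread 0 of each vcore always lands on an id divisible by vsmt. A self-contained check of the same arithmetic:

    #include <assert.h>
    #include <stdio.h>

    static int spapr_vcpu_id_ex(int cpu_index, int threads, int vsmt)
    {
        return (cpu_index / threads) * vsmt + cpu_index % threads;
    }

    int main(void)
    {
        /* threads=4, vsmt=8: core 1's threads get ids 8..11 */
        assert(spapr_vcpu_id_ex(4, 4, 8) == 8);
        assert(spapr_vcpu_id_ex(5, 4, 8) == 9);
        /* thread 0 of a vcore: id % vsmt == 0 */
        printf("id(4)=%d id(5)=%d\n",
               spapr_vcpu_id_ex(4, 4, 8), spapr_vcpu_id_ex(5, 4, 8));
        return 0;
    }
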
+ */ + return false; +} + +static const VMStateDescription pre_2_10_vmstate_dummy_icp = { + .name = "icp/server", + .version_id = 1, + .minimum_version_id = 1, + .needed = pre_2_10_vmstate_dummy_icp_needed, + .fields = (VMStateField[]) { + VMSTATE_UNUSED(4), /* uint32_t xirr */ + VMSTATE_UNUSED(1), /* uint8_t pending_priority */ + VMSTATE_UNUSED(1), /* uint8_t mfrr */ + VMSTATE_END_OF_LIST() + }, +}; + +static void pre_2_10_vmstate_register_dummy_icp(int i) +{ + vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +static void pre_2_10_vmstate_unregister_dummy_icp(int i) +{ + vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +int spapr_max_server_number(SpaprMachineState *spapr) +{ + MachineState *ms = MACHINE(spapr); + + assert(spapr->vsmt); + return DIV_ROUND_UP(ms->smp.max_cpus * spapr->vsmt, ms->smp.threads); +} + +static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, + int smt_threads) +{ + int i, ret = 0; + uint32_t servers_prop[smt_threads]; + uint32_t gservers_prop[smt_threads * 2]; + int index = spapr_get_vcpu_id(cpu); + + if (cpu->compat_pvr) { + ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->compat_pvr); + if (ret < 0) { + return ret; + } + } + + /* Build interrupt servers and gservers properties */ + for (i = 0; i < smt_threads; i++) { + servers_prop[i] = cpu_to_be32(index + i); + /* Hack, direct the group queues back to cpu 0 */ + gservers_prop[i*2] = cpu_to_be32(index + i); + gservers_prop[i*2 + 1] = 0; + } + ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)); + if (ret < 0) { + return ret; + } + ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", + gservers_prop, sizeof(gservers_prop)); + + return ret; +} + +static void spapr_dt_pa_features(SpaprMachineState *spapr, + PowerPCCPU *cpu, + void *fdt, int offset) +{ + uint8_t pa_features_206[] = { 6, 0, + 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; + uint8_t pa_features_207[] = { 24, 0, + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 }; + uint8_t pa_features_300[] = { 66, 0, + /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */ + /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, SSO, 5: LE|CFAR|EB|LSQ */ + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */ + /* 6: DS207 */ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ + /* 16: Vector */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ + /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */ + /* 24: Ext. 
Dec, 26: 64 bit ftrs, 28: PM ftrs */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */
+        /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */
+        0x80, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */
+        /* 36: SPR SO, 38: Copy/Paste, 40: Radix MMU */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 36 - 41 */
+        /* 42: PM, 44: PC RA, 46: SC vec'd */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */
+        /* 48: SIMD, 50: QP BFP, 52: String */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */
+        /* 54: DecFP, 56: DecI, 58: SHA */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */
+        /* 60: NM atomic, 62: RNG */
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */
+    };
+    uint8_t *pa_features = NULL;
+    size_t pa_size;
+
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_06, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_206;
+        pa_size = sizeof(pa_features_206);
+    }
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_07, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_207;
+        pa_size = sizeof(pa_features_207);
+    }
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_300;
+        pa_size = sizeof(pa_features_300);
+    }
+    if (!pa_features) {
+        return;
+    }
+
+    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
+        /*
+         * Note: we keep CI large pages off by default because a 64K capable
+         * guest provisioned with large pages might otherwise try to map a
+         * QEMU framebuffer (or other kind of memory mapped PCI BAR) using
+         * 64K pages even if that QEMU runs on a 4K host.
+         * We add this bit back here if we are confident this is not an
+         * issue.
+         */
+        pa_features[3] |= 0x20;
+    }
+    if ((spapr_get_cap(spapr, SPAPR_CAP_HTM) != 0) && pa_size > 24) {
+        pa_features[24] |= 0x80; /* Transactional memory support */
+    }
+    if (spapr->cas_pre_isa3_guest && pa_size > 40) {
+        /* Workaround for broken kernels that attempt (guest) radix
+         * mode when they can't handle it, if they see the radix bit set
+         * in pa-features. So hide it from them.
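
[Editor's note — illustrative sketch, not part of this patch] The pa_features arrays above carry a two-byte header ({attribute-byte count, 0}), so attribute byte N lives at array offset N + 2; that is why the HTM bit (attribute byte 22 per the table comments) is set via pa_features[24], and the radix bit (attribute byte 40) is cleared via pa_features[40 + 2]. A small sketch of that indexing:

    #include <stdint.h>
    #include <stdio.h>

    #define PA_HDR 2  /* two header bytes before attribute byte 0 */

    static void pa_set(uint8_t *pa, size_t sz, unsigned byte, uint8_t mask)
    {
        if (byte + PA_HDR < sz) {
            pa[byte + PA_HDR] |= mask;
        }
    }

    static void pa_clear(uint8_t *pa, size_t sz, unsigned byte, uint8_t mask)
    {
        if (byte + PA_HDR < sz) {
            pa[byte + PA_HDR] &= ~mask;
        }
    }

    int main(void)
    {
        uint8_t pa[68] = { 66, 0 };
        pa_set(pa, sizeof(pa), 22, 0x80);   /* advertise HTM          */
        pa_clear(pa, sizeof(pa), 40, 0x80); /* hide radix from guest  */
        printf("attr22=%#x attr40=%#x\n", pa[24], pa[42]);
        return 0;
    }
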
*/ + pa_features[40 + 2] &= ~0x80; /* Radix MMU */ + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size))); +} + +static hwaddr spapr_node0_size(MachineState *machine) +{ + if (machine->numa_state->num_nodes) { + int i; + for (i = 0; i < machine->numa_state->num_nodes; ++i) { + if (machine->numa_state->nodes[i].node_mem) { + return MIN(pow2floor(machine->numa_state->nodes[i].node_mem), + machine->ram_size); + } + } + } + return machine->ram_size; +} + +static void add_str(GString *s, const gchar *s1) +{ + g_string_append_len(s, s1, strlen(s1) + 1); +} + +static int spapr_dt_memory_node(SpaprMachineState *spapr, void *fdt, int nodeid, + hwaddr start, hwaddr size) +{ + char mem_name[32]; + uint64_t mem_reg_property[2]; + int off; + + mem_reg_property[0] = cpu_to_be64(start); + mem_reg_property[1] = cpu_to_be64(size); + + sprintf(mem_name, "memory@%" HWADDR_PRIx, start); + off = fdt_add_subnode(fdt, 0, mem_name); + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + spapr_numa_write_associativity_dt(spapr, fdt, off, nodeid); + return off; +} + +static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr) +{ + MemoryDeviceInfoList *info; + + for (info = list; info; info = info->next) { + MemoryDeviceInfo *value = info->value; + + if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) { + PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data; + + if (addr >= pcdimm_info->addr && + addr < (pcdimm_info->addr + pcdimm_info->size)) { + return pcdimm_info->node; + } + } + } + + return -1; +} + +struct sPAPRDrconfCellV2 { + uint32_t seq_lmbs; + uint64_t base_addr; + uint32_t drc_index; + uint32_t aa_index; + uint32_t flags; +} QEMU_PACKED; + +typedef struct DrconfCellQueue { + struct sPAPRDrconfCellV2 cell; + QSIMPLEQ_ENTRY(DrconfCellQueue) entry; +} DrconfCellQueue; + +static DrconfCellQueue * +spapr_get_drconf_cell(uint32_t seq_lmbs, uint64_t base_addr, + uint32_t drc_index, uint32_t aa_index, + uint32_t flags) +{ + DrconfCellQueue *elem; + + elem = g_malloc0(sizeof(*elem)); + elem->cell.seq_lmbs = cpu_to_be32(seq_lmbs); + elem->cell.base_addr = cpu_to_be64(base_addr); + elem->cell.drc_index = cpu_to_be32(drc_index); + elem->cell.aa_index = cpu_to_be32(aa_index); + elem->cell.flags = cpu_to_be32(flags); + + return elem; +} + +static int spapr_dt_dynamic_memory_v2(SpaprMachineState *spapr, void *fdt, + int offset, MemoryDeviceInfoList *dimms) +{ + MachineState *machine = MACHINE(spapr); + uint8_t *int_buf, *cur_index; + int ret; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint64_t addr, cur_addr, size; + uint32_t nr_boot_lmbs = (machine->device_memory->base / lmb_size); + uint64_t mem_end = machine->device_memory->base + + memory_region_size(&machine->device_memory->mr); + uint32_t node, buf_len, nr_entries = 0; + SpaprDrc *drc; + DrconfCellQueue *elem, *next; + MemoryDeviceInfoList *info; + QSIMPLEQ_HEAD(, DrconfCellQueue) drconf_queue + = QSIMPLEQ_HEAD_INITIALIZER(drconf_queue); + + /* Entry to cover RAM and the gap area */ + elem = spapr_get_drconf_cell(nr_boot_lmbs, 0, 0, -1, + SPAPR_LMB_FLAGS_RESERVED | + SPAPR_LMB_FLAGS_DRC_INVALID); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + + cur_addr = machine->device_memory->base; + for (info = dimms; info; info = info->next) { + PCDIMMDeviceInfo *di = info->value->u.dimm.data; + + addr = di->addr; + size = di->size; + node = di->node; + + /* + * The NVDIMM area is 
hotpluggable after the NVDIMM is unplugged. The + * area is marked hotpluggable in the next iteration for the bigger + * chunk including the NVDIMM occupied area. + */ + if (info->value->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) + continue; + + /* Entry for hot-pluggable area */ + if (cur_addr < addr) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size); + g_assert(drc); + elem = spapr_get_drconf_cell((addr - cur_addr) / lmb_size, + cur_addr, spapr_drc_index(drc), -1, 0); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + } + + /* Entry for DIMM */ + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, addr / lmb_size); + g_assert(drc); + elem = spapr_get_drconf_cell(size / lmb_size, addr, + spapr_drc_index(drc), node, + (SPAPR_LMB_FLAGS_ASSIGNED | + SPAPR_LMB_FLAGS_HOTREMOVABLE)); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + cur_addr = addr + size; + } + + /* Entry for remaining hotpluggable area */ + if (cur_addr < mem_end) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size); + g_assert(drc); + elem = spapr_get_drconf_cell((mem_end - cur_addr) / lmb_size, + cur_addr, spapr_drc_index(drc), -1, 0); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + } + + buf_len = nr_entries * sizeof(struct sPAPRDrconfCellV2) + sizeof(uint32_t); + int_buf = cur_index = g_malloc0(buf_len); + *(uint32_t *)int_buf = cpu_to_be32(nr_entries); + cur_index += sizeof(nr_entries); + + QSIMPLEQ_FOREACH_SAFE(elem, &drconf_queue, entry, next) { + memcpy(cur_index, &elem->cell, sizeof(elem->cell)); + cur_index += sizeof(elem->cell); + QSIMPLEQ_REMOVE(&drconf_queue, elem, DrconfCellQueue, entry); + g_free(elem); + } + + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory-v2", int_buf, buf_len); + g_free(int_buf); + if (ret < 0) { + return -1; + } + return 0; +} + +static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt, + int offset, MemoryDeviceInfoList *dimms) +{ + MachineState *machine = MACHINE(spapr); + int i, ret; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t device_lmb_start = machine->device_memory->base / lmb_size; + uint32_t nr_lmbs = (machine->device_memory->base + + memory_region_size(&machine->device_memory->mr)) / + lmb_size; + uint32_t *int_buf, *cur_index, buf_len; + + /* + * Allocate enough buffer size to fit in ibm,dynamic-memory + */ + buf_len = (nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1) * sizeof(uint32_t); + cur_index = int_buf = g_malloc0(buf_len); + int_buf[0] = cpu_to_be32(nr_lmbs); + cur_index++; + for (i = 0; i < nr_lmbs; i++) { + uint64_t addr = i * lmb_size; + uint32_t *dynamic_memory = cur_index; + + if (i >= device_lmb_start) { + SpaprDrc *drc; + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, i); + g_assert(drc); + + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc)); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr)); + if (memory_region_present(get_system_memory(), addr)) { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + } else { + dynamic_memory[5] = cpu_to_be32(0); + } + } else { + /* + * LMB information for RMA, boot time RAM and gap b/n RAM and + * device memory region -- all these are marked as reserved + * and as having no valid DRC. 
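
[Editor's note — illustrative sketch, not part of this patch] Each ibm,dynamic-memory entry written above is six big-endian 32-bit cells: address high/low, DRC index, a reserved word, the associativity (NUMA) node, and flags. A standalone sketch of the packing; the 0x8 flag value is assumed to stand in for SPAPR_LMB_FLAGS_ASSIGNED:

    #include <stdint.h>
    #include <stdio.h>

    static void put_be32(uint8_t *p, uint32_t v)
    {
        p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
    }

    /* Pack one 24-byte LMB entry the way spapr_dt_dynamic_memory() does. */
    static void pack_lmb(uint8_t out[24], uint64_t addr, uint32_t drc_index,
                         uint32_t node, uint32_t flags)
    {
        put_be32(out +  0, addr >> 32);
        put_be32(out +  4, addr & 0xffffffff);
        put_be32(out +  8, drc_index);
        put_be32(out + 12, 0);          /* reserved */
        put_be32(out + 16, node);
        put_be32(out + 20, flags);
    }

    int main(void)
    {
        uint8_t cell[24];
        pack_lmb(cell, 0x100000000ULL, 42, 0, 0x8 /* assumed ASSIGNED */);
        printf("drc index cells: %02x%02x%02x%02x\n",
               cell[8], cell[9], cell[10], cell[11]);
        return 0;
    }
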
+ */ + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(0); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(-1); + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED | + SPAPR_LMB_FLAGS_DRC_INVALID); + } + + cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; + } + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); + g_free(int_buf); + if (ret < 0) { + return -1; + } + return 0; +} + +/* + * Adds ibm,dynamic-reconfiguration-memory node. + * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation + * of this device tree node. + */ +static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, + void *fdt) +{ + MachineState *machine = MACHINE(spapr); + int ret, offset; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32), + cpu_to_be32(lmb_size & 0xffffffff)}; + MemoryDeviceInfoList *dimms = NULL; + + /* + * Don't create the node if there is no device memory + */ + if (machine->ram_size == machine->maxram_size) { + return 0; + } + + offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory"); + + ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size, + sizeof(prop_lmb_size)); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0); + if (ret < 0) { + return ret; + } + + /* ibm,dynamic-memory or ibm,dynamic-memory-v2 */ + dimms = qmp_memory_device_list(); + if (spapr_ovec_test(spapr->ov5_cas, OV5_DRMEM_V2)) { + ret = spapr_dt_dynamic_memory_v2(spapr, fdt, offset, dimms); + } else { + ret = spapr_dt_dynamic_memory(spapr, fdt, offset, dimms); + } + qapi_free_MemoryDeviceInfoList(dimms); + + if (ret < 0) { + return ret; + } + + ret = spapr_numa_write_assoc_lookup_arrays(spapr, fdt, offset); + + return ret; +} + +static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt) +{ + MachineState *machine = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + hwaddr mem_start, node_size; + int i, nb_nodes = machine->numa_state->num_nodes; + NodeInfo *nodes = machine->numa_state->nodes; + + for (i = 0, mem_start = 0; i < nb_nodes; ++i) { + if (!nodes[i].node_mem) { + continue; + } + if (mem_start >= machine->ram_size) { + node_size = 0; + } else { + node_size = nodes[i].node_mem; + if (node_size > machine->ram_size - mem_start) { + node_size = machine->ram_size - mem_start; + } + } + if (!mem_start) { + /* spapr_machine_init() checks for rma_size <= node0_size + * already */ + spapr_dt_memory_node(spapr, fdt, i, 0, spapr->rma_size); + mem_start += spapr->rma_size; + node_size -= spapr->rma_size; + } + for ( ; node_size; ) { + hwaddr sizetmp = pow2floor(node_size); + + /* mem_start != 0 here */ + if (ctzl(mem_start) < ctzl(sizetmp)) { + sizetmp = 1ULL << ctzl(mem_start); + } + + spapr_dt_memory_node(spapr, fdt, i, mem_start, sizetmp); + node_size -= sizetmp; + mem_start += sizetmp; + } + } + + /* Generate ibm,dynamic-reconfiguration-memory node if required */ + if (spapr_ovec_test(spapr->ov5_cas, OV5_DRCONF_MEMORY)) { + int ret; + + g_assert(smc->dr_lmb_enabled); + ret = spapr_dt_dynamic_reconfiguration_memory(spapr, fdt); + if (ret) { + return ret; + } + } + + return 0; +} + +static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset, + SpaprMachineState *spapr) +{ + MachineState *ms = 
MACHINE(spapr);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    int index = spapr_get_vcpu_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
+        : SPAPR_TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    unsigned int smp_threads = ms->smp.threads;
+    uint32_t vcpus_per_socket = smp_threads * ms->smp.cores;
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+    int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
+    SpaprDrc *drc;
+    int drc_index;
+    uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
+    int i;
+
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index);
+    if (drc) {
+        drc_index = spapr_drc_index(drc);
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                           env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                           env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+                               pcc->l1_dcache_size)));
+    } else {
+        warn_report("Unknown L1 dcache size for cpu");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+                               pcc->l1_icache_size)));
+    } else {
+        warn_report("Unknown L1 icache size for cpu");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (ppc_has_spr(cpu, SPR_PURR)) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
+    }
+    if (ppc_has_spr(cpu, SPR_SPURR)) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
+    }
+
+    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                          segs, sizeof(segs))));
+    }
+
+    /* Advertise VSX (vector extensions) if available
+     *   1 == VMX / Altivec available
+     *   2 == VSX available
+     *
+     * Only CPUs for which we create core types in spapr_cpu_core.c
+     * are possible, and all of those have VMX */
+    if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2)));
+    } else {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1 == DFP available */
+    if (spapr_get_cap(spapr, SPAPR_CAP_DFP) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
+                                                      sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                          page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    spapr_dt_pa_features(spapr, cpu, fdt,
offset);
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+                           cs->cpu_index / vcpus_per_socket)));
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
+                      pft_size_prop, sizeof(pft_size_prop))));
+
+    if (ms->numa_state->num_nodes > 1) {
+        _FDT(spapr_numa_fixup_cpu_dt(spapr, fdt, offset, cpu));
+    }
+
+    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
+
+    if (pcc->radix_page_info) {
+        for (i = 0; i < pcc->radix_page_info->count; i++) {
+            radix_AP_encodings[i] =
+                cpu_to_be32(pcc->radix_page_info->entries[i]);
+        }
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
+                          radix_AP_encodings,
+                          pcc->radix_page_info->count *
+                          sizeof(radix_AP_encodings[0]))));
+    }
+
+    /*
+     * We set this property to let the guest know that it can use the large
+     * decrementer and its width in bits.
+     */
+    if (spapr_get_cap(spapr, SPAPR_CAP_LARGE_DECREMENTER) != SPAPR_CAP_OFF) {
+        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
+                              pcc->lrg_decr_bits)));
+    }
+}
+
+static void spapr_dt_cpus(void *fdt, SpaprMachineState *spapr)
+{
+    CPUState **rev;
+    CPUState *cs;
+    int n_cpus;
+    int cpus_offset;
+    int i;
+
+    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+    _FDT(cpus_offset);
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+
+    /*
+     * We walk the CPUs in reverse order to ensure that CPU DT nodes
+     * created by fdt_add_subnode() end up in the right order in FDT,
+     * so that the guest kernel can enumerate the CPUs correctly.
+     *
+     * The CPU list cannot be traversed in reverse order, so we need
+     * to do extra work.
+     */
+    n_cpus = 0;
+    rev = NULL;
+    CPU_FOREACH(cs) {
+        rev = g_renew(CPUState *, rev, n_cpus + 1);
+        rev[n_cpus++] = cs;
+    }
+
+    for (i = n_cpus - 1; i >= 0; i--) {
+        CPUState *cs = rev[i];
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        int index = spapr_get_vcpu_id(cpu);
+        DeviceClass *dc = DEVICE_GET_CLASS(cs);
+        g_autofree char *nodename = NULL;
+        int offset;
+
+        if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
+            continue;
+        }
+
+        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+        _FDT(offset);
+        spapr_dt_cpu(cs, fdt, offset, spapr);
+    }
+
+    g_free(rev);
+}
+
+static int spapr_dt_rng(void *fdt)
+{
+    int node;
+    int ret;
+
+    node = qemu_fdt_add_subnode(fdt, "/ibm,platform-facilities");
+    if (node <= 0) {
+        return -1;
+    }
+    ret = fdt_setprop_string(fdt, node, "device_type",
+                             "ibm,platform-facilities");
+    ret |= fdt_setprop_cell(fdt, node, "#address-cells", 0x1);
+    ret |= fdt_setprop_cell(fdt, node, "#size-cells", 0x0);
+
+    node = fdt_add_subnode(fdt, node, "ibm,random-v1");
+    if (node <= 0) {
+        return -1;
+    }
+    ret |= fdt_setprop_string(fdt, node, "compatible", "ibm,random");
+
+    return ret ?
-1 : 0; +} + +static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) +{ + MachineState *ms = MACHINE(spapr); + int rtas; + GString *hypertas = g_string_sized_new(256); + GString *qemu_hypertas = g_string_sized_new(256); + uint64_t max_device_addr = MACHINE(spapr)->device_memory->base + + memory_region_size(&MACHINE(spapr)->device_memory->mr); + uint32_t lrdr_capacity[] = { + cpu_to_be32(max_device_addr >> 32), + cpu_to_be32(max_device_addr & 0xffffffff), + cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32), + cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff), + cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), + }; + + _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); + + /* hypertas */ + add_str(hypertas, "hcall-pft"); + add_str(hypertas, "hcall-term"); + add_str(hypertas, "hcall-dabr"); + add_str(hypertas, "hcall-interrupt"); + add_str(hypertas, "hcall-tce"); + add_str(hypertas, "hcall-vio"); + add_str(hypertas, "hcall-splpar"); + add_str(hypertas, "hcall-join"); + add_str(hypertas, "hcall-bulk"); + add_str(hypertas, "hcall-set-mode"); + add_str(hypertas, "hcall-sprg0"); + add_str(hypertas, "hcall-copy"); + add_str(hypertas, "hcall-debug"); + add_str(hypertas, "hcall-vphn"); + if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) { + add_str(hypertas, "hcall-rpt-invalidate"); + } + + add_str(qemu_hypertas, "hcall-memop1"); + + if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { + add_str(hypertas, "hcall-multi-tce"); + } + + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + add_str(hypertas, "hcall-hpt-resize"); + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions", + hypertas->str, hypertas->len)); + g_string_free(hypertas, TRUE); + _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions", + qemu_hypertas->str, qemu_hypertas->len)); + g_string_free(qemu_hypertas, TRUE); + + spapr_numa_write_rtas_dt(spapr, fdt, rtas); + + /* + * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log, + * and 16 bytes per CPU for system reset error log plus an extra 8 bytes. + * + * The system reset requirements are driven by existing Linux and PowerVM + * implementation which (contrary to PAPR) saves r3 in the error log + * structure like machine check, so Linux expects to find the saved r3 + * value at the address in r3 upon FWNMI-enabled sreset interrupt (and + * does not look at the error value). + * + * System reset interrupts are not subject to interlock like machine + * check, so this memory area could be corrupted if the sreset is + * interrupted by a machine check (or vice versa) if it was shared. To + * prevent this, system reset uses per-CPU areas for the sreset save + * area. A system reset that interrupts a system reset handler could + * still overwrite this area, but Linux doesn't try to recover in that + * case anyway. + * + * The extra 8 bytes is required because Linux's FWNMI error log check + * is off-by-one. + * + * RTAS_MIN_SIZE is required for the RTAS blob itself. + */ + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_MIN_SIZE + + RTAS_ERROR_LOG_MAX + + ms->smp.max_cpus * sizeof(uint64_t) * 2 + + sizeof(uint64_t))); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max", + RTAS_ERROR_LOG_MAX)); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate", + RTAS_EVENT_SCAN_RATE)); + + g_assert(msi_nonbroken); + _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0)); + + /* + * According to PAPR, rtas ibm,os-term does not guarantee a return + * back to the guest cpu. 
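
[Editor's note — illustrative sketch, not part of this patch] The rtas-size computation above is easy to sanity-check by hand: the RTAS blob itself, plus the machine-check error log, plus two 8-byte per-CPU system-reset save slots, plus 8 extra bytes for Linux's off-by-one log check. The two constants below are assumptions standing in for the spapr.h definitions:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t RTAS_MIN_SIZE_EX = 20;         /* assumed value */
        const uint32_t RTAS_ERROR_LOG_MAX_EX = 2048;  /* assumed value */
        uint32_t max_cpus = 8;

        uint32_t rtas_size = RTAS_MIN_SIZE_EX + RTAS_ERROR_LOG_MAX_EX
                           + max_cpus * (uint32_t)sizeof(uint64_t) * 2
                           + (uint32_t)sizeof(uint64_t);
        printf("rtas-size = %u bytes\n", rtas_size);  /* 2204 here */
        return 0;
    }
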
+ * + * While an additional ibm,extended-os-term property indicates + * that rtas call return will always occur. Set this property. + */ + _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0)); + + _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity", + lrdr_capacity, sizeof(lrdr_capacity))); + + spapr_dt_rtas_tokens(fdt, rtas); +} + +/* + * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU + * and the XIVE features that the guest may request and thus the valid + * values for bytes 23..26 of option vector 5: + */ +static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt, + int chosen) +{ + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + char val[2 * 4] = { + 23, 0x00, /* XICS / XIVE mode */ + 24, 0x00, /* Hash/Radix, filled in below. */ + 25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */ + 26, 0x40, /* Radix options: GTSE == yes. */ + }; + + if (spapr->irq->xics && spapr->irq->xive) { + val[1] = SPAPR_OV5_XIVE_BOTH; + } else if (spapr->irq->xive) { + val[1] = SPAPR_OV5_XIVE_EXPLOIT; + } else { + assert(spapr->irq->xics); + val[1] = SPAPR_OV5_XIVE_LEGACY; + } + + if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0, + first_ppc_cpu->compat_pvr)) { + /* + * If we're in a pre POWER9 compat mode then the guest should + * do hash and use the legacy interrupt mode + */ + val[1] = SPAPR_OV5_XIVE_LEGACY; /* XICS */ + val[3] = 0x00; /* Hash */ + spapr_check_mmu_mode(false); + } else if (kvm_enabled()) { + if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) { + val[3] = 0x80; /* OV5_MMU_BOTH */ + } else if (kvmppc_has_cap_mmu_radix()) { + val[3] = 0x40; /* OV5_MMU_RADIX_300 */ + } else { + val[3] = 0x00; /* Hash */ + } + } else { + /* V3 MMU supports both hash and radix in tcg (with dynamic switching) */ + val[3] = 0xC0; + } + _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support", + val, sizeof(val))); +} + +static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) +{ + MachineState *machine = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + int chosen; + + _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); + + if (reset) { + const char *boot_device = spapr->boot_device; + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + size_t cb = 0; + char *bootlist = get_boot_devices_list(&cb); + + if (machine->kernel_cmdline && machine->kernel_cmdline[0]) { + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", + machine->kernel_cmdline)); + } + + if (spapr->initrd_size) { + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start", + spapr->initrd_base)); + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end", + spapr->initrd_base + spapr->initrd_size)); + } + + if (spapr->kernel_size) { + uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr), + cpu_to_be64(spapr->kernel_size) }; + + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel", + &kprop, sizeof(kprop))); + if (spapr->kernel_le) { + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0)); + } + } + if (boot_menu) { + _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu))); + } + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth)); + + if (cb && bootlist) { + int i; + + for (i = 0; i < cb; i++) { + if (bootlist[i] == '\n') { + bootlist[i] = ' '; + } + } + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist)); 
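
[Editor's note — illustrative sketch, not part of this patch] The ibm,arch-vec-5-platform-support property built above is a flat list of (option-vector-5 byte index, supported-values mask) pairs for bytes 23..26. A miniature rendering of the same layout; the 0x80 "XIVE both" value is an assumption standing in for SPAPR_OV5_XIVE_BOTH, while 0xC0/0x40 match the MMU and radix-option values used in the code above:

    #include <stdint.h>
    #include <stdio.h>

    struct ov5_support { uint8_t index, mask; };

    static void fill_platform_support(struct ov5_support s[4],
                                      uint8_t xive_mode, uint8_t mmu_mode)
    {
        s[0] = (struct ov5_support){ 23, xive_mode }; /* XICS/XIVE          */
        s[1] = (struct ov5_support){ 24, mmu_mode };  /* hash/radix support */
        s[2] = (struct ov5_support){ 25, 0x00 };      /* hash opts: none    */
        s[3] = (struct ov5_support){ 26, 0x40 };      /* radix: GTSE == yes */
    }

    int main(void)
    {
        struct ov5_support s[4];
        fill_platform_support(s, 0x80 /* assumed */, 0xC0 /* hash+radix */);
        for (int i = 0; i < 4; i++) {
            printf("byte %u -> %#x\n", s[i].index, s[i].mask);
        }
        return 0;
    }
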
+ } + + if (boot_device && strlen(boot_device)) { + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device)); + } + + if (!spapr->has_graphics && stdout_path) { + /* + * "linux,stdout-path" and "stdout" properties are + * deprecated by linux kernel. New platforms should only + * use the "stdout-path" property. Set the new property + * and continue using older property to remain compatible + * with the existing firmware. + */ + _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path)); + _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path)); + } + + /* + * We can deal with BAR reallocation just fine, advertise it + * to the guest + */ + if (smc->linux_pci_probe) { + _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0)); + } + + spapr_dt_ov5_platform_support(spapr, fdt, chosen); + + g_free(stdout_path); + g_free(bootlist); + } + + _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5")); +} + +static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) +{ + /* The /hypervisor node isn't in PAPR - this is a hack to allow PR + * KVM to work under pHyp with some guest co-operation */ + int hypervisor; + uint8_t hypercall[16]; + + _FDT(hypervisor = fdt_add_subnode(fdt, 0, "hypervisor")); + /* indicate KVM hypercall interface */ + _FDT(fdt_setprop_string(fdt, hypervisor, "compatible", "linux,kvm")); + if (kvmppc_has_cap_fixup_hcalls()) { + /* + * Older KVM versions with older guest kernels were broken + * with the magic page, don't allow the guest to map it. + */ + if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, + sizeof(hypercall))) { + _FDT(fdt_setprop(fdt, hypervisor, "hcall-instructions", + hypercall, sizeof(hypercall))); + } + } +} + +void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) +{ + MachineState *machine = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(machine); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + uint32_t root_drc_type_mask = 0; + int ret; + void *fdt; + SpaprPhbState *phb; + char *buf; + + fdt = g_malloc0(space); + _FDT((fdt_create_empty_tree(fdt, space))); + + /* Root node */ + _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp")); + _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)")); + _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries")); + + /* Guest UUID & Name*/ + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf)); + if (qemu_uuid_set) { + _FDT(fdt_setprop_string(fdt, 0, "system-id", buf)); + } + g_free(buf); + + if (qemu_get_vm_name()) { + _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name", + qemu_get_vm_name())); + } + + /* Host Model & Serial Number */ + if (spapr->host_model) { + _FDT(fdt_setprop_string(fdt, 0, "host-model", spapr->host_model)); + } else if (smc->broken_host_serial_model && kvmppc_get_host_model(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-model", buf)); + g_free(buf); + } + + if (spapr->host_serial) { + _FDT(fdt_setprop_string(fdt, 0, "host-serial", spapr->host_serial)); + } else if (smc->broken_host_serial_model && kvmppc_get_host_serial(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf)); + g_free(buf); + } + + _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2)); + _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); + + /* /interrupt controller */ + spapr_irq_dt(spapr, spapr_max_server_number(spapr), fdt, PHANDLE_INTC); + + ret = spapr_dt_memory(spapr, fdt); + if (ret < 0) { + error_report("couldn't setup memory nodes 
in fdt"); + exit(1); + } + + /* /vdevice */ + spapr_dt_vdevice(spapr->vio_bus, fdt); + + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { + ret = spapr_dt_rng(fdt); + if (ret < 0) { + error_report("could not set up rng device in the fdt"); + exit(1); + } + } + + QLIST_FOREACH(phb, &spapr->phbs, list) { + ret = spapr_dt_phb(spapr, phb, PHANDLE_INTC, fdt, NULL); + if (ret < 0) { + error_report("couldn't setup PCI devices in fdt"); + exit(1); + } + } + + spapr_dt_cpus(fdt, spapr); + + /* ibm,drc-indexes and friends */ + if (smc->dr_lmb_enabled) { + root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_LMB; + } + if (smc->dr_phb_enabled) { + root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PHB; + } + if (mc->nvdimm_supported) { + root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PMEM; + } + if (root_drc_type_mask) { + _FDT(spapr_dt_drc(fdt, 0, NULL, root_drc_type_mask)); + } + + if (mc->has_hotpluggable_cpus) { + int offset = fdt_path_offset(fdt, "/cpus"); + ret = spapr_dt_drc(fdt, offset, NULL, SPAPR_DR_CONNECTOR_TYPE_CPU); + if (ret < 0) { + error_report("Couldn't set up CPU DR device tree properties"); + exit(1); + } + } + + /* /event-sources */ + spapr_dt_events(spapr, fdt); + + /* /rtas */ + spapr_dt_rtas(spapr, fdt); + + /* /chosen */ + spapr_dt_chosen(spapr, fdt, reset); + + /* /hypervisor */ + if (kvm_enabled()) { + spapr_dt_hypervisor(spapr, fdt); + } + + /* Build memory reserve map */ + if (reset) { + if (spapr->kernel_size) { + _FDT((fdt_add_mem_rsv(fdt, spapr->kernel_addr, + spapr->kernel_size))); + } + if (spapr->initrd_size) { + _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, + spapr->initrd_size))); + } + } + + /* NVDIMM devices */ + if (mc->nvdimm_supported) { + spapr_dt_persistent_memory(spapr, fdt); + } + + return fdt; +} + +static uint64_t translate_kernel_address(void *opaque, uint64_t addr) +{ + SpaprMachineState *spapr = opaque; + + return (addr & 0x0fffffff) + spapr->kernel_addr; +} + +static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, + PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* The TCG path should also be holding the BQL at this point */ + g_assert(qemu_mutex_iothread_locked()); + + if (msr_pr) { + hcall_dprintf("Hypercall made with MSR[PR]=1\n"); + env->gpr[3] = H_PRIVILEGE; + } else { + env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]); + } +} + +struct LPCRSyncState { + target_ulong value; + target_ulong mask; +}; + +static void do_lpcr_sync(CPUState *cs, run_on_cpu_data arg) +{ + struct LPCRSyncState *s = arg.host_ptr; + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + target_ulong lpcr; + + cpu_synchronize_state(cs); + lpcr = env->spr[SPR_LPCR]; + lpcr &= ~s->mask; + lpcr |= s->value; + ppc_store_lpcr(cpu, lpcr); +} + +void spapr_set_all_lpcrs(target_ulong value, target_ulong mask) +{ + CPUState *cs; + struct LPCRSyncState s = { + .value = value, + .mask = mask + }; + CPU_FOREACH(cs) { + run_on_cpu(cs, do_lpcr_sync, RUN_ON_CPU_HOST_PTR(&s)); + } +} + +static void spapr_get_pate(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + /* Copy PATE1:GR into PATE0:HR */ + entry->dw0 = spapr->patb_entry & PATE0_HR; + entry->dw1 = spapr->patb_entry; +} + +#define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2)) +#define HPTE_VALID(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID) +#define HPTE_DIRTY(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY) +#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) 
+#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) + +/* + * Get the fd to access the kernel htab, re-opening it if necessary + */ +static int get_htab_fd(SpaprMachineState *spapr) +{ + Error *local_err = NULL; + + if (spapr->htab_fd >= 0) { + return spapr->htab_fd; + } + + spapr->htab_fd = kvmppc_get_htab_fd(false, 0, &local_err); + if (spapr->htab_fd < 0) { + error_report_err(local_err); + } + + return spapr->htab_fd; +} + +void close_htab_fd(SpaprMachineState *spapr) +{ + if (spapr->htab_fd >= 0) { + close(spapr->htab_fd); + } + spapr->htab_fd = -1; +} + +static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1; +} + +static target_ulong spapr_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + assert(kvm_enabled()); + + if (!spapr->htab) { + return 0; + } + + return (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18); +} + +static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp, + hwaddr ptex, int n) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + /* + * HTAB is controlled by KVM. Fetch into temporary buffer + */ + ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64); + kvmppc_read_hptes(hptes, ptex, n); + return hptes; + } + + /* + * HTAB is controlled by QEMU. Just point to the internally + * accessible PTEG. + */ + return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset); +} + +static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp, + const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + g_free((void *)hptes); + } + + /* Nothing to do for qemu managed HPT */ +} + +void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, + uint64_t pte0, uint64_t pte1) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(cpu->vhyp); + hwaddr offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + kvmppc_write_hpte(ptex, pte0, pte1); + } else { + if (pte0 & HPTE64_V_VALID) { + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); + /* + * When setting valid, we write PTE1 first. This ensures + * proper synchronization with the reading code in + * ppc_hash64_pteg_search() + */ + smp_wmb(); + stq_p(spapr->htab + offset, pte0); + } else { + stq_p(spapr->htab + offset, pte0); + /* + * When clearing it we set PTE0 first. 
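
[Editor's note — illustrative sketch, not part of this patch] The ordering rule in spapr_store_hpte() above generalizes to a publish/retract pattern: the word the reader checks first (PTE0, which holds the VALID bit) must be the last store when publishing and the first store when tearing down, with a write barrier in between (smp_wmb() in QEMU; a release fence in portable C11 below):

    #include <stdatomic.h>
    #include <stdint.h>

    static void publish_hpte(_Atomic uint64_t *pte0, _Atomic uint64_t *pte1,
                             uint64_t v0, uint64_t v1)
    {
        atomic_store_explicit(pte1, v1, memory_order_relaxed); /* payload    */
        atomic_thread_fence(memory_order_release);             /* smp_wmb()  */
        atomic_store_explicit(pte0, v0, memory_order_relaxed); /* then valid */
    }

    static void retract_hpte(_Atomic uint64_t *pte0, _Atomic uint64_t *pte1,
                             uint64_t v1)
    {
        atomic_store_explicit(pte0, 0, memory_order_relaxed);  /* clear valid  */
        atomic_thread_fence(memory_order_release);
        atomic_store_explicit(pte1, v1, memory_order_relaxed); /* then payload */
    }

    int main(void)
    {
        _Atomic uint64_t pte0 = 0, pte1 = 0;
        publish_hpte(&pte0, &pte1, 1 /* VALID */, 0x1234);
        retract_hpte(&pte0, &pte1, 0);
        return 0;
    }
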
This ensures proper + * synchronization with the reading code in + * ppc_hash64_pteg_search() + */ + smp_wmb(); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); + } + } +} + +static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte1) +{ + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + /* There should always be a hash table when this is called */ + error_report("spapr_hpte_set_c called with no hash table !"); + return; + } + + /* The HW performs a non-atomic byte update */ + stb_p(spapr->htab + offset, (pte1 & 0xff) | 0x80); +} + +static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte1) +{ + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + /* There should always be a hash table when this is called */ + error_report("spapr_hpte_set_r called with no hash table !"); + return; + } + + /* The HW performs a non-atomic byte update */ + stb_p(spapr->htab + offset, ((pte1 >> 8) & 0xff) | 0x01); +} + +int spapr_hpt_shift_for_ramsize(uint64_t ramsize) +{ + int shift; + + /* We aim for a hash table of size 1/128 the size of RAM (rounded + * up). The PAPR recommendation is actually 1/64 of RAM size, but + * that's much more than is needed for Linux guests */ + shift = ctz64(pow2ceil(ramsize)) - 7; + shift = MAX(shift, 18); /* Minimum architected size */ + shift = MIN(shift, 46); /* Maximum architected size */ + return shift; +} + +void spapr_free_hpt(SpaprMachineState *spapr) +{ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); +} + +int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp) +{ + ERRP_GUARD(); + long rc; + + /* Clean up any HPT info from a previous boot */ + spapr_free_hpt(spapr); + + rc = kvmppc_reset_htab(shift); + + if (rc == -EOPNOTSUPP) { + error_setg(errp, "HPT not supported in nested guests"); + return -EOPNOTSUPP; + } + + if (rc < 0) { + /* kernel-side HPT needed, but couldn't allocate one */ + error_setg_errno(errp, errno, "Failed to allocate KVM HPT of order %d", + shift); + error_append_hint(errp, "Try smaller maxmem?\n"); + return -errno; + } else if (rc > 0) { + /* kernel-side HPT allocated */ + if (rc != shift) { + error_setg(errp, + "Requested order %d HPT, but kernel allocated order %ld", + shift, rc); + error_append_hint(errp, "Try smaller maxmem?\n"); + return -ENOSPC; + } + + spapr->htab_shift = shift; + spapr->htab = NULL; + } else { + /* kernel-side HPT not needed, allocate in userspace instead */ + size_t size = 1ULL << shift; + int i; + + spapr->htab = qemu_memalign(size, size); + memset(spapr->htab, 0, size); + spapr->htab_shift = shift; + + for (i = 0; i < size / HASH_PTE_SIZE_64; i++) { + DIRTY_HPTE(HPTE(spapr->htab, i)); + } + } + /* We're setting up a hash table, so that means we're not radix */ + spapr->patb_entry = 0; + spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT); + return 0; +} + +void spapr_setup_hpt(SpaprMachineState *spapr) +{ + int hpt_shift; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + } else { + uint64_t current_ram_size; + + current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size(); + hpt_shift = spapr_hpt_shift_for_ramsize(current_ram_size); + } + spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal); + + if (kvm_enabled()) { + hwaddr vrma_limit = 
kvmppc_vrma_limit(spapr->htab_shift); + + /* Check our RMA fits in the possible VRMA */ + if (vrma_limit < spapr->rma_size) { + error_report("Unable to create %" HWADDR_PRIu + "MiB RMA (VRMA only allows %" HWADDR_PRIu "MiB", + spapr->rma_size / MiB, vrma_limit / MiB); + exit(EXIT_FAILURE); + } + } +} + +void spapr_check_mmu_mode(bool guest_radix) +{ + if (guest_radix) { + if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { + error_report("Guest requested unavailable MMU mode (radix)."); + exit(EXIT_FAILURE); + } + } else { + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() + && !kvmppc_has_cap_mmu_hash_v3()) { + error_report("Guest requested unavailable MMU mode (hash)."); + exit(EXIT_FAILURE); + } + } +} + +static void spapr_machine_reset(MachineState *machine) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(machine); + PowerPCCPU *first_ppc_cpu; + hwaddr fdt_addr; + void *fdt; + int rc; + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && + ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + /* + * If using KVM with radix mode available, VCPUs can be started + * without a HPT because KVM will start them in radix mode. + * Set the GR bit in PATE so that we know there is no HPT. + */ + spapr->patb_entry = PATE1_GR; + spapr_set_all_lpcrs(LPCR_HR | LPCR_UPRT, LPCR_HR | LPCR_UPRT); + } else { + spapr_setup_hpt(spapr); + } + + qemu_devices_reset(); + + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_new(); + + ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); + + /* + * This is fixing some of the default configuration of the XIVE + * devices. To be called after the reset of the machine devices. + */ + spapr_irq_reset(spapr, &error_fatal); + + /* + * There is no CAS under qtest. Simulate one to please the code that + * depends on spapr->ov5_cas. This is especially needed to test device + * unplug, so we do that before resetting the DRCs. + */ + if (qtest_enabled()) { + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_clone(spapr->ov5); + } + + /* DRC reset may cause a device to be unplugged. This will cause troubles + * if this device is used by another device (eg, a running vhost backend + * will crash QEMU if the DIMM holding the vring goes away). To avoid such + * situations, we reset DRCs after all devices have been reset. + */ + spapr_drc_reset_all(spapr); + + spapr_clear_pending_events(spapr); + + /* + * We place the device tree just below either the top of the RMA, + * or just below 2GB, whichever is lower, so that it can be + * processed with 32-bit real mode code if necessary + */ + fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE; + + fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); + if (spapr->vof) { + spapr_vof_reset(spapr, fdt, &error_fatal); + /* + * Do not pack the FDT as the client may change properties. + * VOF client does not expect the FDT so we do not load it to the VM. 
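
[Editor's note — illustrative sketch, not part of this patch] The device-tree placement above is a one-liner worth checking with numbers: the FDT goes just below the top of the RMA, or just below 2 GiB, whichever is lower, so 32-bit real-mode code can still reach it. FDT_MAX_SIZE of 1 MiB below is an assumption standing in for the spapr.h constant:

    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint64_t FDT_MAX_ADDR = 0x80000000;  /* from this file        */
        uint64_t FDT_MAX_SIZE = 0x100000;    /* assumed, 1 MiB        */
        uint64_t rma_size = 512ULL << 20;    /* example: 512 MiB RMA  */

        uint64_t fdt_addr = MIN(rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE;
        printf("fdt at %#llx\n",             /* 0x1ff00000 here */
               (unsigned long long)fdt_addr);
        return 0;
    }
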
+ */ + } else { + rc = fdt_pack(fdt); + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); + + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + 0, fdt_addr, 0); + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + } + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + + g_free(spapr->fdt_blob); + spapr->fdt_size = fdt_totalsize(fdt); + spapr->fdt_initial_size = spapr->fdt_size; + spapr->fdt_blob = fdt; + + /* Set up the entry state */ + first_ppc_cpu->env.gpr[5] = 0; + + spapr->fwnmi_system_reset_addr = -1; + spapr->fwnmi_machine_check_addr = -1; + spapr->fwnmi_machine_check_interlock = -1; + + /* Signal all vCPUs waiting on this condition */ + qemu_cond_broadcast(&spapr->fwnmi_machine_check_interlock_cond); + + migrate_del_blocker(spapr->fwnmi_migration_blocker); +} + +static void spapr_create_nvram(SpaprMachineState *spapr) +{ + DeviceState *dev = qdev_new("spapr-nvram"); + DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0); + + if (dinfo) { + qdev_prop_set_drive_err(dev, "drive", blk_by_legacy_dinfo(dinfo), + &error_fatal); + } + + qdev_realize_and_unref(dev, &spapr->vio_bus->bus, &error_fatal); + + spapr->nvram = (struct SpaprNvram *)dev; +} + +static void spapr_rtc_create(SpaprMachineState *spapr) +{ + object_initialize_child_with_props(OBJECT(spapr), "rtc", &spapr->rtc, + sizeof(spapr->rtc), TYPE_SPAPR_RTC, + &error_fatal, NULL); + qdev_realize(DEVICE(&spapr->rtc), NULL, &error_fatal); + object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc), + "date"); +} + +/* Returns whether we want to use VGA or not */ +static bool spapr_vga_init(PCIBus *pci_bus, Error **errp) +{ + switch (vga_interface_type) { + case VGA_NONE: + return false; + case VGA_DEVICE: + return true; + case VGA_STD: + case VGA_VIRTIO: + case VGA_CIRRUS: + return pci_vga_init(pci_bus) != NULL; + default: + error_setg(errp, + "Unsupported VGA mode, only -vga std or -vga virtio is supported"); + return false; + } +} + +static int spapr_pre_load(void *opaque) +{ + int rc; + + rc = spapr_caps_pre_load(opaque); + if (rc) { + return rc; + } + + return 0; +} + +static int spapr_post_load(void *opaque, int version_id) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + int err = 0; + + err = spapr_caps_post_migration(spapr); + if (err) { + return err; + } + + /* + * In earlier versions, there was no separate qdev for the PAPR + * RTC, so the RTC offset was stored directly in sPAPREnvironment. + * So when migrating from those versions, poke the incoming offset + * value into the RTC device + */ + if (version_id < 3) { + err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset); + if (err) { + return err; + } + } + + if (kvm_enabled() && spapr->patb_entry) { + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + bool radix = !!(spapr->patb_entry & PATE1_GR); + bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE); + + /* + * Update LPCR:HR and UPRT as they may not be set properly in + * the stream + */ + spapr_set_all_lpcrs(radix ? 
(LPCR_HR | LPCR_UPRT) : 0,
+                            LPCR_HR | LPCR_UPRT);
+
+        err = kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry);
+        if (err) {
+            error_report("Process table config unsupported by the host");
+            return -EINVAL;
+        }
+    }
+
+    err = spapr_irq_post_load(spapr, version_id);
+    if (err) {
+        return err;
+    }
+
+    return err;
+}
+
+static int spapr_pre_save(void *opaque)
+{
+    int rc;
+
+    rc = spapr_caps_pre_save(opaque);
+    if (rc) {
+        return rc;
+    }
+
+    return 0;
+}
+
+static bool version_before_3(void *opaque, int version_id)
+{
+    return version_id < 3;
+}
+
+static bool spapr_pending_events_needed(void *opaque)
+{
+    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+    return !QTAILQ_EMPTY(&spapr->pending_events);
+}
+
+static const VMStateDescription vmstate_spapr_event_entry = {
+    .name = "spapr_event_log_entry",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(summary, SpaprEventLogEntry),
+        VMSTATE_UINT32(extended_length, SpaprEventLogEntry),
+        VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, SpaprEventLogEntry, 0,
+                                     NULL, extended_length),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_pending_events = {
+    .name = "spapr_pending_events",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_pending_events_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_QTAILQ_V(pending_events, SpaprMachineState, 1,
+                         vmstate_spapr_event_entry, SpaprEventLogEntry, next),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static bool spapr_ov5_cas_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+    SpaprOptionVector *ov5_mask = spapr_ovec_new();
+    bool cas_needed;
+
+    /* Prior to the introduction of SpaprOptionVector, we had two option
+     * vectors we dealt with: OV5_FORM1_AFFINITY, and OV5_DRCONF_MEMORY.
+     * Both of these options encode machine topology into the device-tree
+     * in such a way that the now-booted OS should still be able to interact
+     * appropriately with QEMU regardless of what options were actually
+     * negotiated on the source side.
+     *
+     * As such, we can avoid migrating the CAS-negotiated options if these
+     * are the only options available on the current machine/platform.
+     * Since these are the only options available for pseries-2.7 and
+     * earlier, this allows us to maintain old->new/new->old migration
+     * compatibility.
+     *
+     * For QEMU 2.8+, there are additional CAS-negotiable options available
+     * via default pseries-2.8 machines and explicit command-line parameters.
+     * Some of these options, like OV5_HP_EVT, *do* require QEMU to be aware
+     * of the actual CAS-negotiated values to continue working properly. For
+     * example, availability of memory unplug depends on knowing whether
+     * OV5_HP_EVT was negotiated via CAS.
+     *
+     * Thus, for any cases where the set of available CAS-negotiable
+     * options extends beyond OV5_FORM1_AFFINITY and OV5_DRCONF_MEMORY, we
+     * include the CAS-negotiated options in the migration stream, unless
+     * they affect boot time behaviour only.
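
[Editor's note — illustrative sketch, not part of this patch] The decision described above reduces to a subset test: migrate the CAS subsection only if the guest negotiated bits outside an always-safe mask. With option vectors modeled as plain bitmasks (the bit values below are arbitrary placeholders, not the real OV5 encodings), the logic is:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool ovec_subset(uint32_t ov, uint32_t mask)
    {
        return (ov & ~mask) == 0;  /* no bits beyond the mask */
    }

    int main(void)
    {
        uint32_t MASK = 0x7;  /* FORM1_AFFINITY | DRCONF_MEMORY | DRMEM_V2 */
        printf("cas_needed=%d\n", !ovec_subset(0x1 | 0x8, MASK)); /* 1 */
        printf("cas_needed=%d\n", !ovec_subset(0x1, MASK));       /* 0 */
        return 0;
    }
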
+ */ + spapr_ovec_set(ov5_mask, OV5_FORM1_AFFINITY); + spapr_ovec_set(ov5_mask, OV5_DRCONF_MEMORY); + spapr_ovec_set(ov5_mask, OV5_DRMEM_V2); + + /* We need extra information if we have any bits outside the mask + * defined above */ + cas_needed = !spapr_ovec_subset(spapr->ov5, ov5_mask); + + spapr_ovec_cleanup(ov5_mask); + + return cas_needed; +} + +static const VMStateDescription vmstate_spapr_ov5_cas = { + .name = "spapr_option_vector_ov5_cas", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_ov5_cas_needed, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER_V(ov5_cas, SpaprMachineState, 1, + vmstate_spapr_ovec, SpaprOptionVector), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_patb_entry_needed(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + return !!spapr->patb_entry; +} + +static const VMStateDescription vmstate_spapr_patb_entry = { + .name = "spapr_patb_entry", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_patb_entry_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(patb_entry, SpaprMachineState), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_irq_map_needed(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr); +} + +static const VMStateDescription vmstate_spapr_irq_map = { + .name = "spapr_irq_map", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_irq_map_needed, + .fields = (VMStateField[]) { + VMSTATE_BITMAP(irq_map, SpaprMachineState, 0, irq_map_nr), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_dtb_needed(void *opaque) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(opaque); + + return smc->update_dt_enabled; +} + +static int spapr_dtb_pre_load(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + g_free(spapr->fdt_blob); + spapr->fdt_blob = NULL; + spapr->fdt_size = 0; + + return 0; +} + +static const VMStateDescription vmstate_spapr_dtb = { + .name = "spapr_dtb", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_dtb_needed, + .pre_load = spapr_dtb_pre_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(fdt_initial_size, SpaprMachineState), + VMSTATE_UINT32(fdt_size, SpaprMachineState), + VMSTATE_VBUFFER_ALLOC_UINT32(fdt_blob, SpaprMachineState, 0, NULL, + fdt_size), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_fwnmi_needed(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + return spapr->fwnmi_machine_check_addr != -1; +} + +static int spapr_fwnmi_pre_save(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + /* + * Check if machine check handling is in progress and print a + * warning message. + */ + if (spapr->fwnmi_machine_check_interlock != -1) { + warn_report("A machine check is being handled during migration. 
The" + "handler may run and log hardware error on the destination"); + } + + return 0; +} + +static const VMStateDescription vmstate_spapr_fwnmi = { + .name = "spapr_fwnmi", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_fwnmi_needed, + .pre_save = spapr_fwnmi_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT64(fwnmi_system_reset_addr, SpaprMachineState), + VMSTATE_UINT64(fwnmi_machine_check_addr, SpaprMachineState), + VMSTATE_INT32(fwnmi_machine_check_interlock, SpaprMachineState), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr = { + .name = "spapr", + .version_id = 3, + .minimum_version_id = 1, + .pre_load = spapr_pre_load, + .post_load = spapr_post_load, + .pre_save = spapr_pre_save, + .fields = (VMStateField[]) { + /* used to be @next_irq */ + VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4), + + /* RTC offset */ + VMSTATE_UINT64_TEST(rtc_offset, SpaprMachineState, version_before_3), + + VMSTATE_PPC_TIMEBASE_V(tb, SpaprMachineState, 2), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_spapr_ov5_cas, + &vmstate_spapr_patb_entry, + &vmstate_spapr_pending_events, + &vmstate_spapr_cap_htm, + &vmstate_spapr_cap_vsx, + &vmstate_spapr_cap_dfp, + &vmstate_spapr_cap_cfpc, + &vmstate_spapr_cap_sbbc, + &vmstate_spapr_cap_ibs, + &vmstate_spapr_cap_hpt_maxpagesize, + &vmstate_spapr_irq_map, + &vmstate_spapr_cap_nested_kvm_hv, + &vmstate_spapr_dtb, + &vmstate_spapr_cap_large_decr, + &vmstate_spapr_cap_ccf_assist, + &vmstate_spapr_cap_fwnmi, + &vmstate_spapr_fwnmi, + &vmstate_spapr_cap_rpt_invalidate, + NULL + } +}; + +static int htab_save_setup(QEMUFile *f, void *opaque) +{ + SpaprMachineState *spapr = opaque; + + /* "Iteration" header */ + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + } else { + qemu_put_be32(f, spapr->htab_shift); + } + + if (spapr->htab) { + spapr->htab_save_index = 0; + spapr->htab_first_pass = true; + } else { + if (spapr->htab_shift) { + assert(kvm_enabled()); + } + } + + + return 0; +} + +static void htab_save_chunk(QEMUFile *f, SpaprMachineState *spapr, + int chunkstart, int n_valid, int n_invalid) +{ + qemu_put_be32(f, chunkstart); + qemu_put_be16(f, n_valid); + qemu_put_be16(f, n_invalid); + qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), + HASH_PTE_SIZE_64 * n_valid); +} + +static void htab_save_end_marker(QEMUFile *f) +{ + qemu_put_be32(f, 0); + qemu_put_be16(f, 0); + qemu_put_be16(f, 0); +} + +static void htab_save_first_pass(QEMUFile *f, SpaprMachineState *spapr, + int64_t max_ns) +{ + bool has_timeout = max_ns != -1; + int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; + int index = spapr->htab_save_index; + int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + assert(spapr->htab_first_pass); + + do { + int chunkstart; + + /* Consume invalid HPTEs */ + while ((index < htabslots) + && !HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + } + + /* Consume valid HPTEs */ + chunkstart = index; + while ((index < htabslots) && (index - chunkstart < USHRT_MAX) + && HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + } + + if (index > chunkstart) { + int n_valid = index - chunkstart; + + htab_save_chunk(f, spapr, chunkstart, n_valid, 0); + + if (has_timeout && + (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { + break; + } + } + } while ((index < htabslots) && !qemu_file_rate_limit(f)); + + if (index >= htabslots) { + assert(index == htabslots); + index = 0; + 
spapr->htab_first_pass = false; + } + spapr->htab_save_index = index; +} + +static int htab_save_later_pass(QEMUFile *f, SpaprMachineState *spapr, + int64_t max_ns) +{ + bool final = max_ns < 0; + int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; + int examined = 0, sent = 0; + int index = spapr->htab_save_index; + int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + assert(!spapr->htab_first_pass); + + do { + int chunkstart, invalidstart; + + /* Consume non-dirty HPTEs */ + while ((index < htabslots) + && !HPTE_DIRTY(HPTE(spapr->htab, index))) { + index++; + examined++; + } + + chunkstart = index; + /* Consume valid dirty HPTEs */ + while ((index < htabslots) && (index - chunkstart < USHRT_MAX) + && HPTE_DIRTY(HPTE(spapr->htab, index)) + && HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + examined++; + } + + invalidstart = index; + /* Consume invalid dirty HPTEs */ + while ((index < htabslots) && (index - invalidstart < USHRT_MAX) + && HPTE_DIRTY(HPTE(spapr->htab, index)) + && !HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + examined++; + } + + if (index > chunkstart) { + int n_valid = invalidstart - chunkstart; + int n_invalid = index - invalidstart; + + htab_save_chunk(f, spapr, chunkstart, n_valid, n_invalid); + sent += index - chunkstart; + + if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { + break; + } + } + + if (examined >= htabslots) { + break; + } + + if (index >= htabslots) { + assert(index == htabslots); + index = 0; + } + } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final)); + + if (index >= htabslots) { + assert(index == htabslots); + index = 0; + } + + spapr->htab_save_index = index; + + return (examined >= htabslots) && (sent == 0) ? 
1 : 0; +} + +#define MAX_ITERATION_NS 5000000 /* 5 ms */ +#define MAX_KVM_BUF_SIZE 2048 + +static int htab_save_iterate(QEMUFile *f, void *opaque) +{ + SpaprMachineState *spapr = opaque; + int fd; + int rc = 0; + + /* Iteration header */ + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 1; + } else { + qemu_put_be32(f, 0); + } + + if (!spapr->htab) { + assert(kvm_enabled()); + + fd = get_htab_fd(spapr); + if (fd < 0) { + return fd; + } + + rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS); + if (rc < 0) { + return rc; + } + } else if (spapr->htab_first_pass) { + htab_save_first_pass(f, spapr, MAX_ITERATION_NS); + } else { + rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS); + } + + htab_save_end_marker(f); + + return rc; +} + +static int htab_save_complete(QEMUFile *f, void *opaque) +{ + SpaprMachineState *spapr = opaque; + int fd; + + /* Iteration header */ + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 0; + } else { + qemu_put_be32(f, 0); + } + + if (!spapr->htab) { + int rc; + + assert(kvm_enabled()); + + fd = get_htab_fd(spapr); + if (fd < 0) { + return fd; + } + + rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, -1); + if (rc < 0) { + return rc; + } + } else { + if (spapr->htab_first_pass) { + htab_save_first_pass(f, spapr, -1); + } + htab_save_later_pass(f, spapr, -1); + } + + /* End marker */ + htab_save_end_marker(f); + + return 0; +} + +static int htab_load(QEMUFile *f, void *opaque, int version_id) +{ + SpaprMachineState *spapr = opaque; + uint32_t section_hdr; + int fd = -1; + Error *local_err = NULL; + + if (version_id < 1 || version_id > 1) { + error_report("htab_load() bad version"); + return -EINVAL; + } + + section_hdr = qemu_get_be32(f); + + if (section_hdr == -1) { + spapr_free_hpt(spapr); + return 0; + } + + if (section_hdr) { + int ret; + + /* First section gives the htab size */ + ret = spapr_reallocate_hpt(spapr, section_hdr, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + return 0; + } + + if (!spapr->htab) { + assert(kvm_enabled()); + + fd = kvmppc_get_htab_fd(true, 0, &local_err); + if (fd < 0) { + error_report_err(local_err); + return fd; + } + } + + while (true) { + uint32_t index; + uint16_t n_valid, n_invalid; + + index = qemu_get_be32(f); + n_valid = qemu_get_be16(f); + n_invalid = qemu_get_be16(f); + + if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) { + /* End of Stream */ + break; + } + + if ((index + n_valid + n_invalid) > + (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) { + /* Bad index in stream */ + error_report( + "htab_load() bad index %d (%hd+%hd entries) in htab stream (htab_shift=%d)", + index, n_valid, n_invalid, spapr->htab_shift); + return -EINVAL; + } + + if (spapr->htab) { + if (n_valid) { + qemu_get_buffer(f, HPTE(spapr->htab, index), + HASH_PTE_SIZE_64 * n_valid); + } + if (n_invalid) { + memset(HPTE(spapr->htab, index + n_valid), 0, + HASH_PTE_SIZE_64 * n_invalid); + } + } else { + int rc; + + assert(fd >= 0); + + rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid, + &local_err); + if (rc < 0) { + error_report_err(local_err); + return rc; + } + } + } + + if (!spapr->htab) { + assert(fd >= 0); + close(fd); + } + + return 0; +} + +static void htab_save_cleanup(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + close_htab_fd(spapr); +} + +static SaveVMHandlers savevm_htab_handlers = { + .save_setup = htab_save_setup, + .save_live_iterate = htab_save_iterate, + .save_live_complete_precopy = htab_save_complete, + .save_cleanup = htab_save_cleanup, + 
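+    /*
+     * Stream layout, as produced by htab_save_chunk() and
+     * htab_save_end_marker() above and parsed again by htab_load():
+     * each iteration body is a sequence of chunks of the form
+     *     be32 index | be16 n_valid | be16 n_invalid | n_valid HPTEs
+     * (HASH_PTE_SIZE_64 bytes each), terminated by an all-zero
+     * index/n_valid/n_invalid marker.
+     */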
.load_state = htab_load,
+};
+
+static void spapr_boot_set(void *opaque, const char *boot_device,
+                           Error **errp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
+
+    g_free(spapr->boot_device);
+    spapr->boot_device = g_strdup(boot_device);
+}
+
+static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr)
+{
+    MachineState *machine = MACHINE(spapr);
+    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
+    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size) / lmb_size;
+    int i;
+
+    for (i = 0; i < nr_lmbs; i++) {
+        uint64_t addr;
+
+        addr = i * lmb_size + machine->device_memory->base;
+        spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_LMB,
+                               addr / lmb_size);
+    }
+}
+
+/*
+ * If RAM size, maxmem size and individual node mem sizes aren't aligned
+ * to SPAPR_MEMORY_BLOCK_SIZE (256 MiB), then refuse to start the guest
+ * since we can't support such unaligned sizes with DRCONF_MEMORY.
+ */
+static void spapr_validate_node_memory(MachineState *machine, Error **errp)
+{
+    int i;
+
+    if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
+        error_setg(errp, "Memory size 0x" RAM_ADDR_FMT
+                   " is not aligned to %" PRIu64 " MiB",
+                   machine->ram_size,
+                   SPAPR_MEMORY_BLOCK_SIZE / MiB);
+        return;
+    }
+
+    if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) {
+        error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT
+                   " is not aligned to %" PRIu64 " MiB",
+                   machine->maxram_size,
+                   SPAPR_MEMORY_BLOCK_SIZE / MiB);
+        return;
+    }
+
+    for (i = 0; i < machine->numa_state->num_nodes; i++) {
+        if (machine->numa_state->nodes[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
+            error_setg(errp,
+                       "Node %d memory size 0x%" PRIx64
+                       " is not aligned to %" PRIu64 " MiB",
+                       i, machine->numa_state->nodes[i].node_mem,
+                       SPAPR_MEMORY_BLOCK_SIZE / MiB);
+            return;
+        }
+    }
+}
+
+/* Find the cpu slot in machine->possible_cpus by core_id */
+static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
+{
+    int index = id / ms->smp.threads;
+
+    if (index >= ms->possible_cpus->len) {
+        return NULL;
+    }
+    if (idx) {
+        *idx = index;
+    }
+    return &ms->possible_cpus->cpus[index];
+}
+
+static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
+{
+    MachineState *ms = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    Error *local_err = NULL;
+    bool vsmt_user = !!spapr->vsmt;
+    int kvm_smt = kvmppc_smt_threads();
+    int ret;
+    unsigned int smp_threads = ms->smp.threads;
+
+    if (!kvm_enabled() && (smp_threads > 1)) {
+        error_setg(errp, "TCG cannot support more than 1 thread/core "
+                   "on a pseries machine");
+        return;
+    }
+    if (!is_power_of_2(smp_threads)) {
+        error_setg(errp, "Cannot support %d threads/core on a pseries "
+                   "machine because it must be a power of 2", smp_threads);
+        return;
+    }
+
+    /* Determine the VSMT mode to use: */
+    if (vsmt_user) {
+        if (spapr->vsmt < smp_threads) {
+            error_setg(errp, "Cannot support VSMT mode %d"
+                       " because it must be >= threads/core (%d)",
+                       spapr->vsmt, smp_threads);
+            return;
+        }
+        /* In this case, spapr->vsmt has been set by the command line */
+    } else if (!smc->smp_threads_vsmt) {
+        /*
+         * Default VSMT value is tricky, because we need it to be as
+         * consistent as possible (for migration), but this requires
+         * changing it for at least some existing cases. We pick 8 as
+         * the value that we'd get with KVM on POWER8, the
+         * overwhelmingly common case in production systems.
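+         * Concretely, with smc->smp_threads_vsmt unset: smp_threads of
+         * 1, 2, 4 or 8 all yield vsmt = 8, while 16 yields vsmt = 16.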
+ */ + spapr->vsmt = MAX(8, smp_threads); + } else { + spapr->vsmt = smp_threads; + } + + /* KVM: If necessary, set the SMT mode: */ + if (kvm_enabled() && (spapr->vsmt != kvm_smt)) { + ret = kvmppc_set_smt_threads(spapr->vsmt); + if (ret) { + /* Looks like KVM isn't able to change VSMT mode */ + error_setg(&local_err, + "Failed to set KVM's VSMT mode to %d (errno %d)", + spapr->vsmt, ret); + /* We can live with that if the default one is big enough + * for the number of threads, and a submultiple of the one + * we want. In this case we'll waste some vcpu ids, but + * behaviour will be correct */ + if ((kvm_smt >= smp_threads) && ((spapr->vsmt % kvm_smt) == 0)) { + warn_report_err(local_err); + } else { + if (!vsmt_user) { + error_append_hint(&local_err, + "On PPC, a VM with %d threads/core" + " on a host with %d threads/core" + " requires the use of VSMT mode %d.\n", + smp_threads, kvm_smt, spapr->vsmt); + } + kvmppc_error_append_smt_possible_hint(&local_err); + error_propagate(errp, local_err); + } + } + } + /* else TCG: nothing to do currently */ +} + +static void spapr_init_cpus(SpaprMachineState *spapr) +{ + MachineState *machine = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(machine); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + const char *type = spapr_get_cpu_core_type(machine->cpu_type); + const CPUArchIdList *possible_cpus; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int smp_threads = machine->smp.threads; + unsigned int max_cpus = machine->smp.max_cpus; + int boot_cores_nr = smp_cpus / smp_threads; + int i; + + possible_cpus = mc->possible_cpu_arch_ids(machine); + if (mc->has_hotpluggable_cpus) { + if (smp_cpus % smp_threads) { + error_report("smp_cpus (%u) must be multiple of threads (%u)", + smp_cpus, smp_threads); + exit(1); + } + if (max_cpus % smp_threads) { + error_report("max_cpus (%u) must be multiple of threads (%u)", + max_cpus, smp_threads); + exit(1); + } + } else { + if (max_cpus != smp_cpus) { + error_report("This machine version does not support CPU hotplug"); + exit(1); + } + boot_cores_nr = possible_cpus->len; + } + + if (smc->pre_2_10_has_unused_icps) { + int i; + + for (i = 0; i < spapr_max_server_number(spapr); i++) { + /* Dummy entries get deregistered when real ICPState objects + * are registered during CPU core hotplug. 
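+             * Keeping one dummy per possible server number means an
+             * incoming pre-2.10 migration stream, which carries state
+             * for every possible ICP, always finds an instance to load
+             * into.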
+             */
+            pre_2_10_vmstate_register_dummy_icp(i);
+        }
+    }
+
+    for (i = 0; i < possible_cpus->len; i++) {
+        int core_id = i * smp_threads;
+
+        if (mc->has_hotpluggable_cpus) {
+            spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_CPU,
+                                   spapr_vcpu_id(spapr, core_id));
+        }
+
+        if (i < boot_cores_nr) {
+            Object *core = object_new(type);
+            int nr_threads = smp_threads;
+
+            /* Handle the partially filled core for older machine types */
+            if ((i + 1) * smp_threads >= smp_cpus) {
+                nr_threads = smp_cpus - i * smp_threads;
+            }
+
+            object_property_set_int(core, "nr-threads", nr_threads,
+                                    &error_fatal);
+            object_property_set_int(core, CPU_CORE_PROP_CORE_ID, core_id,
+                                    &error_fatal);
+            qdev_realize(DEVICE(core), NULL, &error_fatal);
+
+            object_unref(core);
+        }
+    }
+}
+
+static PCIHostState *spapr_create_default_phb(void)
+{
+    DeviceState *dev;
+
+    dev = qdev_new(TYPE_SPAPR_PCI_HOST_BRIDGE);
+    qdev_prop_set_uint32(dev, "index", 0);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    return PCI_HOST_BRIDGE(dev);
+}
+
+static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
+{
+    MachineState *machine = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    hwaddr rma_size = machine->ram_size;
+    hwaddr node0_size = spapr_node0_size(machine);
+
+    /* RMA has to fit in the first NUMA node */
+    rma_size = MIN(rma_size, node0_size);
+
+    /*
+     * VRMA access is via a special 1TiB SLB mapping, so the RMA can
+     * never exceed that
+     */
+    rma_size = MIN(rma_size, 1 * TiB);
+
+    /*
+     * Clamp the RMA size based on machine type. This is for
+     * migration compatibility with older qemu versions, which limited
+     * the RMA size for complicated and mostly bad reasons.
+     */
+    if (smc->rma_limit) {
+        rma_size = MIN(rma_size, smc->rma_limit);
+    }
+
+    if (rma_size < MIN_RMA_SLOF) {
+        error_setg(errp,
+                   "pSeries SLOF firmware requires >= %" PRId64
+                   " MiB guest RMA (Real Mode Area memory)",
+                   MIN_RMA_SLOF / MiB);
+        return 0;
+    }
+
+    return rma_size;
+}
+
+static void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr)
+{
+    MachineState *machine = MACHINE(spapr);
+    int i;
+
+    for (i = 0; i < machine->ram_slots; i++) {
+        spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_PMEM, i);
+    }
+}
+
+/* pSeries LPAR / sPAPR hardware init */
+static void spapr_machine_init(MachineState *machine)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(machine);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME;
+    const char *bios_name = machine->firmware ?: bios_default;
+    const char *kernel_filename = machine->kernel_filename;
+    const char *initrd_filename = machine->initrd_filename;
+    PCIHostState *phb;
+    int i;
+    MemoryRegion *sysmem = get_system_memory();
+    long load_limit, fw_size;
+    char *filename;
+    Error *resize_hpt_err = NULL;
+
+    /*
+     * If Secure VM (PEF) support is configured, then initialize it
+     */
+    pef_kvm_init(machine->cgs, &error_fatal);
+
+    msi_nonbroken = true;
+
+    QLIST_INIT(&spapr->phbs);
+    QTAILQ_INIT(&spapr->pending_dimm_unplugs);
+
+    /* Determine capabilities to run with */
+    spapr_caps_init(spapr);
+
+    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
+        /*
+         * If the user explicitly requested a mode we should either
+         * supply it, or fail completely (which we do below).
But if + * it's not set explicitly, we reset our mode to something + * that works + */ + if (resize_hpt_err) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + error_free(resize_hpt_err); + resize_hpt_err = NULL; + } else { + spapr->resize_hpt = smc->resize_hpt_default; + } + } + + assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT); + + if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) { + /* + * User requested HPT resize, but this host can't supply it. Bail out + */ + error_report_err(resize_hpt_err); + exit(1); + } + error_free(resize_hpt_err); + + spapr->rma_size = spapr_rma_size(spapr, &error_fatal); + + /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ + load_limit = MIN(spapr->rma_size, FDT_MAX_ADDR) - FW_OVERHEAD; + + /* + * VSMT must be set in order to be able to compute VCPU ids, ie to + * call spapr_max_server_number() or spapr_vcpu_id(). + */ + spapr_set_vsmt_mode(spapr, &error_fatal); + + /* Set up Interrupt Controller before we create the VCPUs */ + spapr_irq_init(spapr, &error_fatal); + + /* Set up containers for ibm,client-architecture-support negotiated options + */ + spapr->ov5 = spapr_ovec_new(); + spapr->ov5_cas = spapr_ovec_new(); + + if (smc->dr_lmb_enabled) { + spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY); + spapr_validate_node_memory(machine, &error_fatal); + } + + spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + + /* Do not advertise FORM2 NUMA support for pseries-6.1 and older */ + if (!smc->pre_6_2_numa_affinity) { + spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY); + } + + /* advertise support for dedicated HP event source to guests */ + if (spapr->use_hotplug_event_source) { + spapr_ovec_set(spapr->ov5, OV5_HP_EVT); + } + + /* advertise support for HPT resizing */ + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE); + } + + /* advertise support for ibm,dyamic-memory-v2 */ + spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2); + + /* advertise XIVE on POWER9 machines */ + if (spapr->irq->xive) { + spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT); + } + + /* init CPUs */ + spapr_init_cpus(spapr); + + spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine); + + /* Init numa_assoc_array */ + spapr_numa_associativity_init(spapr, machine); + + if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) && + ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_300); + /* KVM and TCG always allow GTSE with radix... */ + spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE); + } + /* ... but not with hash (currently). */ + + if (kvm_enabled()) { + /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */ + kvmppc_enable_logical_ci_hcalls(); + kvmppc_enable_set_mode_hcall(); + + /* H_CLEAR_MOD/_REF are mandatory in PAPR, but off by default */ + kvmppc_enable_clear_ref_mod_hcalls(); + + /* Enable H_PAGE_INIT */ + kvmppc_enable_h_page_init(); + } + + /* map RAM */ + memory_region_add_subregion(sysmem, 0, machine->ram); + + /* always allocate the device memory information */ + machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); + + /* initialize hotplug memory address space */ + if (machine->ram_size < machine->maxram_size) { + ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; + /* + * Limit the number of hotpluggable memory slots to half the number + * slots that KVM supports, leaving the other half for PCI and other + * devices. 
However, ensure that the number of slots doesn't drop below 32.
+         */
+        int max_memslots = kvm_enabled() ? kvm_get_max_memslots() / 2 :
+                           SPAPR_MAX_RAM_SLOTS;
+
+        if (max_memslots < SPAPR_MAX_RAM_SLOTS) {
+            max_memslots = SPAPR_MAX_RAM_SLOTS;
+        }
+        if (machine->ram_slots > max_memslots) {
+            error_report("Specified number of memory slots %"
+                         PRIu64 " exceeds max supported %d",
+                         machine->ram_slots, max_memslots);
+            exit(1);
+        }
+
+        machine->device_memory->base = ROUND_UP(machine->ram_size,
+                                                SPAPR_DEVICE_MEM_ALIGN);
+        memory_region_init(&machine->device_memory->mr, OBJECT(spapr),
+                           "device-memory", device_mem_size);
+        memory_region_add_subregion(sysmem, machine->device_memory->base,
+                                    &machine->device_memory->mr);
+    }
+
+    if (smc->dr_lmb_enabled) {
+        spapr_create_lmb_dr_connectors(spapr);
+    }
+
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) {
+        /* Create the error string for live migration blocker */
+        error_setg(&spapr->fwnmi_migration_blocker,
+                   "A machine check is being handled during migration. The"
+                   " handler may run and log hardware error on the destination");
+    }
+
+    if (mc->nvdimm_supported) {
+        spapr_create_nvdimm_dr_connectors(spapr);
+    }
+
+    /* Set up RTAS event infrastructure */
+    spapr_events_init(spapr);
+
+    /* Set up the RTC RTAS interfaces */
+    spapr_rtc_create(spapr);
+
+    /* Set up VIO bus */
+    spapr->vio_bus = spapr_vio_bus_init();
+
+    for (i = 0; serial_hd(i); i++) {
+        spapr_vty_create(spapr->vio_bus, serial_hd(i));
+    }
+
+    /* We always have at least the nvram device on VIO */
+    spapr_create_nvram(spapr);
+
+    /*
+     * Set up hotplug / dynamic-reconfiguration connectors. Top-level
+     * connectors (described in root DT node's "ibm,drc-types" property)
+     * are pre-initialized here. Additional child connectors (such as
+     * connectors for a PHB's PCI slots) are added as needed during their
+     * parent's realization.
+ */ + if (smc->dr_phb_enabled) { + for (i = 0; i < SPAPR_MAX_PHBS; i++) { + spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i); + } + } + + /* Set up PCI */ + spapr_pci_rtas_init(); + + phb = spapr_create_default_phb(); + + for (i = 0; i < nb_nics; i++) { + NICInfo *nd = &nd_table[i]; + + if (!nd->model) { + nd->model = g_strdup("spapr-vlan"); + } + + if (g_str_equal(nd->model, "spapr-vlan") || + g_str_equal(nd->model, "ibmveth")) { + spapr_vlan_create(spapr->vio_bus, nd); + } else { + pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL); + } + } + + for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) { + spapr_vscsi_create(spapr->vio_bus); + } + + /* Graphics */ + if (spapr_vga_init(phb->bus, &error_fatal)) { + spapr->has_graphics = true; + machine->usb |= defaults_enabled() && !machine->usb_disabled; + } + + if (machine->usb) { + if (smc->use_ohci_by_default) { + pci_create_simple(phb->bus, -1, "pci-ohci"); + } else { + pci_create_simple(phb->bus, -1, "nec-usb-xhci"); + } + + if (spapr->has_graphics) { + USBBus *usb_bus = usb_bus_find(-1); + + usb_create_simple(usb_bus, "usb-kbd"); + usb_create_simple(usb_bus, "usb-mouse"); + } + } + + if (kernel_filename) { + spapr->kernel_size = load_elf(kernel_filename, NULL, + translate_kernel_address, spapr, + NULL, NULL, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) { + spapr->kernel_size = load_elf(kernel_filename, NULL, + translate_kernel_address, spapr, + NULL, NULL, NULL, NULL, 0, + PPC_ELF_MACHINE, 0, 0); + spapr->kernel_le = spapr->kernel_size > 0; + } + if (spapr->kernel_size < 0) { + error_report("error loading %s: %s", kernel_filename, + load_elf_strerror(spapr->kernel_size)); + exit(1); + } + + /* load initrd */ + if (initrd_filename) { + /* Try to locate the initrd in the gap between the kernel + * and the firmware. Add a bit of space just in case + */ + spapr->initrd_base = (spapr->kernel_addr + spapr->kernel_size + + 0x1ffff) & ~0xffff; + spapr->initrd_size = load_image_targphys(initrd_filename, + spapr->initrd_base, + load_limit + - spapr->initrd_base); + if (spapr->initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + } + } + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!filename) { + error_report("Could not find LPAR firmware '%s'", bios_name); + exit(1); + } + fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE); + if (fw_size <= 0) { + error_report("Could not load LPAR firmware '%s'", filename); + exit(1); + } + g_free(filename); + + /* FIXME: Should register things through the MachineState's qdev + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); + register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine)); + + qemu_register_boot_set(spapr_boot_set, spapr); + + /* + * Nothing needs to be done to resume a suspended guest because + * suspending does not change the machine state, so no need for + * a ->wakeup method. 
+ */ + qemu_register_wakeup_support(); + + if (kvm_enabled()) { + /* to stop and start vmclock */ + qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change, + &spapr->tb); + + kvmppc_spapr_enable_inkernel_multitce(); + } + + qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond); + if (spapr->vof) { + spapr->vof->fw_size = fw_size; /* for claim() on itself */ + spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client); + } +} + +#define DEFAULT_KVM_TYPE "auto" +static int spapr_kvm_type(MachineState *machine, const char *vm_type) +{ + /* + * The use of g_ascii_strcasecmp() for 'hv' and 'pr' is to + * accomodate the 'HV' and 'PV' formats that exists in the + * wild. The 'auto' mode is being introduced already as + * lower-case, thus we don't need to bother checking for + * "AUTO". + */ + if (!vm_type || !strcmp(vm_type, DEFAULT_KVM_TYPE)) { + return 0; + } + + if (!g_ascii_strcasecmp(vm_type, "hv")) { + return 1; + } + + if (!g_ascii_strcasecmp(vm_type, "pr")) { + return 2; + } + + error_report("Unknown kvm-type specified '%s'", vm_type); + exit(1); +} + +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. + */ +static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ +#define CAST(type, obj, name) \ + ((type *)object_dynamic_cast(OBJECT(obj), (name))) + SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE); + SpaprPhbState *phb = CAST(SpaprPhbState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE); + VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON); + PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); + + if (d) { + void *spapr = CAST(void, bus->parent, "spapr-vscsi"); + VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI); + USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE); + + if (spapr) { + /* + * Replace "channel@0/disk@0,0" with "disk@8000000000000000": + * In the top 16 bits of the 64-bit LUN, we use SRP luns of the form + * 0x8000 | (target << 8) | (bus << 5) | lun + * (see the "Logical unit addressing format" table in SAM5) + */ + unsigned id = 0x8000 | (d->id << 8) | (d->channel << 5) | d->lun; + return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev), + (uint64_t)id << 48); + } else if (virtio) { + /* + * We use SRP luns of the form 01000000 | (target << 8) | lun + * in the top 32 bits of the 64-bit LUN + * Note: the quote above is from SLOF and it is wrong, + * the actual binding is: + * swap 0100 or 10 << or 20 << ( target lun-id -- srplun ) + */ + unsigned id = 0x1000000 | (d->id << 16) | d->lun; + if (d->lun >= 256) { + /* Use the LUN "flat space addressing method" */ + id |= 0x4000; + } + return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev), + (uint64_t)id << 32); + } else if (usb) { + /* + * We use SRP luns of the form 01000000 | (usb-port << 16) | lun + * in the top 32 bits of the 64-bit LUN + */ + unsigned usb_port = atoi(usb->port->path); + unsigned id = 0x1000000 | (usb_port << 16) | d->lun; + return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev), + (uint64_t)id << 32); + } + } + + /* + * SLOF probes the USB devices, and if it recognizes that the device is a + * storage device, it changes its name to "storage" instead of "usb-host", + * and additionally adds a child node for the SCSI LUN, so the correct + * boot path in SLOF is something like .../storage@1/disk@xxx" instead. 
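+ * As a worked instance of the SRP encodings above: a spapr-vscsi disk
+ * with id 1, channel 0, lun 0 encodes as 0x8000 | (1 << 8) = 0x8100,
+ * which lands in the top 16 bits of the 64-bit LUN and yields the
+ * path component "disk@8100000000000000".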
+ */ + if (strcmp("usb-host", qdev_fw_name(dev)) == 0) { + USBDevice *usbdev = CAST(USBDevice, dev, TYPE_USB_DEVICE); + if (usb_device_is_scsi_storage(usbdev)) { + return g_strdup_printf("storage@%s/disk", usbdev->port->path); + } + } + + if (phb) { + /* Replace "pci" with "pci@800000020000000" */ + return g_strdup_printf("pci@%"PRIX64, phb->buid); + } + + if (vsc) { + /* Same logic as virtio above */ + unsigned id = 0x1000000 | (vsc->target << 16) | vsc->lun; + return g_strdup_printf("disk@%"PRIX64, (uint64_t)id << 32); + } + + if (g_str_equal("pci-bridge", qdev_fw_name(dev))) { + /* SLOF uses "pci" instead of "pci-bridge" for PCI bridges */ + PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); + return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn)); + } + + if (pcidev) { + return spapr_pci_fw_dev_name(pcidev); + } + + return NULL; +} + +static char *spapr_get_kvm_type(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->kvm_type); +} + +static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->kvm_type); + spapr->kvm_type = g_strdup(value); +} + +static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->use_hotplug_event_source; +} + +static void spapr_set_modern_hotplug_events(Object *obj, bool value, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + spapr->use_hotplug_event_source = value; +} + +static bool spapr_get_msix_emulation(Object *obj, Error **errp) +{ + return true; +} + +static char *spapr_get_resize_hpt(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + switch (spapr->resize_hpt) { + case SPAPR_RESIZE_HPT_DEFAULT: + return g_strdup("default"); + case SPAPR_RESIZE_HPT_DISABLED: + return g_strdup("disabled"); + case SPAPR_RESIZE_HPT_ENABLED: + return g_strdup("enabled"); + case SPAPR_RESIZE_HPT_REQUIRED: + return g_strdup("required"); + } + g_assert_not_reached(); +} + +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (strcmp(value, "default") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT; + } else if (strcmp(value, "disabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + } else if (strcmp(value, "enabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED; + } else if (strcmp(value, "required") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED; + } else { + error_setg(errp, "Bad value for \"resize-hpt\" property"); + } +} + +static bool spapr_get_vof(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->vof != NULL; +} + +static void spapr_set_vof(Object *obj, bool value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->vof) { + vof_cleanup(spapr->vof); + g_free(spapr->vof); + spapr->vof = NULL; + } + if (!value) { + return; + } + spapr->vof = g_malloc0(sizeof(*spapr->vof)); +} + +static char *spapr_get_ic_mode(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->irq == &spapr_irq_xics_legacy) { + return g_strdup("legacy"); + } else if (spapr->irq == &spapr_irq_xics) { + return g_strdup("xics"); + } else if (spapr->irq == &spapr_irq_xive) { + return g_strdup("xive"); + } else if (spapr->irq == &spapr_irq_dual) { + return g_strdup("dual"); + } + 
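+    /* spapr->irq always points at one of the four backends tested above */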
g_assert_not_reached(); +} + +static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + error_setg(errp, "This machine only uses the legacy XICS backend, don't pass ic-mode"); + return; + } + + /* The legacy IRQ backend can not be set */ + if (strcmp(value, "xics") == 0) { + spapr->irq = &spapr_irq_xics; + } else if (strcmp(value, "xive") == 0) { + spapr->irq = &spapr_irq_xive; + } else if (strcmp(value, "dual") == 0) { + spapr->irq = &spapr_irq_dual; + } else { + error_setg(errp, "Bad value for \"ic-mode\" property"); + } +} + +static char *spapr_get_host_model(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->host_model); +} + +static void spapr_set_host_model(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->host_model); + spapr->host_model = g_strdup(value); +} + +static char *spapr_get_host_serial(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->host_serial); +} + +static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->host_serial); + spapr->host_serial = g_strdup(value); +} + +static void spapr_instance_init(Object *obj) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + MachineState *ms = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(ms); + + /* + * NVDIMM support went live in 5.1 without considering that, in + * other archs, the user needs to enable NVDIMM support with the + * 'nvdimm' machine option and the default behavior is NVDIMM + * support disabled. It is too late to roll back to the standard + * behavior without breaking 5.1 guests. + */ + if (mc->nvdimm_supported) { + ms->nvdimms_state->is_enabled = true; + } + + spapr->htab_fd = -1; + spapr->use_hotplug_event_source = true; + spapr->kvm_type = g_strdup(DEFAULT_KVM_TYPE); + object_property_add_str(obj, "kvm-type", + spapr_get_kvm_type, spapr_set_kvm_type); + object_property_set_description(obj, "kvm-type", + "Specifies the KVM virtualization mode (auto," + " hv, pr). Defaults to 'auto'. 
This mode will use" + " any available KVM module loaded in the host," + " where kvm_hv takes precedence if both kvm_hv and" + " kvm_pr are loaded."); + object_property_add_bool(obj, "modern-hotplug-events", + spapr_get_modern_hotplug_events, + spapr_set_modern_hotplug_events); + object_property_set_description(obj, "modern-hotplug-events", + "Use dedicated hotplug event mechanism in" + " place of standard EPOW events when possible" + " (required for memory hot-unplug support)"); + ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr, + "Maximum permitted CPU compatibility mode"); + + object_property_add_str(obj, "resize-hpt", + spapr_get_resize_hpt, spapr_set_resize_hpt); + object_property_set_description(obj, "resize-hpt", + "Resizing of the Hash Page Table (enabled, disabled, required)"); + object_property_add_uint32_ptr(obj, "vsmt", + &spapr->vsmt, OBJ_PROP_FLAG_READWRITE); + object_property_set_description(obj, "vsmt", + "Virtual SMT: KVM behaves as if this were" + " the host's SMT mode"); + + object_property_add_bool(obj, "vfio-no-msix-emulation", + spapr_get_msix_emulation, NULL); + + object_property_add_uint64_ptr(obj, "kernel-addr", + &spapr->kernel_addr, OBJ_PROP_FLAG_READWRITE); + object_property_set_description(obj, "kernel-addr", + stringify(KERNEL_LOAD_ADDR) + " for -kernel is the default"); + spapr->kernel_addr = KERNEL_LOAD_ADDR; + + object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof); + object_property_set_description(obj, "x-vof", + "Enable Virtual Open Firmware (experimental)"); + + /* The machine class defines the default interrupt controller mode */ + spapr->irq = smc->irq; + object_property_add_str(obj, "ic-mode", spapr_get_ic_mode, + spapr_set_ic_mode); + object_property_set_description(obj, "ic-mode", + "Specifies the interrupt controller mode (xics, xive, dual)"); + + object_property_add_str(obj, "host-model", + spapr_get_host_model, spapr_set_host_model); + object_property_set_description(obj, "host-model", + "Host model to advertise in guest device tree"); + object_property_add_str(obj, "host-serial", + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); +} + +static void spapr_machine_finalizefn(Object *obj) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->kvm_type); +} + +void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + cpu_synchronize_state(cs); + /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */ + if (spapr->fwnmi_system_reset_addr != -1) { + uint64_t rtas_addr, addr; + + /* get rtas addr from fdt */ + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + qemu_system_guest_panicked(NULL); + return; + } + + addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2; + stq_be_phys(&address_space_memory, addr, env->gpr[3]); + stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0); + env->gpr[3] = addr; + } + ppc_cpu_do_system_reset(cs); + if (spapr->fwnmi_system_reset_addr != -1) { + env->nip = spapr->fwnmi_system_reset_addr; + } +} + +static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL); + } +} + +int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, 
int *fdt_start_offset, Error **errp) +{ + uint64_t addr; + uint32_t node; + + addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE; + node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP, + &error_abort); + *fdt_start_offset = spapr_dt_memory_node(spapr, fdt, node, addr, + SPAPR_MEMORY_BLOCK_SIZE); + return 0; +} + +static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, + bool dedicated_hp_event_source) +{ + SpaprDrc *drc; + uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; + int i; + uint64_t addr = addr_start; + bool hotplugged = spapr_drc_hotplugged(dev); + + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + /* + * memory_device_get_free_addr() provided a range of free addresses + * that doesn't overlap with any existing mapping at pre-plug. The + * corresponding LMB DRCs are thus assumed to be all attachable. + */ + spapr_drc_attach(drc, dev); + if (!hotplugged) { + spapr_drc_reset(drc); + } + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + /* send hotplug notification to the + * guest only in case of hotplugged memory + */ + if (hotplugged) { + if (dedicated_hp_event_source) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + spapr_drc_index(drc)); + } else { + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs); + } + } +} + +static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + uint64_t size, addr; + int64_t slot; + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); + + size = memory_device_get_region_size(MEMORY_DEVICE(dev), &error_abort); + + pc_dimm_plug(dimm, MACHINE(ms)); + + if (!is_nvdimm) { + addr = object_property_get_uint(OBJECT(dimm), + PC_DIMM_ADDR_PROP, &error_abort); + spapr_add_lmbs(dev, addr, size, + spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT)); + } else { + slot = object_property_get_int(OBJECT(dimm), + PC_DIMM_SLOT_PROP, &error_abort); + /* We should have valid slot number at this point */ + g_assert(slot >= 0); + spapr_add_nvdimm(dev, slot); + } +} + +static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + const SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev); + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); + PCDIMMDevice *dimm = PC_DIMM(dev); + Error *local_err = NULL; + uint64_t size; + Object *memdev; + hwaddr pagesize; + + if (!smc->dr_lmb_enabled) { + error_setg(errp, "Memory hotplug not supported for this machine"); + return; + } + + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (is_nvdimm) { + if (!spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, errp)) { + return; + } + } else if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Hotplugged memory size must be a multiple of " + "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB); + return; + } + + memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, + &error_abort); + pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(memdev)); + if (!spapr_check_pagesize(spapr, pagesize, errp)) { + return; + } + + pc_dimm_pre_plug(dimm, MACHINE(hotplug_dev), NULL, errp); +} + 
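The pre-plug/plug pair above reduces every plugged DIMM to a run of SPAPR_MEMORY_BLOCK_SIZE LMBs whose DRC indices are simply the block-aligned guest addresses divided by the block size. A minimal standalone sketch of that bookkeeping, assuming the 256 MiB block size mentioned near spapr_validate_node_memory() and a hypothetical helper name:

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

#define BLOCK_SIZE (256ULL << 20)  /* stands in for SPAPR_MEMORY_BLOCK_SIZE */

/* First LMB DRC index and LMB count for a DIMM mapped at [addr, addr + size) */
static void dimm_to_lmb_drcs(uint64_t addr, uint64_t size,
                             uint32_t *first_drc, uint32_t *nr_lmbs)
{
    /* spapr_memory_pre_plug() rejects sizes that are not block multiples */
    assert(addr % BLOCK_SIZE == 0 && size % BLOCK_SIZE == 0);
    *first_drc = addr / BLOCK_SIZE;
    *nr_lmbs = size / BLOCK_SIZE;
}

int main(void)
{
    uint32_t drc, n;

    /* A 4 GiB DIMM plugged at guest address 32 GiB: 16 LMBs from DRC 128 */
    dimm_to_lmb_drcs(32ULL << 30, 4ULL << 30, &drc, &n);
    printf("first DRC %" PRIu32 ", %" PRIu32 " LMBs\n", drc, n);
    return 0;
}

This mirrors the addr / SPAPR_MEMORY_BLOCK_SIZE arithmetic that spapr_add_lmbs() above and spapr_lmb_dt_populate() apply per DRC.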
+struct SpaprDimmState { + PCDIMMDevice *dimm; + uint32_t nr_lmbs; + QTAILQ_ENTRY(SpaprDimmState) next; +}; + +static SpaprDimmState *spapr_pending_dimm_unplugs_find(SpaprMachineState *s, + PCDIMMDevice *dimm) +{ + SpaprDimmState *dimm_state = NULL; + + QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) { + if (dimm_state->dimm == dimm) { + break; + } + } + return dimm_state; +} + +static SpaprDimmState *spapr_pending_dimm_unplugs_add(SpaprMachineState *spapr, + uint32_t nr_lmbs, + PCDIMMDevice *dimm) +{ + SpaprDimmState *ds = NULL; + + /* + * If this request is for a DIMM whose removal had failed earlier + * (due to guest's refusal to remove the LMBs), we would have this + * dimm already in the pending_dimm_unplugs list. In that + * case don't add again. + */ + ds = spapr_pending_dimm_unplugs_find(spapr, dimm); + if (!ds) { + ds = g_malloc0(sizeof(SpaprDimmState)); + ds->nr_lmbs = nr_lmbs; + ds->dimm = dimm; + QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next); + } + return ds; +} + +static void spapr_pending_dimm_unplugs_remove(SpaprMachineState *spapr, + SpaprDimmState *dimm_state) +{ + QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next); + g_free(dimm_state); +} + +static SpaprDimmState *spapr_recover_pending_dimm_state(SpaprMachineState *ms, + PCDIMMDevice *dimm) +{ + SpaprDrc *drc; + uint64_t size = memory_device_get_region_size(MEMORY_DEVICE(dimm), + &error_abort); + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint32_t avail_lmbs = 0; + uint64_t addr_start, addr; + int i; + + addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + if (drc->dev) { + avail_lmbs++; + } + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm); +} + +void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev) +{ + SpaprDimmState *ds; + PCDIMMDevice *dimm; + SpaprDrc *drc; + uint32_t nr_lmbs; + uint64_t size, addr_start, addr; + g_autofree char *qapi_error = NULL; + int i; + + if (!dev) { + return; + } + + dimm = PC_DIMM(dev); + ds = spapr_pending_dimm_unplugs_find(spapr, dimm); + + /* + * 'ds == NULL' would mean that the DIMM doesn't have a pending + * unplug state, but one of its DRC is marked as unplug_requested. + * This is bad and weird enough to g_assert() out. + */ + g_assert(ds); + + spapr_pending_dimm_unplugs_remove(spapr, ds); + + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort); + nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + + addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drc->unplug_requested = false; + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + /* + * Tell QAPI that something happened and the memory + * hotunplug wasn't successful. Keep sending + * MEM_UNPLUG_ERROR even while sending + * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of + * MEM_UNPLUG_ERROR is due. + */ + qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest " + "for device %s", dev->id); + + qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error); + + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); +} + +/* Callback to be called during DRC release. 
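+ * Called once per LMB DRC of the unplugged DIMM; ds->nr_lmbs counts the
+ * releases down and only the final one runs the unplug handler chain.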
*/ +void spapr_lmb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); + + /* This information will get lost if a migration occurs + * during the unplug process. In this case recover it. */ + if (ds == NULL) { + ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev)); + g_assert(ds); + /* The DRC being examined by the caller at least must be counted */ + g_assert(ds->nr_lmbs); + } + + if (--ds->nr_lmbs) { + return; + } + + /* + * Now that all the LMBs have been removed by the guest, call the + * unplug handler chain. This can never fail. + */ + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); + SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); + + /* We really shouldn't get this far without anything to unplug */ + g_assert(ds); + + pc_dimm_unplug(PC_DIMM(dev), MACHINE(hotplug_dev)); + qdev_unrealize(dev); + spapr_pending_dimm_unplugs_remove(spapr, ds); +} + +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + uint32_t nr_lmbs; + uint64_t size, addr_start, addr; + int i; + SpaprDrc *drc; + + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + error_setg(errp, "nvdimm device hot unplug is not supported yet."); + return; + } + + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort); + nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + + addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + /* + * An existing pending dimm state for this DIMM means that there is an + * unplug operation in progress, waiting for the spapr_lmb_release + * callback to complete the job (BQL can't cover that far). In this case, + * bail out to avoid detaching DRCs that were already released. + */ + if (spapr_pending_dimm_unplugs_find(spapr, dimm)) { + error_setg(errp, "Memory unplug already in progress for device %s", + dev->id); + return; + } + + spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + spapr_drc_unplug_request(drc); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, spapr_drc_index(drc)); +} + +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + /* Call the unplug handler chain. This can never fail. 
*/ + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + MachineState *ms = MACHINE(hotplug_dev); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms); + CPUCore *cc = CPU_CORE(dev); + CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + + if (smc->pre_2_10_has_unused_icps) { + SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + CPUState *cs = CPU(sc->threads[i]); + + pre_2_10_vmstate_register_dummy_icp(cs->cpu_index); + } + } + + assert(core_slot); + core_slot->cpu = NULL; + qdev_unrealize(dev); +} + +static +void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + int index; + SpaprDrc *drc; + CPUCore *cc = CPU_CORE(dev); + + if (!spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index)) { + error_setg(errp, "Unable to find CPU core with core-id: %d", + cc->core_id); + return; + } + if (index == 0) { + error_setg(errp, "Boot CPU core may not be unplugged"); + return; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, + spapr_vcpu_id(spapr, cc->core_id)); + g_assert(drc); + + if (!spapr_drc_unplug_requested(drc)) { + spapr_drc_unplug_request(drc); + } + + /* + * spapr_hotplug_req_remove_by_index is left unguarded, out of the + * "!spapr_drc_unplug_requested" check, to allow for multiple IRQ + * pulses removing the same CPU. Otherwise, in an failed hotunplug + * attempt (e.g. the kernel will refuse to remove the last online + * CPU), we will never attempt it again because unplug_requested + * will still be 'true' in that case. + */ + spapr_hotplug_req_remove_by_index(drc); +} + +int spapr_core_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + SpaprCpuCore *core = SPAPR_CPU_CORE(drc->dev); + CPUState *cs = CPU(core->threads[0]); + PowerPCCPU *cpu = POWERPC_CPU(cs); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int id = spapr_get_vcpu_id(cpu); + g_autofree char *nodename = NULL; + int offset; + + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); + offset = fdt_add_subnode(fdt, 0, nodename); + + spapr_dt_cpu(cs, fdt, offset, spapr); + + /* + * spapr_dt_cpu() does not fill the 'name' property in the + * CPU node. The function is called during boot process, before + * and after CAS, and overwriting the 'name' property written + * by SLOF is not allowed. + * + * Write it manually after spapr_dt_cpu(). This makes the hotplug + * CPUs more compatible with the coldplugged ones, which have + * the 'name' property. Linux Kernel also relies on this + * property to identify CPU nodes. 
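+ * (With a hypothetical POWER9 core at id 0x8, the nodename built above
+ * would be "PowerPC,POWER9@8", which is also the value "name" receives.)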
+ */ + _FDT((fdt_setprop_string(fdt, offset, "name", nodename))); + + *fdt_start_offset = offset; + return 0; +} + +static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + MachineClass *mc = MACHINE_GET_CLASS(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + SpaprCpuCore *core = SPAPR_CPU_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(dev); + CPUState *cs; + SpaprDrc *drc; + CPUArchId *core_slot; + int index; + bool hotplugged = spapr_drc_hotplugged(dev); + int i; + + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); + g_assert(core_slot); /* Already checked in spapr_core_pre_plug() */ + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, + spapr_vcpu_id(spapr, cc->core_id)); + + g_assert(drc || !mc->has_hotpluggable_cpus); + + if (drc) { + /* + * spapr_core_pre_plug() already buys us this is a brand new + * core being plugged into a free slot. Nothing should already + * be attached to the corresponding DRC. + */ + spapr_drc_attach(drc, dev); + + if (hotplugged) { + /* + * Send hotplug notification interrupt to the guest only + * in case of hotplugged CPUs. + */ + spapr_hotplug_req_add_by_index(drc); + } else { + spapr_drc_reset(drc); + } + } + + core_slot->cpu = OBJECT(dev); + + /* + * Set compatibility mode to match the boot CPU, which was either set + * by the machine reset code or by CAS. This really shouldn't fail at + * this point. + */ + if (hotplugged) { + for (i = 0; i < cc->nr_threads; i++) { + ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr, + &error_abort); + } + } + + if (smc->pre_2_10_has_unused_icps) { + for (i = 0; i < cc->nr_threads; i++) { + cs = CPU(core->threads[i]); + pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); + } + } +} + +static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + MachineState *machine = MACHINE(OBJECT(hotplug_dev)); + MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); + CPUCore *cc = CPU_CORE(dev); + const char *base_core_type = spapr_get_cpu_core_type(machine->cpu_type); + const char *type = object_get_typename(OBJECT(dev)); + CPUArchId *core_slot; + int index; + unsigned int smp_threads = machine->smp.threads; + + if (dev->hotplugged && !mc->has_hotpluggable_cpus) { + error_setg(errp, "CPU hotplug not supported for this machine"); + return; + } + + if (strcmp(base_core_type, type)) { + error_setg(errp, "CPU core type should be %s", base_core_type); + return; + } + + if (cc->core_id % smp_threads) { + error_setg(errp, "invalid core id %d", cc->core_id); + return; + } + + /* + * In general we should have homogeneous threads-per-core, but old + * (pre hotplug support) machine types allow the last core to have + * reduced threads as a compatibility hack for when we allowed + * total vcpus not a multiple of threads-per-core. 
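+ * For instance, 12 vcpus at 8 threads/core used to yield one full
+ * 8-thread core plus a trailing 4-thread core; the check below only
+ * enforces homogeneity on machine types with hotpluggable CPUs.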
+ */ + if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { + error_setg(errp, "invalid nr-threads %d, must be %d", cc->nr_threads, + smp_threads); + return; + } + + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); + if (!core_slot) { + error_setg(errp, "core id %d out of range", cc->core_id); + return; + } + + if (core_slot->cpu) { + error_setg(errp, "core %d already populated", cc->core_id); + return; + } + + numa_cpu_pre_plug(core_slot, dev, errp); +} + +int spapr_phb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(drc->dev); + int intc_phandle; + + intc_phandle = spapr_irq_get_phandle(spapr, spapr->fdt_blob, errp); + if (intc_phandle <= 0) { + return -1; + } + + if (spapr_dt_phb(spapr, sphb, intc_phandle, fdt, fdt_start_offset)) { + error_setg(errp, "unable to create FDT node for PHB %d", sphb->index); + return -1; + } + + /* generally SLOF creates these, for hotplug it's up to QEMU */ + _FDT(fdt_setprop_string(fdt, *fdt_start_offset, "name", "pci")); + + return 0; +} + +static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + SpaprDrc *drc; + + if (dev->hotplugged && !smc->dr_phb_enabled) { + error_setg(errp, "PHB hotplug not supported for this machine"); + return false; + } + + if (sphb->index == (uint32_t)-1) { + error_setg(errp, "\"index\" for PAPR PHB is mandatory"); + return false; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + if (drc && drc->dev) { + error_setg(errp, "PHB %d already attached", sphb->index); + return false; + } + + /* + * This will check that sphb->index doesn't exceed the maximum number of + * PHBs for the current machine type. 
+ */ + return + smc->phb_placement(spapr, sphb->index, + &sphb->buid, &sphb->io_win_addr, + &sphb->mem_win_addr, &sphb->mem64_win_addr, + windows_supported, sphb->dma_liobn, + &sphb->nv2_gpa_win_addr, &sphb->nv2_atsd_win_addr, + errp); +} + +static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + SpaprDrc *drc; + bool hotplugged = spapr_drc_hotplugged(dev); + + if (!smc->dr_phb_enabled) { + return; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + /* hotplug hooks should check it's enabled before getting this far */ + assert(drc); + + /* spapr_phb_pre_plug() already checked the DRC is attachable */ + spapr_drc_attach(drc, dev); + + if (hotplugged) { + spapr_hotplug_req_add_by_index(drc); + } else { + spapr_drc_reset(drc); + } +} + +void spapr_phb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +static void spapr_phb_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + qdev_unrealize(dev); +} + +static void spapr_phb_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + SpaprDrc *drc; + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + assert(drc); + + if (!spapr_drc_unplug_requested(drc)) { + spapr_drc_unplug_request(drc); + spapr_hotplug_req_remove_by_index(drc); + } else { + error_setg(errp, + "PCI Host Bridge unplug already in progress for device %s", + dev->id); + } +} + +static +bool spapr_tpm_proxy_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + + if (spapr->tpm_proxy != NULL) { + error_setg(errp, "Only one TPM proxy can be specified for this machine"); + return false; + } + + return true; +} + +static void spapr_tpm_proxy_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(dev); + + /* Already checked in spapr_tpm_proxy_pre_plug() */ + g_assert(spapr->tpm_proxy == NULL); + + spapr->tpm_proxy = tpm_proxy; +} + +static void spapr_tpm_proxy_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + + qdev_unrealize(dev); + object_unparent(OBJECT(dev)); + spapr->tpm_proxy = NULL; +} + +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_plug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + spapr_core_plug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { + spapr_phb_plug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) { + spapr_tpm_proxy_plug(hotplug_dev, dev); + } +} + +static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_unplug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + spapr_core_unplug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), 
TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+        spapr_phb_unplug(hotplug_dev, dev);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        spapr_tpm_proxy_unplug(hotplug_dev, dev);
+    }
+}
+
+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
+{
+    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
+        /*
+         * CAS will process all pending unplug requests.
+         *
+         * HACK: a guest could theoretically have cleared all bits in OV5,
+         * but none of the guests we care for do.
+         */
+        spapr_ovec_empty(spapr->ov5_cas);
+}
+
+static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
+                                                DeviceState *dev, Error **errp)
+{
+    SpaprMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev));
+    MachineClass *mc = MACHINE_GET_CLASS(sms);
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+        if (spapr_memory_hot_unplug_supported(sms)) {
+            spapr_memory_unplug_request(hotplug_dev, dev, errp);
+        } else {
+            error_setg(errp, "Memory hot unplug not supported for this guest");
+        }
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+        if (!mc->has_hotpluggable_cpus) {
+            error_setg(errp, "CPU hot unplug not supported on this machine");
+            return;
+        }
+        spapr_core_unplug_request(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+        if (!smc->dr_phb_enabled) {
+            error_setg(errp, "PHB hot unplug not supported on this machine");
+            return;
+        }
+        spapr_phb_unplug_request(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        spapr_tpm_proxy_unplug(hotplug_dev, dev);
+    }
+}
+
+static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev,
+                                          DeviceState *dev, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+        spapr_memory_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+        spapr_core_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+        spapr_phb_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        spapr_tpm_proxy_pre_plug(hotplug_dev, dev, errp);
+    }
+}
+
+static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
+                                                 DeviceState *dev)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        return HOTPLUG_HANDLER(machine);
+    }
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        PCIDevice *pcidev = PCI_DEVICE(dev);
+        PCIBus *root = pci_device_root_bus(pcidev);
+        SpaprPhbState *phb =
+            (SpaprPhbState *)object_dynamic_cast(OBJECT(BUS(root)->parent),
+                                                 TYPE_SPAPR_PCI_HOST_BRIDGE);
+
+        if (phb) {
+            return HOTPLUG_HANDLER(phb);
+        }
+    }
+    return NULL;
+}
+
+static CpuInstanceProperties
+spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
+{
+    CPUArchId *core_slot;
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+    /* make sure possible_cpus is initialized */
+    mc->possible_cpu_arch_ids(machine);
+    /* get the CPU core slot containing the thread that matches cpu_index */
+    core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
+    assert(core_slot);
+    return core_slot->props;
+}
+
+static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx)
+{
+    return idx / ms->smp.cores % ms->numa_state->num_nodes;
+}
+
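+/*
+ * Illustrative sketch (not part of the original patch): the list built
+ * by spapr_possible_cpu_arch_ids() below is how callers enumerate core
+ * slots; the helper name example_dump_possible_cpus is hypothetical and
+ * the block is compiled out.  Note also how spapr_get_default_cpu_node_id()
+ * above distributes whole cores across NUMA nodes: with smp.cores = 4 and
+ * two nodes, core indexes 0..3 land on node 0, 4..7 on node 1, and 8..11
+ * wrap back to node 0.
+ */
+#if 0
+static void example_dump_possible_cpus(MachineState *machine)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const CPUArchIdList *list = mc->possible_cpu_arch_ids(machine);
+    int i;
+
+    for (i = 0; i < list->len; i++) {
+        /* arch_id doubles as the core-id: always a multiple of smp.threads */
+        printf("slot %d: core-id %" PRIu64 " (%s)\n", i,
+               list->cpus[i].arch_id,
+               list->cpus[i].cpu ? "occupied" : "free");
+    }
+}
+#endif
+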
+static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
+{
+    int i;
+    unsigned int smp_threads = machine->smp.threads;
+    unsigned int smp_cpus = machine->smp.cpus;
+    const char *core_type;
+    int spapr_max_cores = machine->smp.max_cpus / smp_threads;
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+    if (!mc->has_hotpluggable_cpus) {
+        spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads;
+    }
+    if (machine->possible_cpus) {
+        assert(machine->possible_cpus->len == spapr_max_cores);
+        return machine->possible_cpus;
+    }
+
+    core_type = spapr_get_cpu_core_type(machine->cpu_type);
+    if (!core_type) {
+        error_report("Unable to find sPAPR CPU Core definition");
+        exit(1);
+    }
+
+    machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+                                       sizeof(CPUArchId) * spapr_max_cores);
+    machine->possible_cpus->len = spapr_max_cores;
+    for (i = 0; i < machine->possible_cpus->len; i++) {
+        int core_id = i * smp_threads;
+
+        machine->possible_cpus->cpus[i].type = core_type;
+        machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
+        machine->possible_cpus->cpus[i].arch_id = core_id;
+        machine->possible_cpus->cpus[i].props.has_core_id = true;
+        machine->possible_cpus->cpus[i].props.core_id = core_id;
+    }
+    return machine->possible_cpus;
+}
+
+static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
+                                uint64_t *buid, hwaddr *pio,
+                                hwaddr *mmio32, hwaddr *mmio64,
+                                unsigned n_dma, uint32_t *liobns,
+                                hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+{
+    /*
+     * New-style PHB window placement.
+     *
+     * Goals: give each PHB a large (1TiB), naturally aligned 64-bit
+     * MMIO window, in addition to 2GiB 32-bit MMIO and 64kiB PIO
+     * windows.
+     *
+     * Some guest kernels can't work with MMIO windows above 1<<46
+     * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
+     *
+     * 32TiB..(32TiB+1984kiB) contains the 64kiB PIO windows for each
+     * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the
+     * 2GiB 32-bit MMIO windows for each PHB. Then 33TiB..64TiB has the
+     * 1TiB 64-bit MMIO windows for each PHB.
+     */
+    const uint64_t base_buid = 0x800000020000000ULL;
+    int i;
+
+    /* Sanity check natural alignments */
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0);
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0);
+    /* Sanity check bounds */
+    QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) >
+                      SPAPR_PCI_MEM32_WIN_SIZE);
+    QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) >
+                      SPAPR_PCI_MEM64_WIN_SIZE);
+
+    if (index >= SPAPR_MAX_PHBS) {
+        error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)",
+                   SPAPR_MAX_PHBS - 1);
+        return false;
+    }
+
+    *buid = base_buid + index;
+    for (i = 0; i < n_dma; ++i) {
+        liobns[i] = SPAPR_PCI_LIOBN(index, i);
+    }
+
+    *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
+    *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
+    *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
+
+    *nv2gpa = SPAPR_PCI_NV2RAM64_WIN_BASE + index * SPAPR_PCI_NV2RAM64_WIN_SIZE;
+    *nv2atsd = SPAPR_PCI_NV2ATSD_WIN_BASE + index * SPAPR_PCI_NV2ATSD_WIN_SIZE;
+    return true;
+}
+
+static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(dev);
+
+    return ics_valid_irq(spapr->ics, irq) ?
spapr->ics : NULL; +} + +static void spapr_ics_resend(XICSFabric *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(dev); + + ics_resend(spapr->ics); +} + +static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id) +{ + PowerPCCPU *cpu = spapr_find_cpu(vcpu_id); + + return cpu ? spapr_cpu_state(cpu)->icp : NULL; +} + +static void spapr_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + spapr_irq_print_info(spapr, mon); + monitor_printf(mon, "irqchip: %s\n", + kvm_irqchip_in_kernel() ? "in-kernel" : "emulated"); +} + +/* + * This is a XIVE only operation + */ +static int spapr_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(xfb); + XivePresenter *xptr = XIVE_PRESENTER(spapr->active_intc); + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); + int count; + + count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore, + priority, logic_serv, match); + if (count < 0) { + return count; + } + + /* + * When we implement the save and restore of the thread interrupt + * contexts in the enter/exit CPU handlers of the machine and the + * escalations in QEMU, we should be able to handle non dispatched + * vCPUs. + * + * Until this is done, the sPAPR machine should find at least one + * matching context always. + */ + if (count == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is not dispatched\n", + nvt_blk, nvt_idx); + } + + return count; +} + +int spapr_get_vcpu_id(PowerPCCPU *cpu) +{ + return cpu->vcpu_id; +} + +bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + MachineState *ms = MACHINE(spapr); + int vcpu_id; + + vcpu_id = spapr_vcpu_id(spapr, cpu_index); + + if (kvm_enabled() && !kvm_vcpu_id_is_valid(vcpu_id)) { + error_setg(errp, "Can't create CPU with id %d in KVM", vcpu_id); + error_append_hint(errp, "Adjust the number of cpus to %d " + "or try to raise the number of threads per core\n", + vcpu_id * ms->smp.threads / spapr->vsmt); + return false; + } + + cpu->vcpu_id = vcpu_id; + return true; +} + +PowerPCCPU *spapr_find_cpu(int vcpu_id) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + if (spapr_get_vcpu_id(cpu) == vcpu_id) { + return cpu; + } + } + + return NULL; +} + +static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + /* These are only called by TCG, KVM maintains dispatch state */ + + spapr_cpu->prod = false; + if (spapr_cpu->vpa_addr) { + CPUState *cs = CPU(cpu); + uint32_t dispatch; + + dispatch = ldl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER); + dispatch++; + if ((dispatch & 1) != 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "VPA: incorrect dispatch counter value for " + "dispatched partition %u, correcting.\n", dispatch); + dispatch++; + } + stl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER, dispatch); + } +} + +static void spapr_cpu_exec_exit(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + if (spapr_cpu->vpa_addr) { + CPUState *cs = CPU(cpu); + uint32_t dispatch; + + dispatch = ldl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER); + dispatch++; + if ((dispatch & 1) != 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "VPA: incorrect dispatch counter 
value for "
+                          "preempted partition %u, correcting.\n", dispatch);
+            dispatch++;
+        }
+        stl_be_phys(cs->as,
+                    spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER, dispatch);
+    }
+}
+
+static void spapr_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
+    FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
+    NMIClass *nc = NMI_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+    PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+    XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+    InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
+    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc);
+    VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc);
+
+    mc->desc = "pSeries Logical Partition (PAPR compliant)";
+    mc->ignore_boot_device_suffixes = true;
+
+    /*
+     * We set up the default / latest behaviour here. The class_init
+     * functions for the specific versioned machine types can override
+     * these details for backwards compatibility.
+     */
+    mc->init = spapr_machine_init;
+    mc->reset = spapr_machine_reset;
+    mc->block_default_type = IF_SCSI;
+
+    /*
+     * Setting max_cpus to INT32_MAX. Both KVM and TCG max_cpus values
+     * should be limited by the host capability instead of being hardcoded.
+     * max_cpus for KVM guests will be checked in kvm_init(), and TCG
+     * guests are welcome to have as many CPUs as the host is capable
+     * of emulating.
+     */
+    mc->max_cpus = INT32_MAX;
+
+    mc->no_parallel = 1;
+    mc->default_boot_order = "";
+    mc->default_ram_size = 512 * MiB;
+    mc->default_ram_id = "ppc_spapr.ram";
+    mc->default_display = "std";
+    mc->kvm_type = spapr_kvm_type;
+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
+    mc->pci_allow_0_address = true;
+    assert(!mc->get_hotplug_handler);
+    mc->get_hotplug_handler = spapr_get_hotplug_handler;
+    hc->pre_plug = spapr_machine_device_pre_plug;
+    hc->plug = spapr_machine_device_plug;
+    mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
+    mc->get_default_cpu_node_id = spapr_get_default_cpu_node_id;
+    mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
+    hc->unplug_request = spapr_machine_device_unplug_request;
+    hc->unplug = spapr_machine_device_unplug;
+
+    smc->dr_lmb_enabled = true;
+    smc->update_dt_enabled = true;
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
+    mc->has_hotpluggable_cpus = true;
+    mc->nvdimm_supported = true;
+    smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
+    fwc->get_dev_path = spapr_get_fw_dev_path;
+    nc->nmi_monitor_handler = spapr_nmi;
+    smc->phb_placement = spapr_phb_placement;
+    vhc->hypercall = emulate_spapr_hypercall;
+    vhc->hpt_mask = spapr_hpt_mask;
+    vhc->map_hptes = spapr_map_hptes;
+    vhc->unmap_hptes = spapr_unmap_hptes;
+    vhc->hpte_set_c = spapr_hpte_set_c;
+    vhc->hpte_set_r = spapr_hpte_set_r;
+    vhc->get_pate = spapr_get_pate;
+    vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr;
+    vhc->cpu_exec_enter = spapr_cpu_exec_enter;
+    vhc->cpu_exec_exit = spapr_cpu_exec_exit;
+    xic->ics_get = spapr_ics_get;
+    xic->ics_resend = spapr_ics_resend;
+    xic->icp_get = spapr_icp_get;
+    ispc->print_info = spapr_pic_print_info;
+    /* Force NUMA node memory size to be a multiple of
+     * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
+     * in which LMBs are represented and hot-added
+     */
+    mc->numa_mem_align_shift = 28;
+    mc->auto_enable_numa = true;
+
+    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
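+
+    /*
+     * Illustrative note (not part of the original patch): every entry
+     * in this block is exposed as a machine property by
+     * spapr_caps_add_properties(), so a command line such as
+     *
+     *   qemu-system-ppc64 -machine pseries,cap-htm=on,cap-cfpc=fixed
+     *
+     * overrides the class default; capabilities not given on the
+     * command line keep the values set here (see spapr_caps_init()).
+     */
+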
smc->default_caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; + smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; + smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND; + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ + smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF; + spapr_caps_add_properties(smc); + smc->irq = &spapr_irq_dual; + smc->dr_phb_enabled = true; + smc->linux_pci_probe = true; + smc->smp_threads_vsmt = true; + smc->nr_xirqs = SPAPR_NR_XIRQS; + xfc->match_nvt = spapr_match_nvt; + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; +} + +static const TypeInfo spapr_machine_info = { + .name = TYPE_SPAPR_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(SpaprMachineState), + .instance_init = spapr_instance_init, + .instance_finalize = spapr_machine_finalizefn, + .class_size = sizeof(SpaprMachineClass), + .class_init = spapr_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { TYPE_NMI }, + { TYPE_HOTPLUG_HANDLER }, + { TYPE_PPC_VIRTUAL_HYPERVISOR }, + { TYPE_XICS_FABRIC }, + { TYPE_INTERRUPT_STATS_PROVIDER }, + { TYPE_XIVE_FABRIC }, + { TYPE_VOF_MACHINE_IF }, + { } + }, +}; + +static void spapr_machine_latest_class_options(MachineClass *mc) +{ + mc->alias = "pseries"; + mc->is_default = true; +} + +#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest) \ + static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + spapr_machine_##suffix##_class_options(mc); \ + if (latest) { \ + spapr_machine_latest_class_options(mc); \ + } \ + } \ + static const TypeInfo spapr_machine_##suffix##_info = { \ + .name = MACHINE_TYPE_NAME("pseries-" verstr), \ + .parent = TYPE_SPAPR_MACHINE, \ + .class_init = spapr_machine_##suffix##_class_init, \ + }; \ + static void spapr_machine_register_##suffix(void) \ + { \ + type_register(&spapr_machine_##suffix##_info); \ + } \ + type_init(spapr_machine_register_##suffix) + +/* + * pseries-6.2 + */ +static void spapr_machine_6_2_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(6_2, "6.2", true); + +/* + * pseries-6.1 + */ +static void spapr_machine_6_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_6_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; +} + +DEFINE_SPAPR_MACHINE(6_1, "6.1", false); + +/* + * pseries-6.0 + */ +static void spapr_machine_6_0_class_options(MachineClass *mc) +{ + spapr_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); +} + +DEFINE_SPAPR_MACHINE(6_0, "6.0", false); + +/* + * pseries-5.2 + */ +static void spapr_machine_5_2_class_options(MachineClass *mc) +{ + spapr_machine_6_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len); +} + +DEFINE_SPAPR_MACHINE(5_2, "5.2", false); + +/* + * 
pseries-5.1 + */ +static void spapr_machine_5_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_5_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_5_1, hw_compat_5_1_len); + smc->pre_5_2_numa_associativity = true; +} + +DEFINE_SPAPR_MACHINE(5_1, "5.1", false); + +/* + * pseries-5.0 + */ +static void spapr_machine_5_0_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, + }; + + spapr_machine_5_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_5_0, hw_compat_5_0_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->numa_mem_supported = true; + smc->pre_5_1_assoc_refpoints = true; +} + +DEFINE_SPAPR_MACHINE(5_0, "5.0", false); + +/* + * pseries-4.2 + */ +static void spapr_machine_4_2_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_5_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len); + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; + smc->rma_limit = 16 * GiB; + mc->nvdimm_supported = false; +} + +DEFINE_SPAPR_MACHINE(4_2, "4.2", false); + +/* + * pseries-4.1 + */ +static void spapr_machine_4_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + /* Only allow 4kiB and 64kiB IOMMU pagesizes */ + { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, + }; + + spapr_machine_4_2_class_options(mc); + smc->linux_pci_probe = false; + smc->smp_threads_vsmt = false; + compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} + +DEFINE_SPAPR_MACHINE(4_1, "4.1", false); + +/* + * pseries-4.0 + */ +static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, + hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp) +{ + if (!spapr_phb_placement(spapr, index, buid, pio, mmio32, mmio64, n_dma, + liobns, nv2gpa, nv2atsd, errp)) { + return false; + } + + *nv2gpa = 0; + *nv2atsd = 0; + return true; +} +static void spapr_machine_4_0_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_4_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + smc->phb_placement = phb_placement_4_0; + smc->irq = &spapr_irq_xics; + smc->pre_4_1_migration = true; +} + +DEFINE_SPAPR_MACHINE(4_0, "4.0", false); + +/* + * pseries-3.1 + */ +static void spapr_machine_3_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_4_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len); + + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + smc->update_dt_enabled = false; + smc->dr_phb_enabled = false; + smc->broken_host_serial_model = true; + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; +} + +DEFINE_SPAPR_MACHINE(3_1, "3.1", false); + +/* + * pseries-3.0 + */ + +static void 
spapr_machine_3_0_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_3_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len);
+
+    smc->legacy_irq_allocation = true;
+    smc->nr_xirqs = 0x400;
+    smc->irq = &spapr_irq_xics_legacy;
+}
+
+DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
+
+/*
+ * pseries-2.12
+ */
+static void spapr_machine_2_12_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+    static GlobalProperty compat[] = {
+        { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" },
+        { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" },
+    };
+
+    spapr_machine_3_0_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_12, hw_compat_2_12_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+    /* We depend on kvm_enabled() to choose a default value for the
+     * hpt-max-page-size capability. Of course we can't do it here
+     * because this is too early and the HW accelerator isn't initialized
+     * yet. Postpone this to machine init (see default_caps_with_cpu()).
+     */
+    smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
+}
+
+DEFINE_SPAPR_MACHINE(2_12, "2.12", false);
+
+static void spapr_machine_2_12_sxxm_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_2_12_class_options(mc);
+    smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND;
+    smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND;
+    smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD;
+}
+
+DEFINE_SPAPR_MACHINE(2_12_sxxm, "2.12-sxxm", false);
+
+/*
+ * pseries-2.11
+ */
+
+static void spapr_machine_2_11_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_2_12_class_options(mc);
+    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON;
+    compat_props_add(mc->compat_props, hw_compat_2_11, hw_compat_2_11_len);
+}
+
+DEFINE_SPAPR_MACHINE(2_11, "2.11", false);
+
+/*
+ * pseries-2.10
+ */
+
+static void spapr_machine_2_10_class_options(MachineClass *mc)
+{
+    spapr_machine_2_11_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_10, hw_compat_2_10_len);
+}
+
+DEFINE_SPAPR_MACHINE(2_10, "2.10", false);
+
+/*
+ * pseries-2.9
+ */
+
+static void spapr_machine_2_9_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+    static GlobalProperty compat[] = {
+        { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" },
+    };
+
+    spapr_machine_2_10_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+    smc->pre_2_10_has_unused_icps = true;
+    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
+}
+
+DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
+
+/*
+ * pseries-2.8
+ */
+
+static void spapr_machine_2_8_class_options(MachineClass *mc)
+{
+    static GlobalProperty compat[] = {
+        { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" },
+    };
+
+    spapr_machine_2_9_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_8, hw_compat_2_8_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+    mc->numa_mem_align_shift = 23;
+}
+
+DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
+
+/*
+ * pseries-2.7
+ */
+
+static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index,
+                              uint64_t *buid, hwaddr *pio,
+                              hwaddr *mmio32, hwaddr *mmio64,
+                              unsigned n_dma, uint32_t *liobns,
+                              hwaddr *nv2gpa, hwaddr *nv2atsd,
Error **errp) +{ + /* Legacy PHB placement for pseries-2.7 and earlier machine types */ + const uint64_t base_buid = 0x800000020000000ULL; + const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */ + const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */ + const hwaddr pio_offset = 0x80000000; /* 2 GiB */ + const uint32_t max_index = 255; + const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */ + + uint64_t ram_top = MACHINE(spapr)->ram_size; + hwaddr phb0_base, phb_base; + int i; + + /* Do we have device memory? */ + if (MACHINE(spapr)->maxram_size > ram_top) { + /* Can't just use maxram_size, because there may be an + * alignment gap between normal and device memory regions + */ + ram_top = MACHINE(spapr)->device_memory->base + + memory_region_size(&MACHINE(spapr)->device_memory->mr); + } + + phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment); + + if (index > max_index) { + error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", + max_index); + return false; + } + + *buid = base_buid + index; + for (i = 0; i < n_dma; ++i) { + liobns[i] = SPAPR_PCI_LIOBN(index, i); + } + + phb_base = phb0_base + index * phb_spacing; + *pio = phb_base + pio_offset; + *mmio32 = phb_base + mmio_offset; + /* + * We don't set the 64-bit MMIO window, relying on the PHB's + * fallback behaviour of automatically splitting a large "32-bit" + * window into contiguous 32-bit and 64-bit windows + */ + + *nv2gpa = 0; + *nv2atsd = 0; + return true; +} + +static void spapr_machine_2_7_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000", }, + { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0", }, + { TYPE_POWERPC_CPU, "pre-2.8-migration", "on", }, + { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on", }, + }; + + spapr_machine_2_8_class_options(mc); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); + mc->default_machine_opts = "modern-hotplug-events=off"; + compat_props_add(mc->compat_props, hw_compat_2_7, hw_compat_2_7_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + smc->phb_placement = phb_placement_2_7; +} + +DEFINE_SPAPR_MACHINE(2_7, "2.7", false); + +/* + * pseries-2.6 + */ + +static void spapr_machine_2_6_class_options(MachineClass *mc) +{ + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, + }; + + spapr_machine_2_7_class_options(mc); + mc->has_hotpluggable_cpus = false; + compat_props_add(mc->compat_props, hw_compat_2_6, hw_compat_2_6_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} + +DEFINE_SPAPR_MACHINE(2_6, "2.6", false); + +/* + * pseries-2.5 + */ + +static void spapr_machine_2_5_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + { "spapr-vlan", "use-rx-buffer-pools", "off" }, + }; + + spapr_machine_2_6_class_options(mc); + smc->use_ohci_by_default = true; + compat_props_add(mc->compat_props, hw_compat_2_5, hw_compat_2_5_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} + +DEFINE_SPAPR_MACHINE(2_5, "2.5", false); + +/* + * pseries-2.4 + */ + +static void spapr_machine_2_4_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_2_5_class_options(mc); + smc->dr_lmb_enabled = false; + compat_props_add(mc->compat_props, hw_compat_2_4, hw_compat_2_4_len); +} + +DEFINE_SPAPR_MACHINE(2_4, "2.4", false); + +/* + 
* pseries-2.3 + */ + +static void spapr_machine_2_3_class_options(MachineClass *mc) +{ + static GlobalProperty compat[] = { + { "spapr-pci-host-bridge", "dynamic-reconfiguration", "off" }, + }; + spapr_machine_2_4_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_2_3, hw_compat_2_3_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} +DEFINE_SPAPR_MACHINE(2_3, "2.3", false); + +/* + * pseries-2.2 + */ + +static void spapr_machine_2_2_class_options(MachineClass *mc) +{ + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0x20000000" }, + }; + + spapr_machine_2_3_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_2_2, hw_compat_2_2_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->default_machine_opts = "modern-hotplug-events=off,suppress-vmdesc=on"; +} +DEFINE_SPAPR_MACHINE(2_2, "2.2", false); + +/* + * pseries-2.1 + */ + +static void spapr_machine_2_1_class_options(MachineClass *mc) +{ + spapr_machine_2_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); +} +DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + +static void spapr_machine_register_types(void) +{ + type_register_static(&spapr_machine_info); +} + +type_init(spapr_machine_register_types) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c new file mode 100644 index 000000000..ed7c077a0 --- /dev/null +++ b/hw/ppc/spapr_caps.c @@ -0,0 +1,944 @@ +/* + * QEMU PowerPC pSeries Logical Partition capabilities handling + * + * Copyright (c) 2017 David Gibson, Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "sysemu/hw_accel.h" +#include "exec/ram_addr.h" +#include "target/ppc/cpu.h" +#include "target/ppc/mmu-hash64.h" +#include "cpu-models.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" +#include "sysemu/tcg.h" + +#include "hw/ppc/spapr.h" + +typedef struct SpaprCapPossible { + int num; /* size of vals array below */ + const char *help; /* help text for vals */ + /* + * Note: + * - because of the way compatibility is determined vals MUST be ordered + * such that later options are a superset of all preceding options. 
+ * - the order of vals must be preserved, that is their index is important, + * however vals may be added to the end of the list so long as the above + * point is observed + */ + const char *vals[]; +} SpaprCapPossible; + +typedef struct SpaprCapabilityInfo { + const char *name; + const char *description; + int index; + + /* Getter and Setter Function Pointers */ + ObjectPropertyAccessor *get; + ObjectPropertyAccessor *set; + const char *type; + /* Possible values if this is a custom string type */ + SpaprCapPossible *possible; + /* Make sure the virtual hardware can support this capability */ + void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp); + void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu, + uint8_t val, Error **errp); + bool (*migrate_needed)(void *opaque); +} SpaprCapabilityInfo; + +static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + bool value = spapr_get_cap(spapr, cap->index) == SPAPR_CAP_ON; + + visit_type_bool(v, name, &value, errp); +} + +static void spapr_cap_set_bool(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + bool value; + + if (!visit_type_bool(v, name, &value, errp)) { + return; + } + + spapr->cmd_line_caps[cap->index] = true; + spapr->eff.caps[cap->index] = value ? SPAPR_CAP_ON : SPAPR_CAP_OFF; +} + + +static void spapr_cap_get_string(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + char *val = NULL; + uint8_t value = spapr_get_cap(spapr, cap->index); + + if (value >= cap->possible->num) { + error_setg(errp, "Invalid value (%d) for cap-%s", value, cap->name); + return; + } + + val = g_strdup(cap->possible->vals[value]); + + visit_type_str(v, name, &val, errp); + g_free(val); +} + +static void spapr_cap_set_string(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + uint8_t i; + char *val; + + if (!visit_type_str(v, name, &val, errp)) { + return; + } + + if (!strcmp(val, "?")) { + error_setg(errp, "%s", cap->possible->help); + goto out; + } + for (i = 0; i < cap->possible->num; i++) { + if (!strcasecmp(val, cap->possible->vals[i])) { + spapr->cmd_line_caps[cap->index] = true; + spapr->eff.caps[cap->index] = i; + goto out; + } + } + + error_setg(errp, "Invalid capability mode \"%s\" for cap-%s", val, + cap->name); +out: + g_free(val); +} + +static void spapr_cap_get_pagesize(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + uint8_t val = spapr_get_cap(spapr, cap->index); + uint64_t pagesize = (1ULL << val); + + visit_type_size(v, name, &pagesize, errp); +} + +static void spapr_cap_set_pagesize(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + uint64_t pagesize; + uint8_t val; + + if (!visit_type_size(v, name, &pagesize, errp)) { + return; + } + + if (!is_power_of_2(pagesize)) { + error_setg(errp, "cap-%s must be a power of 2", cap->name); + return; + } + + val = ctz64(pagesize); + spapr->cmd_line_caps[cap->index] = true; + 
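+    /*
+     * Illustrative note (not part of the original patch): the cap value
+     * stores log2 of the page size.  E.g. cap-hpt-max-page-size=64k
+     * arrives here as pagesize == 0x10000, so val = ctz64(0x10000) = 16,
+     * matching the SPAPR_CAP_HPT_MAXPAGESIZE default of 16 set in
+     * spapr_machine_class_init(); spapr_cap_get_pagesize() reverses
+     * this with 1ULL << val.
+     */
+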
spapr->eff.caps[cap->index] = val; +} + +static void cap_htm_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) +{ + ERRP_GUARD(); + if (!val) { + /* TODO: We don't support disabling htm yet */ + return; + } + if (tcg_enabled()) { + error_setg(errp, "No Transactional Memory support in TCG"); + error_append_hint(errp, "Try appending -machine cap-htm=off\n"); + } else if (kvm_enabled() && !kvmppc_has_cap_htm()) { + error_setg(errp, + "KVM implementation does not support Transactional Memory"); + error_append_hint(errp, "Try appending -machine cap-htm=off\n"); + } +} + +static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + CPUPPCState *env = &cpu->env; + + if (!val) { + /* TODO: We don't support disabling vsx yet */ + return; + } + /* Allowable CPUs in spapr_cpu_core.c should already have gotten + * rid of anything that doesn't do VMX */ + g_assert(env->insns_flags & PPC_ALTIVEC); + if (!(env->insns_flags2 & PPC2_VSX)) { + error_setg(errp, "VSX support not available"); + error_append_hint(errp, "Try appending -machine cap-vsx=off\n"); + } +} + +static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + CPUPPCState *env = &cpu->env; + + if (!val) { + /* TODO: We don't support disabling dfp yet */ + return; + } + if (!(env->insns_flags2 & PPC2_DFP)) { + error_setg(errp, "DFP support not available"); + error_append_hint(errp, "Try appending -machine cap-dfp=off\n"); + } +} + +SpaprCapPossible cap_cfpc_possible = { + .num = 3, + .vals = {"broken", "workaround", "fixed"}, + .help = "broken - no protection, workaround - workaround available," + " fixed - fixed in hardware", +}; + +static void cap_safe_cache_apply(SpaprMachineState *spapr, uint8_t val, + Error **errp) +{ + ERRP_GUARD(); + uint8_t kvm_val = kvmppc_get_cap_safe_cache(); + + if (tcg_enabled() && val) { + /* TCG only supports broken, allow other values and print a warning */ + warn_report("TCG doesn't support requested feature, cap-cfpc=%s", + cap_cfpc_possible.vals[val]); + } else if (kvm_enabled() && (val > kvm_val)) { + error_setg(errp, + "Requested safe cache capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-cfpc=%s\n", + cap_cfpc_possible.vals[kvm_val]); + } +} + +SpaprCapPossible cap_sbbc_possible = { + .num = 3, + .vals = {"broken", "workaround", "fixed"}, + .help = "broken - no protection, workaround - workaround available," + " fixed - fixed in hardware", +}; + +static void cap_safe_bounds_check_apply(SpaprMachineState *spapr, uint8_t val, + Error **errp) +{ + ERRP_GUARD(); + uint8_t kvm_val = kvmppc_get_cap_safe_bounds_check(); + + if (tcg_enabled() && val) { + /* TCG only supports broken, allow other values and print a warning */ + warn_report("TCG doesn't support requested feature, cap-sbbc=%s", + cap_sbbc_possible.vals[val]); + } else if (kvm_enabled() && (val > kvm_val)) { + error_setg(errp, +"Requested safe bounds check capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-sbbc=%s\n", + cap_sbbc_possible.vals[kvm_val]); + } +} + +SpaprCapPossible cap_ibs_possible = { + .num = 5, + /* Note workaround only maintained for compatibility */ + .vals = {"broken", "workaround", "fixed-ibs", "fixed-ccd", "fixed-na"}, + .help = "broken - no protection, workaround - count cache flush" + ", fixed-ibs - indirect branch serialisation," + " fixed-ccd - cache count 
disabled," + " fixed-na - fixed in hardware (no longer applicable)", +}; + +static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + uint8_t kvm_val = kvmppc_get_cap_safe_indirect_branch(); + + if (tcg_enabled() && val) { + /* TCG only supports broken, allow other values and print a warning */ + warn_report("TCG doesn't support requested feature, cap-ibs=%s", + cap_ibs_possible.vals[val]); + } else if (kvm_enabled() && (val > kvm_val)) { + error_setg(errp, +"Requested safe indirect branch capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-ibs=%s\n", + cap_ibs_possible.vals[kvm_val]); + } +} + +#define VALUE_DESC_TRISTATE " (broken, workaround, fixed)" + +bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + Error **errp) +{ + hwaddr maxpagesize = (1ULL << spapr->eff.caps[SPAPR_CAP_HPT_MAXPAGESIZE]); + + if (!kvmppc_hpt_needs_host_contiguous_pages()) { + return true; + } + + if (maxpagesize > pagesize) { + error_setg(errp, + "Can't support %"HWADDR_PRIu" kiB guest pages with %" + HWADDR_PRIu" kiB host pages with this KVM implementation", + maxpagesize >> 10, pagesize >> 10); + return false; + } + + return true; +} + +static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); +} + +static bool cap_hpt_maxpagesize_migrate_needed(void *opaque) +{ + return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration; +} + +static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, + uint32_t pshift) +{ + unsigned maxshift = *((unsigned *)opaque); + + assert(pshift >= seg_pshift); + + /* Don't allow the guest to use pages bigger than the configured + * maximum size */ + if (pshift > maxshift) { + return false; + } + + /* For whatever reason, KVM doesn't allow multiple pagesizes + * within a segment, *except* for the case of 16M pages in a 4k or + * 64k segment. 
Always exclude other cases, so that TCG and KVM + * guests see a consistent environment */ + if ((pshift != seg_pshift) && (pshift != 24)) { + return false; + } + + return true; +} + +static void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, + bool (*cb)(void *, uint32_t, uint32_t), + void *opaque) +{ + PPCHash64Options *opts = cpu->hash64_opts; + int i; + int n = 0; + bool ci_largepage = false; + + assert(opts); + + n = 0; + for (i = 0; i < ARRAY_SIZE(opts->sps); i++) { + PPCHash64SegmentPageSizes *sps = &opts->sps[i]; + int j; + int m = 0; + + assert(n <= i); + + if (!sps->page_shift) { + break; + } + + for (j = 0; j < ARRAY_SIZE(sps->enc); j++) { + PPCHash64PageSize *ps = &sps->enc[j]; + + assert(m <= j); + if (!ps->page_shift) { + break; + } + + if (cb(opaque, sps->page_shift, ps->page_shift)) { + if (ps->page_shift >= 16) { + ci_largepage = true; + } + sps->enc[m++] = *ps; + } + } + + /* Clear rest of the row */ + for (j = m; j < ARRAY_SIZE(sps->enc); j++) { + memset(&sps->enc[j], 0, sizeof(sps->enc[j])); + } + + if (m) { + n++; + } + } + + /* Clear the rest of the table */ + for (i = n; i < ARRAY_SIZE(opts->sps); i++) { + memset(&opts->sps[i], 0, sizeof(opts->sps[i])); + } + + if (!ci_largepage) { + opts->flags &= ~PPC_HASH64_CI_LARGEPAGE; + } +} + +static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr, + PowerPCCPU *cpu, + uint8_t val, Error **errp) +{ + unsigned maxshift = val; + + ppc_hash64_filter_pagesizes(cpu, spapr_pagesize_cb, &maxshift); +} + +static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + + if (!val) { + /* capability disabled by default */ + return; + } + + if (tcg_enabled()) { + error_setg(errp, "No Nested KVM-HV support in TCG"); + error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); + } else if (kvm_enabled()) { + if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "Nested KVM-HV only supported on POWER9"); + error_append_hint(errp, + "Try appending -machine max-cpu-compat=power9\n"); + return; + } + + if (!kvmppc_has_cap_nested_kvm_hv()) { + error_setg(errp, + "KVM implementation does not support Nested KVM-HV"); + error_append_hint(errp, + "Try appending -machine cap-nested-hv=off\n"); + } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) { + error_setg(errp, "Error enabling cap-nested-hv with KVM"); + error_append_hint(errp, + "Try appending -machine cap-nested-hv=off\n"); + } + } +} + +static void cap_large_decr_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + if (!val) { + return; /* Disabled by default */ + } + + if (tcg_enabled()) { + if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "Large decrementer only supported on POWER9"); + error_append_hint(errp, "Try -cpu POWER9\n"); + return; + } + } else if (kvm_enabled()) { + int kvm_nr_bits = kvmppc_get_cap_large_decr(); + + if (!kvm_nr_bits) { + error_setg(errp, "No large decrementer support"); + error_append_hint(errp, + "Try appending -machine cap-large-decr=off\n"); + } else if (pcc->lrg_decr_bits != kvm_nr_bits) { + error_setg(errp, + "KVM large decrementer size (%d) differs to model (%d)", + kvm_nr_bits, pcc->lrg_decr_bits); + error_append_hint(errp, + "Try appending -machine cap-large-decr=off\n"); + } + } +} + +static void 
cap_large_decr_cpu_apply(SpaprMachineState *spapr,
+                         PowerPCCPU *cpu,
+                         uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    CPUPPCState *env = &cpu->env;
+    target_ulong lpcr = env->spr[SPR_LPCR];
+
+    if (kvm_enabled()) {
+        if (kvmppc_enable_cap_large_decr(cpu, val)) {
+            error_setg(errp, "No large decrementer support");
+            error_append_hint(errp,
+                              "Try appending -machine cap-large-decr=off\n");
+        }
+    }
+
+    if (val) {
+        lpcr |= LPCR_LD;
+    } else {
+        lpcr &= ~LPCR_LD;
+    }
+    ppc_store_lpcr(cpu, lpcr);
+}
+
+static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
+                                 Error **errp)
+{
+    ERRP_GUARD();
+    uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist();
+
+    if (tcg_enabled() && val) {
+        /* TCG doesn't implement anything here, but allow with a warning */
+        warn_report("TCG doesn't support requested feature, cap-ccf-assist=on");
+    } else if (kvm_enabled() && (val > kvm_val)) {
+        uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch();
+
+        if (kvm_ibs == SPAPR_CAP_FIXED_CCD) {
+            /*
+             * If we don't have CCF assist on the host, the assist
+             * instruction is a harmless no-op. It won't correctly
+             * implement the cache count flush *but* if we have
+             * count-cache-disabled in the host, that flush is
+             * unnecessary. So, specifically allow this case. This
+             * allows us to have better performance on POWER9 DD2.3,
+             * while still working on POWER9 DD2.2 and POWER8 host
+             * cpus.
+             */
+            return;
+        }
+        error_setg(errp,
+                   "Requested count cache flush assist capability level not supported by KVM");
+        error_append_hint(errp, "Try appending -machine cap-ccf-assist=off\n");
+    }
+}
+
+static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
+                            Error **errp)
+{
+    ERRP_GUARD();
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    if (kvm_enabled()) {
+        if (!kvmppc_get_fwnmi()) {
+            error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts (FWNMI) not supported by KVM.");
+            error_append_hint(errp, "Try appending -machine cap-fwnmi=off\n");
+        }
+    }
+}
+
+static void cap_rpt_invalidate_apply(SpaprMachineState *spapr,
+                                     uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!val) {
+        /* capability disabled by default */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        error_setg(errp, "No H_RPT_INVALIDATE support in TCG");
+        error_append_hint(errp,
+                          "Try appending -machine cap-rpt-invalidate=off\n");
+    } else if (kvm_enabled()) {
+        if (!kvmppc_has_cap_mmu_radix()) {
+            error_setg(errp, "H_RPT_INVALIDATE only supported on Radix");
+            return;
+        }
+
+        if (!kvmppc_has_cap_rpt_invalidate()) {
+            error_setg(errp,
+                       "KVM implementation does not support H_RPT_INVALIDATE");
+            error_append_hint(errp,
+                              "Try appending -machine cap-rpt-invalidate=off\n");
+        } else {
+            kvmppc_enable_h_rpt_invalidate();
+        }
+    }
+}
+
+SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
+    [SPAPR_CAP_HTM] = {
+        .name = "htm",
+        .description = "Allow Hardware Transactional Memory (HTM)",
+        .index = SPAPR_CAP_HTM,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_htm_apply,
+    },
+    [SPAPR_CAP_VSX] = {
+        .name = "vsx",
+        .description = "Allow Vector Scalar Extensions (VSX)",
+        .index = SPAPR_CAP_VSX,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_vsx_apply,
+    },
+    [SPAPR_CAP_DFP] = {
+        .name = "dfp",
+        .description = "Allow Decimal Floating Point (DFP)",
+        .index = SPAPR_CAP_DFP,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_dfp_apply,
+    },
+    [SPAPR_CAP_CFPC] = {
+        .name = "cfpc",
+        .description =
"Cache Flush on Privilege Change" VALUE_DESC_TRISTATE, + .index = SPAPR_CAP_CFPC, + .get = spapr_cap_get_string, + .set = spapr_cap_set_string, + .type = "string", + .possible = &cap_cfpc_possible, + .apply = cap_safe_cache_apply, + }, + [SPAPR_CAP_SBBC] = { + .name = "sbbc", + .description = "Speculation Barrier Bounds Checking" VALUE_DESC_TRISTATE, + .index = SPAPR_CAP_SBBC, + .get = spapr_cap_get_string, + .set = spapr_cap_set_string, + .type = "string", + .possible = &cap_sbbc_possible, + .apply = cap_safe_bounds_check_apply, + }, + [SPAPR_CAP_IBS] = { + .name = "ibs", + .description = + "Indirect Branch Speculation (broken, workaround, fixed-ibs," + "fixed-ccd, fixed-na)", + .index = SPAPR_CAP_IBS, + .get = spapr_cap_get_string, + .set = spapr_cap_set_string, + .type = "string", + .possible = &cap_ibs_possible, + .apply = cap_safe_indirect_branch_apply, + }, + [SPAPR_CAP_HPT_MAXPAGESIZE] = { + .name = "hpt-max-page-size", + .description = "Maximum page size for Hash Page Table guests", + .index = SPAPR_CAP_HPT_MAXPAGESIZE, + .get = spapr_cap_get_pagesize, + .set = spapr_cap_set_pagesize, + .type = "int", + .apply = cap_hpt_maxpagesize_apply, + .cpu_apply = cap_hpt_maxpagesize_cpu_apply, + .migrate_needed = cap_hpt_maxpagesize_migrate_needed, + }, + [SPAPR_CAP_NESTED_KVM_HV] = { + .name = "nested-hv", + .description = "Allow Nested KVM-HV", + .index = SPAPR_CAP_NESTED_KVM_HV, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_nested_kvm_hv_apply, + }, + [SPAPR_CAP_LARGE_DECREMENTER] = { + .name = "large-decr", + .description = "Allow Large Decrementer", + .index = SPAPR_CAP_LARGE_DECREMENTER, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_large_decr_apply, + .cpu_apply = cap_large_decr_cpu_apply, + }, + [SPAPR_CAP_CCF_ASSIST] = { + .name = "ccf-assist", + .description = "Count Cache Flush Assist via HW Instruction", + .index = SPAPR_CAP_CCF_ASSIST, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_ccf_assist_apply, + }, + [SPAPR_CAP_FWNMI] = { + .name = "fwnmi", + .description = "Implements PAPR FWNMI option", + .index = SPAPR_CAP_FWNMI, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_fwnmi_apply, + }, + [SPAPR_CAP_RPT_INVALIDATE] = { + .name = "rpt-invalidate", + .description = "Allow H_RPT_INVALIDATE", + .index = SPAPR_CAP_RPT_INVALIDATE, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_rpt_invalidate_apply, + }, +}; + +static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, + const char *cputype) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprCapabilities caps; + + caps = smc->default_caps; + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; + } + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_07, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF; + caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; + } + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_06_PLUS, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; + } + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_06, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_OFF; + caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_OFF; + caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; + } + + /* This is for 
pseries-2.12 and older */ + if (smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] == 0) { + uint8_t mps; + + if (kvmppc_hpt_needs_host_contiguous_pages()) { + mps = ctz64(qemu_minrampagesize()); + } else { + mps = 34; /* allow everything up to 16GiB, i.e. everything */ + } + + caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps; + } + + return caps; +} + +int spapr_caps_pre_load(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + /* Set to default so we can tell if this came in with the migration */ + spapr->mig = spapr->def; + return 0; +} + +int spapr_caps_pre_save(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + spapr->mig = spapr->eff; + return 0; +} + +/* This has to be called from the top-level spapr post_load, not the + * caps specific one. Otherwise it wouldn't be called when the source + * caps are all defaults, which could still conflict with overridden + * caps on the destination */ +int spapr_caps_post_migration(SpaprMachineState *spapr) +{ + int i; + bool ok = true; + SpaprCapabilities dstcaps = spapr->eff; + SpaprCapabilities srccaps; + + srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type); + for (i = 0; i < SPAPR_CAP_NUM; i++) { + /* If not default value then assume came in with the migration */ + if (spapr->mig.caps[i] != spapr->def.caps[i]) { + srccaps.caps[i] = spapr->mig.caps[i]; + } + } + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + SpaprCapabilityInfo *info = &capability_table[i]; + + if (srccaps.caps[i] > dstcaps.caps[i]) { + error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)", + info->name, srccaps.caps[i], dstcaps.caps[i]); + ok = false; + } + + if (srccaps.caps[i] < dstcaps.caps[i]) { + warn_report("cap-%s lower level (%d) in incoming stream than on destination (%d)", + info->name, srccaps.caps[i], dstcaps.caps[i]); + } + } + + return ok ? 0 : -EINVAL; +} + +/* Used to generate the migration field and needed function for a spapr cap */ +#define SPAPR_CAP_MIG_STATE(sname, cap) \ +static bool spapr_cap_##sname##_needed(void *opaque) \ +{ \ + SpaprMachineState *spapr = opaque; \ + bool (*needed)(void *opaque) = \ + capability_table[cap].migrate_needed; \ + \ + return needed ? 
needed(opaque) : true && \ + spapr->cmd_line_caps[cap] && \ + (spapr->eff.caps[cap] != \ + spapr->def.caps[cap]); \ +} \ + \ +const VMStateDescription vmstate_spapr_cap_##sname = { \ + .name = "spapr/cap/" #sname, \ + .version_id = 1, \ + .minimum_version_id = 1, \ + .needed = spapr_cap_##sname##_needed, \ + .fields = (VMStateField[]) { \ + VMSTATE_UINT8(mig.caps[cap], \ + SpaprMachineState), \ + VMSTATE_END_OF_LIST() \ + }, \ +} + +SPAPR_CAP_MIG_STATE(htm, SPAPR_CAP_HTM); +SPAPR_CAP_MIG_STATE(vsx, SPAPR_CAP_VSX); +SPAPR_CAP_MIG_STATE(dfp, SPAPR_CAP_DFP); +SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC); +SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC); +SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS); +SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE); +SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); +SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); +SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI); +SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE); + +void spapr_caps_init(SpaprMachineState *spapr) +{ + SpaprCapabilities default_caps; + int i; + + /* Compute the actual set of caps we should run with */ + default_caps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type); + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + /* Store the defaults */ + spapr->def.caps[i] = default_caps.caps[i]; + /* If not set on the command line then apply the default value */ + if (!spapr->cmd_line_caps[i]) { + spapr->eff.caps[i] = default_caps.caps[i]; + } + } +} + +void spapr_caps_apply(SpaprMachineState *spapr) +{ + int i; + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + SpaprCapabilityInfo *info = &capability_table[i]; + + /* + * If the apply function can't set the desired level and thinks it's + * fatal, it should cause that. + */ + info->apply(spapr, spapr->eff.caps[i], &error_fatal); + } +} + +void spapr_caps_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu) +{ + int i; + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + SpaprCapabilityInfo *info = &capability_table[i]; + + /* + * If the apply function can't set the desired level and thinks it's + * fatal, it should cause that. + */ + if (info->cpu_apply) { + info->cpu_apply(spapr, cpu, spapr->eff.caps[i], &error_fatal); + } + } +} + +void spapr_caps_add_properties(SpaprMachineClass *smc) +{ + ObjectClass *klass = OBJECT_CLASS(smc); + int i; + + for (i = 0; i < ARRAY_SIZE(capability_table); i++) { + SpaprCapabilityInfo *cap = &capability_table[i]; + char *name = g_strdup_printf("cap-%s", cap->name); + char *desc; + + object_class_property_add(klass, name, cap->type, + cap->get, cap->set, + NULL, cap); + + desc = g_strdup_printf("%s", cap->description); + object_class_property_set_description(klass, name, desc); + g_free(name); + g_free(desc); + } +} diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c new file mode 100644 index 000000000..58e7341cb --- /dev/null +++ b/hw/ppc/spapr_cpu_core.c @@ -0,0 +1,391 @@ +/* + * sPAPR CPU core device, acts as container of CPU thread devices. + * + * Copyright (C) 2016 Bharata B Rao <bharata@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
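Stepping back to the SPAPR_CAP_MIG_STATE macro defined above: expanded by hand for the htm case (a mechanical expansion, shown only for readability), the generated needed-function is the following. Note the parse: "&&" binds tighter than "?:", so the command-line and changed-from-default checks form the third operand as a group, and whenever a cap registers a migrate_needed hook, that hook's verdict alone decides whether the subsection is sent.

static bool spapr_cap_htm_needed(void *opaque)
{
    SpaprMachineState *spapr = opaque;
    bool (*needed)(void *opaque) =
        capability_table[SPAPR_CAP_HTM].migrate_needed;

    /* parses as: needed ? needed(opaque)
     *                   : (true && cmd_line_caps[...] && (eff != def)) */
    return needed ? needed(opaque) : true &&
        spapr->cmd_line_caps[SPAPR_CAP_HTM] &&
        (spapr->eff.caps[SPAPR_CAP_HTM] !=
         spapr->def.caps[SPAPR_CAP_HTM]);
}

The macro also emits the matching vmstate_spapr_cap_htm description, whose .needed callback points at this function.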
+ */
+
+#include "qemu/osdep.h"
+#include "hw/cpu/core.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/spapr.h"
+#include "qapi/error.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "target/ppc/kvm_ppc.h"
+#include "hw/ppc/ppc.h"
+#include "target/ppc/mmu-hash64.h"
+#include "sysemu/numa.h"
+#include "sysemu/reset.h"
+#include "sysemu/hw_accel.h"
+#include "qemu/error-report.h"
+
+static void spapr_reset_vcpu(PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    target_ulong lpcr;
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
+    cpu_reset(cs);
+
+    env->spr[SPR_HIOR] = 0;
+
+    lpcr = env->spr[SPR_LPCR];
+
+    /* Set emulated LPCR to not send interrupts to hypervisor. Note that
+     * under KVM, the actual HW LPCR will be set differently by KVM itself;
+     * the settings below ensure proper operation with TCG in the absence
+     * of a real hypervisor.
+     *
+     * Disable Power-saving mode Exit Cause exceptions for the CPU, so
+     * we don't get spurious wakeups before an RTAS start-cpu call.
+     * For the same reason, set PSSCR_EC.
+     */
+    lpcr &= ~(LPCR_VPM1 | LPCR_ISL | LPCR_KBV | pcc->lpcr_pm);
+    lpcr |= LPCR_LPES0 | LPCR_LPES1;
+    env->spr[SPR_PSSCR] |= PSSCR_EC;
+
+    ppc_store_lpcr(cpu, lpcr);
+
+    /* Set a full AMOR so guest can use the AMR as it sees fit */
+    env->spr[SPR_AMOR] = 0xffffffffffffffffull;
+
+    spapr_cpu->vpa_addr = 0;
+    spapr_cpu->slb_shadow_addr = 0;
+    spapr_cpu->slb_shadow_size = 0;
+    spapr_cpu->dtl_addr = 0;
+    spapr_cpu->dtl_size = 0;
+
+    spapr_caps_cpu_apply(spapr, cpu);
+
+    kvm_check_mmu(cpu, &error_fatal);
+
+    spapr_irq_cpu_intc_reset(spapr, cpu);
+}
+
+void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
+                               target_ulong r1, target_ulong r3,
+                               target_ulong r4)
+{
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    env->nip = nip;
+    env->gpr[1] = r1;
+    env->gpr[3] = r3;
+    env->gpr[4] = r4;
+    kvmppc_set_reg_ppc_online(cpu, 1);
+    CPU(cpu)->halted = 0;
+    /* Enable Power-saving mode Exit Cause exceptions */
+    ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
+}
+
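spapr_get_cpu_core_type(), defined next, derives the sPAPR core type name from a CPU type name by stripping the CPU-type suffix and re-wrapping the model in the core-type template. A standalone sketch of the same string surgery follows; the literal "-powerpc64-cpu" suffix and "-spapr-cpu-core" pattern are assumptions standing in for POWERPC_CPU_TYPE_SUFFIX and SPAPR_CPU_CORE_TYPE_NAME():

#include <glib.h>
#include <stdio.h>
#include <string.h>

#define CPU_SUFFIX "-powerpc64-cpu"        /* assumed POWERPC_CPU_TYPE_SUFFIX */
#define CORE_NAME(m) m "-spapr-cpu-core"   /* assumed SPAPR_CPU_CORE_TYPE_NAME */

int main(void)
{
    const char *cpu_type = "power9_v2.0" CPU_SUFFIX; /* what -cpu power9_v2.0 resolves to */
    int len = strlen(cpu_type) - strlen(CPU_SUFFIX);
    char *core_type = g_strdup_printf(CORE_NAME("%.*s"), len, cpu_type);

    /* prints: power9_v2.0-powerpc64-cpu -> power9_v2.0-spapr-cpu-core */
    printf("%s -> %s\n", cpu_type, core_type);
    g_free(core_type);
    return 0;
}

The real function additionally validates the result with object_class_by_name(), so an unknown model yields NULL rather than a dangling type name.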
+/*
+ * Return the sPAPR CPU core type for @cpu_type, which essentially is the
+ * CPU model specified with the -cpu cmdline option.
+ */
+const char *spapr_get_cpu_core_type(const char *cpu_type)
+{
+    int len = strlen(cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
+    char *core_type = g_strdup_printf(SPAPR_CPU_CORE_TYPE_NAME("%.*s"),
+                                      len, cpu_type);
+    ObjectClass *oc = object_class_by_name(core_type);
+
+    g_free(core_type);
+    if (!oc) {
+        return NULL;
+    }
+
+    return object_class_get_name(oc);
+}
+
+static bool slb_shadow_needed(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    return spapr_cpu->slb_shadow_addr != 0;
+}
+
+static const VMStateDescription vmstate_spapr_cpu_slb_shadow = {
+    .name = "spapr_cpu/vpa/slb_shadow",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = slb_shadow_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(slb_shadow_addr, SpaprCpuState),
+        VMSTATE_UINT64(slb_shadow_size, SpaprCpuState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool dtl_needed(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    return spapr_cpu->dtl_addr != 0;
+}
+
+static const VMStateDescription vmstate_spapr_cpu_dtl = {
+    .name = "spapr_cpu/vpa/dtl",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = dtl_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(dtl_addr, SpaprCpuState),
+        VMSTATE_UINT64(dtl_size, SpaprCpuState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool vpa_needed(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    return spapr_cpu->vpa_addr != 0;
+}
+
+static const VMStateDescription vmstate_spapr_cpu_vpa = {
+    .name = "spapr_cpu/vpa",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = vpa_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(vpa_addr, SpaprCpuState),
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_spapr_cpu_slb_shadow,
+        &vmstate_spapr_cpu_dtl,
+        NULL
+    }
+};
+
+static const VMStateDescription vmstate_spapr_cpu_state = {
+    .name = "spapr_cpu",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_spapr_cpu_vpa,
+        NULL
+    }
+};
+
+static void spapr_unrealize_vcpu(PowerPCCPU *cpu, SpaprCpuCore *sc)
+{
+    if (!sc->pre_3_0_migration) {
+        vmstate_unregister(NULL, &vmstate_spapr_cpu_state, cpu->machine_data);
+    }
+    spapr_irq_cpu_intc_destroy(SPAPR_MACHINE(qdev_get_machine()), cpu);
+    qdev_unrealize(DEVICE(cpu));
+}
+
+/*
+ * Called when CPUs are hot-plugged.
+ */
+static void spapr_cpu_core_reset(DeviceState *dev)
+{
+    CPUCore *cc = CPU_CORE(dev);
+    SpaprCpuCore *sc = SPAPR_CPU_CORE(dev);
+    int i;
+
+    for (i = 0; i < cc->nr_threads; i++) {
+        spapr_reset_vcpu(sc->threads[i]);
+    }
+}
+
+/*
+ * Called by the machine reset.
+ */
+static void spapr_cpu_core_reset_handler(void *opaque)
+{
+    spapr_cpu_core_reset(opaque);
+}
+
+static void spapr_delete_vcpu(PowerPCCPU *cpu)
+{
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+
+    cpu->machine_data = NULL;
+    g_free(spapr_cpu);
+    object_unparent(OBJECT(cpu));
+}
+
+static void spapr_cpu_core_unrealize(DeviceState *dev)
+{
+    SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
+    CPUCore *cc = CPU_CORE(dev);
+    int i;
+
+    for (i = 0; i < cc->nr_threads; i++) {
+        if (sc->threads[i]) {
+            /*
+             * Since we can get here from the error path of
+             * spapr_cpu_core_realize(), make sure we only unrealize
+             * vCPUs that have already been realized.
+ */ + if (object_property_get_bool(OBJECT(sc->threads[i]), "realized", + &error_abort)) { + spapr_unrealize_vcpu(sc->threads[i], sc); + } + spapr_delete_vcpu(sc->threads[i]); + } + } + g_free(sc->threads); + qemu_unregister_reset(spapr_cpu_core_reset_handler, sc); +} + +static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + SpaprCpuCore *sc, Error **errp) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; + } + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); + + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + + if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; + } + + if (!sc->pre_3_0_migration) { + vmstate_register(NULL, cs->cpu_index, &vmstate_spapr_cpu_state, + cpu->machine_data); + } + return true; +} + +static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp) +{ + SpaprCpuCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(sc); + CPUCore *cc = CPU_CORE(sc); + g_autoptr(Object) obj = NULL; + g_autofree char *id = NULL; + CPUState *cs; + PowerPCCPU *cpu; + + obj = object_new(scc->cpu_type); + + cs = CPU(obj); + cpu = POWERPC_CPU(obj); + /* + * All CPUs start halted. CPU0 is unhalted from the machine level reset code + * and the rest are explicitly started up by the guest using an RTAS call. + */ + cs->start_powered_off = true; + cs->cpu_index = cc->core_id + i; + if (!spapr_set_vcpu_id(cpu, cs->cpu_index, errp)) { + return NULL; + } + + cpu->node_id = sc->node_id; + + id = g_strdup_printf("thread[%d]", i); + object_property_add_child(OBJECT(sc), id, obj); + + cpu->machine_data = g_new0(SpaprCpuState, 1); + + return cpu; +} + +static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) +{ + /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user + * tries to add a sPAPR CPU core to a non-pseries machine. 
+ */ + SpaprMachineState *spapr = + (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(), + TYPE_SPAPR_MACHINE); + SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(OBJECT(dev)); + int i; + + if (!spapr) { + error_setg(errp, TYPE_SPAPR_CPU_CORE " needs a pseries machine"); + return; + } + + qemu_register_reset(spapr_cpu_core_reset_handler, sc); + sc->threads = g_new0(PowerPCCPU *, cc->nr_threads); + for (i = 0; i < cc->nr_threads; i++) { + sc->threads[i] = spapr_create_vcpu(sc, i, errp); + if (!sc->threads[i] || + !spapr_realize_vcpu(sc->threads[i], spapr, sc, errp)) { + spapr_cpu_core_unrealize(dev); + return; + } + } +} + +static Property spapr_cpu_core_properties[] = { + DEFINE_PROP_INT32("node-id", SpaprCpuCore, node_id, CPU_UNSET_NUMA_NODE_ID), + DEFINE_PROP_BOOL("pre-3.0-migration", SpaprCpuCore, pre_3_0_migration, + false), + DEFINE_PROP_END_OF_LIST() +}; + +static void spapr_cpu_core_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + SpaprCpuCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc); + + dc->realize = spapr_cpu_core_realize; + dc->unrealize = spapr_cpu_core_unrealize; + dc->reset = spapr_cpu_core_reset; + device_class_set_props(dc, spapr_cpu_core_properties); + scc->cpu_type = data; +} + +#define DEFINE_SPAPR_CPU_CORE_TYPE(cpu_model) \ + { \ + .parent = TYPE_SPAPR_CPU_CORE, \ + .class_data = (void *) POWERPC_CPU_TYPE_NAME(cpu_model), \ + .class_init = spapr_cpu_core_class_init, \ + .name = SPAPR_CPU_CORE_TYPE_NAME(cpu_model), \ + } + +static const TypeInfo spapr_cpu_core_type_infos[] = { + { + .name = TYPE_SPAPR_CPU_CORE, + .parent = TYPE_CPU_CORE, + .abstract = true, + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8e_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8nvl_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"), +#ifdef CONFIG_KVM + DEFINE_SPAPR_CPU_CORE_TYPE("host"), +#endif +}; + +DEFINE_TYPES(spapr_cpu_core_type_infos) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c new file mode 100644 index 000000000..f8ac0a10d --- /dev/null +++ b/hw/ppc/spapr_drc.c @@ -0,0 +1,1326 @@ +/* + * QEMU SPAPR Dynamic Reconfiguration Connector Implementation + * + * Copyright IBM Corp. 2014 + * + * Authors: + * Michael Roth <mdroth@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qmp/qnull.h" +#include "qemu/cutils.h" +#include "hw/ppc/spapr_drc.h" +#include "qom/object.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/qapi-events-qdev.h" +#include "qapi/visitor.h" +#include "qemu/error-report.h" +#include "hw/ppc/spapr.h" /* for RTAS return codes */ +#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ +#include "hw/ppc/spapr_nvdimm.h" +#include "sysemu/device_tree.h" +#include "sysemu/reset.h" +#include "trace.h" + +#define DRC_CONTAINER_PATH "/dr-connector" +#define DRC_INDEX_TYPE_SHIFT 28 +#define DRC_INDEX_ID_MASK ((1ULL << DRC_INDEX_TYPE_SHIFT) - 1) + +SpaprDrcType spapr_drc_type(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + return 1 << drck->typeshift; +} + +uint32_t spapr_drc_index(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* no set format for a drc index: it only needs to be globally + * unique. this is how we encode the DRC type on bare-metal + * however, so might as well do that here + */ + return (drck->typeshift << DRC_INDEX_TYPE_SHIFT) + | (drc->id & DRC_INDEX_ID_MASK); +} + +static void spapr_drc_release(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + drck->release(drc->dev); + + drc->unplug_requested = false; + g_free(drc->fdt); + drc->fdt = NULL; + drc->fdt_start_offset = 0; + object_property_del(OBJECT(drc), "device"); + drc->dev = NULL; +} + +static uint32_t drc_isolate_physical(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } + + drc->state = SPAPR_DRC_STATE_PHYSICAL_POWERON; + + if (drc->unplug_requested) { + uint32_t drc_index = spapr_drc_index(drc); + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_release(drc); + } + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_unisolate_physical(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + break; /* see below */ + default: + g_assert_not_reached(); + } + + /* cannot unisolate a non-existent resource, and, or resources + * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, + * 13.5.3.5) + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->state = SPAPR_DRC_STATE_PHYSICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_isolate_logical(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } + + /* + * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't + * belong to a DIMM device that is marked for removal. 
+ * + * Currently the guest userspace tool drmgr that drives the memory + * hotplug/unplug will just try to remove a set of 'removable' LMBs + * in response to a hot unplug request that is based on drc-count. + * If the LMB being removed doesn't belong to a DIMM device that is + * actually being unplugged, fail the isolation request here. + */ + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB + && !drc->unplug_requested) { + return RTAS_OUT_HW_ERROR; + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_unisolate_logical(SpaprDrc *drc) +{ + SpaprMachineState *spapr = NULL; + + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + /* + * Unisolating a logical DRC that was marked for unplug + * means that the kernel is refusing the removal. + */ + if (drc->unplug_requested && drc->dev) { + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) { + spapr = SPAPR_MACHINE(qdev_get_machine()); + + spapr_memory_unplug_rollback(spapr, drc->dev); + } + + drc->unplug_requested = false; + + if (drc->dev->id) { + error_report("Device hotunplug rejected by the guest " + "for device %s", drc->dev->id); + } + + qapi_event_send_device_unplug_guest_error(!!drc->dev->id, + drc->dev->id, + drc->dev->canonical_path); + } + + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); + } + + /* Move to AVAILABLE state should have ensured device was present */ + g_assert(drc->dev); + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_set_usable(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + break; /* see below */ + default: + g_assert_not_reached(); + } + + /* if there's no resource/device associated with the DRC, there's + * no way for us to put it in an allocation state consistent with + * being 'USABLE'. 
PAPR 2.7, 13.5.3.4 documents that this should + * result in an RTAS return code of -3 / "no such indicator" + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if (drc->unplug_requested) { + /* Don't allow the guest to move a device away from UNUSABLE + * state when we want to unplug it */ + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_set_unusable(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; + if (drc->unplug_requested) { + uint32_t drc_index = spapr_drc_index(drc); + trace_spapr_drc_set_allocation_state_finalizing(drc_index); + spapr_drc_release(drc); + } + + return RTAS_OUT_SUCCESS; +} + +static char *spapr_drc_name(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* human-readable name for a DRC to encode into the DT + * description. this is mainly only used within a guest in place + * of the unique DRC index. + * + * in the case of VIO/PCI devices, it corresponds to a "location + * code" that maps a logical device/function (DRC index) to a + * physical (or virtual in the case of VIO) location in the system + * by chaining together the "location label" for each + * encapsulating component. + * + * since this is more to do with diagnosing physical hardware + * issues than guest compatibility, we choose location codes/DRC + * names that adhere to the documented format, but avoid encoding + * the entire topology information into the label/code, instead + * just using the location codes based on the labels for the + * endpoints (VIO/PCI adaptor connectors), which is basically just + * "C" followed by an integer ID. + * + * DRC names as documented by PAPR+ v2.7, 13.5.2.4 + * location codes as documented by PAPR+ v2.7, 12.3.1.5 + */ + return g_strdup_printf("%s%d", drck->drc_name_prefix, drc->id); +} + +/* + * dr-entity-sense sensor value + * returned via get-sensor-state RTAS calls + * as expected by state diagram in PAPR+ 2.7, 13.4 + * based on the current allocation/indicator/power states + * for the DR connector. + */ +static SpaprDREntitySense physical_entity_sense(SpaprDrc *drc) +{ + /* this assumes all PCI devices are assigned to a 'live insertion' + * power domain, where QEMU manages power state automatically as + * opposed to the guest. present, non-PCI resources are unaffected + * by power state. + */ + return drc->dev ? 
SPAPR_DR_ENTITY_SENSE_PRESENT + : SPAPR_DR_ENTITY_SENSE_EMPTY; +} + +static SpaprDREntitySense logical_entity_sense(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return SPAPR_DR_ENTITY_SENSE_UNUSABLE; + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + g_assert(drc->dev); + return SPAPR_DR_ENTITY_SENSE_PRESENT; + default: + g_assert_not_reached(); + } +} + +static void prop_get_index(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj); + uint32_t value = spapr_drc_index(drc); + visit_type_uint32(v, name, &value, errp); +} + +static void prop_get_fdt(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj); + QNull *null = NULL; + int fdt_offset_next, fdt_offset, fdt_depth; + void *fdt; + + if (!drc->fdt) { + visit_type_null(v, NULL, &null, errp); + qobject_unref(null); + return; + } + + fdt = drc->fdt; + fdt_offset = drc->fdt_start_offset; + fdt_depth = 0; + + do { + const char *name = NULL; + const struct fdt_property *prop = NULL; + int prop_len = 0, name_len = 0; + uint32_t tag; + bool ok; + + tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next); + switch (tag) { + case FDT_BEGIN_NODE: + fdt_depth++; + name = fdt_get_name(fdt, fdt_offset, &name_len); + if (!visit_start_struct(v, name, NULL, 0, errp)) { + return; + } + break; + case FDT_END_NODE: + /* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */ + g_assert(fdt_depth > 0); + ok = visit_check_struct(v, errp); + visit_end_struct(v, NULL); + if (!ok) { + return; + } + fdt_depth--; + break; + case FDT_PROP: { + int i; + prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len); + name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + if (!visit_start_list(v, name, NULL, 0, errp)) { + return; + } + for (i = 0; i < prop_len; i++) { + if (!visit_type_uint8(v, NULL, (uint8_t *)&prop->data[i], + errp)) { + return; + } + } + ok = visit_check_list(v, errp); + visit_end_list(v, NULL); + if (!ok) { + return; + } + break; + } + default: + error_report("device FDT in unexpected state: %d", tag); + abort(); + } + fdt_offset = fdt_offset_next; + } while (fdt_depth != 0); +} + +void spapr_drc_attach(SpaprDrc *drc, DeviceState *d) +{ + trace_spapr_drc_attach(spapr_drc_index(drc)); + + g_assert(!drc->dev); + g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) + || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON)); + + drc->dev = d; + + object_property_add_link(OBJECT(drc), "device", + object_get_typename(OBJECT(drc->dev)), + (Object **)(&drc->dev), + NULL, 0); +} + +void spapr_drc_unplug_request(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + trace_spapr_drc_unplug_request(spapr_drc_index(drc)); + + g_assert(drc->dev); + + drc->unplug_requested = true; + + if (drc->state != drck->empty_state) { + trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc)); + return; + } + + spapr_drc_release(drc); +} + +bool spapr_drc_reset(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + bool unplug_completed = false; + + trace_spapr_drc_reset(spapr_drc_index(drc)); + + /* immediately upon reset we can safely assume DRCs whose devices + * are pending removal can be safely removed. 
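An aside on prop_get_fdt() above (the same idiom reappears in rtas_ibm_configure_connector() later in this file): walking a flattened device tree with libfdt boils down to fdt_next_tag() plus a depth counter. A minimal sketch, assuming fdt points at a valid blob and offset sits at an FDT_BEGIN_NODE:

#include <libfdt.h>

/* Visit every node and property of the subtree rooted at 'offset'. */
static void fdt_walk(const void *fdt, int offset)
{
    int depth = 0, next;

    do {
        uint32_t tag = fdt_next_tag(fdt, offset, &next);

        switch (tag) {
        case FDT_BEGIN_NODE:
            depth++;   /* fdt_get_name(fdt, offset, NULL) names the node */
            break;
        case FDT_END_NODE:
            depth--;   /* back up one level; reaching 0 ends the walk */
            break;
        case FDT_PROP:
            /* fdt_get_property_by_offset(fdt, offset, &len) fetches it */
            break;
        default:
            break;     /* FDT_NOP and friends: keep scanning */
        }
        offset = next;
    } while (depth > 0);
}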
+ */ + if (drc->unplug_requested) { + spapr_drc_release(drc); + unplug_completed = true; + } + + if (drc->dev) { + /* A device present at reset is ready to go, same as coldplugged */ + drc->state = drck->ready_state; + /* + * Ensure that we are able to send the FDT fragment again + * via configure-connector call if the guest requests. + */ + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + } else { + drc->state = drck->empty_state; + drc->ccs_offset = -1; + drc->ccs_depth = -1; + } + + return unplug_completed; +} + +static bool spapr_drc_unplug_requested_needed(void *opaque) +{ + return spapr_drc_unplug_requested(opaque); +} + +static const VMStateDescription vmstate_spapr_drc_unplug_requested = { + .name = "spapr_drc/unplug_requested", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_unplug_requested_needed, + .fields = (VMStateField []) { + VMSTATE_BOOL(unplug_requested, SpaprDrc), + VMSTATE_END_OF_LIST() + } +}; + +static bool spapr_drc_needed(void *opaque) +{ + SpaprDrc *drc = opaque; + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* + * If no dev is plugged in there is no need to migrate the DRC state + * nor to reset the DRC at CAS. + */ + if (!drc->dev) { + return false; + } + + /* + * We need to reset the DRC at CAS or to migrate the DRC state if it's + * not equal to the expected long-term state, which is the same as the + * coldplugged initial state, or if an unplug request is pending. + */ + return drc->state != drck->ready_state || + spapr_drc_unplug_requested(drc); +} + +static const VMStateDescription vmstate_spapr_drc = { + .name = "spapr_drc", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(state, SpaprDrc), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_spapr_drc_unplug_requested, + NULL + } +}; + +static void drc_realize(DeviceState *d, Error **errp) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(d); + Object *root_container; + gchar *link_name; + const char *child_name; + + trace_spapr_drc_realize(spapr_drc_index(drc)); + /* NOTE: we do this as part of realize/unrealize due to the fact + * that the guest will communicate with the DRC via RTAS calls + * referencing the global DRC index. 
By unlinking the DRC + * from DRC_CONTAINER_PATH/<drc_index> we effectively make it + * inaccessible by the guest, since lookups rely on this path + * existing in the composition tree + */ + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + link_name = g_strdup_printf("%x", spapr_drc_index(drc)); + child_name = object_get_canonical_path_component(OBJECT(drc)); + trace_spapr_drc_realize_child(spapr_drc_index(drc), child_name); + object_property_add_alias(root_container, link_name, + drc->owner, child_name); + g_free(link_name); + vmstate_register(VMSTATE_IF(drc), spapr_drc_index(drc), &vmstate_spapr_drc, + drc); + trace_spapr_drc_realize_complete(spapr_drc_index(drc)); +} + +static void drc_unrealize(DeviceState *d) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(d); + Object *root_container; + gchar *name; + + trace_spapr_drc_unrealize(spapr_drc_index(drc)); + vmstate_unregister(VMSTATE_IF(drc), &vmstate_spapr_drc, drc); + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + name = g_strdup_printf("%x", spapr_drc_index(drc)); + object_property_del(root_container, name); + g_free(name); +} + +SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type, + uint32_t id) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(object_new(type)); + char *prop_name; + + drc->id = id; + drc->owner = owner; + prop_name = g_strdup_printf("dr-connector[%"PRIu32"]", + spapr_drc_index(drc)); + object_property_add_child(owner, prop_name, OBJECT(drc)); + object_unref(OBJECT(drc)); + qdev_realize(DEVICE(drc), NULL, NULL); + g_free(prop_name); + + return drc; +} + +static void spapr_dr_connector_instance_init(Object *obj) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj); + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + object_property_add_uint32_ptr(obj, "id", &drc->id, OBJ_PROP_FLAG_READ); + object_property_add(obj, "index", "uint32", prop_get_index, + NULL, NULL, NULL); + object_property_add(obj, "fdt", "struct", prop_get_fdt, + NULL, NULL, NULL); + drc->state = drck->empty_state; +} + +static void spapr_dr_connector_class_init(ObjectClass *k, void *data) +{ + DeviceClass *dk = DEVICE_CLASS(k); + + dk->realize = drc_realize; + dk->unrealize = drc_unrealize; + /* + * Reason: DR connector needs to be wired to either the machine or to a + * PHB in spapr_dr_connector_new(). 
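For reference, the wiring this refers to happens at the owner's init time. A call-site sketch, where machine and lmb_index are illustrative placeholders (real callers derive the id from the entity's index or address):

/* e.g. at machine init, one connector per hot-pluggable LMB: */
SpaprDrc *drc = spapr_dr_connector_new(OBJECT(machine),     /* owner (illustrative) */
                                       TYPE_SPAPR_DRC_LMB,  /* connector type */
                                       lmb_index);          /* illustrative id */

Note that spapr_dr_connector_new() above already realizes the connector and parents it under the owner, dropping its own reference, so the caller does not manage the DRC's lifetime.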
+ */ + dk->user_creatable = false; +} + +static bool drc_physical_needed(void *opaque) +{ + SpaprDrcPhysical *drcp = (SpaprDrcPhysical *)opaque; + SpaprDrc *drc = SPAPR_DR_CONNECTOR(drcp); + + if ((drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_ACTIVE)) + || (!drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_INACTIVE))) { + return false; + } + return true; +} + +static const VMStateDescription vmstate_spapr_drc_physical = { + .name = "spapr_drc/physical", + .version_id = 1, + .minimum_version_id = 1, + .needed = drc_physical_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(dr_indicator, SpaprDrcPhysical), + VMSTATE_END_OF_LIST() + } +}; + +static void drc_physical_reset(void *opaque) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(opaque); + SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(drc); + + if (drc->dev) { + drcp->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; + } else { + drcp->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; + } +} + +static void realize_physical(DeviceState *d, Error **errp) +{ + SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(d); + Error *local_err = NULL; + + drc_realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + vmstate_register(VMSTATE_IF(drcp), + spapr_drc_index(SPAPR_DR_CONNECTOR(drcp)), + &vmstate_spapr_drc_physical, drcp); + qemu_register_reset(drc_physical_reset, drcp); +} + +static void unrealize_physical(DeviceState *d) +{ + SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(d); + + drc_unrealize(d); + vmstate_unregister(VMSTATE_IF(drcp), &vmstate_spapr_drc_physical, drcp); + qemu_unregister_reset(drc_physical_reset, drcp); +} + +static void spapr_drc_physical_class_init(ObjectClass *k, void *data) +{ + DeviceClass *dk = DEVICE_CLASS(k); + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + dk->realize = realize_physical; + dk->unrealize = unrealize_physical; + drck->dr_entity_sense = physical_entity_sense; + drck->isolate = drc_isolate_physical; + drck->unisolate = drc_unisolate_physical; + drck->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON; +} + +static void spapr_drc_logical_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->dr_entity_sense = logical_entity_sense; + drck->isolate = drc_isolate_logical; + drck->unisolate = drc_unisolate_logical; + drck->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; +} + +static void spapr_drc_cpu_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU; + drck->typename = "CPU"; + drck->drc_name_prefix = "CPU "; + drck->release = spapr_core_release; + drck->dt_populate = spapr_core_dt_populate; +} + +static void spapr_drc_pci_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI; + drck->typename = "28"; + drck->drc_name_prefix = "C"; + drck->release = spapr_phb_remove_pci_device_cb; + drck->dt_populate = spapr_pci_dt_populate; +} + +static void spapr_drc_lmb_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB; + drck->typename = "MEM"; + drck->drc_name_prefix = "LMB "; + drck->release = spapr_lmb_release; + drck->dt_populate = spapr_lmb_dt_populate; +} + +static void spapr_drc_phb_class_init(ObjectClass *k, void *data) +{ + 
SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB; + drck->typename = "PHB"; + drck->drc_name_prefix = "PHB "; + drck->release = spapr_phb_release; + drck->dt_populate = spapr_phb_dt_populate; +} + +static void spapr_drc_pmem_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM; + drck->typename = "PMEM"; + drck->drc_name_prefix = "PMEM "; + drck->release = NULL; + drck->dt_populate = spapr_pmem_dt_populate; +} + +static const TypeInfo spapr_dr_connector_info = { + .name = TYPE_SPAPR_DR_CONNECTOR, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprDrc), + .instance_init = spapr_dr_connector_instance_init, + .class_size = sizeof(SpaprDrcClass), + .class_init = spapr_dr_connector_class_init, + .abstract = true, +}; + +static const TypeInfo spapr_drc_physical_info = { + .name = TYPE_SPAPR_DRC_PHYSICAL, + .parent = TYPE_SPAPR_DR_CONNECTOR, + .instance_size = sizeof(SpaprDrcPhysical), + .class_init = spapr_drc_physical_class_init, + .abstract = true, +}; + +static const TypeInfo spapr_drc_logical_info = { + .name = TYPE_SPAPR_DRC_LOGICAL, + .parent = TYPE_SPAPR_DR_CONNECTOR, + .class_init = spapr_drc_logical_class_init, + .abstract = true, +}; + +static const TypeInfo spapr_drc_cpu_info = { + .name = TYPE_SPAPR_DRC_CPU, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .class_init = spapr_drc_cpu_class_init, +}; + +static const TypeInfo spapr_drc_pci_info = { + .name = TYPE_SPAPR_DRC_PCI, + .parent = TYPE_SPAPR_DRC_PHYSICAL, + .class_init = spapr_drc_pci_class_init, +}; + +static const TypeInfo spapr_drc_lmb_info = { + .name = TYPE_SPAPR_DRC_LMB, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .class_init = spapr_drc_lmb_class_init, +}; + +static const TypeInfo spapr_drc_phb_info = { + .name = TYPE_SPAPR_DRC_PHB, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .instance_size = sizeof(SpaprDrc), + .class_init = spapr_drc_phb_class_init, +}; + +static const TypeInfo spapr_drc_pmem_info = { + .name = TYPE_SPAPR_DRC_PMEM, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .class_init = spapr_drc_pmem_class_init, +}; + +/* helper functions for external users */ + +SpaprDrc *spapr_drc_by_index(uint32_t index) +{ + Object *obj; + gchar *name; + + name = g_strdup_printf("%s/%x", DRC_CONTAINER_PATH, index); + obj = object_resolve_path(name, NULL); + g_free(name); + + return !obj ? NULL : SPAPR_DR_CONNECTOR(obj); +} + +SpaprDrc *spapr_drc_by_id(const char *type, uint32_t id) +{ + SpaprDrcClass *drck + = SPAPR_DR_CONNECTOR_CLASS(object_class_by_name(type)); + + return spapr_drc_by_index(drck->typeshift << DRC_INDEX_TYPE_SHIFT + | (id & DRC_INDEX_ID_MASK)); +} + +/** + * spapr_dt_drc + * + * @fdt: libfdt device tree + * @path: path in the DT to generate properties + * @owner: parent Object/DeviceState for which to generate DRC + * descriptions for + * @drc_type_mask: mask of SpaprDrcType values corresponding + * to the types of DRCs to generate entries for + * + * generate OF properties to describe DRC topology/indices to guests + * + * as documented in PAPR+ v2.1, 13.5.2 + */ +int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask) +{ + Object *root_container; + ObjectProperty *prop; + ObjectPropertyIterator iter; + uint32_t drc_count = 0; + GArray *drc_indexes, *drc_power_domains; + GString *drc_names, *drc_types; + int ret; + + /* + * This should really be only called once per node since it overwrites + * the OF properties if they already exist. 
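Back to spapr_drc_by_id() above: the index round trip it performs is plain bit arithmetic. A self-contained sketch; the typeshift value 1 is illustrative, the real values come from the SPAPR_DR_CONNECTOR_TYPE_SHIFT_* definitions:

#include <stdint.h>
#include <stdio.h>

#define DRC_INDEX_TYPE_SHIFT 28
#define DRC_INDEX_ID_MASK ((1ULL << DRC_INDEX_TYPE_SHIFT) - 1)

int main(void)
{
    uint32_t typeshift = 1;  /* illustrative connector type */
    uint32_t id = 5;
    uint32_t index = (typeshift << DRC_INDEX_TYPE_SHIFT) | (id & DRC_INDEX_ID_MASK);

    printf("index = 0x%08x\n", index);  /* 0x10000005 */
    printf("typeshift = %u, id = %u\n",
           index >> DRC_INDEX_TYPE_SHIFT,            /* 1 */
           (uint32_t)(index & DRC_INDEX_ID_MASK));   /* 5 */
    return 0;
}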
+ */ + g_assert(!fdt_get_property(fdt, offset, "ibm,drc-indexes", NULL)); + + /* the first entry of each properties is a 32-bit integer encoding + * the number of elements in the array. we won't know this until + * we complete the iteration through all the matching DRCs, but + * reserve the space now and set the offsets accordingly so we + * can fill them in later. + */ + drc_indexes = g_array_new(false, true, sizeof(uint32_t)); + drc_indexes = g_array_set_size(drc_indexes, 1); + drc_power_domains = g_array_new(false, true, sizeof(uint32_t)); + drc_power_domains = g_array_set_size(drc_power_domains, 1); + drc_names = g_string_set_size(g_string_new(NULL), sizeof(uint32_t)); + drc_types = g_string_set_size(g_string_new(NULL), sizeof(uint32_t)); + + /* aliases for all DRConnector objects will be rooted in QOM + * composition tree at DRC_CONTAINER_PATH + */ + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + + object_property_iter_init(&iter, root_container); + while ((prop = object_property_iter_next(&iter))) { + Object *obj; + SpaprDrc *drc; + SpaprDrcClass *drck; + char *drc_name = NULL; + uint32_t drc_index, drc_power_domain; + + if (!strstart(prop->type, "link<", NULL)) { + continue; + } + + obj = object_property_get_link(root_container, prop->name, + &error_abort); + drc = SPAPR_DR_CONNECTOR(obj); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + if (owner && (drc->owner != owner)) { + continue; + } + + if ((spapr_drc_type(drc) & drc_type_mask) == 0) { + continue; + } + + drc_count++; + + /* ibm,drc-indexes */ + drc_index = cpu_to_be32(spapr_drc_index(drc)); + g_array_append_val(drc_indexes, drc_index); + + /* ibm,drc-power-domains */ + drc_power_domain = cpu_to_be32(-1); + g_array_append_val(drc_power_domains, drc_power_domain); + + /* ibm,drc-names */ + drc_name = spapr_drc_name(drc); + drc_names = g_string_append(drc_names, drc_name); + drc_names = g_string_insert_len(drc_names, -1, "\0", 1); + g_free(drc_name); + + /* ibm,drc-types */ + drc_types = g_string_append(drc_types, drck->typename); + drc_types = g_string_insert_len(drc_types, -1, "\0", 1); + } + + /* now write the drc count into the space we reserved at the + * beginning of the arrays previously + */ + *(uint32_t *)drc_indexes->data = cpu_to_be32(drc_count); + *(uint32_t *)drc_power_domains->data = cpu_to_be32(drc_count); + *(uint32_t *)drc_names->str = cpu_to_be32(drc_count); + *(uint32_t *)drc_types->str = cpu_to_be32(drc_count); + + ret = fdt_setprop(fdt, offset, "ibm,drc-indexes", + drc_indexes->data, + drc_indexes->len * sizeof(uint32_t)); + if (ret) { + error_report("Couldn't create ibm,drc-indexes property"); + goto out; + } + + ret = fdt_setprop(fdt, offset, "ibm,drc-power-domains", + drc_power_domains->data, + drc_power_domains->len * sizeof(uint32_t)); + if (ret) { + error_report("Couldn't finalize ibm,drc-power-domains property"); + goto out; + } + + ret = fdt_setprop(fdt, offset, "ibm,drc-names", + drc_names->str, drc_names->len); + if (ret) { + error_report("Couldn't finalize ibm,drc-names property"); + goto out; + } + + ret = fdt_setprop(fdt, offset, "ibm,drc-types", + drc_types->str, drc_types->len); + if (ret) { + error_report("Couldn't finalize ibm,drc-types property"); + goto out; + } + +out: + g_array_free(drc_indexes, true); + g_array_free(drc_power_domains, true); + g_string_free(drc_names, true); + g_string_free(drc_types, true); + + return ret; +} + +void spapr_drc_reset_all(SpaprMachineState *spapr) +{ + Object *drc_container; + ObjectProperty *prop; + ObjectPropertyIterator 
iter; + + drc_container = container_get(object_get_root(), DRC_CONTAINER_PATH); +restart: + object_property_iter_init(&iter, drc_container); + while ((prop = object_property_iter_next(&iter))) { + SpaprDrc *drc; + + if (!strstart(prop->type, "link<", NULL)) { + continue; + } + drc = SPAPR_DR_CONNECTOR(object_property_get_link(drc_container, + prop->name, + &error_abort)); + + /* + * This will complete any pending plug/unplug requests. + * In case of a unplugged PHB or PCI bridge, this will + * cause some DRCs to be destroyed and thus potentially + * invalidate the iterator. + */ + if (spapr_drc_reset(drc)) { + goto restart; + } + } +} + +/* + * RTAS calls + */ + +static uint32_t rtas_set_isolation_state(uint32_t idx, uint32_t state) +{ + SpaprDrc *drc = spapr_drc_by_index(idx); + SpaprDrcClass *drck; + + if (!drc) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + trace_spapr_drc_set_isolation_state(spapr_drc_index(drc), state); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + switch (state) { + case SPAPR_DR_ISOLATION_STATE_ISOLATED: + return drck->isolate(drc); + + case SPAPR_DR_ISOLATION_STATE_UNISOLATED: + return drck->unisolate(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } +} + +static uint32_t rtas_set_allocation_state(uint32_t idx, uint32_t state) +{ + SpaprDrc *drc = spapr_drc_by_index(idx); + + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_LOGICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + trace_spapr_drc_set_allocation_state(spapr_drc_index(drc), state); + + switch (state) { + case SPAPR_DR_ALLOCATION_STATE_USABLE: + return drc_set_usable(drc); + + case SPAPR_DR_ALLOCATION_STATE_UNUSABLE: + return drc_set_unusable(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } +} + +static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state) +{ + SpaprDrc *drc = spapr_drc_by_index(idx); + + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_PHYSICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if ((state != SPAPR_DR_INDICATOR_INACTIVE) + && (state != SPAPR_DR_INDICATOR_ACTIVE) + && (state != SPAPR_DR_INDICATOR_IDENTIFY) + && (state != SPAPR_DR_INDICATOR_ACTION)) { + return RTAS_OUT_PARAM_ERROR; /* bad state parameter */ + } + + trace_spapr_drc_set_dr_indicator(idx, state); + SPAPR_DRC_PHYSICAL(drc)->dr_indicator = state; + return RTAS_OUT_SUCCESS; +} + +static void rtas_set_indicator(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, + uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t type, idx, state; + uint32_t ret = RTAS_OUT_SUCCESS; + + if (nargs != 3 || nret != 1) { + ret = RTAS_OUT_PARAM_ERROR; + goto out; + } + + type = rtas_ld(args, 0); + idx = rtas_ld(args, 1); + state = rtas_ld(args, 2); + + switch (type) { + case RTAS_SENSOR_TYPE_ISOLATION_STATE: + ret = rtas_set_isolation_state(idx, state); + break; + case RTAS_SENSOR_TYPE_DR: + ret = rtas_set_dr_indicator(idx, state); + break; + case RTAS_SENSOR_TYPE_ALLOCATION_STATE: + ret = rtas_set_allocation_state(idx, state); + break; + default: + ret = RTAS_OUT_NOT_SUPPORTED; + } + +out: + rtas_st(rets, 0, ret); +} + +static void rtas_get_sensor_state(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + uint32_t sensor_type; + uint32_t sensor_index; + uint32_t sensor_state = 0; + SpaprDrc *drc; + SpaprDrcClass *drck; + uint32_t ret = RTAS_OUT_SUCCESS; + + if (nargs != 2 || nret != 2) { + ret = RTAS_OUT_PARAM_ERROR; + goto out; + } + + sensor_type = rtas_ld(args, 
0); + sensor_index = rtas_ld(args, 1); + + if (sensor_type != RTAS_SENSOR_TYPE_ENTITY_SENSE) { + /* currently only DR-related sensors are implemented */ + trace_spapr_rtas_get_sensor_state_not_supported(sensor_index, + sensor_type); + ret = RTAS_OUT_NOT_SUPPORTED; + goto out; + } + + drc = spapr_drc_by_index(sensor_index); + if (!drc) { + trace_spapr_rtas_get_sensor_state_invalid(sensor_index); + ret = RTAS_OUT_PARAM_ERROR; + goto out; + } + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + sensor_state = drck->dr_entity_sense(drc); + +out: + rtas_st(rets, 0, ret); + rtas_st(rets, 1, sensor_state); +} + +/* configure-connector work area offsets, int32_t units for field + * indexes, bytes for field offset/len values. + * + * as documented by PAPR+ v2.7, 13.5.3.5 + */ +#define CC_IDX_NODE_NAME_OFFSET 2 +#define CC_IDX_PROP_NAME_OFFSET 2 +#define CC_IDX_PROP_LEN 3 +#define CC_IDX_PROP_DATA_OFFSET 4 +#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4) +#define CC_WA_LEN 4096 + +static void configure_connector_st(target_ulong addr, target_ulong offset, + const void *buf, size_t len) +{ + cpu_physical_memory_write(ppc64_phys_to_real(addr + offset), + buf, MIN(len, CC_WA_LEN - offset)); +} + +static void rtas_ibm_configure_connector(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + uint64_t wa_addr; + uint64_t wa_offset; + uint32_t drc_index; + SpaprDrc *drc; + SpaprDrcClass *drck; + SpaprDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE; + int rc; + + if (nargs != 2 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0); + + drc_index = rtas_ld(wa_addr, 0); + drc = spapr_drc_by_index(drc_index); + if (!drc) { + trace_spapr_rtas_ibm_configure_connector_invalid(drc_index); + rc = RTAS_OUT_PARAM_ERROR; + goto out; + } + + if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE) + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE) + && (drc->state != SPAPR_DRC_STATE_LOGICAL_CONFIGURED) + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_CONFIGURED)) { + /* + * Need to unisolate the device before configuring + * or it should already be in configured state to + * allow configure-connector be called repeatedly. + */ + rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE; + goto out; + } + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* + * This indicates that the kernel is reconfiguring a LMB due to + * a failed hotunplug. Rollback the DIMM unplug process. 
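For orientation, the CC_* constants defined before rtas_ibm_configure_connector() give the 4096-byte work area the following shape for an FDT_PROP response (word indexes are int32_t slots, offsets in bytes; the property name "reg" is illustrative):

    word 0 (byte  0): DRC index, written by the guest when making the call
    word 2 (byte  8): CC_IDX_PROP_NAME_OFFSET -> 20 (CC_VAL_DATA_OFFSET)
    word 3 (byte 12): CC_IDX_PROP_LEN         -> length of the property data
    word 4 (byte 16): CC_IDX_PROP_DATA_OFFSET -> 20 + strlen("reg") + 1 = 24
    byte 20 onward:   "reg\0" followed by the property data itself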
+ */ + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB && + drc->unplug_requested) { + spapr_memory_unplug_rollback(spapr, drc->dev); + } + + if (!drc->fdt) { + void *fdt; + int fdt_size; + + fdt = create_device_tree(&fdt_size); + + if (drck->dt_populate(drc, spapr, fdt, &drc->fdt_start_offset, + NULL)) { + g_free(fdt); + rc = SPAPR_DR_CC_RESPONSE_ERROR; + goto out; + } + + drc->fdt = fdt; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + } + + do { + uint32_t tag; + const char *name; + const struct fdt_property *prop; + int fdt_offset_next, prop_len; + + tag = fdt_next_tag(drc->fdt, drc->ccs_offset, &fdt_offset_next); + + switch (tag) { + case FDT_BEGIN_NODE: + drc->ccs_depth++; + name = fdt_get_name(drc->fdt, drc->ccs_offset, NULL); + + /* provide the name of the next OF node */ + wa_offset = CC_VAL_DATA_OFFSET; + rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset); + configure_connector_st(wa_addr, wa_offset, name, strlen(name) + 1); + resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD; + break; + case FDT_END_NODE: + drc->ccs_depth--; + if (drc->ccs_depth == 0) { + uint32_t drc_index = spapr_drc_index(drc); + + /* done sending the device tree, move to configured state */ + trace_spapr_drc_set_configured(drc_index); + drc->state = drck->ready_state; + /* + * Ensure that we are able to send the FDT fragment + * again via configure-connector call if the guest requests. + */ + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + fdt_offset_next = drc->fdt_start_offset; + resp = SPAPR_DR_CC_RESPONSE_SUCCESS; + } else { + resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT; + } + break; + case FDT_PROP: + prop = fdt_get_property_by_offset(drc->fdt, drc->ccs_offset, + &prop_len); + name = fdt_string(drc->fdt, fdt32_to_cpu(prop->nameoff)); + + /* provide the name of the next OF property */ + wa_offset = CC_VAL_DATA_OFFSET; + rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset); + configure_connector_st(wa_addr, wa_offset, name, strlen(name) + 1); + + /* provide the length and value of the OF property. 
data gets
+             * placed immediately after the NULL terminator of the OF
+             * property's name string
+             */
+            wa_offset += strlen(name) + 1;
+            rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len);
+            rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset);
+            configure_connector_st(wa_addr, wa_offset, prop->data, prop_len);
+            resp = SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
+            break;
+        case FDT_END:
+            resp = SPAPR_DR_CC_RESPONSE_ERROR;
+            /* fall through */
+        default:
+            /* keep seeking for an actionable tag */
+            break;
+        }
+        if (drc->ccs_offset >= 0) {
+            drc->ccs_offset = fdt_offset_next;
+        }
+    } while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE);
+
+    rc = resp;
+out:
+    rtas_st(rets, 0, rc);
+}
+
+static void spapr_drc_register_types(void)
+{
+    type_register_static(&spapr_dr_connector_info);
+    type_register_static(&spapr_drc_physical_info);
+    type_register_static(&spapr_drc_logical_info);
+    type_register_static(&spapr_drc_cpu_info);
+    type_register_static(&spapr_drc_pci_info);
+    type_register_static(&spapr_drc_lmb_info);
+    type_register_static(&spapr_drc_phb_info);
+    type_register_static(&spapr_drc_pmem_info);
+
+    spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator",
+                        rtas_set_indicator);
+    spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state",
+                        rtas_get_sensor_state);
+    spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector",
+                        rtas_ibm_configure_connector);
+}
+type_init(spapr_drc_register_types)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
new file mode 100644
index 000000000..630e86282
--- /dev/null
+++ b/hw/ppc/spapr_events.c
@@ -0,0 +1,1082 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * RTAS events handling
+ *
+ * Copyright (c) 2012 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/device_tree.h" +#include "sysemu/runstate.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/pci/pci.h" +#include "hw/irq.h" +#include "hw/pci-host/spapr.h" +#include "hw/ppc/spapr_drc.h" +#include "qemu/help_option.h" +#include "qemu/bcd.h" +#include "qemu/main-loop.h" +#include "hw/ppc/spapr_ovec.h" +#include <libfdt.h> +#include "migration/blocker.h" + +#define RTAS_LOG_VERSION_MASK 0xff000000 +#define RTAS_LOG_VERSION_6 0x06000000 +#define RTAS_LOG_SEVERITY_MASK 0x00e00000 +#define RTAS_LOG_SEVERITY_ALREADY_REPORTED 0x00c00000 +#define RTAS_LOG_SEVERITY_FATAL 0x00a00000 +#define RTAS_LOG_SEVERITY_ERROR 0x00800000 +#define RTAS_LOG_SEVERITY_ERROR_SYNC 0x00600000 +#define RTAS_LOG_SEVERITY_WARNING 0x00400000 +#define RTAS_LOG_SEVERITY_EVENT 0x00200000 +#define RTAS_LOG_SEVERITY_NO_ERROR 0x00000000 +#define RTAS_LOG_DISPOSITION_MASK 0x00180000 +#define RTAS_LOG_DISPOSITION_FULLY_RECOVERED 0x00000000 +#define RTAS_LOG_DISPOSITION_LIMITED_RECOVERY 0x00080000 +#define RTAS_LOG_DISPOSITION_NOT_RECOVERED 0x00100000 +#define RTAS_LOG_OPTIONAL_PART_PRESENT 0x00040000 +#define RTAS_LOG_INITIATOR_MASK 0x0000f000 +#define RTAS_LOG_INITIATOR_UNKNOWN 0x00000000 +#define RTAS_LOG_INITIATOR_CPU 0x00001000 +#define RTAS_LOG_INITIATOR_PCI 0x00002000 +#define RTAS_LOG_INITIATOR_MEMORY 0x00004000 +#define RTAS_LOG_INITIATOR_HOTPLUG 0x00006000 +#define RTAS_LOG_TARGET_MASK 0x00000f00 +#define RTAS_LOG_TARGET_UNKNOWN 0x00000000 +#define RTAS_LOG_TARGET_CPU 0x00000100 +#define RTAS_LOG_TARGET_PCI 0x00000200 +#define RTAS_LOG_TARGET_MEMORY 0x00000400 +#define RTAS_LOG_TARGET_HOTPLUG 0x00000600 +#define RTAS_LOG_TYPE_MASK 0x000000ff +#define RTAS_LOG_TYPE_OTHER 0x00000000 +#define RTAS_LOG_TYPE_RETRY 0x00000001 +#define RTAS_LOG_TYPE_TCE_ERR 0x00000002 +#define RTAS_LOG_TYPE_INTERN_DEV_FAIL 0x00000003 +#define RTAS_LOG_TYPE_TIMEOUT 0x00000004 +#define RTAS_LOG_TYPE_DATA_PARITY 0x00000005 +#define RTAS_LOG_TYPE_ADDR_PARITY 0x00000006 +#define RTAS_LOG_TYPE_CACHE_PARITY 0x00000007 +#define RTAS_LOG_TYPE_ADDR_INVALID 0x00000008 +#define RTAS_LOG_TYPE_ECC_UNCORR 0x00000009 +#define RTAS_LOG_TYPE_ECC_CORR 0x0000000a +#define RTAS_LOG_TYPE_EPOW 0x00000040 +#define RTAS_LOG_TYPE_HOTPLUG 0x000000e5 + +struct rtas_error_log { + uint32_t summary; + uint32_t extended_length; +} QEMU_PACKED; + +struct rtas_event_log_v6 { + uint8_t b0; +#define RTAS_LOG_V6_B0_VALID 0x80 +#define RTAS_LOG_V6_B0_UNRECOVERABLE_ERROR 0x40 +#define RTAS_LOG_V6_B0_RECOVERABLE_ERROR 0x20 +#define RTAS_LOG_V6_B0_DEGRADED_OPERATION 0x10 +#define RTAS_LOG_V6_B0_PREDICTIVE_ERROR 0x08 +#define RTAS_LOG_V6_B0_NEW_LOG 0x04 +#define RTAS_LOG_V6_B0_BIGENDIAN 0x02 + uint8_t _resv1; + uint8_t b2; +#define RTAS_LOG_V6_B2_POWERPC_FORMAT 0x80 +#define RTAS_LOG_V6_B2_LOG_FORMAT_MASK 0x0f +#define RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT 0x0e + uint8_t _resv2[9]; + uint32_t company; +#define RTAS_LOG_V6_COMPANY_IBM 0x49424d00 /* IBM<null> */ +} QEMU_PACKED; + +struct rtas_event_log_v6_section_header { + uint16_t section_id; + uint16_t section_length; + uint8_t section_version; + uint8_t section_subtype; + uint16_t creator_component_id; +} QEMU_PACKED; + +struct rtas_event_log_v6_maina { +#define RTAS_LOG_V6_SECTION_ID_MAINA 0x5048 /* PH */ + struct rtas_event_log_v6_section_header hdr; + uint32_t creation_date; /* BCD: YYYYMMDD */ + uint32_t creation_time; /* BCD: HHMMSS00 */ + uint8_t _platform1[8]; + char creator_id; + uint8_t 
_resv1[2]; + uint8_t section_count; + uint8_t _resv2[4]; + uint8_t _platform2[8]; + uint32_t plid; + uint8_t _platform3[4]; +} QEMU_PACKED; + +struct rtas_event_log_v6_mainb { +#define RTAS_LOG_V6_SECTION_ID_MAINB 0x5548 /* UH */ + struct rtas_event_log_v6_section_header hdr; + uint8_t subsystem_id; + uint8_t _platform1; + uint8_t event_severity; + uint8_t event_subtype; + uint8_t _platform2[4]; + uint8_t _resv1[2]; + uint16_t action_flags; + uint8_t _resv2[4]; +} QEMU_PACKED; + +struct rtas_event_log_v6_epow { +#define RTAS_LOG_V6_SECTION_ID_EPOW 0x4550 /* EP */ + struct rtas_event_log_v6_section_header hdr; + uint8_t sensor_value; +#define RTAS_LOG_V6_EPOW_ACTION_RESET 0 +#define RTAS_LOG_V6_EPOW_ACTION_WARN_COOLING 1 +#define RTAS_LOG_V6_EPOW_ACTION_WARN_POWER 2 +#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN 3 +#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_HALT 4 +#define RTAS_LOG_V6_EPOW_ACTION_MAIN_ENCLOSURE 5 +#define RTAS_LOG_V6_EPOW_ACTION_POWER_OFF 7 + uint8_t event_modifier; +#define RTAS_LOG_V6_EPOW_MODIFIER_NORMAL 1 +#define RTAS_LOG_V6_EPOW_MODIFIER_ON_UPS 2 +#define RTAS_LOG_V6_EPOW_MODIFIER_CRITICAL 3 +#define RTAS_LOG_V6_EPOW_MODIFIER_TEMPERATURE 4 + uint8_t extended_modifier; +#define RTAS_LOG_V6_EPOW_XMODIFIER_SYSTEM_WIDE 0 +#define RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC 1 + uint8_t _resv; + uint64_t reason_code; +} QEMU_PACKED; + +struct epow_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_maina maina; + struct rtas_event_log_v6_mainb mainb; + struct rtas_event_log_v6_epow epow; +} QEMU_PACKED; + +union drc_identifier { + uint32_t index; + uint32_t count; + struct { + uint32_t count; + uint32_t index; + } count_indexed; + char name[1]; +} QEMU_PACKED; + +struct rtas_event_log_v6_hp { +#define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ + struct rtas_event_log_v6_section_header hdr; + uint8_t hotplug_type; +#define RTAS_LOG_V6_HP_TYPE_CPU 1 +#define RTAS_LOG_V6_HP_TYPE_MEMORY 2 +#define RTAS_LOG_V6_HP_TYPE_SLOT 3 +#define RTAS_LOG_V6_HP_TYPE_PHB 4 +#define RTAS_LOG_V6_HP_TYPE_PCI 5 +#define RTAS_LOG_V6_HP_TYPE_PMEM 6 + uint8_t hotplug_action; +#define RTAS_LOG_V6_HP_ACTION_ADD 1 +#define RTAS_LOG_V6_HP_ACTION_REMOVE 2 + uint8_t hotplug_identifier; +#define RTAS_LOG_V6_HP_ID_DRC_NAME 1 +#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 + uint8_t reserved; + union drc_identifier drc_id; +} QEMU_PACKED; + +struct hp_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_maina maina; + struct rtas_event_log_v6_mainb mainb; + struct rtas_event_log_v6_hp hp; +} QEMU_PACKED; + +struct rtas_event_log_v6_mc { +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ + struct rtas_event_log_v6_section_header hdr; + uint32_t fru_id; + uint32_t proc_id; + uint8_t error_type; +#define RTAS_LOG_V6_MC_TYPE_UE 0 +#define RTAS_LOG_V6_MC_TYPE_SLB 1 +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 +#define RTAS_LOG_V6_MC_TYPE_TLB 4 +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 + uint8_t sub_err_type; +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 +#define RTAS_LOG_V6_MC_UE_IFETCH 1 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 +#define RTAS_LOG_V6_MC_SLB_PARITY 0 +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 +#define 
RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 +#define RTAS_LOG_V6_MC_TLB_PARITY 1 +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 +/* + * Per PAPR, + * For UE error type, set bit 1 of sub_err_type to indicate effective addr is + * provided. For other error types (SLB/ERAT/TLB), set bit 0 to indicate + * same. + */ +#define RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED 0x40 +#define RTAS_LOG_V6_MC_EA_ADDR_PROVIDED 0x80 + uint8_t reserved_1[6]; + uint64_t effective_address; + uint64_t logical_address; +} QEMU_PACKED; + +struct mc_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_mc mc; +} QEMU_PACKED; + +struct MC_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_ierror_table mc_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +struct MC_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_derror_table mc_derror_table[] = { +{ 0x00008000, false, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00004000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000800, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000400, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000080, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */ + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000100, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + +typedef enum EventClass { + EVENT_CLASS_INTERNAL_ERRORS = 0, + EVENT_CLASS_EPOW = 1, + EVENT_CLASS_RESERVED = 2, + EVENT_CLASS_HOT_PLUG = 3, + EVENT_CLASS_IO = 4, + EVENT_CLASS_MAX +} EventClassIndex; +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) + +static const char * const event_names[EVENT_CLASS_MAX] = { + [EVENT_CLASS_INTERNAL_ERRORS] = "internal-errors", + [EVENT_CLASS_EPOW] = 
"epow-events", + [EVENT_CLASS_HOT_PLUG] = "hot-plug-events", + [EVENT_CLASS_IO] = "ibm,io-events", +}; + +struct SpaprEventSource { + int irq; + uint32_t mask; + bool enabled; +}; + +static SpaprEventSource *spapr_event_sources_new(void) +{ + return g_new0(SpaprEventSource, EVENT_CLASS_MAX); +} + +static void spapr_event_sources_register(SpaprEventSource *event_sources, + EventClassIndex index, int irq) +{ + /* we only support 1 irq per event class at the moment */ + g_assert(event_sources); + g_assert(!event_sources[index].enabled); + event_sources[index].irq = irq; + event_sources[index].mask = EVENT_CLASS_MASK(index); + event_sources[index].enabled = true; +} + +static const SpaprEventSource * +spapr_event_sources_get_source(SpaprEventSource *event_sources, + EventClassIndex index) +{ + g_assert(index < EVENT_CLASS_MAX); + g_assert(event_sources); + + return &event_sources[index]; +} + +void spapr_dt_events(SpaprMachineState *spapr, void *fdt) +{ + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; + int i, count = 0, event_sources; + SpaprEventSource *events = spapr->event_sources; + + g_assert(events); + + _FDT(event_sources = fdt_add_subnode(fdt, 0, "event-sources")); + + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { + int node_offset; + uint32_t interrupts[2]; + const SpaprEventSource *source = + spapr_event_sources_get_source(events, i); + const char *source_name = event_names[i]; + + if (!source->enabled) { + continue; + } + + spapr_dt_irq(interrupts, source->irq, false); + + _FDT(node_offset = fdt_add_subnode(fdt, event_sources, source_name)); + _FDT(fdt_setprop(fdt, node_offset, "interrupts", interrupts, + sizeof(interrupts))); + + irq_ranges[count++] = interrupts[0]; + irq_ranges[count++] = cpu_to_be32(1); + } + + _FDT((fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0))); + _FDT((fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2))); + _FDT((fdt_setprop(fdt, event_sources, "interrupt-ranges", + irq_ranges, count * sizeof(uint32_t)))); +} + +static const SpaprEventSource * +rtas_event_log_to_source(SpaprMachineState *spapr, int log_type) +{ + const SpaprEventSource *source; + + g_assert(spapr->event_sources); + + switch (log_type) { + case RTAS_LOG_TYPE_HOTPLUG: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_HOT_PLUG); + if (spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)) { + g_assert(source->enabled); + break; + } + /* fall through back to epow for legacy hotplug interrupt source */ + case RTAS_LOG_TYPE_EPOW: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_EPOW); + break; + default: + source = NULL; + } + + return source; +} + +static int rtas_event_log_to_irq(SpaprMachineState *spapr, int log_type) +{ + const SpaprEventSource *source; + + source = rtas_event_log_to_source(spapr, log_type); + g_assert(source); + g_assert(source->enabled); + + return source->irq; +} + +static uint32_t spapr_event_log_entry_type(SpaprEventLogEntry *entry) +{ + return entry->summary & RTAS_LOG_TYPE_MASK; +} + +static void rtas_event_log_queue(SpaprMachineState *spapr, + SpaprEventLogEntry *entry) +{ + QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); +} + +static SpaprEventLogEntry *rtas_event_log_dequeue(SpaprMachineState *spapr, + uint32_t event_mask) +{ + SpaprEventLogEntry *entry = NULL; + + QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const SpaprEventSource *source = + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); + + g_assert(source); + if (source->mask & event_mask) { + 
break; + } + } + + if (entry) { + QTAILQ_REMOVE(&spapr->pending_events, entry, next); + } + + return entry; +} + +static bool rtas_event_log_contains(SpaprMachineState *spapr, uint32_t event_mask) +{ + SpaprEventLogEntry *entry = NULL; + + QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const SpaprEventSource *source = + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); + + if (source->mask & event_mask) { + return true; + } + } + + return false; +} + +static uint32_t next_plid; + +static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr) +{ + v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG + | RTAS_LOG_V6_B0_BIGENDIAN; + v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT + | RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT; + v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM); +} + +static void spapr_init_maina(SpaprMachineState *spapr, + struct rtas_event_log_v6_maina *maina, + int section_count) +{ + struct tm tm; + int year; + + maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA); + maina->hdr.section_length = cpu_to_be16(sizeof(*maina)); + /* FIXME: section version, subtype and creator id? */ + spapr_rtc_read(&spapr->rtc, &tm, NULL); + year = tm.tm_year + 1900; + maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24) + | (to_bcd(year % 100) << 16) + | (to_bcd(tm.tm_mon + 1) << 8) + | to_bcd(tm.tm_mday)); + maina->creation_time = cpu_to_be32((to_bcd(tm.tm_hour) << 24) + | (to_bcd(tm.tm_min) << 16) + | (to_bcd(tm.tm_sec) << 8)); + maina->creator_id = 'H'; /* Hypervisor */ + maina->section_count = section_count; + maina->plid = next_plid++; +} + +static void spapr_powerdown_req(Notifier *n, void *opaque) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprEventLogEntry *entry; + struct rtas_event_log_v6 *v6hdr; + struct rtas_event_log_v6_maina *maina; + struct rtas_event_log_v6_mainb *mainb; + struct rtas_event_log_v6_epow *epow; + struct epow_extended_log *new_epow; + + entry = g_new(SpaprEventLogEntry, 1); + new_epow = g_malloc0(sizeof(*new_epow)); + entry->extended_log = new_epow; + + v6hdr = &new_epow->v6hdr; + maina = &new_epow->maina; + mainb = &new_epow->mainb; + epow = &new_epow->epow; + + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_TYPE_EPOW; + entry->extended_length = sizeof(*new_epow); + + spapr_init_v6hdr(v6hdr); + spapr_init_maina(spapr, maina, 3 /* Main-A, Main-B and EPOW */); + + mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB); + mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb)); + /* FIXME: section version, subtype and creator id? */ + mainb->subsystem_id = 0xa0; /* External environment */ + mainb->event_severity = 0x00; /* Informational / non-error */ + mainb->event_subtype = 0xd0; /* Normal shutdown */ + + epow->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_EPOW); + epow->hdr.section_length = cpu_to_be16(sizeof(*epow)); + epow->hdr.section_version = 2; /* includes extended modifier */ + /* FIXME: section subtype and creator id? 
*/ + epow->sensor_value = RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN; + epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; + epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; + + rtas_event_log_queue(spapr, entry); + + qemu_irq_pulse(spapr_qirq(spapr, + rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_EPOW))); +} + +static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, + SpaprDrcType drc_type, + union drc_identifier *drc_id) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprEventLogEntry *entry; + struct hp_extended_log *new_hp; + struct rtas_event_log_v6 *v6hdr; + struct rtas_event_log_v6_maina *maina; + struct rtas_event_log_v6_mainb *mainb; + struct rtas_event_log_v6_hp *hp; + + entry = g_new(SpaprEventLogEntry, 1); + new_hp = g_malloc0(sizeof(struct hp_extended_log)); + entry->extended_log = new_hp; + + v6hdr = &new_hp->v6hdr; + maina = &new_hp->maina; + mainb = &new_hp->mainb; + hp = &new_hp->hp; + + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_INITIATOR_HOTPLUG + | RTAS_LOG_TYPE_HOTPLUG; + entry->extended_length = sizeof(*new_hp); + + spapr_init_v6hdr(v6hdr); + spapr_init_maina(spapr, maina, 3 /* Main-A, Main-B, HP */); + + mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB); + mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb)); + mainb->subsystem_id = 0x80; /* External environment */ + mainb->event_severity = 0x00; /* Informational / non-error */ + mainb->event_subtype = 0x00; /* Normal shutdown */ + + hp->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_HOTPLUG); + hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); + hp->hdr.section_version = 1; /* includes extended modifier */ + hp->hotplug_action = hp_action; + hp->hotplug_identifier = hp_id; + + switch (drc_type) { + case SPAPR_DR_CONNECTOR_TYPE_PCI: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; + break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY; + break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PHB; + break; + case SPAPR_DR_CONNECTOR_TYPE_PMEM: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PMEM; + break; + default: + /* we shouldn't be signaling hotplug events for resources + * that don't support them + */ + g_assert(false); + return; + } + + if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { + hp->drc_id.count = cpu_to_be32(drc_id->count); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { + hp->drc_id.index = cpu_to_be32(drc_id->index); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) { + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ + g_assert(spapr_memory_hot_unplug_supported(spapr)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = + cpu_to_be32(drc_id->count_indexed.index); + } + + rtas_event_log_queue(spapr, entry); + + qemu_irq_pulse(spapr_qirq(spapr, + rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_HOTPLUG))); +} + +void spapr_hotplug_req_add_by_index(SpaprDrc *drc) +{ + SpaprDrcType drc_type = spapr_drc_type(drc); + union drc_identifier drc_id; + + drc_id.index = spapr_drc_index(drc); + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_index(SpaprDrc 
*drc) +{ + SpaprDrcType drc_type = spapr_drc_type(drc); + union drc_identifier drc_id; + + drc_id.index = spapr_drc_index(drc); + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +void spapr_hotplug_req_add_by_count(SpaprDrcType drc_type, + uint32_t count) +{ + union drc_identifier drc_id; + + drc_id.count = count; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_count(SpaprDrcType drc_type, + uint32_t count) +{ + union drc_identifier drc_id; + + drc_id.count = count; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +void spapr_hotplug_req_add_by_count_indexed(SpaprDrcType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +static void spapr_mc_set_ea_provided_flag(struct mc_extended_log *ext_elog) +{ + switch (ext_elog->mc.error_type) { + case RTAS_LOG_V6_MC_TYPE_UE: + ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED; + break; + case RTAS_LOG_V6_MC_TYPE_SLB: + case RTAS_LOG_V6_MC_TYPE_ERAT: + case RTAS_LOG_V6_MC_TYPE_TLB: + ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_EA_ADDR_PROVIDED; + break; + default: + break; + } +} + +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, + struct mc_extended_log *ext_elog) +{ + int i; + CPUPPCState *env = &cpu->env; + uint32_t summary; + uint64_t dsisr = env->spr[SPR_DSISR]; + + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; + if (recovered) { + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; + } else { + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; + } + + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { + if (!(dsisr & mc_derror_table[i].dsisr_value)) { + continue; + } + + ext_elog->mc.error_type = mc_derror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype; + if (mc_derror_table[i].dar_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]); + spapr_mc_set_ea_provided_flag(ext_elog); + } + + summary |= mc_derror_table[i].initiator + | mc_derror_table[i].severity; + + return summary; + } + } else { + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != + mc_ierror_table[i].srr1_value) { + continue; + } + + ext_elog->mc.error_type = mc_ierror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype; + if (mc_ierror_table[i].nip_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->nip); + spapr_mc_set_ea_provided_flag(ext_elog); + } + + summary |= mc_ierror_table[i].initiator + | mc_ierror_table[i].severity; + + return summary; + } + } + + summary |= RTAS_LOG_INITIATOR_CPU; + return summary; +} + +static void spapr_mce_dispatch_elog(SpaprMachineState *spapr, PowerPCCPU *cpu, + bool recovered) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = 
&cpu->env; + uint64_t rtas_addr; + struct rtas_error_log log; + struct mc_extended_log *ext_elog; + uint32_t summary; + + ext_elog = g_malloc0(sizeof(*ext_elog)); + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); + + log.summary = cpu_to_be32(summary); + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); + + spapr_init_v6hdr(&ext_elog->v6hdr); + ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); + ext_elog->mc.hdr.section_length = + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); + ext_elog->mc.hdr.section_version = 1; + + /* get rtas addr from fdt */ + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + if (!recovered) { + error_report( +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found."); + qemu_system_guest_panicked(NULL); + } else { + warn_report( +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. " +"Machine check recovered."); + } + g_free(ext_elog); + return; + } + + /* + * By taking the interlock, we assume that the MCE will be + * delivered to the guest. CAUTION: don't add anything that could + * prevent the MCE to be delivered after this line, otherwise the + * guest won't be able to release the interlock and ultimately + * hang/crash? + */ + spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; + + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, + env->gpr[3]); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]), &log, sizeof(log)); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]) + sizeof(log), ext_elog, + sizeof(*ext_elog)); + g_free(ext_elog); + + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; + + ppc_cpu_do_fwnmi_machine_check(cs, spapr->fwnmi_machine_check_addr); +} + +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(cpu); + int ret; + + if (spapr->fwnmi_machine_check_addr == -1) { + /* Non-FWNMI case, deliver it like an architected CPU interrupt. */ + cs->exception_index = POWERPC_EXCP_MCHECK; + ppc_cpu_do_interrupt(cs); + return; + } + + /* Wait for FWNMI interlock. */ + while (spapr->fwnmi_machine_check_interlock != -1) { + /* + * Check whether the same CPU got machine check error + * while still handling the mc error (i.e., before + * that CPU called "ibm,nmi-interlock") + */ + if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) { + if (!recovered) { + error_report( +"FWNMI: Unable to deliver machine check to guest: nested machine check."); + qemu_system_guest_panicked(NULL); + } else { + warn_report( +"FWNMI: Unable to deliver machine check to guest: nested machine check. " +"Machine check recovered."); + } + return; + } + qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond); + if (spapr->fwnmi_machine_check_addr == -1) { + /* + * If the machine was reset while waiting for the interlock, + * abort the delivery. The machine check applies to a context + * that no longer exists, so it wouldn't make sense to deliver + * it now. + */ + return; + } + } + + /* + * Try to block migration while FWNMI is being handled, so the + * machine check handler runs where the information passed to it + * actually makes sense. This shouldn't actually block migration, + * only delay it slightly, assuming migration is retried. If the + * attempt to block fails, carry on. Unfortunately, it always + * fails when running with -only-migrate. 
A proper interface to + * delay migration completion for a bit could avoid that. + */ + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL); + if (ret == -EBUSY) { + warn_report("Received a fwnmi while migration was in progress"); + } + + spapr_mce_dispatch_elog(spapr, cpu, recovered); +} + +static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t mask, buf, len, event_len; + SpaprEventLogEntry *event; + struct rtas_error_log header; + int i; + + if ((nargs < 6) || (nargs > 7) || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + mask = rtas_ld(args, 2); + buf = rtas_ld(args, 4); + len = rtas_ld(args, 5); + + event = rtas_event_log_dequeue(spapr, mask); + if (!event) { + goto out_no_events; + } + + event_len = event->extended_length + sizeof(header); + + if (event_len < len) { + len = event_len; + } + + header.summary = cpu_to_be32(event->summary); + header.extended_length = cpu_to_be32(event->extended_length); + cpu_physical_memory_write(buf, &header, sizeof(header)); + cpu_physical_memory_write(buf + sizeof(header), event->extended_log, + event->extended_length); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + g_free(event->extended_log); + g_free(event); + + /* according to PAPR+, the IRQ must be left asserted, or re-asserted, if + * there are still pending events to be fetched via check-exception. We + * do the latter here, since our code relies on edge-triggered + * interrupts. + */ + for (i = 0; i < EVENT_CLASS_MAX; i++) { + if (rtas_event_log_contains(spapr, EVENT_CLASS_MASK(i))) { + const SpaprEventSource *source = + spapr_event_sources_get_source(spapr->event_sources, i); + + g_assert(source->enabled); + qemu_irq_pulse(spapr_qirq(spapr, source->irq)); + } + } + + return; + +out_no_events: + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); +} + +static void event_scan(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + int i; + if (nargs != 4 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + for (i = 0; i < EVENT_CLASS_MAX; i++) { + if (rtas_event_log_contains(spapr, EVENT_CLASS_MASK(i))) { + const SpaprEventSource *source = + spapr_event_sources_get_source(spapr->event_sources, i); + + g_assert(source->enabled); + qemu_irq_pulse(spapr_qirq(spapr, source->irq)); + } + } + + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); +} + +void spapr_clear_pending_events(SpaprMachineState *spapr) +{ + SpaprEventLogEntry *entry = NULL, *next_entry; + + QTAILQ_FOREACH_SAFE(entry, &spapr->pending_events, next, next_entry) { + QTAILQ_REMOVE(&spapr->pending_events, entry, next); + g_free(entry->extended_log); + g_free(entry); + } +} + +void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr) +{ + SpaprEventLogEntry *entry = NULL, *next_entry; + + QTAILQ_FOREACH_SAFE(entry, &spapr->pending_events, next, next_entry) { + if (spapr_event_log_entry_type(entry) == RTAS_LOG_TYPE_HOTPLUG) { + QTAILQ_REMOVE(&spapr->pending_events, entry, next); + g_free(entry->extended_log); + g_free(entry); + } + } +} + +void spapr_events_init(SpaprMachineState *spapr) +{ + int epow_irq = SPAPR_IRQ_EPOW; + + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + epow_irq = spapr_irq_findone(spapr, &error_fatal); + } + + spapr_irq_claim(spapr, epow_irq, false, &error_fatal); + + QTAILQ_INIT(&spapr->pending_events); + + spapr->event_sources = 
spapr_event_sources_new(); + + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, + epow_irq); + + /* NOTE: if machine supports modern/dedicated hotplug event source, + * we add it to the device-tree unconditionally. This means we may + * have cases where the source is enabled in QEMU, but unused by the + * guest because it does not support modern hotplug events, so we + * take care to rely on checking for negotiation of OV5_HP_EVT option + * before attempting to use it to signal events, rather than simply + * checking that it's enabled. + */ + if (spapr->use_hotplug_event_source) { + int hp_irq = SPAPR_IRQ_HOTPLUG; + + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + hp_irq = spapr_irq_findone(spapr, &error_fatal); + } + + spapr_irq_claim(spapr, hp_irq, false, &error_fatal); + + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, + hp_irq); + } + + spapr->epow_notifier.notify = spapr_powerdown_req; + qemu_register_powerdown_notifier(&spapr->epow_notifier); + spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", + check_exception); + spapr_rtas_register(RTAS_EVENT_SCAN, "event-scan", event_scan); +} diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c new file mode 100644 index 000000000..222c1b6bb --- /dev/null +++ b/hw/ppc/spapr_hcall.c @@ -0,0 +1,1557 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "sysemu/hw_accel.h" +#include "sysemu/runstate.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/error-report.h" +#include "exec/exec-all.h" +#include "helper_regs.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "mmu-hash64.h" +#include "cpu-models.h" +#include "trace.h" +#include "kvm_ppc.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr_ovec.h" +#include "hw/ppc/spapr_numa.h" +#include "mmu-book3s-v3.h" +#include "hw/mem/memory-device.h" + +bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) +{ + MachineState *machine = MACHINE(spapr); + DeviceMemoryState *dms = machine->device_memory; + + if (addr < machine->ram_size) { + return true; + } + if ((addr >= dms->base) + && ((addr - dms->base) < memory_region_size(&dms->mr))) { + return true; + } + + return false; +} + +/* Convert a return code from the KVM ioctl()s implementing resize HPT + * into a PAPR hypercall return code */ +static target_ulong resize_hpt_convert_rc(int ret) +{ + if (ret >= 100000) { + return H_LONG_BUSY_ORDER_100_SEC; + } else if (ret >= 10000) { + return H_LONG_BUSY_ORDER_10_SEC; + } else if (ret >= 1000) { + return H_LONG_BUSY_ORDER_1_SEC; + } else if (ret >= 100) { + return H_LONG_BUSY_ORDER_100_MSEC; + } else if (ret >= 10) { + return H_LONG_BUSY_ORDER_10_MSEC; + } else if (ret > 0) { + return H_LONG_BUSY_ORDER_1_MSEC; + } + + switch (ret) { + case 0: + return H_SUCCESS; + case -EPERM: + return H_AUTHORITY; + case -EINVAL: + return H_PARAMETER; + case -ENXIO: + return H_CLOSED; + case -ENOSPC: + return H_PTEG_FULL; + case -EBUSY: + return H_BUSY; + case -ENOMEM: + return H_NO_MEM; + default: + return H_HARDWARE; + } +} + +static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + int shift = args[1]; + uint64_t current_ram_size; + int rc; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + if (!spapr->htab_shift) { + /* Radix guest, no HPT */ + return H_NOT_AVAILABLE; + } + + 
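/* a non-zero shift requests an HPT of 2^shift bytes; only orders 18..46 are acceptable */ +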
trace_spapr_h_resize_hpt_prepare(flags, shift); + + if (flags != 0) { + return H_PARAMETER; + } + + if (shift && ((shift < 18) || (shift > 46))) { + return H_PARAMETER; + } + + current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size(); + + /* We only allow the guest to allocate an HPT one order above what + * we'd normally give them (to stop a small guest claiming a huge + * chunk of resources in the HPT */ + if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) { + return H_RESOURCE; + } + + rc = kvmppc_resize_hpt_prepare(cpu, flags, shift); + if (rc != -ENOSYS) { + return resize_hpt_convert_rc(rc); + } + + if (kvm_enabled()) { + return H_HARDWARE; + } + + return softmmu_resize_hpt_prepare(cpu, spapr, shift); +} + +static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) +{ + int ret; + + cpu_synchronize_state(cs); + + ret = kvmppc_put_books_sregs(POWERPC_CPU(cs)); + if (ret < 0) { + error_report("failed to push sregs to KVM: %s", strerror(-ret)); + exit(1); + } +} + +void push_sregs_to_kvm_pr(SpaprMachineState *spapr) +{ + CPUState *cs; + + /* + * This is a hack for the benefit of KVM PR - it abuses the SDR1 + * slot in kvm_sregs to communicate the userspace address of the + * HPT + */ + if (!kvm_enabled() || !spapr->htab) { + return; + } + + CPU_FOREACH(cs) { + run_on_cpu(cs, do_push_sregs_to_kvm_pr, RUN_ON_CPU_NULL); + } +} + +static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong shift = args[1]; + int rc; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + if (!spapr->htab_shift) { + /* Radix guest, no HPT */ + return H_NOT_AVAILABLE; + } + + trace_spapr_h_resize_hpt_commit(flags, shift); + + rc = kvmppc_resize_hpt_commit(cpu, flags, shift); + if (rc != -ENOSYS) { + rc = resize_hpt_convert_rc(rc); + if (rc == H_SUCCESS) { + /* Need to set the new htab_shift in the machine state */ + spapr->htab_shift = shift; + } + return rc; + } + + if (kvm_enabled()) { + return H_HARDWARE; + } + + return softmmu_resize_hpt_commit(cpu, spapr, flags, shift); +} + + + +static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + cpu_synchronize_state(CPU(cpu)); + cpu->env.spr[SPR_SPRG0] = args[0]; + + return H_SUCCESS; +} + +static target_ulong h_set_dabr(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + if (!ppc_has_spr(cpu, SPR_DABR)) { + return H_HARDWARE; /* DABR register not available */ + } + cpu_synchronize_state(CPU(cpu)); + + if (ppc_has_spr(cpu, SPR_DABRX)) { + cpu->env.spr[SPR_DABRX] = 0x3; /* Use Problem and Privileged state */ + } else if (!(args[0] & 0x4)) { /* Breakpoint Translation set? 
*/ + return H_RESERVED_DABR; + } + + cpu->env.spr[SPR_DABR] = args[0]; + return H_SUCCESS; +} + +static target_ulong h_set_xdabr(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong dabrx = args[1]; + + if (!ppc_has_spr(cpu, SPR_DABR) || !ppc_has_spr(cpu, SPR_DABRX)) { + return H_HARDWARE; + } + + if ((dabrx & ~0xfULL) != 0 || (dabrx & H_DABRX_HYPERVISOR) != 0 + || (dabrx & (H_DABRX_KERNEL | H_DABRX_USER)) == 0) { + return H_PARAMETER; + } + + cpu_synchronize_state(CPU(cpu)); + cpu->env.spr[SPR_DABRX] = dabrx; + cpu->env.spr[SPR_DABR] = args[0]; + + return H_SUCCESS; +} + +static target_ulong h_page_init(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + hwaddr dst = args[1]; + hwaddr src = args[2]; + hwaddr len = TARGET_PAGE_SIZE; + uint8_t *pdst, *psrc; + target_long ret = H_SUCCESS; + + if (flags & ~(H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE + | H_COPY_PAGE | H_ZERO_PAGE)) { + qemu_log_mask(LOG_UNIMP, "h_page_init: Bad flags (" TARGET_FMT_lx "\n", + flags); + return H_PARAMETER; + } + + /* Map-in destination */ + if (!is_ram_address(spapr, dst) || (dst & ~TARGET_PAGE_MASK) != 0) { + return H_PARAMETER; + } + pdst = cpu_physical_memory_map(dst, &len, true); + if (!pdst || len != TARGET_PAGE_SIZE) { + return H_PARAMETER; + } + + if (flags & H_COPY_PAGE) { + /* Map-in source, copy to destination, and unmap source again */ + if (!is_ram_address(spapr, src) || (src & ~TARGET_PAGE_MASK) != 0) { + ret = H_PARAMETER; + goto unmap_out; + } + psrc = cpu_physical_memory_map(src, &len, false); + if (!psrc || len != TARGET_PAGE_SIZE) { + ret = H_PARAMETER; + goto unmap_out; + } + memcpy(pdst, psrc, len); + cpu_physical_memory_unmap(psrc, len, 0, len); + } else if (flags & H_ZERO_PAGE) { + memset(pdst, 0, len); /* Just clear the destination page */ + } + + if (kvm_enabled() && (flags & H_ICACHE_SYNCHRONIZE) != 0) { + kvmppc_dcbst_range(cpu, pdst, len); + } + if (flags & (H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE)) { + if (kvm_enabled()) { + kvmppc_icbi_range(cpu, pdst, len); + } else { + tb_flush(CPU(cpu)); + } + } + +unmap_out: + cpu_physical_memory_unmap(pdst, TARGET_PAGE_SIZE, 1, len); + return ret; +} + +#define FLAGS_REGISTER_VPA 0x0000200000000000ULL +#define FLAGS_REGISTER_DTL 0x0000400000000000ULL +#define FLAGS_REGISTER_SLBSHADOW 0x0000600000000000ULL +#define FLAGS_DEREGISTER_VPA 0x0000a00000000000ULL +#define FLAGS_DEREGISTER_DTL 0x0000c00000000000ULL +#define FLAGS_DEREGISTER_SLBSHADOW 0x0000e00000000000ULL + +static target_ulong register_vpa(PowerPCCPU *cpu, target_ulong vpa) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + uint16_t size; + uint8_t tmp; + + if (vpa == 0) { + hcall_dprintf("Can't cope with registering a VPA at logical 0\n"); + return H_HARDWARE; + } + + if (vpa % env->dcache_line_size) { + return H_PARAMETER; + } + /* FIXME: bounds check the address */ + + size = lduw_be_phys(cs->as, vpa + 0x4); + + if (size < VPA_MIN_SIZE) { + return H_PARAMETER; + } + + /* VPA is not allowed to cross a page boundary */ + if ((vpa / 4096) != ((vpa + size - 1) / 4096)) { + return H_PARAMETER; + } + + spapr_cpu->vpa_addr = vpa; + + tmp = ldub_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET); + tmp |= VPA_SHARED_PROC_VAL; + stb_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp); + + return H_SUCCESS; +} + +static target_ulong deregister_vpa(PowerPCCPU *cpu, target_ulong vpa) +{ + 
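/* the VPA must outlive its dependent areas: refuse while the SLB shadow or dispatch trace log is still registered */ +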
SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + if (spapr_cpu->slb_shadow_addr) { + return H_RESOURCE; + } + + if (spapr_cpu->dtl_addr) { + return H_RESOURCE; + } + + spapr_cpu->vpa_addr = 0; + return H_SUCCESS; +} + +static target_ulong register_slb_shadow(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + uint32_t size; + + if (addr == 0) { + hcall_dprintf("Can't cope with SLB shadow at logical 0\n"); + return H_HARDWARE; + } + + size = ldl_be_phys(CPU(cpu)->as, addr + 0x4); + if (size < 0x8) { + return H_PARAMETER; + } + + if ((addr / 4096) != ((addr + size - 1) / 4096)) { + return H_PARAMETER; + } + + if (!spapr_cpu->vpa_addr) { + return H_RESOURCE; + } + + spapr_cpu->slb_shadow_addr = addr; + spapr_cpu->slb_shadow_size = size; + + return H_SUCCESS; +} + +static target_ulong deregister_slb_shadow(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + spapr_cpu->slb_shadow_addr = 0; + spapr_cpu->slb_shadow_size = 0; + return H_SUCCESS; +} + +static target_ulong register_dtl(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + uint32_t size; + + if (addr == 0) { + hcall_dprintf("Can't cope with DTL at logical 0\n"); + return H_HARDWARE; + } + + size = ldl_be_phys(CPU(cpu)->as, addr + 0x4); + + if (size < 48) { + return H_PARAMETER; + } + + if (!spapr_cpu->vpa_addr) { + return H_RESOURCE; + } + + spapr_cpu->dtl_addr = addr; + spapr_cpu->dtl_size = size; + + return H_SUCCESS; +} + +static target_ulong deregister_dtl(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + spapr_cpu->dtl_addr = 0; + spapr_cpu->dtl_size = 0; + + return H_SUCCESS; +} + +static target_ulong h_register_vpa(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong procno = args[1]; + target_ulong vpa = args[2]; + target_ulong ret = H_PARAMETER; + PowerPCCPU *tcpu; + + tcpu = spapr_find_cpu(procno); + if (!tcpu) { + return H_PARAMETER; + } + + switch (flags) { + case FLAGS_REGISTER_VPA: + ret = register_vpa(tcpu, vpa); + break; + + case FLAGS_DEREGISTER_VPA: + ret = deregister_vpa(tcpu, vpa); + break; + + case FLAGS_REGISTER_SLBSHADOW: + ret = register_slb_shadow(tcpu, vpa); + break; + + case FLAGS_DEREGISTER_SLBSHADOW: + ret = deregister_slb_shadow(tcpu, vpa); + break; + + case FLAGS_REGISTER_DTL: + ret = register_dtl(tcpu, vpa); + break; + + case FLAGS_DEREGISTER_DTL: + ret = deregister_dtl(tcpu, vpa); + break; + } + + return ret; +} + +static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + env->msr |= (1ULL << MSR_EE); + hreg_compute_hflags(env); + + if (spapr_cpu->prod) { + spapr_cpu->prod = false; + return H_SUCCESS; + } + + if (!cpu_has_work(cs)) { + cs->halted = 1; + cs->exception_index = EXCP_HLT; + cs->exit_request = 1; + } + + return H_SUCCESS; +} + +/* + * Confer to self, aka join. Cede could use the same pattern as well, if + * EXCP_HLT can be changed to EXCP_HALTED. 
+ */ +static target_ulong h_confer_self(PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + if (spapr_cpu->prod) { + spapr_cpu->prod = false; + return H_SUCCESS; + } + cs->halted = 1; + cs->exception_index = EXCP_HALTED; + cs->exit_request = 1; + + return H_SUCCESS; +} + +static target_ulong h_join(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs; + bool last_unjoined = true; + + if (env->msr & (1ULL << MSR_EE)) { + return H_BAD_MODE; + } + + /* + * Must not join the last CPU running. Interestingly, no such restriction + * for H_CONFER-to-self, but that is probably not intended to be used + * when H_JOIN is available. + */ + CPU_FOREACH(cs) { + PowerPCCPU *c = POWERPC_CPU(cs); + CPUPPCState *e = &c->env; + if (c == cpu) { + continue; + } + + /* Don't have a way to indicate joined, so use halted && MSR[EE]=0 */ + if (!cs->halted || (e->msr & (1ULL << MSR_EE))) { + last_unjoined = false; + break; + } + } + if (last_unjoined) { + return H_CONTINUE; + } + + return h_confer_self(cpu); +} + +static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_long target = args[0]; + uint32_t dispatch = args[1]; + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu; + + /* + * -1 means confer to all other CPUs without dispatch counter check, + * otherwise it's a targeted confer. + */ + if (target != -1) { + PowerPCCPU *target_cpu = spapr_find_cpu(target); + uint32_t target_dispatch; + + if (!target_cpu) { + return H_PARAMETER; + } + + /* + * target == self is a special case, we wait until prodded, without + * dispatch counter check. + */ + if (cpu == target_cpu) { + return h_confer_self(cpu); + } + + spapr_cpu = spapr_cpu_state(target_cpu); + if (!spapr_cpu->vpa_addr || ((dispatch & 1) == 0)) { + return H_SUCCESS; + } + + target_dispatch = ldl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER); + if (target_dispatch != dispatch) { + return H_SUCCESS; + } + + /* + * The targeted confer does not do anything special beyond yielding + * the current vCPU, but even this should be better than nothing. + * At least for single-threaded tcg, it gives the target a chance to + * run before we run again. Multi-threaded tcg does not really do + * anything with EXCP_YIELD yet. 
+ */ + } + + cs->exception_index = EXCP_YIELD; + cs->exit_request = 1; + cpu_loop_exit(cs); + + return H_SUCCESS; +} + +static target_ulong h_prod(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_long target = args[0]; + PowerPCCPU *tcpu; + CPUState *cs; + SpaprCpuState *spapr_cpu; + + tcpu = spapr_find_cpu(target); + cs = CPU(tcpu); + if (!cs) { + return H_PARAMETER; + } + + spapr_cpu = spapr_cpu_state(tcpu); + spapr_cpu->prod = true; + cs->halted = 0; + qemu_cpu_kick(cs); + + return H_SUCCESS; +} + +static target_ulong h_rtas(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong rtas_r3 = args[0]; + uint32_t token = rtas_ld(rtas_r3, 0); + uint32_t nargs = rtas_ld(rtas_r3, 1); + uint32_t nret = rtas_ld(rtas_r3, 2); + + return spapr_rtas_call(cpu, spapr, token, nargs, rtas_r3 + 12, + nret, rtas_r3 + 12 + 4*nargs); +} + +static target_ulong h_logical_load(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUState *cs = CPU(cpu); + target_ulong size = args[0]; + target_ulong addr = args[1]; + + switch (size) { + case 1: + args[0] = ldub_phys(cs->as, addr); + return H_SUCCESS; + case 2: + args[0] = lduw_phys(cs->as, addr); + return H_SUCCESS; + case 4: + args[0] = ldl_phys(cs->as, addr); + return H_SUCCESS; + case 8: + args[0] = ldq_phys(cs->as, addr); + return H_SUCCESS; + } + return H_PARAMETER; +} + +static target_ulong h_logical_store(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUState *cs = CPU(cpu); + + target_ulong size = args[0]; + target_ulong addr = args[1]; + target_ulong val = args[2]; + + switch (size) { + case 1: + stb_phys(cs->as, addr, val); + return H_SUCCESS; + case 2: + stw_phys(cs->as, addr, val); + return H_SUCCESS; + case 4: + stl_phys(cs->as, addr, val); + return H_SUCCESS; + case 8: + stq_phys(cs->as, addr, val); + return H_SUCCESS; + } + return H_PARAMETER; +} + +static target_ulong h_logical_memop(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUState *cs = CPU(cpu); + + target_ulong dst = args[0]; /* Destination address */ + target_ulong src = args[1]; /* Source address */ + target_ulong esize = args[2]; /* Element size (0=1,1=2,2=4,3=8) */ + target_ulong count = args[3]; /* Element count */ + target_ulong op = args[4]; /* 0 = copy, 1 = invert */ + uint64_t tmp; + unsigned int mask = (1 << esize) - 1; + int step = 1 << esize; + + if (count > 0x80000000) { + return H_PARAMETER; + } + + if ((dst & mask) || (src & mask) || (op > 1)) { + return H_PARAMETER; + } + + if (dst >= src && dst < (src + (count << esize))) { + dst = dst + ((count - 1) << esize); + src = src + ((count - 1) << esize); + step = -step; + } + + while (count--) { + switch (esize) { + case 0: + tmp = ldub_phys(cs->as, src); + break; + case 1: + tmp = lduw_phys(cs->as, src); + break; + case 2: + tmp = ldl_phys(cs->as, src); + break; + case 3: + tmp = ldq_phys(cs->as, src); + break; + default: + return H_PARAMETER; + } + if (op == 1) { + tmp = ~tmp; + } + switch (esize) { + case 0: + stb_phys(cs->as, dst, tmp); + break; + case 1: + stw_phys(cs->as, dst, tmp); + break; + case 2: + stl_phys(cs->as, dst, tmp); + break; + case 3: + stq_phys(cs->as, dst, tmp); + break; + } + dst = dst + step; + src = src + step; + } + + return H_SUCCESS; +} + +static target_ulong h_logical_icbi(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + /* Nothing to 
do on emulation, KVM will trap this in the kernel */ + return H_SUCCESS; +} + +static target_ulong h_logical_dcbf(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + /* Nothing to do on emulation, KVM will trap this in the kernel */ + return H_SUCCESS; +} + +static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + if (value1) { + return H_P3; + } + if (value2) { + return H_P4; + } + + switch (mflags) { + case H_SET_MODE_ENDIAN_BIG: + spapr_set_all_lpcrs(0, LPCR_ILE); + spapr_pci_switch_vga(spapr, true); + return H_SUCCESS; + + case H_SET_MODE_ENDIAN_LITTLE: + spapr_set_all_lpcrs(LPCR_ILE, LPCR_ILE); + spapr_pci_switch_vga(spapr, false); + return H_SUCCESS; + } + + return H_UNSUPPORTED_FLAG; +} + +static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + if (!(pcc->insns_flags2 & PPC2_ISA207S)) { + return H_P2; + } + if (value1) { + return H_P3; + } + if (value2) { + return H_P4; + } + + if (mflags == 1) { + /* AIL=1 is reserved in POWER8/POWER9/POWER10 */ + return H_UNSUPPORTED_FLAG; + } + + if (mflags == 2 && (pcc->insns_flags2 & PPC2_ISA310)) { + /* AIL=2 is reserved in POWER10 (ISA v3.1) */ + return H_UNSUPPORTED_FLAG; + } + + spapr_set_all_lpcrs(mflags << LPCR_AIL_SHIFT, LPCR_AIL); + + return H_SUCCESS; +} + +static target_ulong h_set_mode(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong resource = args[1]; + target_ulong ret = H_P2; + + switch (resource) { + case H_SET_MODE_RESOURCE_LE: + ret = h_set_mode_resource_le(cpu, spapr, args[0], args[2], args[3]); + break; + case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: + ret = h_set_mode_resource_addr_trans_mode(cpu, args[0], + args[2], args[3]); + break; + } + + return ret; +} + +static target_ulong h_clean_slb(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n", + opcode, " (H_CLEAN_SLB)"); + return H_FUNCTION; +} + +static target_ulong h_invalidate_pid(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n", + opcode, " (H_INVALIDATE_PID)"); + return H_FUNCTION; +} + +static void spapr_check_setup_free_hpt(SpaprMachineState *spapr, + uint64_t patbe_old, uint64_t patbe_new) +{ + /* + * We have 4 Options: + * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing + * HASH->RADIX : Free HPT + * RADIX->HASH : Allocate HPT + * NOTHING->HASH : Allocate HPT + * Note: NOTHING implies the case where we said the guest could choose + * later and so assumed radix and now it's called H_REG_PROC_TBL + */ + + if ((patbe_old & PATE1_GR) == (patbe_new & PATE1_GR)) { + /* We assume RADIX, so this catches all the "Do Nothing" cases */ + } else if (!(patbe_old & PATE1_GR)) { + /* HASH->RADIX : Free HPT */ + spapr_free_hpt(spapr); + } else if (!(patbe_new & PATE1_GR)) { + /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ + spapr_setup_hpt(spapr); + } + return; +} + +#define FLAGS_MASK 0x01FULL +#define FLAG_MODIFY 0x10 +#define FLAG_REGISTER 0x08 +#define FLAG_RADIX 0x04 +#define FLAG_HASH_PROC_TBL 0x02 +#define FLAG_GTSE 0x01 + +static target_ulong h_register_process_table(PowerPCCPU *cpu, + 
SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong proc_tbl = args[1]; + target_ulong page_size = args[2]; + target_ulong table_size = args[3]; + target_ulong update_lpcr = 0; + uint64_t cproc; + + if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */ + return H_PARAMETER; + } + if (flags & FLAG_MODIFY) { + if (flags & FLAG_REGISTER) { + if (flags & FLAG_RADIX) { /* Register new RADIX process table */ + if (proc_tbl & 0xfff || proc_tbl >> 60) { + return H_P2; + } else if (page_size) { + return H_P3; + } else if (table_size > 24) { + return H_P4; + } + cproc = PATE1_GR | proc_tbl | table_size; + } else { /* Register new HPT process table */ + if (flags & FLAG_HASH_PROC_TBL) { /* Hash with Segment Tables */ + /* TODO - Not Supported */ + /* Technically caused by flag bits => H_PARAMETER */ + return H_PARAMETER; + } else { /* Hash with SLB */ + if (proc_tbl >> 38) { + return H_P2; + } else if (page_size & ~0x7) { + return H_P3; + } else if (table_size > 24) { + return H_P4; + } + } + cproc = (proc_tbl << 25) | page_size << 5 | table_size; + } + + } else { /* Deregister current process table */ + /* + * Set to benign value: (current GR) | 0. This allows + * deregistration in KVM to succeed even if the radix bit + * in flags doesn't match the radix bit in the old PATE. + */ + cproc = spapr->patb_entry & PATE1_GR; + } + } else { /* Maintain current registration */ + if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATE1_GR)) { + /* Technically caused by flag bits => H_PARAMETER */ + return H_PARAMETER; /* Existing Process Table Mismatch */ + } + cproc = spapr->patb_entry; + } + + /* Check if we need to setup OR free the hpt */ + spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc); + + spapr->patb_entry = cproc; /* Save new process table */ + + /* Update the UPRT, HR and GTSE bits in the LPCR for all cpus */ + if (flags & FLAG_RADIX) /* Radix must use process tables, also set HR */ + update_lpcr |= (LPCR_UPRT | LPCR_HR); + else if (flags & FLAG_HASH_PROC_TBL) /* Hash with process tables */ + update_lpcr |= LPCR_UPRT; + if (flags & FLAG_GTSE) /* Guest translation shootdown enable */ + update_lpcr |= LPCR_GTSE; + + spapr_set_all_lpcrs(update_lpcr, LPCR_UPRT | LPCR_HR | LPCR_GTSE); + + if (kvm_enabled()) { + return kvmppc_configure_v3_mmu(cpu, flags & FLAG_RADIX, + flags & FLAG_GTSE, cproc); + } + return H_SUCCESS; +} + +#define H_SIGNAL_SYS_RESET_ALL -1 +#define H_SIGNAL_SYS_RESET_ALLBUTSELF -2 + +static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_long target = args[0]; + CPUState *cs; + + if (target < 0) { + /* Broadcast */ + if (target < H_SIGNAL_SYS_RESET_ALLBUTSELF) { + return H_PARAMETER; + } + + CPU_FOREACH(cs) { + PowerPCCPU *c = POWERPC_CPU(cs); + + if (target == H_SIGNAL_SYS_RESET_ALLBUTSELF) { + if (c == cpu) { + continue; + } + } + run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL); + } + return H_SUCCESS; + + } else { + /* Unicast */ + cs = CPU(spapr_find_cpu(target)); + if (cs) { + run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL); + return H_SUCCESS; + } + return H_PARAMETER; + } +} + +/* Returns either a logical PVR or zero if none was found */ +static uint32_t cas_check_pvr(PowerPCCPU *cpu, uint32_t max_compat, + target_ulong *addr, bool *raw_mode_supported) +{ + bool explicit_match = false; /* Matched the CPU's real PVR */ + uint32_t best_compat = 0; + int i; + + /* + * We scan the 
supplied table of PVRs looking for two things + * 1. Is our real CPU PVR in the list? + * 2. What's the "best" listed logical PVR + */ + for (i = 0; i < 512; ++i) { + uint32_t pvr, pvr_mask; + + pvr_mask = ldl_be_phys(&address_space_memory, *addr); + pvr = ldl_be_phys(&address_space_memory, *addr + 4); + *addr += 8; + + if (~pvr_mask & pvr) { + break; /* Terminator record */ + } + + if ((cpu->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask)) { + explicit_match = true; + } else { + if (ppc_check_compat(cpu, pvr, best_compat, max_compat)) { + best_compat = pvr; + } + } + } + + *raw_mode_supported = explicit_match; + + /* Parsing finished */ + trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat); + + return best_compat; +} + +static +target_ulong do_client_architecture_support(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong vec, + target_ulong fdt_bufsize) +{ + target_ulong ov_table; /* Working address in data buffer */ + uint32_t cas_pvr; + SpaprOptionVector *ov1_guest, *ov5_guest; + bool guest_radix; + bool raw_mode_supported = false; + bool guest_xive; + CPUState *cs; + void *fdt; + uint32_t max_compat = spapr->max_compat_pvr; + + /* CAS is supposed to be called early when only the boot vCPU is active. */ + CPU_FOREACH(cs) { + if (cs == CPU(cpu)) { + continue; + } + if (!cs->halted) { + warn_report("guest has multiple active vCPUs at CAS, which is not allowed"); + return H_MULTI_THREADS_ACTIVE; + } + } + + cas_pvr = cas_check_pvr(cpu, max_compat, &vec, &raw_mode_supported); + if (!cas_pvr && (!raw_mode_supported || max_compat)) { + /* + * We couldn't find a suitable compatibility mode, and either + * the guest doesn't support "raw" mode for this CPU, or "raw" + * mode is disabled because a maximum compat mode is set. + */ + error_report("Couldn't negotiate a suitable PVR during CAS"); + return H_HARDWARE; + } + + /* Update CPUs */ + if (cpu->compat_pvr != cas_pvr) { + Error *local_err = NULL; + + if (ppc_set_compat_all(cas_pvr, &local_err) < 0) { + /* We fail to set compat mode (likely because running with KVM PR), + * but maybe we can fallback to raw mode if the guest supports it. + */ + if (!raw_mode_supported) { + error_report_err(local_err); + return H_HARDWARE; + } + error_free(local_err); + } + } + + /* For the future use: here @ov_table points to the first option vector */ + ov_table = vec; + + ov1_guest = spapr_ovec_parse_vector(ov_table, 1); + if (!ov1_guest) { + warn_report("guest didn't provide option vector 1"); + return H_PARAMETER; + } + ov5_guest = spapr_ovec_parse_vector(ov_table, 5); + if (!ov5_guest) { + spapr_ovec_cleanup(ov1_guest); + warn_report("guest didn't provide option vector 5"); + return H_PARAMETER; + } + if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) { + error_report("guest requested hash and radix MMU, which is invalid."); + exit(EXIT_FAILURE); + } + if (spapr_ovec_test(ov5_guest, OV5_XIVE_BOTH)) { + error_report("guest requested an invalid interrupt mode"); + exit(EXIT_FAILURE); + } + + guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300); + + guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT); + + /* + * HPT resizing is a bit of a special case, because when enabled + * we assume an HPT guest will support it until it says it + * doesn't, instead of assuming it won't support it until it says + * it does. Strictly speaking that approach could break for + * guests which don't make a CAS call, but those are so old we + * don't care about them. 
Without that assumption we'd have to + * make at least a temporary allocation of an HPT sized for max + * memory, which could be impossibly difficult under KVM HV if + * maxram is large. + */ + if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) { + int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) { + error_report( + "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required"); + exit(1); + } + + if (spapr->htab_shift < maxshift) { + /* Guest doesn't know about HPT resizing, so we + * pre-emptively resize for the maximum permitted RAM. At + * the point this is called, nothing should have been + * entered into the existing HPT */ + spapr_reallocate_hpt(spapr, maxshift, &error_fatal); + push_sregs_to_kvm_pr(spapr); + } + } + + /* NOTE: there are actually a number of ov5 bits where input from the + * guest is always zero, and the platform/QEMU enables them independently + * of guest input. To model these properly we'd want some sort of mask, + * but since they only currently apply to memory migration as defined + * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need + * to worry about this for now. + */ + + /* full range of negotiated ov5 capabilities */ + spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); + spapr_ovec_cleanup(ov5_guest); + + spapr_check_mmu_mode(guest_radix); + + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); + spapr_ovec_cleanup(ov1_guest); + + /* + * Check for NUMA affinity conditions now that we know which NUMA + * affinity the guest will use. + */ + spapr_numa_associativity_check(spapr); + + /* + * Ensure the guest asks for an interrupt mode we support; + * otherwise terminate the boot. + */ + if (guest_xive) { + if (!spapr->irq->xive) { + error_report( +"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property"); + exit(EXIT_FAILURE); + } + } else { + if (!spapr->irq->xics) { + error_report( +"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual"); + exit(EXIT_FAILURE); + } + } + + spapr_irq_update_active_intc(spapr); + + /* + * Process all pending hot-plug/unplug requests now. An updated full + * rendered FDT will be returned to the guest. + */ + spapr_drc_reset_all(spapr); + spapr_clear_pending_hotplug_events(spapr); + + /* + * If spapr_machine_reset() did not set up a HPT but one is necessary + * (because the guest isn't going to use radix) then set it up here. 
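+     * A set PATE1_GR bit in spapr->patb_entry means the partition table
+     * entry still describes a radix setup, i.e. no HPT was allocated at
+     * reset, so a guest that negotiated hash needs one allocated now.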
+ */ + if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { + /* legacy hash or new hash: */ + spapr_setup_hpt(spapr); + } + + fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize); + g_free(spapr->fdt_blob); + spapr->fdt_size = fdt_totalsize(fdt); + spapr->fdt_initial_size = spapr->fdt_size; + spapr->fdt_blob = fdt; + + return H_SUCCESS; +} + +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong vec = ppc64_phys_to_real(args[0]); + target_ulong fdt_buf = args[1]; + target_ulong fdt_bufsize = args[2]; + target_ulong ret; + SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 }; + + if (fdt_bufsize < sizeof(hdr)) { + error_report("SLOF provided insufficient CAS buffer " + TARGET_FMT_lu " (min: %zu)", fdt_bufsize, sizeof(hdr)); + exit(EXIT_FAILURE); + } + + fdt_bufsize -= sizeof(hdr); + + ret = do_client_architecture_support(cpu, spapr, vec, fdt_bufsize); + if (ret == H_SUCCESS) { + _FDT((fdt_pack(spapr->fdt_blob))); + spapr->fdt_size = fdt_totalsize(spapr->fdt_blob); + spapr->fdt_initial_size = spapr->fdt_size; + + cpu_physical_memory_write(fdt_buf, &hdr, sizeof(hdr)); + cpu_physical_memory_write(fdt_buf + sizeof(hdr), spapr->fdt_blob, + spapr->fdt_size); + trace_spapr_cas_continue(spapr->fdt_size + sizeof(hdr)); + } + + return ret; +} + +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + target_ulong ret = do_client_architecture_support(POWERPC_CPU(cs), spapr, + ovec_addr, FDT_MAX_SIZE); + + /* + * This adds stdout and generates phandles for boottime and CAS FDTs. + * It is alright to update the FDT here as do_client_architecture_support() + * does not pack it. 
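+     * The FDT is only packed later, on the SLOF path, in
+     * h_client_architecture_support().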
+ */ + spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob); + + return ret; +} + +static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint64_t characteristics = H_CPU_CHAR_HON_BRANCH_HINTS & + ~H_CPU_CHAR_THR_RECONF_TRIG; + uint64_t behaviour = H_CPU_BEHAV_FAVOUR_SECURITY; + uint8_t safe_cache = spapr_get_cap(spapr, SPAPR_CAP_CFPC); + uint8_t safe_bounds_check = spapr_get_cap(spapr, SPAPR_CAP_SBBC); + uint8_t safe_indirect_branch = spapr_get_cap(spapr, SPAPR_CAP_IBS); + uint8_t count_cache_flush_assist = spapr_get_cap(spapr, + SPAPR_CAP_CCF_ASSIST); + + switch (safe_cache) { + case SPAPR_CAP_WORKAROUND: + characteristics |= H_CPU_CHAR_L1D_FLUSH_ORI30; + characteristics |= H_CPU_CHAR_L1D_FLUSH_TRIG2; + characteristics |= H_CPU_CHAR_L1D_THREAD_PRIV; + behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR; + break; + case SPAPR_CAP_FIXED: + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY; + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS; + break; + default: /* broken */ + assert(safe_cache == SPAPR_CAP_BROKEN); + behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR; + break; + } + + switch (safe_bounds_check) { + case SPAPR_CAP_WORKAROUND: + characteristics |= H_CPU_CHAR_SPEC_BAR_ORI31; + behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + break; + case SPAPR_CAP_FIXED: + break; + default: /* broken */ + assert(safe_bounds_check == SPAPR_CAP_BROKEN); + behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + break; + } + + switch (safe_indirect_branch) { + case SPAPR_CAP_FIXED_NA: + break; + case SPAPR_CAP_FIXED_CCD: + characteristics |= H_CPU_CHAR_CACHE_COUNT_DIS; + break; + case SPAPR_CAP_FIXED_IBS: + characteristics |= H_CPU_CHAR_BCCTRL_SERIALISED; + break; + case SPAPR_CAP_WORKAROUND: + behaviour |= H_CPU_BEHAV_FLUSH_COUNT_CACHE; + if (count_cache_flush_assist) { + characteristics |= H_CPU_CHAR_BCCTR_FLUSH_ASSIST; + } + break; + default: /* broken */ + assert(safe_indirect_branch == SPAPR_CAP_BROKEN); + break; + } + + args[0] = characteristics; + args[1] = behaviour; + return H_SUCCESS; +} + +static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong dt = ppc64_phys_to_real(args[0]); + struct fdt_header hdr = { 0 }; + unsigned cb; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + void *fdt; + + cpu_physical_memory_read(dt, &hdr, sizeof(hdr)); + cb = fdt32_to_cpu(hdr.totalsize); + + if (!smc->update_dt_enabled) { + return H_SUCCESS; + } + + /* Check that the fdt did not grow out of proportion */ + if (cb > spapr->fdt_initial_size * 2) { + trace_spapr_update_dt_failed_size(spapr->fdt_initial_size, cb, + fdt32_to_cpu(hdr.magic)); + return H_PARAMETER; + } + + fdt = g_malloc0(cb); + cpu_physical_memory_read(dt, fdt, cb); + + /* Check the fdt consistency */ + if (fdt_check_full(fdt, cb)) { + trace_spapr_update_dt_failed_check(spapr->fdt_initial_size, cb, + fdt32_to_cpu(hdr.magic)); + return H_PARAMETER; + } + + g_free(spapr->fdt_blob); + spapr->fdt_size = cb; + spapr->fdt_blob = fdt; + trace_spapr_update_dt(cb); + + return H_SUCCESS; +} + +static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; +static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1]; +static spapr_hcall_fn svm_hypercall_table[(SVM_HCALL_MAX - SVM_HCALL_BASE) / 4 + 1]; + +void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) +{ + spapr_hcall_fn *slot; + + if (opcode <= MAX_HCALL_OPCODE) { + assert((opcode & 0x3) == 0); + + slot = 
&papr_hypercall_table[opcode / 4];
+    } else if (opcode >= SVM_HCALL_BASE && opcode <= SVM_HCALL_MAX) {
+        /* we only have SVM-related hcall numbers assigned in multiples of 4 */
+        assert((opcode & 0x3) == 0);
+
+        slot = &svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
+    } else {
+        assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX));
+
+        slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+    }
+
+    assert(!(*slot));
+    *slot = fn;
+}
+
+target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
+                             target_ulong *args)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
+    if ((opcode <= MAX_HCALL_OPCODE)
+        && ((opcode & 0x3) == 0)) {
+        spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
+
+        if (fn) {
+            return fn(cpu, spapr, opcode, args);
+        }
+    } else if ((opcode >= SVM_HCALL_BASE) &&
+               (opcode <= SVM_HCALL_MAX)) {
+        spapr_hcall_fn fn = svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
+
+        if (fn) {
+            return fn(cpu, spapr, opcode, args);
+        }
+    } else if ((opcode >= KVMPPC_HCALL_BASE) &&
+               (opcode <= KVMPPC_HCALL_MAX)) {
+        spapr_hcall_fn fn = kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+
+        if (fn) {
+            return fn(cpu, spapr, opcode, args);
+        }
+    }
+
+    qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x" TARGET_FMT_lx "\n",
+                  opcode);
+    return H_FUNCTION;
+}
+
+#ifndef CONFIG_TCG
+static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    g_assert_not_reached();
+}
+
+static void hypercall_register_softmmu(void)
+{
+    /* hcall-pft */
+    spapr_register_hypercall(H_ENTER, h_softmmu);
+    spapr_register_hypercall(H_REMOVE, h_softmmu);
+    spapr_register_hypercall(H_PROTECT, h_softmmu);
+    spapr_register_hypercall(H_READ, h_softmmu);
+
+    /* hcall-bulk */
+    spapr_register_hypercall(H_BULK_REMOVE, h_softmmu);
+}
+#else
+static void hypercall_register_softmmu(void)
+{
+    /* DO NOTHING */
+}
+#endif
+
+static void hypercall_register_types(void)
+{
+    hypercall_register_softmmu();
+
+    /* hcall-hpt-resize */
+    spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
+    spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
+
+    /* hcall-splpar */
+    spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
+    spapr_register_hypercall(H_CEDE, h_cede);
+    spapr_register_hypercall(H_CONFER, h_confer);
+    spapr_register_hypercall(H_PROD, h_prod);
+
+    /* hcall-join */
+    spapr_register_hypercall(H_JOIN, h_join);
+
+    spapr_register_hypercall(H_SIGNAL_SYS_RESET, h_signal_sys_reset);
+
+    /* processor register resource access h-calls */
+    spapr_register_hypercall(H_SET_SPRG0, h_set_sprg0);
+    spapr_register_hypercall(H_SET_DABR, h_set_dabr);
+    spapr_register_hypercall(H_SET_XDABR, h_set_xdabr);
+    spapr_register_hypercall(H_PAGE_INIT, h_page_init);
+    spapr_register_hypercall(H_SET_MODE, h_set_mode);
+
+    /* In Memory Table MMU h-calls */
+    spapr_register_hypercall(H_CLEAN_SLB, h_clean_slb);
+    spapr_register_hypercall(H_INVALIDATE_PID, h_invalidate_pid);
+    spapr_register_hypercall(H_REGISTER_PROC_TBL, h_register_process_table);
+
+    /* hcall-get-cpu-characteristics */
+    spapr_register_hypercall(H_GET_CPU_CHARACTERISTICS,
+                             h_get_cpu_characteristics);
+
+    /* "debugger" hcalls (also used by SLOF). Note: We do -not- differentiate
+     * here between the "CI" and the "CACHE" variants; they will use whatever
+     * mapping attributes qemu is using.
When using KVM, the kernel will + * enforce the attributes more strongly + */ + spapr_register_hypercall(H_LOGICAL_CI_LOAD, h_logical_load); + spapr_register_hypercall(H_LOGICAL_CI_STORE, h_logical_store); + spapr_register_hypercall(H_LOGICAL_CACHE_LOAD, h_logical_load); + spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store); + spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi); + spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf); + spapr_register_hypercall(KVMPPC_H_LOGICAL_MEMOP, h_logical_memop); + + /* qemu/KVM-PPC specific hcalls */ + spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas); + + /* ibm,client-architecture-support support */ + spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); + + spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); +} + +type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c new file mode 100644 index 000000000..db0107185 --- /dev/null +++ b/hw/ppc/spapr_iommu.c @@ -0,0 +1,718 @@ +/* + * QEMU sPAPR IOMMU (TCE) code + * + * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" +#include "sysemu/dma.h" +#include "trace.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" + +#include <libfdt.h> + +enum SpaprTceAccess { + SPAPR_TCE_FAULT = 0, + SPAPR_TCE_RO = 1, + SPAPR_TCE_WO = 2, + SPAPR_TCE_RW = 3, +}; + +#define IOMMU_PAGE_SIZE(shift) (1ULL << (shift)) +#define IOMMU_PAGE_MASK(shift) (~(IOMMU_PAGE_SIZE(shift) - 1)) + +static QLIST_HEAD(, SpaprTceTable) spapr_tce_tables; + +SpaprTceTable *spapr_tce_find_by_liobn(target_ulong liobn) +{ + SpaprTceTable *tcet; + + if (liobn & 0xFFFFFFFF00000000ULL) { + hcall_dprintf("Request for out-of-bounds LIOBN 0x" TARGET_FMT_lx "\n", + liobn); + return NULL; + } + + QLIST_FOREACH(tcet, &spapr_tce_tables, list) { + if (tcet->liobn == (uint32_t)liobn) { + return tcet; + } + } + + return NULL; +} + +static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) +{ + switch (tce & SPAPR_TCE_RW) { + case SPAPR_TCE_FAULT: + return IOMMU_NONE; + case SPAPR_TCE_RO: + return IOMMU_RO; + case SPAPR_TCE_WO: + return IOMMU_WO; + default: /* SPAPR_TCE_RW */ + return IOMMU_RW; + } +} + +static uint64_t *spapr_tce_alloc_table(uint32_t liobn, + uint32_t page_shift, + uint64_t bus_offset, + uint32_t nb_table, + int *fd, + bool need_vfio) +{ + uint64_t *table = NULL; + + if (kvm_enabled()) { + table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table, + fd, need_vfio); + } + + if (!table) { + *fd = -1; + table = g_new0(uint64_t, nb_table); + } + + trace_spapr_iommu_new_table(liobn, table, *fd); + + return table; +} + +static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table) +{ + if (!kvm_enabled() || + (kvmppc_remove_spapr_tce(table, fd, nb_table) != 0)) { + g_free(table); + } +} + +/* Called from RCU critical section */ +static IOMMUTLBEntry spapr_tce_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu); + uint64_t tce; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = 0, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + + if ((addr >> tcet->page_shift) < tcet->nb_table) { + /* Check if we are in bound */ + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + tce = tcet->table[addr >> tcet->page_shift]; + ret.iova = addr & page_mask; + ret.translated_addr = tce & page_mask; + ret.addr_mask = ~page_mask; + ret.perm = spapr_tce_iommu_access_flags(tce); + } + trace_spapr_iommu_xlate(tcet->liobn, addr, ret.translated_addr, ret.perm, + ret.addr_mask); + + return ret; +} + +static void spapr_tce_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) +{ + MemoryRegion *mr = MEMORY_REGION(iommu_mr); + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + hwaddr addr, granularity; + IOMMUTLBEntry iotlb; + SpaprTceTable *tcet = container_of(iommu_mr, SpaprTceTable, iommu); + + if (tcet->skipping_replay) { + return; + } + + granularity = memory_region_iommu_get_min_page_size(iommu_mr); + + for (addr = 0; addr < memory_region_size(mr); addr += granularity) { + iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx); + if (iotlb.perm != IOMMU_NONE) { + n->notify(n, &iotlb); + } + + /* + * if (2^64 - MR size) < granularity, it's possible to get an + * infinite loop here. This should catch such a wraparound. 
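+         * For example, with a 64K granularity, addr = 0xffffffffffff0000
+         * would wrap to addr + granularity == 0, which is < addr, so the
+         * check below ends the loop.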
+         */
+        if ((addr + granularity) < addr) {
+            break;
+        }
+    }
+}
+
+static int spapr_tce_table_pre_save(void *opaque)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(opaque);
+
+    tcet->mig_table = tcet->table;
+    tcet->mig_nb_table = tcet->nb_table;
+
+    trace_spapr_iommu_pre_save(tcet->liobn, tcet->mig_nb_table,
+                               tcet->bus_offset, tcet->page_shift);
+
+    return 0;
+}
+
+static uint64_t spapr_tce_get_min_page_size(IOMMUMemoryRegion *iommu)
+{
+    SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
+
+    return 1ULL << tcet->page_shift;
+}
+
+static int spapr_tce_get_attr(IOMMUMemoryRegion *iommu,
+                              enum IOMMUMemoryRegionAttr attr, void *data)
+{
+    SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
+
+    if (attr == IOMMU_ATTR_SPAPR_TCE_FD && kvmppc_has_cap_spapr_vfio()) {
+        *(int *) data = tcet->fd;
+        return 0;
+    }
+
+    return -EINVAL;
+}
+
+static int spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
+                                         IOMMUNotifierFlag old,
+                                         IOMMUNotifierFlag new,
+                                         Error **errp)
+{
+    struct SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu);
+
+    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
+        error_setg(errp, "spapr_tce does not support dev-iotlb yet");
+        return -EINVAL;
+    }
+
+    if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) {
+        spapr_tce_set_need_vfio(tbl, true);
+    } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) {
+        spapr_tce_set_need_vfio(tbl, false);
+    }
+    return 0;
+}
+
+static int spapr_tce_table_post_load(void *opaque, int version_id)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(opaque);
+    uint32_t old_nb_table = tcet->nb_table;
+    uint64_t old_bus_offset = tcet->bus_offset;
+    uint32_t old_page_shift = tcet->page_shift;
+
+    if (tcet->vdev) {
+        spapr_vio_set_bypass(tcet->vdev, tcet->bypass);
+    }
+
+    if (tcet->mig_nb_table != tcet->nb_table) {
+        spapr_tce_table_disable(tcet);
+    }
+
+    if (tcet->mig_nb_table) {
+        if (!tcet->nb_table) {
+            spapr_tce_table_enable(tcet, old_page_shift, old_bus_offset,
+                                   tcet->mig_nb_table);
+        }
+
+        memcpy(tcet->table, tcet->mig_table,
+               tcet->nb_table * sizeof(tcet->table[0]));
+
+        g_free(tcet->mig_table);
+        tcet->mig_table = NULL;
+    }
+
+    trace_spapr_iommu_post_load(tcet->liobn, old_nb_table, tcet->nb_table,
+                                tcet->bus_offset, tcet->page_shift);
+
+    return 0;
+}
+
+static bool spapr_tce_table_ex_needed(void *opaque)
+{
+    SpaprTceTable *tcet = opaque;
+
+    return tcet->bus_offset || tcet->page_shift != 0xC;
+}
+
+static const VMStateDescription vmstate_spapr_tce_table_ex = {
+    .name = "spapr_iommu_ex",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_tce_table_ex_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(bus_offset, SpaprTceTable),
+        VMSTATE_UINT32(page_shift, SpaprTceTable),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_tce_table = {
+    .name = "spapr_iommu",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .pre_save = spapr_tce_table_pre_save,
+    .post_load = spapr_tce_table_post_load,
+    .fields = (VMStateField []) {
+        /* Sanity check */
+        VMSTATE_UINT32_EQUAL(liobn, SpaprTceTable, NULL),
+
+        /* IOMMU state */
+        VMSTATE_UINT32(mig_nb_table, SpaprTceTable),
+        VMSTATE_BOOL(bypass, SpaprTceTable),
+        VMSTATE_VARRAY_UINT32_ALLOC(mig_table, SpaprTceTable, mig_nb_table, 0,
+                                    vmstate_info_uint64, uint64_t),
+
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_spapr_tce_table_ex,
+        NULL
+    }
+};
+
+static void spapr_tce_table_realize(DeviceState *dev, Error **errp)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev);
+    Object
*tcetobj = OBJECT(tcet); + gchar *tmp; + + tcet->fd = -1; + tcet->need_vfio = false; + tmp = g_strdup_printf("tce-root-%x", tcet->liobn); + memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX); + g_free(tmp); + + tmp = g_strdup_printf("tce-iommu-%x", tcet->liobn); + memory_region_init_iommu(&tcet->iommu, sizeof(tcet->iommu), + TYPE_SPAPR_IOMMU_MEMORY_REGION, + tcetobj, tmp, 0); + g_free(tmp); + + QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list); + + vmstate_register(VMSTATE_IF(tcet), tcet->liobn, &vmstate_spapr_tce_table, + tcet); +} + +void spapr_tce_set_need_vfio(SpaprTceTable *tcet, bool need_vfio) +{ + size_t table_size = tcet->nb_table * sizeof(uint64_t); + uint64_t *oldtable; + int newfd = -1; + + g_assert(need_vfio != tcet->need_vfio); + + tcet->need_vfio = need_vfio; + + if (!need_vfio || (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio())) { + return; + } + + oldtable = tcet->table; + + tcet->table = spapr_tce_alloc_table(tcet->liobn, + tcet->page_shift, + tcet->bus_offset, + tcet->nb_table, + &newfd, + need_vfio); + memcpy(tcet->table, oldtable, table_size); + + spapr_tce_free_table(oldtable, tcet->fd, tcet->nb_table); + + tcet->fd = newfd; +} + +SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn) +{ + SpaprTceTable *tcet; + gchar *tmp; + + if (spapr_tce_find_by_liobn(liobn)) { + error_report("Attempted to create TCE table with duplicate" + " LIOBN 0x%x", liobn); + return NULL; + } + + tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE)); + tcet->liobn = liobn; + + tmp = g_strdup_printf("tce-table-%x", liobn); + object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet)); + g_free(tmp); + object_unref(OBJECT(tcet)); + + qdev_realize(DEVICE(tcet), NULL, NULL); + + return tcet; +} + +void spapr_tce_table_enable(SpaprTceTable *tcet, + uint32_t page_shift, uint64_t bus_offset, + uint32_t nb_table) +{ + if (tcet->nb_table) { + warn_report("trying to enable already enabled TCE table"); + return; + } + + tcet->bus_offset = bus_offset; + tcet->page_shift = page_shift; + tcet->nb_table = nb_table; + tcet->table = spapr_tce_alloc_table(tcet->liobn, + tcet->page_shift, + tcet->bus_offset, + tcet->nb_table, + &tcet->fd, + tcet->need_vfio); + + memory_region_set_size(MEMORY_REGION(&tcet->iommu), + (uint64_t)tcet->nb_table << tcet->page_shift); + memory_region_add_subregion(&tcet->root, tcet->bus_offset, + MEMORY_REGION(&tcet->iommu)); +} + +void spapr_tce_table_disable(SpaprTceTable *tcet) +{ + if (!tcet->nb_table) { + return; + } + + memory_region_del_subregion(&tcet->root, MEMORY_REGION(&tcet->iommu)); + memory_region_set_size(MEMORY_REGION(&tcet->iommu), 0); + + spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table); + tcet->fd = -1; + tcet->table = NULL; + tcet->bus_offset = 0; + tcet->page_shift = 0; + tcet->nb_table = 0; +} + +static void spapr_tce_table_unrealize(DeviceState *dev) +{ + SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev); + + vmstate_unregister(VMSTATE_IF(tcet), &vmstate_spapr_tce_table, tcet); + + QLIST_REMOVE(tcet, list); + + spapr_tce_table_disable(tcet); +} + +MemoryRegion *spapr_tce_get_iommu(SpaprTceTable *tcet) +{ + return &tcet->root; +} + +static void spapr_tce_reset(DeviceState *dev) +{ + SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev); + size_t table_size = tcet->nb_table * sizeof(uint64_t); + + if (tcet->nb_table) { + memset(tcet->table, 0, table_size); + } +} + +static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + target_ulong tce) +{ + IOMMUTLBEvent event; + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + 
unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; + + if (index >= tcet->nb_table) { + hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x" + TARGET_FMT_lx "\n", ioba); + return H_PARAMETER; + } + + tcet->table[index] = tce; + + event.entry.target_as = &address_space_memory, + event.entry.iova = (ioba - tcet->bus_offset) & page_mask; + event.entry.translated_addr = tce & page_mask; + event.entry.addr_mask = ~page_mask; + event.entry.perm = spapr_tce_iommu_access_flags(tce); + event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; + memory_region_notify_iommu(&tcet->iommu, 0, event); + + return H_SUCCESS; +} + +static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + int i; + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong ioba1 = ioba; + target_ulong tce_list = args[2]; + target_ulong npages = args[3]; + target_ulong ret = H_PARAMETER, tce = 0; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + CPUState *cs = CPU(cpu); + hwaddr page_mask, page_size; + + if (!tcet) { + return H_PARAMETER; + } + + if ((npages > 512) || (tce_list & SPAPR_TCE_PAGE_MASK)) { + return H_PARAMETER; + } + + page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + page_size = IOMMU_PAGE_SIZE(tcet->page_shift); + ioba &= page_mask; + + for (i = 0; i < npages; ++i, ioba += page_size) { + tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong)); + + ret = put_tce_emu(tcet, ioba, tce); + if (ret) { + break; + } + } + + /* Trace last successful or the first problematic entry */ + i = i ? (i - 1) : 0; + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_indirect(liobn, ioba1, tce_list, i, tce, ret); + } else { + trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, tce, ret); + } + return ret; +} + +static target_ulong h_stuff_tce(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + int i; + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong tce_value = args[2]; + target_ulong npages = args[3]; + target_ulong ret = H_PARAMETER; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + hwaddr page_mask, page_size; + + if (!tcet) { + return H_PARAMETER; + } + + if (npages > tcet->nb_table) { + return H_PARAMETER; + } + + page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + page_size = IOMMU_PAGE_SIZE(tcet->page_shift); + ioba &= page_mask; + + for (i = 0; i < npages; ++i, ioba += page_size) { + ret = put_tce_emu(tcet, ioba, tce_value); + if (ret) { + break; + } + } + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_stuff(liobn, ioba, tce_value, npages, ret); + } else { + trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret); + } + + return ret; +} + +static target_ulong h_put_tce(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong tce = args[2]; + target_ulong ret = H_PARAMETER; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + + if (tcet) { + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + ioba &= page_mask; + + ret = put_tce_emu(tcet, ioba, tce); + } + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_put(liobn, ioba, tce, ret); + } else { + trace_spapr_iommu_put(liobn, ioba, tce, ret); + } + + return ret; +} + +static target_ulong get_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + target_ulong *tce) +{ + unsigned long index = (ioba - tcet->bus_offset) 
>> tcet->page_shift; + + if (index >= tcet->nb_table) { + hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x" + TARGET_FMT_lx "\n", ioba); + return H_PARAMETER; + } + + *tce = tcet->table[index]; + + return H_SUCCESS; +} + +static target_ulong h_get_tce(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong tce = 0; + target_ulong ret = H_PARAMETER; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + + if (tcet) { + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + ioba &= page_mask; + + ret = get_tce_emu(tcet, ioba, &tce); + if (!ret) { + args[0] = tce; + } + } + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_get(liobn, ioba, ret, tce); + } else { + trace_spapr_iommu_get(liobn, ioba, ret, tce); + } + + return ret; +} + +int spapr_dma_dt(void *fdt, int node_off, const char *propname, + uint32_t liobn, uint64_t window, uint32_t size) +{ + uint32_t dma_prop[5]; + int ret; + + dma_prop[0] = cpu_to_be32(liobn); + dma_prop[1] = cpu_to_be32(window >> 32); + dma_prop[2] = cpu_to_be32(window & 0xFFFFFFFF); + dma_prop[3] = 0; /* window size is 32 bits */ + dma_prop[4] = cpu_to_be32(size); + + ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop(fdt, node_off, propname, dma_prop, sizeof(dma_prop)); + if (ret < 0) { + return ret; + } + + return 0; +} + +int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, + SpaprTceTable *tcet) +{ + if (!tcet) { + return 0; + } + + return spapr_dma_dt(fdt, node_off, propname, + tcet->liobn, 0, tcet->nb_table << tcet->page_shift); +} + +static void spapr_tce_table_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = spapr_tce_table_realize; + dc->reset = spapr_tce_reset; + dc->unrealize = spapr_tce_table_unrealize; + /* Reason: This is just an internal device for handling the hypercalls */ + dc->user_creatable = false; + + QLIST_INIT(&spapr_tce_tables); + + /* hcall-tce */ + spapr_register_hypercall(H_PUT_TCE, h_put_tce); + spapr_register_hypercall(H_GET_TCE, h_get_tce); + spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect); + spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce); +} + +static TypeInfo spapr_tce_table_info = { + .name = TYPE_SPAPR_TCE_TABLE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprTceTable), + .class_init = spapr_tce_table_class_init, +}; + +static void spapr_iommu_memory_region_class_init(ObjectClass *klass, void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = spapr_tce_translate_iommu; + imrc->replay = spapr_tce_replay; + imrc->get_min_page_size = spapr_tce_get_min_page_size; + imrc->notify_flag_changed = spapr_tce_notify_flag_changed; + imrc->get_attr = spapr_tce_get_attr; +} + +static const TypeInfo spapr_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_SPAPR_IOMMU_MEMORY_REGION, + .class_init = spapr_iommu_memory_region_class_init, +}; + +static void register_types(void) +{ + type_register_static(&spapr_tce_table_info); + type_register_static(&spapr_iommu_memory_region_info); +} + +type_init(register_types); diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c new file mode 100644 index 000000000..a0d1e1298 --- /dev/null +++ b/hw/ppc/spapr_irq.c @@ -0,0 
+1,599 @@ +/* + * QEMU PowerPC sPAPR IRQ interface + * + * Copyright (c) 2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/irq.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_xive.h" +#include "hw/ppc/xics.h" +#include "hw/ppc/xics_spapr.h" +#include "hw/qdev-properties.h" +#include "cpu-models.h" +#include "sysemu/kvm.h" + +#include "trace.h" + +static const TypeInfo spapr_intc_info = { + .name = TYPE_SPAPR_INTC, + .parent = TYPE_INTERFACE, + .class_size = sizeof(SpaprInterruptControllerClass), +}; + +static void spapr_irq_msi_init(SpaprMachineState *spapr) +{ + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + /* Legacy mode doesn't use this allocator */ + return; + } + + spapr->irq_map_nr = spapr_irq_nr_msis(spapr); + spapr->irq_map = bitmap_new(spapr->irq_map_nr); +} + +int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align, + Error **errp) +{ + int irq; + + /* + * The 'align_mask' parameter of bitmap_find_next_zero_area() + * should be one less than a power of 2; 0 means no + * alignment. Adapt the 'align' value of the former allocator + * to fit the requirements of bitmap_find_next_zero_area() + */ + align -= 1; + + irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, + align); + if (irq == spapr->irq_map_nr) { + error_setg(errp, "can't find a free %d-IRQ block", num); + return -1; + } + + bitmap_set(spapr->irq_map, irq, num); + + return irq + SPAPR_IRQ_MSI; +} + +void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) +{ + bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); +} + +int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, + SpaprInterruptController *intc, + uint32_t nr_servers, + Error **errp) +{ + Error *local_err = NULL; + + if (kvm_enabled() && kvm_kernel_irqchip_allowed()) { + if (fn(intc, nr_servers, &local_err) < 0) { + if (kvm_kernel_irqchip_required()) { + error_prepend(&local_err, + "kernel_irqchip requested but unavailable: "); + error_propagate(errp, local_err); + return -1; + } + + /* + * We failed to initialize the KVM device, fallback to + * emulated mode + */ + error_prepend(&local_err, + "kernel_irqchip allowed but unavailable: "); + error_append_hint(&local_err, + "Falling back to kernel-irqchip=off\n"); + warn_report_err(local_err); + } + } + + return 0; +} + +/* + * XICS IRQ backend. + */ + +SpaprIrq spapr_irq_xics = { + .xics = true, + .xive = false, +}; + +/* + * XIVE IRQ backend. + */ + +SpaprIrq spapr_irq_xive = { + .xics = false, + .xive = true, +}; + +/* + * Dual XIVE and XICS IRQ backend. + * + * Both interrupt mode, XIVE and XICS, objects are created but the + * machine starts in legacy interrupt mode (XICS). It can be changed + * by the CAS negotiation process and, in that case, the new mode is + * activated after an extra machine reset. + */ + +/* + * Define values in sync with the XIVE and XICS backend + */ +SpaprIrq spapr_irq_dual = { + .xics = true, + .xive = true, +}; + + +static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) +{ + ERRP_GUARD(); + MachineState *machine = MACHINE(spapr); + + /* + * Sanity checks on non-P9 machines. 
On these, XIVE is not + * advertised, see spapr_dt_ov5_platform_support() + */ + if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, + 0, spapr->max_compat_pvr)) { + /* + * If the 'dual' interrupt mode is selected, force XICS as CAS + * negotiation is useless. + */ + if (spapr->irq == &spapr_irq_dual) { + spapr->irq = &spapr_irq_xics; + return 0; + } + + /* + * Non-P9 machines using only XIVE is a bogus setup. We have two + * scenarios to take into account because of the compat mode: + * + * 1. POWER7/8 machines should fail to init later on when creating + * the XIVE interrupt presenters because a POWER9 exception + * model is required. + + * 2. POWER9 machines using the POWER8 compat mode won't fail and + * will let the OS boot with a partial XIVE setup : DT + * properties but no hcalls. + * + * To cover both and not confuse the OS, add an early failure in + * QEMU. + */ + if (!spapr->irq->xics) { + error_setg(errp, "XIVE-only machines require a POWER9 CPU"); + return -1; + } + } + + /* + * On a POWER9 host, some older KVM XICS devices cannot be destroyed and + * re-created. Same happens with KVM nested guests. Detect that early to + * avoid QEMU to exit later when the guest reboots. + */ + if (kvm_enabled() && + spapr->irq == &spapr_irq_dual && + kvm_kernel_irqchip_required() && + xics_kvm_has_broken_disconnect()) { + error_setg(errp, + "KVM is incompatible with ic-mode=dual,kernel-irqchip=on"); + error_append_hint(errp, + "This can happen with an old KVM or in a KVM nested guest.\n"); + error_append_hint(errp, + "Try without kernel-irqchip or with kernel-irqchip=off.\n"); + return -1; + } + + return 0; +} + +/* + * sPAPR IRQ frontend routines for devices + */ +#define ALL_INTCS(spapr_) \ + { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), } + +int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, + PowerPCCPU *cpu, Error **errp) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + int rc; + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + rc = sicc->cpu_intc_create(intc, cpu, errp); + if (rc < 0) { + return rc; + } + } + } + + return 0; +} + +void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + sicc->cpu_intc_reset(intc, cpu); + } + } +} + +void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + sicc->cpu_intc_destroy(intc, cpu); + } + } +} + +static void spapr_set_irq(void *opaque, int irq, int level) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(opaque); + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->active_intc); + + sicc->set_irq(spapr->active_intc, irq, level); +} + +void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon) +{ + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->active_intc); + + sicc->print_info(spapr->active_intc, mon); +} + +void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, + void *fdt, uint32_t phandle) +{ + 
SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->active_intc); + + sicc->dt(spapr->active_intc, nr_servers, fdt, phandle); +} + +uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (smc->legacy_irq_allocation) { + return smc->nr_xirqs; + } else { + return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; + } +} + +void spapr_irq_init(SpaprMachineState *spapr, Error **errp) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (kvm_enabled() && kvm_kernel_irqchip_split()) { + error_setg(errp, "kernel_irqchip split mode not supported on pseries"); + return; + } + + if (spapr_irq_check(spapr, errp) < 0) { + return; + } + + /* Initialize the MSI IRQ allocator. */ + spapr_irq_msi_init(spapr); + + if (spapr->irq->xics) { + Object *obj; + + obj = object_new(TYPE_ICS_SPAPR); + + object_property_add_child(OBJECT(spapr), "ics", obj); + object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr), + &error_abort); + object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort); + if (!qdev_realize(DEVICE(obj), NULL, errp)) { + return; + } + + spapr->ics = ICS_SPAPR(obj); + } + + if (spapr->irq->xive) { + uint32_t nr_servers = spapr_max_server_number(spapr); + DeviceState *dev; + int i; + + dev = qdev_new(TYPE_SPAPR_XIVE); + qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE); + /* + * 8 XIVE END structures per CPU. One for each available + * priority + */ + qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); + object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr), + &error_abort); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + spapr->xive = SPAPR_XIVE(dev); + + /* Enable the CPU IPIs */ + for (i = 0; i < nr_servers; ++i) { + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->xive); + + if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, + false, errp) < 0) { + return; + } + } + + spapr_xive_hcall_init(spapr); + } + + spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, + smc->nr_xirqs + SPAPR_XIRQ_BASE); + + /* + * Mostly we don't actually need this until reset, except that not + * having this set up can cause VFIO devices to issue a + * false-positive warning during realize(), because they don't yet + * have an in-kernel irq chip. 
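+     * Calling spapr_irq_update_active_intc() here activates the default
+     * controller, which is what connects the in-kernel irqchip early
+     * when KVM is in use.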
+ */ + spapr_irq_update_active_intc(spapr); +} + +int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + int rc; + + assert(irq >= SPAPR_XIRQ_BASE); + assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + rc = sicc->claim_irq(intc, irq, lsi, errp); + if (rc < 0) { + return rc; + } + } + } + + return 0; +} + +void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i, j; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + assert(irq >= SPAPR_XIRQ_BASE); + assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + + for (i = irq; i < (irq + num); i++) { + for (j = 0; j < ARRAY_SIZE(intcs); j++) { + SpaprInterruptController *intc = intcs[j]; + + if (intc) { + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(intc); + sicc->free_irq(intc, i); + } + } + } +} + +qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + /* + * This interface is basically for VIO and PHB devices to find the + * right qemu_irq to manipulate, so we only allow access to the + * external irqs for now. Currently anything which needs to + * access the IPIs most naturally gets there via the guest side + * interfaces, we can change this if we need to in future. + */ + assert(irq >= SPAPR_XIRQ_BASE); + assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + + if (spapr->ics) { + assert(ics_valid_irq(spapr->ics, irq)); + } + if (spapr->xive) { + assert(irq < spapr->xive->nr_irqs); + assert(xive_eas_is_valid(&spapr->xive->eat[irq])); + } + + return spapr->qirqs[irq]; +} + +int spapr_irq_post_load(SpaprMachineState *spapr, int version_id) +{ + SpaprInterruptControllerClass *sicc; + + spapr_irq_update_active_intc(spapr); + sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); + return sicc->post_load(spapr->active_intc, version_id); +} + +void spapr_irq_reset(SpaprMachineState *spapr, Error **errp) +{ + assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr)); + + spapr_irq_update_active_intc(spapr); +} + +int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp) +{ + const char *nodename = "interrupt-controller"; + int offset, phandle; + + offset = fdt_subnode_offset(fdt, 0, nodename); + if (offset < 0) { + error_setg(errp, "Can't find node \"%s\": %s", + nodename, fdt_strerror(offset)); + return -1; + } + + phandle = fdt_get_phandle(fdt, offset); + if (!phandle) { + error_setg(errp, "Can't get phandle of node \"%s\"", nodename); + return -1; + } + + return phandle; +} + +static void set_active_intc(SpaprMachineState *spapr, + SpaprInterruptController *new_intc) +{ + SpaprInterruptControllerClass *sicc; + uint32_t nr_servers = spapr_max_server_number(spapr); + + assert(new_intc); + + if (new_intc == spapr->active_intc) { + /* Nothing to do */ + return; + } + + if (spapr->active_intc) { + sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); + if (sicc->deactivate) { + sicc->deactivate(spapr->active_intc); + } + } + + sicc = SPAPR_INTC_GET_CLASS(new_intc); + if (sicc->activate) { + sicc->activate(new_intc, nr_servers, &error_fatal); + } + + spapr->active_intc = new_intc; + + /* + * We've changed the kernel irqchip, let VFIO devices 
know they + * need to readjust. + */ + kvm_irqchip_change_notify(); +} + +void spapr_irq_update_active_intc(SpaprMachineState *spapr) +{ + SpaprInterruptController *new_intc; + + if (!spapr->ics) { + /* + * XXX before we run CAS, ov5_cas is initialized empty, which + * indicates XICS, even if we have ic-mode=xive. TODO: clean + * up the CAS path so that we have a clearer way of handling + * this. + */ + new_intc = SPAPR_INTC(spapr->xive); + } else if (spapr->ov5_cas + && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + new_intc = SPAPR_INTC(spapr->xive); + } else { + new_intc = SPAPR_INTC(spapr->ics); + } + + set_active_intc(spapr, new_intc); +} + +/* + * XICS legacy routines - to deprecate one day + */ + +static int ics_find_free_block(ICSState *ics, int num, int alignnum) +{ + int first, i; + + for (first = 0; first < ics->nr_irqs; first += alignnum) { + if (num > (ics->nr_irqs - first)) { + return -1; + } + for (i = first; i < first + num; ++i) { + if (!ics_irq_free(ics, i)) { + break; + } + } + if (i == (first + num)) { + return first; + } + } + + return -1; +} + +int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) +{ + ICSState *ics = spapr->ics; + int first = -1; + + assert(ics); + + /* + * MSIMesage::data is used for storing VIRQ so + * it has to be aligned to num to support multiple + * MSI vectors. MSI-X is not affected by this. + * The hint is used for the first IRQ, the rest should + * be allocated continuously. + */ + if (align) { + assert((num == 1) || (num == 2) || (num == 4) || + (num == 8) || (num == 16) || (num == 32)); + first = ics_find_free_block(ics, num, num); + } else { + first = ics_find_free_block(ics, num, 1); + } + + if (first < 0) { + error_setg(errp, "can't find a free %d-IRQ block", num); + return -1; + } + + return first + ics->offset; +} + +SpaprIrq spapr_irq_xics_legacy = { + .xics = true, + .xive = false, +}; + +static void spapr_irq_register_types(void) +{ + type_register_static(&spapr_intc_info); +} + +type_init(spapr_irq_register_types) diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c new file mode 100644 index 000000000..e9ef7e764 --- /dev/null +++ b/hw/ppc/spapr_numa.c @@ -0,0 +1,697 @@ +/* + * QEMU PowerPC pSeries Logical Partition NUMA associativity handling + * + * Copyright IBM Corp. 2020 + * + * Authors: + * Daniel Henrique Barboza <danielhb413@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "hw/ppc/spapr_numa.h" +#include "hw/pci-host/spapr.h" +#include "hw/ppc/fdt.h" + +/* Moved from hw/ppc/spapr_pci_nvlink2.c */ +#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) + +/* + * Retrieves max_dist_ref_points of the current NUMA affinity. + */ +static int get_max_dist_ref_points(SpaprMachineState *spapr) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_DIST_REF_POINTS; + } + + return FORM1_DIST_REF_POINTS; +} + +/* + * Retrieves numa_assoc_size of the current NUMA affinity. + */ +static int get_numa_assoc_size(SpaprMachineState *spapr) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_NUMA_ASSOC_SIZE; + } + + return FORM1_NUMA_ASSOC_SIZE; +} + +/* + * Retrieves vcpu_assoc_size of the current NUMA affinity. + * + * vcpu_assoc_size is the size of ibm,associativity array + * for CPUs, which has an extra element (vcpu_id) in the end. 
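+ * For example, with FORM1 (4 reference points) each per-node array has
+ * 5 elements, so vcpu_assoc_size is 6; with FORM2 ({1, numa_id}) it is 3.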
+ */
+static int get_vcpu_assoc_size(SpaprMachineState *spapr)
+{
+    return get_numa_assoc_size(spapr) + 1;
+}
+
+/*
+ * Retrieves the ibm,associativity array of NUMA node 'node_id'
+ * for the current NUMA affinity.
+ */
+static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id)
+{
+    if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+        return spapr->FORM2_assoc_array[node_id];
+    }
+    return spapr->FORM1_assoc_array[node_id];
+}
+
+/*
+ * Wrapper that returns node distance from ms->numa_state->nodes
+ * after handling edge cases where the distance might be absent.
+ */
+static int get_numa_distance(MachineState *ms, int src, int dst)
+{
+    NodeInfo *numa_info = ms->numa_state->nodes;
+    int ret = numa_info[src].distance[dst];
+
+    if (ret != 0) {
+        return ret;
+    }
+
+    /*
+     * In case QEMU adds a default NUMA single node when the user
+     * did not add any, or when the user did not supply distances,
+     * the distance will be absent (zero). Return local/remote
+     * distance in this case.
+     */
+    if (src == dst) {
+        return NUMA_DISTANCE_MIN;
+    }
+
+    return NUMA_DISTANCE_DEFAULT;
+}
+
+static bool spapr_numa_is_symmetrical(MachineState *ms)
+{
+    int nb_numa_nodes = ms->numa_state->num_nodes;
+    int src, dst;
+
+    for (src = 0; src < nb_numa_nodes; src++) {
+        for (dst = src; dst < nb_numa_nodes; dst++) {
+            if (get_numa_distance(ms, src, dst) !=
+                get_numa_distance(ms, dst, src)) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+/*
+ * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node.
+ * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is
+ * called from the vPHB reset handler, so we initialize the counter here.
+ * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM
+ * must be equally distant from any other node.
+ * The final value of spapr->gpu_numa_id is going to be written to
+ * max-associativity-domains in spapr_build_fdt().
+ */
+unsigned int spapr_numa_initial_nvgpu_numa_id(MachineState *machine)
+{
+    return MAX(1, machine->numa_state->num_nodes);
+}
+
+/*
+ * This function translates the user-supplied distances into
+ * what the kernel understands as possible values: 10
+ * (local distance), 20, 40, 80 and 160, and returns the equivalent
+ * NUMA level for each. The current heuristic is:
+ * - local distance (10) returns numa_level = 0x4, meaning there is
+ *   no rounding for local distance
+ * - distances between 11 and 30 inclusive -> rounded to 20,
+ *   numa_level = 0x3
+ * - distances between 31 and 60 inclusive -> rounded to 40,
+ *   numa_level = 0x2
+ * - distances between 61 and 120 inclusive -> rounded to 80,
+ *   numa_level = 0x1
+ * - everything above 120 returns numa_level = 0 to indicate that
+ *   there is no match. This will be calculated as distance = 160
+ *   by the kernel (as of v5.9)
+ */
+static uint8_t spapr_numa_get_numa_level(uint8_t distance)
+{
+    if (distance == 10) {
+        return 0x4;
+    } else if (distance >= 11 && distance <= 30) {
+        return 0x3;
+    } else if (distance >= 31 && distance <= 60) {
+        return 0x2;
+    } else if (distance >= 61 && distance <= 120) {
+        return 0x1;
+    }
+
+    return 0;
+}
+
+static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
+{
+    MachineState *ms = MACHINE(spapr);
+    int nb_numa_nodes = ms->numa_state->num_nodes;
+    int src, dst, i, j;
+
+    /*
+     * Fill all associativity domains of non-zero NUMA nodes with
+     * node_id. This is required because the default value (0) is
+     * considered a match with associativity domains of node 0.
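+     * For example, with FORM1 node 1 starts as { 4, 1, 1, 1, 1 } and
+     * node 2 as { 4, 2, 2, 2, 2 }, so neither spuriously matches the
+     * all-zero domains { 4, 0, 0, 0, 0 } of node 0.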
+ */ + for (i = 1; i < nb_numa_nodes; i++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { + spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i); + } + } + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = src; dst < nb_numa_nodes; dst++) { + /* + * This is how the associativity domain between A and B + * is calculated: + * + * - get the distance D between them + * - get the correspondent NUMA level 'n_level' for D + * - all associativity arrays were initialized with their own + * numa_ids, and we're calculating the distance in node_id + * ascending order, starting from node id 0 (the first node + * retrieved by numa_state). This will have a cascade effect in + * the algorithm because the associativity domains that node 0 + * defines will be carried over to other nodes, and node 1 + * associativities will be carried over after taking node 0 + * associativities into account, and so on. This happens because + * we'll assign assoc_src as the associativity domain of dst + * as well, for all NUMA levels beyond and including n_level. + * + * The PPC kernel expects the associativity domains of node 0 to + * be always 0, and this algorithm will grant that by default. + */ + uint8_t distance = get_numa_distance(ms, src, dst); + uint8_t n_level = spapr_numa_get_numa_level(distance); + uint32_t assoc_src; + + /* + * n_level = 0 means that the distance is greater than our last + * rounded value (120). In this case there is no NUMA level match + * between src and dst and we can skip the remaining of the loop. + * + * The Linux kernel will assume that the distance between src and + * dst, in this case of no match, is 10 (local distance) doubled + * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS + * levels (4), so this gives us 10*2*2*2*2 = 160. + * + * This logic can be seen in the Linux kernel source code, as of + * v5.9, in arch/powerpc/mm/numa.c, function __node_distance(). + */ + if (n_level == 0) { + continue; + } + + /* + * We must assign all assoc_src to dst, starting from n_level + * and going up to 0x1. + */ + for (i = n_level; i > 0; i--) { + assoc_src = spapr->FORM1_assoc_array[src][i]; + spapr->FORM1_assoc_array[dst][i] = assoc_src; + } + } + } + +} + +static void spapr_numa_FORM1_affinity_check(MachineState *machine) +{ + int i; + + /* + * Check we don't have a memory-less/cpu-less NUMA node + * Firmware relies on the existing memory/cpu topology to provide the + * NUMA topology to the kernel. + * And the linux kernel needs to know the NUMA topology at start + * to be able to hotplug CPUs later. + */ + if (machine->numa_state->num_nodes) { + for (i = 0; i < machine->numa_state->num_nodes; ++i) { + /* check for memory-less node */ + if (machine->numa_state->nodes[i].node_mem == 0) { + CPUState *cs; + int found = 0; + /* check for cpu-less node */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + if (cpu->node_id == i) { + found = 1; + break; + } + } + /* memory-less and cpu-less node */ + if (!found) { + error_report( +"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i); + exit(EXIT_FAILURE); + } + } + } + } + + if (!spapr_numa_is_symmetrical(machine)) { + error_report( +"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA"); + exit(EXIT_FAILURE); + } +} + +/* + * Set NUMA machine state data based on FORM1 affinity semantics. 
+ */
+static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
+                                           MachineState *machine)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    int nb_numa_nodes = machine->numa_state->num_nodes;
+    int i, j, max_nodes_with_gpus;
+
+    /*
+     * For all associativity arrays: first position is the size,
+     * position FORM1_DIST_REF_POINTS is always the numa_id,
+     * represented by the index 'i'.
+     *
+     * This will break on sparse NUMA setups, when/if QEMU starts
+     * to support them, because there will be no more guarantee that
+     * 'i' will be a valid node_id set by the user.
+     */
+    for (i = 0; i < nb_numa_nodes; i++) {
+        spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+        spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+    }
+
+    /*
+     * Initialize NVLink GPU associativity arrays. We know that
+     * the first GPU will take the first available NUMA id, and
+     * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
+     * At this point we're not sure if there are GPUs or not, but
+     * let's initialize the associativity arrays and allow NVLink
+     * GPUs to be handled like regular NUMA nodes later on.
+     */
+    max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
+
+    for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
+        spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+
+        for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
+            uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
+                                 SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
+            spapr->FORM1_assoc_array[i][j] = gpu_assoc;
+        }
+
+        spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+    }
+
+    /*
+     * Guests pseries-5.1 and older use zeroed associativity domains,
+     * i.e. no domain definition based on NUMA distance input.
+     *
+     * The same applies to guests that have only one NUMA node.
+     */
+    if (smc->pre_5_2_numa_associativity ||
+        machine->numa_state->num_nodes <= 1) {
+        return;
+    }
+
+    spapr_numa_define_FORM1_domains(spapr);
+}
+
+/*
+ * Initialize the NUMA FORM2 machine state data.
+ */
+static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr)
+{
+    int i;
+
+    /*
+     * For all resources but CPUs, FORM2 associativity arrays will
+     * be a size 2 array with the following format:
+     *
+     *     ibm,associativity = {1, numa_id}
+     *
+     * CPUs will write an additional 'vcpu_id' on top of the arrays
+     * being initialized here. 'numa_id' is represented by the
+     * index 'i' of the loop.
+     *
+     * Given that this initialization is also valid for GPU associativity
+     * arrays, handle everything in one single step by populating the
+     * arrays up to NUMA_NODES_MAX_NUM.
+     */
+    for (i = 0; i < NUMA_NODES_MAX_NUM; i++) {
+        spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1);
+        spapr->FORM2_assoc_array[i][1] = cpu_to_be32(i);
+    }
+}
+
+void spapr_numa_associativity_init(SpaprMachineState *spapr,
+                                   MachineState *machine)
+{
+    spapr_numa_FORM1_affinity_init(spapr, machine);
+    spapr_numa_FORM2_affinity_init(spapr);
+}
+
+void spapr_numa_associativity_check(SpaprMachineState *spapr)
+{
+    /*
+     * FORM2 does not have any restrictions we need to handle
+     * at CAS time, for now.
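+     * (FORM1, in contrast, rejects memory-less/cpu-less nodes and
+     * asymmetric distance matrices in spapr_numa_FORM1_affinity_check().)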
+ */ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return; + } + + spapr_numa_FORM1_affinity_check(MACHINE(spapr)); +} + +void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, + int offset, int nodeid) +{ + const uint32_t *associativity = get_associativity(spapr, nodeid); + + _FDT((fdt_setprop(fdt, offset, "ibm,associativity", + associativity, + get_numa_assoc_size(spapr) * sizeof(uint32_t)))); +} + +static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, + PowerPCCPU *cpu) +{ + const uint32_t *associativity = get_associativity(spapr, cpu->node_id); + int max_distance_ref_points = get_max_dist_ref_points(spapr); + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + uint32_t *vcpu_assoc = g_new(uint32_t, vcpu_assoc_size); + int index = spapr_get_vcpu_id(cpu); + + /* + * VCPUs have an extra 'cpu_id' value in ibm,associativity + * compared to other resources. Increment the size at index + * 0, put cpu_id last, then copy the remaining associativity + * domains. + */ + vcpu_assoc[0] = cpu_to_be32(max_distance_ref_points + 1); + vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); + memcpy(vcpu_assoc + 1, associativity + 1, + (vcpu_assoc_size - 2) * sizeof(uint32_t)); + + return vcpu_assoc; +} + +int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, + int offset, PowerPCCPU *cpu) +{ + g_autofree uint32_t *vcpu_assoc = NULL; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + + vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu); + + /* Advertise NUMA via ibm,associativity */ + return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc, + vcpu_assoc_size * sizeof(uint32_t)); +} + + +int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, + int offset) +{ + MachineState *machine = MACHINE(spapr); + int max_distance_ref_points = get_max_dist_ref_points(spapr); + int nb_numa_nodes = machine->numa_state->num_nodes; + int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; + uint32_t *int_buf, *cur_index, buf_len; + int ret, i; + + /* ibm,associativity-lookup-arrays */ + buf_len = (nr_nodes * max_distance_ref_points + 2) * sizeof(uint32_t); + cur_index = int_buf = g_malloc0(buf_len); + int_buf[0] = cpu_to_be32(nr_nodes); + /* Number of entries per associativity list */ + int_buf[1] = cpu_to_be32(max_distance_ref_points); + cur_index += 2; + for (i = 0; i < nr_nodes; i++) { + /* + * For the lookup-array we use the ibm,associativity array of the + * current NUMA affinity, without the first element (size). 
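+ *
+ * A sketch of the final property for a hypothetical two-node
+ * machine with max_distance_ref_points == 4, reusing the example
+ * FORM1 domains from above:
+ *
+ *     ibm,associativity-lookup-arrays = < 2 4  0 0 0 0  0 0 0 1 >
+ *
+ * where 2 is nr_nodes, 4 is the entries-per-list count, and the
+ * remaining cells are each node's domains minus the size element.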
+ */ + const uint32_t *associativity = get_associativity(spapr, i); + memcpy(cur_index, ++associativity, + sizeof(uint32_t) * max_distance_ref_points); + cur_index += max_distance_ref_points; + } + ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, + (cur_index - int_buf) * sizeof(uint32_t)); + g_free(int_buf); + + return ret; +} + +static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + uint32_t number_nvgpus_nodes = spapr->gpu_numa_id - + spapr_numa_initial_nvgpu_numa_id(ms); + uint32_t refpoints[] = { + cpu_to_be32(0x4), + cpu_to_be32(0x3), + cpu_to_be32(0x2), + cpu_to_be32(0x1), + }; + uint32_t nr_refpoints = ARRAY_SIZE(refpoints); + uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes; + uint32_t maxdomains[] = { + cpu_to_be32(4), + cpu_to_be32(maxdomain), + cpu_to_be32(maxdomain), + cpu_to_be32(maxdomain), + cpu_to_be32(maxdomain) + }; + + if (smc->pre_5_2_numa_associativity || + ms->numa_state->num_nodes <= 1) { + uint32_t legacy_refpoints[] = { + cpu_to_be32(0x4), + cpu_to_be32(0x4), + cpu_to_be32(0x2), + }; + uint32_t legacy_maxdomain = spapr->gpu_numa_id > 1 ? 1 : 0; + uint32_t legacy_maxdomains[] = { + cpu_to_be32(4), + cpu_to_be32(legacy_maxdomain), + cpu_to_be32(legacy_maxdomain), + cpu_to_be32(legacy_maxdomain), + cpu_to_be32(spapr->gpu_numa_id), + }; + + G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints)); + G_STATIC_ASSERT(sizeof(legacy_maxdomains) <= sizeof(maxdomains)); + + nr_refpoints = 3; + + memcpy(refpoints, legacy_refpoints, sizeof(legacy_refpoints)); + memcpy(maxdomains, legacy_maxdomains, sizeof(legacy_maxdomains)); + + /* pseries-5.0 and older reference-points array is {0x4, 0x4} */ + if (smc->pre_5_1_assoc_refpoints) { + nr_refpoints = 2; + } + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, nr_refpoints * sizeof(refpoints[0]))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); +} + +static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + int nb_numa_nodes = ms->numa_state->num_nodes; + int distance_table_entries = nb_numa_nodes * nb_numa_nodes; + g_autofree uint32_t *lookup_index_table = NULL; + g_autofree uint8_t *distance_table = NULL; + int src, dst, i, distance_table_size; + + /* + * ibm,numa-lookup-index-table: array with length and a + * list of NUMA ids present in the guest. + */ + lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1); + lookup_index_table[0] = cpu_to_be32(nb_numa_nodes); + + for (i = 0; i < nb_numa_nodes; i++) { + lookup_index_table[i + 1] = cpu_to_be32(i); + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table", + lookup_index_table, + (nb_numa_nodes + 1) * sizeof(uint32_t))); + + /* + * ibm,numa-distance-table: contains all node distances. First + * element is the size of the table as uint32, followed up + * by all the uint8 distances from the first NUMA node, then all + * distances from the second NUMA node and so on. + * + * ibm,numa-lookup-index-table is used by guest to navigate this + * array because NUMA ids can be sparse (node 0 is the first, + * node 8 is the second ...). 
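+ *
+ * A sketch for a hypothetical two-node guest with a symmetrical
+ * inter-node distance of 20:
+ *
+ *     ibm,numa-lookup-index-table = < 2 0 1 >
+ *     ibm,numa-distance-table = < 4 [ 10 20 20 10 ] >
+ *
+ * where 4 is the uint32 entry count and the bracketed values are
+ * the uint8 distances, laid out row by row in node order.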
+ */ + distance_table_size = distance_table_entries * sizeof(uint8_t) + + sizeof(uint32_t); + distance_table = g_new0(uint8_t, distance_table_size); + stl_be_p(distance_table, distance_table_entries); + + /* Skip the uint32_t array length at the start */ + i = sizeof(uint32_t); + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + distance_table[i++] = get_numa_distance(ms, src, dst); + } + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table", + distance_table, distance_table_size)); +} + +/* + * This helper could be compressed in a single function with + * FORM1 logic since we're setting the same DT values, with the + * difference being a call to spapr_numa_FORM2_write_rtas_tables() + * in the end. The separation was made to avoid clogging FORM1 code + * which already has to deal with compat modes from previous + * QEMU machine types. + */ +static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + uint32_t number_nvgpus_nodes = spapr->gpu_numa_id - + spapr_numa_initial_nvgpu_numa_id(ms); + + /* + * In FORM2, ibm,associativity-reference-points will point to + * the element in the ibm,associativity array that contains the + * primary domain index (for FORM2, the first element). + * + * This value (in our case, the numa-id) is then used as an index + * to retrieve all other attributes of the node (distance, + * bandwidth, latency) via ibm,numa-lookup-index-table and other + * ibm,numa-*-table properties. + */ + uint32_t refpoints[] = { cpu_to_be32(1) }; + + uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes; + uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) }; + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); + + spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas); +} + +/* + * Helper that writes ibm,associativity-reference-points and + * max-associativity-domains in the RTAS pointed by @rtas + * in the DT @fdt. + */ +void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas); + return; + } + + spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas); +} + +static target_ulong h_home_node_associativity(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + g_autofree uint32_t *vcpu_assoc = NULL; + target_ulong flags = args[0]; + target_ulong procno = args[1]; + PowerPCCPU *tcpu; + int idx, assoc_idx; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + + /* only support procno from H_REGISTER_VPA */ + if (flags != 0x1) { + return H_FUNCTION; + } + + tcpu = spapr_find_cpu(procno); + if (tcpu == NULL) { + return H_P2; + } + + /* + * Given that we want to be flexible with the sizes and indexes, + * we must consider that there is a hard limit of how many + * associativities domain we can fit in R4 up to R9, which would be + * 12 associativity domains for vcpus. Assert and bail if that's + * not the case. 
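+ *
+ * As a worked example (hypothetical contents): with
+ * vcpu_assoc = { 5, 0, 0, 0, 1, 8 } (size, four domains, vcpu_id),
+ * the loop below yields R4 = ASSOCIATIVITY(0, 0),
+ * R5 = ASSOCIATIVITY(0, 1), R6 = ASSOCIATIVITY(8, -1) and
+ * R7..R9 = ASSOCIATIVITY(-1, -1): six registers, two 32-bit
+ * domains each, hence the limit of 12.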
+ */
+ g_assert((vcpu_assoc_size - 1) <= 12);
+
+ vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
+ /* assoc_idx starts at 1 to skip associativity size */
+ assoc_idx = 1;
+
+#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
+ ((uint64_t)(b) & 0xffffffff))
+
+ for (idx = 0; idx < 6; idx++) {
+ int32_t a, b;
+
+ /*
+ * vcpu_assoc[] will contain the associativity domains for tcpu,
+ * including tcpu->node_id and procno, meaning that we don't
+ * need to use these variables here.
+ *
+ * We'll read 2 values at a time to fill up the ASSOCIATIVITY()
+ * macro. The ternary will fill the remaining registers with -1
+ * after we've gone through vcpu_assoc[].
+ */
+ a = assoc_idx < vcpu_assoc_size ?
+ be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
+ b = assoc_idx < vcpu_assoc_size ?
+ be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
+
+ args[idx] = ASSOCIATIVITY(a, b);
+ }
+#undef ASSOCIATIVITY
+
+ return H_SUCCESS;
+}
+
+static void spapr_numa_register_types(void)
+{
+ /* Virtual Processor Home Node */
+ spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
+ h_home_node_associativity);
+}
+
+type_init(spapr_numa_register_types)
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
new file mode 100644
index 000000000..91de1052f
--- /dev/null
+++ b/hw/ppc/spapr_nvdimm.c
@@ -0,0 +1,528 @@
+/*
+ * QEMU PAPR Storage Class Memory Interfaces
+ *
+ * Copyright (c) 2019-2020, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr_drc.h"
+#include "hw/ppc/spapr_nvdimm.h"
+#include "hw/mem/nvdimm.h"
+#include "qemu/nvdimm-utils.h"
+#include "hw/ppc/fdt.h"
+#include "qemu/range.h"
+#include "hw/ppc/spapr_numa.h"
+
+/* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED PPC_BIT(0)
+
+/*
+ * The nvdimm size should be aligned to SCM block size.
+ * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
+ * so that SCM regions do not overlap with DIMM memory regions.
+ * The SCM devices can have variable block sizes. For now, fix the
+ * block size to the minimum value.
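+ *
+ * (Numerically, assuming SPAPR_MEMORY_BLOCK_SIZE is 256 MiB: a 4 GiB
+ * backing device provides 4096 / 256 = 16 SCM blocks, while e.g. a
+ * 1000 MiB device would be rejected by the size alignment check in
+ * spapr_nvdimm_validate() below.)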
+ */ +#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE + +/* Have an explicit check for alignment */ +QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); + +bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, + uint64_t size, Error **errp) +{ + const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); + const MachineState *ms = MACHINE(hotplug_dev); + g_autofree char *uuidstr = NULL; + QemuUUID uuid; + int ret; + + if (!mc->nvdimm_supported) { + error_setg(errp, "NVDIMM hotplug not supported for this machine"); + return false; + } + + if (!ms->nvdimms_state->is_enabled) { + error_setg(errp, "nvdimm device found but 'nvdimm=off' was set"); + return false; + } + + if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP, + &error_abort) == 0) { + error_setg(errp, "PAPR requires NVDIMM devices to have label-size set"); + return false; + } + + if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) { + error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)" + " to be a multiple of %" PRIu64 "MB", + SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB); + return false; + } + + uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP, + &error_abort); + ret = qemu_uuid_parse(uuidstr, &uuid); + g_assert(!ret); + + if (qemu_uuid_is_null(&uuid)) { + error_setg(errp, "NVDIMM device requires the uuid to be set"); + return false; + } + + return true; +} + + +void spapr_add_nvdimm(DeviceState *dev, uint64_t slot) +{ + SpaprDrc *drc; + bool hotplugged = spapr_drc_hotplugged(dev); + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); + g_assert(drc); + + /* + * pc_dimm_get_free_slot() provided a free slot at pre-plug. The + * corresponding DRC is thus assumed to be attachable. + */ + spapr_drc_attach(drc, dev); + + if (hotplugged) { + spapr_hotplug_req_add_by_index(drc); + } +} + +static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt, + int parent_offset, NVDIMMDevice *nvdimm) +{ + int child_offset; + char *buf; + SpaprDrc *drc; + uint32_t drc_idx; + uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP, + &error_abort); + uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP, + &error_abort); + uint64_t lsize = nvdimm->label_size; + uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + NULL); + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); + g_assert(drc); + + drc_idx = spapr_drc_index(drc); + + buf = g_strdup_printf("ibm,pmemory@%x", drc_idx); + child_offset = fdt_add_subnode(fdt, parent_offset, buf); + g_free(buf); + + _FDT(child_offset); + + _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx))); + _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory"))); + _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory"))); + + spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node); + + buf = qemu_uuid_unparse_strdup(&nvdimm->uuid); + _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf))); + g_free(buf); + + _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx))); + + _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size", + SPAPR_MINIMUM_SCM_BLOCK_SIZE))); + _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks", + size / SPAPR_MINIMUM_SCM_BLOCK_SIZE))); + _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize))); + + _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application", + "operating-system"))); + _FDT(fdt_setprop(fdt, child_offset, 
"ibm,cache-flush-required", NULL, 0)); + + return child_offset; +} + +int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(drc->dev); + + *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm); + + return 0; +} + +void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt) +{ + int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory"); + GSList *iter, *nvdimms = nvdimm_get_device_list(); + + if (offset < 0) { + offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory"); + _FDT(offset); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", + "ibm,persistent-memory"))); + } + + /* Create DT entries for cold plugged NVDIMM devices */ + for (iter = nvdimms; iter; iter = iter->next) { + NVDIMMDevice *nvdimm = iter->data; + + spapr_dt_nvdimm(spapr, fdt, offset, nvdimm); + } + g_slist_free(nvdimms); + + return; +} + +static target_ulong h_scm_read_metadata(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t offset = args[1]; + uint64_t len = args[2]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + NVDIMMDevice *nvdimm; + NVDIMMClass *ddc; + uint64_t data = 0; + uint8_t buf[8] = { 0 }; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + if (len != 1 && len != 2 && + len != 4 && len != 8) { + return H_P3; + } + + nvdimm = NVDIMM(drc->dev); + if ((offset + len < offset) || + (nvdimm->label_size < len + offset)) { + return H_P2; + } + + ddc = NVDIMM_GET_CLASS(nvdimm); + ddc->read_label_data(nvdimm, buf, len, offset); + + switch (len) { + case 1: + data = ldub_p(buf); + break; + case 2: + data = lduw_be_p(buf); + break; + case 4: + data = ldl_be_p(buf); + break; + case 8: + data = ldq_be_p(buf); + break; + default: + g_assert_not_reached(); + } + + args[0] = data; + + return H_SUCCESS; +} + +static target_ulong h_scm_write_metadata(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t offset = args[1]; + uint64_t data = args[2]; + uint64_t len = args[3]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + NVDIMMDevice *nvdimm; + NVDIMMClass *ddc; + uint8_t buf[8] = { 0 }; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + if (len != 1 && len != 2 && + len != 4 && len != 8) { + return H_P4; + } + + nvdimm = NVDIMM(drc->dev); + if ((offset + len < offset) || + (nvdimm->label_size < len + offset)) { + return H_P2; + } + + switch (len) { + case 1: + if (data & 0xffffffffffffff00) { + return H_P2; + } + stb_p(buf, data); + break; + case 2: + if (data & 0xffffffffffff0000) { + return H_P2; + } + stw_be_p(buf, data); + break; + case 4: + if (data & 0xffffffff00000000) { + return H_P2; + } + stl_be_p(buf, data); + break; + case 8: + stq_be_p(buf, data); + break; + default: + g_assert_not_reached(); + } + + ddc = NVDIMM_GET_CLASS(nvdimm); + ddc->write_label_data(nvdimm, buf, len, offset); + + return H_SUCCESS; +} + +static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t starting_idx = args[1]; + uint64_t no_of_scm_blocks_to_bind = args[2]; + uint64_t 
target_logical_mem_addr = args[3];
+ uint64_t continue_token = args[4];
+ uint64_t size;
+ uint64_t total_no_of_scm_blocks;
+ SpaprDrc *drc = spapr_drc_by_index(drc_index);
+ hwaddr addr;
+ NVDIMMDevice *nvdimm;
+
+ if (!drc || !drc->dev ||
+ spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * Currently the continue token should be zero: QEMU has already
+ * bound everything, and this hcall doesn't return H_BUSY.
+ */
+ if (continue_token > 0) {
+ return H_P5;
+ }
+
+ /* Currently qemu assigns the address. */
+ if (target_logical_mem_addr != 0xffffffffffffffff) {
+ return H_OVERLAP;
+ }
+
+ nvdimm = NVDIMM(drc->dev);
+
+ size = object_property_get_uint(OBJECT(nvdimm),
+ PC_DIMM_SIZE_PROP, &error_abort);
+
+ total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+
+ if (starting_idx > total_no_of_scm_blocks) {
+ return H_P2;
+ }
+
+ if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) ||
+ ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) {
+ return H_P3;
+ }
+
+ addr = object_property_get_uint(OBJECT(nvdimm),
+ PC_DIMM_ADDR_PROP, &error_abort);
+
+ addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+
+ /* Already bound; return the target logical address in R5 */
+ args[1] = addr;
+ args[2] = no_of_scm_blocks_to_bind;
+
+ return H_SUCCESS;
+}
+
+static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ uint32_t drc_index = args[0];
+ uint64_t starting_scm_logical_addr = args[1];
+ uint64_t no_of_scm_blocks_to_unbind = args[2];
+ uint64_t continue_token = args[3];
+ uint64_t size_to_unbind;
+ Range blockrange = range_empty;
+ Range nvdimmrange = range_empty;
+ SpaprDrc *drc = spapr_drc_by_index(drc_index);
+ NVDIMMDevice *nvdimm;
+ uint64_t size, addr;
+
+ if (!drc || !drc->dev ||
+ spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+ return H_PARAMETER;
+ }
+
+ /* continue_token should be zero as this hcall doesn't return H_BUSY. */
+ if (continue_token > 0) {
+ return H_P4;
+ }
+
+ /* Check if starting_scm_logical_addr is block aligned */
+ if (!QEMU_IS_ALIGNED(starting_scm_logical_addr,
+ SPAPR_MINIMUM_SCM_BLOCK_SIZE)) {
+ return H_P2;
+ }
+
+ size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+ if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind !=
+ size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
+ return H_P3;
+ }
+
+ nvdimm = NVDIMM(drc->dev);
+ size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
+ &error_abort);
+ addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP,
+ &error_abort);
+
+ range_init_nofail(&nvdimmrange, addr, size);
+ range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind);
+
+ if (!range_contains_range(&nvdimmrange, &blockrange)) {
+ return H_P3;
+ }
+
+ args[1] = no_of_scm_blocks_to_unbind;
+
+ /* let unplug take care of actual unbind */
+ return H_SUCCESS;
+}
+
+#define H_UNBIND_SCOPE_ALL 0x1
+#define H_UNBIND_SCOPE_DRC 0x2
+
+static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ uint64_t target_scope = args[0];
+ uint32_t drc_index = args[1];
+ uint64_t continue_token = args[2];
+ NVDIMMDevice *nvdimm;
+ uint64_t size;
+ uint64_t no_of_scm_blocks_unbound = 0;
+
+ /* continue_token should be zero as this hcall doesn't return H_BUSY.
*/ + if (continue_token > 0) { + return H_P4; + } + + if (target_scope == H_UNBIND_SCOPE_DRC) { + SpaprDrc *drc = spapr_drc_by_index(drc_index); + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_P2; + } + + nvdimm = NVDIMM(drc->dev); + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + &error_abort); + + no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; + } else if (target_scope == H_UNBIND_SCOPE_ALL) { + GSList *list, *nvdimms; + + nvdimms = nvdimm_get_device_list(); + for (list = nvdimms; list; list = list->next) { + nvdimm = list->data; + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + &error_abort); + + no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; + } + g_slist_free(nvdimms); + } else { + return H_PARAMETER; + } + + args[1] = no_of_scm_blocks_unbound; + + /* let unplug take care of actual unbind */ + return H_SUCCESS; +} + +static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + + NVDIMMDevice *nvdimm; + uint64_t hbitmap = 0; + uint32_t drc_index = args[0]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED; + + + /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */ + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + nvdimm = NVDIMM(drc->dev); + + /* Update if the nvdimm is unarmed and send its status via health bitmaps */ + if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) { + hbitmap |= PAPR_PMEM_UNARMED; + } + + /* Update the out args with health bitmap/mask */ + args[0] = hbitmap; + args[1] = hbitmap_mask; + + return H_SUCCESS; +} + +static void spapr_scm_register_types(void) +{ + /* qemu/scm specific hcalls */ + spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata); + spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata); + spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem); + spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem); + spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all); + spapr_register_hypercall(H_SCM_HEALTH, h_scm_health); +} + +type_init(spapr_scm_register_types) diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c new file mode 100644 index 000000000..b2567caa5 --- /dev/null +++ b/hw/ppc/spapr_ovec.c @@ -0,0 +1,241 @@ +/* + * QEMU SPAPR Architecture Option Vector Helper Functions + * + * Copyright IBM Corp. 2016 + * + * Authors: + * Bharata B Rao <bharata@linux.vnet.ibm.com> + * Michael Roth <mdroth@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/ppc/spapr_ovec.h" +#include "migration/vmstate.h" +#include "qemu/bitmap.h" +#include "exec/address-spaces.h" +#include "qemu/error-report.h" +#include "trace.h" +#include <libfdt.h> + +#define OV_MAXBYTES 256 /* not including length byte */ +#define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE) + +/* we *could* work with bitmaps directly, but handling the bitmap privately + * allows us to more safely make assumptions about the bitmap size and + * simplify the calling code somewhat + */ +struct SpaprOptionVector { + unsigned long *bitmap; + int32_t bitmap_size; /* only used for migration */ +}; + +const VMStateDescription vmstate_spapr_ovec = { + .name = "spapr_option_vector", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BITMAP(bitmap, SpaprOptionVector, 1, bitmap_size), + VMSTATE_END_OF_LIST() + } +}; + +SpaprOptionVector *spapr_ovec_new(void) +{ + SpaprOptionVector *ov; + + ov = g_new0(SpaprOptionVector, 1); + ov->bitmap = bitmap_new(OV_MAXBITS); + ov->bitmap_size = OV_MAXBITS; + + return ov; +} + +SpaprOptionVector *spapr_ovec_clone(SpaprOptionVector *ov_orig) +{ + SpaprOptionVector *ov; + + g_assert(ov_orig); + + ov = spapr_ovec_new(); + bitmap_copy(ov->bitmap, ov_orig->bitmap, OV_MAXBITS); + + return ov; +} + +void spapr_ovec_intersect(SpaprOptionVector *ov, + SpaprOptionVector *ov1, + SpaprOptionVector *ov2) +{ + g_assert(ov); + g_assert(ov1); + g_assert(ov2); + + bitmap_and(ov->bitmap, ov1->bitmap, ov2->bitmap, OV_MAXBITS); +} + +/* returns true if ov1 has a subset of bits in ov2 */ +bool spapr_ovec_subset(SpaprOptionVector *ov1, SpaprOptionVector *ov2) +{ + unsigned long *tmp = bitmap_new(OV_MAXBITS); + bool result; + + g_assert(ov1); + g_assert(ov2); + + bitmap_andnot(tmp, ov1->bitmap, ov2->bitmap, OV_MAXBITS); + result = bitmap_empty(tmp, OV_MAXBITS); + + g_free(tmp); + + return result; +} + +void spapr_ovec_cleanup(SpaprOptionVector *ov) +{ + if (ov) { + g_free(ov->bitmap); + g_free(ov); + } +} + +void spapr_ovec_set(SpaprOptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert(bitnr < OV_MAXBITS); + + set_bit(bitnr, ov->bitmap); +} + +void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert(bitnr < OV_MAXBITS); + + clear_bit(bitnr, ov->bitmap); +} + +bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert(bitnr < OV_MAXBITS); + + return test_bit(bitnr, ov->bitmap) ? 
true : false; +} + +bool spapr_ovec_empty(SpaprOptionVector *ov) +{ + g_assert(ov); + + return bitmap_empty(ov->bitmap, OV_MAXBITS); +} + +static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, + long bitmap_offset) +{ + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (entry & (1 << (BITS_PER_BYTE - 1 - i))) { + bitmap_set(bitmap, bitmap_offset + i, 1); + } + } +} + +static uint8_t guest_byte_from_bitmap(unsigned long *bitmap, long bitmap_offset) +{ + uint8_t entry = 0; + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (test_bit(bitmap_offset + i, bitmap)) { + entry |= (1 << (BITS_PER_BYTE - 1 - i)); + } + } + + return entry; +} + +static target_ulong vector_addr(target_ulong table_addr, int vector) +{ + uint16_t vector_count, vector_len; + int i; + + vector_count = ldub_phys(&address_space_memory, table_addr) + 1; + if (vector > vector_count) { + return 0; + } + table_addr++; /* skip nr option vectors */ + + for (i = 0; i < vector - 1; i++) { + vector_len = ldub_phys(&address_space_memory, table_addr) + 1; + table_addr += vector_len + 1; /* bit-vector + length byte */ + } + return table_addr; +} + +SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector) +{ + SpaprOptionVector *ov; + target_ulong addr; + uint16_t vector_len; + int i; + + g_assert(table_addr); + g_assert(vector >= 1); /* vector numbering starts at 1 */ + + addr = vector_addr(table_addr, vector); + if (!addr) { + /* specified vector isn't present */ + return NULL; + } + + vector_len = ldub_phys(&address_space_memory, addr++) + 1; + g_assert(vector_len <= OV_MAXBYTES); + ov = spapr_ovec_new(); + + for (i = 0; i < vector_len; i++) { + uint8_t entry = ldub_phys(&address_space_memory, addr + i); + if (entry) { + trace_spapr_ovec_parse_vector(vector, i + 1, vector_len, entry); + guest_byte_to_bitmap(entry, ov->bitmap, i * BITS_PER_BYTE); + } + } + + return ov; +} + +int spapr_dt_ovec(void *fdt, int fdt_offset, + SpaprOptionVector *ov, const char *name) +{ + uint8_t vec[OV_MAXBYTES + 1]; + uint16_t vec_len; + unsigned long lastbit; + int i; + + g_assert(ov); + + lastbit = find_last_bit(ov->bitmap, OV_MAXBITS); + /* if no bits are set, include at least 1 byte of the vector so we can + * still encoded this in the device tree while abiding by the same + * encoding/sizing expected in ibm,client-architecture-support + */ + vec_len = (lastbit == OV_MAXBITS) ? 1 : lastbit / BITS_PER_BYTE + 1; + g_assert(vec_len <= OV_MAXBYTES); + /* guest expects vector len encoded as vec_len - 1, since the length byte + * is assumed and not included, and the first byte of the vector + * is assumed as well + */ + vec[0] = vec_len - 1; + + for (i = 1; i < vec_len + 1; i++) { + vec[i] = guest_byte_from_bitmap(ov->bitmap, (i - 1) * BITS_PER_BYTE); + if (vec[i]) { + trace_spapr_ovec_populate_dt(i, vec_len, vec[i]); + } + } + + return fdt_setprop(fdt, fdt_offset, name, vec, vec_len + 1); +} diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c new file mode 100644 index 000000000..5bfd4aa9e --- /dev/null +++ b/hw/ppc/spapr_pci.c @@ -0,0 +1,2530 @@ +/* + * QEMU sPAPR PCI host originated from Uninorth PCI host + * + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation. + * Copyright (C) 2011 David Gibson, IBM Corporation. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "migration/vmstate.h" +#include "hw/pci/pci.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/pci/pci_host.h" +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "exec/ram_addr.h" +#include <libfdt.h> +#include "trace.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qapi/qmp/qerror.h" +#include "hw/ppc/fdt.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_ids.h" +#include "hw/ppc/spapr_drc.h" +#include "hw/qdev-properties.h" +#include "sysemu/device_tree.h" +#include "sysemu/kvm.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "hw/ppc/spapr_numa.h" +#include "qemu/log.h" + +/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ +#define RTAS_QUERY_FN 0 +#define RTAS_CHANGE_FN 1 +#define RTAS_RESET_FN 2 +#define RTAS_CHANGE_MSI_FN 3 +#define RTAS_CHANGE_MSIX_FN 4 + +/* Interrupt types to return on RTAS_CHANGE_* */ +#define RTAS_TYPE_MSI 1 +#define RTAS_TYPE_MSIX 2 + +SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid) +{ + SpaprPhbState *sphb; + + QLIST_FOREACH(sphb, &spapr->phbs, list) { + if (sphb->buid != buid) { + continue; + } + return sphb; + } + + return NULL; +} + +PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid, + uint32_t config_addr) +{ + SpaprPhbState *sphb = spapr_pci_find_phb(spapr, buid); + PCIHostState *phb = PCI_HOST_BRIDGE(sphb); + int bus_num = (config_addr >> 16) & 0xFF; + int devfn = (config_addr >> 8) & 0xFF; + + if (!phb) { + return NULL; + } + + return pci_find_device(phb->bus, bus_num, devfn); +} + +static uint32_t rtas_pci_cfgaddr(uint32_t arg) +{ + /* This handles the encoding of extended config space addresses */ + return ((arg >> 20) & 0xf00) | (arg & 0xff); +} + +static void finish_read_pci_config(SpaprMachineState *spapr, uint64_t buid, + uint32_t addr, uint32_t size, + target_ulong rets) +{ + PCIDevice *pci_dev; + uint32_t val; + + if ((size != 1) && (size != 2) && (size != 4)) { + /* access must be 1, 2 or 4 bytes */ + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + pci_dev = spapr_pci_find_dev(spapr, buid, addr); + addr = rtas_pci_cfgaddr(addr); + + if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) { + /* Access must be to a valid device, within bounds and + * naturally aligned */ + 
rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + val = pci_host_config_read_common(pci_dev, addr, + pci_config_size(pci_dev), size); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, val); +} + +static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint64_t buid; + uint32_t size, addr; + + if ((nargs != 4) || (nret != 2)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + buid = rtas_ldq(args, 1); + size = rtas_ld(args, 3); + addr = rtas_ld(args, 0); + + finish_read_pci_config(spapr, buid, addr, size, rets); +} + +static void rtas_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t size, addr; + + if ((nargs != 2) || (nret != 2)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + size = rtas_ld(args, 1); + addr = rtas_ld(args, 0); + + finish_read_pci_config(spapr, 0, addr, size, rets); +} + +static void finish_write_pci_config(SpaprMachineState *spapr, uint64_t buid, + uint32_t addr, uint32_t size, + uint32_t val, target_ulong rets) +{ + PCIDevice *pci_dev; + + if ((size != 1) && (size != 2) && (size != 4)) { + /* access must be 1, 2 or 4 bytes */ + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + pci_dev = spapr_pci_find_dev(spapr, buid, addr); + addr = rtas_pci_cfgaddr(addr); + + if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) { + /* Access must be to a valid device, within bounds and + * naturally aligned */ + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev), + val, size); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint64_t buid; + uint32_t val, size, addr; + + if ((nargs != 5) || (nret != 1)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + buid = rtas_ldq(args, 1); + val = rtas_ld(args, 4); + size = rtas_ld(args, 3); + addr = rtas_ld(args, 0); + + finish_write_pci_config(spapr, buid, addr, size, val, rets); +} + +static void rtas_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t val, size, addr; + + if ((nargs != 3) || (nret != 1)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + + val = rtas_ld(args, 2); + size = rtas_ld(args, 1); + addr = rtas_ld(args, 0); + + finish_write_pci_config(spapr, 0, addr, size, val, rets); +} + +/* + * Set MSI/MSIX message data. + * This is required for msi_notify()/msix_notify() which + * will write at the addresses via spapr_msi_write(). + * + * If hwaddr == 0, all entries will have .data == first_irq i.e. + * table will be reset. 
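+ *
+ * For example (hypothetical numbers): an MSI-X setup with
+ * first_irq = 4400 and req_num = 3 programs vectors 0..2 with
+ * .data = 4400, 4401 and 4402; with addr == 0 the per-vector
+ * increment is skipped, so every entry keeps .data == first_irq.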
+ */
+static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
+ unsigned first_irq, unsigned req_num)
+{
+ unsigned i;
+ MSIMessage msg = { .address = addr, .data = first_irq };
+
+ if (!msix) {
+ msi_set_message(pdev, msg);
+ trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
+ return;
+ }
+
+ for (i = 0; i < req_num; ++i) {
+ msix_set_message(pdev, i, msg);
+ trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+ if (addr) {
+ ++msg.data;
+ }
+ }
+}
+
+static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args, uint32_t nret,
+ target_ulong rets)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ uint32_t config_addr = rtas_ld(args, 0);
+ uint64_t buid = rtas_ldq(args, 1);
+ unsigned int func = rtas_ld(args, 3);
+ unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
+ unsigned int seq_num = rtas_ld(args, 5);
+ unsigned int ret_intr_type;
+ unsigned int irq, max_irqs = 0;
+ SpaprPhbState *phb = NULL;
+ PCIDevice *pdev = NULL;
+ SpaprPciMsi *msi;
+ int *config_addr_key;
+ Error *err = NULL;
+ int i;
+
+ /* Find SpaprPhbState */
+ phb = spapr_pci_find_phb(spapr, buid);
+ if (phb) {
+ pdev = spapr_pci_find_dev(spapr, buid, config_addr);
+ }
+ if (!phb || !pdev) {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ switch (func) {
+ case RTAS_CHANGE_FN:
+ if (msi_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSI;
+ } else if (msix_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSIX;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ case RTAS_CHANGE_MSI_FN:
+ if (msi_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSI;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ case RTAS_CHANGE_MSIX_FN:
+ if (msix_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSIX;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ default:
+ error_report("rtas_ibm_change_msi(%u) is not implemented", func);
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr);
+
+ /* Releasing MSIs */
+ if (!req_num) {
+ if (!msi) {
+ trace_spapr_pci_msi("Releasing wrong config", config_addr);
+ rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ return;
+ }
+
+ if (msi_present(pdev)) {
+ spapr_msi_setmsg(pdev, 0, false, 0, 0);
+ }
+ if (msix_present(pdev)) {
+ spapr_msi_setmsg(pdev, 0, true, 0, 0);
+ }
+ g_hash_table_remove(phb->msi, &config_addr);
+
+ trace_spapr_pci_msi("Released MSIs", config_addr);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+ rtas_st(rets, 1, 0);
+ return;
+ }
+
+ /* Enabling MSI */
+
+ /* Check if the device supports as many IRQs as requested */
+ if (ret_intr_type == RTAS_TYPE_MSI) {
+ max_irqs = msi_nr_vectors_allocated(pdev);
+ } else if (ret_intr_type == RTAS_TYPE_MSIX) {
+ max_irqs = pdev->msix_entries_nr;
+ }
+ if (!max_irqs) {
+ error_report("Requested interrupt type %d is not enabled for device %x",
+ ret_intr_type, config_addr);
+ rtas_st(rets, 0, -1); /* Hardware error */
+ return;
+ }
+ /* Correct the number if the guest asked for too many */
+ if (req_num > max_irqs) {
+ trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
+ req_num = max_irqs;
+ irq = 0; /* to avoid misleading trace */
+ goto out;
+ }
+
+ /* Allocate MSIs */
+ if (smc->legacy_irq_allocation) {
+ irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
+ &err);
+ } else {
+ irq = spapr_irq_msi_alloc(spapr, req_num,
+ ret_intr_type == RTAS_TYPE_MSI, &err);
+ }
+ if (err)
{ + error_reportf_err(err, "Can't allocate MSIs for device %x: ", + config_addr); + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + for (i = 0; i < req_num; i++) { + spapr_irq_claim(spapr, irq + i, false, &err); + if (err) { + if (i) { + spapr_irq_free(spapr, irq, i); + } + if (!smc->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, irq, req_num); + } + error_reportf_err(err, "Can't allocate MSIs for device %x: ", + config_addr); + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + } + + /* Release previous MSIs */ + if (msi) { + g_hash_table_remove(phb->msi, &config_addr); + } + + /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ + spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX, + irq, req_num); + + /* Add MSI device to cache */ + msi = g_new(SpaprPciMsi, 1); + msi->first_irq = irq; + msi->num = req_num; + config_addr_key = g_new(int, 1); + *config_addr_key = config_addr; + g_hash_table_insert(phb->msi, config_addr_key, msi); + +out: + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, req_num); + rtas_st(rets, 2, ++seq_num); + if (nret > 3) { + rtas_st(rets, 3, ret_intr_type); + } + + trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq); +} + +static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, + uint32_t nargs, + target_ulong args, + uint32_t nret, + target_ulong rets) +{ + uint32_t config_addr = rtas_ld(args, 0); + uint64_t buid = rtas_ldq(args, 1); + unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3); + SpaprPhbState *phb = NULL; + PCIDevice *pdev = NULL; + SpaprPciMsi *msi; + + /* Find SpaprPhbState */ + phb = spapr_pci_find_phb(spapr, buid); + if (phb) { + pdev = spapr_pci_find_dev(spapr, buid, config_addr); + } + if (!phb || !pdev) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* Find device descriptor and start IRQ */ + msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr); + if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) { + trace_spapr_pci_msi("Failed to return vector", config_addr); + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + intr_src_num = msi->first_irq + ioa_intr_num; + trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num, + intr_src_num); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, intr_src_num); + rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */ +} + +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint32_t addr, option; + uint64_t buid; + int ret; + + if ((nargs != 4) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + addr = rtas_ld(args, 0); + option = rtas_ld(args, 3); + + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option); + rtas_st(rets, 0, ret); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + PCIDevice *pdev; + uint32_t addr, option; + uint64_t buid; + + if ((nargs != 4) || (nret != 2)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + 
sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + /* + * We always have PE address of form "00BB0001". "BB" + * represents the bus number of PE's primary bus. + */ + option = rtas_ld(args, 3); + switch (option) { + case RTAS_GET_PE_ADDR: + addr = rtas_ld(args, 0); + pdev = spapr_pci_find_dev(spapr, buid, addr); + if (!pdev) { + goto param_error_exit; + } + + rtas_st(rets, 1, (pci_bus_num(pci_get_bus(pdev)) << 16) + 1); + break; + case RTAS_GET_PE_MODE: + rtas_st(rets, 1, RTAS_PE_MODE_SHARED); + break; + default: + goto param_error_exit; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + int state, ret; + + if ((nargs != 3) || (nret != 4 && nret != 5)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_get_state(sphb, &state); + rtas_st(rets, 0, ret); + if (ret != RTAS_OUT_SUCCESS) { + return; + } + + rtas_st(rets, 1, state); + rtas_st(rets, 2, RTAS_EEH_SUPPORT); + rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO); + if (nret >= 5) { + rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO); + } + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint32_t option; + uint64_t buid; + int ret; + + if ((nargs != 4) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + option = rtas_ld(args, 3); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_reset(sphb, option); + rtas_st(rets, 0, ret); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_configure_pe(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + int ret; + + if ((nargs != 3) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_configure(sphb); + rtas_st(rets, 0, ret); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +/* To support it later */ +static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + int option; + uint64_t buid; + + if ((nargs != 8) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + option = rtas_ld(args, 7); + switch (option) { + case 
RTAS_SLOT_TEMP_ERR_LOG: + case RTAS_SLOT_PERM_ERR_LOG: + break; + default: + goto param_error_exit; + } + + /* We don't have error log yet */ + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void pci_spapr_set_irq(void *opaque, int irq_num, int level) +{ + /* + * Here we use the number returned by pci_swizzle_map_irq_fn to find a + * corresponding qemu_irq. + */ + SpaprPhbState *phb = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + + trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq); + qemu_set_irq(spapr_qirq(spapr, phb->lsi_table[irq_num].irq), level); +} + +static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque); + PCIINTxRoute route; + + route.mode = PCI_INTX_ENABLED; + route.irq = sphb->lsi_table[pin].irq; + + return route; +} + +static uint64_t spapr_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__); + return 0; +} + +/* + * MSI/MSIX memory region implementation. + * The handler handles both MSI and MSIX. + * The vector number is encoded in least bits in data. + */ +static void spapr_msi_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + SpaprMachineState *spapr = opaque; + uint32_t irq = data; + + trace_spapr_pci_msi_write(addr, data, irq); + + qemu_irq_pulse(spapr_qirq(spapr, irq)); +} + +static const MemoryRegionOps spapr_msi_ops = { + /* + * .read result is undefined by PCI spec. + * define .read method to avoid assert failure in memory_region_init_io + */ + .read = spapr_msi_read, + .write = spapr_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN +}; + +/* + * PHB PCI device + */ +static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + SpaprPhbState *phb = opaque; + + return &phb->iommu_as; +} + +static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) +{ + g_autofree char *path = NULL; + g_autofree char *host = NULL; + g_autofree char *devspec = NULL; + char *buf = NULL; + + /* Get the PCI VFIO host id */ + host = object_property_get_str(OBJECT(pdev), "host", NULL); + if (!host) { + return NULL; + } + + /* Construct the path of the file that will give us the DT location */ + path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); + if (!g_file_get_contents(path, &devspec, NULL, NULL)) { + return NULL; + } + + /* Construct and read from host device tree the loc-code */ + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", devspec); + if (!g_file_get_contents(path, &buf, NULL, NULL)) { + return NULL; + } + return buf; +} + +static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) +{ + char *buf; + const char *devtype = "qemu"; + uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + buf = spapr_phb_vfio_get_loc_code(sphb, pdev); + if (buf) { + return buf; + } + devtype = "vfio"; + } + /* + * For emulated devices and VFIO-failure case, make up + * the loc-code. 
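+ *
+ * A made-up loc-code looks like, e.g. (hypothetical device),
+ * "qemu_virtio-net-pci:0000:00:02.0" for an emulated virtio NIC
+ * behind the index-0 PHB on bus 0, slot 2, function 0.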
+ */ + buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x", + devtype, pdev->name, sphb->index, busnr, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + return buf; +} + +/* Macros to operate with address in OF binding to PCI */ +#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) +#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ +#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ +#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ +#define b_ss(x) b_x((x), 24, 2) /* the space code */ +#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ +#define b_ddddd(x) b_x((x), 11, 5) /* device number */ +#define b_fff(x) b_x((x), 8, 3) /* function number */ +#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ + +/* for 'reg' OF properties */ +#define RESOURCE_CELLS_SIZE 2 +#define RESOURCE_CELLS_ADDRESS 3 + +typedef struct ResourceFields { + uint32_t phys_hi; + uint32_t phys_mid; + uint32_t phys_lo; + uint32_t size_hi; + uint32_t size_lo; +} QEMU_PACKED ResourceFields; + +typedef struct ResourceProps { + ResourceFields reg[8]; + uint32_t reg_len; +} ResourceProps; + +/* fill in the 'reg' OF properties for + * a PCI device. 'reg' describes resource requirements for a + * device's IO/MEM regions. + * + * the property is an array of ('phys-addr', 'size') pairs describing + * the addressable regions of the PCI device, where 'phys-addr' is a + * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to + * (phys.hi, phys.mid, phys.lo), and 'size' is a + * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo). + * + * phys.hi = 0xYYXXXXZZ, where: + * 0xYY = npt000ss + * ||| | + * ||| +-- space code + * ||| | + * ||| + 00 if configuration space + * ||| + 01 if IO region, + * ||| + 10 if 32-bit MEM region + * ||| + 11 if 64-bit MEM region + * ||| + * ||+------ for non-relocatable IO: 1 if aliased + * || for relocatable IO: 1 if below 64KB + * || for MEM: 1 if below 1MB + * |+------- 1 if region is prefetchable + * +-------- 1 if region is non-relocatable + * 0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function + * bits respectively + * 0xZZ = rrrrrrrr, the register number of the BAR corresponding + * to the region + * + * phys.mid and phys.lo correspond respectively to the hi/lo portions + * of the actual address of the region. + * + * note also that addresses defined in this property are, at least + * for PAPR guests, relative to the PHBs IO/MEM windows, and + * correspond directly to the addresses in the BARs. + * + * in accordance with PCI Bus Binding to Open Firmware, + * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7, + * Appendix C. 
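+ *
+ * worked example (hypothetical device at 00:05.0 with a 32-bit MEM
+ * BAR at config offset 0x14):
+ *
+ *     phys.hi = b_ss(2) | b_ddddd(5) | b_rrrrrrrr(0x14)
+ *             = 0x02000000 | 0x00002800 | 0x00000014
+ *             = 0x02002814
+ *
+ * with phys.mid and phys.lo left at zero, since BAR addresses are
+ * relocatable and the property does not encode the current BAR value.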
+ */ +static void populate_resource_props(PCIDevice *d, ResourceProps *rp) +{ + int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d)))); + uint32_t dev_id = (b_bbbbbbbb(bus_num) | + b_ddddd(PCI_SLOT(d->devfn)) | + b_fff(PCI_FUNC(d->devfn))); + ResourceFields *reg; + int i, reg_idx = 0; + + /* config space region */ + reg = &rp->reg[reg_idx++]; + reg->phys_hi = cpu_to_be32(dev_id); + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = 0; + reg->size_lo = 0; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + + reg = &rp->reg[reg_idx++]; + + reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + reg->phys_hi |= cpu_to_be32(b_ss(1)); + } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + reg->phys_hi |= cpu_to_be32(b_ss(3)); + } else { + reg->phys_hi |= cpu_to_be32(b_ss(2)); + } + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32); + reg->size_lo = cpu_to_be32(d->io_regions[i].size); + } + + rp->reg_len = reg_idx * sizeof(ResourceFields); +} + +typedef struct PCIClass PCIClass; +typedef struct PCISubClass PCISubClass; +typedef struct PCIIFace PCIIFace; + +struct PCIIFace { + int iface; + const char *name; +}; + +struct PCISubClass { + int subclass; + const char *name; + const PCIIFace *iface; +}; + +struct PCIClass { + const char *name; + const PCISubClass *subc; +}; + +static const PCISubClass undef_subclass[] = { + { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mass_subclass[] = { + { PCI_CLASS_STORAGE_SCSI, "scsi", NULL }, + { PCI_CLASS_STORAGE_IDE, "ide", NULL }, + { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL }, + { PCI_CLASS_STORAGE_IPI, "ipi", NULL }, + { PCI_CLASS_STORAGE_RAID, "raid", NULL }, + { PCI_CLASS_STORAGE_ATA, "ata", NULL }, + { PCI_CLASS_STORAGE_SATA, "sata", NULL }, + { PCI_CLASS_STORAGE_SAS, "sas", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass net_subclass[] = { + { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL }, + { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL }, + { PCI_CLASS_NETWORK_FDDI, "fddi", NULL }, + { PCI_CLASS_NETWORK_ATM, "atm", NULL }, + { PCI_CLASS_NETWORK_ISDN, "isdn", NULL }, + { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL }, + { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass displ_subclass[] = { + { PCI_CLASS_DISPLAY_VGA, "vga", NULL }, + { PCI_CLASS_DISPLAY_XGA, "xga", NULL }, + { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass media_subclass[] = { + { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL }, + { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL }, + { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mem_subclass[] = { + { PCI_CLASS_MEMORY_RAM, "memory", NULL }, + { PCI_CLASS_MEMORY_FLASH, "flash", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass bridg_subclass[] = { + { PCI_CLASS_BRIDGE_HOST, "host", NULL }, + { PCI_CLASS_BRIDGE_ISA, "isa", NULL }, + { PCI_CLASS_BRIDGE_EISA, "eisa", NULL }, + { PCI_CLASS_BRIDGE_MC, "mca", NULL }, + { PCI_CLASS_BRIDGE_PCI, "pci", NULL }, + { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL }, + { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL }, + { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL }, + { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL }, + { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL }, + 
{ PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass comm_subclass[] = { + { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL }, + { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL }, + { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL }, + { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL }, + { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL }, + { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL }, + { 0xFF, NULL, NULL, }, +}; + +static const PCIIFace pic_iface[] = { + { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" }, + { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" }, + { 0xFF, NULL }, +}; + +static const PCISubClass sys_subclass[] = { + { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface }, + { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL }, + { PCI_CLASS_SYSTEM_TIMER, "timer", NULL }, + { PCI_CLASS_SYSTEM_RTC, "rtc", NULL }, + { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL }, + { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass inp_subclass[] = { + { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL }, + { PCI_CLASS_INPUT_PEN, "pen", NULL }, + { PCI_CLASS_INPUT_MOUSE, "mouse", NULL }, + { PCI_CLASS_INPUT_SCANNER, "scanner", NULL }, + { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass dock_subclass[] = { + { PCI_CLASS_DOCKING_GENERIC, "dock", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass cpu_subclass[] = { + { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL }, + { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL }, + { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL }, + { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIIFace usb_iface[] = { + { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" }, + { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", }, + { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" }, + { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" }, + { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" }, + { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" }, + { 0xFF, NULL }, +}; + +static const PCISubClass ser_subclass[] = { + { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL }, + { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL }, + { PCI_CLASS_SERIAL_SSA, "ssa", NULL }, + { PCI_CLASS_SERIAL_USB, "usb", usb_iface }, + { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL }, + { PCI_CLASS_SERIAL_SMBUS, "smb", NULL }, + { PCI_CLASS_SERIAL_IB, "infiniband", NULL }, + { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL }, + { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL }, + { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass wrl_subclass[] = { + { PCI_CLASS_WIRELESS_IRDA, "irda", NULL }, + { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL }, + { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL }, + { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL }, + { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass sat_subclass[] = { + { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL }, + { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL }, + { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL }, + { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass crypt_subclass[] = { + { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL }, + { PCI_CLASS_CRYPT_ENTERTAINMENT, + "entertainment-encryption", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass spc_subclass[] = { + { 
PCI_CLASS_SP_DPIO, "dpio", NULL }, + { PCI_CLASS_SP_PERF, "counter", NULL }, + { PCI_CLASS_SP_SYNCH, "measurement", NULL }, + { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIClass pci_classes[] = { + { "legacy-device", undef_subclass }, + { "mass-storage", mass_subclass }, + { "network", net_subclass }, + { "display", displ_subclass, }, + { "multimedia-device", media_subclass }, + { "memory-controller", mem_subclass }, + { "unknown-bridge", bridg_subclass }, + { "communication-controller", comm_subclass}, + { "system-peripheral", sys_subclass }, + { "input-controller", inp_subclass }, + { "docking-station", dock_subclass }, + { "cpu", cpu_subclass }, + { "serial-bus", ser_subclass }, + { "wireless-controller", wrl_subclass }, + { "intelligent-io", NULL }, + { "satellite-device", sat_subclass }, + { "encryption", crypt_subclass }, + { "data-processing-controller", spc_subclass }, +}; + +static const char *dt_name_from_class(uint8_t class, uint8_t subclass, + uint8_t iface) +{ + const PCIClass *pclass; + const PCISubClass *psubclass; + const PCIIFace *piface; + const char *name; + + if (class >= ARRAY_SIZE(pci_classes)) { + return "pci"; + } + + pclass = pci_classes + class; + name = pclass->name; + + if (pclass->subc == NULL) { + return name; + } + + psubclass = pclass->subc; + while ((psubclass->subclass & 0xff) != 0xff) { + if ((psubclass->subclass & 0xff) == subclass) { + name = psubclass->name; + break; + } + psubclass++; + } + + piface = psubclass->iface; + if (piface == NULL) { + return name; + } + while ((piface->iface & 0xff) != 0xff) { + if ((piface->iface & 0xff) == iface) { + name = piface->name; + break; + } + piface++; + } + + return name; +} + +/* + * DRC helper functions + */ + +static uint32_t drc_id_from_devfn(SpaprPhbState *phb, + uint8_t chassis, int32_t devfn) +{ + return (phb->index << 16) | (chassis << 8) | devfn; +} + +static SpaprDrc *drc_from_devfn(SpaprPhbState *phb, + uint8_t chassis, int32_t devfn) +{ + return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI, + drc_id_from_devfn(phb, chassis, devfn)); +} + +static uint8_t chassis_from_bus(PCIBus *bus) +{ + if (pci_bus_is_root(bus)) { + return 0; + } else { + PCIDevice *bridge = pci_bridge_get_device(bus); + + return object_property_get_uint(OBJECT(bridge), "chassis_nr", + &error_abort); + } +} + +static SpaprDrc *drc_from_dev(SpaprPhbState *phb, PCIDevice *dev) +{ + uint8_t chassis = chassis_from_bus(pci_get_bus(dev)); + + return drc_from_devfn(phb, chassis, dev->devfn); +} + +static void add_drcs(SpaprPhbState *phb, PCIBus *bus) +{ + Object *owner; + int i; + uint8_t chassis; + + if (!phb->dr_enabled) { + return; + } + + chassis = chassis_from_bus(bus); + + if (pci_bus_is_root(bus)) { + owner = OBJECT(phb); + } else { + owner = OBJECT(pci_bridge_get_device(bus)); + } + + for (i = 0; i < PCI_SLOT_MAX * PCI_FUNC_MAX; i++) { + spapr_dr_connector_new(owner, TYPE_SPAPR_DRC_PCI, + drc_id_from_devfn(phb, chassis, i)); + } +} + +static void remove_drcs(SpaprPhbState *phb, PCIBus *bus) +{ + int i; + uint8_t chassis; + + if (!phb->dr_enabled) { + return; + } + + chassis = chassis_from_bus(bus); + + for (i = PCI_SLOT_MAX * PCI_FUNC_MAX - 1; i >= 0; i--) { + SpaprDrc *drc = drc_from_devfn(phb, chassis, i); + + if (drc) { + object_unparent(OBJECT(drc)); + } + } +} + +typedef struct PciWalkFdt { + void *fdt; + int offset; + SpaprPhbState *sphb; + int err; +} PciWalkFdt; + +static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset); + +static void 
spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PciWalkFdt *p = opaque; + int err; + + if (p->err) { + /* Something's already broken, don't keep going */ + return; + } + + err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset); + if (err < 0) { + p->err = err; + } +} + +/* Augment PCI device node with bridge specific information */ +static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, + void *fdt, int offset) +{ + Object *owner; + PciWalkFdt cbinfo = { + .fdt = fdt, + .offset = offset, + .sphb = sphb, + .err = 0, + }; + int ret; + + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", + RESOURCE_CELLS_ADDRESS)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", + RESOURCE_CELLS_SIZE)); + + assert(bus); + pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &cbinfo); + if (cbinfo.err) { + return cbinfo.err; + } + + if (pci_bus_is_root(bus)) { + owner = OBJECT(sphb); + } else { + owner = OBJECT(pci_bridge_get_device(bus)); + } + + ret = spapr_dt_drc(fdt, offset, owner, + SPAPR_DR_CONNECTOR_TYPE_PCI); + if (ret) { + return ret; + } + + return offset; +} + +char *spapr_pci_fw_dev_name(PCIDevice *dev) +{ + const gchar *basename; + int slot = PCI_SLOT(dev->devfn); + int func = PCI_FUNC(dev->devfn); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); + + basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, + ccode & 0xff); + + if (func != 0) { + return g_strdup_printf("%s@%x,%x", basename, slot, func); + } else { + return g_strdup_printf("%s@%x", basename, slot); + } +} + +/* create OF node for pci device and required OF DT properties */ +static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset) +{ + int offset; + g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + ResourceProps rp; + SpaprDrc *drc = drc_from_dev(sphb, dev); + uint32_t vendor_id = pci_default_read_config(dev, PCI_VENDOR_ID, 2); + uint32_t device_id = pci_default_read_config(dev, PCI_DEVICE_ID, 2); + uint32_t revision_id = pci_default_read_config(dev, PCI_REVISION_ID, 1); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); + uint32_t irq_pin = pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1); + uint32_t subsystem_id = pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2); + uint32_t subsystem_vendor_id = + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); + uint32_t cache_line_size = + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1); + uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + gchar *loc_code; + + _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename)); + + /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id)); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id)); + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", revision_id)); + + _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode)); + if (irq_pin) { + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", irq_pin)); + } + + if (subsystem_id) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", subsystem_id)); + } + + if (subsystem_vendor_id) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", + subsystem_vendor_id)); + } + + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", cache_line_size)); + + + /* the following fdt cells are masked off the pci status register */ + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", + PCI_STATUS_DEVSEL_MASK & 
pci_status)); + + if (pci_status & PCI_STATUS_FAST_BACK) { + _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0)); + } + if (pci_status & PCI_STATUS_66MHZ) { + _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0)); + } + if (pci_status & PCI_STATUS_UDF) { + _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0)); + } + + loc_code = spapr_phb_get_loc_code(sphb, dev); + _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", loc_code)); + g_free(loc_code); + + if (drc) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", + spapr_drc_index(drc))); + } + + if (msi_present(dev)) { + uint32_t max_msi = msi_nr_vectors_allocated(dev); + if (max_msi) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi)); + } + } + if (msix_present(dev)) { + uint32_t max_msix = dev->msix_entries_nr; + if (max_msix) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix)); + } + } + + populate_resource_props(dev, &rp); + _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len)); + + if (sphb->pcie_ecs && pci_is_express(dev)) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1)); + } + + spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb); + + if (!pc->is_bridge) { + /* Properties only for non-bridges */ + uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1); + uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT, 1); + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant)); + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency)); + return offset; + } else { + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + + return spapr_dt_pci_bus(sphb, sec_bus, fdt, offset); + } +} + +/* Callback to be called during DRC release. */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + HotplugHandler *plug_handler = qdev_get_hotplug_handler(drc->dev); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(plug_handler); + PCIDevice *pdev = PCI_DEVICE(drc->dev); + + *fdt_start_offset = spapr_dt_pci_device(sphb, pdev, fdt, 0); + return 0; +} + +static void spapr_pci_bridge_plug(SpaprPhbState *phb, + PCIBridge *bridge) +{ + PCIBus *bus = pci_bridge_get_sec_bus(bridge); + + add_drcs(phb, bus); +} + +/* Returns non-zero if the value of "chassis_nr" is already in use */ +static int check_chassis_nr(Object *obj, void *opaque) +{ + int new_chassis_nr = + object_property_get_uint(opaque, "chassis_nr", &error_abort); + int chassis_nr = + object_property_get_uint(obj, "chassis_nr", NULL); + + if (!object_dynamic_cast(obj, TYPE_PCI_BRIDGE)) { + return 0; + } + + /* Skip unsupported bridge types */ + if (!chassis_nr) { + return 0; + } + + /* Skip self */ + if (obj == opaque) { + return 0; + } + + return chassis_nr == new_chassis_nr; +} + +static bool bridge_has_valid_chassis_nr(Object *bridge, Error **errp) +{ + int chassis_nr = + object_property_get_uint(bridge, "chassis_nr", NULL); + + /* + * slotid_cap_init() already ensures that "chassis_nr" isn't null for + * standard PCI bridges, so this really tells if "chassis_nr" is present + * or not. 
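+     * (A bridge model that does not expose a chassis number is
+     * rejected just below, with a hint to use the standard pci-bridge
+     * device instead.)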
+ */ + if (!chassis_nr) { + error_setg(errp, "PCI Bridge lacks a \"chassis_nr\" property"); + error_append_hint(errp, "Try -device pci-bridge instead.\n"); + return false; + } + + /* We want unique values for "chassis_nr" */ + if (object_child_foreach_recursive(object_get_root(), check_chassis_nr, + bridge)) { + error_setg(errp, "Bridge chassis %d already in use", chassis_nr); + return false; + } + + return true; +} + +static void spapr_pci_pre_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev); + SpaprDrc *drc = drc_from_dev(phb, pdev); + PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); + uint32_t slotnr = PCI_SLOT(pdev->devfn); + + if (!phb->dr_enabled) { + /* if this is a hotplug operation initiated by the user + * we need to let them know it's not enabled + */ + if (plugged_dev->hotplugged) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, + object_get_typename(OBJECT(phb))); + return; + } + } + + if (pc->is_bridge) { + if (!bridge_has_valid_chassis_nr(OBJECT(plugged_dev), errp)) { + return; + } + } + + /* Following the QEMU convention used for PCIe multifunction + * hotplug, we do not allow functions to be hotplugged to a + * slot that already has function 0 present + */ + if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] && + PCI_FUNC(pdev->devfn) != 0) { + error_setg(errp, "PCI: slot %d function 0 already occupied by %s," + " additional functions can no longer be exposed to guest.", + slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name); + } + + if (drc && drc->dev) { + error_setg(errp, "PCI: slot %d already occupied by %s", slotnr, + pci_get_function_0(PCI_DEVICE(drc->dev))->name); + return; + } +} + +static void spapr_pci_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev); + SpaprDrc *drc = drc_from_dev(phb, pdev); + uint32_t slotnr = PCI_SLOT(pdev->devfn); + + /* + * If DR is disabled we don't need to do anything in the case of + * hotplug or coldplug callbacks. + */ + if (!phb->dr_enabled) { + return; + } + + g_assert(drc); + + if (pc->is_bridge) { + spapr_pci_bridge_plug(phb, PCI_BRIDGE(plugged_dev)); + } + + /* spapr_pci_pre_plug() already checked the DRC is attachable */ + spapr_drc_attach(drc, DEVICE(pdev)); + + /* If this is function 0, signal hotplug for all the device functions. + * Otherwise defer sending the hotplug event. 
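+     * The deferred events are delivered below: once function 0 is
+     * attached, the loop scans all eight functions of the slot and
+     * raises a hotplug request for every DRC that senses PRESENT.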
+     */
+    if (!spapr_drc_hotplugged(plugged_dev)) {
+        spapr_drc_reset(drc);
+    } else if (PCI_FUNC(pdev->devfn) == 0) {
+        int i;
+        uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
+
+        for (i = 0; i < 8; i++) {
+            SpaprDrc *func_drc;
+            SpaprDrcClass *func_drck;
+            SpaprDREntitySense state;
+
+            func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
+            func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+            state = func_drck->dr_entity_sense(func_drc);
+
+            if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+                spapr_hotplug_req_add_by_index(func_drc);
+            }
+        }
+    }
+}
+
+static void spapr_pci_bridge_unplug(SpaprPhbState *phb,
+                                    PCIBridge *bridge)
+{
+    PCIBus *bus = pci_bridge_get_sec_bus(bridge);
+
+    remove_drcs(phb, bus);
+}
+
+static void spapr_pci_unplug(HotplugHandler *plug_handler,
+                             DeviceState *plugged_dev, Error **errp)
+{
+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+
+    /* Some guest versions do not wait for completion of a device
+     * cleanup (generally done asynchronously by the kernel) before
+     * signaling to QEMU that the device is safe, but instead sleep
+     * for some 'safe' period of time. Unfortunately, on a busy host
+     * this sleep isn't guaranteed to be long enough, resulting in
+     * bad things like IRQ lines being left asserted during final
+     * device removal. To deal with this we call reset just prior
+     * to finalizing the device, which will put the device back into
+     * an 'idle' state, as the device cleanup code expects.
+     */
+    pci_device_reset(PCI_DEVICE(plugged_dev));
+
+    if (pc->is_bridge) {
+        spapr_pci_bridge_unplug(phb, PCI_BRIDGE(plugged_dev));
+        return;
+    }
+
+    qdev_unrealize(plugged_dev);
+}
+
+static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
+                                     DeviceState *plugged_dev, Error **errp)
+{
+    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
+    SpaprDrc *drc = drc_from_dev(phb, pdev);
+
+    if (!phb->dr_enabled) {
+        error_setg(errp, QERR_BUS_NO_HOTPLUG,
+                   object_get_typename(OBJECT(phb)));
+        return;
+    }
+
+    g_assert(drc);
+    g_assert(drc->dev == plugged_dev);
+
+    if (!spapr_drc_unplug_requested(drc)) {
+        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+        uint32_t slotnr = PCI_SLOT(pdev->devfn);
+        SpaprDrc *func_drc;
+        SpaprDrcClass *func_drck;
+        SpaprDREntitySense state;
+        int i;
+        uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
+
+        if (pc->is_bridge) {
+            error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
+            return;
+        }
+        if (object_property_get_uint(OBJECT(pdev), "nvlink2-tgt", NULL)) {
+            error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
+            return;
+        }
+
+        /* ensure any other present functions are pending unplug */
+        if (PCI_FUNC(pdev->devfn) == 0) {
+            for (i = 1; i < 8; i++) {
+                func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
+                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+                state = func_drck->dr_entity_sense(func_drc);
+                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
+                    && !spapr_drc_unplug_requested(func_drc)) {
+                    /*
+                     * Attempting to remove function 0 of a multifunction
+                     * device will cascade into removing all child
+                     * functions, even if their unplug wasn't requested
+                     * beforehand.
+                     */
+                    spapr_drc_unplug_request(func_drc);
+                }
+            }
+        }
+
+        spapr_drc_unplug_request(drc);
+
+        /* If this isn't function 0, defer the unplug event;
otherwise signal removal + * for all present functions + */ + if (PCI_FUNC(pdev->devfn) == 0) { + for (i = 7; i >= 0; i--) { + func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i)); + func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); + state = func_drck->dr_entity_sense(func_drc); + if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) { + spapr_hotplug_req_remove_by_index(func_drc); + } + } + } + } else { + error_setg(errp, + "PCI device unplug already in progress for device %s", + drc->dev->id); + } +} + +static void spapr_phb_finalizefn(Object *obj) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(obj); + + g_free(sphb->dtbusname); + sphb->dtbusname = NULL; +} + +static void spapr_phb_unrealize(DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SysBusDevice *s = SYS_BUS_DEVICE(dev); + PCIHostState *phb = PCI_HOST_BRIDGE(s); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(phb); + SpaprTceTable *tcet; + int i; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + + spapr_phb_nvgpu_free(sphb); + + if (sphb->msi) { + g_hash_table_unref(sphb->msi); + sphb->msi = NULL; + } + + /* + * Remove IO/MMIO subregions and aliases, rest should get cleaned + * via PHB's unrealize->object_finalize + */ + for (i = windows_supported - 1; i >= 0; i--) { + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); + if (tcet) { + memory_region_del_subregion(&sphb->iommu_root, + spapr_tce_get_iommu(tcet)); + } + } + + remove_drcs(sphb, phb->bus); + + for (i = PCI_NUM_PINS - 1; i >= 0; i--) { + if (sphb->lsi_table[i].irq) { + spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1); + sphb->lsi_table[i].irq = 0; + } + } + + QLIST_REMOVE(sphb, list); + + memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow); + + /* + * An attached PCI device may have memory listeners, eg. VFIO PCI. We have + * unmapped all sections. Remove the listeners now, before destroying the + * address space. + */ + address_space_remove_listeners(&sphb->iommu_as); + address_space_destroy(&sphb->iommu_as); + + qbus_set_hotplug_handler(BUS(phb->bus), NULL); + pci_unregister_root_bus(phb->bus); + + memory_region_del_subregion(get_system_memory(), &sphb->iowindow); + if (sphb->mem64_win_pciaddr != (hwaddr)-1) { + memory_region_del_subregion(get_system_memory(), &sphb->mem64window); + } + memory_region_del_subregion(get_system_memory(), &sphb->mem32window); +} + +static void spapr_phb_destroy_msi(gpointer opaque) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprPciMsi *msi = opaque; + + if (!smc->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); + } + spapr_irq_free(spapr, msi->first_irq, msi->num); + g_free(msi); +} + +static void spapr_phb_realize(DeviceState *dev, Error **errp) +{ + ERRP_GUARD(); + /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user + * tries to add a sPAPR PHB to a non-pseries machine. + */ + SpaprMachineState *spapr = + (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(), + TYPE_SPAPR_MACHINE); + SpaprMachineClass *smc = spapr ? 
SPAPR_MACHINE_GET_CLASS(spapr) : NULL; + SysBusDevice *s = SYS_BUS_DEVICE(dev); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(s); + PCIHostState *phb = PCI_HOST_BRIDGE(s); + MachineState *ms = MACHINE(spapr); + char *namebuf; + int i; + PCIBus *bus; + uint64_t msi_window_size = 4096; + SpaprTceTable *tcet; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + + if (!spapr) { + error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine"); + return; + } + + assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */ + + if (sphb->mem64_win_size != 0) { + if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx + " (max 2 GiB)", sphb->mem_win_size); + return; + } + + /* 64-bit window defaults to identity mapping */ + sphb->mem64_win_pciaddr = sphb->mem64_win_addr; + } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + /* + * For compatibility with old configuration, if no 64-bit MMIO + * window is specified, but the ordinary (32-bit) memory + * window is specified as > 2GiB, we treat it as a 2GiB 32-bit + * window, with a 64-bit MMIO window following on immediately + * afterwards + */ + sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_pciaddr = + SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE; + } + + if (spapr_pci_find_phb(spapr, sphb->buid)) { + SpaprPhbState *s; + + error_setg(errp, "PCI host bridges must have unique indexes"); + error_append_hint(errp, "The following indexes are already in use:"); + QLIST_FOREACH(s, &spapr->phbs, list) { + error_append_hint(errp, " %d", s->index); + } + error_append_hint(errp, "\nTry another value for the index property\n"); + return; + } + + if (sphb->numa_node != -1 && + (sphb->numa_node >= MAX_NODES || + !ms->numa_state->nodes[sphb->numa_node].present)) { + error_setg(errp, "Invalid NUMA node ID for PCI host bridge"); + return; + } + + sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid); + + /* Initialize memory regions */ + namebuf = g_strdup_printf("%s.mmio", sphb->dtbusname); + memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX); + g_free(namebuf); + + namebuf = g_strdup_printf("%s.mmio32-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem32window, OBJECT(sphb), + namebuf, &sphb->memspace, + SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size); + g_free(namebuf); + memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr, + &sphb->mem32window); + + if (sphb->mem64_win_size != 0) { + namebuf = g_strdup_printf("%s.mmio64-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem64window, OBJECT(sphb), + namebuf, &sphb->memspace, + sphb->mem64_win_pciaddr, sphb->mem64_win_size); + g_free(namebuf); + + memory_region_add_subregion(get_system_memory(), + sphb->mem64_win_addr, + &sphb->mem64window); + } + + /* Initialize IO regions */ + namebuf = g_strdup_printf("%s.io", sphb->dtbusname); + memory_region_init(&sphb->iospace, OBJECT(sphb), + namebuf, SPAPR_PCI_IO_WIN_SIZE); + g_free(namebuf); + + namebuf = g_strdup_printf("%s.io-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf, + &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE); + g_free(namebuf); + memory_region_add_subregion(get_system_memory(), sphb->io_win_addr, + &sphb->iowindow); + + bus = pci_register_root_bus(dev, NULL, + 
pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
+                                &sphb->memspace, &sphb->iospace,
+                                PCI_DEVFN(0, 0), PCI_NUM_PINS,
+                                TYPE_PCI_BUS);
+
+    /*
+     * Despite resembling a vanilla PCI bus in most ways, the PAPR
+     * para-virtualized PCI bus *does* permit PCI-E extended config
+     * space access
+     */
+    if (sphb->pcie_ecs) {
+        bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+    }
+    phb->bus = bus;
+    qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb));
+
+    /*
+     * Initialize PHB address space.
+     * By default there will be at least one subregion for the default
+     * 32bit DMA window.
+     * Later the guest might want to create another DMA window
+     * which will become another memory subregion.
+     */
+    namebuf = g_strdup_printf("%s.iommu-root", sphb->dtbusname);
+    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
+                       namebuf, UINT64_MAX);
+    g_free(namebuf);
+    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
+                       sphb->dtbusname);
+
+    /*
+     * As MSI/MSIX interrupts are triggered by writes to the MSI/MSIX
+     * vectors, we need to allocate some memory to catch those writes
+     * coming from msi_notify()/msix_notify().
+     * As MSIMessage:addr is going to be the same and MSIMessage:data
+     * is going to be a VIRQ number, only 4 bytes of the MSI MR will
+     * be used.
+     *
+     * For KVM we want to ensure that this memory is a full page so that
+     * our memory slot is of page size granularity.
+     */
+    if (kvm_enabled()) {
+        msi_window_size = qemu_real_host_page_size;
+    }
+
+    memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), &spapr_msi_ops, spapr,
+                          "msi", msi_window_size);
+    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
+                                &sphb->msiwindow);
+
+    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
+
+    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
+
+    QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
+
+    /* Initialize the LSI table */
+    for (i = 0; i < PCI_NUM_PINS; i++) {
+        int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
+
+        if (smc->legacy_irq_allocation) {
+            irq = spapr_irq_findone(spapr, errp);
+            if (irq < 0) {
+                error_prepend(errp, "can't allocate LSIs: ");
+                /*
+                 * Older machines will never support PHB hotplug, i.e. this
+                 * is an init-only path and QEMU will terminate. No need to
+                 * rollback.
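+                 * (Machine types using the modern, fixed LSI numbering
+                 * computed above never take this branch.)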
+ */ + return; + } + } + + if (spapr_irq_claim(spapr, irq, true, errp) < 0) { + error_prepend(errp, "can't allocate LSIs: "); + goto unrealize; + } + + sphb->lsi_table[i].irq = irq; + } + + /* allocate connectors for child PCI devices */ + add_drcs(sphb, phb->bus); + + /* DMA setup */ + for (i = 0; i < windows_supported; ++i) { + tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]); + if (!tcet) { + error_setg(errp, "Creating window#%d failed for %s", + i, sphb->dtbusname); + goto unrealize; + } + memory_region_add_subregion(&sphb->iommu_root, 0, + spapr_tce_get_iommu(tcet)); + } + + sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, + spapr_phb_destroy_msi); + return; + +unrealize: + spapr_phb_unrealize(dev); +} + +static int spapr_phb_children_reset(Object *child, void *opaque) +{ + DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE); + + if (dev) { + device_legacy_reset(dev); + } + + return 0; +} + +void spapr_phb_dma_reset(SpaprPhbState *sphb) +{ + int i; + SpaprTceTable *tcet; + + for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) { + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); + + if (tcet && tcet->nb_table) { + spapr_tce_table_disable(tcet); + } + } + + /* Register default 32bit DMA window */ + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]); + spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr, + sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT); +} + +static void spapr_phb_reset(DeviceState *qdev) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev); + Error *err = NULL; + + spapr_phb_dma_reset(sphb); + spapr_phb_nvgpu_free(sphb); + spapr_phb_nvgpu_setup(sphb, &err); + if (err) { + error_report_err(err); + } + + /* Reset the IOMMU state */ + object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL); + + if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) { + spapr_phb_vfio_reset(qdev); + } + + g_hash_table_remove_all(sphb->msi); +} + +static Property spapr_phb_properties[] = { + DEFINE_PROP_UINT32("index", SpaprPhbState, index, -1), + DEFINE_PROP_UINT64("mem_win_size", SpaprPhbState, mem_win_size, + SPAPR_PCI_MEM32_WIN_SIZE), + DEFINE_PROP_UINT64("mem64_win_size", SpaprPhbState, mem64_win_size, + SPAPR_PCI_MEM64_WIN_SIZE), + DEFINE_PROP_UINT64("io_win_size", SpaprPhbState, io_win_size, + SPAPR_PCI_IO_WIN_SIZE), + DEFINE_PROP_BOOL("dynamic-reconfiguration", SpaprPhbState, dr_enabled, + true), + /* Default DMA window is 0..1GB */ + DEFINE_PROP_UINT64("dma_win_addr", SpaprPhbState, dma_win_addr, 0), + DEFINE_PROP_UINT64("dma_win_size", SpaprPhbState, dma_win_size, 0x40000000), + DEFINE_PROP_UINT64("dma64_win_addr", SpaprPhbState, dma64_win_addr, + 0x800000000000000ULL), + DEFINE_PROP_BOOL("ddw", SpaprPhbState, ddw_enabled, true), + DEFINE_PROP_UINT64("pgsz", SpaprPhbState, page_size_mask, + (1ULL << 12) | (1ULL << 16) + | (1ULL << 21) | (1ULL << 24)), + DEFINE_PROP_UINT32("numa_node", SpaprPhbState, numa_node, -1), + DEFINE_PROP_BOOL("pre-2.8-migration", SpaprPhbState, + pre_2_8_migration, false), + DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState, + pcie_ecs, true), + DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0), + DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0), + DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState, + pre_5_1_assoc, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_spapr_pci_lsi = { + .name = "spapr_pci/lsi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + 
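+        /* The LSI number must match on source and destination;
+         * VMSTATE_UINT32_EQUAL fails the migration otherwise. */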
VMSTATE_UINT32_EQUAL(irq, SpaprPciLsi, NULL), + + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr_pci_msi = { + .name = "spapr_pci/msi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField []) { + VMSTATE_UINT32(key, SpaprPciMsiMig), + VMSTATE_UINT32(value.first_irq, SpaprPciMsiMig), + VMSTATE_UINT32(value.num, SpaprPciMsiMig), + VMSTATE_END_OF_LIST() + }, +}; + +static int spapr_pci_pre_save(void *opaque) +{ + SpaprPhbState *sphb = opaque; + GHashTableIter iter; + gpointer key, value; + int i; + + if (sphb->pre_2_8_migration) { + sphb->mig_liobn = sphb->dma_liobn[0]; + sphb->mig_mem_win_addr = sphb->mem_win_addr; + sphb->mig_mem_win_size = sphb->mem_win_size; + sphb->mig_io_win_addr = sphb->io_win_addr; + sphb->mig_io_win_size = sphb->io_win_size; + + if ((sphb->mem64_win_size != 0) + && (sphb->mem64_win_addr + == (sphb->mem_win_addr + sphb->mem_win_size))) { + sphb->mig_mem_win_size += sphb->mem64_win_size; + } + } + + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = g_hash_table_size(sphb->msi); + if (!sphb->msi_devs_num) { + return 0; + } + sphb->msi_devs = g_new(SpaprPciMsiMig, sphb->msi_devs_num); + + g_hash_table_iter_init(&iter, sphb->msi); + for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { + sphb->msi_devs[i].key = *(uint32_t *) key; + sphb->msi_devs[i].value = *(SpaprPciMsi *) value; + } + + return 0; +} + +static int spapr_pci_post_save(void *opaque) +{ + SpaprPhbState *sphb = opaque; + + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = 0; + return 0; +} + +static int spapr_pci_post_load(void *opaque, int version_id) +{ + SpaprPhbState *sphb = opaque; + gpointer key, value; + int i; + + for (i = 0; i < sphb->msi_devs_num; ++i) { + key = g_memdup(&sphb->msi_devs[i].key, + sizeof(sphb->msi_devs[i].key)); + value = g_memdup(&sphb->msi_devs[i].value, + sizeof(sphb->msi_devs[i].value)); + g_hash_table_insert(sphb->msi, key, value); + } + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = 0; + + return 0; +} + +static bool pre_2_8_migration(void *opaque, int version_id) +{ + SpaprPhbState *sphb = opaque; + + return sphb->pre_2_8_migration; +} + +static const VMStateDescription vmstate_spapr_pci = { + .name = "spapr_pci", + .version_id = 2, + .minimum_version_id = 2, + .pre_save = spapr_pci_pre_save, + .post_save = spapr_pci_post_save, + .post_load = spapr_pci_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL), + VMSTATE_UINT32_TEST(mig_liobn, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_addr, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_size, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration), + VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0, + vmstate_spapr_pci_lsi, SpaprPciLsi), + VMSTATE_INT32(msi_devs_num, SpaprPhbState), + VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0, + vmstate_spapr_pci_msi, SpaprPciMsiMig), + VMSTATE_END_OF_LIST() + }, +}; + +static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge); + + return sphb->dtbusname; +} + +static void spapr_phb_class_init(ObjectClass *klass, void *data) +{ + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + 
HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass); + + hc->root_bus_path = spapr_phb_root_bus_path; + dc->realize = spapr_phb_realize; + dc->unrealize = spapr_phb_unrealize; + device_class_set_props(dc, spapr_phb_properties); + dc->reset = spapr_phb_reset; + dc->vmsd = &vmstate_spapr_pci; + /* Supported by TYPE_SPAPR_MACHINE */ + dc->user_creatable = true; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + hp->pre_plug = spapr_pci_pre_plug; + hp->plug = spapr_pci_plug; + hp->unplug = spapr_pci_unplug; + hp->unplug_request = spapr_pci_unplug_request; +} + +static const TypeInfo spapr_phb_info = { + .name = TYPE_SPAPR_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(SpaprPhbState), + .instance_finalize = spapr_phb_finalizefn, + .class_init = spapr_phb_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + unsigned int *bus_no = opaque; + PCIBus *sec_bus = NULL; + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + (*bus_no)++; + pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1); + pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge, + bus_no); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); +} + +static void spapr_phb_pci_enumerate(SpaprPhbState *phb) +{ + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + unsigned int bus_no = 0; + + pci_for_each_device_under_bus(bus, spapr_phb_pci_enumerate_bridge, + &bus_no); + +} + +int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb, + uint32_t intc_phandle, void *fdt, int *node_offset) +{ + int bus_off, i, j, ret; + uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; + struct { + uint32_t hi; + uint64_t child; + uint64_t parent; + uint64_t size; + } QEMU_PACKED ranges[] = { + { + cpu_to_be32(b_ss(1)), cpu_to_be64(0), + cpu_to_be64(phb->io_win_addr), + cpu_to_be64(memory_region_size(&phb->iospace)), + }, + { + cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET), + cpu_to_be64(phb->mem_win_addr), + cpu_to_be64(phb->mem_win_size), + }, + { + cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr), + cpu_to_be64(phb->mem64_win_addr), + cpu_to_be64(phb->mem64_win_size), + }, + }; + const unsigned sizeof_ranges = + (phb->mem64_win_size ? 
3 : 2) * sizeof(ranges[0]);
+    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
+    uint32_t interrupt_map_mask[] = {
+        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
+    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
+    uint32_t ddw_applicable[] = {
+        cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
+        cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
+        cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
+    };
+    uint32_t ddw_extensions[] = {
+        cpu_to_be32(1),
+        cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
+    };
+    SpaprTceTable *tcet;
+    SpaprDrc *drc;
+    Error *err = NULL;
+
+    /* Start populating the FDT */
+    _FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname));
+    if (node_offset) {
+        *node_offset = bus_off;
+    }
+
+    /* Write PHB properties */
+    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
+    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
+    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
+    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
+    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
+    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi",
+                          spapr_irq_nr_msis(spapr)));
+
+    /* Dynamic DMA window */
+    if (phb->ddw_enabled) {
+        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
+                         sizeof(ddw_applicable)));
+        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
+                         &ddw_extensions, sizeof(ddw_extensions)));
+    }
+
+    /* Advertise NUMA via ibm,associativity */
+    if (phb->numa_node != -1) {
+        spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node);
+    }
+
+    /* Build the interrupt-map; this must match what is done
+     * in pci_swizzle_map_irq_fn
+     */
+    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
+                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
+    for (i = 0; i < PCI_SLOT_MAX; i++) {
+        for (j = 0; j < PCI_NUM_PINS; j++) {
+            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
+            int lsi_num = pci_swizzle(i, j);
+
+            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
+            irqmap[1] = 0;
+            irqmap[2] = 0;
+            irqmap[3] = cpu_to_be32(j+1);
+            irqmap[4] = cpu_to_be32(intc_phandle);
+            spapr_dt_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true);
+        }
+    }
+    /* Write interrupt map */
+    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
+                     sizeof(interrupt_map)));
+
+    tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
+    if (!tcet) {
+        return -1;
+    }
+    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
+                 tcet->liobn, tcet->bus_offset,
+                 tcet->nb_table << tcet->page_shift);
+
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index);
+    if (drc) {
+        uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc));
+
+        _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index,
+                         sizeof(drc_index)));
+    }
+
+    /* Walk the bridges and program the bus numbers */
+    spapr_phb_pci_enumerate(phb);
+    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
+
+    /* Walk the bridge and subordinate buses */
+    ret = spapr_dt_pci_bus(phb, PCI_HOST_BRIDGE(phb)->bus, fdt, bus_off);
+    if (ret < 0) {
+        return ret;
+    }
+
+    spapr_phb_nvgpu_populate_dt(phb, fdt, bus_off, &err);
+    if (err) {
+        error_report_err(err);
+    }
+    spapr_phb_nvgpu_ram_populate_dt(phb, fdt);
+
+    return 0;
+}
+
+void spapr_pci_rtas_init(void)
+{
+    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
rtas_read_pci_config); + spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config", + rtas_write_pci_config); + spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config", + rtas_ibm_read_pci_config); + spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config", + rtas_ibm_write_pci_config); + if (msi_nonbroken) { + spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER, + "ibm,query-interrupt-source-number", + rtas_ibm_query_interrupt_source_number); + spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi", + rtas_ibm_change_msi); + } + + spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION, + "ibm,set-eeh-option", + rtas_ibm_set_eeh_option); + spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2, + "ibm,get-config-addr-info2", + rtas_ibm_get_config_addr_info2); + spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2, + "ibm,read-slot-reset-state2", + rtas_ibm_read_slot_reset_state2); + spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET, + "ibm,set-slot-reset", + rtas_ibm_set_slot_reset); + spapr_rtas_register(RTAS_IBM_CONFIGURE_PE, + "ibm,configure-pe", + rtas_ibm_configure_pe); + spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL, + "ibm,slot-error-detail", + rtas_ibm_slot_error_detail); +} + +static void spapr_pci_register_types(void) +{ + type_register_static(&spapr_phb_info); +} + +type_init(spapr_pci_register_types) + +static int spapr_switch_one_vga(DeviceState *dev, void *opaque) +{ + bool be = *(bool *)opaque; + + if (object_dynamic_cast(OBJECT(dev), "VGA") + || object_dynamic_cast(OBJECT(dev), "secondary-vga") + || object_dynamic_cast(OBJECT(dev), "bochs-display") + || object_dynamic_cast(OBJECT(dev), "virtio-vga")) { + object_property_set_bool(OBJECT(dev), "big-endian-framebuffer", be, + &error_abort); + } + return 0; +} + +void spapr_pci_switch_vga(SpaprMachineState *spapr, bool big_endian) +{ + SpaprPhbState *sphb; + + /* + * For backward compatibility with existing guests, we switch + * the endianness of the VGA controller when changing the guest + * interrupt mode + */ + QLIST_FOREACH(sphb, &spapr->phbs, list) { + BusState *bus = &PCI_HOST_BRIDGE(sphb)->bus->qbus; + qbus_walk_children(bus, spapr_switch_one_vga, NULL, NULL, NULL, + &big_endian); + } +} diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c new file mode 100644 index 000000000..7fb0cf4d0 --- /dev/null +++ b/hw/ppc/spapr_pci_nvlink2.c @@ -0,0 +1,445 @@ +/* + * QEMU sPAPR PCI for NVLink2 pass through + * + * Copyright (c) 2019 Alexey Kardashevskiy, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "hw/pci/pci.h" +#include "hw/pci-host/spapr.h" +#include "hw/ppc/spapr_numa.h" +#include "qemu/error-report.h" +#include "hw/ppc/fdt.h" +#include "hw/pci/pci_bridge.h" + +#define PHANDLE_PCIDEV(phb, pdev) (0x12000000 | \ + (((phb)->index) << 16) | ((pdev)->devfn)) +#define PHANDLE_GPURAM(phb, n) (0x110000FF | ((n) << 8) | \ + (((phb)->index) << 16)) +#define PHANDLE_NVLINK(phb, gn, nn) (0x00130000 | (((phb)->index) << 8) | \ + ((gn) << 4) | (nn)) + +typedef struct SpaprPhbPciNvGpuSlot { + uint64_t tgt; + uint64_t gpa; + unsigned numa_id; + PCIDevice *gpdev; + int linknum; + struct { + uint64_t atsd_gpa; + PCIDevice *npdev; + uint32_t link_speed; + } links[NVGPU_MAX_LINKS]; +} SpaprPhbPciNvGpuSlot; + +struct SpaprPhbPciNvGpuConfig { + uint64_t nv2_ram_current; + uint64_t nv2_atsd_current; + int num; /* number of non empty (i.e. tgt!=0) entries in slots[] */ + SpaprPhbPciNvGpuSlot slots[NVGPU_MAX_NUM]; + Error *err; +}; + +static SpaprPhbPciNvGpuSlot * +spapr_nvgpu_get_slot(SpaprPhbPciNvGpuConfig *nvgpus, uint64_t tgt) +{ + int i; + + /* Search for partially collected "slot" */ + for (i = 0; i < nvgpus->num; ++i) { + if (nvgpus->slots[i].tgt == tgt) { + return &nvgpus->slots[i]; + } + } + + if (nvgpus->num == ARRAY_SIZE(nvgpus->slots)) { + return NULL; + } + + i = nvgpus->num; + nvgpus->slots[i].tgt = tgt; + ++nvgpus->num; + + return &nvgpus->slots[i]; +} + +static void spapr_pci_collect_nvgpu(SpaprPhbPciNvGpuConfig *nvgpus, + PCIDevice *pdev, uint64_t tgt, + MemoryRegion *mr, Error **errp) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + SpaprMachineState *spapr = SPAPR_MACHINE(machine); + SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); + + if (!nvslot) { + error_setg(errp, "Found too many GPUs per vPHB"); + return; + } + g_assert(!nvslot->gpdev); + nvslot->gpdev = pdev; + + nvslot->gpa = nvgpus->nv2_ram_current; + nvgpus->nv2_ram_current += memory_region_size(mr); + nvslot->numa_id = spapr->gpu_numa_id; + ++spapr->gpu_numa_id; +} + +static void spapr_pci_collect_nvnpu(SpaprPhbPciNvGpuConfig *nvgpus, + PCIDevice *pdev, uint64_t tgt, + MemoryRegion *mr, Error **errp) +{ + SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); + int j; + + if (!nvslot) { + error_setg(errp, "Found too many NVLink bridges per vPHB"); + return; + } + + j = nvslot->linknum; + if (j == ARRAY_SIZE(nvslot->links)) { + error_setg(errp, "Found too many NVLink bridges per GPU"); + return; + } + ++nvslot->linknum; + + g_assert(!nvslot->links[j].npdev); + nvslot->links[j].npdev = pdev; + nvslot->links[j].atsd_gpa = nvgpus->nv2_atsd_current; + nvgpus->nv2_atsd_current += memory_region_size(mr); + nvslot->links[j].link_speed = + object_property_get_uint(OBJECT(pdev), "nvlink2-link-speed", NULL); +} + +static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PCIBus *sec_bus; + Object *po = OBJECT(pdev); + uint64_t tgt = object_property_get_uint(po, "nvlink2-tgt", NULL); + + if (tgt) { + Error *local_err = NULL; + SpaprPhbPciNvGpuConfig *nvgpus = opaque; + Object *mr_gpu = object_property_get_link(po, "nvlink2-mr[0]", NULL); + Object *mr_npu = object_property_get_link(po, "nvlink2-atsd-mr[0]", 
+ NULL); + + g_assert(mr_gpu || mr_npu); + if (mr_gpu) { + spapr_pci_collect_nvgpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_gpu), + &local_err); + } else { + spapr_pci_collect_nvnpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_npu), + &local_err); + } + error_propagate(&nvgpus->err, local_err); + } + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_collect_nvgpu, opaque); +} + +void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) +{ + int i, j, valid_gpu_num; + PCIBus *bus; + + /* Search for GPUs and NPUs */ + if (!sphb->nv2_gpa_win_addr || !sphb->nv2_atsd_win_addr) { + return; + } + + sphb->nvgpus = g_new0(SpaprPhbPciNvGpuConfig, 1); + sphb->nvgpus->nv2_ram_current = sphb->nv2_gpa_win_addr; + sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr; + + bus = PCI_HOST_BRIDGE(sphb)->bus; + pci_for_each_device_under_bus(bus, spapr_phb_pci_collect_nvgpu, + sphb->nvgpus); + + if (sphb->nvgpus->err) { + error_propagate(errp, sphb->nvgpus->err); + sphb->nvgpus->err = NULL; + goto cleanup_exit; + } + + /* Add found GPU RAM and ATSD MRs if found */ + for (i = 0, valid_gpu_num = 0; i < sphb->nvgpus->num; ++i) { + Object *nvmrobj; + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + + if (!nvslot->gpdev) { + continue; + } + nvmrobj = object_property_get_link(OBJECT(nvslot->gpdev), + "nvlink2-mr[0]", NULL); + /* ATSD is pointless without GPU RAM MR so skip those */ + if (!nvmrobj) { + continue; + } + + ++valid_gpu_num; + memory_region_add_subregion(get_system_memory(), nvslot->gpa, + MEMORY_REGION(nvmrobj)); + + for (j = 0; j < nvslot->linknum; ++j) { + Object *atsdmrobj; + + atsdmrobj = object_property_get_link(OBJECT(nvslot->links[j].npdev), + "nvlink2-atsd-mr[0]", NULL); + if (!atsdmrobj) { + continue; + } + memory_region_add_subregion(get_system_memory(), + nvslot->links[j].atsd_gpa, + MEMORY_REGION(atsdmrobj)); + } + } + + if (valid_gpu_num) { + return; + } + /* We did not find any interesting GPU */ +cleanup_exit: + g_free(sphb->nvgpus); + sphb->nvgpus = NULL; +} + +void spapr_phb_nvgpu_free(SpaprPhbState *sphb) +{ + int i, j; + + if (!sphb->nvgpus) { + return; + } + + for (i = 0; i < sphb->nvgpus->num; ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev), + "nvlink2-mr[0]", NULL); + + if (nv_mrobj) { + memory_region_del_subregion(get_system_memory(), + MEMORY_REGION(nv_mrobj)); + } + for (j = 0; j < nvslot->linknum; ++j) { + PCIDevice *npdev = nvslot->links[j].npdev; + Object *atsd_mrobj; + atsd_mrobj = object_property_get_link(OBJECT(npdev), + "nvlink2-atsd-mr[0]", NULL); + if (atsd_mrobj) { + memory_region_del_subregion(get_system_memory(), + MEMORY_REGION(atsd_mrobj)); + } + } + } + g_free(sphb->nvgpus); + sphb->nvgpus = NULL; +} + +void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off, + Error **errp) +{ + int i, j, atsdnum = 0; + uint64_t atsd[8]; /* The existing limitation of known guests */ + + if (!sphb->nvgpus) { + return; + } + + for (i = 0; (i < sphb->nvgpus->num) && (atsdnum < ARRAY_SIZE(atsd)); ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + + if (!nvslot->gpdev) { + continue; + } + for (j = 0; j < nvslot->linknum; ++j) { + if (!nvslot->links[j].atsd_gpa) { + continue; + } + + if (atsdnum == ARRAY_SIZE(atsd)) { + error_report("Only %"PRIuPTR" ATSD registers 
supported", + ARRAY_SIZE(atsd)); + break; + } + atsd[atsdnum] = cpu_to_be64(nvslot->links[j].atsd_gpa); + ++atsdnum; + } + } + + if (!atsdnum) { + error_setg(errp, "No ATSD registers found"); + return; + } + + if (!spapr_phb_eeh_available(sphb)) { + /* + * ibm,mmio-atsd contains ATSD registers; these belong to an NPU PHB + * which we do not emulate as a separate device. Instead we put + * ibm,mmio-atsd to the vPHB with GPU and make sure that we do not + * put GPUs from different IOMMU groups to the same vPHB to ensure + * that the guest will use ATSDs from the corresponding NPU. + */ + error_setg(errp, "ATSD requires separate vPHB per GPU IOMMU group"); + return; + } + + _FDT((fdt_setprop(fdt, bus_off, "ibm,mmio-atsd", atsd, + atsdnum * sizeof(atsd[0])))); +} + +void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt) +{ + int i, j, linkidx, npuoff; + char *npuname; + + if (!sphb->nvgpus) { + return; + } + + npuname = g_strdup_printf("npuphb%d", sphb->index); + npuoff = fdt_add_subnode(fdt, 0, npuname); + _FDT(npuoff); + _FDT(fdt_setprop_cell(fdt, npuoff, "#address-cells", 1)); + _FDT(fdt_setprop_cell(fdt, npuoff, "#size-cells", 0)); + /* Advertise NPU as POWER9 so the guest can enable NPU2 contexts */ + _FDT((fdt_setprop_string(fdt, npuoff, "compatible", "ibm,power9-npu"))); + g_free(npuname); + + for (i = 0, linkidx = 0; i < sphb->nvgpus->num; ++i) { + for (j = 0; j < sphb->nvgpus->slots[i].linknum; ++j) { + char *linkname = g_strdup_printf("link@%d", linkidx); + int off = fdt_add_subnode(fdt, npuoff, linkname); + + _FDT(off); + /* _FDT((fdt_setprop_cell(fdt, off, "reg", linkidx))); */ + _FDT((fdt_setprop_string(fdt, off, "compatible", + "ibm,npu-link"))); + _FDT((fdt_setprop_cell(fdt, off, "phandle", + PHANDLE_NVLINK(sphb, i, j)))); + _FDT((fdt_setprop_cell(fdt, off, "ibm,npu-link-index", linkidx))); + g_free(linkname); + ++linkidx; + } + } + + /* Add memory nodes for GPU RAM and mark them unusable */ + for (i = 0; i < sphb->nvgpus->num; ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev), + "nvlink2-mr[0]", + &error_abort); + uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL); + uint64_t mem_reg[2] = { cpu_to_be64(nvslot->gpa), cpu_to_be64(size) }; + char *mem_name = g_strdup_printf("memory@%"PRIx64, nvslot->gpa); + int off = fdt_add_subnode(fdt, 0, mem_name); + + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg)))); + + spapr_numa_write_associativity_dt(SPAPR_MACHINE(qdev_get_machine()), + fdt, off, nvslot->numa_id); + + _FDT((fdt_setprop_string(fdt, off, "compatible", + "ibm,coherent-device-memory"))); + + mem_reg[1] = cpu_to_be64(0); + _FDT((fdt_setprop(fdt, off, "linux,usable-memory", mem_reg, + sizeof(mem_reg)))); + _FDT((fdt_setprop_cell(fdt, off, "phandle", + PHANDLE_GPURAM(sphb, i)))); + g_free(mem_name); + } + +} + +void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset, + SpaprPhbState *sphb) +{ + int i, j; + + if (!sphb->nvgpus) { + return; + } + + for (i = 0; i < sphb->nvgpus->num; ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + + /* Skip "slot" without attached GPU */ + if (!nvslot->gpdev) { + continue; + } + if (dev == nvslot->gpdev) { + uint32_t npus[nvslot->linknum]; + + for (j = 0; j < nvslot->linknum; ++j) { + PCIDevice *npdev = nvslot->links[j].npdev; + + npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev)); + } + _FDT(fdt_setprop(fdt, 
offset, "ibm,npu", npus, + j * sizeof(npus[0]))); + _FDT((fdt_setprop_cell(fdt, offset, "phandle", + PHANDLE_PCIDEV(sphb, dev)))); + continue; + } + + for (j = 0; j < nvslot->linknum; ++j) { + if (dev != nvslot->links[j].npdev) { + continue; + } + + _FDT((fdt_setprop_cell(fdt, offset, "phandle", + PHANDLE_PCIDEV(sphb, dev)))); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu", + PHANDLE_PCIDEV(sphb, nvslot->gpdev))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink", + PHANDLE_NVLINK(sphb, i, j)))); + /* + * If we ever want to emulate GPU RAM at the same location as on + * the host - here is the encoding GPA->TGT: + * + * gta = ((sphb->nv2_gpa >> 42) & 0x1) << 42; + * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43; + * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45; + * gta |= sphb->nv2_gpa & ((1UL << 43) - 1); + */ + _FDT(fdt_setprop_cell(fdt, offset, "memory-region", + PHANDLE_GPURAM(sphb, i))); + _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr", + nvslot->tgt)); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed", + nvslot->links[j].link_speed)); + } + } +} diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c new file mode 100644 index 000000000..2a76b4e0b --- /dev/null +++ b/hw/ppc/spapr_pci_vfio.c @@ -0,0 +1,217 @@ +/* + * QEMU sPAPR PCI host for VFIO + * + * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include <linux/vfio.h> +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "hw/pci/msix.h" +#include "hw/vfio/vfio.h" +#include "qemu/error-report.h" + +bool spapr_phb_eeh_available(SpaprPhbState *sphb) +{ + return vfio_eeh_as_ok(&sphb->iommu_as); +} + +static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb) +{ + vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE); +} + +void spapr_phb_vfio_reset(DeviceState *qdev) +{ + /* + * The PE might be in frozen state. To reenable the EEH + * functionality on it will clean the frozen state, which + * ensures that the contained PCI devices will work properly + * after reboot. + */ + spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); +} + +static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + bool *found = opaque; + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + *found = true; + } +} + +int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, + unsigned int addr, int option) +{ + uint32_t op; + int ret; + + switch (option) { + case RTAS_EEH_DISABLE: + op = VFIO_EEH_PE_DISABLE; + break; + case RTAS_EEH_ENABLE: { + PCIHostState *phb; + bool found = false; + + /* + * The EEH functionality is enabled per sphb level instead of + * per PCI device. We have already identified this specific sphb + * based on buid passed as argument to ibm,set-eeh-option rtas + * call. Now we just need to check the validity of the PCI + * pass-through devices (vfio-pci) under this sphb bus. 
+ * We have already validated that all the devices under this sphb + * are from the same iommu group (within same PE) before coming here. + * + * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: + * Rework device EEH PE determination") the kernel would call + * eeh-set-option for each device in the PE using the device's + * config_address as the argument rather than the PE address. + * Hence, checking whether the supplied config_addr matches this + * PHB would cause issues with older kernel versions (v5.9 and + * older). If we return an error from eeh-set-option when the + * argument isn't a valid PE address then older kernels (v5.9 and + * older) will interpret that as EEH not being supported. + */ + phb = PCI_HOST_BRIDGE(sphb); + pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, + spapr_eeh_pci_find_device, &found); + + if (!found) { + return RTAS_OUT_PARAM_ERROR; + } + + op = VFIO_EEH_PE_ENABLE; + break; + } + case RTAS_EEH_THAW_IO: + op = VFIO_EEH_PE_UNFREEZE_IO; + break; + case RTAS_EEH_THAW_DMA: + op = VFIO_EEH_PE_UNFREEZE_DMA; + break; + default: + return RTAS_OUT_PARAM_ERROR; + } + + ret = vfio_eeh_as_op(&sphb->iommu_as, op); + if (ret < 0) { + return RTAS_OUT_HW_ERROR; + } + + return RTAS_OUT_SUCCESS; +} + +int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) +{ + int ret; + + ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE); + if (ret < 0) { + return RTAS_OUT_PARAM_ERROR; + } + + *state = ret; + return RTAS_OUT_SUCCESS; +} + +static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, + PCIDevice *pdev, + void *opaque) +{ + /* Check if the device is a VFIO PCI device */ + if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + return; + } + + /* + * The MSIx table will be cleaned out by reset. We need to + * disable it so that it can be reenabled properly. Also, + * the cached MSIx table should be cleared as it's not + * reflecting the contents in hardware.
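+ * (A reading of the code below: the read-modify-write of + * PCI_MSIX_FLAGS goes through pci_host_config_read_common()/ + * pci_host_config_write_common() so the access is routed through + * the device's config-space handlers and reaches the vfio-backed + * hardware, not just QEMU's cached copy of config space.)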
+ */ + if (msix_enabled(pdev)) { + uint16_t flags; + + flags = pci_host_config_read_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), 2); + flags &= ~PCI_MSIX_FLAGS_ENABLE; + pci_host_config_write_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), flags, 2); + } + + msix_reset(pdev); +} + +static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) +{ + pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix, + NULL); +} + +static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(sphb); + + pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL); +} + +int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) +{ + uint32_t op; + int ret; + + switch (option) { + case RTAS_SLOT_RESET_DEACTIVATE: + op = VFIO_EEH_PE_RESET_DEACTIVATE; + break; + case RTAS_SLOT_RESET_HOT: + spapr_phb_vfio_eeh_pre_reset(sphb); + op = VFIO_EEH_PE_RESET_HOT; + break; + case RTAS_SLOT_RESET_FUNDAMENTAL: + spapr_phb_vfio_eeh_pre_reset(sphb); + op = VFIO_EEH_PE_RESET_FUNDAMENTAL; + break; + default: + return RTAS_OUT_PARAM_ERROR; + } + + ret = vfio_eeh_as_op(&sphb->iommu_as, op); + if (ret < 0) { + return RTAS_OUT_HW_ERROR; + } + + return RTAS_OUT_SUCCESS; +} + +int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) +{ + int ret; + + ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE); + if (ret < 0) { + return RTAS_OUT_PARAM_ERROR; + } + + return RTAS_OUT_SUCCESS; +} diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c new file mode 100644 index 000000000..df5c4b968 --- /dev/null +++ b/hw/ppc/spapr_rng.c @@ -0,0 +1,162 @@ +/* + * QEMU sPAPR random number generator "device" for H_RANDOM hypercall + * + * Copyright 2015 Thomas Huth, Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "sysemu/device_tree.h" +#include "sysemu/rng.h" +#include "hw/ppc/spapr.h" +#include "hw/qdev-properties.h" +#include "kvm_ppc.h" +#include "qom/object.h" + +OBJECT_DECLARE_SIMPLE_TYPE(SpaprRngState, SPAPR_RNG) + +struct SpaprRngState { + /*< private >*/ + DeviceState ds; + RngBackend *backend; + bool use_kvm; +}; + +struct HRandomData { + QemuSemaphore sem; + union { + uint64_t v64; + uint8_t v8[8]; + } val; + int received; +}; +typedef struct HRandomData HRandomData; + +/* Callback function for the RngBackend */ +static void random_recv(void *dest, const void *src, size_t size) +{ + HRandomData *hrdp = dest; + + if (src && size > 0) { + assert(size + hrdp->received <= sizeof(hrdp->val.v8)); + memcpy(&hrdp->val.v8[hrdp->received], src, size); + hrdp->received += size; + } + + qemu_sem_post(&hrdp->sem); +} + +/* Handler for the H_RANDOM hypercall */ +static target_ulong h_random(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + SpaprRngState *rngstate; + HRandomData hrdata; + + rngstate = SPAPR_RNG(object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)); + + if (!rngstate || !rngstate->backend) { + return H_HARDWARE; + } + + qemu_sem_init(&hrdata.sem, 0); + hrdata.val.v64 = 0; + hrdata.received = 0; + + while (hrdata.received < 8) { + rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received, + random_recv, &hrdata); + qemu_mutex_unlock_iothread(); + qemu_sem_wait(&hrdata.sem); + qemu_mutex_lock_iothread(); + } + + qemu_sem_destroy(&hrdata.sem); + args[0] = hrdata.val.v64; + + return H_SUCCESS; +} + +static void spapr_rng_instance_init(Object *obj) +{ + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL) != NULL) { + error_report("spapr-rng cannot be instantiated twice!"); + return; + } + + object_property_set_description(obj, "rng", + "ID of the random number generator backend"); +} + +static void spapr_rng_realize(DeviceState *dev, Error **errp) +{ + + SpaprRngState *rngstate = SPAPR_RNG(dev); + + if (rngstate->use_kvm) { + if (kvmppc_enable_hwrng() == 0) { + return; + } + /* + * If the user specified both use-kvm and a backend, we fall back to + * the backend now. If not, provide an appropriate error message.
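+ * A typical invocation pairs this device with an RNG backend + * object, e.g. (illustrative command line): + * -object rng-random,filename=/dev/urandom,id=rng0 + * -device spapr-rng,rng=rng0 + * while "-device spapr-rng,use-kvm=true" requests the in-kernel + * H_RANDOM implementation instead.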
+ */ + if (!rngstate->backend) { + error_setg(errp, "Could not initialize in-kernel H_RANDOM call!"); + return; + } + } + + if (rngstate->backend) { + spapr_register_hypercall(H_RANDOM, h_random); + } else { + error_setg(errp, "spapr-rng needs an RNG backend!"); + } +} + +static Property spapr_rng_properties[] = { + DEFINE_PROP_BOOL("use-kvm", SpaprRngState, use_kvm, false), + DEFINE_PROP_LINK("rng", SpaprRngState, backend, TYPE_RNG_BACKEND, + RngBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_rng_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = spapr_rng_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, spapr_rng_properties); + dc->hotpluggable = false; +} + +static const TypeInfo spapr_rng_info = { + .name = TYPE_SPAPR_RNG, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprRngState), + .instance_init = spapr_rng_instance_init, + .class_init = spapr_rng_class_init, +}; + +static void spapr_rng_register_type(void) +{ + type_register_static(&spapr_rng_info); +} +type_init(spapr_rng_register_type) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c new file mode 100644 index 000000000..b476382ae --- /dev/null +++ b/hw/ppc/spapr_rtas.c @@ -0,0 +1,636 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * Hypercall based emulated RTAS + * + * Copyright (c) 2010-2011 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "sysemu/sysemu.h" +#include "sysemu/device_tree.h" +#include "sysemu/cpus.h" +#include "sysemu/hw_accel.h" +#include "sysemu/runstate.h" +#include "kvm_ppc.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_rtas.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/ppc.h" + +#include <libfdt.h> +#include "hw/ppc/spapr_drc.h" +#include "qemu/cutils.h" +#include "trace.h" +#include "hw/ppc/fdt.h" +#include "target/ppc/mmu-hash64.h" +#include "target/ppc/mmu-book3s-v3.h" +#include "migration/blocker.h" +#include "helper_regs.h" + +static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint8_t c = rtas_ld(args, 0); + SpaprVioDevice *sdev = vty_lookup(spapr, 0); + + if (!sdev) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + } else { + vty_putchars(sdev, &c, sizeof(c)); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + } +} + +static void rtas_power_off(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (nargs != 2 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + cpu_stop_current(); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_system_reboot(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (nargs != 0 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong id; + PowerPCCPU *cpu; + + if (nargs != 1 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + id = rtas_ld(args, 0); + cpu = spapr_find_cpu(id); + if (cpu != NULL) { + if (CPU(cpu)->halted) { + rtas_st(rets, 1, 0); + } else { + rtas_st(rets, 1, 2); + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + } + + /* Didn't find a matching cpu */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong id, start, r3; + PowerPCCPU *newcpu; + CPUPPCState *env; + target_ulong lpcr; + target_ulong caller_lpcr; + + if (nargs != 3 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + id = rtas_ld(args, 0); + start = rtas_ld(args, 1); + r3 = rtas_ld(args, 2); + + newcpu = spapr_find_cpu(id); + if (!newcpu) { + /* Didn't find a matching cpu */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + env = &newcpu->env; + + if (!CPU(newcpu)->halted) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + cpu_synchronize_state(CPU(newcpu)); + + env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); + hreg_compute_hflags(env); + + caller_lpcr = callcpu->env.spr[SPR_LPCR]; + lpcr = env->spr[SPR_LPCR]; + + /* Set ILE the same way */ + lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE); + + /* Set AIL the same way */ + lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL); + + if (env->mmu_model == POWERPC_MMU_3_00) { + 
/* + * New cpus are expected to start in the same radix/hash mode + * as the existing CPUs + */ + if (ppc64_v3_radix(callcpu)) { + lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; + } else { + lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR); + } + env->spr[SPR_PSSCR] &= ~PSSCR_EC; + } + ppc_store_lpcr(newcpu, lpcr); + + /* + * Set the timebase offset of the new CPU to that of the invoking + * CPU. This helps hotplugged CPU to have the correct timebase + * offset. + */ + newcpu->env.tb_env->tb_offset = callcpu->env.tb_env->tb_offset; + + spapr_cpu_set_entry_state(newcpu, start, 0, r3, 0); + + qemu_cpu_kick(CPU(newcpu)); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + /* Disable Power-saving mode Exit Cause exceptions for the CPU. + * This could deliver an interrupt on a dying CPU and crash the + * guest. + * For the same reason, set PSSCR_EC. + */ + ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm); + env->spr[SPR_PSSCR] |= PSSCR_EC; + cs->halted = 1; + kvmppc_set_reg_ppc_online(cpu, 0); + qemu_cpu_kick(cs); +} + +static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + CPUState *cs; + + if (nargs != 0 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + CPU_FOREACH(cs) { + PowerPCCPU *c = POWERPC_CPU(cs); + CPUPPCState *e = &c->env; + if (c == cpu) { + continue; + } + + /* See h_join */ + if (!cs->halted || (e->msr & (1ULL << MSR_EE))) { + rtas_st(rets, 0, H_MULTI_THREADS_ACTIVE); + return; + } + } + + qemu_system_suspend_request(); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static inline int sysparm_st(target_ulong addr, target_ulong len, + const void *val, uint16_t vallen) +{ + hwaddr phys = ppc64_phys_to_real(addr); + + if (len < 2) { + return RTAS_OUT_SYSPARM_PARAM_ERROR; + } + stw_be_phys(&address_space_memory, phys, vallen); + cpu_physical_memory_write(phys + 2, val, MIN(len - 2, vallen)); + return RTAS_OUT_SUCCESS; +} + +static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + MachineState *ms = MACHINE(spapr); + target_ulong parameter = rtas_ld(args, 0); + target_ulong buffer = rtas_ld(args, 1); + target_ulong length = rtas_ld(args, 2); + target_ulong ret; + + switch (parameter) { + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: { + char *param_val = g_strdup_printf("MaxEntCap=%d," + "DesMem=%" PRIu64 "," + "DesProcs=%d," + "MaxPlatProcs=%d", + ms->smp.max_cpus, + ms->ram_size / MiB, + ms->smp.cpus, + ms->smp.max_cpus); + if (pcc->n_host_threads > 0) { + char *hostthr_val, *old = param_val; + + /* + * Add HostThrs property. This property is not present in PAPR but + * is expected by some guests to communicate the number of physical + * host threads per core on the system so that they can scale + * information which varies based on the thread configuration. 
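+ * With those values filled in, the returned string looks roughly + * like (illustrative values only): + * MaxEntCap=8,DesMem=4096,DesProcs=4,MaxPlatProcs=8,HostThrs=8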
+ */ + hostthr_val = g_strdup_printf(",HostThrs=%d", pcc->n_host_threads); + param_val = g_strconcat(param_val, hostthr_val, NULL); + g_free(hostthr_val); + g_free(old); + } + ret = sysparm_st(buffer, length, param_val, strlen(param_val) + 1); + g_free(param_val); + break; + } + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: { + uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED; + + ret = sysparm_st(buffer, length, ¶m_val, sizeof(param_val)); + break; + } + case RTAS_SYSPARM_UUID: + ret = sysparm_st(buffer, length, (unsigned char *)&qemu_uuid, + (qemu_uuid_set ? 16 : 0)); + break; + default: + ret = RTAS_OUT_NOT_SUPPORTED; + } + + rtas_st(rets, 0, ret); +} + +static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong parameter = rtas_ld(args, 0); + target_ulong ret = RTAS_OUT_NOT_SUPPORTED; + + switch (parameter) { + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: + case RTAS_SYSPARM_UUID: + ret = RTAS_OUT_NOT_AUTHORIZED; + break; + } + + rtas_st(rets, 0, ret); +} + +static void rtas_ibm_os_term(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong msgaddr = rtas_ld(args, 0); + char msg[512]; + + cpu_physical_memory_read(msgaddr, msg, sizeof(msg) - 1); + msg[sizeof(msg) - 1] = 0; + + error_report("OS terminated: %s", msg); + qemu_system_guest_panicked(NULL); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_set_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + int32_t power_domain; + + if (nargs != 2 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* we currently only use a single, "live insert" powerdomain for + * hotplugged/dlpar'd resources, so the power is always live/full (100) + */ + power_domain = rtas_ld(args, 0); + if (power_domain != -1) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, 100); +} + +static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + int32_t power_domain; + + if (nargs != 1 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* we currently only use a single, "live insert" powerdomain for + * hotplugged/dlpar'd resources, so the power is always live/full (100) + */ + power_domain = rtas_ld(args, 0); + if (power_domain != -1) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, 100); +} + +static void rtas_ibm_nmi_register(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + hwaddr rtas_addr; + target_ulong sreset_addr, mce_addr; + + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + sreset_addr = rtas_ld(args, 0); + mce_addr = rtas_ld(args, 1); + + /* PAPR requires these are in the first 32M of memory and within RMA */ + if (sreset_addr >= 32 * MiB || sreset_addr >= spapr->rma_size || + mce_addr >= 32 * MiB || 
mce_addr >= spapr->rma_size) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (kvm_enabled()) { + if (kvmppc_set_fwnmi(cpu) < 0) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + } + + spapr->fwnmi_system_reset_addr = sreset_addr; + spapr->fwnmi_machine_check_addr = mce_addr; + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + if (spapr->fwnmi_machine_check_addr == -1) { + qemu_log_mask(LOG_GUEST_ERROR, +"FWNMI: ibm,nmi-interlock RTAS called with FWNMI not registered.\n"); + + /* NMI register not called */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (spapr->fwnmi_machine_check_interlock != cpu->vcpu_id) { + /* + * The vCPU that hit the NMI should invoke "ibm,nmi-interlock" + * This should be PARAM_ERROR, but Linux calls "ibm,nmi-interlock" + * for system reset interrupts, despite them not being interlocked. + * PowerVM silently ignores this and returns success here. Returning + * failure causes Linux to print the error "FWNMI: nmi-interlock + * failed: -3", although no other apparent ill effects, this is a + * regression for the user when enabling FWNMI. So for now, match + * PowerVM. When most Linux clients are fixed, this could be + * changed. + */ + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + } + + /* + * vCPU issuing "ibm,nmi-interlock" is done with NMI handling, + * hence unset fwnmi_machine_check_interlock. + */ + spapr->fwnmi_machine_check_interlock = -1; + qemu_cond_signal(&spapr->fwnmi_machine_check_interlock_cond); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + migrate_del_blocker(spapr->fwnmi_migration_blocker); +} + +static struct rtas_call { + const char *name; + spapr_rtas_fn fn; +} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE]; + +target_ulong spapr_rtas_call(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) { + struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE); + + if (call->fn) { + call->fn(cpu, spapr, token, nargs, args, nret, rets); + return H_SUCCESS; + } + } + + /* HACK: Some Linux early debug code uses RTAS display-character, + * but assumes the token value is 0xa (which it is on some real + * machines) without looking it up in the device tree. 
This + * special case makes this work */ + if (token == 0xa) { + rtas_display_character(cpu, spapr, 0xa, nargs, args, nret, rets); + return H_SUCCESS; + } + + hcall_dprintf("Unknown RTAS token 0x%x\n", token); + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return H_PARAMETER; +} + +uint64_t qtest_rtas_call(char *cmd, uint32_t nargs, uint64_t args, + uint32_t nret, uint64_t rets) +{ + int token; + + for (token = 0; token < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; token++) { + if (strcmp(cmd, rtas_table[token].name) == 0) { + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + + rtas_table[token].fn(cpu, spapr, token + RTAS_TOKEN_BASE, + nargs, args, nret, rets); + return H_SUCCESS; + } + } + return H_PARAMETER; +} + +void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn) +{ + assert((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)); + + token -= RTAS_TOKEN_BASE; + + assert(!name || !rtas_table[token].name); + + rtas_table[token].name = name; + rtas_table[token].fn = fn; +} + +void spapr_dt_rtas_tokens(void *fdt, int rtas) +{ + int i; + + for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) { + struct rtas_call *call = &rtas_table[i]; + + if (!call->name) { + continue; + } + + _FDT(fdt_setprop_cell(fdt, rtas, call->name, i + RTAS_TOKEN_BASE)); + } +} + +hwaddr spapr_get_rtas_addr(void) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + int rtas_node; + const fdt32_t *rtas_data; + void *fdt = spapr->fdt_blob; + + /* fetch rtas addr from fdt */ + rtas_node = fdt_path_offset(fdt, "/rtas"); + if (rtas_node < 0) { + return 0; + } + + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); + if (!rtas_data) { + return 0; + } + + /* + * We assume that the OS called RTAS instantiate-rtas, but some other + * OS might call RTAS instantiate-rtas-64 instead. This is fine for now, + * as SLOF only supports the 32-bit variant.
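+ * ("linux,rtas-base" is a single 32-bit cell, which is why the one + * fdt32_to_cpu() below suffices to recover the address.)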
+ */ + return (hwaddr)fdt32_to_cpu(*rtas_data); +} + +static void core_rtas_register_types(void) +{ + spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", + rtas_display_character); + spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off); + spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot", + rtas_system_reboot); + spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state", + rtas_query_cpu_stopped_state); + spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu); + spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self); + spapr_rtas_register(RTAS_IBM_SUSPEND_ME, "ibm,suspend-me", + rtas_ibm_suspend_me); + spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER, + "ibm,get-system-parameter", + rtas_ibm_get_system_parameter); + spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER, + "ibm,set-system-parameter", + rtas_ibm_set_system_parameter); + spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term", + rtas_ibm_os_term); + spapr_rtas_register(RTAS_SET_POWER_LEVEL, "set-power-level", + rtas_set_power_level); + spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level", + rtas_get_power_level); + spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register", + rtas_ibm_nmi_register); + spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock", + rtas_ibm_nmi_interlock); +} + +type_init(core_rtas_register_types) diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c new file mode 100644 index 000000000..3e826e130 --- /dev/null +++ b/hw/ppc/spapr_rtas_ddw.c @@ -0,0 +1,291 @@ +/* + * QEMU sPAPR Dynamic DMA windows support + * + * Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "trace.h" + +static int spapr_phb_get_active_win_num_cb(Object *child, void *opaque) +{ + SpaprTceTable *tcet; + + tcet = (SpaprTceTable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + if (tcet && tcet->nb_table) { + ++*(unsigned *)opaque; + } + return 0; +} + +static unsigned spapr_phb_get_active_win_num(SpaprPhbState *sphb) +{ + unsigned ret = 0; + + object_child_foreach(OBJECT(sphb), spapr_phb_get_active_win_num_cb, &ret); + + return ret; +} + +static int spapr_phb_get_free_liobn_cb(Object *child, void *opaque) +{ + SpaprTceTable *tcet; + + tcet = (SpaprTceTable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + if (tcet && !tcet->nb_table) { + *(uint32_t *)opaque = tcet->liobn; + return 1; + } + return 0; +} + +static unsigned spapr_phb_get_free_liobn(SpaprPhbState *sphb) +{ + uint32_t liobn = 0; + + object_child_foreach(OBJECT(sphb), spapr_phb_get_free_liobn_cb, &liobn); + + return liobn; +} + +static uint32_t spapr_page_mask_to_query_mask(uint64_t page_mask) +{ + int i; + uint32_t mask = 0; + const struct { int shift; uint32_t mask; } masks[] = { + { 12, RTAS_DDW_PGSIZE_4K }, + { 16, RTAS_DDW_PGSIZE_64K }, + { 24, RTAS_DDW_PGSIZE_16M }, + { 25, RTAS_DDW_PGSIZE_32M }, + { 26, RTAS_DDW_PGSIZE_64M }, + { 27, RTAS_DDW_PGSIZE_128M }, + { 28, RTAS_DDW_PGSIZE_256M }, + { 34, RTAS_DDW_PGSIZE_16G }, + }; + + for (i = 0; i < ARRAY_SIZE(masks); ++i) { + if (page_mask & (1ULL << masks[i].shift)) { + mask |= masks[i].mask; + } + } + + return mask; +} + +static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + uint32_t avail, addr, pgmask = 0; + + if ((nargs != 3) || (nret != 5)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + /* Translate page mask to LoPAPR format */ + pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask); + + avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, avail); + rtas_st(rets, 2, 0x80000000); /* The largest window we can possibly have */ + rtas_st(rets, 3, pgmask); + rtas_st(rets, 4, 0); /* DMA migration mask, not supported */ + + trace_spapr_iommu_ddw_query(buid, addr, avail, 0x80000000, pgmask); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_create_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + SpaprTceTable *tcet = NULL; + uint32_t addr, page_shift, window_shift, liobn; + uint64_t buid, win_addr; + int windows; + + if ((nargs != 5) || (nret != 4)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + page_shift = rtas_ld(args, 3); + window_shift = rtas_ld(args, 4); + liobn = spapr_phb_get_free_liobn(sphb); + windows = spapr_phb_get_active_win_num(sphb); + + if (!(sphb->page_size_mask & (1ULL << page_shift)) || + 
(window_shift < page_shift)) { + goto param_error_exit; + } + + if (!liobn || !sphb->ddw_enabled || windows == SPAPR_PCI_DMA_MAX_WINDOWS) { + goto hw_error_exit; + } + + tcet = spapr_tce_find_by_liobn(liobn); + if (!tcet) { + goto hw_error_exit; + } + + win_addr = (windows == 0) ? sphb->dma_win_addr : sphb->dma64_win_addr; + /* + * We have just created a window and we know for a fact that it is empty; + * use a hack to avoid iterating over the table as it is quite possible + * to have billions of TCEs, all empty. + * Note that we cannot delay this to the first H_PUT_TCE as this hcall is + * most likely to be handled in KVM so QEMU just does not know if it + * happened. + */ + tcet->skipping_replay = true; + spapr_tce_table_enable(tcet, page_shift, win_addr, + 1ULL << (window_shift - page_shift)); + tcet->skipping_replay = false; + if (!tcet->nb_table) { + goto hw_error_exit; + } + + trace_spapr_iommu_ddw_create(buid, addr, 1ULL << page_shift, + 1ULL << window_shift, tcet->bus_offset, liobn); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, liobn); + rtas_st(rets, 2, tcet->bus_offset >> 32); + rtas_st(rets, 3, tcet->bus_offset & ((uint32_t) -1)); + + return; + +hw_error_exit: + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_remove_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + SpaprTceTable *tcet; + uint32_t liobn; + + if ((nargs != 1) || (nret != 1)) { + goto param_error_exit; + } + + liobn = rtas_ld(args, 0); + tcet = spapr_tce_find_by_liobn(liobn); + if (!tcet) { + goto param_error_exit; + } + + sphb = SPAPR_PCI_HOST_BRIDGE(OBJECT(tcet)->parent); + if (!sphb || !sphb->ddw_enabled || !tcet->nb_table) { + goto param_error_exit; + } + + spapr_tce_table_disable(tcet); + trace_spapr_iommu_ddw_remove(liobn); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_reset_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + uint32_t addr; + + if ((nargs != 3) || (nret != 1)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + spapr_phb_dma_reset(sphb); + trace_spapr_iommu_ddw_reset(buid, addr); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void spapr_rtas_ddw_init(void) +{ + spapr_rtas_register(RTAS_IBM_QUERY_PE_DMA_WINDOW, + "ibm,query-pe-dma-window", + rtas_ibm_query_pe_dma_window); + spapr_rtas_register(RTAS_IBM_CREATE_PE_DMA_WINDOW, + "ibm,create-pe-dma-window", + rtas_ibm_create_pe_dma_window); + spapr_rtas_register(RTAS_IBM_REMOVE_PE_DMA_WINDOW, + "ibm,remove-pe-dma-window", + rtas_ibm_remove_pe_dma_window); + spapr_rtas_register(RTAS_IBM_RESET_PE_DMA_WINDOW, + "ibm,reset-pe-dma-window", + rtas_ibm_reset_pe_dma_window); +} + +type_init(spapr_rtas_ddw_init) diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c new file mode 100644 index 000000000..fba4dfca3 --- /dev/null +++ b/hw/ppc/spapr_rtc.c @@ -0,0 +1,190 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware 
System Emulator + * + * RTAS Real Time Clock + * + * Copyright (c) 2010-2011 David Gibson, IBM Corporation. + * Copyright 2014 David Gibson, Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "hw/ppc/spapr.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/qapi-events-misc-target.h" +#include "qemu/cutils.h" +#include "qemu/module.h" + +void spapr_rtc_read(SpaprRtcState *rtc, struct tm *tm, uint32_t *ns) +{ + int64_t host_ns = qemu_clock_get_ns(rtc_clock); + int64_t guest_ns; + time_t guest_s; + + assert(rtc); + + guest_ns = host_ns + rtc->ns_offset; + guest_s = guest_ns / NANOSECONDS_PER_SECOND; + + if (tm) { + gmtime_r(&guest_s, tm); + } + if (ns) { + *ns = guest_ns; + } +} + +int spapr_rtc_import_offset(SpaprRtcState *rtc, int64_t legacy_offset) +{ + if (!rtc) { + return -ENODEV; + } + + rtc->ns_offset = legacy_offset * NANOSECONDS_PER_SECOND; + + return 0; +} + +static void rtas_get_time_of_day(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + struct tm tm; + uint32_t ns; + + if ((nargs != 0) || (nret != 8)) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + spapr_rtc_read(&spapr->rtc, &tm, &ns); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, tm.tm_year + 1900); + rtas_st(rets, 2, tm.tm_mon + 1); + rtas_st(rets, 3, tm.tm_mday); + rtas_st(rets, 4, tm.tm_hour); + rtas_st(rets, 5, tm.tm_min); + rtas_st(rets, 6, tm.tm_sec); + rtas_st(rets, 7, ns); +} + +static void rtas_set_time_of_day(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprRtcState *rtc = &spapr->rtc; + struct tm tm; + time_t new_s; + int64_t host_ns; + + if ((nargs != 7) || (nret != 1)) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + tm.tm_year = rtas_ld(args, 0) - 1900; + tm.tm_mon = rtas_ld(args, 1) - 1; + tm.tm_mday = rtas_ld(args, 2); + tm.tm_hour = rtas_ld(args, 3); + tm.tm_min = rtas_ld(args, 4); + tm.tm_sec = rtas_ld(args, 5); + + new_s = mktimegm(&tm); + if (new_s == -1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* Generate a monitor event for the change */ + qapi_event_send_rtc_change(qemu_timedate_diff(&tm)); + + host_ns = qemu_clock_get_ns(rtc_clock); + + rtc->ns_offset = (new_s * NANOSECONDS_PER_SECOND) - 
host_ns; + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void spapr_rtc_qom_date(Object *obj, struct tm *current_tm, Error **errp) +{ + spapr_rtc_read(SPAPR_RTC(obj), current_tm, NULL); +} + +static void spapr_rtc_realize(DeviceState *dev, Error **errp) +{ + SpaprRtcState *rtc = SPAPR_RTC(dev); + struct tm tm; + time_t host_s; + int64_t rtc_ns; + + /* Initialize the RTAS RTC from host time */ + + qemu_get_timedate(&tm, 0); + host_s = mktimegm(&tm); + rtc_ns = qemu_clock_get_ns(rtc_clock); + rtc->ns_offset = host_s * NANOSECONDS_PER_SECOND - rtc_ns; + + object_property_add_tm(OBJECT(rtc), "date", spapr_rtc_qom_date); +} + +static const VMStateDescription vmstate_spapr_rtc = { + .name = "spapr/rtc", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT64(ns_offset, SpaprRtcState), + VMSTATE_END_OF_LIST() + }, +}; + +static void spapr_rtc_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = spapr_rtc_realize; + dc->vmsd = &vmstate_spapr_rtc; + /* Reason: This is an internal device only for handling the hypercalls */ + dc->user_creatable = false; + + spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day", + rtas_get_time_of_day); + spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day", + rtas_set_time_of_day); +} + +static const TypeInfo spapr_rtc_info = { + .name = TYPE_SPAPR_RTC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprRtcState), + .class_init = spapr_rtc_class_init, +}; + +static void spapr_rtc_register_types(void) +{ + type_register_static(&spapr_rtc_info); +} +type_init(spapr_rtc_register_types) diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c new file mode 100644 index 000000000..4ee03c83e --- /dev/null +++ b/hw/ppc/spapr_softmmu.c @@ -0,0 +1,612 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "cpu.h" +#include "helper_regs.h" +#include "hw/ppc/spapr.h" +#include "mmu-hash64.h" +#include "mmu-book3s-v3.h" + +static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) +{ + /* + * hash value/pteg group index is normalized by HPT mask + */ + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { + return false; + } + return true; +} + +static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong pteh = args[2]; + target_ulong ptel = args[3]; + unsigned apshift; + target_ulong raddr; + target_ulong slot; + const ppc_hash_pte64_t *hptes; + + apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); + if (!apshift) { + /* Bad page size encoding */ + return H_PARAMETER; + } + + raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); + + if (is_ram_address(spapr, raddr)) { + /* Regular RAM - should have WIMG=0010 */ + if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { + return H_PARAMETER; + } + } else { + target_ulong wimg_flags; + /* Looks like an IO address */ + /* FIXME: What WIMG combinations could be sensible for IO? + * For now we allow WIMG=010x, but are there others? */ + /* FIXME: Should we check against registered IO addresses? 
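+ * (As written, the check below requires I=1 with W=0 and accepts + * either value of M, i.e. caching-inhibited mappings whether or + * not memory-coherence is set; the G bit is not inspected at all.)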
*/ + wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); + + if (wimg_flags != HPTE64_R_I && + wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { + return H_PARAMETER; + } + } + + pteh &= ~0x60ULL; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + slot = ptex & 7ULL; + ptex = ptex & ~7ULL; + + if (likely((flags & H_EXACT) == 0)) { + hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + for (slot = 0; slot < 8; slot++) { + if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { + break; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + if (slot == 8) { + return H_PTEG_FULL; + } + } else { + hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); + if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); + return H_PTEG_FULL; + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + } + + spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); + + args[0] = ptex + slot; + return H_SUCCESS; +} + +typedef enum { + REMOVE_SUCCESS = 0, + REMOVE_NOT_FOUND = 1, + REMOVE_PARM = 2, + REMOVE_HW = 3, +} RemoveResult; + +static RemoveResult remove_hpte(PowerPCCPU *cpu + , target_ulong ptex, + target_ulong avpn, + target_ulong flags, + target_ulong *vp, target_ulong *rp) +{ + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return REMOVE_PARM; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || + ((flags & H_ANDCOND) && (v & avpn) != 0)) { + return REMOVE_NOT_FOUND; + } + *vp = v; + *rp = r; + spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + return REMOVE_SUCCESS; +} + +static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + RemoveResult ret; + + ret = remove_hpte(cpu, ptex, avpn, flags, + &args[0], &args[1]); + + switch (ret) { + case REMOVE_SUCCESS: + check_tlb_flush(env, true); + return H_SUCCESS; + + case REMOVE_NOT_FOUND: + return H_NOT_FOUND; + + case REMOVE_PARM: + return H_PARAMETER; + + case REMOVE_HW: + return H_HARDWARE; + } + + g_assert_not_reached(); +} + +#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL +#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL +#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL +#define H_BULK_REMOVE_END 0xc000000000000000ULL +#define H_BULK_REMOVE_CODE 0x3000000000000000ULL +#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL +#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL +#define H_BULK_REMOVE_PARM 0x2000000000000000ULL +#define H_BULK_REMOVE_HW 0x3000000000000000ULL +#define H_BULK_REMOVE_RC 0x0c00000000000000ULL +#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL +#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL +#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL +#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL +#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL + +#define H_BULK_REMOVE_MAX_BATCH 4 + +static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + int i; + target_ulong rc = H_SUCCESS; + + for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { + 
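+ /* + * Each batch entry is a (tsh, tsl) pair: tsh packs the request + * type, flags and PTE index according to the H_BULK_REMOVE_* masks + * above and is rewritten in place to carry the response code; tsl + * holds the AVPN value used for H_AVPN/H_ANDCOND matching in + * remove_hpte(). + */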
target_ulong *tsh = &args[i*2]; + target_ulong tsl = args[i*2 + 1]; + target_ulong v, r, ret; + + if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { + break; + } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { + return H_PARAMETER; + } + + *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; + *tsh |= H_BULK_REMOVE_RESPONSE; + + if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { + *tsh |= H_BULK_REMOVE_PARM; + return H_PARAMETER; + } + + ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, + (*tsh & H_BULK_REMOVE_FLAGS) >> 26, + &v, &r); + + *tsh |= ret << 60; + + switch (ret) { + case REMOVE_SUCCESS: + *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; + break; + + case REMOVE_PARM: + rc = H_PARAMETER; + goto exit; + + case REMOVE_HW: + rc = H_HARDWARE; + goto exit; + } + } + exit: + check_tlb_flush(env, true); + + return rc; +} + +static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { + return H_NOT_FOUND; + } + + r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | + HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); + r |= (flags << 55) & HPTE64_R_PP0; + r |= (flags << 48) & HPTE64_R_KEY_HI; + r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); + spapr_store_hpte(cpu, ptex, + (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + /* Flush the tlb */ + check_tlb_flush(env, true); + /* Don't need a memory barrier, due to qemu's global lock */ + spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); + return H_SUCCESS; +} + +static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + int i, ridx, n_entries = 1; + const ppc_hash_pte64_t *hptes; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + if (flags & H_READ_4) { + /* Clear the two low order bits */ + ptex &= ~(3ULL); + n_entries = 4; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); + for (i = 0, ridx = 0; i < n_entries; i++) { + args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); + args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); + + return H_SUCCESS; +} + +struct SpaprPendingHpt { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(SpaprPendingHpt *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + SpaprPendingHpt *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_try_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + 
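+ /* + * Reacquire the BQL before publishing the result: the allocation + * above ran outside the lock, and the prepare may have been + * cancelled (spapr->pending_hpt repointed) while it was running. + */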
qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { + /* Ready to go */ + pending->complete = true; + } else { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + } + + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(SpaprMachineState *spapr) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if (!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + +target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + pending = g_new0(SpaprPendingHpt, 1); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void *htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HPTE64_DW1, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); + target_ulong pte1; + uint64_t avpn; + unsigned base_pg_shift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_hpte1(cpu, hptes, slot); + + base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); + assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (base_pg_shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; + } + + hash = vsid ^ (offset >> base_pg_shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (base_pg_shift < 23) { + offset |= 
((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) + << base_pg_shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); + /* + * Strictly speaking, we don't need all these tests, since we only + * ever rehash bolted HPTEs. We might in future handle non-bolted + * HPTEs, though so make the logic correct for those cases as + * well. + */ + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize) +{ + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + for (pteg = 0; pteg < n_ptegs; pteg++) { + hwaddr ptex = pteg * HPTES_PER_GROUP; + const ppc_hash_pte64_t *hptes + = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + + if (!hptes) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + return rc; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + } + + return H_SUCCESS; +} + +target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong flags, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + int rc; + size_t newsize; + + if (flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + /* Shouldn't have got past PREPARE without an HPT */ + g_assert(spapr->htab_shift); + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + push_sregs_to_kvm_pr(spapr); + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; +} + +static void hypercall_register_types(void) +{ + /* hcall-pft */ + spapr_register_hypercall(H_ENTER, h_enter); + spapr_register_hypercall(H_REMOVE, h_remove); + spapr_register_hypercall(H_PROTECT, h_protect); + spapr_register_hypercall(H_READ, h_read); + + /* hcall-bulk */ + spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + +} + +type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c new file mode 100644 index 000000000..245408674 --- /dev/null +++ b/hw/ppc/spapr_tpm_proxy.c @@ -0,0 +1,177 @@ +/* + * SPAPR TPM Proxy/Hypercall + * + * Copyright IBM Corp. 
2019 + * + * Authors: + * Michael Roth <mdroth@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "sysemu/reset.h" +#include "hw/ppc/spapr.h" +#include "hw/qdev-properties.h" +#include "trace.h" + +#define TPM_SPAPR_BUFSIZE 4096 + +enum { + TPM_COMM_OP_EXECUTE = 1, + TPM_COMM_OP_CLOSE_SESSION = 2, +}; + +static void spapr_tpm_proxy_reset(void *opaque) +{ + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(opaque); + + if (tpm_proxy->host_fd != -1) { + close(tpm_proxy->host_fd); + tpm_proxy->host_fd = -1; + } +} + +static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args) +{ + uint64_t data_in = ppc64_phys_to_real(args[1]); + target_ulong data_in_size = args[2]; + uint64_t data_out = ppc64_phys_to_real(args[3]); + target_ulong data_out_size = args[4]; + uint8_t buf_in[TPM_SPAPR_BUFSIZE]; + uint8_t buf_out[TPM_SPAPR_BUFSIZE]; + ssize_t ret; + + trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size); + + if (data_in_size > TPM_SPAPR_BUFSIZE) { + error_report("invalid TPM input buffer size: " TARGET_FMT_lu, + data_in_size); + return H_P3; + } + + if (data_out_size < TPM_SPAPR_BUFSIZE) { + error_report("invalid TPM output buffer size: " TARGET_FMT_lu, + data_out_size); + return H_P5; + } + + if (tpm_proxy->host_fd == -1) { + tpm_proxy->host_fd = open(tpm_proxy->host_path, O_RDWR); + if (tpm_proxy->host_fd == -1) { + error_report("failed to open TPM device %s: %d", + tpm_proxy->host_path, errno); + return H_RESOURCE; + } + } + + cpu_physical_memory_read(data_in, buf_in, data_in_size); + + do { + ret = write(tpm_proxy->host_fd, buf_in, data_in_size); + if (ret > 0) { + data_in_size -= ret; + } + } while ((ret >= 0 && data_in_size > 0) || (ret == -1 && errno == EINTR)); + + if (ret == -1) { + error_report("failed to write to TPM device %s: %d", + tpm_proxy->host_path, errno); + return H_RESOURCE; + } + + do { + ret = read(tpm_proxy->host_fd, buf_out, data_out_size); + } while (ret == 0 || (ret == -1 && errno == EINTR)); + + if (ret == -1) { + error_report("failed to read from TPM device %s: %d", + tpm_proxy->host_path, errno); + return H_RESOURCE; + } + + cpu_physical_memory_write(data_out, buf_out, ret); + args[0] = ret; + + return H_SUCCESS; +} + +static target_ulong h_tpm_comm(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong op = args[0]; + SpaprTpmProxy *tpm_proxy = spapr->tpm_proxy; + + if (!tpm_proxy) { + error_report("TPM proxy not available"); + return H_FUNCTION; + } + + trace_spapr_h_tpm_comm(tpm_proxy->host_path, op); + + switch (op) { + case TPM_COMM_OP_EXECUTE: + return tpm_execute(tpm_proxy, args); + case TPM_COMM_OP_CLOSE_SESSION: + spapr_tpm_proxy_reset(tpm_proxy); + return H_SUCCESS; + default: + return H_PARAMETER; + } +} + +static void spapr_tpm_proxy_realize(DeviceState *d, Error **errp) +{ + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(d); + + if (tpm_proxy->host_path == NULL) { + error_setg(errp, "must specify 'host-path' option for device"); + return; + } + + tpm_proxy->host_fd = -1; + qemu_register_reset(spapr_tpm_proxy_reset, tpm_proxy); +} + +static void spapr_tpm_proxy_unrealize(DeviceState *d) +{ + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(d); + + qemu_unregister_reset(spapr_tpm_proxy_reset, tpm_proxy); +} + +static Property spapr_tpm_proxy_properties[] = { + 
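+ /* Path to the host's TPM character device, e.g. /dev/tpmrm0. */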
DEFINE_PROP_STRING("host-path", SpaprTpmProxy, host_path), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_tpm_proxy_class_init(ObjectClass *k, void *data) +{ + DeviceClass *dk = DEVICE_CLASS(k); + + dk->realize = spapr_tpm_proxy_realize; + dk->unrealize = spapr_tpm_proxy_unrealize; + dk->user_creatable = true; + device_class_set_props(dk, spapr_tpm_proxy_properties); +} + +static const TypeInfo spapr_tpm_proxy_info = { + .name = TYPE_SPAPR_TPM_PROXY, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprTpmProxy), + .class_init = spapr_tpm_proxy_class_init, +}; + +static void spapr_tpm_proxy_register_types(void) +{ + type_register_static(&spapr_tpm_proxy_info); + spapr_register_hypercall(SVM_H_TPM_COMM, h_tpm_comm); +} + +type_init(spapr_tpm_proxy_register_types) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c new file mode 100644 index 000000000..b975ed29c --- /dev/null +++ b/hw/ppc/spapr_vio.c @@ -0,0 +1,741 @@ +/* + * QEMU sPAPR VIO code + * + * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com> + * Based on the s390 virtio bus code: + * Copyright (c) 2009 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/log.h" +#include "hw/loader.h" +#include "elf.h" +#include "hw/sysbus.h" +#include "sysemu/kvm.h" +#include "sysemu/device_tree.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/fdt.h" +#include "trace.h" + +#include <libfdt.h> + +#define SPAPR_VIO_REG_BASE 0x71000000 + +static char *spapr_vio_get_dev_name(DeviceState *qdev) +{ + SpaprVioDevice *dev = VIO_SPAPR_DEVICE(qdev); + SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + + /* Device tree style name device@reg */ + return g_strdup_printf("%s@%x", pc->dt_name, dev->reg); +} + +static void spapr_vio_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + k->get_dev_path = spapr_vio_get_dev_name; + k->get_fw_dev_path = spapr_vio_get_dev_name; +} + +static const TypeInfo spapr_vio_bus_info = { + .name = TYPE_SPAPR_VIO_BUS, + .parent = TYPE_BUS, + .class_init = spapr_vio_bus_class_init, + .instance_size = sizeof(SpaprVioBus), +}; + +SpaprVioDevice *spapr_vio_find_by_reg(SpaprVioBus *bus, uint32_t reg) +{ + BusChild *kid; + SpaprVioDevice *dev = NULL; + + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + dev = (SpaprVioDevice *)kid->child; + if (dev->reg == reg) { + return dev; + } + } + + return NULL; +} + +static int vio_make_devnode(SpaprVioDevice *dev, + void *fdt) +{ + SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + int vdevice_off, node_off, ret; + char *dt_name; + const char *dt_compatible; + + vdevice_off = fdt_path_offset(fdt, "/vdevice"); + if (vdevice_off < 0) { + return vdevice_off; + } + + dt_name = spapr_vio_get_dev_name(DEVICE(dev)); + node_off = fdt_add_subnode(fdt, vdevice_off, dt_name); + g_free(dt_name); + if (node_off < 0) { + return node_off; + } + + ret = fdt_setprop_cell(fdt, node_off, "reg", dev->reg); + if (ret < 0) { + return ret; + } + + if (pc->dt_type) { + ret = fdt_setprop_string(fdt, node_off, "device_type", + pc->dt_type); + if (ret < 0) { + return ret; + } + } + + if (pc->get_dt_compatible) { + dt_compatible = pc->get_dt_compatible(dev); + } else { + dt_compatible = pc->dt_compatible; + } + + if (dt_compatible) { + ret = fdt_setprop_string(fdt, node_off, "compatible", + dt_compatible); + if (ret < 0) { + return ret; + } + } + + if (dev->irq) { + uint32_t ints_prop[2]; + + spapr_dt_irq(ints_prop, dev->irq, false); + ret = fdt_setprop(fdt, node_off, "interrupts", ints_prop, + sizeof(ints_prop)); + if (ret < 0) { + return ret; + } + } + + ret = spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->tcet); + if (ret < 0) { + return ret; + } + + if (pc->devnode) { + ret = (pc->devnode)(dev, fdt, node_off); + if (ret < 0) { + return ret; + } + } + + return node_off; +} + +/* + * CRQ handling + */ +static target_ulong h_reg_crq(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong reg = args[0]; + target_ulong queue_addr = args[1]; + target_ulong queue_len = args[2]; + SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + + if (!dev) { + hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg); + return H_PARAMETER; + } + + /* We can't grok a queue size bigger than 256M for now */ + if (queue_len < 0x1000 || queue_len > 0x10000000) { + hcall_dprintf("Queue size too small or too big (0x" TARGET_FMT_lx + ")\n", queue_len); + return H_PARAMETER; + } + + /* Check queue 
alignment */
+    if (queue_addr & 0xfff) {
+        hcall_dprintf("Queue not aligned (0x" TARGET_FMT_lx ")\n", queue_addr);
+        return H_PARAMETER;
+    }
+
+    /* Check if device supports CRQs */
+    if (!dev->crq.SendFunc) {
+        hcall_dprintf("Device does not support CRQ\n");
+        return H_NOT_FOUND;
+    }
+
+    /* Already a queue? */
+    if (dev->crq.qsize) {
+        hcall_dprintf("CRQ already registered\n");
+        return H_RESOURCE;
+    }
+    dev->crq.qladdr = queue_addr;
+    dev->crq.qsize = queue_len;
+    dev->crq.qnext = 0;
+
+    trace_spapr_vio_h_reg_crq(reg, queue_addr, queue_len);
+    return H_SUCCESS;
+}
+
+static target_ulong free_crq(SpaprVioDevice *dev)
+{
+    dev->crq.qladdr = 0;
+    dev->crq.qsize = 0;
+    dev->crq.qnext = 0;
+
+    trace_spapr_vio_free_crq(dev->reg);
+
+    return H_SUCCESS;
+}
+
+static target_ulong h_free_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong reg = args[0];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+
+    if (!dev) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
+        return H_PARAMETER;
+    }
+
+    return free_crq(dev);
+}
+
+static target_ulong h_send_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong reg = args[0];
+    target_ulong msg_hi = args[1];
+    target_ulong msg_lo = args[2];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+    uint64_t crq_mangle[2];
+
+    if (!dev) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
+        return H_PARAMETER;
+    }
+    crq_mangle[0] = cpu_to_be64(msg_hi);
+    crq_mangle[1] = cpu_to_be64(msg_lo);
+
+    if (dev->crq.SendFunc) {
+        return dev->crq.SendFunc(dev, (uint8_t *)crq_mangle);
+    }
+
+    return H_HARDWARE;
+}
+
+static target_ulong h_enable_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode, target_ulong *args)
+{
+    target_ulong reg = args[0];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+
+    if (!dev) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
+        return H_PARAMETER;
+    }
+
+    return 0;
+}
+
+/* Returns negative error, 0 success, or positive: queue full */
+int spapr_vio_send_crq(SpaprVioDevice *dev, uint8_t *crq)
+{
+    int rc;
+    uint8_t byte;
+
+    if (!dev->crq.qsize) {
+        error_report("spapr_vio_send_crq on uninitialized queue");
+        return -1;
+    }
+
+    /*
+     * Each CRQ entry is 16 bytes; a slot is free while its first byte is
+     * zero, so probe that byte first. Maybe do a fast path for KVM just
+     * writing to the pages.
+     */
+    rc = spapr_vio_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
+    if (rc) {
+        return rc;
+    }
+    if (byte != 0) {
+        return 1;
+    }
+
+    /* Write the payload half first, then the header carrying the valid byte */
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
+                             &crq[8], 8);
+    if (rc) {
+        return rc;
+    }
+
+    kvmppc_eieio();
+
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
+    if (rc) {
+        return rc;
+    }
+
+    dev->crq.qnext = (dev->crq.qnext + 16) % dev->crq.qsize;
+
+    if (dev->signal_state & 1) {
+        spapr_vio_irq_pulse(dev);
+    }
+
+    return 0;
+}
+
+/* "quiesce" handling */
+
+static void spapr_vio_quiesce_one(SpaprVioDevice *dev)
+{
+    if (dev->tcet) {
+        device_cold_reset(DEVICE(dev->tcet));
+    }
+    free_crq(dev);
+}
+
+void spapr_vio_set_bypass(SpaprVioDevice *dev, bool bypass)
+{
+    if (!dev->tcet) {
+        return;
+    }
+
+    memory_region_set_enabled(&dev->mrbypass, bypass);
+    memory_region_set_enabled(spapr_tce_get_iommu(dev->tcet), !bypass);
+
+    dev->tcet->bypass = bypass;
+}
+
+static void rtas_set_tce_bypass(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                uint32_t token,
+                                uint32_t nargs, target_ulong args,
+                                uint32_t nret, target_ulong rets)
+{
+    SpaprVioBus
*bus = spapr->vio_bus; + SpaprVioDevice *dev; + uint32_t unit, enable; + + if (nargs != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + unit = rtas_ld(args, 0); + enable = rtas_ld(args, 1); + dev = spapr_vio_find_by_reg(bus, unit); + if (!dev) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (!dev->tcet) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + spapr_vio_set_bypass(dev, !!enable); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_quiesce(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, + uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprVioBus *bus = spapr->vio_bus; + BusChild *kid; + SpaprVioDevice *dev = NULL; + + if (nargs != 0) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + dev = (SpaprVioDevice *)kid->child; + spapr_vio_quiesce_one(dev); + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static SpaprVioDevice *reg_conflict(SpaprVioDevice *dev) +{ + SpaprVioBus *bus = SPAPR_VIO_BUS(dev->qdev.parent_bus); + BusChild *kid; + SpaprVioDevice *other; + + /* + * Check for a device other than the given one which is already + * using the requested address. We have to open code this because + * the given dev might already be in the list. + */ + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + other = VIO_SPAPR_DEVICE(kid->child); + + if (other != dev && other->reg == dev->reg) { + return other; + } + } + + return 0; +} + +static void spapr_vio_busdev_reset(DeviceState *qdev) +{ + SpaprVioDevice *dev = VIO_SPAPR_DEVICE(qdev); + SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + + /* Shut down the request queue and TCEs if necessary */ + spapr_vio_quiesce_one(dev); + + dev->signal_state = 0; + + spapr_vio_set_bypass(dev, false); + if (pc->reset) { + pc->reset(dev); + } +} + +/* + * The register property of a VIO device is defined in libvirt using + * 0x1000 as a base register number plus a 0x1000 increment. For the + * VIO tty device, the base number is changed to 0x30000000. QEMU uses + * a base register number of 0x71000000 and then a simple increment. + * + * The formula below tries to compute a unique index number from the + * register value that will be used to define the IRQ number of the + * VIO device. + * + * A maximum of 256 VIO devices is covered. Collisions are possible + * but they will be detected when the IRQ is claimed. + */ +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg) +{ + uint32_t irq; + + if (reg >= SPAPR_VIO_REG_BASE) { + /* + * VIO device register values when allocated by QEMU. For + * these, we simply mask the high bits to fit the overall + * range: [0x00 - 0xff]. + * + * The nvram VIO device (reg=0x71000000) is a static device of + * the pseries machine and so is always allocated by QEMU. Its + * IRQ number is 0x0. + */ + irq = reg & 0xff; + + } else if (reg >= 0x30000000) { + /* + * VIO tty devices register values, when allocated by libvirt, + * are mapped in range [0xf0 - 0xff], gives us a maximum of 16 + * vtys. + */ + irq = 0xf0 | ((reg >> 12) & 0xf); + + } else { + /* + * Other VIO devices register values, when allocated by + * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts + * will be detected when IRQ is claimed. 
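+ *
+ * Worked examples of the mapping below: reg 0x71000000 (QEMU-allocated)
+ * yields irq 0x00; reg 0x30000000 (first libvirt vty) yields 0xf0;
+ * reg 0x2000 (libvirt, non-vty) yields 0x02. SPAPR_IRQ_VIO is then
+ * ORed into the result.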
+ */
+        irq = (reg >> 12) & 0xff;
+    }
+
+    return SPAPR_IRQ_VIO | irq;
+}
+
+static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    SpaprVioDevice *dev = (SpaprVioDevice *)qdev;
+    SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    char *id;
+
+    if (dev->reg != -1) {
+        /*
+         * Explicitly assigned address, just verify that no-one else
+         * is using it. We have to open code this rather than using
+         * spapr_vio_find_by_reg() because the device itself is
+         * already in the list.
+         */
+        SpaprVioDevice *other = reg_conflict(dev);
+
+        if (other) {
+            error_setg(errp, "%s and %s devices conflict at address %#x",
+                       object_get_typename(OBJECT(qdev)),
+                       object_get_typename(OBJECT(&other->qdev)),
+                       dev->reg);
+            return;
+        }
+    } else {
+        /* Need to assign an address */
+        SpaprVioBus *bus = SPAPR_VIO_BUS(dev->qdev.parent_bus);
+
+        do {
+            dev->reg = bus->next_reg++;
+        } while (reg_conflict(dev));
+    }
+
+    /* Don't overwrite ids assigned on the command line */
+    if (!dev->qdev.id) {
+        id = spapr_vio_get_dev_name(DEVICE(dev));
+        dev->qdev.id = id;
+    }
+
+    dev->irq = spapr_vio_reg_to_irq(dev->reg);
+
+    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        int irq = spapr_irq_findone(spapr, errp);
+
+        if (irq < 0) {
+            return;
+        }
+        dev->irq = irq;
+    }
+
+    if (spapr_irq_claim(spapr, dev->irq, false, errp) < 0) {
+        return;
+    }
+
+    if (pc->rtce_window_size) {
+        uint32_t liobn = SPAPR_VIO_LIOBN(dev->reg);
+
+        memory_region_init(&dev->mrroot, OBJECT(dev), "iommu-spapr-root",
+                           MACHINE(spapr)->ram_size);
+        memory_region_init_alias(&dev->mrbypass, OBJECT(dev),
+                                 "iommu-spapr-bypass", get_system_memory(),
+                                 0, MACHINE(spapr)->ram_size);
+        memory_region_add_subregion_overlap(&dev->mrroot, 0, &dev->mrbypass, 1);
+        address_space_init(&dev->as, &dev->mrroot, qdev->id);
+
+        dev->tcet = spapr_tce_new_table(qdev, liobn);
+        spapr_tce_table_enable(dev->tcet, SPAPR_TCE_PAGE_SHIFT, 0,
+                               pc->rtce_window_size >> SPAPR_TCE_PAGE_SHIFT);
+        dev->tcet->vdev = dev;
+        memory_region_add_subregion_overlap(&dev->mrroot, 0,
+                                            spapr_tce_get_iommu(dev->tcet), 2);
+    }
+
+    pc->realize(dev, errp);
+}
+
+static target_ulong h_vio_signal(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode,
+                                 target_ulong *args)
+{
+    target_ulong reg = args[0];
+    target_ulong mode = args[1];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+    SpaprVioDeviceClass *pc;
+
+    if (!dev) {
+        return H_PARAMETER;
+    }
+
+    pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+
+    if (mode & ~pc->signal_mask) {
+        return H_PARAMETER;
+    }
+
+    dev->signal_state = mode;
+
+    return H_SUCCESS;
+}
+
+SpaprVioBus *spapr_vio_bus_init(void)
+{
+    SpaprVioBus *bus;
+    BusState *qbus;
+    DeviceState *dev;
+
+    /* Create bridge device */
+    dev = qdev_new(TYPE_SPAPR_VIO_BRIDGE);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    /* Create bus on bridge device */
+    qbus = qbus_new(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio");
+    bus = SPAPR_VIO_BUS(qbus);
+    bus->next_reg = SPAPR_VIO_REG_BASE;
+
+    /* hcall-vio */
+    spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
+
+    /* hcall-crq */
+    spapr_register_hypercall(H_REG_CRQ, h_reg_crq);
+    spapr_register_hypercall(H_FREE_CRQ, h_free_crq);
+    spapr_register_hypercall(H_SEND_CRQ, h_send_crq);
+    spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
+
+    /* RTAS calls */
+    spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
+                        rtas_set_tce_bypass);
+    spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
+
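+    /*
+     * Minimal usage sketch (mirrors what the pseries machine does):
+     *
+     *   spapr->vio_bus = spapr_vio_bus_init();
+     *   dev = qdev_new("spapr-vty");
+     *   qdev_prop_set_chr(dev, "chardev", serial_hd(0));
+     *   qdev_realize_and_unref(dev, &spapr->vio_bus->bus, &error_fatal);
+     */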
+ return bus; +} + +static void spapr_vio_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->fw_name = "vdevice"; +} + +static const TypeInfo spapr_vio_bridge_info = { + .name = TYPE_SPAPR_VIO_BRIDGE, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = spapr_vio_bridge_class_init, +}; + +const VMStateDescription vmstate_spapr_vio = { + .name = "spapr_vio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + /* Sanity check */ + VMSTATE_UINT32_EQUAL(reg, SpaprVioDevice, NULL), + VMSTATE_UINT32_EQUAL(irq, SpaprVioDevice, NULL), + + /* General VIO device state */ + VMSTATE_UINT64(signal_state, SpaprVioDevice), + VMSTATE_UINT64(crq.qladdr, SpaprVioDevice), + VMSTATE_UINT32(crq.qsize, SpaprVioDevice), + VMSTATE_UINT32(crq.qnext, SpaprVioDevice), + + VMSTATE_END_OF_LIST() + }, +}; + +static void vio_spapr_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *k = DEVICE_CLASS(klass); + k->realize = spapr_vio_busdev_realize; + k->reset = spapr_vio_busdev_reset; + k->bus_type = TYPE_SPAPR_VIO_BUS; +} + +static const TypeInfo spapr_vio_type_info = { + .name = TYPE_VIO_SPAPR_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprVioDevice), + .abstract = true, + .class_size = sizeof(SpaprVioDeviceClass), + .class_init = vio_spapr_device_class_init, +}; + +static void spapr_vio_register_types(void) +{ + type_register_static(&spapr_vio_bus_info); + type_register_static(&spapr_vio_bridge_info); + type_register_static(&spapr_vio_type_info); +} + +type_init(spapr_vio_register_types) + +static int compare_reg(const void *p1, const void *p2) +{ + SpaprVioDevice const *dev1, *dev2; + + dev1 = (SpaprVioDevice *)*(DeviceState **)p1; + dev2 = (SpaprVioDevice *)*(DeviceState **)p2; + + if (dev1->reg < dev2->reg) { + return -1; + } + if (dev1->reg == dev2->reg) { + return 0; + } + + /* dev1->reg > dev2->reg */ + return 1; +} + +void spapr_dt_vdevice(SpaprVioBus *bus, void *fdt) +{ + DeviceState *qdev, **qdevs; + BusChild *kid; + int i, num, ret = 0; + int node; + + _FDT(node = fdt_add_subnode(fdt, 0, "vdevice")); + + _FDT(fdt_setprop_string(fdt, node, "device_type", "vdevice")); + _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,vdevice")); + _FDT(fdt_setprop_cell(fdt, node, "#address-cells", 1)); + _FDT(fdt_setprop_cell(fdt, node, "#size-cells", 0)); + _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2)); + _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)); + + /* Count qdevs on the bus list */ + num = 0; + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + num++; + } + + /* Copy out into an array of pointers */ + qdevs = g_new(DeviceState *, num); + num = 0; + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + qdevs[num++] = kid->child; + } + + /* Sort the array */ + qsort(qdevs, num, sizeof(qdev), compare_reg); + + /* Hack alert. Give the devices to libfdt in reverse order, we happen + * to know that will mean they are in forward order in the tree. 
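+ * (fdt_add_subnode() splices each new child in before the parent's
+ * existing subnodes, so the last node added comes out first.)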
*/ + for (i = num - 1; i >= 0; i--) { + SpaprVioDevice *dev = (SpaprVioDevice *)(qdevs[i]); + SpaprVioDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + + ret = vio_make_devnode(dev, fdt); + if (ret < 0) { + error_report("Couldn't create device node /vdevice/%s@%"PRIx32, + vdc->dt_name, dev->reg); + exit(1); + } + } + + g_free(qdevs); +} + +gchar *spapr_vio_stdout_path(SpaprVioBus *bus) +{ + SpaprVioDevice *dev; + char *name, *path; + + dev = spapr_vty_get_default(bus); + if (!dev) { + return NULL; + } + + name = spapr_vio_get_dev_name(DEVICE(dev)); + path = g_strdup_printf("/vdevice/%s", name); + + g_free(name); + return path; +} diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c new file mode 100644 index 000000000..40ce8fe00 --- /dev/null +++ b/hw/ppc/spapr_vof.c @@ -0,0 +1,167 @@ +/* + * SPAPR machine hooks to Virtual Open Firmware, + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/vof.h" +#include "sysemu/sysemu.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *_args) +{ + int ret = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob, + ppc64_phys_to_real(_args[0])); + + if (ret) { + return H_PARAMETER; + } + return H_SUCCESS; +} + +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) +{ + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + + vof_build_dt(fdt, spapr->vof); + + if (spapr->vof->bootargs) { + int chosen; + + _FDT(chosen = fdt_path_offset(fdt, "/chosen")); + /* + * If the client did not change "bootargs", spapr_dt_chosen() must have + * stored machine->kernel_cmdline in it before getting here. + */ + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs)); + } + + /* + * SLOF-less setup requires an open instance of stdout for early + * kernel printk. By now all phandles are settled so we can open + * the default serial console. + */ + if (stdout_path) { + _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout", + stdout_path)); + } +} + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp) +{ + target_ulong stack_ptr; + Vof *vof = spapr->vof; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + vof_init(vof, spapr->rma_size, errp); + + stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE); + if (stack_ptr == -1) { + error_setg(errp, "Memory allocation for stack failed"); + return; + } + /* Stack grows downwards plus reserve space for the minimum stack frame */ + stack_ptr += VOF_STACK_SIZE - 0x20; + + if (spapr->kernel_size && + vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) { + error_setg(errp, "Memory for kernel is in use"); + return; + } + + if (spapr->initrd_size && + vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) { + error_setg(errp, "Memory for initramdisk is in use"); + return; + } + + spapr_vof_client_dt_finalize(spapr, fdt); + + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + stack_ptr, spapr->initrd_base, + spapr->initrd_size); + /* VOF is 32bit BE so enforce MSR here */ + first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE)); + + /* + * At this point the expected allocation map is: + * + * 0..c38 - the initial firmware + * 8000..10000 - stack + * 400000.. - kernel + * 3ea0000.. 
- initramdisk + * + * We skip writing FDT as nothing expects it; OF client interface is + * going to be used for reading the device tree. + */ +} + +void spapr_vof_quiesce(MachineState *ms) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + spapr->fdt_size = fdt_totalsize(spapr->fdt_blob); + spapr->fdt_initial_size = spapr->fdt_size; +} + +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + /* + * We only allow changing properties which we know how to update in QEMU + * OR + * the ones which we know that they need to survive during "quiesce". + */ + + if (strcmp(path, "/rtas") == 0) { + if (strcmp(propname, "linux,rtas-base") == 0 || + strcmp(propname, "linux,rtas-entry") == 0) { + /* These need to survive quiesce so let them store in the FDT */ + return true; + } + } + + if (strcmp(path, "/chosen") == 0) { + if (strcmp(propname, "bootargs") == 0) { + Vof *vof = spapr->vof; + + g_free(vof->bootargs); + vof->bootargs = g_strndup(val, vallen); + return true; + } + if (strcmp(propname, "linux,initrd-start") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_base = ldl_be_p(val); + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_base = ldq_be_p(val); + return true; + } + return false; + } + if (strcmp(propname, "linux,initrd-end") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base; + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base; + return true; + } + return false; + } + } + + return true; +} diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events new file mode 100644 index 000000000..3bf43fa34 --- /dev/null +++ b/hw/ppc/trace-events @@ -0,0 +1,143 @@ +# See docs/devel/tracing.rst for syntax documentation. 
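+#
+# Each line declares one event as <name>(<C argument list>) followed by a
+# printf-style format string; the build generates a trace_<name>() helper
+# per event, e.g. spapr_vio.c calls trace_spapr_vio_free_crq(dev->reg).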
+ +# spapr_pci.c +spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=0x%x)" +spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=0x%"PRIx64 +spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr 0x%x func %u, requested %u, first irq %u" +spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u" +spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@0x%"PRIx64"<=0x%"PRIx64" IRQ %u" +spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u" +spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at 0x%x asked %u, have only %u" + +# spapr_hcall.c +spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes" +spapr_cas_pvr(uint32_t cur_pvr, bool explicit_match, uint32_t new_pvr) "current=0x%x, explicit_match=%u, new=0x%x" +spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 +spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 +spapr_update_dt(unsigned cb) "New blob %u bytes" +spapr_update_dt_failed_size(unsigned cbold, unsigned cbnew, unsigned magic) "Old blob %u bytes, new blob %u bytes, magic 0x%x" +spapr_update_dt_failed_check(unsigned cbold, unsigned cbnew, unsigned magic) "Old blob %u bytes, new blob %u bytes, magic 0x%x" + +# spapr_tpm_proxy.c +spapr_h_tpm_comm(const char *device_path, uint64_t operation) "tpm_device_path=%s operation=0x%"PRIx64 +spapr_tpm_execute(uint64_t data_in, uint64_t data_in_sz, uint64_t data_out, uint64_t data_out_sz) "data_in=0x%"PRIx64", data_in_sz=%"PRIu64", data_out=0x%"PRIx64", data_out_sz=%"PRIu64 + +# spapr_iommu.c +spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 +spapr_iommu_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 +spapr_iommu_pci_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 +spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 +spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=0x%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=0x%x" +spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=0x%"PRIx64" table=%p fd=%d" +spapr_iommu_pre_save(uint64_t liobn, uint32_t nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" bus_offset=0x%"PRIx64" 
ps=%"PRIu32 +spapr_iommu_post_load(uint64_t liobn, uint32_t pre_nb, uint32_t post_nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" => 0x%"PRIx32" bus_offset=0x%"PRIx64" ps=%"PRIu32 + +# spapr_rtas_ddw.c +spapr_iommu_ddw_query(uint64_t buid, uint32_t cfgaddr, unsigned wa, uint64_t win_size, uint32_t pgmask) "buid=0x%"PRIx64" addr=0x%"PRIx32", %u windows available, max window size=0x%"PRIx64", mask=0x%"PRIx32 +spapr_iommu_ddw_create(uint64_t buid, uint32_t cfgaddr, uint64_t pg_size, uint64_t req_size, uint64_t start, uint32_t liobn) "buid=0x%"PRIx64" addr=0x%"PRIx32", page size=0x%"PRIx64", requested=0x%"PRIx64", start addr=0x%"PRIx64", liobn=0x%"PRIx32 +spapr_iommu_ddw_remove(uint32_t liobn) "liobn=0x%"PRIx32 +spapr_iommu_ddw_reset(uint64_t buid, uint32_t cfgaddr) "buid=0x%"PRIx64" addr=0x%"PRIx32 + +# spapr_drc.c +spapr_drc_set_isolation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%"PRIx32 +spapr_drc_set_isolation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_dr_indicator(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x" +spapr_drc_set_allocation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x" +spapr_drc_set_allocation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_unplug_request(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_awaiting_quiesce(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_realize_child(uint32_t index, const char *childname) "drc: 0x%"PRIx32", child name: %s" +spapr_drc_realize_complete(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_unrealize(uint32_t index) "drc: 0x%"PRIx32 + +# spapr_ovec.c +spapr_ovec_parse_vector(int vector, int byte, uint16_t vec_len, uint8_t entry) "read guest vector %2d, byte %3d / %3d: 0x%.2x" +spapr_ovec_populate_dt(int byte, uint16_t vec_len, uint8_t entry) "encoding guest vector byte %3d / %3d: 0x%.2x" + +# spapr_drc.c +spapr_rtas_get_sensor_state_not_supported(uint32_t index, uint32_t type) "sensor index: 0x%"PRIx32", type: %"PRIu32 +spapr_rtas_get_sensor_state_invalid(uint32_t index) "sensor index: 0x%"PRIx32 +spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx32 + +# spapr_vio.c +spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64 +spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed" + +# vof.c +vof_error_str_truncated(const char *s, int len) "%s truncated to %d" +vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d" +vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d" +vof_error_unknown_method(const char *method) "\"%s\"" +vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x" +vof_error_unknown_path(const char *path) "\"%s\"" +vof_error_write(uint32_t ih) "ih=0x%x" +vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x" +vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x" +vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x" +vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x" +vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const 
char *val) "ph=0x%x \"%s\" => len=%d [%s]" +vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d" +vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d" +vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x" +vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x" +vof_package_to_path(uint32_t ph, const char *tmp, int ret) "ph=0x%x => %s len=%d" +vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, int ret) "ih=0x%x ph=0x%x => %s len=%d" +vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x" +vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\"" +vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 +vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 + +# ppc.c +ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" +ppc_tb_load(uint64_t tb) "tb 0x%016" PRIx64 +ppc_tb_store(uint64_t tb, uint64_t offset) "tb 0x%016" PRIx64 " offset 0x%08" PRIx64 + +ppc_decr_load(uint64_t tb) "decr 0x%016" PRIx64 +ppc_decr_excp(const char *action) "%s decrementer" +ppc_decr_store(uint32_t nr_bits, uint64_t decr, uint64_t value) "%d-bit 0x%016" PRIx64 " => 0x%016" PRIx64 + +ppc4xx_fit(uint32_t ir, uint64_t tcr, uint64_t tsr) "ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc4xx_pit_stop(void) "" +ppc4xx_pit_start(uint64_t reload) "PIT 0x%016" PRIx64 +ppc4xx_pit(uint32_t ar, uint32_t ir, uint64_t tcr, uint64_t tsr, uint64_t reload) "ar %d ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 " PIT 0x%016" PRIx64 +ppc4xx_wdt(uint64_t tcr, uint64_t tsr) "TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc40x_store_pit(uint64_t value) "val 0x%" PRIx64 +ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32 +ppc40x_timers_init(uint32_t value) "frequency %" PRIu32 + +ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d" +ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32 +ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d" +ppc_irq_reset(const char *name) "%s" +ppc_irq_cpu(const char *action) "%s" + +# prep_systemio.c +prep_systemio_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +prep_systemio_write(uint32_t addr, uint32_t val) "write addr=0x%x val=0x%x" + +# rs6000_mc.c +rs6000mc_id_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +rs6000mc_presence_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +rs6000mc_size_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +rs6000mc_size_write(uint32_t addr, uint32_t val) "write addr=0x%x val=0x%x" +rs6000mc_parity_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" + +# ppc4xx_pci.c +ppc4xx_pci_map_irq(int32_t devfn, int irq_num, int slot) "devfn 0x%x irq %d -> %d" +ppc4xx_pci_set_irq(int irq_num) "PCI irq %d" + +# ppc440_pcix.c +ppc440_pcix_map_irq(int32_t devfn, int irq_num, int slot) "devfn 0x%x irq %d -> %d" +ppc440_pcix_set_irq(int irq_num) "PCI irq %d" +ppc440_pcix_update_pim(int idx, uint64_t size, uint64_t la) "Added window %d of size=0x%" PRIx64 " to CPU=0x%" PRIx64 +ppc440_pcix_update_pom(int idx, uint32_t size, uint64_t la, uint64_t pcia) "Added window %d 
of size=0x%x from CPU=0x%" PRIx64 " to PCI=0x%" PRIx64 +ppc440_pcix_reg_read(uint64_t addr, uint32_t val) "addr 0x%" PRIx64 " = 0x%" PRIx32 +ppc440_pcix_reg_write(uint64_t addr, uint32_t val, uint32_t size) "addr 0x%" PRIx64 " = 0x%" PRIx32 " size 0x%" PRIx32 diff --git a/hw/ppc/trace.h b/hw/ppc/trace.h new file mode 100644 index 000000000..87c4198e6 --- /dev/null +++ b/hw/ppc/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_ppc.h" diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c new file mode 100644 index 000000000..9c575403b --- /dev/null +++ b/hw/ppc/virtex_ml507.c @@ -0,0 +1,316 @@ +/* + * Model of Xilinx Virtex5 ML507 PPC-440 refdesign. + * + * Copyright (c) 2010 Edgar E. Iglesias. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "cpu.h" +#include "hw/sysbus.h" +#include "hw/char/serial.h" +#include "hw/block/flash.h" +#include "sysemu/sysemu.h" +#include "sysemu/reset.h" +#include "hw/boards.h" +#include "sysemu/device_tree.h" +#include "hw/loader.h" +#include "elf.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/option.h" + +#include "hw/intc/ppc-uic.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "hw/qdev-properties.h" +#include "ppc405.h" + +#define EPAPR_MAGIC (0x45504150) +#define FLASH_SIZE (16 * MiB) + +#define INTC_BASEADDR 0x81800000 +#define UART16550_BASEADDR 0x83e01003 +#define TIMER_BASEADDR 0x83c00000 +#define PFLASH_BASEADDR 0xfc000000 + +#define TIMER_IRQ 3 +#define UART16550_IRQ 9 + +static struct boot_info +{ + uint32_t bootstrap_pc; + uint32_t cmdline; + uint32_t fdt; + uint32_t ima_size; + void *vfdt; +} boot_info; + +/* Create reset TLB entries for BookE, spanning the 32bit addr space. 
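+ * Two 2 GiB entries do it: entry 0 maps the 0..0x7fffffff EA range onto
+ * the given physical base, entry 1 identity-maps 0x80000000..0xffffffff,
+ * both with read/write/execute permission.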
*/ +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0x80000000 */ + tlb->EPN = va & TARGET_PAGE_MASK; + tlb->RPN = pa & TARGET_PAGE_MASK; + tlb->PID = 0; + + tlb = &env->tlb.tlbe[1]; + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0xffffffff */ + tlb->EPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->RPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->PID = 0; +} + +static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + DeviceState *uicdev; + SysBusDevice *uicsbd; + + cpu = POWERPC_CPU(cpu_create(cpu_type)); + env = &cpu->env; + + ppc_booke_timers_init(cpu, sysclk, 0/* no flags */); + + ppc_dcr_init(env, NULL, NULL); + + /* interrupt controller */ + uicdev = qdev_new(TYPE_PPC_UIC); + uicsbd = SYS_BUS_DEVICE(uicdev); + + object_property_set_link(OBJECT(uicdev), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(uicsbd, &error_fatal); + + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + + /* This board doesn't wire anything up to the inputs of the UIC. */ + return cpu; +} + +static void main_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + struct boot_info *bi = env->load_info; + + cpu_reset(CPU(cpu)); + /* Linux Kernel Parameters (passing device tree): + * r3: pointer to the fdt + * r4: 0 + * r5: 0 + * r6: epapr magic + * r7: size of IMA in bytes + * r8: 0 + * r9: 0 + */ + env->gpr[1] = (16 * MiB) - 8; + /* Provide a device-tree. */ + env->gpr[3] = bi->fdt; + env->nip = bi->bootstrap_pc; + + /* Create a mapping for the kernel. */ + mmubooke_create_initial_mapping(env, 0, 0); + env->gpr[6] = tswap32(EPAPR_MAGIC); + env->gpr[7] = bi->ima_size; +} + +#define BINARY_DEVICE_TREE_FILE "virtex-ml507.dtb" +static int xilinx_load_device_tree(hwaddr addr, + uint32_t ramsize, + hwaddr initrd_base, + hwaddr initrd_size, + const char *kernel_cmdline) +{ + char *path; + int fdt_size; + void *fdt = NULL; + int r; + const char *dtb_filename; + + dtb_filename = current_machine->dtb; + if (dtb_filename) { + fdt = load_device_tree(dtb_filename, &fdt_size); + if (!fdt) { + error_report("Error while loading device tree file '%s'", + dtb_filename); + } + } else { + /* Try the local "ppc.dtb" override. 
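+ * (a ppc.dtb file in the current working directory) first, then fall
+ * back to the BINARY_DEVICE_TREE_FILE shipped with QEMU.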
*/ + fdt = load_device_tree("ppc.dtb", &fdt_size); + if (!fdt) { + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE); + if (path) { + fdt = load_device_tree(path, &fdt_size); + g_free(path); + } + } + } + if (!fdt) { + return 0; + } + + r = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", + initrd_base); + if (r < 0) { + error_report("couldn't set /chosen/linux,initrd-start"); + } + + r = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + if (r < 0) { + error_report("couldn't set /chosen/linux,initrd-end"); + } + + r = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", kernel_cmdline); + if (r < 0) + fprintf(stderr, "couldn't set /chosen/bootargs\n"); + cpu_physical_memory_write(addr, fdt, fdt_size); + g_free(fdt); + return fdt_size; +} + +static void virtex_init(MachineState *machine) +{ + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + hwaddr initrd_base = 0; + int initrd_size = 0; + MemoryRegion *address_space_mem = get_system_memory(); + DeviceState *dev; + PowerPCCPU *cpu; + CPUPPCState *env; + hwaddr ram_base = 0; + DriveInfo *dinfo; + qemu_irq irq[32], *cpu_irq; + int kernel_size; + int i; + + /* init CPUs */ + cpu = ppc440_init_xilinx(machine->cpu_type, 400000000); + env = &cpu->env; + + if (env->mmu_model != POWERPC_MMU_BOOKE) { + error_report("MMU model %i not supported by this machine", + env->mmu_model); + exit(1); + } + + qemu_register_reset(main_cpu_reset, cpu); + + memory_region_add_subregion(address_space_mem, ram_base, machine->ram); + + dinfo = drive_get(IF_PFLASH, 0, 0); + pflash_cfi01_register(PFLASH_BASEADDR, "virtex.flash", FLASH_SIZE, + dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, + 64 * KiB, 1, 0x89, 0x18, 0x0000, 0x0, 1); + + cpu_irq = (qemu_irq *) &env->irq_inputs[PPC40x_INPUT_INT]; + dev = qdev_new("xlnx.xps-intc"); + qdev_prop_set_uint32(dev, "kind-of-intr", 0); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, cpu_irq[0]); + for (i = 0; i < 32; i++) { + irq[i] = qdev_get_gpio_in(dev, i); + } + + serial_mm_init(address_space_mem, UART16550_BASEADDR, 2, irq[UART16550_IRQ], + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); + + /* 2 timers at irq 2 @ 62 Mhz. */ + dev = qdev_new("xlnx.xps-timer"); + qdev_prop_set_uint32(dev, "one-timer-only", 0); + qdev_prop_set_uint32(dev, "clock-frequency", 62 * 1000000); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, TIMER_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ]); + + if (kernel_filename) { + uint64_t entry, high; + hwaddr boot_offset; + + /* Boots a kernel elf binary. */ + kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, + &entry, NULL, &high, NULL, 1, PPC_ELF_MACHINE, + 0, 0); + boot_info.bootstrap_pc = entry & 0x00ffffff; + + if (kernel_size < 0) { + boot_offset = 0x1200000; + /* If we failed loading ELF's try a raw image. */ + kernel_size = load_image_targphys(kernel_filename, + boot_offset, + machine->ram_size); + boot_info.bootstrap_pc = boot_offset; + high = boot_info.bootstrap_pc + kernel_size + 8192; + } + + boot_info.ima_size = kernel_size; + + /* Load initrd. 
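+ * It is placed at the first 4-byte-aligned address past the kernel;
+ * a load failure is fatal rather than silently booting without it.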
*/
+    if (machine->initrd_filename) {
+        initrd_base = high = ROUND_UP(high, 4);
+        initrd_size = load_image_targphys(machine->initrd_filename,
+                                          high, machine->ram_size - high);
+
+        if (initrd_size < 0) {
+            error_report("couldn't load ram disk '%s'",
+                         machine->initrd_filename);
+            exit(1);
+        }
+        high = ROUND_UP(high + initrd_size, 4);
+    }
+
+    /* Provide a device-tree. */
+    boot_info.fdt = high + (8192 * 2);
+    boot_info.fdt &= ~8191;
+
+    xilinx_load_device_tree(boot_info.fdt, machine->ram_size,
+                            initrd_base, initrd_size,
+                            kernel_cmdline);
+    }
+    env->load_info = &boot_info;
+}
+
+static void virtex_machine_init(MachineClass *mc)
+{
+    mc->desc = "Xilinx Virtex ML507 reference design";
+    mc->init = virtex_init;
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("440-xilinx");
+    mc->default_ram_id = "ram";
+}
+
+DEFINE_MACHINE("virtex-ml507", virtex_machine_init)
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
new file mode 100644
index 000000000..73adc44ec
--- /dev/null
+++ b/hw/ppc/vof.c
@@ -0,0 +1,1062 @@
+/*
+ * QEMU PowerPC Virtual Open Firmware.
+ *
+ * This implements the client interface from OpenFirmware IEEE1275 on the
+ * QEMU side to leave only a very basic firmware in the VM.
+ *
+ * Copyright (c) 2021 IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/range.h"
+#include "qemu/units.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "hw/ppc/vof.h"
+#include "hw/ppc/fdt.h"
+#include "sysemu/runstate.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+#include <libfdt.h>
+
+/*
+ * OF 1275 "nextprop" description suggests it is 32 bytes max but
+ * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long.
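+ * The 64-byte limit below leaves headroom for either.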
+ */ +#define OF_PROPNAME_LEN_MAX 64 + +#define VOF_MAX_PATH 256 +#define VOF_MAX_SETPROPLEN 2048 +#define VOF_MAX_METHODLEN 256 +#define VOF_MAX_FORTHCODE 256 +#define VOF_VTY_BUF_SIZE 256 + +typedef struct { + uint64_t start; + uint64_t size; +} OfClaimed; + +typedef struct { + char *path; /* the path used to open the instance */ + uint32_t phandle; +} OfInstance; + +static int readstr(hwaddr pa, char *buf, int size) +{ + if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) { + return -1; + } + if (strnlen(buf, size) == size) { + buf[size - 1] = '\0'; + trace_vof_error_str_truncated(buf, size); + return -1; + } + return 0; +} + +static bool cmpservice(const char *s, unsigned nargs, unsigned nret, + const char *s1, unsigned nargscheck, unsigned nretcheck) +{ + if (strcmp(s, s1)) { + return false; + } + if ((nargscheck && (nargs != nargscheck)) || + (nretcheck && (nret != nretcheck))) { + trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret); + return false; + } + + return true; +} + +static void prop_format(char *tval, int tlen, const void *prop, int len) +{ + int i; + const unsigned char *c; + char *t; + const char bin[] = "..."; + + for (i = 0, c = prop; i < len; ++i, ++c) { + if (*c == '\0' && i == len - 1) { + strncpy(tval, prop, tlen - 1); + return; + } + if (*c < 0x20 || *c >= 0x80) { + break; + } + } + + for (i = 0, c = prop, t = tval; i < len; ++i, ++c) { + if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) { + strcpy(t, bin); + return; + } + if (i && i % 4 == 0 && i != len - 1) { + strcat(t, " "); + ++t; + } + t += sprintf(t, "%02X", *c & 0xFF); + } +} + +static int get_path(const void *fdt, int offset, char *buf, int len) +{ + int ret; + + ret = fdt_get_path(fdt, offset, buf, len - 1); + if (ret < 0) { + return ret; + } + + buf[len - 1] = '\0'; + + return strlen(buf) + 1; +} + +static int phandle_to_path(const void *fdt, uint32_t ph, char *buf, int len) +{ + int ret; + + ret = fdt_node_offset_by_phandle(fdt, ph); + if (ret < 0) { + return ret; + } + + return get_path(fdt, ret, buf, len); +} + +static int path_offset(const void *fdt, const char *path) +{ + g_autofree char *p = NULL; + char *at; + + /* + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16 + * + * "Conversion from numeric representation to text representation shall use + * the lower case forms of the hexadecimal digits in the range a..f, + * suppressing leading zeros". + */ + p = g_strdup(path); + for (at = strchr(p, '@'); at && *at; ) { + if (*at == '/') { + at = strchr(at, '@'); + } else { + *at = tolower(*at); + ++at; + } + } + + return fdt_path_offset(fdt, p); +} + +static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr) +{ + char fullnode[VOF_MAX_PATH]; + uint32_t ret = PROM_ERROR; + int offset; + + if (readstr(nodeaddr, fullnode, sizeof(fullnode))) { + return (uint32_t) ret; + } + + offset = path_offset(fdt, fullnode); + if (offset >= 0) { + ret = fdt_get_phandle(fdt, offset); + } + trace_vof_finddevice(fullnode, ret); + return ret; +} + +static const void *getprop(const void *fdt, int nodeoff, const char *propname, + int *proplen, bool *write0) +{ + const char *unit, *prop; + const void *ret = fdt_getprop(fdt, nodeoff, propname, proplen); + + if (ret) { + if (write0) { + *write0 = false; + } + return ret; + } + + if (strcmp(propname, "name")) { + return NULL; + } + /* + * We return a value for "name" from path if queried but property does not + * exist. @proplen does not include the unit part in this case. 
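+ * E.g. querying "name" on "/memory@0" returns "memory" with *proplen = 7,
+ * counting the terminating zero which the caller is told to write via
+ * @write0.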
+ */ + prop = fdt_get_name(fdt, nodeoff, proplen); + if (!prop) { + *proplen = 0; + return NULL; + } + + unit = memchr(prop, '@', *proplen); + if (unit) { + *proplen = unit - prop; + } + *proplen += 1; + + /* + * Since it might be cut at "@" and there will be no trailing zero + * in the prop buffer, tell the caller to write zero at the end. + */ + if (write0) { + *write0 = true; + } + return prop; +} + +static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + char trval[64] = ""; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + bool write0; + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, &write0); + if (prop) { + const char zero = 0; + int cb = MIN(proplen, vallen); + + if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK || + /* if that was "name" with a unit address, overwrite '@' with '0' */ + (write0 && + cb == proplen && + VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) { + ret = PROM_ERROR; + } else { + /* + * OF1275 says: + * "Size is either the actual size of the property, or -1 if name + * does not exist", hence returning proplen instead of cb. + */ + ret = proplen; + /* Do not format a value if tracepoint is silent, for performance */ + if (trace_event_get_state(TRACE_VOF_GETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), prop, ret); + } + } + } else { + ret = PROM_ERROR; + } + trace_vof_getprop(nodeph, propname, ret, trval); + + return ret; +} + +static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, NULL); + if (prop) { + ret = proplen; + } else { + ret = PROM_ERROR; + } + trace_vof_getproplen(nodeph, propname, ret); + + return ret; +} + +static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof, + uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = PROM_ERROR; + int offset, rc; + char trval[64] = ""; + char nodepath[VOF_MAX_PATH] = ""; + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + VofMachineIfClass *vmc; + g_autofree char *val = NULL; + + if (vallen > VOF_MAX_SETPROPLEN) { + goto trace_exit; + } + if (readstr(pname, propname, sizeof(propname))) { + goto trace_exit; + } + offset = fdt_node_offset_by_phandle(fdt, nodeph); + if (offset < 0) { + goto trace_exit; + } + rc = get_path(fdt, offset, nodepath, sizeof(nodepath)); + if (rc <= 0) { + goto trace_exit; + } + + val = g_malloc0(vallen); + if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) { + goto trace_exit; + } + + if (!vmo) { + goto trace_exit; + } + + vmc = VOF_MACHINE_GET_CLASS(vmo); + if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) { + goto trace_exit; + } + + rc = fdt_setprop(fdt, offset, propname, val, vallen); + if (rc) { + goto trace_exit; + } + + if (trace_event_get_state(TRACE_VOF_SETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), val, vallen); + } + ret = 
vallen; + +trace_exit: + trace_vof_setprop(nodeph, propname, trval, vallen, ret); + + return ret; +} + +static uint32_t vof_nextprop(const void *fdt, uint32_t phandle, + uint32_t prevaddr, uint32_t nameaddr) +{ + int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle); + char prev[OF_PROPNAME_LEN_MAX + 1]; + const char *tmp; + + if (readstr(prevaddr, prev, sizeof(prev))) { + return PROM_ERROR; + } + + fdt_for_each_property_offset(offset, fdt, nodeoff) { + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + if (prev[0] == '\0' || strcmp(prev, tmp) == 0) { + if (prev[0] != '\0') { + offset = fdt_next_property_offset(fdt, offset); + if (offset < 0) { + return 0; + } + } + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + + if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) { + return PROM_ERROR; + } + return 1; + } + } + + return 0; +} + +static uint32_t vof_peer(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc; + + if (phandle == 0) { + rc = fdt_path_offset(fdt, "/"); + } else { + rc = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + } + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_child(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_first_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_parent(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_parent_offset(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path) +{ + uint32_t ret = PROM_ERROR; + OfInstance *inst = NULL; + + if (vof->of_instance_last == 0xFFFFFFFF) { + /* We do not recycle ihandles yet */ + goto trace_exit; + } + + inst = g_new0(OfInstance, 1); + inst->phandle = fdt_get_phandle(fdt, offset); + g_assert(inst->phandle); + ++vof->of_instance_last; + + inst->path = g_strdup(path); + g_hash_table_insert(vof->of_instances, + GINT_TO_POINTER(vof->of_instance_last), + inst); + ret = vof->of_instance_last; + +trace_exit: + trace_vof_open(path, inst ? inst->phandle : 0, ret); + + return ret; +} + +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path) +{ + int offset, node = fdt_path_offset(fdt, nodename); + uint32_t inst; + + offset = fdt_path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + inst = vof_do_open(fdt, vof, offset, path); + + return fdt_setprop_cell(fdt, node, prop, inst) >= 0 ? 
0 : PROM_ERROR; +} + +static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr) +{ + char path[VOF_MAX_PATH]; + int offset; + + if (readstr(pathaddr, path, sizeof(path))) { + return PROM_ERROR; + } + + offset = path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + return vof_do_open(fdt, vof, offset, path); +} + +static void vof_close(Vof *vof, uint32_t ihandle) +{ + if (!g_hash_table_remove(vof->of_instances, GINT_TO_POINTER(ihandle))) { + trace_vof_error_unknown_ihandle_close(ihandle); + } +} + +static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle) +{ + gpointer instp = g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + uint32_t ret = PROM_ERROR; + + if (instp) { + ret = ((OfInstance *)instp)->phandle; + } + trace_vof_instance_to_package(ihandle, ret); + + return ret; +} + +static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle, + uint32_t buf, uint32_t len) +{ + int rc; + char tmp[VOF_MAX_PATH] = ""; + + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + + trace_vof_package_to_path(phandle, tmp, rc); + + return rc > 0 ? (uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle, + uint32_t buf, uint32_t len) +{ + int rc = -1; + uint32_t phandle = vof_instance_to_package(vof, ihandle); + char tmp[VOF_MAX_PATH] = ""; + + if (phandle != -1) { + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + } + trace_vof_instance_to_path(ihandle, phandle, tmp, rc); + + return rc > 0 ? (uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf, + uint32_t len) +{ + char tmp[VOF_VTY_BUF_SIZE]; + unsigned cb; + OfInstance *inst = (OfInstance *) + g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle)); + + if (!inst) { + trace_vof_error_write(ihandle); + return PROM_ERROR; + } + + for ( ; len > 0; len -= cb) { + cb = MIN(len, sizeof(tmp) - 1); + if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) { + return PROM_ERROR; + } + + /* FIXME: there is no backend(s) yet so just call a trace */ + if (trace_event_get_state(TRACE_VOF_WRITE) && + qemu_loglevel_mask(LOG_TRACE)) { + tmp[cb] = '\0'; + trace_vof_write(ihandle, cb, tmp); + } + } + + return len; +} + +static void vof_claimed_dump(GArray *claimed) +{ + int i; + OfClaimed c; + + if (trace_event_get_state(TRACE_VOF_CLAIMED) && + qemu_loglevel_mask(LOG_TRACE)) { + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + trace_vof_claimed(c.start, c.start + c.size, c.size); + } + } +} + +static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size) +{ + int i; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (ranges_overlap(c.start, c.size, virt, size)) { + return false; + } + } + + return true; +} + +static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size) +{ + OfClaimed newclaim; + + newclaim.start = virt; + newclaim.size = size; + g_array_append_val(claimed, newclaim); +} + +static gint of_claimed_compare_func(gconstpointer a, gconstpointer b) +{ + return ((OfClaimed *)a)->start - ((OfClaimed *)b)->start; +} + +static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base) +{ + int i, n, offset, proplen = 0, sc, ac; + target_ulong mem0_end; + const uint8_t 
*mem0_reg; + g_autofree uint8_t *avail = NULL; + uint8_t *availcur; + + if (!fdt || !claimed) { + return; + } + + offset = fdt_path_offset(fdt, "/"); + _FDT(offset); + ac = fdt_address_cells(fdt, offset); + g_assert(ac == 1 || ac == 2); + sc = fdt_size_cells(fdt, offset); + g_assert(sc == 1 || sc == 2); + + offset = fdt_path_offset(fdt, "/memory@0"); + _FDT(offset); + + mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen); + g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc)); + if (sc == 2) { + mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } else { + mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } + + g_array_sort(claimed, of_claimed_compare_func); + vof_claimed_dump(claimed); + + /* + * VOF resides in the first page so we do not need to check if there is + * available memory before the first claimed block + */ + g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0)); + + avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len); + for (i = 0, n = 0, availcur = avail; i < claimed->len; ++i) { + OfClaimed c = g_array_index(claimed, OfClaimed, i); + uint64_t start, size; + + start = c.start + c.size; + if (i < claimed->len - 1) { + OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1); + + size = cn.start - start; + } else { + size = mem0_end - start; + } + + if (ac == 2) { + *(uint64_t *) availcur = cpu_to_be64(start); + } else { + *(uint32_t *) availcur = cpu_to_be32(start); + } + availcur += sizeof(uint32_t) * ac; + if (sc == 2) { + *(uint64_t *) availcur = cpu_to_be64(size); + } else { + *(uint32_t *) availcur = cpu_to_be32(size); + } + availcur += sizeof(uint32_t) * sc; + + if (size) { + trace_vof_avail(c.start + c.size, c.start + c.size + size, size); + ++n; + } + } + _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail))); +} + +/* + * OF1275: + * "Allocates size bytes of memory. If align is zero, the allocated range + * begins at the virtual address virt. Otherwise, an aligned address is + * automatically chosen and the input argument virt is ignored". + * + * In other words, exactly one of @virt and @align is non-zero. 
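+ * + * For example (hypothetical values): claim(virt=0x3000000, size=0x1000, + * align=0) succeeds only if [0x3000000, 0x3001000) overlaps no existing + * claim, whereas claim(virt=0, size=0x1000, align=0x10000) ignores @virt + * and returns the next free 64KiB-aligned range below @top_addr.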
+ */ +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, + uint64_t align) +{ + uint64_t ret; + + if (size == 0) { + ret = -1; + } else if (align == 0) { + if (!vof_claim_avail(vof->claimed, virt, size)) { + ret = -1; + } else { + ret = virt; + } + } else { + vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align); + while (1) { + if (vof->claimed_base >= vof->top_addr) { + error_report("Out of RMA memory for the OF client"); + return -1; + } + if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) { + break; + } + vof->claimed_base += size; + } + ret = vof->claimed_base; + } + + if (ret != -1) { + vof->claimed_base = MAX(vof->claimed_base, ret + size); + vof_claim_add(vof->claimed, ret, size); + } + trace_vof_claim(virt, size, align, ret); + + return ret; +} + +static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size) +{ + uint32_t ret = PROM_ERROR; + int i; + GArray *claimed = vof->claimed; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (c.start == virt && c.size == size) { + g_array_remove_index(claimed, i); + ret = 0; + break; + } + } + + trace_vof_release(virt, size, ret); + + return ret; +} + +static void vof_instantiate_rtas(Error **errp) +{ + error_setg(errp, "The firmware should have instantiated RTAS"); +} + +static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr, + uint32_t ihandle, uint32_t param1, + uint32_t param2, uint32_t param3, + uint32_t param4, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char method[VOF_MAX_METHODLEN] = ""; + OfInstance *inst; + + if (!ihandle) { + goto trace_exit; + } + + inst = (OfInstance *)g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + if (!inst) { + goto trace_exit; + } + + if (readstr(methodaddr, method, sizeof(method))) { + goto trace_exit; + } + + if (strcmp(inst->path, "/") == 0) { + if (strcmp(method, "ibm,client-architecture-support") == 0) { + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + g_assert(vmc->client_architecture_support); + ret = (uint32_t)vmc->client_architecture_support(ms, first_cpu, + param1); + } + + *ret2 = 0; + } + } else if (strcmp(inst->path, "/rtas") == 0) { + if (strcmp(method, "instantiate-rtas") == 0) { + vof_instantiate_rtas(&error_fatal); + ret = 0; + *ret2 = param1; /* rtas-base */ + } + } else { + trace_vof_error_unknown_method(method); + } + +trace_exit: + trace_vof_method(ihandle, method, param1, ret, *ret2); + + return ret; +} + +static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1, + uint32_t param2, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char cmd[VOF_MAX_FORTHCODE] = ""; + + /* There is no "interpret" implementation yet, so just emit a trace */ + readstr(cmdaddr, cmd, sizeof(cmd)); + trace_vof_interpret(cmd, param1, param2, ret, *ret2); + + return ret; +} + +static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof) +{ + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + /* No further FDT changes are expected after "quiesce", so pack the FDT now */ + int rc = fdt_pack(fdt); + + assert(rc == 0); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + if (vmc->quiesce) { + vmc->quiesce(ms); + } + } + + vof_claimed_dump(vof->claimed); +} + +static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof, + const char *service, + uint32_t *args, unsigned nargs, + uint32_t *rets, unsigned nrets) +{ + uint32_t ret = 0; + + /* @nrets
includes the value which this function returns */ +#define cmpserv(s, a, r) \ + cmpservice(service, nargs, nrets, (s), (a), (r)) + + if (cmpserv("finddevice", 1, 1)) { + ret = vof_finddevice(fdt, args[0]); + } else if (cmpserv("getprop", 4, 1)) { + ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]); + } else if (cmpserv("getproplen", 2, 1)) { + ret = vof_getproplen(fdt, args[0], args[1]); + } else if (cmpserv("setprop", 4, 1)) { + ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]); + } else if (cmpserv("nextprop", 3, 1)) { + ret = vof_nextprop(fdt, args[0], args[1], args[2]); + } else if (cmpserv("peer", 1, 1)) { + ret = vof_peer(fdt, args[0]); + } else if (cmpserv("child", 1, 1)) { + ret = vof_child(fdt, args[0]); + } else if (cmpserv("parent", 1, 1)) { + ret = vof_parent(fdt, args[0]); + } else if (cmpserv("open", 1, 1)) { + ret = vof_open(fdt, vof, args[0]); + } else if (cmpserv("close", 1, 0)) { + vof_close(vof, args[0]); + } else if (cmpserv("instance-to-package", 1, 1)) { + ret = vof_instance_to_package(vof, args[0]); + } else if (cmpserv("package-to-path", 3, 1)) { + ret = vof_package_to_path(fdt, args[0], args[1], args[2]); + } else if (cmpserv("instance-to-path", 3, 1)) { + ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]); + } else if (cmpserv("write", 3, 1)) { + ret = vof_write(vof, args[0], args[1], args[2]); + } else if (cmpserv("claim", 3, 1)) { + uint64_t ret64 = vof_claim(vof, args[0], args[1], args[2]); + + if (ret64 < 0x100000000UL) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + ret = (uint32_t)ret64; + } else { + if (ret64 != -1) { + vof_release(vof, ret64, args[1]); + } + ret = PROM_ERROR; + } + } else if (cmpserv("release", 2, 0)) { + ret = vof_release(vof, args[0], args[1]); + if (ret != PROM_ERROR) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + } + } else if (cmpserv("call-method", 0, 0)) { + ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3], + args[4], args[5], rets); + } else if (cmpserv("interpret", 0, 0)) { + ret = vof_call_interpret(args[0], args[1], args[2], rets); + } else if (cmpserv("milliseconds", 0, 1)) { + ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + } else if (cmpserv("quiesce", 0, 0)) { + vof_quiesce(ms, fdt, vof); + } else if (cmpserv("exit", 0, 0)) { + error_report("Stopped as the VM requested \"exit\""); + vm_stop(RUN_STATE_PAUSED); + } else { + trace_vof_error_unknown_service(service, nargs, nrets); + ret = -1; + } + +#undef cmpserv + + return ret; +} + +/* Defined as Big Endian */ +struct prom_args { + uint32_t service; + uint32_t nargs; + uint32_t nret; + uint32_t args[10]; +} QEMU_PACKED; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real) +{ + struct prom_args args_be; + uint32_t args[ARRAY_SIZE(args_be.args)]; + uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret; + char service[64]; + unsigned nargs, nret, i; + + if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) { + return -EINVAL; + } + nargs = be32_to_cpu(args_be.nargs); + if (nargs >= ARRAY_SIZE(args_be.args)) { + return -EINVAL; + } + + if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) != + MEMTX_OK) { + return -EINVAL; + } + if (strnlen(service, sizeof(service)) == sizeof(service)) { + /* The service name is too long */ + return -EINVAL; + } + + for (i = 0; i < nargs; ++i) { + args[i] = be32_to_cpu(args_be.args[i]); + } + + nret = be32_to_cpu(args_be.nret); + if (nret > ARRAY_SIZE(args_be.args) - nargs) { + return
-EINVAL; + } + ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret); + if (!nret) { + return 0; + } + + /* @nrets includes the value which this function returns */ + args_be.args[nargs] = cpu_to_be32(ret); + for (i = 1; i < nret; ++i) { + args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]); + } + + if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]), + args_be.args + nargs, sizeof(args_be.args[0]) * nret) != + MEMTX_OK) { + return -EINVAL; + } + + return 0; +} + +static void vof_instance_free(gpointer data) +{ + OfInstance *inst = (OfInstance *)data; + + g_free(inst->path); + g_free(inst); +} + +void vof_init(Vof *vof, uint64_t top_addr, Error **errp) +{ + vof_cleanup(vof); + + vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, vof_instance_free); + vof->claimed = g_array_new(false, false, sizeof(OfClaimed)); + + /* Keep allocations in 32bit as the client interface ABI can only return 32bit cells */ + vof->top_addr = MIN(top_addr, 4 * GiB); + if (vof_claim(vof, 0, vof->fw_size, 0) == -1) { + error_setg(errp, "Memory for firmware is in use"); + } +} + +void vof_cleanup(Vof *vof) +{ + if (vof->claimed) { + g_array_unref(vof->claimed); + } + if (vof->of_instances) { + g_hash_table_unref(vof->of_instances); + } + vof->claimed = NULL; + vof->of_instances = NULL; +} + +void vof_build_dt(void *fdt, Vof *vof) +{ + uint32_t phandle = fdt_get_max_phandle(fdt); + int offset, proplen = 0; + const void *prop; + + /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + prop = fdt_getprop(fdt, offset, "phandle", &proplen); + if (prop) { + continue; + } + ++phandle; + _FDT(fdt_setprop_cell(fdt, offset, "phandle", phandle)); + } + + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); +} + +static const TypeInfo vof_machine_if_info = { + .name = TYPE_VOF_MACHINE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(VofMachineIfClass), +}; + +static void vof_machine_if_register_types(void) +{ + type_register_static(&vof_machine_if_info); +} +type_init(vof_machine_if_register_types)
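For reference, the client-interface buffer that vof_client_call() parses is a packed array of 32-bit big-endian cells: the guest address of the service name, the input and return cell counts, and then the cells themselves. The sketch below is not part of the patch; it only illustrates how a guest client might lay out a "finddevice" call before entering VOF. The ci_args and ci_prepare_finddevice names are hypothetical, and <endian.h> assumes a glibc-like libc.

#include <endian.h>   /* htobe32()/be32toh() on glibc-like systems */
#include <stdint.h>
#include <string.h>

/* Mirrors struct prom_args above: every cell is big-endian. */
struct ci_args {
    uint32_t service;   /* guest address of the service name string */
    uint32_t nargs;     /* number of input cells */
    uint32_t nret;      /* number of return cells */
    uint32_t args[10];  /* args[0..nargs-1] inputs, args[nargs..] returns */
};

/* "finddevice" takes one input (a path address) and one return (a phandle). */
static void ci_prepare_finddevice(struct ci_args *ci, uint32_t service_addr,
                                  uint32_t path_addr)
{
    memset(ci, 0, sizeof(*ci));
    ci->service = htobe32(service_addr); /* points at "finddevice\0" */
    ci->nargs = htobe32(1);
    ci->nret = htobe32(1);
    ci->args[0] = htobe32(path_addr);    /* points at e.g. "/memory@0\0" */
    /* After the call, be32toh(ci->args[1]) holds the phandle or -1. */
}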
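This matches what vof_client_call() writes back: the handler's own result lands in the first return cell at args[nargs], and any extra cells from rets[] follow it, which is exactly what the "@nrets includes the value which this function returns" comment describes.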