aboutsummaryrefslogtreecommitdiffstats
path: root/hw/ppc
diff options
context:
space:
mode:
authorTimos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>2023-10-10 11:40:56 +0000
committerTimos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>2023-10-10 11:40:56 +0000
commite02cda008591317b1625707ff8e115a4841aa889 (patch)
treeaee302e3cf8b59ec2d32ec481be3d1afddfc8968 /hw/ppc
parentcc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff)
Introduce Virtio-loopback epsilon release:
The Epsilon release introduces a new compatibility layer which makes the virtio-loopback design work with QEMU and rust-vmm vhost-user backends without requiring any changes. Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'hw/ppc')
-rw-r--r--hw/ppc/Kconfig153
-rw-r--r--hw/ppc/e500-ccsr.h18
-rw-r--r--hw/ppc/e500.c1174
-rw-r--r--hw/ppc/e500.h49
-rw-r--r--hw/ppc/e500plat.c122
-rw-r--r--hw/ppc/fdt.c49
-rw-r--r--hw/ppc/fw_cfg.c45
-rw-r--r--hw/ppc/mac.h108
-rw-r--r--hw/ppc/mac_newworld.c663
-rw-r--r--hw/ppc/mac_oldworld.c455
-rw-r--r--hw/ppc/meson.build90
-rw-r--r--hw/ppc/mpc8544_guts.c142
-rw-r--r--hw/ppc/mpc8544ds.c74
-rw-r--r--hw/ppc/pef.c142
-rw-r--r--hw/ppc/pegasos2.c952
-rw-r--r--hw/ppc/pnv.c2132
-rw-r--r--hw/ppc/pnv_bmc.c313
-rw-r--r--hw/ppc/pnv_core.c441
-rw-r--r--hw/ppc/pnv_homer.c382
-rw-r--r--hw/ppc/pnv_lpc.c853
-rw-r--r--hw/ppc/pnv_occ.c302
-rw-r--r--hw/ppc/pnv_pnor.c141
-rw-r--r--hw/ppc/pnv_psi.c967
-rw-r--r--hw/ppc/pnv_xscom.c324
-rw-r--r--hw/ppc/ppc.c1465
-rw-r--r--hw/ppc/ppc405.h72
-rw-r--r--hw/ppc/ppc405_boards.c564
-rw-r--r--hw/ppc/ppc405_uc.c1547
-rw-r--r--hw/ppc/ppc440.h27
-rw-r--r--hw/ppc/ppc440_bamboo.c307
-rw-r--r--hw/ppc/ppc440_pcix.c538
-rw-r--r--hw/ppc/ppc440_uc.c1377
-rw-r--r--hw/ppc/ppc4xx_devs.c715
-rw-r--r--hw/ppc/ppc4xx_pci.c389
-rw-r--r--hw/ppc/ppc_booke.c369
-rw-r--r--hw/ppc/ppce500_spin.c209
-rw-r--r--hw/ppc/prep.c440
-rw-r--r--hw/ppc/prep_systemio.c315
-rw-r--r--hw/ppc/rs6000_mc.c238
-rw-r--r--hw/ppc/sam460ex.c516
-rw-r--r--hw/ppc/spapr.c5136
-rw-r--r--hw/ppc/spapr_caps.c944
-rw-r--r--hw/ppc/spapr_cpu_core.c391
-rw-r--r--hw/ppc/spapr_drc.c1326
-rw-r--r--hw/ppc/spapr_events.c1082
-rw-r--r--hw/ppc/spapr_hcall.c1557
-rw-r--r--hw/ppc/spapr_iommu.c718
-rw-r--r--hw/ppc/spapr_irq.c599
-rw-r--r--hw/ppc/spapr_numa.c697
-rw-r--r--hw/ppc/spapr_nvdimm.c528
-rw-r--r--hw/ppc/spapr_ovec.c241
-rw-r--r--hw/ppc/spapr_pci.c2530
-rw-r--r--hw/ppc/spapr_pci_nvlink2.c445
-rw-r--r--hw/ppc/spapr_pci_vfio.c217
-rw-r--r--hw/ppc/spapr_rng.c162
-rw-r--r--hw/ppc/spapr_rtas.c636
-rw-r--r--hw/ppc/spapr_rtas_ddw.c291
-rw-r--r--hw/ppc/spapr_rtc.c190
-rw-r--r--hw/ppc/spapr_softmmu.c612
-rw-r--r--hw/ppc/spapr_tpm_proxy.c177
-rw-r--r--hw/ppc/spapr_vio.c741
-rw-r--r--hw/ppc/spapr_vof.c167
-rw-r--r--hw/ppc/trace-events143
-rw-r--r--hw/ppc/trace.h1
-rw-r--r--hw/ppc/virtex_ml507.c316
-rw-r--r--hw/ppc/vof.c1062
66 files changed, 40088 insertions, 0 deletions
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig
new file mode 100644
index 000000000..400511c6b
--- /dev/null
+++ b/hw/ppc/Kconfig
@@ -0,0 +1,153 @@
+config PSERIES
+ bool
+ imply PCI_DEVICES
+ imply TEST_DEVICES
+ imply VIRTIO_VGA
+ imply NVDIMM
+ select DIMM
+ select PCI
+ select SPAPR_VSCSI
+ select VFIO if LINUX # needed by spapr_pci_vfio.c
+ select XICS
+ select XIVE
+ select MSI_NONBROKEN
+ select FDT_PPC
+ select CHRP_NVRAM
+ select VOF
+
+config SPAPR_RNG
+ bool
+ default y
+ depends on PSERIES
+
+config POWERNV
+ bool
+ imply PCI_DEVICES
+ imply TEST_DEVICES
+ select ISA_IPMI_BT
+ select IPMI_LOCAL
+ select ISA_BUS
+ select MC146818RTC
+ select XICS
+ select XIVE
+ select FDT_PPC
+ select PCI_POWERNV
+
+config PPC405
+ bool
+ select M48T59
+ select PFLASH_CFI02
+ select PPC4XX
+ select SERIAL
+
+config PPC440
+ bool
+ imply PCI_DEVICES
+ imply TEST_DEVICES
+ imply E1000_PCI
+ select PCI_EXPRESS
+ select PPC4XX
+ select SERIAL
+ select FDT_PPC
+
+config PPC4XX
+ bool
+ select BITBANG_I2C
+ select PCI
+ select PPC_UIC
+
+config SAM460EX
+ bool
+ select PPC405
+ select PFLASH_CFI01
+ select IDE_SII3112
+ select M41T80
+ select PPC440
+ select SERIAL
+ select SM501
+ select SMBUS_EEPROM
+ select USB_EHCI_SYSBUS
+ select USB_OHCI
+ select FDT_PPC
+
+config PEGASOS2
+ bool
+ select MV64361
+ select VT82C686
+ select IDE_VIA
+ select SMBUS_EEPROM
+ select VOF
+# ACPI_X86 should be pulled in by VT82C686 itself rather than selected here
+ select ACPI_X86
+ imply ATI_VGA
+
+config PREP
+ bool
+ imply PCI_DEVICES
+ imply TEST_DEVICES
+ select CS4231A
+ select RAVEN_PCI
+ select I82378
+ select LSI_SCSI_PCI
+ select M48T59
+ select PC87312
+ select RS6000_MC
+ select FW_CFG_PPC
+
+config RS6000_MC
+ bool
+
+config MAC_OLDWORLD
+ bool
+ imply PCI_DEVICES
+ imply SUNGEM
+ imply TEST_DEVICES
+ select ADB
+ select GRACKLE_PCI
+ select HEATHROW_PIC
+ select MACIO
+ select FW_CFG_PPC
+
+config MAC_NEWWORLD
+ bool
+ imply PCI_DEVICES
+ imply SUNGEM
+ imply TEST_DEVICES
+ select ADB
+ select MACIO
+ select MACIO_GPIO
+ select MAC_PMU
+ select UNIN_PCI
+ select FW_CFG_PPC
+
+config E500
+ bool
+ imply AT24C
+ imply VIRTIO_PCI
+ select ETSEC
+ select OPENPIC
+ select PLATFORM_BUS
+ select PPCE500_PCI
+ select SERIAL
+ select MPC_I2C
+ select FDT_PPC
+ select DS1338
+
+config VIRTEX
+ bool
+ select PPC4XX
+ select PFLASH_CFI01
+ select SERIAL
+ select XILINX
+ select XILINX_ETHLITE
+ select FDT_PPC
+
+# Only used by 64-bit targets
+config FW_CFG_PPC
+ bool
+
+config FDT_PPC
+ bool
+
+config VOF
+ bool
diff --git a/hw/ppc/e500-ccsr.h b/hw/ppc/e500-ccsr.h
new file mode 100644
index 000000000..249c17be3
--- /dev/null
+++ b/hw/ppc/e500-ccsr.h
@@ -0,0 +1,18 @@
+#ifndef E500_CCSR_H
+#define E500_CCSR_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+struct PPCE500CCSRState { /* instance state of the "e500-ccsr" sysbus device (TYPE_CCSR) */
+ /*< private >*/
+ SysBusDevice parent;
+ /*< public >*/
+
+ MemoryRegion ccsr_space; /* memory region covering the CCSR block; the board adds device regions into it */
+};
+
+#define TYPE_CCSR "e500-ccsr"
+OBJECT_DECLARE_SIMPLE_TYPE(PPCE500CCSRState, CCSR)
+
+#endif /* E500_CCSR_H */
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
new file mode 100644
index 000000000..960e7efcd
--- /dev/null
+++ b/hw/ppc/e500.c
@@ -0,0 +1,1174 @@
+/*
+ * QEMU PowerPC e500-based platforms
+ *
+ * Copyright (C) 2009 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Yu Liu, <yu.liu@freescale.com>
+ *
+ * This file is derived from hw/ppc440_bamboo.c,
+ * the copyright for that material belongs to the original owners.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "e500.h"
+#include "e500-ccsr.h"
+#include "net/net.h"
+#include "qemu/config-file.h"
+#include "hw/char/serial.h"
+#include "hw/pci/pci.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "kvm_ppc.h"
+#include "sysemu/device_tree.h"
+#include "hw/ppc/openpic.h"
+#include "hw/ppc/openpic_kvm.h"
+#include "hw/ppc/ppc.h"
+#include "hw/qdev-properties.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "hw/sysbus.h"
+#include "qemu/host-utils.h"
+#include "qemu/option.h"
+#include "hw/pci-host/ppce500.h"
+#include "qemu/error-report.h"
+#include "hw/platform-bus.h"
+#include "hw/net/fsl_etsec/etsec.h"
+#include "hw/i2c/i2c.h"
+#include "hw/irq.h"
+
+#define EPAPR_MAGIC (0x45504150) /* ASCII "EPAP"; loaded into r6 when the primary CPU is reset */
+#define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb"
+#define DTC_LOAD_PAD 0x1800000
+#define DTC_PAD_MASK 0xFFFFF
+#define DTB_MAX_SIZE (8 * MiB)
+#define INITRD_LOAD_PAD 0x2000000
+#define INITRD_PAD_MASK 0xFFFFFF
+
+#define RAM_SIZES_ALIGN (64 * MiB) /* machine RAM size must be a multiple of this */
+
+/* TODO: parameterize */
+#define MPC8544_CCSRBAR_SIZE 0x00100000ULL /* size of the soc "ranges" window in the device tree */
+#define MPC8544_MPIC_REGS_OFFSET 0x40000ULL /* the *_OFFSET values below are offsets inside the CCSR region */
+#define MPC8544_MSI_REGS_OFFSET 0x41600ULL
+#define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL
+#define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL
+#define MPC8544_PCI_REGS_OFFSET 0x8000ULL
+#define MPC8544_PCI_REGS_SIZE 0x1000ULL
+#define MPC8544_UTIL_OFFSET 0xe0000ULL
+#define MPC8XXX_GPIO_OFFSET 0x000FF000ULL
+#define MPC8544_I2C_REGS_OFFSET 0x3000ULL
+#define MPC8XXX_GPIO_IRQ 47 /* first cell of the gpio node's "interrupts" property */
+#define MPC8544_I2C_IRQ 43 /* first cell of the i2c node's "interrupts" property */
+#define RTC_REGS_OFFSET 0x68 /* unit address and "reg" value of the rtc node created below the i2c node */
+
+#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000) /* 400 MHz; default clock/timebase frequency advertised in the device tree */
+
+struct boot_info /* boot parameters hung off env->load_info of the primary CPU */
+{
+ uint32_t dt_base; /* device tree address; passed to the guest in r3 on reset */
+ uint32_t dt_size; /* device tree size; with dt_base it sizes the initial TLB mapping */
+ uint32_t entry; /* initial NIP of the primary CPU */
+};
+
+/*
+ * Build the payload of the PCI host bridge "interrupt-map" property.
+ *
+ * For every slot in [first_slot, first_slot + nr_slots) and each of the four
+ * interrupt pins, seven big-endian cells are written:
+ *   slot << 11, 0, 0, pin (1..4), @mpic, host irq + 1, 1
+ *
+ * @fdt is not used.  *@len receives the size of the map in bytes.  The
+ * returned buffer is allocated with g_malloc() and owned by the caller.
+ */
+static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot,
+                                int nr_slots, int *len)
+{
+    const int pins_per_slot = 4;
+    const int cells_per_entry = 7;
+    uint32_t *map;
+    uint32_t *cell;
+    int slot_nr;
+    int pin;
+
+    *len = nr_slots * pins_per_slot * cells_per_entry * sizeof(uint32_t);
+    map = g_malloc(*len);
+    cell = map;
+
+    for (slot_nr = first_slot; slot_nr < first_slot + nr_slots; slot_nr++) {
+        for (pin = 0; pin < pins_per_slot; pin++) {
+            int host_irq;
+
+            *cell++ = cpu_to_be32(slot_nr << 11);
+            *cell++ = cpu_to_be32(0x0);
+            *cell++ = cpu_to_be32(0x0);
+            *cell++ = cpu_to_be32(pin + 1);
+            *cell++ = cpu_to_be32(mpic);
+            host_irq = ppce500_pci_map_irq_slot(slot_nr, pin);
+            *cell++ = cpu_to_be32(host_irq + 1);
+            *cell++ = cpu_to_be32(0x1);
+        }
+    }
+
+    /* every allocated cell must have been filled, no more and no less */
+    assert(((cell - map) * sizeof(uint32_t)) == *len);
+
+    return map;
+}
+
+/*
+ * Add an ns16550 serial node "<soc>/serial@<offset>" and register it under
+ * /aliases as @alias.
+ *
+ * @offset: register offset; used as unit address and as first "reg" cell
+ * @mpic:   node path of the interrupt controller ("interrupt-parent")
+ * @idx:    value of the "cell-index" property
+ * @defcon: when true, /chosen is also pointed at this node as console
+ */
+static void dt_serial_create(void *fdt, unsigned long long offset,
+                             const char *soc, const char *mpic,
+                             const char *alias, int idx, bool defcon)
+{
+    char *uart_path = g_strdup_printf("%s/serial@%llx", soc, offset);
+
+    qemu_fdt_add_subnode(fdt, uart_path);
+    qemu_fdt_setprop_string(fdt, uart_path, "device_type", "serial");
+    qemu_fdt_setprop_string(fdt, uart_path, "compatible", "ns16550");
+    qemu_fdt_setprop_cells(fdt, uart_path, "reg", offset, 0x100);
+    qemu_fdt_setprop_cell(fdt, uart_path, "cell-index", idx);
+    qemu_fdt_setprop_cell(fdt, uart_path, "clock-frequency",
+                          PLATFORM_CLK_FREQ_HZ);
+    qemu_fdt_setprop_cells(fdt, uart_path, "interrupts", 42, 2);
+    qemu_fdt_setprop_phandle(fdt, uart_path, "interrupt-parent", mpic);
+    qemu_fdt_setprop_string(fdt, "/aliases", alias, uart_path);
+
+    if (defcon) {
+        /*
+         * Linux deprecated "linux,stdout-path" (and "stdout") in favour of
+         * "stdout-path".  Both are written here: the new one for current
+         * kernels, the old one so existing firmware keeps working.
+         */
+        qemu_fdt_setprop_string(fdt, "/chosen", "linux,stdout-path",
+                                uart_path);
+        qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", uart_path);
+    }
+
+    g_free(uart_path);
+}
+
+/*
+ * Add the GPIO controller node "<soc>/gpio@<MPC8XXX_GPIO_OFFSET>" together
+ * with a "<soc>/power-off" node ("gpio-poweroff") that references it.
+ *
+ * @soc:  path of the parent soc node
+ * @mpic: node path of the interrupt controller ("interrupt-parent")
+ */
+static void create_dt_mpc8xxx_gpio(void *fdt, const char *soc, const char *mpic)
+{
+    hwaddr gpio_base = MPC8XXX_GPIO_OFFSET;
+    int gpio_irq = MPC8XXX_GPIO_IRQ;
+    gchar *gpio_path = g_strdup_printf("%s/gpio@%"PRIx64, soc, gpio_base);
+    gchar *poweroff_path = g_strdup_printf("%s/power-off", soc);
+    int phandle;
+
+    /* The controller itself */
+    qemu_fdt_add_subnode(fdt, gpio_path);
+    qemu_fdt_setprop_string(fdt, gpio_path, "compatible", "fsl,qoriq-gpio");
+    qemu_fdt_setprop_cells(fdt, gpio_path, "reg", gpio_base, 0x1000);
+    qemu_fdt_setprop_cells(fdt, gpio_path, "interrupts", gpio_irq, 0x2);
+    qemu_fdt_setprop_phandle(fdt, gpio_path, "interrupt-parent", mpic);
+    qemu_fdt_setprop_cells(fdt, gpio_path, "#gpio-cells", 2);
+    qemu_fdt_setprop(fdt, gpio_path, "gpio-controller", NULL, 0);
+
+    /* Give it a phandle, under both property names, so it can be referenced */
+    phandle = qemu_fdt_alloc_phandle(fdt);
+    qemu_fdt_setprop_cell(fdt, gpio_path, "phandle", phandle);
+    qemu_fdt_setprop_cell(fdt, gpio_path, "linux,phandle", phandle);
+
+    /* Power-off pin: "gpios" cells are <controller phandle, 0, 0> */
+    qemu_fdt_add_subnode(fdt, poweroff_path);
+    qemu_fdt_setprop_string(fdt, poweroff_path, "compatible", "gpio-poweroff");
+    qemu_fdt_setprop_cells(fdt, poweroff_path, "gpios", phandle, 0, 0);
+
+    g_free(poweroff_path);
+    g_free(gpio_path);
+}
+
+/*
+ * Add an RTC node "<i2c>/rtc@<RTC_REGS_OFFSET>" and register it under
+ * /aliases as @alias.
+ *
+ * @i2c: prefix the node path is built from.  NOTE(review): the only caller
+ *       visible here passes the alias name "i2c" rather than an absolute
+ *       path, presumably relying on alias resolution in the FDT helpers -
+ *       confirm.
+ */
+static void dt_rtc_create(void *fdt, const char *i2c, const char *alias)
+{
+    int rtc_addr = RTC_REGS_OFFSET;
+    gchar *rtc_path = g_strdup_printf("%s/rtc@%"PRIx32, i2c, rtc_addr);
+
+    qemu_fdt_add_subnode(fdt, rtc_path);
+    qemu_fdt_setprop_string(fdt, rtc_path, "compatible", "pericom,pt7c4338");
+    qemu_fdt_setprop_cells(fdt, rtc_path, "reg", rtc_addr);
+    qemu_fdt_setprop_string(fdt, "/aliases", alias, rtc_path);
+
+    g_free(rtc_path);
+}
+
+/*
+ * Add the I2C controller node "<soc>/i2c@<MPC8544_I2C_REGS_OFFSET>" and
+ * register it under /aliases as @alias.
+ *
+ * @soc:  path of the parent soc node
+ * @mpic: node path of the interrupt controller ("interrupt-parent")
+ */
+static void dt_i2c_create(void *fdt, const char *soc, const char *mpic,
+                          const char *alias)
+{
+    hwaddr i2c_base = MPC8544_I2C_REGS_OFFSET;
+    int i2c_irq = MPC8544_I2C_IRQ;
+    gchar *i2c_path = g_strdup_printf("%s/i2c@%"PRIx64, soc, i2c_base);
+
+    qemu_fdt_add_subnode(fdt, i2c_path);
+    qemu_fdt_setprop_string(fdt, i2c_path, "device_type", "i2c");
+    qemu_fdt_setprop_string(fdt, i2c_path, "compatible", "fsl-i2c");
+    qemu_fdt_setprop_cells(fdt, i2c_path, "reg", i2c_base, 0x14);
+    qemu_fdt_setprop_cells(fdt, i2c_path, "cell-index", 0);
+    qemu_fdt_setprop_cells(fdt, i2c_path, "interrupts", i2c_irq, 0x2);
+    qemu_fdt_setprop_phandle(fdt, i2c_path, "interrupt-parent", mpic);
+    qemu_fdt_setprop_string(fdt, "/aliases", alias, i2c_path);
+
+    g_free(i2c_path);
+}
+
+
+typedef struct PlatformDevtreeData { /* context handed to the per-device callback while building /platform */
+ void *fdt; /* device tree blob being built */
+ const char *mpic; /* node path of the interrupt controller */
+ int irq_start; /* added to a device's platform-bus IRQ number to form its "interrupts" value */
+ const char *node; /* path of the platform bus node */
+ PlatformBusDevice *pbus; /* bus queried for each device's MMIO address and IRQ numbers */
+} PlatformDevtreeData;
+
+/*
+ * Describe one eTSEC network device on the platform bus.
+ *
+ * Creates "/platform/ethernet@<mmio>" carrying the MAC address and a
+ * fixed-link description, plus a "queue-group" child that holds the register
+ * window and the device's three interrupts (each offset by data->irq_start).
+ *
+ * Asserts that the platform bus reports a non-negative MMIO address and
+ * non-negative IRQ numbers for the device.  Always returns 0.
+ */
+static int create_devtree_etsec(SysBusDevice *sbdev, PlatformDevtreeData *data)
+{
+    void *fdt = data->fdt;
+    PlatformBusDevice *bus = data->pbus;
+    eTSEC *nic = ETSEC_COMMON(sbdev);
+    hwaddr base = platform_bus_get_mmio_addr(bus, sbdev, 0);
+    int first_irq = platform_bus_get_irqn(bus, sbdev, 0);
+    int second_irq = platform_bus_get_irqn(bus, sbdev, 1);
+    int third_irq = platform_bus_get_irqn(bus, sbdev, 2);
+    gchar *eth_path;
+    gchar *queue_path;
+
+    assert((int64_t)base >= 0);
+    assert(first_irq >= 0);
+    assert(second_irq >= 0);
+    assert(third_irq >= 0);
+
+    eth_path = g_strdup_printf("/platform/ethernet@%"PRIx64, base);
+    queue_path = g_strdup_printf("%s/queue-group", eth_path);
+
+    /* The controller node */
+    qemu_fdt_add_subnode(fdt, eth_path);
+    qemu_fdt_setprop(fdt, eth_path, "ranges", NULL, 0);
+    qemu_fdt_setprop_string(fdt, eth_path, "device_type", "network");
+    qemu_fdt_setprop_string(fdt, eth_path, "compatible", "fsl,etsec2");
+    qemu_fdt_setprop_string(fdt, eth_path, "model", "eTSEC");
+    qemu_fdt_setprop(fdt, eth_path, "local-mac-address",
+                     nic->conf.macaddr.a, 6);
+    qemu_fdt_setprop_cells(fdt, eth_path, "fixed-link", 0, 1, 1000, 0, 0);
+    qemu_fdt_setprop_cells(fdt, eth_path, "#size-cells", 1);
+    qemu_fdt_setprop_cells(fdt, eth_path, "#address-cells", 1);
+
+    /* Its single queue group: registers and interrupts */
+    qemu_fdt_add_subnode(fdt, queue_path);
+    qemu_fdt_setprop_cells(fdt, queue_path, "reg", base, 0x1000);
+    qemu_fdt_setprop_cells(fdt, queue_path, "interrupts",
+                           data->irq_start + first_irq, 0x2,
+                           data->irq_start + second_irq, 0x2,
+                           data->irq_start + third_irq, 0x2);
+
+    g_free(queue_path);
+    g_free(eth_path);
+
+    return 0;
+}
+
+/*
+ * Per-device callback: emit the device tree node for one dynamically created
+ * sysbus device.  @opaque is the PlatformDevtreeData of the current build.
+ *
+ * Only eTSEC devices are handled; any other device type is reported as
+ * unsupported and terminates QEMU with exit status 1.
+ */
+static void sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque)
+{
+    PlatformDevtreeData *ctx = opaque;
+
+    if (object_dynamic_cast(OBJECT(sbdev), TYPE_ETSEC_COMMON)) {
+        create_devtree_etsec(sbdev, ctx);
+        return;
+    }
+
+    error_report("Device %s is not supported by this machine yet.",
+                 qdev_fw_name(DEVICE(sbdev)));
+    exit(1);
+}
+
+/*
+ * Create the "/platform@<base>" bus node and one child node for every
+ * dynamically created sysbus device.
+ *
+ * @mpic: node path of the interrupt controller, set as "interrupt-parent"
+ */
+static void platform_bus_create_devtree(PPCE500MachineState *pms,
+                                        void *fdt, const char *mpic)
+{
+    /* two NUL-separated strings; sizeof() keeps the final terminator too */
+    const char compat_list[] = "qemu,platform\0simple-bus";
+    const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
+    uint64_t bus_base = pmc->platform_bus_base;
+    uint64_t bus_size = pmc->platform_bus_size;
+    gchar *bus_path = g_strdup_printf("/platform@%"PRIx64,
+                                      pmc->platform_bus_base);
+    PlatformDevtreeData ctx;
+
+    /* The bus node all dynamic devices are placed under */
+    qemu_fdt_add_subnode(fdt, bus_path);
+    qemu_fdt_setprop(fdt, bus_path, "compatible", compat_list,
+                     sizeof(compat_list));
+
+    /*
+     * The platform bus region is smaller than 4 GiB, so one cell each is
+     * enough for child addresses and sizes.
+     */
+    qemu_fdt_setprop_cells(fdt, bus_path, "#size-cells", 1);
+    qemu_fdt_setprop_cells(fdt, bus_path, "#address-cells", 1);
+    qemu_fdt_setprop_cells(fdt, bus_path, "ranges", 0, bus_base >> 32,
+                           bus_base, bus_size);
+
+    qemu_fdt_setprop_phandle(fdt, bus_path, "interrupt-parent", mpic);
+
+    /* Walk all dynamic sysbus devices and let the callback describe them */
+    ctx.fdt = fdt;
+    ctx.mpic = mpic;
+    ctx.irq_start = pmc->platform_bus_first_irq;
+    ctx.node = bus_path;
+    ctx.pbus = pms->pbus_dev;
+    foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &ctx);
+
+    g_free(bus_path);
+}
+
+static int ppce500_load_device_tree(PPCE500MachineState *pms, /* returns the blob size in bytes, or -1 if no tree could be loaded/created */
+ hwaddr addr, /* guest-physical address the blob is written to */
+ hwaddr initrd_base,
+ hwaddr initrd_size, /* 0: no initrd properties are emitted */
+ hwaddr kernel_base, /* -1ULL: no "qemu,boot-kernel" property is emitted */
+ hwaddr kernel_size,
+ bool dry_run) /* true: compute the size only, do not dump or write the blob */
+{
+ MachineState *machine = MACHINE(pms);
+ unsigned int smp_cpus = machine->smp.cpus;
+ const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
+ CPUPPCState *env = first_cpu->env_ptr;
+ int ret = -1;
+ uint64_t mem_reg_property[] = { 0, cpu_to_be64(machine->ram_size) }; /* <base 0, size>, already big-endian */
+ int fdt_size;
+ void *fdt;
+ uint8_t hypercall[16];
+ uint32_t clock_freq = PLATFORM_CLK_FREQ_HZ; /* defaults; replaced by host values under KVM */
+ uint32_t tb_freq = PLATFORM_CLK_FREQ_HZ;
+ int i;
+ char compatible_sb[] = "fsl,mpc8544-immr\0simple-bus"; /* two NUL-separated compatible strings */
+ char *soc;
+ char *mpic;
+ uint32_t mpic_ph;
+ uint32_t msi_ph;
+ char *gutil;
+ char *pci;
+ char *msi;
+ uint32_t *pci_map = NULL; /* stays NULL on the early-exit and user-DTB paths; g_free(NULL) is fine */
+ int len;
+ uint32_t pci_ranges[14] = /* two 7-cell entries, kept in host order until byte-swapped below */
+ {
+ 0x2000000, 0x0, pmc->pci_mmio_bus_base,
+ pmc->pci_mmio_base >> 32, pmc->pci_mmio_base,
+ 0x0, 0x20000000,
+
+ 0x1000000, 0x0, 0x0,
+ pmc->pci_pio_base >> 32, pmc->pci_pio_base,
+ 0x0, 0x10000,
+ };
+ const char *dtb_file = machine->dtb;
+ const char *toplevel_compat = machine->dt_compatible;
+
+ if (dtb_file) { /* user-supplied DTB: used as loaded, none of the nodes below are generated */
+ char *filename;
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file);
+ if (!filename) {
+ goto out;
+ }
+
+ fdt = load_device_tree(filename, &fdt_size);
+ g_free(filename);
+ if (!fdt) {
+ goto out;
+ }
+ goto done;
+ }
+
+ fdt = create_device_tree(&fdt_size);
+ if (fdt == NULL) {
+ goto out;
+ }
+
+ /* Manipulate device tree in memory. */
+ qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 2);
+ qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 2);
+
+ qemu_fdt_add_subnode(fdt, "/memory");
+ qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory");
+ qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property,
+ sizeof(mem_reg_property));
+
+ qemu_fdt_add_subnode(fdt, "/chosen");
+ if (initrd_size) {
+ ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start",
+ initrd_base);
+ if (ret < 0) { /* failures in /chosen are only reported; tree generation continues */
+ fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n");
+ }
+
+ ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end",
+ (initrd_base + initrd_size));
+ if (ret < 0) {
+ fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n");
+ }
+
+ }
+
+ if (kernel_base != -1ULL) {
+ qemu_fdt_setprop_cells(fdt, "/chosen", "qemu,boot-kernel",
+ kernel_base >> 32, kernel_base,
+ kernel_size >> 32, kernel_size);
+ }
+
+ ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
+ machine->kernel_cmdline);
+ if (ret < 0)
+ fprintf(stderr, "couldn't set /chosen/bootargs\n");
+
+ if (kvm_enabled()) {
+ /* Read out host's frequencies */
+ clock_freq = kvmppc_get_clockfreq();
+ tb_freq = kvmppc_get_tbfreq();
+
+ /* indicate KVM hypercall interface */
+ qemu_fdt_add_subnode(fdt, "/hypervisor");
+ qemu_fdt_setprop_string(fdt, "/hypervisor", "compatible",
+ "linux,kvm");
+ kvmppc_get_hypercall(env, hypercall, sizeof(hypercall));
+ qemu_fdt_setprop(fdt, "/hypervisor", "hcall-instructions",
+ hypercall, sizeof(hypercall));
+ /* if KVM supports the idle hcall, set property indicating this */
+ if (kvmppc_get_hasidle(env)) {
+ qemu_fdt_setprop(fdt, "/hypervisor", "has-idle", NULL, 0);
+ }
+ }
+
+ /* Create CPU nodes */
+ qemu_fdt_add_subnode(fdt, "/cpus");
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1);
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0);
+
+ /* We need to generate the cpu nodes in reverse order, so Linux can pick
+ the first node as boot node and be happy */
+ for (i = smp_cpus - 1; i >= 0; i--) {
+ CPUState *cpu;
+ char *cpu_name;
+ uint64_t cpu_release_addr = pmc->spin_base + (i * 0x20); /* one 0x20-byte slot per CPU starting at spin_base */
+
+ cpu = qemu_get_cpu(i);
+ if (cpu == NULL) {
+ continue;
+ }
+ env = cpu->env_ptr;
+
+ cpu_name = g_strdup_printf("/cpus/PowerPC,8544@%x", i);
+ qemu_fdt_add_subnode(fdt, cpu_name);
+ qemu_fdt_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq);
+ qemu_fdt_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq);
+ qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu");
+ qemu_fdt_setprop_cell(fdt, cpu_name, "reg", i);
+ qemu_fdt_setprop_cell(fdt, cpu_name, "d-cache-line-size",
+ env->dcache_line_size);
+ qemu_fdt_setprop_cell(fdt, cpu_name, "i-cache-line-size",
+ env->icache_line_size);
+ qemu_fdt_setprop_cell(fdt, cpu_name, "d-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cpu_name, "i-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cpu_name, "bus-frequency", 0);
+ if (cpu->cpu_index) { /* secondary CPUs are marked disabled and released through the spin table */
+ qemu_fdt_setprop_string(fdt, cpu_name, "status", "disabled");
+ qemu_fdt_setprop_string(fdt, cpu_name, "enable-method",
+ "spin-table");
+ qemu_fdt_setprop_u64(fdt, cpu_name, "cpu-release-addr",
+ cpu_release_addr);
+ } else {
+ qemu_fdt_setprop_string(fdt, cpu_name, "status", "okay");
+ }
+ g_free(cpu_name);
+ }
+
+ qemu_fdt_add_subnode(fdt, "/aliases");
+ /* XXX These should go into their respective devices' code */
+ soc = g_strdup_printf("/soc@%"PRIx64, pmc->ccsrbar_base);
+ qemu_fdt_add_subnode(fdt, soc);
+ qemu_fdt_setprop_string(fdt, soc, "device_type", "soc");
+ qemu_fdt_setprop(fdt, soc, "compatible", compatible_sb,
+ sizeof(compatible_sb));
+ qemu_fdt_setprop_cell(fdt, soc, "#address-cells", 1);
+ qemu_fdt_setprop_cell(fdt, soc, "#size-cells", 1);
+ qemu_fdt_setprop_cells(fdt, soc, "ranges", 0x0,
+ pmc->ccsrbar_base >> 32, pmc->ccsrbar_base,
+ MPC8544_CCSRBAR_SIZE);
+ /* XXX should contain a reasonable value */
+ qemu_fdt_setprop_cell(fdt, soc, "bus-frequency", 0);
+
+ mpic = g_strdup_printf("%s/pic@%llx", soc, MPC8544_MPIC_REGS_OFFSET);
+ qemu_fdt_add_subnode(fdt, mpic);
+ qemu_fdt_setprop_string(fdt, mpic, "device_type", "open-pic");
+ qemu_fdt_setprop_string(fdt, mpic, "compatible", "fsl,mpic");
+ qemu_fdt_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_OFFSET,
+ 0x40000);
+ qemu_fdt_setprop_cell(fdt, mpic, "#address-cells", 0);
+ qemu_fdt_setprop_cell(fdt, mpic, "#interrupt-cells", 2);
+ mpic_ph = qemu_fdt_alloc_phandle(fdt);
+ qemu_fdt_setprop_cell(fdt, mpic, "phandle", mpic_ph);
+ qemu_fdt_setprop_cell(fdt, mpic, "linux,phandle", mpic_ph);
+ qemu_fdt_setprop(fdt, mpic, "interrupt-controller", NULL, 0);
+
+ /*
+ * We have to generate ser1 first, because Linux takes the first
+ * device it finds in the dt as serial output device. And we generate
+ * devices in reverse order to the dt.
+ */
+ if (serial_hd(1)) {
+ dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET,
+ soc, mpic, "serial1", 1, false);
+ }
+
+ if (serial_hd(0)) {
+ dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET,
+ soc, mpic, "serial0", 0, true);
+ }
+
+ /* i2c */
+ dt_i2c_create(fdt, soc, mpic, "i2c");
+
+ dt_rtc_create(fdt, "i2c", "rtc"); /* parent is named by the "i2c" alias registered just above, not by a full path */
+
+
+ gutil = g_strdup_printf("%s/global-utilities@%llx", soc,
+ MPC8544_UTIL_OFFSET);
+ qemu_fdt_add_subnode(fdt, gutil);
+ qemu_fdt_setprop_string(fdt, gutil, "compatible", "fsl,mpc8544-guts");
+ qemu_fdt_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_OFFSET, 0x1000);
+ qemu_fdt_setprop(fdt, gutil, "fsl,has-rstcr", NULL, 0);
+ g_free(gutil);
+
+ msi = g_strdup_printf("/%s/msi@%llx", soc, MPC8544_MSI_REGS_OFFSET); /* NOTE(review): soc already starts with '/', so this path begins with "//" - confirm intended */
+ qemu_fdt_add_subnode(fdt, msi);
+ qemu_fdt_setprop_string(fdt, msi, "compatible", "fsl,mpic-msi");
+ qemu_fdt_setprop_cells(fdt, msi, "reg", MPC8544_MSI_REGS_OFFSET, 0x200);
+ msi_ph = qemu_fdt_alloc_phandle(fdt);
+ qemu_fdt_setprop_cells(fdt, msi, "msi-available-ranges", 0x0, 0x100);
+ qemu_fdt_setprop_phandle(fdt, msi, "interrupt-parent", mpic);
+ qemu_fdt_setprop_cells(fdt, msi, "interrupts",
+ 0xe0, 0x0,
+ 0xe1, 0x0,
+ 0xe2, 0x0,
+ 0xe3, 0x0,
+ 0xe4, 0x0,
+ 0xe5, 0x0,
+ 0xe6, 0x0,
+ 0xe7, 0x0);
+ qemu_fdt_setprop_cell(fdt, msi, "phandle", msi_ph);
+ qemu_fdt_setprop_cell(fdt, msi, "linux,phandle", msi_ph);
+ g_free(msi);
+
+ pci = g_strdup_printf("/pci@%llx",
+ pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET);
+ qemu_fdt_add_subnode(fdt, pci);
+ qemu_fdt_setprop_cell(fdt, pci, "cell-index", 0);
+ qemu_fdt_setprop_string(fdt, pci, "compatible", "fsl,mpc8540-pci");
+ qemu_fdt_setprop_string(fdt, pci, "device_type", "pci");
+ qemu_fdt_setprop_cells(fdt, pci, "interrupt-map-mask", 0xf800, 0x0,
+ 0x0, 0x7);
+ pci_map = pci_map_create(fdt, qemu_fdt_get_phandle(fdt, mpic),
+ pmc->pci_first_slot, pmc->pci_nr_slots,
+ &len);
+ qemu_fdt_setprop(fdt, pci, "interrupt-map", pci_map, len);
+ qemu_fdt_setprop_phandle(fdt, pci, "interrupt-parent", mpic);
+ qemu_fdt_setprop_cells(fdt, pci, "interrupts", 24, 2);
+ qemu_fdt_setprop_cells(fdt, pci, "bus-range", 0, 255);
+ for (i = 0; i < 14; i++) { /* convert the ranges table to big-endian in place before storing it raw */
+ pci_ranges[i] = cpu_to_be32(pci_ranges[i]);
+ }
+ qemu_fdt_setprop_cell(fdt, pci, "fsl,msi", msi_ph);
+ qemu_fdt_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges));
+ qemu_fdt_setprop_cells(fdt, pci, "reg",
+ (pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET) >> 32,
+ (pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET),
+ 0, 0x1000);
+ qemu_fdt_setprop_cell(fdt, pci, "clock-frequency", 66666666);
+ qemu_fdt_setprop_cell(fdt, pci, "#interrupt-cells", 1);
+ qemu_fdt_setprop_cell(fdt, pci, "#size-cells", 2);
+ qemu_fdt_setprop_cell(fdt, pci, "#address-cells", 3);
+ qemu_fdt_setprop_string(fdt, "/aliases", "pci0", pci);
+ g_free(pci);
+
+ if (pmc->has_mpc8xxx_gpio) {
+ create_dt_mpc8xxx_gpio(fdt, soc, mpic);
+ }
+ g_free(soc);
+
+ if (pms->pbus_dev) {
+ platform_bus_create_devtree(pms, fdt, mpic);
+ }
+ g_free(mpic);
+
+ pmc->fixup_devtree(fdt); /* machine-class hook, run after all generated nodes exist */
+
+ if (toplevel_compat) {
+ qemu_fdt_setprop(fdt, "/", "compatible", toplevel_compat,
+ strlen(toplevel_compat) + 1);
+ }
+
+done:
+ if (!dry_run) {
+ qemu_fdt_dumpdtb(fdt, fdt_size);
+ cpu_physical_memory_write(addr, fdt, fdt_size);
+ }
+ ret = fdt_size; /* success: report the blob size, overriding any earlier setprop status */
+ g_free(fdt);
+
+out:
+ g_free(pci_map);
+
+ return ret;
+}
+
+typedef struct DeviceTreeParams { /* arguments saved so the device tree can be regenerated on reset / machine-init-done */
+ PPCE500MachineState *machine;
+ hwaddr addr; /* guest-physical address the blob is written to */
+ hwaddr initrd_base;
+ hwaddr initrd_size;
+ hwaddr kernel_base;
+ hwaddr kernel_size;
+ Notifier notifier; /* machine-init-done notifier; its container is recovered with container_of() */
+} DeviceTreeParams;
+
+/*
+ * Reset handler: rebuild the device tree from the saved parameters in
+ * @opaque (a DeviceTreeParams) and write it into guest memory.
+ */
+static void ppce500_reset_device_tree(void *opaque)
+{
+    DeviceTreeParams *params = opaque;
+    const bool dry_run = false;
+
+    ppce500_load_device_tree(params->machine, params->addr,
+                             params->initrd_base, params->initrd_size,
+                             params->kernel_base, params->kernel_size,
+                             dry_run);
+}
+
+/*
+ * Machine-init-done notifier: regenerate the device tree for the
+ * DeviceTreeParams that embeds @notifier.  @data is unused.
+ */
+static void ppce500_init_notify(Notifier *notifier, void *data)
+{
+    ppce500_reset_device_tree(container_of(notifier, DeviceTreeParams,
+                                           notifier));
+}
+
+/*
+ * Save the device tree parameters and arrange for the tree to be written to
+ * guest memory on every reset and once machine initialisation is done.
+ *
+ * The saved parameter block is allocated here and never freed in this
+ * function; it is handed to the reset and notifier registrations.
+ *
+ * Returns the result of a dry-run generation, i.e. the size of the blob
+ * (or a negative value on failure) without touching guest memory.
+ */
+static int ppce500_prep_device_tree(PPCE500MachineState *machine,
+                                    hwaddr addr,
+                                    hwaddr initrd_base,
+                                    hwaddr initrd_size,
+                                    hwaddr kernel_base,
+                                    hwaddr kernel_size)
+{
+    DeviceTreeParams *params = g_new(DeviceTreeParams, 1);
+
+    params->machine = machine;
+    params->addr = addr;
+    params->initrd_base = initrd_base;
+    params->initrd_size = initrd_size;
+    params->kernel_base = kernel_base;
+    params->kernel_size = kernel_size;
+    params->notifier.notify = ppce500_init_notify;
+
+    qemu_register_reset(ppce500_reset_device_tree, params);
+    qemu_add_machine_init_done_notifier(&params->notifier);
+
+    /* Run the generator once without side effects to learn the blob size */
+    return ppce500_load_device_tree(machine, addr, initrd_base, initrd_size,
+                                    kernel_base, kernel_size, true);
+}
+
+/* Create -kernel TLB entries for BookE. */
+/*
+ * Convert a size in bytes to a TLB size exponent: the index of the highest
+ * set bit of @size expressed in KiB (floor(log2(size / KiB))).
+ */
+hwaddr booke206_page_size_to_tlb(uint64_t size)
+{
+    uint64_t size_in_kib = size / KiB;
+
+    return 63 - clz64(size_in_kib);
+}
+
+/*
+ * Compute the TLB size exponent of the initial mapping, which has to cover
+ * everything from address 0 up to the end of the device tree described by
+ * env->load_info.
+ *
+ * Returns the exponent, rounded up to an even value.
+ */
+static int booke206_initial_map_tsize(CPUPPCState *env)
+{
+    struct boot_info *bi = env->load_info;
+    hwaddr dt_end;
+    int ps;
+
+    /*
+     * dt_base and dt_size are both uint32_t: widen before adding so that a
+     * device tree ending at or above 4 GiB does not wrap around to a small
+     * value and yield a mapping that is too small.
+     */
+    dt_end = (hwaddr)bi->dt_base + bi->dt_size;
+    ps = booke206_page_size_to_tlb(dt_end) + 1;
+    if (ps & 1) {
+        /* e500v2 can only do even TLB size bits */
+        ps++;
+    }
+    return ps;
+}
+
+/*
+ * Size in bytes of the initial mapping: 1 KiB shifted left by the TLB size
+ * exponent computed for @env.
+ */
+static uint64_t mmubooke_initial_mapsize(CPUPPCState *env)
+{
+    return (1ULL << 10 << booke206_initial_map_tsize(env));
+}
+
+/*
+ * Install the initial TLB entry for the boot CPU.
+ *
+ * The entry returned by booke206_get_tlbm(env, 1, 0, 0) is marked valid with
+ * the size computed by booke206_initial_map_tsize(); mas2 and the address
+ * part of mas7_3 are left at 0 and all user/supervisor read, write and
+ * execute permission bits are granted.
+ */
+static void mmubooke_create_initial_mapping(CPUPPCState *env)
+{
+    ppcmas_tlb_t *entry = booke206_get_tlbm(env, 1, 0, 0);
+    int tsize = booke206_initial_map_tsize(env);
+    hwaddr tsize_field = (tsize << MAS1_TSIZE_SHIFT);
+
+    entry->mas1 = MAS1_VALID | tsize_field;
+    entry->mas2 = 0;
+    entry->mas7_3 = MAS3_UR | MAS3_UW | MAS3_UX |
+                    MAS3_SR | MAS3_SW | MAS3_SX;
+
+    /* flag the TLB contents as modified */
+    env->tlb_dirty = true;
+}
+
+/*
+ * Reset handler for secondary CPUs (@opaque is the PowerPCCPU): reset the
+ * CPU and leave it with a pending EXCP_HLT exception index.
+ */
+static void ppce500_cpu_reset_sec(void *opaque)
+{
+    PowerPCCPU *ppc_cpu = opaque;
+    CPUState *state = CPU(ppc_cpu);
+
+    cpu_reset(state);
+    state->exception_index = EXCP_HLT;
+}
+
+/*
+ * Reset handler for the primary CPU (@opaque is the PowerPCCPU).
+ *
+ * After the generic reset the CPU is un-halted and given its boot-time
+ * register state from env->load_info: r1 just below 16 MiB, r3 the device
+ * tree address, r6 EPAPR_MAGIC, r7 the size of the initial mapping, the
+ * remaining argument registers zero, and NIP at the boot entry point.  The
+ * initial TLB mapping is then installed.
+ */
+static void ppce500_cpu_reset(void *opaque)
+{
+    PowerPCCPU *ppc_cpu = opaque;
+    CPUState *state = CPU(ppc_cpu);
+    CPUPPCState *env = &ppc_cpu->env;
+    struct boot_info *boot = env->load_info;
+
+    cpu_reset(state);
+
+    /* Initial guest state */
+    state->halted = 0;
+
+    env->gpr[1] = (16 * MiB) - 8;
+    env->gpr[3] = boot->dt_base;
+    env->gpr[4] = 0;
+    env->gpr[5] = 0;
+    env->gpr[6] = EPAPR_MAGIC;
+    env->gpr[7] = mmubooke_initial_mapsize(env);
+    env->gpr[8] = 0;
+    env->gpr[9] = 0;
+
+    env->nip = boot->entry;
+
+    mmubooke_create_initial_mapping(env);
+}
+
+/*
+ * Create and realize the emulated OpenPIC, attach it to the machine as child
+ * "pic", and wire its outputs to the CPUs: output number
+ * cpu * OPENPIC_OUTPUT_NB + line is connected to irqs[cpu].irq[line].
+ *
+ * Realization failure is fatal.  Returns the new device.
+ */
+static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms,
+                                           IrqLines *irqs)
+{
+    MachineState *machine = MACHINE(pms);
+    const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
+    unsigned int nr_cpus = machine->smp.cpus;
+    DeviceState *pic = qdev_new(TYPE_OPENPIC);
+    SysBusDevice *pic_sbd;
+    unsigned int cpu_nr;
+    int line;
+
+    object_property_add_child(OBJECT(machine), "pic", OBJECT(pic));
+    qdev_prop_set_uint32(pic, "model", pmc->mpic_version);
+    qdev_prop_set_uint32(pic, "nb_cpus", nr_cpus);
+
+    pic_sbd = SYS_BUS_DEVICE(pic);
+    sysbus_realize_and_unref(pic_sbd, &error_fatal);
+
+    for (cpu_nr = 0; cpu_nr < nr_cpus; cpu_nr++) {
+        for (line = 0; line < OPENPIC_OUTPUT_NB; line++) {
+            sysbus_connect_irq(pic_sbd, cpu_nr * OPENPIC_OUTPUT_NB + line,
+                               irqs[cpu_nr].irq[line]);
+        }
+    }
+
+    return pic;
+}
+
+/*
+ * Create and realize the in-kernel (KVM) OpenPIC and connect every vCPU to
+ * it.  @irqs is unused.
+ *
+ * Returns the device, or NULL with @errp set when realization fails.  A
+ * failure to connect a vCPU is reported on stderr and aborts.
+ */
+static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc,
+                                          IrqLines *irqs, Error **errp)
+{
+    DeviceState *kvm_pic = qdev_new(TYPE_KVM_OPENPIC);
+    CPUState *vcpu;
+
+    qdev_prop_set_uint32(kvm_pic, "model", pmc->mpic_version);
+
+    if (!sysbus_realize_and_unref(SYS_BUS_DEVICE(kvm_pic), errp)) {
+        object_unparent(OBJECT(kvm_pic));
+        return NULL;
+    }
+
+    CPU_FOREACH(vcpu) {
+        if (kvm_openpic_connect_vcpu(kvm_pic, vcpu) != 0) {
+            fprintf(stderr, "%s: failed to connect vcpu to irqchip\n",
+                    __func__);
+            abort();
+        }
+    }
+
+    return kvm_pic;
+}
+
+/*
+ * Create the machine's interrupt controller and map its first MMIO region
+ * into @ccsr at MPC8544_MPIC_REGS_OFFSET.
+ *
+ * Under KVM the in-kernel irqchip is tried first when it is allowed.  If it
+ * cannot be created and the in-kernel irqchip is required, the error is
+ * reported and QEMU exits; otherwise the emulated OpenPIC is used instead.
+ *
+ * Returns the interrupt controller device.
+ */
+static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms,
+                                      MemoryRegion *ccsr,
+                                      IrqLines *irqs)
+{
+    const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
+    DeviceState *dev = NULL;
+    SysBusDevice *s;
+
+    if (kvm_enabled()) {
+        Error *err = NULL;
+
+        if (kvm_kernel_irqchip_allowed()) {
+            dev = ppce500_init_mpic_kvm(pmc, irqs, &err);
+        }
+        if (kvm_kernel_irqchip_required() && !dev) {
+            error_reportf_err(err,
+                              "kernel_irqchip requested but unavailable: ");
+            exit(1);
+        }
+        /*
+         * Falling back to the emulated controller: the error from the
+         * failed in-kernel attempt is deliberately dropped, but it must
+         * still be freed.  err is NULL when the attempt succeeded or was
+         * never made, and error_free(NULL) is a no-op.
+         */
+        error_free(err);
+    }
+
+    if (!dev) {
+        dev = ppce500_init_mpic_qemu(pms, irqs);
+    }
+
+    s = SYS_BUS_DEVICE(dev);
+    memory_region_add_subregion(ccsr, MPC8544_MPIC_REGS_OFFSET,
+                                s->mmio[0].memory);
+
+    return dev;
+}
+
+/*
+ * IRQ-style handler for the power-off line: a non-zero level (@on) requests
+ * a guest-initiated shutdown.  @opaque and @line are unused.
+ */
+static void ppce500_power_off(void *opaque, int line, int on)
+{
+    if (!on) {
+        return;
+    }
+
+    qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+}
+
+/*
+ * Common machine init for the e500 boards.
+ *
+ * Creates and realizes the CPUs, maps RAM and the CCSR region, instantiates
+ * the MPIC, serial ports, I2C controller with RTC, the GUTS device, the PCI
+ * host (plus NICs), the spin table and, depending on the machine class, the
+ * GPIO controller and the platform bus. It then loads the firmware/kernel
+ * payload and optional kernel/initrd images, builds the device tree and
+ * records entry point and device tree location in the first CPU's boot info.
+ *
+ * Any fatal problem is reported and terminates QEMU via exit().
+ */
+void ppce500_init(MachineState *machine)
+{
+ MemoryRegion *address_space_mem = get_system_memory();
+ PPCE500MachineState *pms = PPCE500_MACHINE(machine);
+ const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(machine);
+ PCIBus *pci_bus;
+ CPUPPCState *env = NULL;
+ uint64_t loadaddr;
+ hwaddr kernel_base = -1LL;
+ int kernel_size = 0;
+ hwaddr dt_base = 0;
+ hwaddr initrd_base = 0;
+ int initrd_size = 0;
+ hwaddr cur_base = 0;
+ char *filename;
+ const char *payload_name;
+ bool kernel_as_payload;
+ hwaddr bios_entry = 0;
+ target_long payload_size;
+ struct boot_info *boot_info;
+ int dt_size;
+ int i;
+ unsigned int smp_cpus = machine->smp.cpus;
+ /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and
+ * 4 respectively */
+ unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4};
+ IrqLines *irqs;
+ DeviceState *dev, *mpicdev;
+ CPUPPCState *firstenv = NULL;
+ MemoryRegion *ccsr_addr_space;
+ SysBusDevice *s;
+ PPCE500CCSRState *ccsr;
+ I2CBus *i2c;
+
+ /* One IrqLines entry per CPU; released once the MPIC has been set up */
+ irqs = g_new0(IrqLines, smp_cpus);
+ for (i = 0; i < smp_cpus; i++) {
+ PowerPCCPU *cpu;
+ CPUState *cs;
+ qemu_irq *input;
+
+ cpu = POWERPC_CPU(object_new(machine->cpu_type));
+ env = &cpu->env;
+ cs = CPU(cpu);
+
+ if (env->mmu_model != POWERPC_MMU_BOOKE206) {
+ error_report("MMU model %i not supported by this machine",
+ env->mmu_model);
+ exit(1);
+ }
+
+ /*
+ * Secondary CPU starts in halted state for now. Needs to change
+ * when implementing non-kernel boot.
+ */
+ object_property_set_bool(OBJECT(cs), "start-powered-off", i != 0,
+ &error_fatal);
+ qdev_realize_and_unref(DEVICE(cs), NULL, &error_fatal);
+
+ if (!firstenv) {
+ firstenv = env;
+ }
+
+ /* Route the MPIC INT and CINT outputs to this CPU's input pins */
+ input = (qemu_irq *)env->irq_inputs;
+ irqs[i].irq[OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT];
+ irqs[i].irq[OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT];
+ /* Both the PIR default value and cpu_index become the loop index */
+ env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i;
+ env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0;
+
+ ppc_booke_timers_init(cpu, PLATFORM_CLK_FREQ_HZ, PPC_TIMER_E500);
+
+ /* Register reset handler */
+ if (!i) {
+ /* Primary CPU */
+ /* NOTE: this local shadows the function-scope boot_info */
+ struct boot_info *boot_info;
+ boot_info = g_malloc0(sizeof(struct boot_info));
+ qemu_register_reset(ppce500_cpu_reset, cpu);
+ env->load_info = boot_info;
+ } else {
+ /* Secondary CPUs */
+ qemu_register_reset(ppce500_cpu_reset_sec, cpu);
+ }
+ }
+
+ /* From here on env refers to the first CPU, which carries the boot info */
+ env = firstenv;
+
+ if (!QEMU_IS_ALIGNED(machine->ram_size, RAM_SIZES_ALIGN)) {
+ error_report("RAM size must be multiple of %" PRIu64, RAM_SIZES_ALIGN);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Register Memory */
+ memory_region_add_subregion(address_space_mem, 0, machine->ram);
+
+ /* CCSR container region, mapped at the class-provided base address */
+ dev = qdev_new("e500-ccsr");
+ object_property_add_child(qdev_get_machine(), "e500-ccsr",
+ OBJECT(dev));
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ ccsr = CCSR(dev);
+ ccsr_addr_space = &ccsr->ccsr_space;
+ memory_region_add_subregion(address_space_mem, pmc->ccsrbar_base,
+ ccsr_addr_space);
+
+ mpicdev = ppce500_init_mpic(pms, ccsr_addr_space, irqs);
+ g_free(irqs);
+
+ /* Serial */
+ /* Both serial ports are connected to MPIC input 42 */
+ if (serial_hd(0)) {
+ serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET,
+ 0, qdev_get_gpio_in(mpicdev, 42), 399193,
+ serial_hd(0), DEVICE_BIG_ENDIAN);
+ }
+
+ if (serial_hd(1)) {
+ serial_mm_init(ccsr_addr_space, MPC8544_SERIAL1_REGS_OFFSET,
+ 0, qdev_get_gpio_in(mpicdev, 42), 399193,
+ serial_hd(1), DEVICE_BIG_ENDIAN);
+ }
+ /* I2C */
+ dev = qdev_new("mpc-i2c");
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC8544_I2C_IRQ));
+ memory_region_add_subregion(ccsr_addr_space, MPC8544_I2C_REGS_OFFSET,
+ sysbus_mmio_get_region(s, 0));
+ i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c");
+ /* ds1338 device on the I2C bus, at address RTC_REGS_OFFSET */
+ i2c_slave_create_simple(i2c, "ds1338", RTC_REGS_OFFSET);
+
+
+ /* General Utility device */
+ dev = qdev_new("mpc8544-guts");
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ memory_region_add_subregion(ccsr_addr_space, MPC8544_UTIL_OFFSET,
+ sysbus_mmio_get_region(s, 0));
+
+ /* PCI */
+ dev = qdev_new("e500-pcihost");
+ object_property_add_child(qdev_get_machine(), "pci-host", OBJECT(dev));
+ qdev_prop_set_uint32(dev, "first_slot", pmc->pci_first_slot);
+ qdev_prop_set_uint32(dev, "first_pin_irq", pci_irq_nrs[0]);
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, pci_irq_nrs[i]));
+ }
+
+ memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET,
+ sysbus_mmio_get_region(s, 0));
+
+ pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
+ /*
+ * NOTE(review): a missing bus is only printed to stdout; init carries on
+ * and simply skips NIC creation below.
+ */
+ if (!pci_bus)
+ printf("couldn't create PCI controller!\n");
+
+ if (pci_bus) {
+ /* Register network interfaces. */
+ for (i = 0; i < nb_nics; i++) {
+ pci_nic_init_nofail(&nd_table[i], pci_bus, "virtio-net-pci", NULL);
+ }
+ }
+
+ /* Register spinning region */
+ sysbus_create_simple("e500-spin", pmc->spin_base, NULL);
+
+ if (pmc->has_mpc8xxx_gpio) {
+ qemu_irq poweroff_irq;
+
+ dev = qdev_new("mpc8xxx_gpio");
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC8XXX_GPIO_IRQ));
+ memory_region_add_subregion(ccsr_addr_space, MPC8XXX_GPIO_OFFSET,
+ sysbus_mmio_get_region(s, 0));
+
+ /* Power Off GPIO at Pin 0 */
+ poweroff_irq = qemu_allocate_irq(ppce500_power_off, NULL, 0);
+ qdev_connect_gpio_out(dev, 0, poweroff_irq);
+ }
+
+ /* Platform Bus Device */
+ if (pmc->has_platform_bus) {
+ dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE);
+ dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE);
+ qdev_prop_set_uint32(dev, "num_irqs", pmc->platform_bus_num_irqs);
+ qdev_prop_set_uint32(dev, "mmio_size", pmc->platform_bus_size);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ pms->pbus_dev = PLATFORM_BUS_DEVICE(dev);
+
+ /* Platform bus IRQ i goes to MPIC input platform_bus_first_irq + i */
+ s = SYS_BUS_DEVICE(pms->pbus_dev);
+ for (i = 0; i < pmc->platform_bus_num_irqs; i++) {
+ int irqn = pmc->platform_bus_first_irq + i;
+ sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, irqn));
+ }
+
+ memory_region_add_subregion(address_space_mem,
+ pmc->platform_bus_base,
+ sysbus_mmio_get_region(s, 0));
+ }
+
+ /*
+ * Smart firmware defaults ahead!
+ *
+ * We follow the following table to select which payload we execute.
+ *
+ * -kernel | -bios | payload
+ * ---------+-------+---------
+ * N | Y | u-boot
+ * N | N | u-boot
+ * Y | Y | u-boot
+ * Y | N | kernel
+ *
+ * This ensures backwards compatibility with how we used to expose
+ * -kernel to users but allows them to run through u-boot as well.
+ */
+ kernel_as_payload = false;
+ if (machine->firmware == NULL) {
+ if (machine->kernel_filename) {
+ payload_name = machine->kernel_filename;
+ kernel_as_payload = true;
+ } else {
+ payload_name = "u-boot.e500";
+ }
+ } else {
+ payload_name = machine->firmware;
+ }
+
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, payload_name);
+ if (!filename) {
+ error_report("could not find firmware/kernel file '%s'", payload_name);
+ exit(1);
+ }
+
+ /* Try the payload as an ELF image first */
+ payload_size = load_elf(filename, NULL, NULL, NULL,
+ &bios_entry, &loadaddr, NULL, NULL,
+ 1, PPC_ELF_MACHINE, 0, 0);
+ if (payload_size < 0) {
+ /*
+ * Hrm. No ELF image? Try a uImage, maybe someone is giving us an
+ * ePAPR compliant kernel
+ */
+ loadaddr = LOAD_UIMAGE_LOADADDR_INVALID;
+ payload_size = load_uimage(filename, &bios_entry, &loadaddr, NULL,
+ NULL, NULL);
+ if (payload_size < 0) {
+ error_report("could not load firmware '%s'", filename);
+ exit(1);
+ }
+ }
+
+ g_free(filename);
+
+ if (kernel_as_payload) {
+ kernel_base = loadaddr;
+ kernel_size = payload_size;
+ }
+
+ /* cur_base tracks the first free address above everything loaded so far */
+ cur_base = loadaddr + payload_size;
+ if (cur_base < 32 * MiB) {
+ /* u-boot occupies memory up to 32MB, so load blobs above */
+ cur_base = 32 * MiB;
+ }
+
+ /* Load bare kernel only if no bios/u-boot has been provided */
+ if (machine->kernel_filename && !kernel_as_payload) {
+ kernel_base = cur_base;
+ kernel_size = load_image_targphys(machine->kernel_filename,
+ cur_base,
+ machine->ram_size - cur_base);
+ if (kernel_size < 0) {
+ error_report("could not load kernel '%s'",
+ machine->kernel_filename);
+ exit(1);
+ }
+
+ cur_base += kernel_size;
+ }
+
+ /* Load initrd. */
+ if (machine->initrd_filename) {
+ initrd_base = (cur_base + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK;
+ initrd_size = load_image_targphys(machine->initrd_filename, initrd_base,
+ machine->ram_size - initrd_base);
+
+ if (initrd_size < 0) {
+ error_report("could not load initial ram disk '%s'",
+ machine->initrd_filename);
+ exit(1);
+ }
+
+ cur_base = initrd_base + initrd_size;
+ }
+
+ /*
+ * Reserve space for dtb behind the kernel image because Linux has a bug
+ * where it can only handle the dtb if it's within the first 64MB of where
+ * <kernel> starts. dtb cannot reach initrd_base because INITRD_LOAD_PAD
+ * ensures enough space between kernel and initrd.
+ */
+ /* Note: based on the end of the payload, not on cur_base */
+ dt_base = (loadaddr + payload_size + DTC_LOAD_PAD) & ~DTC_PAD_MASK;
+ if (dt_base + DTB_MAX_SIZE > machine->ram_size) {
+ error_report("not enough memory for device tree");
+ exit(1);
+ }
+
+ dt_size = ppce500_prep_device_tree(pms, dt_base,
+ initrd_base, initrd_size,
+ kernel_base, kernel_size);
+ if (dt_size < 0) {
+ error_report("couldn't load device tree");
+ exit(1);
+ }
+ assert(dt_size < DTB_MAX_SIZE);
+
+ /* Fill in the boot info allocated for the primary CPU in the loop above */
+ boot_info = env->load_info;
+ boot_info->entry = bios_entry;
+ boot_info->dt_base = dt_base;
+ boot_info->dt_size = dt_size;
+}
+
+/*
+ * Instance init for the CCSR device: set up the container memory region
+ * ("e500-ccsr", MPC8544_CCSRBAR_SIZE bytes) owned by the object.
+ */
+static void e500_ccsr_initfn(Object *obj)
+{
+    MemoryRegion *space = &CCSR(obj)->ccsr_space;
+
+    memory_region_init(space, obj, "e500-ccsr", MPC8544_CCSRBAR_SIZE);
+}
+
+/* QOM type for the CCSR container: a sysbus device set up by e500_ccsr_initfn */
+static const TypeInfo e500_ccsr_info = {
+ .name = TYPE_CCSR,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(PPCE500CCSRState),
+ .instance_init = e500_ccsr_initfn,
+};
+
+/* Abstract base machine type; concrete e500 boards use it as their parent */
+static const TypeInfo ppce500_info = {
+ .name = TYPE_PPCE500_MACHINE,
+ .parent = TYPE_MACHINE,
+ .abstract = true,
+ .instance_size = sizeof(PPCE500MachineState),
+ .class_size = sizeof(PPCE500MachineClass),
+};
+
+/* Register the CCSR device type and the abstract e500 machine type */
+static void e500_register_types(void)
+{
+ type_register_static(&e500_ccsr_info);
+ type_register_static(&ppce500_info);
+}
+
+type_init(e500_register_types)
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
new file mode 100644
index 000000000..1e5853b03
--- /dev/null
+++ b/hw/ppc/e500.h
@@ -0,0 +1,49 @@
+#ifndef PPCE500_H
+#define PPCE500_H
+
+#include "hw/boards.h"
+#include "hw/platform-bus.h"
+#include "qom/object.h"
+
+/* Per-instance state of an e500 machine */
+struct PPCE500MachineState {
+ /*< private >*/
+ MachineState parent_obj;
+
+ /* points to instance of TYPE_PLATFORM_BUS_DEVICE if
+ * board supports dynamic sysbus devices
+ */
+ /* assigned by ppce500_init() only when the class sets has_platform_bus */
+ PlatformBusDevice *pbus_dev;
+};
+
+/* Per-board configuration consumed by the common ppce500_init() code */
+struct PPCE500MachineClass {
+ /*< private >*/
+ MachineClass parent_class;
+
+ /* required -- must at least add toplevel board compatible */
+ void (*fixup_devtree)(void *fdt);
+
+ /* value of the PCI host's "first_slot" property */
+ int pci_first_slot;
+ int pci_nr_slots;
+
+ /* MPIC model to expose (an OPENPIC_MODEL_* value) */
+ int mpic_version;
+ /* create the mpc8xxx_gpio controller, including the power-off line */
+ bool has_mpc8xxx_gpio;
+ /* create the platform bus device described by the fields below */
+ bool has_platform_bus;
+ /* system memory address at which the platform bus MMIO region is mapped */
+ hwaddr platform_bus_base;
+ /* value of the platform bus "mmio_size" property */
+ hwaddr platform_bus_size;
+ /* platform bus IRQ i is wired to MPIC input platform_bus_first_irq + i */
+ int platform_bus_first_irq;
+ /* value of the platform bus "num_irqs" property */
+ int platform_bus_num_irqs;
+ /* system memory address at which the CCSR region is mapped */
+ hwaddr ccsrbar_base;
+ hwaddr pci_pio_base;
+ hwaddr pci_mmio_base;
+ hwaddr pci_mmio_bus_base;
+ /* address at which the "e500-spin" device is created */
+ hwaddr spin_base;
+};
+
+void ppce500_init(MachineState *machine);
+
+hwaddr booke206_page_size_to_tlb(uint64_t size);
+
+#define TYPE_PPCE500_MACHINE "ppce500-base-machine"
+OBJECT_DECLARE_TYPE(PPCE500MachineState, PPCE500MachineClass, PPCE500_MACHINE)
+
+#endif
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
new file mode 100644
index 000000000..fc911bbb7
--- /dev/null
+++ b/hw/ppc/e500plat.c
@@ -0,0 +1,122 @@
+/*
+ * Generic device-tree-driven paravirt PPC e500 platform
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "e500.h"
+#include "hw/net/fsl_etsec/etsec.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/kvm.h"
+#include "hw/sysbus.h"
+#include "hw/pci/pci.h"
+#include "hw/ppc/openpic.h"
+#include "kvm_ppc.h"
+
+/*
+ * Board-specific device tree fixup: set the root node's "model" and
+ * "compatible" properties (both including their terminating NUL).
+ */
+static void e500plat_fixup_devtree(void *fdt)
+{
+    const char board_model[] = "QEMU ppce500";
+    const char board_compat[] = "fsl,qemu-e500";
+
+    qemu_fdt_setprop(fdt, "/", "model", board_model, sizeof(board_model));
+    qemu_fdt_setprop(fdt, "/", "compatible", board_compat,
+                     sizeof(board_compat));
+}
+
+/*
+ * Machine init for the ppce500 board: pick an MPIC model that works with
+ * the host KVM, then run the common e500 init.
+ */
+static void e500plat_init(MachineState *machine)
+{
+    PPCE500MachineClass *e500_class = PPCE500_MACHINE_GET_CLASS(machine);
+
+    /*
+     * Older KVM versions lack EPR support, which breaks guests if we
+     * announce an MPIC variant that supports EPR. Fall back to an older
+     * MPIC model in that case.
+     */
+    if (kvm_enabled()) {
+        if (!kvmppc_has_cap_epr()) {
+            e500_class->mpic_version = OPENPIC_MODEL_FSL_MPIC_20;
+        }
+    }
+
+    ppce500_init(machine);
+}
+
+/*
+ * Hotplug "plug" callback: link a dynamic sysbus device to the platform
+ * bus. Does nothing when the machine has no platform bus or the device is
+ * not a dynamic sysbus device.
+ */
+static void e500plat_machine_device_plug_cb(HotplugHandler *hotplug_dev,
+                                            DeviceState *dev, Error **errp)
+{
+    PPCE500MachineState *pms = PPCE500_MACHINE(hotplug_dev);
+    MachineClass *machine_class;
+
+    if (!pms->pbus_dev) {
+        return;
+    }
+
+    machine_class = MACHINE_GET_CLASS(pms);
+    if (!device_is_dynamic_sysbus(machine_class, dev)) {
+        return;
+    }
+
+    platform_bus_link_device(pms->pbus_dev, SYS_BUS_DEVICE(dev));
+}
+
+/*
+ * Return the machine itself as hotplug handler for dynamic sysbus devices,
+ * and NULL for every other device.
+ */
+static
+HotplugHandler *e500plat_machine_get_hotpug_handler(MachineState *machine,
+                                                    DeviceState *dev)
+{
+    MachineClass *machine_class = MACHINE_GET_CLASS(machine);
+
+    return device_is_dynamic_sysbus(machine_class, dev)
+           ? HOTPLUG_HANDLER(machine)
+           : NULL;
+}
+
+#define TYPE_E500PLAT_MACHINE MACHINE_TYPE_NAME("ppce500")
+
+/*
+ * Class init for the "ppce500" machine: install the hotplug hooks, fill in
+ * the board parameters read by the common e500 code and set the generic
+ * machine properties.
+ */
+static void e500plat_machine_class_init(ObjectClass *oc, void *data)
+{
+    PPCE500MachineClass *e500_class = PPCE500_MACHINE_CLASS(oc);
+    HotplugHandlerClass *hotplug_class = HOTPLUG_HANDLER_CLASS(oc);
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+
+    /* Hotplug handling; no handler getter may have been installed before */
+    assert(!machine_class->get_hotplug_handler);
+    machine_class->get_hotplug_handler = e500plat_machine_get_hotpug_handler;
+    hotplug_class->plug = e500plat_machine_device_plug_cb;
+
+    /* PCI slots, device tree fixup and interrupt controller model */
+    e500_class->pci_first_slot = 0x1;
+    e500_class->pci_nr_slots = PCI_SLOT_MAX - 1;
+    e500_class->fixup_devtree = e500plat_fixup_devtree;
+    e500_class->mpic_version = OPENPIC_MODEL_FSL_MPIC_42;
+
+    /* Optional devices */
+    e500_class->has_mpc8xxx_gpio = true;
+    e500_class->has_platform_bus = true;
+    e500_class->platform_bus_base = 0xf00000000ULL;
+    e500_class->platform_bus_size = 128 * MiB;
+    e500_class->platform_bus_first_irq = 5;
+    e500_class->platform_bus_num_irqs = 10;
+
+    /* Physical address map */
+    e500_class->ccsrbar_base = 0xFE0000000ULL;
+    e500_class->pci_pio_base = 0xFE1000000ULL;
+    e500_class->pci_mmio_base = 0xC00000000ULL;
+    e500_class->pci_mmio_bus_base = 0xE0000000ULL;
+    e500_class->spin_base = 0xFEF000000ULL;
+
+    /* Generic machine properties */
+    machine_class->desc = "generic paravirt e500 platform";
+    machine_class->init = e500plat_init;
+    machine_class->max_cpus = 32;
+    machine_class->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30");
+    machine_class->default_ram_id = "mpc8544ds.ram";
+    machine_class_allow_dynamic_sysbus_dev(machine_class, TYPE_ETSEC_COMMON);
+}
+
+/*
+ * QOM type for the "ppce500" machine. It derives from the abstract e500
+ * base machine and implements the hotplug handler interface.
+ */
+static const TypeInfo e500plat_info = {
+ .name = TYPE_E500PLAT_MACHINE,
+ .parent = TYPE_PPCE500_MACHINE,
+ .class_init = e500plat_machine_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { }
+ }
+};
+
+/* Register the ppce500 machine type */
+static void e500plat_register_types(void)
+{
+ type_register_static(&e500plat_info);
+}
+type_init(e500plat_register_types)
diff --git a/hw/ppc/fdt.c b/hw/ppc/fdt.c
new file mode 100644
index 000000000..0828ad725
--- /dev/null
+++ b/hw/ppc/fdt.c
@@ -0,0 +1,49 @@
+/*
+ * QEMU PowerPC helper routines for the device tree.
+ *
+ * Copyright (C) 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "target/ppc/cpu.h"
+#include "target/ppc/mmu-hash64.h"
+
+#include "hw/ppc/fdt.h"
+
+#if defined(TARGET_PPC64)
+/*
+ * Serialize the CPU's hash MMU segment page size table into @prop as
+ * big-endian 32-bit cells.
+ *
+ * For every segment page size (up to the first entry whose page_shift is
+ * zero) this emits: page_shift, slb_enc, the number of encodings, then one
+ * (page_shift, pte_enc) pair per encoding. Output stops early once the next
+ * entry would not fit in the buffer.
+ *
+ * @cpu:     CPU whose hash64_opts->sps table is read
+ * @prop:    output buffer
+ * @maxsize: size of @prop in bytes
+ *
+ * Returns the number of bytes written to @prop.
+ */
+size_t ppc_create_page_sizes_prop(PowerPCCPU *cpu, uint32_t *prop,
+                                  size_t maxsize)
+{
+    size_t maxcells = maxsize / sizeof(uint32_t);
+    int i, j, count;
+    uint32_t *p = prop;
+
+    for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
+        PPCHash64SegmentPageSizes *sps = &cpu->hash64_opts->sps[i];
+
+        if (!sps->page_shift) {
+            break;
+        }
+        /* Count the valid encodings of this segment page size */
+        for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) {
+            if (sps->enc[count].page_shift == 0) {
+                break;
+            }
+        }
+        /*
+         * This entry needs 3 header cells plus 2 cells per encoding. Add
+         * the requirement to the cells already used rather than subtracting
+         * it from maxcells: the subtraction is done in size_t and would
+         * wrap around for a small buffer, disabling the bound check.
+         */
+        if ((size_t)(p - prop) + 3 + count * 2 >= maxcells) {
+            break;
+        }
+        *(p++) = cpu_to_be32(sps->page_shift);
+        *(p++) = cpu_to_be32(sps->slb_enc);
+        *(p++) = cpu_to_be32(count);
+        for (j = 0; j < count; j++) {
+            *(p++) = cpu_to_be32(sps->enc[j].page_shift);
+            *(p++) = cpu_to_be32(sps->enc[j].pte_enc);
+        }
+    }
+
+    return (p - prop) * sizeof(uint32_t);
+}
+#endif
diff --git a/hw/ppc/fw_cfg.c b/hw/ppc/fw_cfg.c
new file mode 100644
index 000000000..a88b5c4bd
--- /dev/null
+++ b/hw/ppc/fw_cfg.c
@@ -0,0 +1,45 @@
+/*
+ * fw_cfg helpers (PPC specific)
+ *
+ * Copyright (c) 2019 Red Hat, Inc.
+ *
+ * Author:
+ * Philippe Mathieu-Daudé <philmd@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/ppc/ppc.h"
+#include "hw/nvram/fw_cfg.h"
+
+/*
+ * Map a PPC-specific fw_cfg key to its well-known name.
+ *
+ * Returns the name string, or NULL when @key is not in the table.
+ */
+const char *fw_cfg_arch_key_name(uint16_t key)
+{
+    static const struct {
+        uint16_t key;
+        const char *name;
+    } ppc_key_names[] = {
+        {FW_CFG_PPC_WIDTH, "width"},
+        {FW_CFG_PPC_HEIGHT, "height"},
+        {FW_CFG_PPC_DEPTH, "depth"},
+        {FW_CFG_PPC_TBFREQ, "tbfreq"},
+        {FW_CFG_PPC_CLOCKFREQ, "clockfreq"},
+        {FW_CFG_PPC_IS_KVM, "is_kvm"},
+        {FW_CFG_PPC_KVM_HC, "kvm_hc"},
+        {FW_CFG_PPC_KVM_PID, "pid"},
+        {FW_CFG_PPC_NVRAM_ADDR, "nvram_addr"},
+        {FW_CFG_PPC_BUSFREQ, "busfreq"},
+        {FW_CFG_PPC_NVRAM_FLAT, "nvram_flat"},
+        {FW_CFG_PPC_VIACONFIG, "viaconfig"},
+    };
+    const size_t nr_names = ARRAY_SIZE(ppc_key_names);
+    size_t idx = 0;
+
+    /* Linear scan; the first matching entry wins */
+    while (idx < nr_names) {
+        if (ppc_key_names[idx].key == key) {
+            return ppc_key_names[idx].name;
+        }
+        idx++;
+    }
+
+    return NULL;
+}
diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h
new file mode 100644
index 000000000..22c840807
--- /dev/null
+++ b/hw/ppc/mac.h
@@ -0,0 +1,108 @@
+/*
+ * QEMU PowerMac emulation shared definitions and prototypes
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef PPC_MAC_H
+#define PPC_MAC_H
+
+#include "qemu/units.h"
+#include "exec/memory.h"
+#include "hw/boards.h"
+#include "hw/sysbus.h"
+#include "hw/input/adb.h"
+#include "hw/misc/mos6522.h"
+#include "hw/pci/pci_host.h"
+#include "hw/pci-host/uninorth.h"
+#include "qom/object.h"
+
+/* SMP is not enabled, for now */
+#define MAX_CPUS 1
+
+#define NVRAM_SIZE 0x2000
+#define PROM_FILENAME "openbios-ppc"
+
+#define KERNEL_LOAD_ADDR 0x01000000
+#define KERNEL_GAP 0x00100000
+
+#define ESCC_CLOCK 3686400
+
+/* Old World IRQs */
+#define OLDWORLD_CUDA_IRQ 0x12
+#define OLDWORLD_ESCCB_IRQ 0x10
+#define OLDWORLD_ESCCA_IRQ 0xf
+#define OLDWORLD_IDE0_IRQ 0xd
+#define OLDWORLD_IDE0_DMA_IRQ 0x2
+#define OLDWORLD_IDE1_IRQ 0xe
+#define OLDWORLD_IDE1_DMA_IRQ 0x3
+
+/* New World IRQs */
+#define NEWWORLD_CUDA_IRQ 0x19
+#define NEWWORLD_PMU_IRQ 0x19
+#define NEWWORLD_ESCCB_IRQ 0x24
+#define NEWWORLD_ESCCA_IRQ 0x25
+#define NEWWORLD_IDE0_IRQ 0xd
+#define NEWWORLD_IDE0_DMA_IRQ 0x2
+#define NEWWORLD_IDE1_IRQ 0xe
+#define NEWWORLD_IDE1_DMA_IRQ 0x3
+#define NEWWORLD_EXTING_GPIO1 0x2f
+#define NEWWORLD_EXTING_GPIO9 0x37
+
+/* Core99 machine */
+#define TYPE_CORE99_MACHINE MACHINE_TYPE_NAME("mac99")
+typedef struct Core99MachineState Core99MachineState;
+DECLARE_INSTANCE_CHECKER(Core99MachineState, CORE99_MACHINE,
+ TYPE_CORE99_MACHINE)
+
+#define CORE99_VIA_CONFIG_CUDA 0x0
+#define CORE99_VIA_CONFIG_PMU 0x1
+#define CORE99_VIA_CONFIG_PMU_ADB 0x2
+
+/* Per-instance state of the "mac99" machine */
+struct Core99MachineState {
+ /*< private >*/
+ MachineState parent;
+
+ /* one of the CORE99_VIA_CONFIG_* values defined above */
+ uint8_t via_config;
+};
+
+/* Grackle PCI */
+#define TYPE_GRACKLE_PCI_HOST_BRIDGE "grackle-pcihost"
+
+/* Mac NVRAM */
+#define TYPE_MACIO_NVRAM "macio-nvram"
+OBJECT_DECLARE_SIMPLE_TYPE(MacIONVRAMState, MACIO_NVRAM)
+
+/* Instance state of the TYPE_MACIO_NVRAM ("macio-nvram") sysbus device */
+struct MacIONVRAMState {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+
+ /* presumably the NVRAM size in bytes -- TODO confirm against the device */
+ uint32_t size;
+ /* presumably a shift applied to guest addresses -- TODO confirm */
+ uint32_t it_shift;
+
+ MemoryRegion mem;
+ /* presumably the backing store for the NVRAM contents -- TODO confirm */
+ uint8_t *data;
+};
+
+void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len);
+#endif /* PPC_MAC_H */
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
new file mode 100644
index 000000000..7bb7ac399
--- /dev/null
+++ b/hw/ppc/mac_newworld.c
@@ -0,0 +1,663 @@
+/*
+ * QEMU PowerPC CHRP (currently NewWorld PowerMac) hardware System Emulator
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * PCI bus layout on a real G5 (U3 based):
+ *
+ * 0000:f0:0b.0 Host bridge [0600]: Apple Computer Inc. U3 AGP [106b:004b]
+ * 0000:f0:10.0 VGA compatible controller [0300]: ATI Technologies Inc RV350 AP [Radeon 9600] [1002:4150]
+ * 0001:00:00.0 Host bridge [0600]: Apple Computer Inc. CPC945 HT Bridge [106b:004a]
+ * 0001:00:01.0 PCI bridge [0604]: Advanced Micro Devices [AMD] AMD-8131 PCI-X Bridge [1022:7450] (rev 12)
+ * 0001:00:02.0 PCI bridge [0604]: Advanced Micro Devices [AMD] AMD-8131 PCI-X Bridge [1022:7450] (rev 12)
+ * 0001:00:03.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0045]
+ * 0001:00:04.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0046]
+ * 0001:00:05.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0047]
+ * 0001:00:06.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0048]
+ * 0001:00:07.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0049]
+ * 0001:01:07.0 Class [ff00]: Apple Computer Inc. K2 KeyLargo Mac/IO [106b:0041] (rev 20)
+ * 0001:01:08.0 USB Controller [0c03]: Apple Computer Inc. K2 KeyLargo USB [106b:0040]
+ * 0001:01:09.0 USB Controller [0c03]: Apple Computer Inc. K2 KeyLargo USB [106b:0040]
+ * 0001:02:0b.0 USB Controller [0c03]: NEC Corporation USB [1033:0035] (rev 43)
+ * 0001:02:0b.1 USB Controller [0c03]: NEC Corporation USB [1033:0035] (rev 43)
+ * 0001:02:0b.2 USB Controller [0c03]: NEC Corporation USB 2.0 [1033:00e0] (rev 04)
+ * 0001:03:0d.0 Class [ff00]: Apple Computer Inc. K2 ATA/100 [106b:0043]
+ * 0001:03:0e.0 FireWire (IEEE 1394) [0c00]: Apple Computer Inc. K2 FireWire [106b:0042]
+ * 0001:04:0f.0 Ethernet controller [0200]: Apple Computer Inc. K2 GMAC (Sun GEM) [106b:004c]
+ * 0001:05:0c.0 IDE interface [0101]: Broadcom K2 SATA [1166:0240]
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qapi/error.h"
+#include "hw/ppc/ppc.h"
+#include "hw/qdev-properties.h"
+#include "hw/ppc/mac.h"
+#include "hw/input/adb.h"
+#include "hw/ppc/mac_dbdma.h"
+#include "hw/pci/pci.h"
+#include "net/net.h"
+#include "sysemu/sysemu.h"
+#include "hw/nvram/fw_cfg.h"
+#include "hw/char/escc.h"
+#include "hw/misc/macio/macio.h"
+#include "hw/ppc/openpic.h"
+#include "hw/loader.h"
+#include "hw/fw-path-provider.h"
+#include "elf.h"
+#include "qemu/error-report.h"
+#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
+#include "kvm_ppc.h"
+#include "hw/usb.h"
+#include "hw/sysbus.h"
+#include "trace.h"
+
+#define MAX_IDE_BUS 2
+#define CFG_ADDR 0xf0000510
+#define TBFREQ (100UL * 1000UL * 1000UL)
+#define CLOCKFREQ (900UL * 1000UL * 1000UL)
+#define BUSFREQ (100UL * 1000UL * 1000UL)
+
+#define NDRV_VGA_FILENAME "qemu_vga.ndrv"
+
+#define PROM_BASE 0xfff00000
+#define PROM_SIZE (1 * MiB)
+
+/*
+ * Boot-order setter: store the first character of @boot_device in the
+ * FW_CFG_BOOT_DEVICE entry. @opaque is passed through as the fw_cfg handle.
+ */
+static void fw_cfg_boot_set(void *opaque, const char *boot_device,
+                            Error **errp)
+{
+    const char first_letter = boot_device[0];
+
+    fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, first_letter);
+}
+
+/*
+ * ELF loader address translation callback: keep the low 28 bits of @addr
+ * and rebase them onto KERNEL_LOAD_ADDR.
+ */
+static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
+{
+    const uint64_t offset = addr & 0x0fffffff;
+
+    return offset + KERNEL_LOAD_ADDR;
+}
+
+/*
+ * Reset handler for a Mac99 CPU; @opaque is the PowerPCCPU to reset.
+ */
+static void ppc_core99_reset(void *opaque)
+{
+    PowerPCCPU *ppc_cpu = opaque;
+    CPUPPCState *cpu_env = &ppc_cpu->env;
+
+    cpu_reset(CPU(ppc_cpu));
+
+    /* 970 CPUs want to get their initial IP as part of their boot protocol */
+    cpu_env->nip = PROM_BASE + 0x100;
+}
+
+/* PowerPC Mac99 hardware initialisation */
+static void ppc_core99_init(MachineState *machine)
+{
+ ram_addr_t ram_size = machine->ram_size;
+ const char *bios_name = machine->firmware ?: PROM_FILENAME;
+ const char *kernel_filename = machine->kernel_filename;
+ const char *kernel_cmdline = machine->kernel_cmdline;
+ const char *initrd_filename = machine->initrd_filename;
+ const char *boot_device = machine->boot_order;
+ Core99MachineState *core99_machine = CORE99_MACHINE(machine);
+ PowerPCCPU *cpu = NULL;
+ CPUPPCState *env = NULL;
+ char *filename;
+ IrqLines *openpic_irqs;
+ int linux_boot, i, j, k;
+ MemoryRegion *bios = g_new(MemoryRegion, 1);
+ hwaddr kernel_base, initrd_base, cmdline_base = 0;
+ long kernel_size, initrd_size;
+ UNINHostState *uninorth_pci;
+ PCIBus *pci_bus;
+ PCIDevice *macio;
+ ESCCState *escc;
+ bool has_pmu, has_adb;
+ MACIOIDEState *macio_ide;
+ BusState *adb_bus;
+ MacIONVRAMState *nvr;
+ int bios_size;
+ int ppc_boot_device;
+ DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
+ void *fw_cfg;
+ int machine_arch;
+ SysBusDevice *s;
+ DeviceState *dev, *pic_dev;
+ DeviceState *uninorth_internal_dev = NULL, *uninorth_agp_dev = NULL;
+ hwaddr nvram_addr = 0xFFF04000;
+ uint64_t tbfreq;
+ unsigned int smp_cpus = machine->smp.cpus;
+
+ linux_boot = (kernel_filename != NULL);
+
+ /* init CPUs */
+ for (i = 0; i < smp_cpus; i++) {
+ cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+ env = &cpu->env;
+
+ /* Set time-base frequency to 100 MHz */
+ cpu_ppc_tb_init(env, TBFREQ);
+ qemu_register_reset(ppc_core99_reset, cpu);
+ }
+
+ /* allocate RAM */
+ if (machine->ram_size > 2 * GiB) {
+ error_report("RAM size more than 2 GiB is not supported");
+ exit(1);
+ }
+ memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+
+ /* allocate and load firmware ROM */
+ memory_region_init_rom(bios, NULL, "ppc_core99.bios", PROM_SIZE,
+ &error_fatal);
+ memory_region_add_subregion(get_system_memory(), PROM_BASE, bios);
+
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (filename) {
+ /* Load OpenBIOS (ELF) */
+ bios_size = load_elf(filename, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0);
+
+ if (bios_size <= 0) {
+ /* or load binary ROM image */
+ bios_size = load_image_targphys(filename, PROM_BASE, PROM_SIZE);
+ }
+ g_free(filename);
+ } else {
+ bios_size = -1;
+ }
+ if (bios_size < 0 || bios_size > PROM_SIZE) {
+ error_report("could not load PowerPC bios '%s'", bios_name);
+ exit(1);
+ }
+
+ if (linux_boot) {
+ int bswap_needed;
+
+#ifdef BSWAP_NEEDED
+ bswap_needed = 1;
+#else
+ bswap_needed = 0;
+#endif
+ kernel_base = KERNEL_LOAD_ADDR;
+
+ kernel_size = load_elf(kernel_filename, NULL,
+ translate_kernel_address, NULL, NULL, NULL,
+ NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0);
+ if (kernel_size < 0)
+ kernel_size = load_aout(kernel_filename, kernel_base,
+ ram_size - kernel_base, bswap_needed,
+ TARGET_PAGE_SIZE);
+ if (kernel_size < 0)
+ kernel_size = load_image_targphys(kernel_filename,
+ kernel_base,
+ ram_size - kernel_base);
+ if (kernel_size < 0) {
+ error_report("could not load kernel '%s'", kernel_filename);
+ exit(1);
+ }
+ /* load initrd */
+ if (initrd_filename) {
+ initrd_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP);
+ initrd_size = load_image_targphys(initrd_filename, initrd_base,
+ ram_size - initrd_base);
+ if (initrd_size < 0) {
+ error_report("could not load initial ram disk '%s'",
+ initrd_filename);
+ exit(1);
+ }
+ cmdline_base = TARGET_PAGE_ALIGN(initrd_base + initrd_size);
+ } else {
+ initrd_base = 0;
+ initrd_size = 0;
+ cmdline_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP);
+ }
+ ppc_boot_device = 'm';
+ } else {
+ kernel_base = 0;
+ kernel_size = 0;
+ initrd_base = 0;
+ initrd_size = 0;
+ ppc_boot_device = '\0';
+ /* We consider that NewWorld PowerMac never have any floppy drive
+ * For now, OHW cannot boot from the network.
+ */
+ for (i = 0; boot_device[i] != '\0'; i++) {
+ if (boot_device[i] >= 'c' && boot_device[i] <= 'f') {
+ ppc_boot_device = boot_device[i];
+ break;
+ }
+ }
+ if (ppc_boot_device == '\0') {
+ error_report("No valid boot device for Mac99 machine");
+ exit(1);
+ }
+ }
+
+ /* UniN init */
+ dev = qdev_new(TYPE_UNI_NORTH);
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ memory_region_add_subregion(get_system_memory(), 0xf8000000,
+ sysbus_mmio_get_region(s, 0));
+
+ openpic_irqs = g_new0(IrqLines, smp_cpus);
+ for (i = 0; i < smp_cpus; i++) {
+ /* Mac99 IRQ connection between OpenPIC outputs pins
+ * and PowerPC input pins
+ */
+ switch (PPC_INPUT(env)) {
+ case PPC_FLAGS_INPUT_6xx:
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT];
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_MCP];
+ /* Not connected ? */
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
+ /* Check this */
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_HRESET];
+ break;
+#if defined(TARGET_PPC64)
+ case PPC_FLAGS_INPUT_970:
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] =
+ ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] =
+ ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT];
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] =
+ ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_MCP];
+ /* Not connected ? */
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL;
+ /* Check this */
+ openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] =
+ ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_HRESET];
+ break;
+#endif /* defined(TARGET_PPC64) */
+ default:
+ error_report("Bus model not supported on mac99 machine");
+ exit(1);
+ }
+ }
+
+ if (PPC_INPUT(env) == PPC_FLAGS_INPUT_970) {
+ /* 970 gets a U3 bus */
+ /* Uninorth AGP bus */
+ dev = qdev_new(TYPE_U3_AGP_HOST_BRIDGE);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ uninorth_pci = U3_AGP_HOST_BRIDGE(dev);
+ s = SYS_BUS_DEVICE(dev);
+ /* PCI hole */
+ memory_region_add_subregion(get_system_memory(), 0x80000000ULL,
+ sysbus_mmio_get_region(s, 2));
+ /* Register 8 MB of ISA IO space */
+ memory_region_add_subregion(get_system_memory(), 0xf2000000,
+ sysbus_mmio_get_region(s, 3));
+ sysbus_mmio_map(s, 0, 0xf0800000);
+ sysbus_mmio_map(s, 1, 0xf0c00000);
+
+ machine_arch = ARCH_MAC99_U3;
+ } else {
+ /* Use values found on a real PowerMac */
+ /* Uninorth AGP bus */
+ uninorth_agp_dev = qdev_new(TYPE_UNI_NORTH_AGP_HOST_BRIDGE);
+ s = SYS_BUS_DEVICE(uninorth_agp_dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ sysbus_mmio_map(s, 0, 0xf0800000);
+ sysbus_mmio_map(s, 1, 0xf0c00000);
+
+ /* Uninorth internal bus */
+ uninorth_internal_dev = qdev_new(
+ TYPE_UNI_NORTH_INTERNAL_PCI_HOST_BRIDGE);
+ s = SYS_BUS_DEVICE(uninorth_internal_dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ sysbus_mmio_map(s, 0, 0xf4800000);
+ sysbus_mmio_map(s, 1, 0xf4c00000);
+
+ /* Uninorth main bus */
+ dev = qdev_new(TYPE_UNI_NORTH_PCI_HOST_BRIDGE);
+ qdev_prop_set_uint32(dev, "ofw-addr", 0xf2000000);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ uninorth_pci = UNI_NORTH_PCI_HOST_BRIDGE(dev);
+ s = SYS_BUS_DEVICE(dev);
+ /* PCI hole */
+ memory_region_add_subregion(get_system_memory(), 0x80000000ULL,
+ sysbus_mmio_get_region(s, 2));
+ /* Register 8 MB of ISA IO space */
+ memory_region_add_subregion(get_system_memory(), 0xf2000000,
+ sysbus_mmio_get_region(s, 3));
+ sysbus_mmio_map(s, 0, 0xf2800000);
+ sysbus_mmio_map(s, 1, 0xf2c00000);
+
+ machine_arch = ARCH_MAC99;
+ }
+
+ machine->usb |= defaults_enabled() && !machine->usb_disabled;
+ has_pmu = (core99_machine->via_config != CORE99_VIA_CONFIG_CUDA);
+ has_adb = (core99_machine->via_config == CORE99_VIA_CONFIG_CUDA ||
+ core99_machine->via_config == CORE99_VIA_CONFIG_PMU_ADB);
+
+ /* Timebase Frequency */
+ if (kvm_enabled()) {
+ tbfreq = kvmppc_get_tbfreq();
+ } else {
+ tbfreq = TBFREQ;
+ }
+
+ /* init basic PC hardware */
+ pci_bus = PCI_HOST_BRIDGE(uninorth_pci)->bus;
+
+ /* MacIO */
+ macio = pci_new(-1, TYPE_NEWWORLD_MACIO);
+ dev = DEVICE(macio);
+ qdev_prop_set_uint64(dev, "frequency", tbfreq);
+ qdev_prop_set_bit(dev, "has-pmu", has_pmu);
+ qdev_prop_set_bit(dev, "has-adb", has_adb);
+
+ escc = ESCC(object_resolve_path_component(OBJECT(macio), "escc"));
+ qdev_prop_set_chr(DEVICE(escc), "chrA", serial_hd(0));
+ qdev_prop_set_chr(DEVICE(escc), "chrB", serial_hd(1));
+
+ pci_realize_and_unref(macio, pci_bus, &error_fatal);
+
+ pic_dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pic"));
+ for (i = 0; i < 4; i++) {
+ qdev_connect_gpio_out(DEVICE(uninorth_pci), i,
+ qdev_get_gpio_in(pic_dev, 0x1b + i));
+ }
+
+ /* TODO: additional PCI buses only wired up for 32-bit machines */
+ if (PPC_INPUT(env) != PPC_FLAGS_INPUT_970) {
+ /* Uninorth AGP bus */
+ for (i = 0; i < 4; i++) {
+ qdev_connect_gpio_out(uninorth_agp_dev, i,
+ qdev_get_gpio_in(pic_dev, 0x1b + i));
+ }
+
+ /* Uninorth internal bus */
+ for (i = 0; i < 4; i++) {
+ qdev_connect_gpio_out(uninorth_internal_dev, i,
+ qdev_get_gpio_in(pic_dev, 0x1b + i));
+ }
+ }
+
+ /* OpenPIC */
+ s = SYS_BUS_DEVICE(pic_dev);
+ k = 0;
+ for (i = 0; i < smp_cpus; i++) {
+ for (j = 0; j < OPENPIC_OUTPUT_NB; j++) {
+ sysbus_connect_irq(s, k++, openpic_irqs[i].irq[j]);
+ }
+ }
+ g_free(openpic_irqs);
+
+ /* We only emulate 2 out of 3 IDE controllers for now */
+ ide_drive_get(hd, ARRAY_SIZE(hd));
+
+ macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
+ "ide[0]"));
+ macio_ide_init_drives(macio_ide, hd);
+
+ macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
+ "ide[1]"));
+ macio_ide_init_drives(macio_ide, &hd[MAX_IDE_DEVS]);
+
+ if (has_adb) {
+ if (has_pmu) {
+ dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pmu"));
+ } else {
+ dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda"));
+ }
+
+ adb_bus = qdev_get_child_bus(dev, "adb.0");
+ dev = qdev_new(TYPE_ADB_KEYBOARD);
+ qdev_realize_and_unref(dev, adb_bus, &error_fatal);
+
+ dev = qdev_new(TYPE_ADB_MOUSE);
+ qdev_realize_and_unref(dev, adb_bus, &error_fatal);
+ }
+
+ if (machine->usb) {
+ pci_create_simple(pci_bus, -1, "pci-ohci");
+
+ /* U3 needs to use USB for input because Linux doesn't support via-cuda
+ on PPC64 */
+ if (!has_adb || machine_arch == ARCH_MAC99_U3) {
+ USBBus *usb_bus = usb_bus_find(-1);
+
+ usb_create_simple(usb_bus, "usb-kbd");
+ usb_create_simple(usb_bus, "usb-mouse");
+ }
+ }
+
+ pci_vga_init(pci_bus);
+
+ if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) {
+ graphic_depth = 15;
+ }
+
+ for (i = 0; i < nb_nics; i++) {
+ pci_nic_init_nofail(&nd_table[i], pci_bus, "sungem", NULL);
+ }
+
+ /* The NewWorld NVRAM is not located in the MacIO device */
+ if (kvm_enabled() && qemu_real_host_page_size > 4096) {
+ /* We can't combine read-write and read-only in a single page, so
+ move the NVRAM out of ROM again for KVM */
+ nvram_addr = 0xFFE00000;
+ }
+ dev = qdev_new(TYPE_MACIO_NVRAM);
+ qdev_prop_set_uint32(dev, "size", 0x2000);
+ qdev_prop_set_uint32(dev, "it_shift", 1);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, nvram_addr);
+ nvr = MACIO_NVRAM(dev);
+ pmac_format_nvram_partition(nvr, 0x2000);
+ /* No PCI init: the BIOS will do it */
+
+ dev = qdev_new(TYPE_FW_CFG_MEM);
+ fw_cfg = FW_CFG(dev);
+ qdev_prop_set_uint32(dev, "data_width", 1);
+ qdev_prop_set_bit(dev, "dma_enabled", false);
+ object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG,
+ OBJECT(fw_cfg));
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ sysbus_mmio_map(s, 0, CFG_ADDR);
+ sysbus_mmio_map(s, 1, CFG_ADDR + 2);
+
+ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus);
+ fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
+ if (kernel_cmdline) {
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, cmdline_base);
+ pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, kernel_cmdline);
+ } else {
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, 0);
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ppc_boot_device);
+
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_WIDTH, graphic_width);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_VIACONFIG, core99_machine->via_config);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled());
+ if (kvm_enabled()) {
+ uint8_t *hypercall;
+
+ hypercall = g_malloc(16);
+ kvmppc_get_hypercall(env, hypercall, 16);
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
+ /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr);
+
+ /* MacOS NDRV VGA driver */
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME);
+ if (filename) {
+ gchar *ndrv_file;
+ gsize ndrv_size;
+
+ if (g_file_get_contents(filename, &ndrv_file, &ndrv_size, NULL)) {
+ fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size);
+ }
+ g_free(filename);
+ }
+
+ qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
+}
+
+/*
+ * FWPathProvider hook used for bootindex handling: maps selected device
+ * types to the node name used in the firmware device path.  Returns a
+ * newly allocated string, or NULL for device types not listed here.
+ */
+static char *core99_fw_dev_path(FWPathProvider *p, BusState *bus,
+                                DeviceState *dev)
+{
+    const char *type = object_get_typename(OBJECT(dev));
+
+    if (strcmp(type, "macio-newworld") == 0) {
+        PCIDevice *pci_dev = PCI_DEVICE(dev);
+
+        return g_strdup_printf("mac-io@%x", PCI_SLOT(pci_dev->devfn));
+    }
+
+    if (strcmp(type, "macio-ide") == 0) {
+        MACIOIDEState *ide = MACIO_IDE(dev);
+
+        return g_strdup_printf("ata-3@%x", ide->addr);
+    }
+
+    if (strcmp(type, "ide-cd") == 0) {
+        return g_strdup("cdrom");
+    }
+
+    /* IDE hard disks and virtio block devices share the "disk" node name */
+    if (strcmp(type, "ide-hd") == 0 ||
+        strcmp(type, "virtio-blk-device") == 0) {
+        return g_strdup("disk");
+    }
+
+    return NULL;
+}
+static int core99_kvm_type(MachineState *machine, const char *arg)
+{
+    /* PR KVM is always forced; neither @machine nor @arg is consulted */
+    enum { CORE99_KVM_TYPE_PR = 2 };
+
+    return CORE99_KVM_TYPE_PR;
+}
+
+/* Class initialiser for the mac99 machine: sets the machine defaults and
+ * installs the firmware device path hook. */
+static void core99_machine_class_init(ObjectClass *oc, void *data)
+{
+    FWPathProviderClass *fw_path_class = FW_PATH_PROVIDER_CLASS(oc);
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+
+    /* Identification and entry points */
+    machine_class->desc = "Mac99 based PowerMAC";
+    machine_class->init = ppc_core99_init;
+    machine_class->kvm_type = core99_kvm_type;
+    fw_path_class->get_dev_path = core99_fw_dev_path;
+
+    /* CPU and memory defaults */
+    machine_class->max_cpus = MAX_CPUS;
+#ifdef TARGET_PPC64
+    machine_class->default_cpu_type = POWERPC_CPU_TYPE_NAME("970fx_v3.1");
+#else
+    machine_class->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9");
+#endif
+    machine_class->default_ram_id = "ppc_core99.ram";
+
+    /* Storage, boot and display defaults */
+    machine_class->block_default_type = IF_IDE;
+    machine_class->default_boot_order = "cd";
+    machine_class->ignore_boot_device_suffixes = true;
+    machine_class->default_display = "std";
+}
+
+/* Getter for the "via" machine property: returns a newly allocated copy of
+ * the name matching the current via_config value. */
+static char *core99_get_via_config(Object *obj, Error **errp)
+{
+    Core99MachineState *core99 = CORE99_MACHINE(obj);
+    /* "cuda" covers CORE99_VIA_CONFIG_CUDA and any unrecognised value */
+    const char *via_name = "cuda";
+
+    if (core99->via_config == CORE99_VIA_CONFIG_PMU) {
+        via_name = "pmu";
+    } else if (core99->via_config == CORE99_VIA_CONFIG_PMU_ADB) {
+        via_name = "pmu-adb";
+    }
+
+    return g_strdup(via_name);
+}
+
+/*
+ * Setter for the "via" machine property.  Accepts "cuda", "pmu" or
+ * "pmu-adb"; any other value leaves via_config untouched and sets @errp
+ * with a hint listing the valid values.
+ */
+static void core99_set_via_config(Object *obj, const char *value, Error **errp)
+{
+    /*
+     * Required because error_append_hint() is used on @errp below: without
+     * the guard a caller passing &error_fatal exits inside error_setg()
+     * and the hint is never printed.
+     */
+    ERRP_GUARD();
+    Core99MachineState *cms = CORE99_MACHINE(obj);
+
+    if (!strcmp(value, "cuda")) {
+        cms->via_config = CORE99_VIA_CONFIG_CUDA;
+    } else if (!strcmp(value, "pmu")) {
+        cms->via_config = CORE99_VIA_CONFIG_PMU;
+    } else if (!strcmp(value, "pmu-adb")) {
+        cms->via_config = CORE99_VIA_CONFIG_PMU_ADB;
+    } else {
+        error_setg(errp, "Invalid via value");
+        error_append_hint(errp, "Valid values are cuda, pmu, pmu-adb.\n");
+    }
+}
+
+/* Instance initialiser: selects the default VIA configuration and exposes
+ * it as the string property "via". */
+static void core99_instance_init(Object *obj)
+{
+    Core99MachineState *core99 = CORE99_MACHINE(obj);
+
+    /* CUDA is the default until the "via" property says otherwise */
+    core99->via_config = CORE99_VIA_CONFIG_CUDA;
+
+    object_property_add_str(obj, "via",
+                            core99_get_via_config, core99_set_via_config);
+    object_property_set_description(obj, "via",
+                                    "Set VIA configuration. "
+                                    "Valid values are cuda, pmu and pmu-adb");
+}
+
+/* Type description of the "mac99" machine; it also implements the
+ * firmware path provider interface. */
+static const TypeInfo core99_machine_info = {
+    .name          = MACHINE_TYPE_NAME("mac99"),
+    .parent        = TYPE_MACHINE,
+    .instance_size = sizeof(Core99MachineState),
+    .instance_init = core99_instance_init,
+    .class_init    = core99_machine_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_FW_PATH_PROVIDER },
+        { }
+    },
+};
+
+static void mac_machine_register_types(void)
+{
+ type_register_static(&core99_machine_info); /* the mac99 machine type */
+}
+
+type_init(mac_machine_register_types) /* hooks the registration function above into type_init() */
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
new file mode 100644
index 000000000..de2be960e
--- /dev/null
+++ b/hw/ppc/mac_oldworld.c
@@ -0,0 +1,455 @@
+
+/*
+ * QEMU OldWorld PowerMac (currently ~G3 Beige) hardware System Emulator
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "hw/ppc/ppc.h"
+#include "hw/qdev-properties.h"
+#include "mac.h"
+#include "hw/input/adb.h"
+#include "sysemu/sysemu.h"
+#include "net/net.h"
+#include "hw/isa/isa.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "hw/nvram/fw_cfg.h"
+#include "hw/char/escc.h"
+#include "hw/misc/macio/macio.h"
+#include "hw/loader.h"
+#include "hw/fw-path-provider.h"
+#include "elf.h"
+#include "qemu/error-report.h"
+#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
+#include "kvm_ppc.h"
+
+#define MAX_IDE_BUS 2
+#define CFG_ADDR 0xf0000510
+#define TBFREQ 16600000UL
+#define CLOCKFREQ 266000000UL
+#define BUSFREQ 66000000UL
+
+#define NDRV_VGA_FILENAME "qemu_vga.ndrv"
+
+#define GRACKLE_BASE 0xfec00000
+#define PROM_BASE 0xffc00000
+#define PROM_SIZE (4 * MiB)
+
+/* Boot-set handler: @opaque is the fw_cfg object registered together with
+ * this callback. */
+static void fw_cfg_boot_set(void *opaque, const char *boot_device,
+                            Error **errp)
+{
+    /* Only the first character of the boot order string is published */
+    uint16_t first_choice = boot_device[0];
+
+    fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, first_choice);
+}
+
+/* Address translation callback for load_elf(): keeps the low 28 bits of
+ * @addr and rebases them onto KERNEL_LOAD_ADDR.  @opaque is unused. */
+static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
+{
+    const uint64_t offset = addr & 0x0fffffff;
+
+    return offset + KERNEL_LOAD_ADDR;
+}
+
+/* Reset handler: @opaque is the CPU that was passed to
+ * qemu_register_reset() along with this callback. */
+static void ppc_heathrow_reset(void *opaque)
+{
+    CPUState *cs = CPU(opaque);
+
+    cpu_reset(cs);
+}
+/*
+ * Machine init for the OldWorld PowerMac board.  In order: creates the
+ * CPUs, maps RAM at 0 and the firmware ROM at PROM_BASE, optionally loads
+ * a kernel/initrd, instantiates the Grackle PCI host bridge and the MacIO
+ * device (ESCC, PIC, IDE, CUDA/ADB), adds VGA, NICs and optional USB, and
+ * finally publishes the configuration through fw_cfg.  Configuration
+ * errors are reported with error_report() followed by exit(1).
+ */
+static void ppc_heathrow_init(MachineState *machine)
+{
+ ram_addr_t ram_size = machine->ram_size;
+ const char *bios_name = machine->firmware ?: PROM_FILENAME;
+ const char *boot_device = machine->boot_order;
+ PowerPCCPU *cpu = NULL;
+ CPUPPCState *env = NULL;
+ char *filename;
+ int i;
+ MemoryRegion *bios = g_new(MemoryRegion, 1);
+ uint32_t kernel_base, initrd_base, cmdline_base = 0;
+ int32_t kernel_size, initrd_size;
+ PCIBus *pci_bus;
+ PCIDevice *macio;
+ MACIOIDEState *macio_ide;
+ ESCCState *escc;
+ SysBusDevice *s;
+ DeviceState *dev, *pic_dev, *grackle_dev;
+ BusState *adb_bus;
+ uint64_t bios_addr;
+ int bios_size;
+ unsigned int smp_cpus = machine->smp.cpus;
+ uint16_t ppc_boot_device;
+ DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
+ void *fw_cfg;
+ uint64_t tbfreq;
+
+ /* init CPUs */
+ for (i = 0; i < smp_cpus; i++) {
+ cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+ env = &cpu->env;
+
+ /* Set time-base frequency to 16.6 MHz */
+ cpu_ppc_tb_init(env, TBFREQ);
+ qemu_register_reset(ppc_heathrow_reset, cpu);
+ }
+
+ /* allocate RAM: sizes above 2047 MiB are rejected */
+ if (ram_size > 2047 * MiB) {
+ error_report("Too much memory for this machine: %" PRId64 " MB, "
+ "maximum 2047 MB", ram_size / MiB);
+ exit(1);
+ }
+
+ memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+
+ /* allocate and load firmware ROM */
+ memory_region_init_rom(bios, NULL, "ppc_heathrow.bios", PROM_SIZE,
+ &error_fatal);
+ memory_region_add_subregion(get_system_memory(), PROM_BASE, bios);
+
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (filename) {
+ /* Load OpenBIOS (ELF) */
+ bios_size = load_elf(filename, NULL, NULL, NULL, NULL, &bios_addr,
+ NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0);
+ /* Unfortunately, load_elf sign-extends reading elf32 */
+ bios_addr = (uint32_t)bios_addr;
+
+ if (bios_size <= 0) {
+ /* If the ELF load failed, fall back to a raw binary ROM image */
+ bios_size = load_image_targphys(filename, PROM_BASE, PROM_SIZE);
+ bios_addr = PROM_BASE;
+ }
+ g_free(filename);
+ } else {
+ bios_size = -1; /* firmware file not found: reported just below */
+ }
+ if (bios_size < 0 || bios_addr - PROM_BASE + bios_size > PROM_SIZE) {
+ error_report("could not load PowerPC bios '%s'", bios_name);
+ exit(1);
+ }
+
+ if (machine->kernel_filename) {
+ int bswap_needed;
+
+#ifdef BSWAP_NEEDED
+ bswap_needed = 1;
+#else
+ bswap_needed = 0;
+#endif
+ kernel_base = KERNEL_LOAD_ADDR;
+ kernel_size = load_elf(machine->kernel_filename, NULL,
+ translate_kernel_address, NULL, NULL, NULL,
+ NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0);
+ if (kernel_size < 0) /* ELF load failed: try the a.out loader */
+ kernel_size = load_aout(machine->kernel_filename, kernel_base,
+ ram_size - kernel_base, bswap_needed,
+ TARGET_PAGE_SIZE);
+ if (kernel_size < 0) /* last resort: load the file as a raw image */
+ kernel_size = load_image_targphys(machine->kernel_filename,
+ kernel_base,
+ ram_size - kernel_base);
+ if (kernel_size < 0) {
+ error_report("could not load kernel '%s'",
+ machine->kernel_filename);
+ exit(1);
+ }
+ /* load initrd */
+ if (machine->initrd_filename) {
+ initrd_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size +
+ KERNEL_GAP);
+ initrd_size = load_image_targphys(machine->initrd_filename,
+ initrd_base,
+ ram_size - initrd_base);
+ if (initrd_size < 0) {
+ error_report("could not load initial ram disk '%s'",
+ machine->initrd_filename);
+ exit(1);
+ }
+ cmdline_base = TARGET_PAGE_ALIGN(initrd_base + initrd_size);
+ } else {
+ initrd_base = 0;
+ initrd_size = 0;
+ cmdline_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP);
+ }
+ ppc_boot_device = 'm';
+ } else {
+ kernel_base = 0;
+ kernel_size = 0;
+ initrd_base = 0;
+ initrd_size = 0;
+ ppc_boot_device = '\0';
+ for (i = 0; boot_device[i] != '\0'; i++) {
+ /* TOFIX: for now, the second IDE channel is not properly
+ * used by OHW. The Mac floppy disks are not emulated.
+ * For now, OHW cannot boot from the network.
+ */
+#if 0
+ if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
+ ppc_boot_device = boot_device[i];
+ break;
+ }
+#else
+ if (boot_device[i] >= 'c' && boot_device[i] <= 'd') {
+ ppc_boot_device = boot_device[i];
+ break;
+ }
+#endif
+ }
+ if (ppc_boot_device == '\0') {
+ error_report("No valid boot device for G3 Beige machine");
+ exit(1);
+ }
+ }
+
+ /* Timebase Frequency */
+ if (kvm_enabled()) {
+ tbfreq = kvmppc_get_tbfreq();
+ } else {
+ tbfreq = TBFREQ;
+ }
+
+ /* Grackle PCI host bridge */
+ grackle_dev = qdev_new(TYPE_GRACKLE_PCI_HOST_BRIDGE);
+ qdev_prop_set_uint32(grackle_dev, "ofw-addr", 0x80000000);
+ s = SYS_BUS_DEVICE(grackle_dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+
+ sysbus_mmio_map(s, 0, GRACKLE_BASE);
+ sysbus_mmio_map(s, 1, GRACKLE_BASE + 0x200000);
+ /* PCI hole */
+ memory_region_add_subregion(get_system_memory(), 0x80000000ULL,
+ sysbus_mmio_get_region(s, 2));
+ /* Register 2 MB of ISA IO space */
+ memory_region_add_subregion(get_system_memory(), 0xfe000000,
+ sysbus_mmio_get_region(s, 3));
+
+ pci_bus = PCI_HOST_BRIDGE(grackle_dev)->bus;
+
+ /* MacIO */
+ macio = pci_new(PCI_DEVFN(16, 0), TYPE_OLDWORLD_MACIO);
+ dev = DEVICE(macio);
+ qdev_prop_set_uint64(dev, "frequency", tbfreq);
+
+ escc = ESCC(object_resolve_path_component(OBJECT(macio), "escc"));
+ qdev_prop_set_chr(DEVICE(escc), "chrA", serial_hd(0));
+ qdev_prop_set_chr(DEVICE(escc), "chrB", serial_hd(1));
+
+ pci_realize_and_unref(macio, pci_bus, &error_fatal);
+
+ pic_dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pic"));
+ for (i = 0; i < 4; i++) {
+ qdev_connect_gpio_out(grackle_dev, i,
+ qdev_get_gpio_in(pic_dev, 0x15 + i));
+ }
+
+ /*
+ * Connect the heathrow PIC outputs to the 6xx bus.
+ * NOTE(review): env still points at the last CPU created in the init
+ * loop above, so every iteration inspects and wires that same CPU to
+ * PIC output 0 - confirm this is intended when smp_cpus > 1.
+ */
+ for (i = 0; i < smp_cpus; i++) {
+ switch (PPC_INPUT(env)) {
+ case PPC_FLAGS_INPUT_6xx:
+ /* XXX: we register only 1 output pin for heathrow PIC */
+ qdev_connect_gpio_out(pic_dev, 0,
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]);
+ break;
+ default:
+ error_report("Bus model not supported on OldWorld Mac machine");
+ exit(1);
+ }
+ }
+
+ pci_vga_init(pci_bus);
+
+ for (i = 0; i < nb_nics; i++) {
+ pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL);
+ }
+
+ /* MacIO IDE */
+ ide_drive_get(hd, ARRAY_SIZE(hd));
+ macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
+ "ide[0]"));
+ macio_ide_init_drives(macio_ide, hd);
+
+ macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
+ "ide[1]"));
+ macio_ide_init_drives(macio_ide, &hd[MAX_IDE_DEVS]);
+
+ /* MacIO CUDA/ADB */
+ dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda"));
+ adb_bus = qdev_get_child_bus(dev, "adb.0");
+ dev = qdev_new(TYPE_ADB_KEYBOARD);
+ qdev_realize_and_unref(dev, adb_bus, &error_fatal);
+ dev = qdev_new(TYPE_ADB_MOUSE);
+ qdev_realize_and_unref(dev, adb_bus, &error_fatal);
+
+ if (machine_usb(machine)) {
+ pci_create_simple(pci_bus, -1, "pci-ohci");
+ }
+
+ if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) /* any other depth falls back to 15 */
+ graphic_depth = 15;
+
+ /* No PCI init: the BIOS will do it */
+
+ dev = qdev_new(TYPE_FW_CFG_MEM);
+ fw_cfg = FW_CFG(dev);
+ qdev_prop_set_uint32(dev, "data_width", 1);
+ qdev_prop_set_bit(dev, "dma_enabled", false);
+ object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG,
+ OBJECT(fw_cfg));
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ sysbus_mmio_map(s, 0, CFG_ADDR);
+ sysbus_mmio_map(s, 1, CFG_ADDR + 2);
+
+ fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus);
+ fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
+ if (machine->kernel_cmdline) { /* cmdline_base is still 0 if no kernel was loaded */
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, cmdline_base);
+ pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE,
+ machine->kernel_cmdline);
+ } else {
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, 0);
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ppc_boot_device);
+
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_WIDTH, graphic_width);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled());
+ if (kvm_enabled()) {
+ uint8_t *hypercall;
+
+ hypercall = g_malloc(16); /* not freed here: the buffer is handed to fw_cfg_add_bytes() */
+ kvmppc_get_hypercall(env, hypercall, 16);
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
+ /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
+
+ /* MacOS NDRV VGA driver */
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME);
+ if (filename) {
+ gchar *ndrv_file;
+ gsize ndrv_size;
+
+ if (g_file_get_contents(filename, &ndrv_file, &ndrv_size, NULL)) { /* a missing or unreadable driver file is silently skipped */
+ fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size);
+ }
+ g_free(filename);
+ }
+
+ qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
+}
+
+/*
+ * FWPathProvider hook used for bootindex handling: maps selected device
+ * types to the node name used in the firmware device path.  Returns a
+ * newly allocated string, or NULL for device types not listed here.
+ */
+static char *heathrow_fw_dev_path(FWPathProvider *p, BusState *bus,
+                                  DeviceState *dev)
+{
+    const char *type = object_get_typename(OBJECT(dev));
+
+    if (strcmp(type, "macio-oldworld") == 0) {
+        PCIDevice *pci_dev = PCI_DEVICE(dev);
+
+        return g_strdup_printf("mac-io@%x", PCI_SLOT(pci_dev->devfn));
+    }
+
+    if (strcmp(type, "macio-ide") == 0) {
+        MACIOIDEState *ide = MACIO_IDE(dev);
+
+        return g_strdup_printf("ata-3@%x", ide->addr);
+    }
+
+    if (strcmp(type, "ide-cd") == 0) {
+        return g_strdup("cdrom");
+    }
+
+    /* IDE hard disks and virtio block devices share the "disk" node name */
+    if (strcmp(type, "ide-hd") == 0 ||
+        strcmp(type, "virtio-blk-device") == 0) {
+        return g_strdup("disk");
+    }
+
+    return NULL;
+}
+
+static int heathrow_kvm_type(MachineState *machine, const char *arg)
+{
+    /* PR KVM is always forced; neither @machine nor @arg is consulted */
+    enum { HEATHROW_KVM_TYPE_PR = 2 };
+
+    return HEATHROW_KVM_TYPE_PR;
+}
+
+/* Class initialiser for the Heathrow machine: sets the machine defaults
+ * and installs the firmware device path hook. */
+static void heathrow_class_init(ObjectClass *oc, void *data)
+{
+    FWPathProviderClass *fw_path_class = FW_PATH_PROVIDER_CLASS(oc);
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+
+    /* Identification and entry points */
+    machine_class->desc = "Heathrow based PowerMAC";
+    machine_class->init = ppc_heathrow_init;
+    machine_class->kvm_type = heathrow_kvm_type;
+    fw_path_class->get_dev_path = heathrow_fw_dev_path;
+#ifndef TARGET_PPC64
+    /* Default machine unless building for TARGET_PPC64 */
+    machine_class->is_default = true;
+#endif
+
+    /* CPU and memory defaults */
+    machine_class->max_cpus = MAX_CPUS;
+    machine_class->default_cpu_type = POWERPC_CPU_TYPE_NAME("750_v3.1");
+    machine_class->default_ram_id = "ppc_heathrow.ram";
+
+    /* Storage, boot and display defaults */
+    machine_class->block_default_type = IF_IDE;
+    /* TOFIX "cad" when Mac floppy is implemented */
+    machine_class->default_boot_order = "cd";
+    machine_class->ignore_boot_device_suffixes = true;
+    machine_class->default_display = "std";
+}
+
+/* Type description of the "g3beige" machine; it also implements the
+ * firmware path provider interface. */
+static const TypeInfo ppc_heathrow_machine_info = {
+    .name       = MACHINE_TYPE_NAME("g3beige"),
+    .parent     = TYPE_MACHINE,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_FW_PATH_PROVIDER },
+        { }
+    },
+    .class_init = heathrow_class_init,
+};
+
+static void ppc_heathrow_register_types(void)
+{
+ type_register_static(&ppc_heathrow_machine_info); /* the g3beige machine type */
+}
+
+type_init(ppc_heathrow_register_types); /* hooks the registration function above into type_init() */
diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build
new file mode 100644
index 000000000..aa4c8e6a2
--- /dev/null
+++ b/hw/ppc/meson.build
@@ -0,0 +1,90 @@
+ppc_ss = ss.source_set() # source set collecting every file selected below
+ppc_ss.add(files(
+ 'ppc.c',
+ 'ppc_booke.c',
+))
+ppc_ss.add(when: 'CONFIG_FDT_PPC', if_true: [files(
+ 'fdt.c',
+), fdt])
+ppc_ss.add(when: 'CONFIG_FW_CFG_PPC', if_true: files('fw_cfg.c'))
+
+# IBM pSeries (sPAPR)
+ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files(
+ 'spapr.c',
+ 'spapr_caps.c',
+ 'spapr_vio.c',
+ 'spapr_events.c',
+ 'spapr_hcall.c',
+ 'spapr_iommu.c',
+ 'spapr_rtas.c',
+ 'spapr_pci.c',
+ 'spapr_rtc.c',
+ 'spapr_drc.c',
+ 'spapr_cpu_core.c',
+ 'spapr_ovec.c',
+ 'spapr_irq.c',
+ 'spapr_tpm_proxy.c',
+ 'spapr_nvdimm.c',
+ 'spapr_rtas_ddw.c',
+ 'spapr_numa.c',
+ 'pef.c',
+))
+ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files(
+ 'spapr_softmmu.c',
+))
+ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c'))
+ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files(
+ 'spapr_pci_vfio.c',
+ 'spapr_pci_nvlink2.c'
+))
+
+# IBM PowerNV
+ppc_ss.add(when: 'CONFIG_POWERNV', if_true: files(
+ 'pnv.c',
+ 'pnv_xscom.c',
+ 'pnv_core.c',
+ 'pnv_lpc.c',
+ 'pnv_psi.c',
+ 'pnv_occ.c',
+ 'pnv_bmc.c',
+ 'pnv_homer.c',
+ 'pnv_pnor.c',
+))
+# PowerPC 4xx boards
+ppc_ss.add(when: 'CONFIG_PPC405', if_true: files(
+ 'ppc405_boards.c',
+ 'ppc405_uc.c'))
+ppc_ss.add(when: 'CONFIG_PPC440', if_true: files(
+ 'ppc440_bamboo.c',
+ 'ppc440_pcix.c', 'ppc440_uc.c'))
+ppc_ss.add(when: 'CONFIG_PPC4XX', if_true: files(
+ 'ppc4xx_pci.c',
+ 'ppc4xx_devs.c'))
+ppc_ss.add(when: 'CONFIG_SAM460EX', if_true: files('sam460ex.c'))
+# PReP
+ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep.c'))
+ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep_systemio.c'))
+ppc_ss.add(when: 'CONFIG_RS6000_MC', if_true: files('rs6000_mc.c'))
+# OldWorld PowerMac
+ppc_ss.add(when: 'CONFIG_MAC_OLDWORLD', if_true: files('mac_oldworld.c'))
+# NewWorld PowerMac
+ppc_ss.add(when: 'CONFIG_MAC_NEWWORLD', if_true: files('mac_newworld.c'))
+# e500
+ppc_ss.add(when: 'CONFIG_E500', if_true: files(
+ 'e500.c',
+ 'mpc8544ds.c',
+ 'e500plat.c'
+))
+ppc_ss.add(when: 'CONFIG_E500', if_true: files(
+ 'mpc8544_guts.c',
+ 'ppce500_spin.c'
+))
+# PowerPC 440 Xilinx ML507 reference board.
+ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c'))
+# Pegasos2
+ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c'))
+
+ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c')) # VOF core
+ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c')) # built only when both options are set
+
+hw_arch += {'ppc': ppc_ss} # publish the set under the 'ppc' key of hw_arch
diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c
new file mode 100644
index 000000000..e8d2d51c2
--- /dev/null
+++ b/hw/ppc/mpc8544_guts.c
@@ -0,0 +1,142 @@
+/*
+ * QEMU PowerPC MPC8544 global util pseudo-device
+ *
+ * Copyright (C) 2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Alexander Graf, <alex@csgraf.de>
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * *****************************************************************
+ *
+ * The documentation for this device is noted in the MPC8544 documentation,
+ * file name "MPC8544ERM.pdf". You can easily find it on the web.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/module.h"
+#include "sysemu/runstate.h"
+#include "cpu.h"
+#include "hw/sysbus.h"
+#include "qom/object.h"
+
+#define MPC8544_GUTS_MMIO_SIZE 0x1000
+#define MPC8544_GUTS_RSTCR_RESET 0x02
+
+#define MPC8544_GUTS_ADDR_PORPLLSR 0x00
+#define MPC8544_GUTS_ADDR_PORBMSR 0x04
+#define MPC8544_GUTS_ADDR_PORIMPSCR 0x08
+#define MPC8544_GUTS_ADDR_PORDEVSR 0x0C
+#define MPC8544_GUTS_ADDR_PORDBGMSR 0x10
+#define MPC8544_GUTS_ADDR_PORDEVSR2 0x14
+#define MPC8544_GUTS_ADDR_GPPORCR 0x20
+#define MPC8544_GUTS_ADDR_GPIOCR 0x30
+#define MPC8544_GUTS_ADDR_GPOUTDR 0x40
+#define MPC8544_GUTS_ADDR_GPINDR 0x50
+#define MPC8544_GUTS_ADDR_PMUXCR 0x60
+#define MPC8544_GUTS_ADDR_DEVDISR 0x70
+#define MPC8544_GUTS_ADDR_POWMGTCSR 0x80
+#define MPC8544_GUTS_ADDR_MCPSUMR 0x90
+#define MPC8544_GUTS_ADDR_RSTRSCR 0x94
+#define MPC8544_GUTS_ADDR_PVR 0xA0
+#define MPC8544_GUTS_ADDR_SVR 0xA4
+#define MPC8544_GUTS_ADDR_RSTCR 0xB0
+#define MPC8544_GUTS_ADDR_IOVSELSR 0xC0
+#define MPC8544_GUTS_ADDR_DDRCSR 0xB20
+#define MPC8544_GUTS_ADDR_DDRCDR 0xB24
+#define MPC8544_GUTS_ADDR_DDRCLKDR 0xB28
+#define MPC8544_GUTS_ADDR_CLKOCR 0xE00
+#define MPC8544_GUTS_ADDR_SRDS1CR1 0xF04
+#define MPC8544_GUTS_ADDR_SRDS2CR1 0xF10
+#define MPC8544_GUTS_ADDR_SRDS2CR3 0xF18
+
+#define TYPE_MPC8544_GUTS "mpc8544-guts"
+OBJECT_DECLARE_SIMPLE_TYPE(GutsState, MPC8544_GUTS)
+
+struct GutsState {
+ /*< private >*/
+ SysBusDevice parent_obj;
+ /*< public >*/
+
+ MemoryRegion iomem; /* MMIO region of MPC8544_GUTS_MMIO_SIZE bytes, served by mpc8544_guts_ops */
+};
+
+
+/*
+ * MMIO read handler.  Only the PVR and SVR registers are implemented; they
+ * return the low 32 bits of the corresponding SPR of the CPU performing
+ * the access.  Any other offset is logged to stderr and reads as 0.
+ */
+static uint64_t mpc8544_guts_read(void *opaque, hwaddr addr,
+                                  unsigned size)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(current_cpu);
+    CPUPPCState *env = &cpu->env;
+    hwaddr reg = addr & (MPC8544_GUTS_MMIO_SIZE - 1);
+
+    if (reg == MPC8544_GUTS_ADDR_PVR) {
+        return (uint32_t)env->spr[SPR_PVR];
+    }
+
+    if (reg == MPC8544_GUTS_ADDR_SVR) {
+        return (uint32_t)env->spr[SPR_E500_SVR];
+    }
+
+    fprintf(stderr, "guts: Unknown register read: %x\n", (int)reg);
+    return 0;
+}
+
+/*
+ * MMIO write handler.  Only RSTCR is implemented: writing a value with the
+ * reset bit set requests a guest-initiated system reset.  Writes to any
+ * other offset are logged to stderr and otherwise ignored.
+ */
+static void mpc8544_guts_write(void *opaque, hwaddr addr,
+                               uint64_t value, unsigned size)
+{
+    hwaddr reg = addr & (MPC8544_GUTS_MMIO_SIZE - 1);
+
+    if (reg != MPC8544_GUTS_ADDR_RSTCR) {
+        fprintf(stderr, "guts: Unknown register write: %x = %x\n",
+                (int)reg, (unsigned)value);
+        return;
+    }
+
+    if (value & MPC8544_GUTS_RSTCR_RESET) {
+        qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+    }
+}
+
+/* Access callbacks for the register window: big-endian, with only 4-byte
+ * accesses declared valid. */
+static const MemoryRegionOps mpc8544_guts_ops = {
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .read = mpc8544_guts_read,
+    .write = mpc8544_guts_write,
+};
+
+/* Instance initialiser: creates the register MMIO region and exports it
+ * as a sysbus MMIO region. */
+static void mpc8544_guts_initfn(Object *obj)
+{
+    GutsState *guts = MPC8544_GUTS(obj);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
+
+    memory_region_init_io(&guts->iomem, OBJECT(guts), &mpc8544_guts_ops,
+                          guts, "mpc8544.guts", MPC8544_GUTS_MMIO_SIZE);
+    sysbus_init_mmio(sbd, &guts->iomem);
+}
+
+/* Type description of the "mpc8544-guts" sysbus device */
+static const TypeInfo mpc8544_guts_info = {
+    .name          = TYPE_MPC8544_GUTS,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_init = mpc8544_guts_initfn,
+    .instance_size = sizeof(GutsState),
+};
+
+static void mpc8544_guts_register_types(void)
+{
+ type_register_static(&mpc8544_guts_info); /* the mpc8544-guts device type */
+}
+
+type_init(mpc8544_guts_register_types) /* hooks the registration function above into type_init() */
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
new file mode 100644
index 000000000..81177505f
--- /dev/null
+++ b/hw/ppc/mpc8544ds.c
@@ -0,0 +1,74 @@
+/*
+ * Support for the PPC e500-based mpc8544ds board
+ *
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "qemu/osdep.h"
+#include "e500.h"
+#include "sysemu/device_tree.h"
+#include "hw/ppc/openpic.h"
+#include "qemu/error-report.h"
+#include "cpu.h"
+
+/* Device tree fixup hook: sets the root node's "model" and "compatible"
+ * properties for this board. */
+static void mpc8544ds_fixup_devtree(void *fdt)
+{
+    /*
+     * sizeof() includes the terminating NUL and, for the compatible list,
+     * the embedded NUL that separates its two entries.
+     */
+    static const char board_model[] = "MPC8544DS";
+    static const char board_compat[] = "MPC8544DS\0MPC85xxDS";
+
+    qemu_fdt_setprop(fdt, "/", "model", board_model, sizeof(board_model));
+    qemu_fdt_setprop(fdt, "/", "compatible", board_compat,
+                     sizeof(board_compat));
+}
+
+/* Machine init: enforces the board's RAM limit, then defers to the common
+ * e500 init code. */
+static void mpc8544ds_init(MachineState *machine)
+{
+    const uint64_t ram_limit = 0xc0000000;
+
+    if (machine->ram_size > ram_limit) {
+        error_report("The MPC8544DS board only supports up to 3GB of RAM");
+        exit(1);
+    }
+
+    ppce500_init(machine);
+}
+
+/*
+ * Class initialiser for the "mpc8544ds" machine: fills in the e500 board
+ * parameters (PCI slots, MPIC model, address map) and the generic machine
+ * defaults.  Named after this board rather than e500plat, which the
+ * previous name wrongly suggested.
+ */
+static void mpc8544ds_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    PPCE500MachineClass *pmc = PPCE500_MACHINE_CLASS(oc);
+
+    pmc->pci_first_slot = 0x11;
+    pmc->pci_nr_slots = 2;
+    pmc->fixup_devtree = mpc8544ds_fixup_devtree;
+    pmc->mpic_version = OPENPIC_MODEL_FSL_MPIC_20;
+    pmc->ccsrbar_base = 0xE0000000ULL;
+    pmc->pci_mmio_base = 0xC0000000ULL;
+    pmc->pci_mmio_bus_base = 0xC0000000ULL;
+    pmc->pci_pio_base = 0xE1000000ULL;
+    pmc->spin_base = 0xEF000000ULL;
+
+    mc->desc = "mpc8544ds";
+    mc->init = mpc8544ds_init;
+    mc->max_cpus = 15;
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30");
+    mc->default_ram_id = "mpc8544ds.ram";
+}
+
+#define TYPE_MPC8544DS_MACHINE MACHINE_TYPE_NAME("mpc8544ds")
+
+/* Type description of the mpc8544ds machine, derived from the common e500
+ * machine type. */
+static const TypeInfo mpc8544ds_info = {
+    .name = TYPE_MPC8544DS_MACHINE,
+    .parent = TYPE_PPCE500_MACHINE,
+    .class_init = mpc8544ds_machine_class_init,
+};
+
+static void mpc8544ds_register_types(void)
+{
+ type_register_static(&mpc8544ds_info); /* the mpc8544ds machine type */
+}
+
+type_init(mpc8544ds_register_types) /* hooks the registration function above into type_init() */
diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c
new file mode 100644
index 000000000..cc44d5e33
--- /dev/null
+++ b/hw/ppc/pef.c
@@ -0,0 +1,142 @@
+/*
+ * PEF (Protected Execution Facility) for POWER support
+ *
+ * Copyright Red Hat.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "sysemu/kvm.h"
+#include "migration/blocker.h"
+#include "exec/confidential-guest-support.h"
+#include "hw/ppc/pef.h"
+
+#define TYPE_PEF_GUEST "pef-guest"
+OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST)
+
+typedef struct PefGuest PefGuest;
+typedef struct PefGuestClass PefGuestClass;
+
+/* Class structure of the pef-guest type; it only embeds its parent class */
+struct PefGuestClass {
+ ConfidentialGuestSupportClass parent_class;
+};
+
+/**
+ * PefGuest:
+ *
+ * The PefGuest object is used for creating and managing a PEF
+ * guest. The structure holds no state of its own beyond the embedded
+ * ConfidentialGuestSupport parent object.
+ *
+ * Example command line:
+ *
+ * # $QEMU \
+ * -object pef-guest,id=pef0 \
+ * -machine ...,confidential-guest-support=pef0
+ */
+struct PefGuest {
+ ConfidentialGuestSupport parent_obj;
+};
+
+/*
+ * Enable KVM_CAP_PPC_SECURE_GUEST for the VM, register a migration
+ * blocker and mark @cgs as ready.
+ *
+ * Returns 0 on success, or -1 with @errp set when the capability is
+ * missing or cannot be enabled. Must not be reached in builds without
+ * CONFIG_KVM.
+ */
+static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+#ifdef CONFIG_KVM
+    static Error *mig_blocker;
+    int rc;
+
+    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_SECURE_GUEST)) {
+        error_setg(errp,
+                   "KVM implementation does not support Secure VMs (is an ultravisor running?)");
+        return -1;
+    }
+
+    rc = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1);
+    if (rc < 0) {
+        error_setg(errp, "Error enabling PEF with KVM");
+        return -1;
+    }
+
+    /* Migration is not implemented for PEF guests, so block it */
+    error_setg(&mig_blocker, "PEF: Migration is not implemented");
+    /* NB: This can fail if --only-migratable is used */
+    migrate_add_blocker(mig_blocker, &error_fatal);
+
+    cgs->ready = true;
+    return 0;
+#else
+    g_assert_not_reached();
+#endif
+}
+
+/*
+ * Issue the KVM_PPC_SVM_OFF VM ioctl.
+ *
+ * A kernel that does not implement the ioctl answers -ENOTTY; that case
+ * is deliberately treated as success and no error is set. Any other
+ * failure is reported through @errp and its negative errno is returned.
+ */
+static int kvmppc_svm_off(Error **errp)
+{
+#ifdef CONFIG_KVM
+    int ret = kvm_vm_ioctl(KVM_STATE(current_accel()), KVM_PPC_SVM_OFF);
+
+    if (ret == 0 || ret == -ENOTTY) {
+        return 0;
+    }
+
+    error_setg_errno(errp, -ret, "KVM_PPC_SVM_OFF ioctl failed");
+    return ret;
+#else
+    g_assert_not_reached();
+#endif
+}
+
+/*
+ * Set up PEF for @cgs if it is a pef-guest object.
+ *
+ * Returns 0 without doing anything when @cgs is of another type, -1 with
+ * @errp set when KVM is not enabled, and otherwise the result of
+ * kvmppc_svm_init().
+ */
+int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
+{
+    bool is_pef = object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST) != NULL;
+
+    if (!is_pef) {
+        return 0;
+    }
+
+    if (kvm_enabled()) {
+        return kvmppc_svm_init(cgs, errp);
+    }
+
+    error_setg(errp, "PEF requires KVM");
+    return -1;
+}
+
+/*
+ * Reset-time hook for PEF: a no-op returning 0 unless @cgs is a
+ * pef-guest object, in which case the result of kvmppc_svm_off() is
+ * returned.
+ */
+int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST) == NULL) {
+        return 0;
+    }
+
+    /*
+     * PEF can only have been initialised with KVM available, so
+     * reaching this point without KVM would be a programming error.
+     */
+    assert(kvm_enabled());
+
+    return kvmppc_svm_off(errp);
+}
+
+/*
+ * Define the pef-guest QOM type with CONFIDENTIAL_GUEST_SUPPORT as parent
+ * and the user-creatable interface. The pef_guest_class_init(),
+ * pef_guest_init() and pef_guest_finalize() hooks are defined below.
+ */
+OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest,
+ pef_guest,
+ PEF_GUEST,
+ CONFIDENTIAL_GUEST_SUPPORT,
+ { TYPE_USER_CREATABLE },
+ { NULL })
+
+/* Class init hook for pef-guest; intentionally empty */
+static void pef_guest_class_init(ObjectClass *oc, void *data)
+{
+}
+
+/* Instance init hook for pef-guest; intentionally empty */
+static void pef_guest_init(Object *obj)
+{
+}
+
+/* Instance finalize hook for pef-guest; intentionally empty */
+static void pef_guest_finalize(Object *obj)
+{
+}
diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c
new file mode 100644
index 000000000..298e6b93e
--- /dev/null
+++ b/hw/ppc/pegasos2.c
@@ -0,0 +1,952 @@
+/*
+ * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator
+ *
+ * Copyright (c) 2018-2021 BALATON Zoltan
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "hw/hw.h"
+#include "hw/ppc/ppc.h"
+#include "hw/sysbus.h"
+#include "hw/pci/pci_host.h"
+#include "hw/irq.h"
+#include "hw/pci-host/mv64361.h"
+#include "hw/isa/vt82c686.h"
+#include "hw/ide/pci.h"
+#include "hw/i2c/smbus_eeprom.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "sysemu/qtest.h"
+#include "hw/boards.h"
+#include "hw/loader.h"
+#include "hw/fw-path-provider.h"
+#include "elf.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "sysemu/kvm.h"
+#include "kvm_ppc.h"
+#include "exec/address-spaces.h"
+#include "qom/qom-qobject.h"
+#include "qapi/qmp/qdict.h"
+#include "trace.h"
+#include "qemu/datadir.h"
+#include "sysemu/device_tree.h"
+#include "hw/ppc/vof.h"
+
+#include <libfdt.h>
+
+#define PROM_FILENAME "vof.bin"
+#define PROM_ADDR 0xfff00000
+#define PROM_SIZE 0x80000
+
+#define KVMPPC_HCALL_BASE 0xf000
+#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0)
+#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5)
+
+#define H_SUCCESS 0
+#define H_PRIVILEGE -3 /* Caller not privileged */
+#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */
+
+#define BUS_FREQ_HZ 133333333
+
+#define PCI0_CFG_ADDR 0xcf8
+#define PCI0_MEM_BASE 0xc0000000
+#define PCI0_MEM_SIZE 0x20000000
+#define PCI0_IO_BASE 0xf8000000
+#define PCI0_IO_SIZE 0x10000
+
+#define PCI1_CFG_ADDR 0xc78
+#define PCI1_MEM_BASE 0x80000000
+#define PCI1_MEM_SIZE 0x40000000
+#define PCI1_IO_BASE 0xfe000000
+#define PCI1_IO_SIZE 0x10000
+
+#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2")
+OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE)
+
+/* Per-machine state of the pegasos2 board */
+struct Pegasos2MachineState {
+ MachineState parent_obj;
+ PowerPCCPU *cpu; /* the CPU created in pegasos2_init() */
+ DeviceState *mv; /* MV64361 system controller device */
+ Vof *vof; /* VOF state; allocated in pegasos2_init() when no -bios is given */
+ void *fdt_blob; /* device tree built on the last machine reset */
+ uint64_t kernel_addr; /* filled in by load_elf() for -kernel */
+ uint64_t kernel_entry; /* filled in by load_elf() for -kernel */
+ uint64_t kernel_size; /* size returned by load_elf(), 0 without -kernel */
+};
+
+static void *build_fdt(MachineState *machine, int *fdt_size);
+
+/*
+ * Reset handler registered for the CPU in pegasos2_init(): resets the
+ * CPU, sets HID1 and, when VOF is in use, sets the stack pointer (r1)
+ * and the initial instruction pointer.
+ */
+static void pegasos2_cpu_reset(void *opaque)
+{
+    Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine);
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+
+    cpu_reset(CPU(cpu));
+    env->spr[SPR_HID1] = 7ULL << 28;
+
+    if (!pm->vof) {
+        return;
+    }
+
+    env->gpr[1] = 2 * VOF_STACK_SIZE - 0x20;
+    env->nip = 0x100;
+}
+
+/*
+ * Machine init: creates the CPU, maps RAM and the firmware ROM, loads the
+ * firmware image and the optional -kernel ELF, and instantiates the
+ * MV64361 system controller together with the VT8231 south bridge
+ * functions and a VGA device on the controller's PCI bus 1.
+ *
+ * Any failure is reported with error_report() and terminates QEMU.
+ */
+static void pegasos2_init(MachineState *machine)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ CPUPPCState *env;
+ MemoryRegion *rom = g_new(MemoryRegion, 1);
+ PCIBus *pci_bus;
+ PCIDevice *dev;
+ I2CBus *i2c_bus;
+ /* Without -bios the default firmware image is PROM_FILENAME (vof.bin) */
+ const char *fwname = machine->firmware ?: PROM_FILENAME;
+ char *filename;
+ int sz;
+ uint8_t *spd_data;
+
+ /* init CPU */
+ pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+ env = &pm->cpu->env;
+ if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
+ error_report("Incompatible CPU, only 6xx bus supported");
+ exit(1);
+ }
+
+ /* Set time-base frequency */
+ cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4);
+ qemu_register_reset(pegasos2_cpu_reset, pm->cpu);
+
+ /* RAM */
+ if (machine->ram_size > 2 * GiB) {
+ error_report("RAM size more than 2 GiB is not supported");
+ exit(1);
+ }
+ memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+
+ /* allocate and load firmware */
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, fwname);
+ if (!filename) {
+ error_report("Could not find firmware '%s'", fwname);
+ exit(1);
+ }
+ /* No -bios given: the default firmware is used, so allocate VOF state */
+ if (!machine->firmware && !pm->vof) {
+ pm->vof = g_malloc0(sizeof(*pm->vof));
+ }
+ memory_region_init_rom(rom, NULL, "pegasos2.rom", PROM_SIZE, &error_fatal);
+ memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom);
+ sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1,
+ PPC_ELF_MACHINE, 0, 0);
+ /* Not loadable as ELF: retry as a raw image (at 0 for VOF, else at PROM_ADDR) */
+ if (sz <= 0) {
+ sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE);
+ }
+ if (sz <= 0 || sz > PROM_SIZE) {
+ error_report("Could not load firmware '%s'", filename);
+ exit(1);
+ }
+ g_free(filename);
+ if (pm->vof) {
+ pm->vof->fw_size = sz;
+ }
+
+ /* Marvell Discovery II system controller */
+ pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1,
+ ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]));
+ pci_bus = mv64361_get_pci_bus(pm->mv, 1);
+
+ /* VIA VT8231 South Bridge (multifunction PCI device) */
+ /* VT8231 function 0: PCI-to-ISA Bridge */
+ dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true,
+ TYPE_VT8231_ISA);
+ qdev_connect_gpio_out(DEVICE(dev), 0,
+ qdev_get_gpio_in_named(pm->mv, "gpp", 31));
+
+ /* VT8231 function 1: IDE Controller */
+ dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide");
+ pci_ide_create_devs(dev);
+
+ /* VT8231 function 2-3: USB Ports */
+ pci_create_simple(pci_bus, PCI_DEVFN(12, 2), "vt82c686b-usb-uhci");
+ pci_create_simple(pci_bus, PCI_DEVFN(12, 3), "vt82c686b-usb-uhci");
+
+ /* VT8231 function 4: Power Management Controller */
+ dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 4), TYPE_VT8231_PM);
+ i2c_bus = I2C_BUS(qdev_get_child_bus(DEVICE(dev), "i2c"));
+ /* SPD EEPROM at SMBus address 0x57, generated from the RAM size */
+ spd_data = spd_data_generate(DDR, machine->ram_size);
+ smbus_eeprom_init_one(i2c_bus, 0x57, spd_data);
+
+ /* VT8231 function 5-6: AC97 Audio & Modem */
+ pci_create_simple(pci_bus, PCI_DEVFN(12, 5), TYPE_VIA_AC97);
+ pci_create_simple(pci_bus, PCI_DEVFN(12, 6), TYPE_VIA_MC97);
+
+ /* other PC hardware */
+ pci_vga_init(pci_bus);
+
+ /* Optional -kernel: must be an ELF; its entry, load address and size are kept */
+ if (machine->kernel_filename) {
+ sz = load_elf(machine->kernel_filename, NULL, NULL, NULL,
+ &pm->kernel_entry, &pm->kernel_addr, NULL, NULL, 1,
+ PPC_ELF_MACHINE, 0, 0);
+ if (sz <= 0) {
+ error_report("Could not load kernel '%s'",
+ machine->kernel_filename);
+ exit(1);
+ }
+ pm->kernel_size = sz;
+ if (!pm->vof) {
+ warn_report("Option -kernel may be ineffective with -bios.");
+ }
+ } else if (pm->vof && !qtest_enabled()) {
+ warn_report("Using Virtual OpenFirmware but no -kernel option.");
+ }
+
+ if (!pm->vof && machine->kernel_cmdline && machine->kernel_cmdline[0]) {
+ warn_report("Option -append may be ineffective with -bios.");
+ }
+}
+
+/*
+ * Read @len bytes, little-endian, at offset @addr of MMIO region 0 of
+ * the MV64361 device. The result starts out as 0xffffffff, so that is
+ * what is returned if the dispatch does not store a value.
+ */
+static uint32_t pegasos2_mv_reg_read(Pegasos2MachineState *pm,
+                                     uint32_t addr, uint32_t len)
+{
+    MemoryRegion *regs = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0);
+    MemOp op = size_memop(len) | MO_LE;
+    uint64_t data = 0xffffffffULL;
+
+    memory_region_dispatch_read(regs, addr, &data, op,
+                                MEMTXATTRS_UNSPECIFIED);
+    return data;
+}
+
+/*
+ * Write the low @len bytes of @val, little-endian, at offset @addr of
+ * MMIO region 0 of the MV64361 device.
+ */
+static void pegasos2_mv_reg_write(Pegasos2MachineState *pm, uint32_t addr,
+                                  uint32_t len, uint32_t val)
+{
+    MemoryRegion *regs = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0);
+    MemOp op = size_memop(len) | MO_LE;
+
+    memory_region_dispatch_write(regs, addr, val, op,
+                                 MEMTXATTRS_UNSPECIFIED);
+}
+
+/*
+ * Read PCI configuration space through the MV64361 config address/data
+ * register pair of the selected bus (@bus != 0 selects the PCI1
+ * registers, 0 the PCI0 ones).
+ *
+ * Accesses longer than 4 bytes are not performed and yield 0xffffffff.
+ */
+static uint32_t pegasos2_pci_config_read(Pegasos2MachineState *pm, int bus,
+                                         uint32_t addr, uint32_t len)
+{
+    hwaddr cfg_reg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR;
+
+    if (len > 4) {
+        return 0xffffffff;
+    }
+
+    /* Latch the config address with the enable bit, then read the data */
+    pegasos2_mv_reg_write(pm, cfg_reg, 4, addr | BIT(31));
+    return pegasos2_mv_reg_read(pm, cfg_reg + 4, len);
+}
+
+/*
+ * Write PCI configuration space through the MV64361 config address/data
+ * register pair of the selected bus (@bus != 0 selects the PCI1
+ * registers, 0 the PCI0 ones).
+ *
+ * @len comes straight from the guest on the RTAS write-pci-config path,
+ * so it is bounded the same way pegasos2_pci_config_read() bounds it:
+ * an access longer than the 4-byte data register is dropped (and logged
+ * as a guest error) instead of spilling into the following registers.
+ */
+static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus,
+                                      uint32_t addr, uint32_t len, uint32_t val)
+{
+    hwaddr pcicfg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR;
+
+    if (len > 4) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Invalid PCI config write length %" PRIu32 "\n", len);
+        return;
+    }
+
+    /* Latch the config address with the enable bit, then write the data */
+    pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31));
+    pegasos2_mv_reg_write(pm, pcicfg + 4, len, val);
+}
+
+/*
+ * Machine reset handler. Resets all devices; when VOF is in use it then
+ * performs the register and PCI config set-up the board firmware would
+ * otherwise do, re-initialises VOF, claims memory for the stack and the
+ * kernel, rebuilds the device tree and installs the machine as the CPU's
+ * virtual hypervisor.
+ */
+static void pegasos2_machine_reset(MachineState *machine)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ void *fdt;
+ uint64_t d[2];
+ int sz;
+
+ qemu_devices_reset();
+ if (!pm->vof) {
+ return; /* Firmware should set up machine so nothing to do */
+ }
+
+ /* Otherwise, set up devices that board firmware would normally do */
+ pegasos2_mv_reg_write(pm, 0, 4, 0x28020ff);
+ pegasos2_mv_reg_write(pm, 0x278, 4, 0xa31fc);
+ pegasos2_mv_reg_write(pm, 0xf300, 4, 0x11ff0400);
+ pegasos2_mv_reg_write(pm, 0xf10c, 4, 0x80000000);
+ pegasos2_mv_reg_write(pm, 0x1c, 4, 0x8000000);
+ /* Enable I/O, memory and bus mastering at config address 0 of both buses */
+ pegasos2_pci_config_write(pm, 0, PCI_COMMAND, 2, PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+ pegasos2_pci_config_write(pm, 1, PCI_COMMAND, 2, PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+
+ /* Device 12 function 0 on bus 1 (created as the VT8231 ISA bridge) */
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x9);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) |
+ 0x50, 1, 0x2);
+
+ /* Function 1 (IDE controller) */
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x109);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_CLASS_PROG, 1, 0xf);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) |
+ 0x40, 1, 0xb);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) |
+ 0x50, 4, 0x17171717);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) |
+ PCI_COMMAND, 2, 0x87);
+
+ /* Functions 2 and 3 (USB) */
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 2) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x409);
+
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 3) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x409);
+
+ /* Function 4 (power management controller) */
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x9);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x48, 4, 0xf00);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x40, 4, 0x558020);
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) |
+ 0x90, 4, 0xd00);
+
+ /* Functions 5 and 6 (AC97 audio and modem) */
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 5) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x309);
+
+ pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 6) << 8) |
+ PCI_INTERRUPT_LINE, 2, 0x309);
+
+ /* Device tree and VOF set up */
+ vof_init(pm->vof, machine->ram_size, &error_fatal);
+ if (vof_claim(pm->vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE) == -1) {
+ error_report("Memory allocation for stack failed");
+ exit(1);
+ }
+ if (pm->kernel_size &&
+ vof_claim(pm->vof, pm->kernel_addr, pm->kernel_size, 0) == -1) {
+ error_report("Memory for kernel is in use");
+ exit(1);
+ }
+ fdt = build_fdt(machine, &sz);
+ /* FIXME: VOF assumes entry is same as load address */
+ /* qemu,boot-kernel = { entry, bytes remaining from the entry point } */
+ d[0] = cpu_to_be64(pm->kernel_entry);
+ d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr));
+ qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d));
+
+ qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
+ /* Replace the tree kept from the previous reset */
+ g_free(pm->fdt_blob);
+ pm->fdt_blob = fdt;
+
+ vof_build_dt(fdt, pm->vof);
+ vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe");
+ pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine);
+}
+
+/*
+ * RTAS call tokens. build_fdt() advertises these values as properties of
+ * the /rtas node and pegasos2_rtas() dispatches on them; only
+ * get-time-of-day, read/write-pci-config, display-character and power-off
+ * are actually handled there.
+ */
+enum pegasos2_rtas_tokens {
+ RTAS_RESTART_RTAS = 0,
+ RTAS_NVRAM_FETCH = 1,
+ RTAS_NVRAM_STORE = 2,
+ RTAS_GET_TIME_OF_DAY = 3,
+ RTAS_SET_TIME_OF_DAY = 4,
+ RTAS_EVENT_SCAN = 6,
+ RTAS_CHECK_EXCEPTION = 7,
+ RTAS_READ_PCI_CONFIG = 8,
+ RTAS_WRITE_PCI_CONFIG = 9,
+ RTAS_DISPLAY_CHARACTER = 10,
+ RTAS_SET_INDICATOR = 11,
+ RTAS_POWER_OFF = 17,
+ RTAS_SUSPEND = 18,
+ RTAS_HIBERNATE = 19,
+ RTAS_SYSTEM_REBOOT = 20,
+};
+
+/*
+ * Handle an RTAS call whose argument buffer is at physical address
+ * @args_real. The buffer holds big-endian 32-bit words: token, number of
+ * arguments, number of return values, then the arguments followed by the
+ * return value slots.
+ *
+ * The first return slot receives the status (0 on success, -1 when the
+ * argument/return counts do not match the call). Returns H_SUCCESS or
+ * H_PARAMETER as the hypercall result. Unknown tokens are logged and
+ * reported as successful.
+ */
+static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm,
+ target_ulong args_real)
+{
+ AddressSpace *as = CPU(cpu)->as;
+ uint32_t token = ldl_be_phys(as, args_real);
+ uint32_t nargs = ldl_be_phys(as, args_real + 4);
+ uint32_t nrets = ldl_be_phys(as, args_real + 8);
+ uint32_t args = args_real + 12;
+ uint32_t rets = args_real + 12 + nargs * 4; /* return slots follow the args */
+
+ if (nrets < 1) {
+ qemu_log_mask(LOG_GUEST_ERROR, "Too few return values in RTAS call\n");
+ return H_PARAMETER;
+ }
+ switch (token) {
+ case RTAS_GET_TIME_OF_DAY:
+ {
+ /* Time comes from the machine's "rtc-time" property as a dict */
+ QObject *qo = object_property_get_qobject(qdev_get_machine(),
+ "rtc-time", &error_fatal);
+ QDict *qd = qobject_to(QDict, qo);
+
+ if (nargs != 0 || nrets != 8 || !qd) {
+ stl_be_phys(as, rets, -1);
+ qobject_unref(qo);
+ return H_PARAMETER;
+ }
+
+ /* status, year, month, day, hour, minute, second, and a final 0 */
+ stl_be_phys(as, rets, 0);
+ stl_be_phys(as, rets + 4, qdict_get_int(qd, "tm_year") + 1900);
+ stl_be_phys(as, rets + 8, qdict_get_int(qd, "tm_mon") + 1);
+ stl_be_phys(as, rets + 12, qdict_get_int(qd, "tm_mday"));
+ stl_be_phys(as, rets + 16, qdict_get_int(qd, "tm_hour"));
+ stl_be_phys(as, rets + 20, qdict_get_int(qd, "tm_min"));
+ stl_be_phys(as, rets + 24, qdict_get_int(qd, "tm_sec"));
+ stl_be_phys(as, rets + 28, 0);
+ qobject_unref(qo);
+ return H_SUCCESS;
+ }
+ case RTAS_READ_PCI_CONFIG:
+ {
+ uint32_t addr, len, val;
+
+ if (nargs != 2 || nrets != 2) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ addr = ldl_be_phys(as, args);
+ len = ldl_be_phys(as, args + 4);
+ /* Bits 24 and up of addr being zero selects bus 1, otherwise bus 0 */
+ val = pegasos2_pci_config_read(pm, !(addr >> 24),
+ addr & 0x0fffffff, len);
+ stl_be_phys(as, rets, 0);
+ stl_be_phys(as, rets + 4, val);
+ return H_SUCCESS;
+ }
+ case RTAS_WRITE_PCI_CONFIG:
+ {
+ uint32_t addr, len, val;
+
+ if (nargs != 3 || nrets != 1) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ addr = ldl_be_phys(as, args);
+ len = ldl_be_phys(as, args + 4);
+ val = ldl_be_phys(as, args + 8);
+ /* Same bus selection as for read-pci-config */
+ pegasos2_pci_config_write(pm, !(addr >> 24),
+ addr & 0x0fffffff, len, val);
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ }
+ case RTAS_DISPLAY_CHARACTER:
+ if (nargs != 1 || nrets != 1) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ /* The character is only emitted to the QEMU log (LOG_UNIMP mask) */
+ qemu_log_mask(LOG_UNIMP, "%c", ldl_be_phys(as, args));
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ case RTAS_POWER_OFF:
+ {
+ if (nargs != 2 || nrets != 1) {
+ stl_be_phys(as, rets, -1);
+ return H_PARAMETER;
+ }
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ }
+ default:
+ /* Unimplemented calls are logged but still report success */
+ qemu_log_mask(LOG_UNIMP, "Unknown RTAS token %u (args=%u, rets=%u)\n",
+ token, nargs, nrets);
+ stl_be_phys(as, rets, 0);
+ return H_SUCCESS;
+ }
+}
+
+/*
+ * Virtual hypervisor hypercall entry point. The call number is taken
+ * from r3 and the result is stored back into r3:
+ *  - calls made with MSR[PR] set are rejected with H_PRIVILEGE;
+ *  - KVMPPC_H_RTAS is forwarded to pegasos2_rtas() with r4 as argument;
+ *  - KVMPPC_H_VOF_CLIENT is forwarded to vof_client_call() with r4;
+ *  - anything else is logged as a guest error and yields -1.
+ */
+static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+    Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp);
+    CPUPPCState *env = &cpu->env;
+    int rc;
+
+    /* The TCG path should also be holding the BQL at this point */
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (msr_pr) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n");
+        env->gpr[3] = H_PRIVILEGE;
+        return;
+    }
+
+    switch (env->gpr[3]) {
+    case KVMPPC_H_RTAS:
+        env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]);
+        break;
+    case KVMPPC_H_VOF_CLIENT:
+        rc = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob,
+                             env->gpr[4]);
+        env->gpr[3] = (rc ? H_PARAMETER : H_SUCCESS);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "Unsupported hypercall " TARGET_FMT_lx
+                      "\n", env->gpr[3]);
+        env->gpr[3] = -1;
+        break;
+    }
+}
+
+/* Empty hook, installed as both cpu_exec_enter and cpu_exec_exit */
+static void vhyp_nop(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+}
+
+/* encode_hpt_for_kvm_pr hook: returns the current CPU's SDR1 register value */
+static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
+{
+ return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1];
+}
+
+/*
+ * VOF machine interface setprop hook: unconditionally returns true
+ * without inspecting the path, property name or value.
+ */
+static bool pegasos2_setprop(MachineState *ms, const char *path,
+ const char *propname, void *val, int vallen)
+{
+ return true;
+}
+
+/*
+ * Class init for the pegasos2 machine: installs the virtual hypervisor
+ * and VOF interface callbacks and sets the generic machine defaults.
+ */
+static void pegasos2_machine_class_init(ObjectClass *oc, void *data)
+{
+    PPCVirtualHypervisorClass *hyp_class = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+    VofMachineIfClass *vof_class = VOF_MACHINE_CLASS(oc);
+    MachineClass *machc = MACHINE_CLASS(oc);
+
+    /* Virtual hypervisor callbacks */
+    hyp_class->hypercall = pegasos2_hypercall;
+    hyp_class->cpu_exec_enter = vhyp_nop;
+    hyp_class->cpu_exec_exit = vhyp_nop;
+    hyp_class->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr;
+
+    /* VOF machine interface */
+    vof_class->setprop = pegasos2_setprop;
+
+    /* Machine entry points */
+    machc->desc = "Genesi/bPlan Pegasos II";
+    machc->init = pegasos2_init;
+    machc->reset = pegasos2_machine_reset;
+
+    /* Machine defaults */
+    machc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9");
+    machc->default_ram_id = "pegasos2.ram";
+    machc->default_ram_size = 512 * MiB;
+    machc->default_display = "std";
+    machc->default_boot_order = "cd";
+    machc->block_default_type = IF_IDE;
+}
+
+/*
+ * QOM type description of the pegasos2 machine. It implements the PPC
+ * virtual hypervisor and VOF machine interfaces whose callbacks are set
+ * in pegasos2_machine_class_init().
+ */
+static const TypeInfo pegasos2_machine_info = {
+ .name = TYPE_PEGASOS2_MACHINE,
+ .parent = TYPE_MACHINE,
+ .class_init = pegasos2_machine_class_init,
+ .instance_size = sizeof(Pegasos2MachineState),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PPC_VIRTUAL_HYPERVISOR },
+ { TYPE_VOF_MACHINE_IF },
+ { }
+ },
+};
+
+/* Registers the pegasos2 machine type; hooked up through type_init() below */
+static void pegasos2_machine_register_types(void)
+{
+ type_register_static(&pegasos2_machine_info);
+}
+
+type_init(pegasos2_machine_register_types)
+
+/* FDT creation for passing to firmware */
+
+/* Context handed to the per-device FDT callbacks */
+typedef struct {
+ void *fdt; /* device tree blob being built */
+ const char *path; /* path of the node the callback works under */
+} FDTInfo;
+
+/* We do everything in reverse order so it comes out right in the tree */
+
+/* device_map callback for the "ide" entry: sets device_type of the node */
+static void dt_ide(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ /* NOTE(review): "spi" looks odd for an IDE node; presumably intentional - confirm */
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "spi");
+}
+
+/*
+ * device_map callback for the "usb" entry: sets the cell counts and
+ * device_type of the node.
+ */
+static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 0);
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 1);
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb");
+}
+
+/*
+ * device_map callback for the "isa" entry: fills in the bridge node and
+ * adds fixed child nodes for lpt, fdc, timer, rtc, keyboard, 8042 and
+ * serial. Each child gets a three-cell "reg" (first cell 1, then the port
+ * address and a length) and, where present, a two-cell "interrupts".
+ */
+static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+ GString *name = g_string_sized_new(64); /* reused for every child path */
+ uint32_t cells[3];
+
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1);
+ qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2);
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa");
+ qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa");
+
+ /* additional devices */
+ g_string_printf(name, "%s/lpt@i3bc", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(7);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x3bc);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt");
+
+ g_string_printf(name, "%s/fdc@i3f0", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(6);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x3f0);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc");
+
+ /* The timer node has no "interrupts" property */
+ g_string_printf(name, "%s/timer@i40", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x40);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer");
+
+ g_string_printf(name, "%s/rtc@i70", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc");
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(8);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x70);
+ cells[2] = cpu_to_be32(2);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc");
+
+ g_string_printf(name, "%s/keyboard@i60", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x60);
+ cells[2] = cpu_to_be32(5);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard");
+
+ /* The 8042 node shares the keyboard's reg and is marked interrupt-controller */
+ g_string_printf(name, "%s/8042@i60", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1);
+ qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", "");
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x60);
+ cells[2] = cpu_to_be32(5);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042");
+
+ g_string_printf(name, "%s/serial@i2f8", fi->path);
+ qemu_fdt_add_subnode(fi->fdt, name->str);
+ qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+ cells[0] = cpu_to_be32(3);
+ cells[1] = 0;
+ qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+ cells, 2 * sizeof(cells[0]));
+ cells[0] = cpu_to_be32(1);
+ cells[1] = cpu_to_be32(0x2f8);
+ cells[2] = cpu_to_be32(8);
+ qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial");
+ qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial");
+
+ g_string_free(name, TRUE);
+}
+
+/*
+ * Maps a "pci<vendor>,<device>" id string (hex, as formatted by
+ * add_pci_device()) to the node name to use in the device tree and an
+ * optional callback adding device-specific properties. The table ends
+ * with an all-NULL sentinel entry.
+ */
+static struct {
+ const char *id;
+ const char *name;
+ void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi);
+} device_map[] = {
+ { "pci11ab,6460", "host", NULL },
+ { "pci1106,8231", "isa", dt_isa },
+ { "pci1106,571", "ide", dt_ide },
+ { "pci1106,3044", "firewire", NULL },
+ { "pci1106,3038", "usb", dt_usb },
+ { "pci1106,8235", "other", NULL },
+ { "pci1106,3058", "sound", NULL },
+ { NULL, NULL }
+};
+
+/*
+ * pci_for_each_device_reverse() callback: adds a device tree node for PCI
+ * device @d under fi->path (@opaque is an FDTInfo). The node is named
+ * after the device_map entry matching the vendor/device id, or after the
+ * "pci<vendor>,<device>" string itself when there is no match, and gets
+ * "reg" plus the standard id properties read from config space.
+ */
+static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque)
+{
+ FDTInfo *fi = opaque;
+ GString *node = g_string_new(NULL);
+ /* Five cells per entry: one for config space plus one per region */
+ uint32_t cells[(PCI_NUM_REGIONS + 1) * 5];
+ int i, j;
+ const char *name = NULL;
+ g_autofree const gchar *pn = g_strdup_printf("pci%x,%x",
+ pci_get_word(&d->config[PCI_VENDOR_ID]),
+ pci_get_word(&d->config[PCI_DEVICE_ID]));
+
+ for (i = 0; device_map[i].id; i++) {
+ if (!strcmp(pn, device_map[i].id)) {
+ name = device_map[i].name;
+ break;
+ }
+ }
+ /* Node name is <name>@<slot>, with ",<function>" appended if non-zero */
+ g_string_printf(node, "%s/%s@%x", fi->path, (name ?: pn),
+ PCI_SLOT(d->devfn));
+ if (PCI_FUNC(d->devfn)) {
+ g_string_append_printf(node, ",%x", PCI_FUNC(d->devfn));
+ }
+
+ qemu_fdt_add_subnode(fi->fdt, node->str);
+ /* i is either the matched entry or the sentinel, whose dtf is NULL */
+ if (device_map[i].dtf) {
+ FDTInfo cfi = { fi->fdt, node->str };
+ device_map[i].dtf(bus, d, &cfi);
+ }
+ /* First "reg" entry: the device's config space address, zero size */
+ cells[0] = cpu_to_be32(d->devfn << 8);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = 0;
+ cells[4] = 0;
+ j = 5;
+ /* One further entry per region with a non-zero size */
+ for (i = 0; i < PCI_NUM_REGIONS; i++) {
+ if (!d->io_regions[i].size) {
+ continue;
+ }
+ cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4));
+ /* Space code in bits 24-25: 1 for I/O, 2 for memory; prefetch flag on top */
+ if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) {
+ cells[j] |= cpu_to_be32(1 << 24);
+ } else {
+ cells[j] |= cpu_to_be32(2 << 24);
+ if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) {
+ cells[j] |= cpu_to_be32(4 << 28);
+ }
+ }
+ cells[j + 1] = 0;
+ cells[j + 2] = 0;
+ cells[j + 3] = cpu_to_be32(d->io_regions[i].size >> 32);
+ cells[j + 4] = cpu_to_be32(d->io_regions[i].size);
+ j += 5;
+ }
+ qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn);
+ if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) {
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts",
+ pci_get_byte(&d->config[PCI_INTERRUPT_PIN]));
+ }
+ /* Pegasos2 firmware has subsystem-id and subsystem-vendor-id swapped */
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id",
+ pci_get_word(&d->config[PCI_SUBSYSTEM_ID]));
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id",
+ pci_get_word(&d->config[PCI_SUBSYSTEM_VENDOR_ID]));
+ /* cells[0] is reused as scratch: class code in bits 8-31, revision in the low byte */
+ cells[0] = pci_get_long(&d->config[PCI_CLASS_REVISION]);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "class-code", cells[0] >> 8);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "revision-id", cells[0] & 0xff);
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "device-id",
+ pci_get_word(&d->config[PCI_DEVICE_ID]));
+ qemu_fdt_setprop_cell(fi->fdt, node->str, "vendor-id",
+ pci_get_word(&d->config[PCI_VENDOR_ID]));
+
+ g_string_free(node, TRUE);
+}
+
+/*
+ * Build the device tree for the machine: root properties, the two PCI
+ * host bridge nodes with a child node per PCI device, /failsafe, /rtas,
+ * /cpus, /memory@0, /chosen and /openprom.
+ *
+ * @fdt_size is passed to create_device_tree(). Returns the new tree; the
+ * caller (pegasos2_machine_reset()) keeps it and frees it with g_free().
+ */
+static void *build_fdt(MachineState *machine, int *fdt_size)
+{
+ Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine);
+ PowerPCCPU *cpu = pm->cpu;
+ PCIBus *pci_bus;
+ FDTInfo fi;
+ uint32_t cells[16];
+ void *fdt = create_device_tree(fdt_size);
+
+ fi.fdt = fdt;
+
+ /* root node */
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description",
+ "Pegasos CHRP PowerPC System");
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2");
+ qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH");
+ qemu_fdt_setprop_string(fdt, "/", "revision", "2B");
+ qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2");
+ qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp");
+ qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1);
+ qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2");
+
+ /* pci@c0000000 */
+ qemu_fdt_add_subnode(fdt, "/pci@c0000000");
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range",
+ cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1);
+ cells[0] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[1] = cpu_to_be32(PCI0_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0]));
+ /* ranges: two six-cell entries, the I/O window then the memory window */
+ cells[0] = cpu_to_be32(0x01000000);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = cpu_to_be32(PCI0_IO_BASE);
+ cells[4] = 0;
+ cells[5] = cpu_to_be32(PCI0_IO_SIZE);
+ cells[6] = cpu_to_be32(0x02000000);
+ cells[7] = 0;
+ cells[8] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[9] = cpu_to_be32(PCI0_MEM_BASE);
+ cells[10] = 0;
+ cells[11] = cpu_to_be32(PCI0_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges",
+ cells, 12 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2);
+ qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3);
+ qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci");
+ qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci");
+
+ /* Child nodes for the devices on MV64361 PCI bus 0 */
+ fi.path = "/pci@c0000000";
+ pci_bus = mv64361_get_pci_bus(pm->mv, 0);
+ pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi);
+
+ /* pci@80000000 */
+ qemu_fdt_add_subnode(fdt, "/pci@80000000");
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range",
+ cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0);
+ cells[0] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[1] = cpu_to_be32(PCI1_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge",
+ 0xf1000cb4);
+ /* ranges: same layout as for the first bridge */
+ cells[0] = cpu_to_be32(0x01000000);
+ cells[1] = 0;
+ cells[2] = 0;
+ cells[3] = cpu_to_be32(PCI1_IO_BASE);
+ cells[4] = 0;
+ cells[5] = cpu_to_be32(PCI1_IO_SIZE);
+ cells[6] = cpu_to_be32(0x02000000);
+ cells[7] = 0;
+ cells[8] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[9] = cpu_to_be32(PCI1_MEM_BASE);
+ cells[10] = 0;
+ cells[11] = cpu_to_be32(PCI1_MEM_SIZE);
+ qemu_fdt_setprop(fdt, "/pci@80000000", "ranges",
+ cells, 12 * sizeof(cells[0]));
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2);
+ qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3);
+ qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci");
+ qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci");
+
+ /* Child nodes for the devices on MV64361 PCI bus 1 */
+ fi.path = "/pci@80000000";
+ pci_bus = mv64361_get_pci_bus(pm->mv, 1);
+ pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi);
+
+ /* /failsafe is what pegasos2_machine_reset() opens as stdout */
+ qemu_fdt_add_subnode(fdt, "/failsafe");
+ qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial");
+ qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe");
+
+ /* /rtas: each property carries the token value of the named call */
+ qemu_fdt_add_subnode(fdt, "/rtas");
+ qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "display-character",
+ RTAS_DISPLAY_CHARACTER);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config",
+ RTAS_WRITE_PCI_CONFIG);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config",
+ RTAS_READ_PCI_CONFIG);
+ /* Pegasos2 firmware misspells check-exception and guests use that */
+ qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption",
+ RTAS_CHECK_EXCEPTION);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day",
+ RTAS_SET_TIME_OF_DAY);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day",
+ RTAS_GET_TIME_OF_DAY);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20);
+ qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1);
+
+ /* cpus */
+ qemu_fdt_add_subnode(fdt, "/cpus");
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1);
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1);
+ qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0);
+ qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus");
+
+ /* FIXME Get CPU name from CPU object */
+ const char *cp = "/cpus/PowerPC,G4";
+ qemu_fdt_add_subnode(fdt, cp);
+ qemu_fdt_setprop_cell(fdt, cp, "l2cr", 0);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-block-size",
+ cpu->env.dcache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "d-cache-line-size",
+ cpu->env.dcache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-size", 0x8000);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-block-size",
+ cpu->env.icache_line_size);
+ qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size",
+ cpu->env.icache_line_size);
+ /* The per-side TLB properties are only added when id_tlbs is set */
+ if (cpu->env.id_tlbs) {
+ qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way);
+ qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "d-tlb-size", cpu->env.tlb_per_way);
+ qemu_fdt_setprop_string(fdt, cp, "tlb-split", "");
+ }
+ qemu_fdt_setprop_cell(fdt, cp, "tlb-sets", cpu->env.nb_ways);
+ qemu_fdt_setprop_cell(fdt, cp, "tlb-size", cpu->env.nb_tlb);
+ qemu_fdt_setprop_string(fdt, cp, "state", "running");
+ if (cpu->env.insns_flags & PPC_ALTIVEC) {
+ qemu_fdt_setprop_string(fdt, cp, "altivec", "");
+ qemu_fdt_setprop_string(fdt, cp, "data-streams", "");
+ }
+ /*
+ * FIXME What flags do data-streams, external-control and
+ * performance-monitor depend on?
+ */
+ qemu_fdt_setprop_string(fdt, cp, "external-control", "");
+ if (cpu->env.insns_flags & PPC_FLOAT_FSQRT) {
+ qemu_fdt_setprop_string(fdt, cp, "general-purpose", "");
+ }
+ qemu_fdt_setprop_string(fdt, cp, "performance-monitor", "");
+ if (cpu->env.insns_flags & PPC_FLOAT_FRES) {
+ qemu_fdt_setprop_string(fdt, cp, "graphics", "");
+ }
+ qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4);
+ qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency",
+ cpu->env.tb_env->tb_freq);
+ qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ);
+ /* Floating point product, converted to the 32-bit cell value */
+ qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5);
+ qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]);
+ cells[0] = 0;
+ cells[1] = 0;
+ qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu");
+ /* Node name is the last component of the path */
+ qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1);
+
+ /* memory */
+ qemu_fdt_add_subnode(fdt, "/memory@0");
+ cells[0] = 0;
+ cells[1] = cpu_to_be32(machine->ram_size);
+ qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0]));
+ qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory");
+ qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory");
+
+ /* /chosen carries the -append command line (empty if none) */
+ qemu_fdt_add_subnode(fdt, "/chosen");
+ qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
+ machine->kernel_cmdline ?: "");
+ qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen");
+
+ qemu_fdt_add_subnode(fdt, "/openprom");
+ qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1");
+
+ return fdt;
+}
diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
new file mode 100644
index 000000000..71e45515f
--- /dev/null
+++ b/hw/ppc/pnv.c
@@ -0,0 +1,2132 @@
+/*
+ * QEMU PowerPC PowerNV machine model
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qemu/units.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "sysemu/qtest.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/numa.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "sysemu/cpus.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/hw_accel.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_core.h"
+#include "hw/loader.h"
+#include "hw/nmi.h"
+#include "qapi/visitor.h"
+#include "monitor/monitor.h"
+#include "hw/intc/intc.h"
+#include "hw/ipmi/ipmi.h"
+#include "target/ppc/mmu-hash64.h"
+#include "hw/pci/msi.h"
+
+#include "hw/ppc/xics.h"
+#include "hw/qdev-properties.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/pnv_pnor.h"
+
+#include "hw/isa/isa.h"
+#include "hw/char/serial.h"
+#include "hw/rtc/mc146818rtc.h"
+
+#include <libfdt.h>
+
+#define FDT_MAX_SIZE (1 * MiB) /* size of the buffer the device tree is built in */
+
+#define FW_FILE_NAME "skiboot.lid" /* firmware image used when none is given */
+#define FW_LOAD_ADDR 0x0
+#define FW_MAX_SIZE (16 * MiB) /* upper bound passed to the firmware loader */
+
+#define KERNEL_LOAD_ADDR 0x20000000 /* where the -kernel image is loaded */
+#define KERNEL_MAX_SIZE (128 * MiB)
+#define INITRD_LOAD_ADDR 0x28000000 /* where the -initrd image is loaded */
+#define INITRD_MAX_SIZE (128 * MiB)
+
+/*
+ * Return the class name of the core type matching a chip: the chip
+ * class name has PNV_CHIP_TYPE_SUFFIX stripped and the remainder is
+ * formatted through PNV_CORE_TYPE_NAME() before being looked up.
+ */
+static const char *pnv_chip_core_typename(const PnvChip *o)
+{
+    const char *chip_name = object_class_get_name(object_get_class(OBJECT(o)));
+    int prefix_len = strlen(chip_name) - strlen(PNV_CHIP_TYPE_SUFFIX);
+    char *core_name;
+    const char *resolved;
+
+    core_name = g_strdup_printf(PNV_CORE_TYPE_NAME("%.*s"), prefix_len,
+                                chip_name);
+    /* The returned string is the one reported by the class lookup */
+    resolved = object_class_get_name(object_class_by_name(core_name));
+    g_free(core_name);
+
+    return resolved;
+}
+
+/*
+ * On Power Systems E880 (POWER8), the max cpus (threads) should be :
+ * 4 * 4 sockets * 12 cores * 8 threads = 1536
+ * Let's make it 2^11
+ */
+#define MAX_CPUS 2048
+
+/*
+ * Memory nodes are created by hostboot, one for each range of memory
+ * that has a different "affinity". In practice, it means one range
+ * per chip.
+ *
+ * Adds a "memory@<start>" node under the root with a two-cell 64-bit
+ * "reg" (start, size) and the owning "ibm,chip-id".
+ *
+ * @fdt:     device tree blob being populated
+ * @chip_id: value stored in "ibm,chip-id"
+ * @start:   base address of the range
+ * @size:    length of the range
+ */
+static void pnv_dt_memory(void *fdt, int chip_id, hwaddr start, hwaddr size)
+{
+    char *mem_name;
+    uint64_t mem_reg_property[2];
+    int off;
+
+    mem_reg_property[0] = cpu_to_be64(start);
+    mem_reg_property[1] = cpu_to_be64(size);
+
+    mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start);
+    off = fdt_add_subnode(fdt, 0, mem_name);
+    g_free(mem_name);
+    /*
+     * Check the node offset itself, like the other node builders do,
+     * so that a creation failure is reported with its own error code
+     * instead of a bad offset on the first property below.
+     */
+    _FDT(off);
+
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                      sizeof(mem_reg_property))));
+    _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id)));
+}
+
+/*
+ * Return the offset of the "/cpus" node, creating it (with
+ * #address-cells = 1 and #size-cells = 0) when it does not exist yet.
+ * The resulting offset is checked with _FDT() before being returned.
+ */
+static int get_cpus_node(void *fdt)
+{
+    int cpus_offset = fdt_path_offset(fdt, "/cpus");
+
+    if (cpus_offset < 0) {
+        cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+        /*
+         * libfdt reports errors as negative values, so only a
+         * non-negative offset designates the new node. On failure fall
+         * through to the _FDT() check below, which then sees the
+         * original error code rather than a bad-offset error from
+         * setting a property on an invalid node.
+         */
+        if (cpus_offset >= 0) {
+            _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+            _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+        }
+    }
+    _FDT(cpus_offset);
+    return cpus_offset;
+}
+
+/*
+ * The PowerNV cores (and threads) need to use real HW ids and not an
+ * incremental index like it has been done on other platforms. This HW
+ * id is stored in the CPU PIR, it is used to create cpu nodes in the
+ * device tree, used in XSCOM to address cores and in interrupt
+ * servers.
+ *
+ * pnv_dt_core() adds one node for the core under /cpus, named
+ * "<fw_name>@<pir>". The CPU properties are read from the first thread
+ * of the core (pc->threads[0]) and every thread of the core is listed
+ * in "ibm,ppc-interrupt-server#s" as pc->pir + thread index.
+ */
+static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt)
+{
+ PowerPCCPU *cpu = pc->threads[0];
+ CPUState *cs = CPU(cpu);
+ DeviceClass *dc = DEVICE_GET_CLASS(cs);
+ int smt_threads = CPU_CORE(pc)->nr_threads;
+ CPUPPCState *env = &cpu->env;
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+ uint32_t servers_prop[smt_threads]; /* one cell per thread of the core */
+ int i;
+ uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+ 0xffffffff, 0xffffffff}; /* NOTE(review): 28/40 look like log2 segment sizes - confirm */
+ uint32_t tbfreq = PNV_TIMEBASE_FREQ;
+ uint32_t cpufreq = 1000000000; /* fixed value advertised as "clock-frequency" */
+ uint32_t page_sizes_prop[64];
+ size_t page_sizes_prop_size;
+ const uint8_t pa_features[] = { 24, 0,
+ 0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0,
+ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
+ 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
+ int offset;
+ char *nodename;
+ int cpus_offset = get_cpus_node(fdt);
+
+ nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir);
+ offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+ _FDT(offset);
+ g_free(nodename);
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id)));
+
+ _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir)));
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir)));
+ _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+ _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+ _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+ env->dcache_line_size)));
+ _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+ env->dcache_line_size)));
+ _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+ env->icache_line_size)));
+ _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+ env->icache_line_size)));
+
+ if (pcc->l1_dcache_size) { /* a zero size means the CPU class does not define it */
+ _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+ pcc->l1_dcache_size)));
+ } else {
+ warn_report("Unknown L1 dcache size for cpu");
+ }
+ if (pcc->l1_icache_size) {
+ _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+ pcc->l1_icache_size)));
+ } else {
+ warn_report("Unknown L1 icache size for cpu");
+ }
+
+ _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+ _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size",
+ cpu->hash64_opts->slb_size)));
+ _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+ _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+ if (ppc_has_spr(cpu, SPR_PURR)) {
+ _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0)));
+ }
+
+ if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
+ _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+ segs, sizeof(segs))));
+ }
+
+ /*
+ * Advertise VMX/VSX (vector extensions) if available
+ * 0 / no property == no vector extensions
+ * 1 == VMX / Altivec available
+ * 2 == VSX available
+ */
+ if (env->insns_flags & PPC_ALTIVEC) {
+ uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1;
+
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx)));
+ }
+
+ /*
+ * Advertise DFP (Decimal Floating Point) if available
+ * 0 / no property == no DFP
+ * 1 == DFP available
+ */
+ if (env->insns_flags2 & PPC2_DFP) {
+ _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+ }
+
+ page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
+ sizeof(page_sizes_prop));
+ if (page_sizes_prop_size) { /* property is omitted when the helper returns 0 */
+ _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+ page_sizes_prop, page_sizes_prop_size)));
+ }
+
+ _FDT((fdt_setprop(fdt, offset, "ibm,pa-features",
+ pa_features, sizeof(pa_features))));
+
+ /* Build interrupt servers properties */
+ for (i = 0; i < smt_threads; i++) {
+ servers_prop[i] = cpu_to_be32(pc->pir + i);
+ }
+ _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+ servers_prop, sizeof(servers_prop))));
+}
+
+/*
+ * Add the "interrupt-controller@<addr>" node describing the ICP
+ * registers of one core. "reg" holds one (address, 0x1000) pair of
+ * 64-bit cells per thread and "ibm,interrupt-server-ranges" holds
+ * (pir, nr_threads).
+ */
+static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir,
+                       uint32_t nr_threads)
+{
+    const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp";
+    uint64_t icp_base = PNV_ICP_BASE(chip) | (pir << 12);
+    uint32_t server_range[2];
+    uint32_t reg_bytes, thread;
+    uint64_t *reg_cells, *cell;
+    char *node_name;
+    int node;
+
+    server_range[0] = cpu_to_be32(pir);
+    server_range[1] = cpu_to_be32(nr_threads);
+
+    /* Two 64-bit cells (address, size) for each thread */
+    reg_bytes = sizeof(uint64_t) * 2 * nr_threads;
+    reg_cells = g_malloc(reg_bytes);
+    cell = reg_cells;
+    for (thread = 0; thread < nr_threads; thread++) {
+        *cell++ = cpu_to_be64(icp_base | ((pir + thread) * 0x1000));
+        *cell++ = cpu_to_be64(0x1000);
+    }
+
+    node_name = g_strdup_printf("interrupt-controller@%"PRIX64, icp_base);
+    node = fdt_add_subnode(fdt, 0, node_name);
+    _FDT(node);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, node, "compatible", compat, sizeof(compat))));
+    _FDT((fdt_setprop(fdt, node, "reg", reg_cells, reg_bytes)));
+    _FDT((fdt_setprop_string(fdt, node, "device_type",
+                             "PowerPC-External-Interrupt-Presentation")));
+    _FDT((fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)));
+    _FDT((fdt_setprop(fdt, node, "ibm,interrupt-server-ranges",
+                      server_range, sizeof(server_range))));
+    _FDT((fdt_setprop_cell(fdt, node, "#interrupt-cells", 1)));
+    _FDT((fdt_setprop_cell(fdt, node, "#address-cells", 0)));
+    g_free(reg_cells);
+}
+
+/*
+ * Device tree contents of a POWER8 chip: the XSCOM node, then a cpu
+ * node and an ICP node for every core, and finally the memory node
+ * when the chip owns RAM.
+ */
+static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt)
+{
+    static const char xscom_compat[] = "ibm,power8-xscom\0ibm,xscom";
+    int core_idx;
+
+    pnv_dt_xscom(chip, fdt, 0,
+                 cpu_to_be64(PNV_XSCOM_BASE(chip)),
+                 cpu_to_be64(PNV_XSCOM_SIZE),
+                 xscom_compat, sizeof(xscom_compat));
+
+    for (core_idx = 0; core_idx < chip->nr_cores; core_idx++) {
+        PnvCore *core = chip->cores[core_idx];
+
+        pnv_dt_core(chip, core, fdt);
+
+        /* Interrupt Control Presenters (ICP). One per core. */
+        pnv_dt_icp(chip, fdt, core->pir, CPU_CORE(core)->nr_threads);
+    }
+
+    /* Chips without RAM get no memory node */
+    if (!chip->ram_size) {
+        return;
+    }
+    pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size);
+}
+
+/*
+ * Device tree contents of a POWER9 chip: the XSCOM node, one cpu node
+ * per core, the memory node when the chip owns RAM, and the LPC node.
+ */
+static void pnv_chip_power9_dt_populate(PnvChip *chip, void *fdt)
+{
+    static const char xscom_compat[] = "ibm,power9-xscom\0ibm,xscom";
+    int core_idx = 0;
+
+    pnv_dt_xscom(chip, fdt, 0,
+                 cpu_to_be64(PNV9_XSCOM_BASE(chip)),
+                 cpu_to_be64(PNV9_XSCOM_SIZE),
+                 xscom_compat, sizeof(xscom_compat));
+
+    while (core_idx < chip->nr_cores) {
+        pnv_dt_core(chip, chip->cores[core_idx], fdt);
+        core_idx++;
+    }
+
+    if (chip->ram_size) {
+        pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size);
+    }
+
+    pnv_dt_lpc(chip, fdt, 0, PNV9_LPCM_BASE(chip), PNV9_LPCM_SIZE);
+}
+
+/*
+ * Device tree contents of a POWER10 chip: the XSCOM node, one cpu node
+ * per core, the memory node when the chip owns RAM, and the LPC node.
+ */
+static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt)
+{
+    static const char xscom_compat[] = "ibm,power10-xscom\0ibm,xscom";
+    int core_idx = 0;
+
+    pnv_dt_xscom(chip, fdt, 0,
+                 cpu_to_be64(PNV10_XSCOM_BASE(chip)),
+                 cpu_to_be64(PNV10_XSCOM_SIZE),
+                 xscom_compat, sizeof(xscom_compat));
+
+    while (core_idx < chip->nr_cores) {
+        pnv_dt_core(chip, chip->cores[core_idx], fdt);
+        core_idx++;
+    }
+
+    if (chip->ram_size) {
+        pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size);
+    }
+
+    pnv_dt_lpc(chip, fdt, 0, PNV10_LPCM_BASE(chip), PNV10_LPCM_SIZE);
+}
+
+/*
+ * Add the RTC node under the LPC node at @lpc_off. The node is named
+ * "<fw_name>@i<ioport>" and its "reg" is the triple (1, ioport, 2).
+ */
+static void pnv_dt_rtc(ISADevice *d, void *fdt, int lpc_off)
+{
+    const uint32_t port = d->ioport_id;
+    uint32_t reg[3];
+    char *node_name;
+    int node_off;
+
+    reg[0] = cpu_to_be32(1);
+    reg[1] = cpu_to_be32(port);
+    reg[2] = cpu_to_be32(2);
+
+    node_name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), port);
+    node_off = fdt_add_subnode(fdt, lpc_off, node_name);
+    _FDT(node_off);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg))));
+    _FDT((fdt_setprop_string(fdt, node_off, "compatible", "pnpPNP,b00")));
+}
+
+/*
+ * Add the serial port node under the LPC node at @lpc_off. The node is
+ * named "<fw_name>@i<ioport>", its "reg" is the triple (1, ioport, 8)
+ * and its interrupt parent is the phandle of the LPC node.
+ */
+static void pnv_dt_serial(ISADevice *d, void *fdt, int lpc_off)
+{
+    const char compat_list[] = "ns16550\0pnpPNP,501";
+    const uint32_t port = d->ioport_id;
+    uint32_t reg[3];
+    char *node_name;
+    int node_off;
+
+    reg[0] = cpu_to_be32(1);
+    reg[1] = cpu_to_be32(port);
+    reg[2] = cpu_to_be32(8);
+
+    node_name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), port);
+    node_off = fdt_add_subnode(fdt, lpc_off, node_name);
+    _FDT(node_off);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg))));
+    _FDT((fdt_setprop(fdt, node_off, "compatible", compat_list,
+                      sizeof(compat_list))));
+
+    _FDT((fdt_setprop_cell(fdt, node_off, "clock-frequency", 1843200)));
+    _FDT((fdt_setprop_cell(fdt, node_off, "current-speed", 115200)));
+    _FDT((fdt_setprop_cell(fdt, node_off, "interrupts", d->isairq[0])));
+    _FDT((fdt_setprop_cell(fdt, node_off, "interrupt-parent",
+                           fdt_get_phandle(fdt, lpc_off))));
+
+    /* This is needed by Linux */
+    _FDT((fdt_setprop_string(fdt, node_off, "device_type", "serial")));
+}
+
+/*
+ * Add the IPMI BT node under the LPC node at @lpc_off. The I/O port and
+ * the IRQ are read from the "ioport" and "irq" properties of the
+ * device; "reg" is the triple (1, ioport, 3).
+ */
+static void pnv_dt_ipmi_bt(ISADevice *d, void *fdt, int lpc_off)
+{
+    const char compat_list[] = "bt\0ipmi-bt";
+    uint32_t port, irq_line;
+    uint32_t reg[3];
+    char *node_name;
+    int node_off;
+
+    /* 'port' comes from the 'ioport' property of 'isa-ipmi-bt' */
+    port = object_property_get_int(OBJECT(d), "ioport", &error_fatal);
+    reg[0] = cpu_to_be32(1);
+    reg[1] = cpu_to_be32(port);
+    reg[2] = cpu_to_be32(3);
+
+    irq_line = object_property_get_int(OBJECT(d), "irq", &error_fatal);
+
+    node_name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), port);
+    node_off = fdt_add_subnode(fdt, lpc_off, node_name);
+    _FDT(node_off);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, node_off, "reg", reg, sizeof(reg))));
+    _FDT((fdt_setprop(fdt, node_off, "compatible", compat_list,
+                      sizeof(compat_list))));
+
+    /* Mark it as reserved to avoid Linux trying to claim it */
+    _FDT((fdt_setprop_string(fdt, node_off, "status", "reserved")));
+    _FDT((fdt_setprop_cell(fdt, node_off, "interrupts", irq_line)));
+    _FDT((fdt_setprop_cell(fdt, node_off, "interrupt-parent",
+                           fdt_get_phandle(fdt, lpc_off))));
+}
+
+typedef struct ForeachPopulateArgs { /* opaque argument of pnv_dt_isa_device() */
+ void *fdt; /* device tree blob being populated */
+ int offset; /* offset of the ISA node the device nodes are added under */
+} ForeachPopulateArgs;
+
+/*
+ * Bus walk callback: emit the device tree node of one ISA device.
+ * @opaque is a ForeachPopulateArgs. Devices of an unknown type are
+ * reported and skipped; the walk always continues (returns 0).
+ */
+static int pnv_dt_isa_device(DeviceState *dev, void *opaque)
+{
+    ForeachPopulateArgs *ctx = opaque;
+    ISADevice *isa_dev = ISA_DEVICE(dev);
+
+    if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
+        pnv_dt_rtc(isa_dev, ctx->fdt, ctx->offset);
+        return 0;
+    }
+    if (object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL)) {
+        pnv_dt_serial(isa_dev, ctx->fdt, ctx->offset);
+        return 0;
+    }
+    if (object_dynamic_cast(OBJECT(dev), "isa-ipmi-bt")) {
+        pnv_dt_ipmi_bt(isa_dev, ctx->fdt, ctx->offset);
+        return 0;
+    }
+
+    error_report("unknown isa device %s@i%x", qdev_fw_name(dev),
+                 isa_dev->ioport_id);
+    return 0;
+}
+
+/*
+ * The default LPC bus of a multichip system is on chip 0. It's
+ * recognized by the firmware (skiboot) using a "primary" property.
+ *
+ * The ISA node of chip 0 is flagged "primary", given a freshly
+ * allocated phandle, and then populated with one node per device
+ * found on the machine ISA bus.
+ */
+static void pnv_dt_isa(PnvMachineState *pnv, void *fdt)
+{
+    int isa_node = fdt_path_offset(fdt, pnv->chips[0]->dt_isa_nodename);
+    ForeachPopulateArgs walk_args;
+    uint32_t isa_phandle;
+
+    walk_args.fdt = fdt;
+    walk_args.offset = isa_node;
+
+    _FDT((fdt_setprop(fdt, isa_node, "primary", NULL, 0)));
+
+    isa_phandle = qemu_fdt_alloc_phandle(fdt);
+    assert(isa_phandle > 0);
+    _FDT((fdt_setprop_cell(fdt, isa_node, "phandle", isa_phandle)));
+
+    /*
+     * ISA devices are not necessarily parented to the ISA bus so we
+     * can not use object_child_foreach()
+     */
+    qbus_walk_children(BUS(pnv->isa_bus), pnv_dt_isa_device, NULL, NULL, NULL,
+                       &walk_args);
+}
+
+/*
+ * Add the "/ibm,opal/power-mgt" node and its
+ * "ibm,enabled-stop-levels" property. @pnv is not used.
+ */
+static void pnv_dt_power_mgt(PnvMachineState *pnv, void *fdt)
+{
+    int off;
+
+    /*
+     * Check each node offset as soon as it is produced, like the other
+     * node builders do, so a failure is reported with the error of the
+     * call that failed rather than as a bad offset further down.
+     */
+    off = fdt_add_subnode(fdt, 0, "ibm,opal");
+    _FDT(off);
+    off = fdt_add_subnode(fdt, off, "power-mgt");
+    _FDT(off);
+
+    _FDT(fdt_setprop_cell(fdt, off, "ibm,enabled-stop-levels", 0xc0000000));
+}
+
+/*
+ * Build the machine device tree.
+ *
+ * A zeroed FDT_MAX_SIZE buffer is turned into an empty tree and then
+ * filled with: the /qemu node, the root properties, /chosen (bootargs
+ * and initrd range), the nodes of every chip, the ISA devices of
+ * chip 0, the BMC sensors when a BMC is present, and the power
+ * management node when the machine class provides a hook for it.
+ *
+ * Returns the g_malloc0()'ed blob, to be released with g_free().
+ * Every libfdt result is checked with _FDT().
+ */
+static void *pnv_dt_create(MachineState *machine)
+{
+    PnvMachineClass *pmc = PNV_MACHINE_GET_CLASS(machine);
+    PnvMachineState *pnv = PNV_MACHINE(machine);
+    void *fdt;
+    char *buf;
+    int off;
+    int i;
+
+    fdt = g_malloc0(FDT_MAX_SIZE);
+    _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE)));
+
+    /* /qemu node */
+    _FDT((fdt_add_subnode(fdt, 0, "qemu")));
+
+    /* Root node */
+    _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2)));
+    _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2)));
+    _FDT((fdt_setprop_string(fdt, 0, "model",
+                             "IBM PowerNV (emulated by qemu)")));
+    _FDT((fdt_setprop(fdt, 0, "compatible", pmc->compat, pmc->compat_size)));
+
+    buf = qemu_uuid_unparse_strdup(&qemu_uuid);
+    _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf)));
+    if (qemu_uuid_set) {
+        /*
+         * fdt_property_string() belongs to libfdt's sequential-write
+         * API and fails on a tree built with fdt_create_empty_tree().
+         * Use the read-write setter on the root node, like "vm,uuid".
+         */
+        _FDT((fdt_setprop_string(fdt, 0, "system-id", buf)));
+    }
+    g_free(buf);
+
+    off = fdt_add_subnode(fdt, 0, "chosen");
+    /* Catch a node creation failure before the offset is used */
+    _FDT(off);
+    if (machine->kernel_cmdline) {
+        _FDT((fdt_setprop_string(fdt, off, "bootargs",
+                                 machine->kernel_cmdline)));
+    }
+
+    if (pnv->initrd_size) {
+        uint32_t start_prop = cpu_to_be32(pnv->initrd_base);
+        uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size);
+
+        _FDT((fdt_setprop(fdt, off, "linux,initrd-start",
+                          &start_prop, sizeof(start_prop))));
+        _FDT((fdt_setprop(fdt, off, "linux,initrd-end",
+                          &end_prop, sizeof(end_prop))));
+    }
+
+    /* Populate device tree for each chip */
+    for (i = 0; i < pnv->num_chips; i++) {
+        PNV_CHIP_GET_CLASS(pnv->chips[i])->dt_populate(pnv->chips[i], fdt);
+    }
+
+    /* Populate ISA devices on chip 0 */
+    pnv_dt_isa(pnv, fdt);
+
+    if (pnv->bmc) {
+        pnv_dt_bmc_sensors(pnv->bmc, fdt);
+    }
+
+    /* Create an extra node for power management on machines that support it */
+    if (pmc->dt_power_mgt) {
+        pmc->dt_power_mgt(pnv, fdt);
+    }
+
+    return fdt;
+}
+
+/*
+ * Powerdown notifier: forward the request to the BMC, if the machine
+ * has one. @opaque is not used.
+ */
+static void pnv_powerdown_notify(Notifier *n, void *opaque)
+{
+    PnvMachineState *machine_state =
+        container_of(n, PnvMachineState, powerdown_notifier);
+
+    if (!machine_state->bmc) {
+        return;
+    }
+    pnv_bmc_powerdown(machine_state->bmc);
+}
+
+/*
+ * Machine reset: reset all devices, adopt a BMC found by
+ * pnv_bmc_find() when the machine has none yet, then rebuild the
+ * device tree, pack it and write it to guest memory at PNV_FDT_ADDR.
+ */
+static void pnv_reset(MachineState *machine)
+{
+    PnvMachineState *pnv = PNV_MACHINE(machine);
+    IPMIBmc *found_bmc;
+    void *blob;
+
+    qemu_devices_reset();
+
+    /*
+     * The machine should provide by default an internal BMC simulator.
+     * If not, try to use the BMC device that was provided on the command
+     * line.
+     */
+    found_bmc = pnv_bmc_find(&error_fatal);
+    if (!pnv->bmc && found_bmc) {
+        pnv_bmc_set_pnor(found_bmc, pnv->pnor);
+        pnv->bmc = found_bmc;
+    } else if (!pnv->bmc && !qtest_enabled()) {
+        /* No BMC at all: only warn outside of qtest runs */
+        warn_report("machine has no BMC device. Use '-device "
+                    "ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10' "
+                    "to define one");
+    }
+
+    blob = pnv_dt_create(machine);
+
+    /* Pack resulting tree */
+    _FDT((fdt_pack(blob)));
+
+    qemu_fdt_dumpdtb(blob, fdt_totalsize(blob));
+    cpu_physical_memory_write(PNV_FDT_ADDR, blob, fdt_totalsize(blob));
+
+    g_free(blob);
+}
+
+/* Create the ISA bus of a POWER8 chip from its LPC controller. */
+static ISABus *pnv_chip_power8_isa_create(PnvChip *chip, Error **errp)
+{
+    PnvLpcController *lpc = &PNV8_CHIP(chip)->lpc;
+
+    return pnv_lpc_isa_create(lpc, true, errp);
+}
+
+/*
+ * Create the ISA bus of a POWER8NVL chip from its LPC controller.
+ * Differs from the POWER8 variant only by the flag passed to
+ * pnv_lpc_isa_create().
+ */
+static ISABus *pnv_chip_power8nvl_isa_create(PnvChip *chip, Error **errp)
+{
+    PnvLpcController *lpc = &PNV8_CHIP(chip)->lpc;
+
+    return pnv_lpc_isa_create(lpc, false, errp);
+}
+
+/* Create the ISA bus of a POWER9 chip from its LPC controller. */
+static ISABus *pnv_chip_power9_isa_create(PnvChip *chip, Error **errp)
+{
+    PnvLpcController *lpc = &PNV9_CHIP(chip)->lpc;
+
+    return pnv_lpc_isa_create(lpc, false, errp);
+}
+
+/* Create the ISA bus of a POWER10 chip from its LPC controller. */
+static ISABus *pnv_chip_power10_isa_create(PnvChip *chip, Error **errp)
+{
+    PnvLpcController *lpc = &PNV10_CHIP(chip)->lpc;
+
+    return pnv_lpc_isa_create(lpc, false, errp);
+}
+
+/* Dispatch ISA bus creation to the isa_create hook of the chip class. */
+static ISABus *pnv_isa_create(PnvChip *chip, Error **errp)
+{
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(chip);
+
+    return chip_class->isa_create(chip, errp);
+}
+
+/*
+ * Print the interrupt source state of a POWER8 chip on the monitor:
+ * the PSI ICS first, then the MSIs and LSIs of each PHB.
+ */
+static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon)
+{
+    Pnv8Chip *p8 = PNV8_CHIP(chip);
+    int phb_idx = 0;
+
+    ics_pic_print_info(&p8->psi.ics, mon);
+
+    while (phb_idx < chip->num_phbs) {
+        pnv_phb3_msi_pic_print_info(&p8->phbs[phb_idx].msis, mon);
+        ics_pic_print_info(&p8->phbs[phb_idx].lsis, mon);
+        phb_idx++;
+    }
+}
+
+/*
+ * Print the interrupt controller state of a POWER9 chip on the
+ * monitor: XIVE, PSI, then the PHB of every stack of every PEC.
+ */
+static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon)
+{
+    Pnv9Chip *p9 = PNV9_CHIP(chip);
+    int pec_idx;
+
+    pnv_xive_pic_print_info(&p9->xive, mon);
+    pnv_psi_pic_print_info(&p9->psi, mon);
+
+    for (pec_idx = 0; pec_idx < PNV9_CHIP_MAX_PEC; pec_idx++) {
+        PnvPhb4PecState *pec = &p9->pecs[pec_idx];
+        int stack_idx = 0;
+
+        while (stack_idx < pec->num_stacks) {
+            pnv_phb4_pic_print_info(&pec->stacks[stack_idx].phb, mon);
+            stack_idx++;
+        }
+    }
+}
+
+/* XSCOM base of a POWER8 core, from PNV_XSCOM_EX_BASE(). @chip is unused. */
+static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip,
+                                                uint32_t core_id)
+{
+    uint64_t core_base = PNV_XSCOM_EX_BASE(core_id);
+
+    return core_base;
+}
+
+/* XSCOM base of a POWER9 core, from PNV9_XSCOM_EC_BASE(). @chip is unused. */
+static uint64_t pnv_chip_power9_xscom_core_base(PnvChip *chip,
+                                                uint32_t core_id)
+{
+    uint64_t core_base = PNV9_XSCOM_EC_BASE(core_id);
+
+    return core_base;
+}
+
+/* XSCOM base of a POWER10 core, from PNV10_XSCOM_EC_BASE(). @chip is unused. */
+static uint64_t pnv_chip_power10_xscom_core_base(PnvChip *chip,
+                                                 uint32_t core_id)
+{
+    uint64_t core_base = PNV10_XSCOM_EC_BASE(core_id);
+
+    return core_base;
+}
+
+/*
+ * Tell whether the CPU type @cpu_type is accepted by the pvr_match
+ * hook of the default CPU type @default_type.
+ */
+static bool pnv_match_cpu(const char *default_type, const char *cpu_type)
+{
+    PowerPCCPUClass *reference =
+        POWERPC_CPU_CLASS(object_class_by_name(default_type));
+    PowerPCCPUClass *candidate =
+        POWERPC_CPU_CLASS(object_class_by_name(cpu_type));
+    bool matches;
+
+    matches = reference->pvr_match(reference, candidate->pvr);
+    return matches;
+}
+
+/*
+ * Create an "isa-ipmi-bt" device on @bus, link it to @bmc, set its
+ * "irq" property to @irq and realize it. Any failure is fatal.
+ */
+static void pnv_ipmi_bt_init(ISABus *bus, IPMIBmc *bmc, uint32_t irq)
+{
+    ISADevice *bt = isa_new("isa-ipmi-bt");
+    Object *bt_obj = OBJECT(bt);
+
+    object_property_set_link(bt_obj, "bmc", OBJECT(bmc), &error_fatal);
+    object_property_set_int(bt_obj, "irq", irq, &error_fatal);
+    isa_realize_and_unref(bt, bus, &error_fatal);
+}
+
+/* Print the PSI interrupt state of a POWER10 chip on the monitor. */
+static void pnv_chip_power10_pic_print_info(PnvChip *chip, Monitor *mon)
+{
+    Pnv10Chip *p10 = PNV10_CHIP(chip);
+
+    pnv_psi_pic_print_info(&p10->psi, mon);
+}
+
+/*
+ * Always give the first 1GB to chip 0 else we won't boot
+ *
+ * When an even split gives every chip at least 1 GiB, each chip gets
+ * that share rounded down to 1 MiB. Otherwise chip 0 gets exactly
+ * 1 GiB and the remainder is split between the other chips, again
+ * rounded down to 1 MiB.
+ */
+static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id)
+{
+    MachineState *ms = MACHINE(pnv);
+    uint64_t share;
+
+    assert(ms->ram_size >= 1 * GiB);
+
+    share = ms->ram_size / pnv->num_chips;
+    if (share >= 1 * GiB) {
+        return QEMU_ALIGN_DOWN(share, 1 * MiB);
+    }
+
+    /* An uneven split only makes sense with more than one chip */
+    assert(pnv->num_chips > 1);
+
+    if (chip_id == 0) {
+        return 1 * GiB;
+    }
+
+    share = (ms->ram_size - 1 * GiB) / (pnv->num_chips - 1);
+    return QEMU_ALIGN_DOWN(share, 1 * MiB);
+}
+
+static void pnv_init(MachineState *machine)
+{
+ const char *bios_name = machine->firmware ?: FW_FILE_NAME;
+ PnvMachineState *pnv = PNV_MACHINE(machine);
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ char *fw_filename;
+ long fw_size;
+ uint64_t chip_ram_start = 0; /* running base address of the next chip's RAM */
+ int i;
+ char *chip_typename;
+ DriveInfo *pnor = drive_get(IF_MTD, 0, 0);
+ DeviceState *dev;
+
+ /*
+ * Machine init, in order: RAM, PNOR device, firmware, kernel and
+ * initrd images, CPU model check, processor chips, ISA bus with its
+ * serial/RTC/IPMI BT devices, PNOR mapping and powerdown notifier.
+ * Every failure in here terminates QEMU.
+ *
+ * allocate RAM: refuse anything smaller than the machine class
+ * default, then map machine->ram at address 0 of system memory.
+ */
+ if (machine->ram_size < mc->default_ram_size) {
+ char *sz = size_to_str(mc->default_ram_size);
+ error_report("Invalid RAM size, should be bigger than %s", sz);
+ g_free(sz);
+ exit(EXIT_FAILURE);
+ }
+ memory_region_add_subregion(get_system_memory(), 0, machine->ram);
+
+ /*
+ * Create our simple PNOR device, backed by the first MTD drive
+ * when one was given.
+ */
+ dev = qdev_new(TYPE_PNV_PNOR);
+ if (pnor) {
+ qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(pnor));
+ }
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ pnv->pnor = PNV_PNOR(dev);
+
+ /* load skiboot firmware (or the image named by machine->firmware) */
+ fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (!fw_filename) {
+ error_report("Could not find OPAL firmware '%s'", bios_name);
+ exit(1);
+ }
+
+ fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE);
+ if (fw_size < 0) {
+ error_report("Could not load OPAL firmware '%s'", fw_filename);
+ exit(1);
+ }
+ g_free(fw_filename);
+
+ /* load kernel, at the fixed KERNEL_LOAD_ADDR */
+ if (machine->kernel_filename) {
+ long kernel_size;
+
+ kernel_size = load_image_targphys(machine->kernel_filename,
+ KERNEL_LOAD_ADDR, KERNEL_MAX_SIZE);
+ if (kernel_size < 0) {
+ error_report("Could not load kernel '%s'",
+ machine->kernel_filename);
+ exit(1);
+ }
+ }
+
+ /* load initrd; its base and size are kept for the device tree */
+ if (machine->initrd_filename) {
+ pnv->initrd_base = INITRD_LOAD_ADDR;
+ pnv->initrd_size = load_image_targphys(machine->initrd_filename,
+ pnv->initrd_base, INITRD_MAX_SIZE);
+ if (pnv->initrd_size < 0) {
+ error_report("Could not load initial ram disk '%s'",
+ machine->initrd_filename);
+ exit(1);
+ }
+ }
+
+ /* MSIs are supported on this platform */
+ msi_nonbroken = true;
+
+ /*
+ * Check compatibility of the specified CPU with the machine
+ * default.
+ */
+ if (!pnv_match_cpu(mc->default_cpu_type, machine->cpu_type)) {
+ error_report("invalid CPU model '%s' for %s machine",
+ machine->cpu_type, mc->name);
+ exit(1);
+ }
+
+ /*
+ * Create the processor chips. The chip type name is derived from
+ * the CPU type name with POWERPC_CPU_TYPE_SUFFIX removed.
+ */
+ i = strlen(machine->cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
+ chip_typename = g_strdup_printf(PNV_CHIP_TYPE_NAME("%.*s"),
+ i, machine->cpu_type);
+ if (!object_class_by_name(chip_typename)) {
+ error_report("invalid chip model '%.*s' for %s machine",
+ i, machine->cpu_type, mc->name);
+ exit(1);
+ }
+
+ pnv->num_chips =
+ machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads);
+ /*
+ * TODO: should we decide on how many chips we can create based
+ * on #cores and Venice vs. Murano vs. Naples chip type etc...,
+ */
+ if (!is_power_of_2(pnv->num_chips) || pnv->num_chips > 16) {
+ error_report("invalid number of chips: '%d'", pnv->num_chips);
+ error_printf(
+ "Try '-smp sockets=N'. Valid values are : 1, 2, 4, 8 and 16.\n");
+ exit(1);
+ }
+
+ pnv->chips = g_new0(PnvChip *, pnv->num_chips);
+ for (i = 0; i < pnv->num_chips; i++) {
+ char chip_name[32];
+ Object *chip = OBJECT(qdev_new(chip_typename));
+ uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i);
+
+ pnv->chips[i] = PNV_CHIP(chip);
+
+ /* Distribute RAM among the chips, as consecutive ranges */
+ object_property_set_int(chip, "ram-start", chip_ram_start,
+ &error_fatal);
+ object_property_set_int(chip, "ram-size", chip_ram_size,
+ &error_fatal);
+ chip_ram_start += chip_ram_size;
+
+ snprintf(chip_name, sizeof(chip_name), "chip[%d]", i);
+ object_property_add_child(OBJECT(pnv), chip_name, chip);
+ object_property_set_int(chip, "chip-id", i, &error_fatal);
+ object_property_set_int(chip, "nr-cores", machine->smp.cores,
+ &error_fatal);
+ object_property_set_int(chip, "nr-threads", machine->smp.threads,
+ &error_fatal);
+ /*
+ * The POWER8 machine uses the XICS interrupt interface.
+ * Propagate the XICS fabric to the chip and its controllers.
+ */
+ if (object_dynamic_cast(OBJECT(pnv), TYPE_XICS_FABRIC)) {
+ object_property_set_link(chip, "xics", OBJECT(pnv), &error_abort);
+ }
+ if (object_dynamic_cast(OBJECT(pnv), TYPE_XIVE_FABRIC)) {
+ object_property_set_link(chip, "xive-fabric", OBJECT(pnv),
+ &error_abort);
+ }
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(chip), &error_fatal);
+ }
+ g_free(chip_typename);
+
+ /* Instantiate ISA bus on chip 0 */
+ pnv->isa_bus = pnv_isa_create(pnv->chips[0], &error_fatal);
+
+ /* Create serial port */
+ serial_hds_isa_init(pnv->isa_bus, 0, MAX_ISA_SERIAL_PORTS);
+
+ /* Create an RTC ISA device too */
+ mc146818_rtc_init(pnv->isa_bus, 2000, NULL);
+
+ /*
+ * Create the machine BMC simulator and the IPMI BT device for
+ * communication with the BMC
+ */
+ if (defaults_enabled()) {
+ pnv->bmc = pnv_bmc_create(pnv->pnor);
+ pnv_ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10);
+ }
+
+ /*
+ * The PNOR is mapped on the LPC FW address space by the BMC.
+ * Since we can not reach the remote BMC machine with LPC memops,
+ * map it always for now.
+ */
+ memory_region_add_subregion(pnv->chips[0]->fw_mr, PNOR_SPI_OFFSET,
+ &pnv->pnor->mmio);
+
+ /*
+ * OpenPOWER systems use an IPMI SEL Event message to notify the
+ * host to powerdown
+ */
+ pnv->powerdown_notifier.notify = pnv_powerdown_notify;
+ qemu_register_powerdown_notifier(&pnv->powerdown_notifier);
+}
+
+/*
+ * POWER8 PIR layout:
+ *
+ * 0:21 Reserved - Read as zeros
+ * 22:24 Chip ID
+ * 25:28 Core number
+ * 29:31 Thread ID
+ *
+ * Returns the PIR of thread 0 of core @core_id on @chip.
+ */
+static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id)
+{
+    uint32_t pir = chip->chip_id << 7;
+
+    pir |= core_id << 3;
+    return pir;
+}
+
+/*
+ * Create the ICP of a POWER8 thread, bound to the chip XICS fabric,
+ * and record it as the interrupt controller of the CPU. On failure
+ * the error is propagated to @errp and the CPU is left untouched.
+ */
+static void pnv_chip_power8_intc_create(PnvChip *chip, PowerPCCPU *cpu,
+                                        Error **errp)
+{
+    Pnv8Chip *p8 = PNV8_CHIP(chip);
+    PnvCPUState *cpu_state = pnv_cpu_state(cpu);
+    Error *err = NULL;
+    Object *icp;
+
+    icp = icp_create(OBJECT(cpu), TYPE_PNV_ICP, p8->xics, &err);
+    if (!err) {
+        cpu_state->intc = icp;
+    } else {
+        error_propagate(errp, err);
+    }
+}
+
+
+/* Reset the ICP attached to a POWER8 thread. @chip is unused. */
+static void pnv_chip_power8_intc_reset(PnvChip *chip, PowerPCCPU *cpu)
+{
+    icp_reset(ICP(pnv_cpu_state(cpu)->intc));
+}
+
+/* Destroy the ICP of a POWER8 thread and clear the CPU's reference. */
+static void pnv_chip_power8_intc_destroy(PnvChip *chip, PowerPCCPU *cpu)
+{
+    PnvCPUState *cpu_state = pnv_cpu_state(cpu);
+    ICPState *icp = ICP(cpu_state->intc);
+
+    icp_destroy(icp);
+    cpu_state->intc = NULL;
+}
+
+/* Print the ICP state of a POWER8 thread on the monitor. */
+static void pnv_chip_power8_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
+                                            Monitor *mon)
+{
+    PnvCPUState *cpu_state = pnv_cpu_state(cpu);
+
+    icp_pic_print_info(ICP(cpu_state->intc), mon);
+}
+
+/*
+ * POWER9 PIR layout:
+ *
+ * 0:48 Reserved - Read as zeroes
+ * 49:52 Node ID
+ * 53:55 Chip ID
+ * 56 Reserved - Read as zero
+ * 57:61 Core number
+ * 62:63 Thread ID
+ *
+ * We only care about the lower bits. uint32_t is fine for the moment.
+ *
+ * Returns the PIR of thread 0 of core @core_id on @chip.
+ */
+static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id)
+{
+    uint32_t pir = chip->chip_id << 8;
+
+    pir |= core_id << 2;
+    return pir;
+}
+
+/*
+ * PIR of thread 0 of core @core_id on a POWER10 chip: the chip id is
+ * shifted left by 8 and the core id by 2.
+ */
+static uint32_t pnv_chip_core_pir_p10(PnvChip *chip, uint32_t core_id)
+{
+    uint32_t pir = chip->chip_id << 8;
+
+    pir |= core_id << 2;
+    return pir;
+}
+
+/*
+ * Create the XIVE thread context of a POWER9 thread and record it as
+ * the interrupt controller of the CPU. On failure the error is
+ * propagated to @errp and the CPU is left untouched.
+ */
+static void pnv_chip_power9_intc_create(PnvChip *chip, PowerPCCPU *cpu,
+                                        Error **errp)
+{
+    Pnv9Chip *p9 = PNV9_CHIP(chip);
+    PnvCPUState *cpu_state = pnv_cpu_state(cpu);
+    Error *err = NULL;
+    Object *tctx;
+
+    /*
+     * The core creates its interrupt presenter but the XIVE interrupt
+     * controller object is initialized afterwards. Hopefully, it's
+     * only used at runtime.
+     */
+    tctx = xive_tctx_create(OBJECT(cpu), XIVE_PRESENTER(&p9->xive), &err);
+    if (!err) {
+        cpu_state->intc = tctx;
+    } else {
+        error_propagate(errp, err);
+    }
+}
+
+/* Reset the XIVE thread context of a POWER9 thread. @chip is unused. */
+static void pnv_chip_power9_intc_reset(PnvChip *chip, PowerPCCPU *cpu)
+{
+    xive_tctx_reset(XIVE_TCTX(pnv_cpu_state(cpu)->intc));
+}
+
+/*
+ * Destroy the XIVE thread context of a POWER9 thread and clear the
+ * CPU's reference to it.
+ */
+static void pnv_chip_power9_intc_destroy(PnvChip *chip, PowerPCCPU *cpu)
+{
+    PnvCPUState *cpu_state = pnv_cpu_state(cpu);
+    XiveTCTX *tctx = XIVE_TCTX(cpu_state->intc);
+
+    xive_tctx_destroy(tctx);
+    cpu_state->intc = NULL;
+}
+
+/* Print the XIVE thread context of a POWER9 thread on the monitor. */
+static void pnv_chip_power9_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
+                                            Monitor *mon)
+{
+    PnvCPUState *cpu_state = pnv_cpu_state(cpu);
+
+    xive_tctx_pic_print_info(XIVE_TCTX(cpu_state->intc), mon);
+}
+
+/*
+ * POWER10 has no interrupt presenter model yet: the CPU's interrupt
+ * controller is simply left NULL. @chip and @errp are unused.
+ */
+static void pnv_chip_power10_intc_create(PnvChip *chip, PowerPCCPU *cpu,
+                                         Error **errp)
+{
+    /* Will be defined when the interrupt controller is */
+    pnv_cpu_state(cpu)->intc = NULL;
+}
+
+/* Nothing to reset: POWER10 threads have no interrupt presenter here. */
+static void pnv_chip_power10_intc_reset(PnvChip *chip, PowerPCCPU *cpu)
+{
+}
+
+/* Clear the (never set) interrupt controller reference of the CPU. */
+static void pnv_chip_power10_intc_destroy(PnvChip *chip, PowerPCCPU *cpu)
+{
+    pnv_cpu_state(cpu)->intc = NULL;
+}
+
+/* Nothing to print: POWER10 threads have no interrupt presenter here. */
+static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu,
+                                             Monitor *mon)
+{
+    /* intentionally empty */
+}
+
+/*
+ * Allowed core identifiers on a POWER8 Processor Chip :
+ *
+ * <EX0 reserved>
+ * EX1 - Venice only
+ * EX2 - Venice only
+ * EX3 - Venice only
+ * EX4
+ * EX5
+ * EX6
+ * <EX7,8 reserved> <reserved>
+ * EX9 - Venice only
+ * EX10 - Venice only
+ * EX11 - Venice only
+ * EX12
+ * EX13
+ * EX14
+ * <EX15 reserved>
+ */
+#define POWER8E_CORE_MASK (0x7070ull) /* bits 4-6 and 12-14 set */
+#define POWER8_CORE_MASK (0x7e7eull) /* bits 1-6 and 9-14 set: adds the Venice-only ids */
+
+/*
+ * POWER9 has 24 cores, ids starting at 0x0
+ *
+ * NOTE(review): the mask below has its 56 low bits set, which is more
+ * than 24 ids - confirm against the core id numbering.
+ */
+#define POWER9_CORE_MASK (0xffffffffffffffull)
+
+
+#define POWER10_CORE_MASK (0xffffffffffffffull) /* same value as POWER9_CORE_MASK */
+
+/*
+ * Instance init of a POWER8 chip: expose the "xics" link property,
+ * initialize the PSI, LPC, OCC and HOMER children and one PHB3 child
+ * per PHB of the chip class, then default num_phbs to the class value.
+ */
+static void pnv_chip_power8_instance_init(Object *obj)
+{
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(obj);
+    Pnv8Chip *p8 = PNV8_CHIP(obj);
+    PnvChip *base = PNV_CHIP(obj);
+    int phb_idx;
+
+    object_property_add_link(obj, "xics", TYPE_XICS_FABRIC,
+                             (Object **)&p8->xics,
+                             object_property_allow_set_link,
+                             OBJ_PROP_LINK_STRONG);
+
+    object_initialize_child(obj, "psi", &p8->psi, TYPE_PNV8_PSI);
+    object_initialize_child(obj, "lpc", &p8->lpc, TYPE_PNV8_LPC);
+    object_initialize_child(obj, "occ", &p8->occ, TYPE_PNV8_OCC);
+    object_initialize_child(obj, "homer", &p8->homer, TYPE_PNV8_HOMER);
+
+    for (phb_idx = 0; phb_idx < chip_class->num_phbs; phb_idx++) {
+        object_initialize_child(obj, "phb[*]", &p8->phbs[phb_idx],
+                                TYPE_PNV_PHB3);
+    }
+
+    /*
+     * Number of PHBs is the chip default
+     */
+    base->num_phbs = chip_class->num_phbs;
+}
+
+/*
+ * Create the "icp-<chip_id>" container region of a POWER8 chip, map it
+ * at PNV_ICP_BASE(chip) as sysbus MMIO region 1, and add the ICP
+ * registers of every thread at offset (pir << 12). @errp is unused.
+ */
+static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp)
+{
+    PnvChip *chip = PNV_CHIP(chip8);
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(chip);
+    SysBusDevice *sbd;
+    char *region_name;
+    int core_idx, thread;
+
+    region_name = g_strdup_printf("icp-%x", chip->chip_id);
+    memory_region_init(&chip8->icp_mmio, OBJECT(chip), region_name,
+                       PNV_ICP_SIZE);
+    sbd = SYS_BUS_DEVICE(chip);
+    sysbus_init_mmio(sbd, &chip8->icp_mmio);
+    g_free(region_name);
+
+    sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip));
+
+    /* Map the ICP registers for each thread */
+    for (core_idx = 0; core_idx < chip->nr_cores; core_idx++) {
+        PnvCore *core = chip->cores[core_idx];
+        int hw_core_id = CPU_CORE(core)->core_id;
+
+        for (thread = 0; thread < CPU_CORE(core)->nr_threads; thread++) {
+            uint32_t pir = chip_class->core_pir(chip, hw_core_id) + thread;
+            PnvICPState *icp = PNV_ICP(xics_icp_get(chip8->xics, pir));
+
+            memory_region_add_subregion(&chip8->icp_mmio, pir << 12,
+                                        &icp->mmio);
+        }
+    }
+}
+
+/*
+ * Realize handler for POWER8 chips.
+ *
+ * Requires the "xics" link to be set (asserted). Realizes the XSCOM
+ * bridge and maps sysbus MMIO index 0 at PNV_XSCOM_BASE(chip), then
+ * chains to the parent realize. After that the PSI, LPC, ICP region,
+ * OCC, HOMER and PHB3 controllers are realized in that order and their
+ * XSCOM / system-memory subregions are added.
+ *
+ * Errors from the XSCOM bridge, the parent realize, PSI, ICP, OCC,
+ * HOMER and PHB3 realization are reported through @errp and abort the
+ * sequence; property setters and the LPC realize use error_fatal or
+ * error_abort instead.
+ */
+static void pnv_chip_power8_realize(DeviceState *dev, Error **errp)
+{
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
+ PnvChip *chip = PNV_CHIP(dev);
+ Pnv8Chip *chip8 = PNV8_CHIP(dev);
+ Pnv8Psi *psi8 = &chip8->psi;
+ Error *local_err = NULL;
+ int i;
+
+ assert(chip8->xics);
+
+ /* XSCOM bridge is first */
+ pnv_xscom_realize(chip, PNV_XSCOM_SIZE, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip));
+
+ pcc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Processor Service Interface (PSI) Host Bridge */
+ object_property_set_int(OBJECT(&chip8->psi), "bar", PNV_PSIHB_BASE(chip),
+ &error_fatal);
+ object_property_set_link(OBJECT(&chip8->psi), ICS_PROP_XICS,
+ OBJECT(chip8->xics), &error_abort);
+ if (!qdev_realize(DEVICE(&chip8->psi), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE,
+ &PNV_PSI(psi8)->xscom_regs);
+
+ /* Create LPC controller */
+ object_property_set_link(OBJECT(&chip8->lpc), "psi", OBJECT(&chip8->psi),
+ &error_abort);
+ /* Unlike the other units, an LPC realize failure is fatal */
+ qdev_realize(DEVICE(&chip8->lpc), NULL, &error_fatal);
+ pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip8->lpc.xscom_regs);
+
+ chip->fw_mr = &chip8->lpc.isa_fw;
+ chip->dt_isa_nodename = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x",
+ (uint64_t) PNV_XSCOM_BASE(chip),
+ PNV_XSCOM_LPC_BASE);
+
+ /*
+ * Interrupt Management Area. This is the memory region holding
+ * all the Interrupt Control Presenter (ICP) registers
+ */
+ pnv_chip_icp_realize(chip8, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Create the simplified OCC model */
+ object_property_set_link(OBJECT(&chip8->occ), "psi", OBJECT(&chip8->psi),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip8->occ), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip8->occ.xscom_regs);
+
+ /* OCC SRAM model */
+ memory_region_add_subregion(get_system_memory(), PNV_OCC_SENSOR_BASE(chip),
+ &chip8->occ.sram_regs);
+
+ /* HOMER */
+ object_property_set_link(OBJECT(&chip8->homer), "chip", OBJECT(chip),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip8->homer), NULL, errp)) {
+ return;
+ }
+ /* Homer Xscom region */
+ pnv_xscom_add_subregion(chip, PNV_XSCOM_PBA_BASE, &chip8->homer.pba_regs);
+
+ /* Homer mmio region */
+ memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip),
+ &chip8->homer.regs);
+
+ /* PHB3 controllers */
+ for (i = 0; i < chip->num_phbs; i++) {
+ PnvPHB3 *phb = &chip8->phbs[i];
+ PnvPBCQState *pbcq = &phb->pbcq;
+
+ object_property_set_int(OBJECT(phb), "index", i, &error_fatal);
+ object_property_set_int(OBJECT(phb), "chip-id", chip->chip_id,
+ &error_fatal);
+ if (!sysbus_realize(SYS_BUS_DEVICE(phb), errp)) {
+ return;
+ }
+
+ /* Populate the XSCOM address space. */
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id,
+ &pbcq->xscom_nest_regs);
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id,
+ &pbcq->xscom_pci_regs);
+ pnv_xscom_add_subregion(chip,
+ PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id,
+ &pbcq->xscom_spci_regs);
+ }
+}
+
+/*
+ * Convert an address within the POWER8 XSCOM window into a PCB address.
+ *
+ * The address is first reduced modulo PNV_XSCOM_SIZE. The result is
+ * (offset >> 4) with its low nibble cleared, OR-ed with the low nibble
+ * of (offset >> 3). @chip is unused.
+ */
+static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr)
+{
+    uint64_t offset = addr & (PNV_XSCOM_SIZE - 1);
+    uint64_t upper = (offset >> 4) & ~0xfull;
+    uint64_t lower = (offset >> 3) & 0xf;
+
+    return upper | lower;
+}
+
+/*
+ * Class initializer for the POWER8E chip model: sets the CFAM id, the
+ * POWER8E core mask, three PHBs and the POWER8 hook set, then installs
+ * pnv_chip_power8_realize as the realize handler while saving the
+ * parent's.
+ */
+static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER8E";
+
+    /* Identification and topology */
+    chip_class->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */
+    chip_class->cores_mask = POWER8E_CORE_MASK;
+    chip_class->num_phbs = 3;
+
+    /* Interrupt controller hooks */
+    chip_class->intc_create = pnv_chip_power8_intc_create;
+    chip_class->intc_reset = pnv_chip_power8_intc_reset;
+    chip_class->intc_destroy = pnv_chip_power8_intc_destroy;
+    chip_class->intc_print_info = pnv_chip_power8_intc_print_info;
+    chip_class->pic_print_info = pnv_chip_power8_pic_print_info;
+
+    /* Core numbering, XSCOM, ISA bus and device tree hooks */
+    chip_class->core_pir = pnv_chip_core_pir_p8;
+    chip_class->xscom_core_base = pnv_chip_power8_xscom_core_base;
+    chip_class->xscom_pcba = pnv_chip_power8_xscom_pcba;
+    chip_class->isa_create = pnv_chip_power8_isa_create;
+    chip_class->dt_populate = pnv_chip_power8_dt_populate;
+
+    device_class_set_parent_realize(dev_class, pnv_chip_power8_realize,
+                                    &chip_class->parent_realize);
+}
+
+/*
+ * Class initializer for the POWER8 chip model: sets the CFAM id, the
+ * POWER8 core mask, three PHBs and the POWER8 hook set, then installs
+ * pnv_chip_power8_realize as the realize handler while saving the
+ * parent's.
+ */
+static void pnv_chip_power8_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER8";
+
+    /* Identification and topology */
+    chip_class->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */
+    chip_class->cores_mask = POWER8_CORE_MASK;
+    chip_class->num_phbs = 3;
+
+    /* Interrupt controller hooks */
+    chip_class->intc_create = pnv_chip_power8_intc_create;
+    chip_class->intc_reset = pnv_chip_power8_intc_reset;
+    chip_class->intc_destroy = pnv_chip_power8_intc_destroy;
+    chip_class->intc_print_info = pnv_chip_power8_intc_print_info;
+    chip_class->pic_print_info = pnv_chip_power8_pic_print_info;
+
+    /* Core numbering, XSCOM, ISA bus and device tree hooks */
+    chip_class->core_pir = pnv_chip_core_pir_p8;
+    chip_class->xscom_core_base = pnv_chip_power8_xscom_core_base;
+    chip_class->xscom_pcba = pnv_chip_power8_xscom_pcba;
+    chip_class->isa_create = pnv_chip_power8_isa_create;
+    chip_class->dt_populate = pnv_chip_power8_dt_populate;
+
+    device_class_set_parent_realize(dev_class, pnv_chip_power8_realize,
+                                    &chip_class->parent_realize);
+}
+
+/*
+ * Class initializer for the POWER8NVL chip model. It matches the POWER8
+ * model except for the CFAM id, the description and the ISA bus hook,
+ * which is pnv_chip_power8nvl_isa_create.
+ */
+static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER8NVL";
+
+    /* Identification and topology */
+    chip_class->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */
+    chip_class->cores_mask = POWER8_CORE_MASK;
+    chip_class->num_phbs = 3;
+
+    /* Interrupt controller hooks */
+    chip_class->intc_create = pnv_chip_power8_intc_create;
+    chip_class->intc_reset = pnv_chip_power8_intc_reset;
+    chip_class->intc_destroy = pnv_chip_power8_intc_destroy;
+    chip_class->intc_print_info = pnv_chip_power8_intc_print_info;
+    chip_class->pic_print_info = pnv_chip_power8_pic_print_info;
+
+    /* Core numbering, XSCOM, ISA bus and device tree hooks */
+    chip_class->core_pir = pnv_chip_core_pir_p8;
+    chip_class->xscom_core_base = pnv_chip_power8_xscom_core_base;
+    chip_class->xscom_pcba = pnv_chip_power8_xscom_pcba;
+    chip_class->isa_create = pnv_chip_power8nvl_isa_create;
+    chip_class->dt_populate = pnv_chip_power8_dt_populate;
+
+    device_class_set_parent_realize(dev_class, pnv_chip_power8_realize,
+                                    &chip_class->parent_realize);
+}
+
+/*
+ * Instance initializer for POWER9 chips.
+ *
+ * Creates the XIVE child (and aliases its "xive-fabric" property on
+ * the chip), the PSI, LPC, OCC and HOMER children and
+ * PNV9_CHIP_MAX_PEC PEC children, then seeds the instance PHB count
+ * from the chip class.
+ */
+static void pnv_chip_power9_instance_init(Object *obj)
+{
+    Pnv9Chip *p9 = PNV9_CHIP(obj);
+    PnvChip *base = PNV_CHIP(obj);
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(obj);
+    int pec;
+
+    object_initialize_child(obj, "xive", &p9->xive, TYPE_PNV_XIVE);
+    object_property_add_alias(obj, "xive-fabric", OBJECT(&p9->xive),
+                              "xive-fabric");
+
+    /* Fixed on-chip units */
+    object_initialize_child(obj, "psi", &p9->psi, TYPE_PNV9_PSI);
+    object_initialize_child(obj, "lpc", &p9->lpc, TYPE_PNV9_LPC);
+    object_initialize_child(obj, "occ", &p9->occ, TYPE_PNV9_OCC);
+    object_initialize_child(obj, "homer", &p9->homer, TYPE_PNV9_HOMER);
+
+    for (pec = 0; pec < PNV9_CHIP_MAX_PEC; pec++) {
+        object_initialize_child(obj, "pec[*]", &p9->pecs[pec],
+                                TYPE_PNV_PHB4_PEC);
+    }
+
+    /* The instance starts out with the class default number of PHBs */
+    base->num_phbs = chip_class->num_phbs;
+}
+
+/*
+ * Create and realize the quads of a POWER9 chip, one per group of up
+ * to four cores.
+ *
+ * Each quad is named "eq[<id>]" and given "quad-id" <id>, where <id> is
+ * the core_id of the first core of its group. Its XSCOM region is then
+ * added at PNV9_XSCOM_EQ_BASE(quad_id). All failures are fatal; @errp
+ * is never written by this function.
+ */
+static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp)
+{
+    PnvChip *chip = PNV_CHIP(chip9);
+    int q;
+
+    chip9->nr_quads = DIV_ROUND_UP(chip->nr_cores, 4);
+    chip9->quads = g_new0(PnvQuad, chip9->nr_quads);
+
+    for (q = 0; q < chip9->nr_quads; q++) {
+        PnvQuad *quad = &chip9->quads[q];
+        PnvCore *first_core = chip->cores[q * 4];
+        int first_core_id = CPU_CORE(first_core)->core_id;
+        char name[32];
+
+        snprintf(name, sizeof(name), "eq[%d]", first_core_id);
+        object_initialize_child_with_props(OBJECT(chip), name, quad,
+                                           sizeof(*quad), TYPE_PNV_QUAD,
+                                           &error_fatal, NULL);
+
+        object_property_set_int(OBJECT(quad), "quad-id", first_core_id,
+                                &error_fatal);
+        qdev_realize(DEVICE(quad), NULL, &error_fatal);
+
+        pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(quad->quad_id),
+                                &quad->xscom_regs);
+    }
+}
+
+/*
+ * Realize the PECs of a POWER9 chip and the PHB4 of their stacks.
+ *
+ * PEC i is configured with index i and (i + 1) stacks, realized, and
+ * its nest and PCI register regions are added to the XSCOM address
+ * space. The PHBs of its stacks are then realized with consecutive
+ * indexes, stopping once chip->num_phbs PHBs have been realized.
+ *
+ * A PEC or PHB realize failure is reported through @errp and ends the
+ * function; property setters use error_fatal or error_abort.
+ */
+static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp)
+{
+ Pnv9Chip *chip9 = PNV9_CHIP(chip);
+ int i, j;
+ /* Chip-wide PHB index, shared by all PECs */
+ int phb_id = 0;
+
+ for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) {
+ PnvPhb4PecState *pec = &chip9->pecs[i];
+ PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+ uint32_t pec_nest_base;
+ uint32_t pec_pci_base;
+
+ object_property_set_int(OBJECT(pec), "index", i, &error_fatal);
+ /*
+ * PEC0 -> 1 stack
+ * PEC1 -> 2 stacks
+ * PEC2 -> 3 stacks
+ */
+ object_property_set_int(OBJECT(pec), "num-stacks", i + 1,
+ &error_fatal);
+ object_property_set_int(OBJECT(pec), "chip-id", chip->chip_id,
+ &error_fatal);
+ object_property_set_link(OBJECT(pec), "system-memory",
+ OBJECT(get_system_memory()), &error_abort);
+ if (!qdev_realize(DEVICE(pec), NULL, errp)) {
+ return;
+ }
+
+ pec_nest_base = pecc->xscom_nest_base(pec);
+ pec_pci_base = pecc->xscom_pci_base(pec);
+
+ pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr);
+ pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr);
+
+ /* Stop early once the configured number of PHBs is reached */
+ for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs;
+ j++, phb_id++) {
+ PnvPhb4PecStack *stack = &pec->stacks[j];
+ Object *obj = OBJECT(&stack->phb);
+
+ object_property_set_int(obj, "index", phb_id, &error_fatal);
+ object_property_set_int(obj, "chip-id", chip->chip_id,
+ &error_fatal);
+ object_property_set_int(obj, "version", PNV_PHB4_VERSION,
+ &error_fatal);
+ object_property_set_int(obj, "device-id", PNV_PHB4_DEVICE_ID,
+ &error_fatal);
+ object_property_set_link(obj, "stack", OBJECT(stack),
+ &error_abort);
+ if (!sysbus_realize(SYS_BUS_DEVICE(obj), errp)) {
+ return;
+ }
+
+ /* Populate the XSCOM address space. */
+ pnv_xscom_add_subregion(chip,
+ pec_nest_base + 0x40 * (stack->stack_no + 1),
+ &stack->nest_regs_mr);
+ pnv_xscom_add_subregion(chip,
+ pec_pci_base + 0x40 * (stack->stack_no + 1),
+ &stack->pci_regs_mr);
+ pnv_xscom_add_subregion(chip,
+ pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 +
+ 0x40 * stack->stack_no,
+ &stack->phb_regs_mr);
+ }
+ }
+}
+
+/*
+ * Realize handler for POWER9 chips.
+ *
+ * Realizes the XSCOM bridge and maps sysbus MMIO index 0 at
+ * PNV9_XSCOM_BASE(chip), then chains to the parent realize. After that
+ * the quads, XIVE, PSI, LPC, OCC, HOMER and PHBs are realized in that
+ * order and their XSCOM / system-memory subregions are added.
+ *
+ * Realize failures are reported through @errp and abort the sequence;
+ * property setters use error_fatal or error_abort.
+ */
+static void pnv_chip_power9_realize(DeviceState *dev, Error **errp)
+{
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
+ Pnv9Chip *chip9 = PNV9_CHIP(dev);
+ PnvChip *chip = PNV_CHIP(dev);
+ Pnv9Psi *psi9 = &chip9->psi;
+ Error *local_err = NULL;
+
+ /* XSCOM bridge is first */
+ pnv_xscom_realize(chip, PNV9_XSCOM_SIZE, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV9_XSCOM_BASE(chip));
+
+ pcc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ pnv_chip_quad_realize(chip9, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* XIVE interrupt controller (POWER9) */
+ object_property_set_int(OBJECT(&chip9->xive), "ic-bar",
+ PNV9_XIVE_IC_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip9->xive), "vc-bar",
+ PNV9_XIVE_VC_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip9->xive), "pc-bar",
+ PNV9_XIVE_PC_BASE(chip), &error_fatal);
+ object_property_set_int(OBJECT(&chip9->xive), "tm-bar",
+ PNV9_XIVE_TM_BASE(chip), &error_fatal);
+ object_property_set_link(OBJECT(&chip9->xive), "chip", OBJECT(chip),
+ &error_abort);
+ if (!sysbus_realize(SYS_BUS_DEVICE(&chip9->xive), errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV9_XSCOM_XIVE_BASE,
+ &chip9->xive.xscom_regs);
+
+ /* Processor Service Interface (PSI) Host Bridge */
+ object_property_set_int(OBJECT(&chip9->psi), "bar", PNV9_PSIHB_BASE(chip),
+ &error_fatal);
+ if (!qdev_realize(DEVICE(&chip9->psi), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV9_XSCOM_PSIHB_BASE,
+ &PNV_PSI(psi9)->xscom_regs);
+
+ /* LPC */
+ object_property_set_link(OBJECT(&chip9->lpc), "psi", OBJECT(&chip9->psi),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip9->lpc), NULL, errp)) {
+ return;
+ }
+ /* The LPC registers are mapped in system memory, not in XSCOM */
+ memory_region_add_subregion(get_system_memory(), PNV9_LPCM_BASE(chip),
+ &chip9->lpc.xscom_regs);
+
+ chip->fw_mr = &chip9->lpc.isa_fw;
+ chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0",
+ (uint64_t) PNV9_LPCM_BASE(chip));
+
+ /* Create the simplified OCC model */
+ object_property_set_link(OBJECT(&chip9->occ), "psi", OBJECT(&chip9->psi),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip9->occ), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV9_XSCOM_OCC_BASE, &chip9->occ.xscom_regs);
+
+ /* OCC SRAM model */
+ memory_region_add_subregion(get_system_memory(), PNV9_OCC_SENSOR_BASE(chip),
+ &chip9->occ.sram_regs);
+
+ /* HOMER */
+ object_property_set_link(OBJECT(&chip9->homer), "chip", OBJECT(chip),
+ &error_abort);
+ if (!qdev_realize(DEVICE(&chip9->homer), NULL, errp)) {
+ return;
+ }
+ /* Homer Xscom region */
+ pnv_xscom_add_subregion(chip, PNV9_XSCOM_PBA_BASE, &chip9->homer.pba_regs);
+
+ /* Homer mmio region */
+ memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip),
+ &chip9->homer.regs);
+
+ /* PHBs */
+ pnv_chip_power9_phb_realize(chip, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+}
+
+/*
+ * Convert an address within the POWER9 XSCOM window into a PCB address:
+ * the address is reduced modulo PNV9_XSCOM_SIZE and shifted right by
+ * three bits. @chip is unused.
+ */
+static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr)
+{
+    uint64_t offset = addr & (PNV9_XSCOM_SIZE - 1);
+
+    return offset >> 3;
+}
+
+/*
+ * Class initializer for the POWER9 chip model: sets the CFAM id, the
+ * POWER9 core mask, six PHBs and the POWER9 hook set, then installs
+ * pnv_chip_power9_realize as the realize handler while saving the
+ * parent's.
+ */
+static void pnv_chip_power9_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER9";
+
+    /* Identification and topology */
+    chip_class->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */
+    chip_class->cores_mask = POWER9_CORE_MASK;
+    chip_class->num_phbs = 6;
+
+    /* Interrupt controller hooks */
+    chip_class->intc_create = pnv_chip_power9_intc_create;
+    chip_class->intc_reset = pnv_chip_power9_intc_reset;
+    chip_class->intc_destroy = pnv_chip_power9_intc_destroy;
+    chip_class->intc_print_info = pnv_chip_power9_intc_print_info;
+    chip_class->pic_print_info = pnv_chip_power9_pic_print_info;
+
+    /* Core numbering, XSCOM, ISA bus and device tree hooks */
+    chip_class->core_pir = pnv_chip_core_pir_p9;
+    chip_class->xscom_core_base = pnv_chip_power9_xscom_core_base;
+    chip_class->xscom_pcba = pnv_chip_power9_xscom_pcba;
+    chip_class->isa_create = pnv_chip_power9_isa_create;
+    chip_class->dt_populate = pnv_chip_power9_dt_populate;
+
+    device_class_set_parent_realize(dev_class, pnv_chip_power9_realize,
+                                    &chip_class->parent_realize);
+}
+
+/*
+ * Instance initializer for POWER10 chips: only the PSI and LPC child
+ * objects are created.
+ */
+static void pnv_chip_power10_instance_init(Object *obj)
+{
+    Pnv10Chip *p10 = PNV10_CHIP(obj);
+
+    object_initialize_child(obj, "psi", &p10->psi, TYPE_PNV10_PSI);
+    object_initialize_child(obj, "lpc", &p10->lpc, TYPE_PNV10_LPC);
+}
+
+/*
+ * Realize handler for POWER10 chips.
+ *
+ * Realizes the XSCOM bridge and maps sysbus MMIO index 0 at
+ * PNV10_XSCOM_BASE(chip), then chains to the parent realize. After
+ * that the PSI and LPC units are realized; the LPC registers are
+ * mapped in system memory at PNV10_LPCM_BASE(chip).
+ *
+ * Realize failures are reported through @errp and abort the sequence;
+ * property setters use error_fatal or error_abort.
+ */
+static void pnv_chip_power10_realize(DeviceState *dev, Error **errp)
+{
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev);
+ PnvChip *chip = PNV_CHIP(dev);
+ Pnv10Chip *chip10 = PNV10_CHIP(dev);
+ Error *local_err = NULL;
+
+ /* XSCOM bridge is first */
+ pnv_xscom_realize(chip, PNV10_XSCOM_SIZE, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV10_XSCOM_BASE(chip));
+
+ pcc->parent_realize(dev, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Processor Service Interface (PSI) Host Bridge */
+ object_property_set_int(OBJECT(&chip10->psi), "bar",
+ PNV10_PSIHB_BASE(chip), &error_fatal);
+ if (!qdev_realize(DEVICE(&chip10->psi), NULL, errp)) {
+ return;
+ }
+ pnv_xscom_add_subregion(chip, PNV10_XSCOM_PSIHB_BASE,
+ &PNV_PSI(&chip10->psi)->xscom_regs);
+
+ /* LPC */
+ object_property_set_link(OBJECT(&chip10->lpc), "psi",
+ OBJECT(&chip10->psi), &error_abort);
+ if (!qdev_realize(DEVICE(&chip10->lpc), NULL, errp)) {
+ return;
+ }
+ memory_region_add_subregion(get_system_memory(), PNV10_LPCM_BASE(chip),
+ &chip10->lpc.xscom_regs);
+
+ /* Firmware region and device tree path of the ISA bus behind the LPC */
+ chip->fw_mr = &chip10->lpc.isa_fw;
+ chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0",
+ (uint64_t) PNV10_LPCM_BASE(chip));
+}
+
+/*
+ * Convert an address within the POWER10 XSCOM window into a PCB
+ * address: the address is reduced modulo PNV10_XSCOM_SIZE and shifted
+ * right by three bits. @chip is unused.
+ */
+static uint32_t pnv_chip_power10_xscom_pcba(PnvChip *chip, uint64_t addr)
+{
+    uint64_t offset = addr & (PNV10_XSCOM_SIZE - 1);
+
+    return offset >> 3;
+}
+
+/*
+ * Class initializer for the POWER10 chip model: sets the CFAM id, the
+ * POWER10 core mask and the POWER10 hook set, then installs
+ * pnv_chip_power10_realize as the realize handler while saving the
+ * parent's. No PHB count is set here.
+ */
+static void pnv_chip_power10_class_init(ObjectClass *klass, void *data)
+{
+    PnvChipClass *chip_class = PNV_CHIP_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV Chip POWER10";
+
+    /* Identification and topology */
+    chip_class->chip_cfam_id = 0x120da04900008000ull; /* P10 DD1.0 (with NX) */
+    chip_class->cores_mask = POWER10_CORE_MASK;
+
+    /* Interrupt controller hooks */
+    chip_class->intc_create = pnv_chip_power10_intc_create;
+    chip_class->intc_reset = pnv_chip_power10_intc_reset;
+    chip_class->intc_destroy = pnv_chip_power10_intc_destroy;
+    chip_class->intc_print_info = pnv_chip_power10_intc_print_info;
+    chip_class->pic_print_info = pnv_chip_power10_pic_print_info;
+
+    /* Core numbering, XSCOM, ISA bus and device tree hooks */
+    chip_class->core_pir = pnv_chip_core_pir_p10;
+    chip_class->xscom_core_base = pnv_chip_power10_xscom_core_base;
+    chip_class->xscom_pcba = pnv_chip_power10_xscom_pcba;
+    chip_class->isa_create = pnv_chip_power10_isa_create;
+    chip_class->dt_populate = pnv_chip_power10_dt_populate;
+
+    device_class_set_parent_realize(dev_class, pnv_chip_power10_realize,
+                                    &chip_class->parent_realize);
+}
+
+/*
+ * Validate the core configuration of a chip before its cores are
+ * created.
+ *
+ * If the chip has no custom core mask, the chip class default is used.
+ * An error is set in @errp when the mask selects core ids the chip
+ * class does not allow, or when nr_cores exceeds the number of bits set
+ * in the mask.
+ */
+static void pnv_chip_core_sanitize(PnvChip *chip, Error **errp)
+{
+    PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+    int cores_max;
+
+    /*
+     * No custom mask for this chip, let's use the default one from
+     * the chip class
+     */
+    if (!chip->cores_mask) {
+        chip->cores_mask = pcc->cores_mask;
+    }
+
+    /*
+     * Reject alien core ids, some are reserved. Once this check has
+     * passed the mask is a subset of the class mask, so no further
+     * masking is needed.
+     */
+    if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) {
+        error_setg(errp, "warning: invalid core mask for chip 0x%"PRIx64" !",
+                   chip->cores_mask);
+        return;
+    }
+
+    /* now that we have a sane layout, let's check the number of cores */
+    cores_max = ctpop64(chip->cores_mask);
+    if (chip->nr_cores > cores_max) {
+        error_setg(errp, "warning: too many cores for chip ! Limit is %d",
+                   cores_max);
+        return;
+    }
+}
+
+/*
+ * Create and realize the cores of a chip.
+ *
+ * Fails through @errp when the core type of the chip is not a known
+ * class or when pnv_chip_core_sanitize() rejects the configuration.
+ * Otherwise chip->nr_cores cores are created, taking hardware ids from
+ * the bits set in chip->cores_mask in ascending order. Each core is
+ * added as child "core[<hwid>]", given its nr-threads, core id, PIR,
+ * hrmor (the machine fw_load_addr) and chip link, realized, and its
+ * XSCOM region is added at the base returned by the chip class.
+ * Property and realize failures of individual cores are fatal.
+ */
+static void pnv_chip_core_realize(PnvChip *chip, Error **errp)
+{
+ Error *error = NULL;
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip);
+ const char *typename = pnv_chip_core_typename(chip);
+ int i, core_hwid;
+ PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+
+ if (!object_class_by_name(typename)) {
+ error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename);
+ return;
+ }
+
+ /* Cores */
+ pnv_chip_core_sanitize(chip, &error);
+ if (error) {
+ error_propagate(errp, error);
+ return;
+ }
+
+ chip->cores = g_new0(PnvCore *, chip->nr_cores);
+
+ /*
+ * i counts the cores created so far, core_hwid walks the bit
+ * positions of the core mask.
+ */
+ for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8)
+ && (i < chip->nr_cores); core_hwid++) {
+ char core_name[32];
+ PnvCore *pnv_core;
+ uint64_t xscom_core_base;
+
+ /* Skip hardware ids that are not enabled in the mask */
+ if (!(chip->cores_mask & (1ull << core_hwid))) {
+ continue;
+ }
+
+ pnv_core = PNV_CORE(object_new(typename));
+
+ snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid);
+ object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core));
+ chip->cores[i] = pnv_core;
+ object_property_set_int(OBJECT(pnv_core), "nr-threads",
+ chip->nr_threads, &error_fatal);
+ object_property_set_int(OBJECT(pnv_core), CPU_CORE_PROP_CORE_ID,
+ core_hwid, &error_fatal);
+ object_property_set_int(OBJECT(pnv_core), "pir",
+ pcc->core_pir(chip, core_hwid), &error_fatal);
+ object_property_set_int(OBJECT(pnv_core), "hrmor", pnv->fw_load_addr,
+ &error_fatal);
+ object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip),
+ &error_abort);
+ qdev_realize(DEVICE(pnv_core), NULL, &error_fatal);
+
+ /* Each core has an XSCOM MMIO region */
+ xscom_core_base = pcc->xscom_core_base(chip, core_hwid);
+
+ pnv_xscom_add_subregion(chip, xscom_core_base,
+ &pnv_core->xscom_regs);
+ i++;
+ }
+}
+
+/*
+ * Realize handler of the abstract chip type: creates the cores and
+ * forwards any failure to @errp.
+ */
+static void pnv_chip_realize(DeviceState *dev, Error **errp)
+{
+    Error *local_err = NULL;
+
+    /* Cores */
+    pnv_chip_core_realize(PNV_CHIP(dev), &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+    }
+}
+
+/*
+ * qdev properties of a chip. The defaults are one core with one thread
+ * and zero for everything else. A zero "cores-mask" makes
+ * pnv_chip_core_sanitize() fall back to the chip class mask.
+ */
+static Property pnv_chip_properties[] = {
+ DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0),
+ DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0),
+ DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0),
+ DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1),
+ DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0),
+ DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1),
+ DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer of the abstract chip type: CPU device category,
+ * generic realize handler, chip properties and description.
+ */
+static void pnv_chip_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    set_bit(DEVICE_CATEGORY_CPU, dev_class->categories);
+    dev_class->realize = pnv_chip_realize;
+    device_class_set_props(dev_class, pnv_chip_properties);
+    dev_class->desc = "PowerNV Chip";
+}
+
+/*
+ * Look up a CPU thread of @chip by its PIR.
+ *
+ * Returns the first thread whose ppc_cpu_pir() equals @pir, or NULL
+ * when no thread of any core of the chip matches.
+ */
+PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip, uint32_t pir)
+{
+    int core, thread;
+
+    for (core = 0; core < chip->nr_cores; core++) {
+        PnvCore *pnv_core = chip->cores[core];
+        CPUCore *cpu_core = CPU_CORE(pnv_core);
+
+        for (thread = 0; thread < cpu_core->nr_threads; thread++) {
+            PowerPCCPU *candidate = pnv_core->threads[thread];
+
+            if (ppc_cpu_pir(candidate) == pir) {
+                return candidate;
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * XICS fabric hook: find the interrupt source that owns @irq.
+ *
+ * For each chip the PSI source is checked first, then the LSI and MSI
+ * sources of each of its PHBs. Returns the first source for which
+ * ics_valid_irq() holds, or NULL when none does.
+ */
+static ICSState *pnv_ics_get(XICSFabric *xi, int irq)
+{
+    PnvMachineState *pnv = PNV_MACHINE(xi);
+    int c, p;
+
+    for (c = 0; c < pnv->num_chips; c++) {
+        PnvChip *chip = pnv->chips[c];
+        Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[c]);
+        ICSState *psi_ics = &chip8->psi.ics;
+
+        if (ics_valid_irq(psi_ics, irq)) {
+            return psi_ics;
+        }
+
+        for (p = 0; p < chip->num_phbs; p++) {
+            PnvPHB3 *phb = &chip8->phbs[p];
+
+            if (ics_valid_irq(&phb->lsis, irq)) {
+                return &phb->lsis;
+            }
+            if (ics_valid_irq(ICS(&phb->msis), irq)) {
+                return ICS(&phb->msis);
+            }
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * XICS fabric hook: call ics_resend() on the PSI source of every chip
+ * and on the LSI and MSI sources of each of its PHBs.
+ */
+static void pnv_ics_resend(XICSFabric *xi)
+{
+    PnvMachineState *pnv = PNV_MACHINE(xi);
+    int c, p;
+
+    for (c = 0; c < pnv->num_chips; c++) {
+        PnvChip *chip = pnv->chips[c];
+        Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[c]);
+
+        ics_resend(&chip8->psi.ics);
+
+        for (p = 0; p < chip->num_phbs; p++) {
+            PnvPHB3 *phb = &chip8->phbs[p];
+
+            ics_resend(&phb->lsis);
+            ics_resend(ICS(&phb->msis));
+        }
+    }
+}
+
+/*
+ * XICS fabric hook: return the interrupt presenter of the CPU whose
+ * PIR is @pir, or NULL when no such CPU exists.
+ */
+static ICPState *pnv_icp_get(XICSFabric *xi, int pir)
+{
+    PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir);
+
+    if (!cpu) {
+        return NULL;
+    }
+
+    return ICP(pnv_cpu_state(cpu)->intc);
+}
+
+/*
+ * InterruptStatsProvider hook: print the interrupt controller state.
+ *
+ * The per-CPU presenter state is printed for every CPU using the hook
+ * of the first chip's class, then the per-chip state is printed for
+ * every chip.
+ */
+static void pnv_pic_print_info(InterruptStatsProvider *obj,
+                               Monitor *mon)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+    CPUState *cs;
+    int c;
+
+    /* XXX: loop on each chip/core/thread instead of CPU_FOREACH() */
+    CPU_FOREACH(cs) {
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        PnvChipClass *first_class = PNV_CHIP_GET_CLASS(pnv->chips[0]);
+
+        first_class->intc_print_info(pnv->chips[0], cpu, mon);
+    }
+
+    for (c = 0; c < pnv->num_chips; c++) {
+        PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(pnv->chips[c]);
+
+        chip_class->pic_print_info(pnv->chips[c], mon);
+    }
+}
+
+/*
+ * XIVE fabric hook: forward an NVT match request to the XIVE presenter
+ * of every chip.
+ *
+ * Returns the sum of the per-chip match counts. A negative count from
+ * any chip is returned at once, without querying the remaining chips.
+ */
+static int pnv_match_nvt(XiveFabric *xfb, uint8_t format,
+                         uint8_t nvt_blk, uint32_t nvt_idx,
+                         bool cam_ignore, uint8_t priority,
+                         uint32_t logic_serv,
+                         XiveTCTXMatch *match)
+{
+    PnvMachineState *pnv = PNV_MACHINE(xfb);
+    int matches = 0;
+    int c;
+
+    for (c = 0; c < pnv->num_chips; c++) {
+        Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[c]);
+        XivePresenter *presenter = XIVE_PRESENTER(&chip9->xive);
+        XivePresenterClass *presenter_class =
+            XIVE_PRESENTER_GET_CLASS(presenter);
+        int chip_matches = presenter_class->match_nvt(presenter, format,
+                                                      nvt_blk, nvt_idx,
+                                                      cam_ignore, priority,
+                                                      logic_serv, match);
+
+        if (chip_matches < 0) {
+            return chip_matches;
+        }
+
+        matches += chip_matches;
+    }
+
+    return matches;
+}
+
+/*
+ * Class initializer of the "powernv8" machine: description, default
+ * CPU type, XICS fabric hooks and the device tree "compatible" string
+ * list (NUL-separated, size includes the final terminator).
+ */
+static void pnv_machine_power8_class_init(ObjectClass *oc, void *data)
+{
+    static const char compat[] = "qemu,powernv8\0qemu,powernv\0ibm,powernv";
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+    XICSFabricClass *xics_class = XICS_FABRIC_CLASS(oc);
+    PnvMachineClass *pnv_class = PNV_MACHINE_CLASS(oc);
+
+    machine_class->desc = "IBM PowerNV (Non-Virtualized) POWER8";
+    machine_class->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+
+    /* XICS fabric hooks */
+    xics_class->icp_get = pnv_icp_get;
+    xics_class->ics_get = pnv_ics_get;
+    xics_class->ics_resend = pnv_ics_resend;
+
+    pnv_class->compat = compat;
+    pnv_class->compat_size = sizeof(compat);
+}
+
+/*
+ * Class initializer of the "powernv9" machine, also aliased as
+ * "powernv": description, default CPU type, XIVE fabric hook, device
+ * tree "compatible" string list and power management hook.
+ */
+static void pnv_machine_power9_class_init(ObjectClass *oc, void *data)
+{
+    static const char compat[] = "qemu,powernv9\0ibm,powernv";
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+    XiveFabricClass *xive_class = XIVE_FABRIC_CLASS(oc);
+    PnvMachineClass *pnv_class = PNV_MACHINE_CLASS(oc);
+
+    machine_class->desc = "IBM PowerNV (Non-Virtualized) POWER9";
+    machine_class->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
+    machine_class->alias = "powernv";
+
+    /* XIVE fabric hook */
+    xive_class->match_nvt = pnv_match_nvt;
+
+    pnv_class->compat = compat;
+    pnv_class->compat_size = sizeof(compat);
+    pnv_class->dt_power_mgt = pnv_dt_power_mgt;
+}
+
+/*
+ * Class initializer of the "powernv10" machine: description, default
+ * CPU type, device tree "compatible" string list and power management
+ * hook.
+ */
+static void pnv_machine_power10_class_init(ObjectClass *oc, void *data)
+{
+    static const char compat[] = "qemu,powernv10\0ibm,powernv";
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+    PnvMachineClass *pnv_class = PNV_MACHINE_CLASS(oc);
+
+    machine_class->desc = "IBM PowerNV (Non-Virtualized) POWER10";
+    machine_class->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0");
+
+    pnv_class->compat = compat;
+    pnv_class->compat_size = sizeof(compat);
+    pnv_class->dt_power_mgt = pnv_dt_power_mgt;
+}
+
+/*
+ * Getter of the "hb-mode" machine property: the mode is reported as
+ * enabled whenever the firmware load address is non-zero.
+ */
+static bool pnv_machine_get_hb(Object *obj, Error **errp)
+{
+    PnvMachineState *machine = PNV_MACHINE(obj);
+
+    return machine->fw_load_addr != 0;
+}
+
+/*
+ * Setter of the "hb-mode" machine property.
+ *
+ * Enabling the mode moves the firmware load address to 0x8000000.
+ * Disabling it resets the address to zero, which is the value the
+ * getter reports as "off", so that a later "hb-mode=off" undoes an
+ * earlier "hb-mode=on" instead of being silently ignored.
+ *
+ * NOTE(review): assumes the non-hostboot firmware load address is 0,
+ * as implied by the getter - confirm against the machine default.
+ */
+static void pnv_machine_set_hb(Object *obj, bool value, Error **errp)
+{
+    PnvMachineState *pnv = PNV_MACHINE(obj);
+
+    pnv->fw_load_addr = value ? 0x8000000 : 0;
+}
+
+/*
+ * Per-CPU worker of the NMI handler: synchronizes the CPU state,
+ * delivers a system reset and then makes sure SRR1 carries a wakeup
+ * reason.
+ */
+static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+
+    cpu_synchronize_state(cs);
+    ppc_cpu_do_system_reset(cs);
+
+    if (!(env->spr[SPR_SRR1] & SRR1_WAKESTATE)) {
+        /*
+         * For non-powersave system resets, SRR1[42:45] are defined to be
+         * implementation-dependent. The POWER9 User Manual specifies that
+         * an external (SCOM driven, which may come from a BMC nmi command
+         * or another CPU requesting a NMI IPI) system reset exception
+         * should be 0b0010 (PPC_BIT(44)).
+         */
+        env->spr[SPR_SRR1] |= SRR1_WAKESCOM;
+        return;
+    }
+
+    /*
+     * Power-save wakeups, as indicated by non-zero SRR1[46:47] put the
+     * wakeup reason in SRR1[42:45], system reset is indicated with 0b0100
+     * (PPC_BIT(43)).
+     */
+    if (!(env->spr[SPR_SRR1] & SRR1_WAKERESET)) {
+        warn_report("ppc_cpu_do_system_reset does not set system reset wakeup reason");
+        env->spr[SPR_SRR1] |= SRR1_WAKERESET;
+    }
+}
+
+/*
+ * NMI monitor handler: queues the system reset worker on every CPU.
+ * @cpu_index is ignored and @errp is never written.
+ */
+static void pnv_nmi(NMIState *n, int cpu_index, Error **errp)
+{
+    CPUState *target;
+
+    CPU_FOREACH(target) {
+        async_run_on_cpu(target, pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL);
+    }
+}
+
+/*
+ * Class initializer of the abstract PowerNV machine: generic machine
+ * settings, interrupt statistics and NMI hooks, and the "hb-mode"
+ * boolean property.
+ */
+static void pnv_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+    InterruptStatsProviderClass *stats_class =
+        INTERRUPT_STATS_PROVIDER_CLASS(oc);
+    NMIClass *nmi_class = NMI_CLASS(oc);
+
+    machine_class->desc = "IBM PowerNV (Non-Virtualized)";
+    machine_class->init = pnv_init;
+    machine_class->reset = pnv_reset;
+    machine_class->max_cpus = MAX_CPUS;
+    /* Pnv provides an AHCI device for storage */
+    machine_class->block_default_type = IF_IDE;
+    machine_class->no_parallel = 1;
+    machine_class->default_boot_order = NULL;
+    /*
+     * RAM defaults to less than 2048 for 32-bit hosts, and large
+     * enough to fit the maximum initrd size at its load address
+     */
+    machine_class->default_ram_size = 1 * GiB;
+    machine_class->default_ram_id = "pnv.ram";
+
+    stats_class->print_info = pnv_pic_print_info;
+    nmi_class->nmi_monitor_handler = pnv_nmi;
+
+    object_class_property_add_bool(oc, "hb-mode",
+                                   pnv_machine_get_hb, pnv_machine_set_hb);
+    object_class_property_set_description(oc, "hb-mode",
+                                          "Use a hostboot like boot loader");
+}
+
+/*
+ * TypeInfo initializers for concrete chip models. Each expands to an
+ * entry with the given type name and class initializer whose parent is
+ * the chip family type of the matching generation.
+ */
+
+/* Concrete POWER8 chip model, child of TYPE_PNV8_CHIP */
+#define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \
+ { \
+ .name = type, \
+ .class_init = class_initfn, \
+ .parent = TYPE_PNV8_CHIP, \
+ }
+
+/* Concrete POWER9 chip model, child of TYPE_PNV9_CHIP */
+#define DEFINE_PNV9_CHIP_TYPE(type, class_initfn) \
+ { \
+ .name = type, \
+ .class_init = class_initfn, \
+ .parent = TYPE_PNV9_CHIP, \
+ }
+
+/* Concrete POWER10 chip model, child of TYPE_PNV10_CHIP */
+#define DEFINE_PNV10_CHIP_TYPE(type, class_initfn) \
+ { \
+ .name = type, \
+ .class_init = class_initfn, \
+ .parent = TYPE_PNV10_CHIP, \
+ }
+
+/*
+ * QOM types registered by this file: the three concrete machines, the
+ * abstract machine and chip base types, and for each chip generation
+ * a family type (carrying the instance size and initializer) followed
+ * by its concrete chip models.
+ */
+static const TypeInfo types[] = {
+ {
+ .name = MACHINE_TYPE_NAME("powernv10"),
+ .parent = TYPE_PNV_MACHINE,
+ .class_init = pnv_machine_power10_class_init,
+ },
+ {
+ .name = MACHINE_TYPE_NAME("powernv9"),
+ .parent = TYPE_PNV_MACHINE,
+ .class_init = pnv_machine_power9_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XIVE_FABRIC },
+ { },
+ },
+ },
+ {
+ .name = MACHINE_TYPE_NAME("powernv8"),
+ .parent = TYPE_PNV_MACHINE,
+ .class_init = pnv_machine_power8_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XICS_FABRIC },
+ { },
+ },
+ },
+ /* Abstract base of all PowerNV machines */
+ {
+ .name = TYPE_PNV_MACHINE,
+ .parent = TYPE_MACHINE,
+ .abstract = true,
+ .instance_size = sizeof(PnvMachineState),
+ .class_init = pnv_machine_class_init,
+ .class_size = sizeof(PnvMachineClass),
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_INTERRUPT_STATS_PROVIDER },
+ { TYPE_NMI },
+ { },
+ },
+ },
+ /* Abstract base of all PowerNV chips */
+ {
+ .name = TYPE_PNV_CHIP,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .class_init = pnv_chip_class_init,
+ .instance_size = sizeof(PnvChip),
+ .class_size = sizeof(PnvChipClass),
+ .abstract = true,
+ },
+
+ /*
+ * P10 chip and variants
+ */
+ {
+ .name = TYPE_PNV10_CHIP,
+ .parent = TYPE_PNV_CHIP,
+ .instance_init = pnv_chip_power10_instance_init,
+ .instance_size = sizeof(Pnv10Chip),
+ },
+ DEFINE_PNV10_CHIP_TYPE(TYPE_PNV_CHIP_POWER10, pnv_chip_power10_class_init),
+
+ /*
+ * P9 chip and variants
+ */
+ {
+ .name = TYPE_PNV9_CHIP,
+ .parent = TYPE_PNV_CHIP,
+ .instance_init = pnv_chip_power9_instance_init,
+ .instance_size = sizeof(Pnv9Chip),
+ },
+ DEFINE_PNV9_CHIP_TYPE(TYPE_PNV_CHIP_POWER9, pnv_chip_power9_class_init),
+
+ /*
+ * P8 chip and variants
+ */
+ {
+ .name = TYPE_PNV8_CHIP,
+ .parent = TYPE_PNV_CHIP,
+ .instance_init = pnv_chip_power8_instance_init,
+ .instance_size = sizeof(Pnv8Chip),
+ },
+ DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8, pnv_chip_power8_class_init),
+ DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8E, pnv_chip_power8e_class_init),
+ DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8NVL,
+ pnv_chip_power8nvl_class_init),
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c
new file mode 100644
index 000000000..75a22ce50
--- /dev/null
+++ b/hw/ppc/pnv_bmc.c
@@ -0,0 +1,313 @@
+/*
+ * QEMU PowerNV, BMC related functions
+ *
+ * Copyright (c) 2016-2017, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "target/ppc/cpu.h"
+#include "qemu/log.h"
+#include "hw/ipmi/ipmi.h"
+#include "hw/ppc/fdt.h"
+
+#include "hw/ppc/pnv.h"
+
+#include <libfdt.h>
+
+/* TODO: include definition in ipmi.h */
+#define IPMI_SDR_FULL_TYPE 1
+
+/*
+ * OEM SEL Event data packet sent by BMC in response of a Read Event
+ * Message Buffer command
+ */
+typedef struct OemSel {
+ /* SEL header (10 bytes) */
+ uint8_t id[2];
+ uint8_t type;
+ uint8_t timestamp[4];
+ uint8_t manuf_id[3];
+
+ /* OEM SEL data (6 bytes) follows */
+ uint8_t netfun;
+ uint8_t cmd;
+ /* data[0] is the power action given to pnv_gen_oem_sel(), e.g. SOFT_OFF */
+ uint8_t data[4];
+} OemSel;
+
+#define SOFT_OFF 0x00
+#define SOFT_REBOOT 0x01
+
+/*
+ * Return true when @bmc can be cast to TYPE_IPMI_BMC_SIMULATOR, i.e. it
+ * is the QEMU internal BMC simulator (or a subtype of it).
+ */
+static bool pnv_bmc_is_simulator(IPMIBmc *bmc)
+{
+    Object *sim = object_dynamic_cast(OBJECT(bmc), TYPE_IPMI_BMC_SIMULATOR);
+
+    return sim != NULL;
+}
+
+/*
+ * Build an OEM SEL power notification whose first data byte is @reboot
+ * and hand it to the BMC as an event, without logging it.
+ */
+static void pnv_gen_oem_sel(IPMIBmc *bmc, uint8_t reboot)
+{
+    /* IPMI SEL Event are 16 bytes long */
+    OemSel event = {
+        .id        = { 0x55 , 0x55 },
+        .type      = 0xC0, /* OEM */
+        .timestamp = { 0x0, 0x0, 0x0, 0x0 },
+        .manuf_id  = { 0x0, 0x0, 0x0 },
+        .netfun    = 0x3A, /* IBM */
+        .cmd       = 0x04, /* AMI OEM SEL Power Notification */
+        .data      = { reboot, 0xFF, 0xFF, 0xFF },
+    };
+    uint8_t *raw_event = (uint8_t *) &event;
+
+    /* last argument is 0: do not log the event */
+    ipmi_bmc_gen_event(bmc, raw_event, 0);
+}
+
+/* Raise the OEM SEL power notification carrying the SOFT_OFF action. */
+void pnv_bmc_powerdown(IPMIBmc *bmc)
+{
+    const uint8_t action = SOFT_OFF;
+
+    pnv_gen_oem_sel(bmc, action);
+}
+
+/*
+ * Add a "bmc" node with a "sensors" subnode to the device tree @fdt and
+ * populate it with one "sensor@<owner number>" node for each compact or
+ * full SDR found in the BMC.
+ *
+ * Nothing is added when @bmc is not the internal simulator. Any libfdt
+ * failure is handled by the _FDT() macro.
+ */
+void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt)
+{
+    const struct ipmi_sdr_compact *sdr;
+    uint16_t nextrec;
+    int bmc_node;
+    int sensors_node;
+    int recid;
+
+    if (!pnv_bmc_is_simulator(bmc)) {
+        return;
+    }
+
+    bmc_node = fdt_add_subnode(fdt, 0, "bmc");
+    _FDT(bmc_node);
+    _FDT((fdt_setprop_string(fdt, bmc_node, "name", "bmc")));
+
+    sensors_node = fdt_add_subnode(fdt, bmc_node, "sensors");
+    _FDT(sensors_node);
+    _FDT((fdt_setprop_cell(fdt, sensors_node, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, sensors_node, "#size-cells", 0x0)));
+
+    /* Look up record ids 0, 1, 2, ... until the first lookup failure */
+    for (recid = 0;
+         ipmi_bmc_sdr_find(bmc, recid, &sdr, &nextrec) == 0;
+         recid++) {
+        bool is_sensor_record =
+            sdr->header.rec_type == IPMI_SDR_COMPACT_TYPE ||
+            sdr->header.rec_type == IPMI_SDR_FULL_TYPE;
+        char *node_name;
+        int sensor_node;
+
+        if (!is_sensor_record) {
+            continue;
+        }
+
+        node_name = g_strdup_printf("sensor@%x", sdr->sensor_owner_number);
+        sensor_node = fdt_add_subnode(fdt, sensors_node, node_name);
+        _FDT(sensor_node);
+        g_free(node_name);
+
+        _FDT((fdt_setprop_cell(fdt, sensor_node, "reg",
+                               sdr->sensor_owner_number)));
+        _FDT((fdt_setprop_string(fdt, sensor_node, "name", "sensor")));
+        _FDT((fdt_setprop_string(fdt, sensor_node, "compatible",
+                                 "ibm,ipmi-sensor")));
+        _FDT((fdt_setprop_cell(fdt, sensor_node, "ipmi-sensor-reading-type",
+                               sdr->reading_type)));
+        _FDT((fdt_setprop_cell(fdt, sensor_node, "ipmi-entity-id",
+                               sdr->entity_id)));
+        _FDT((fdt_setprop_cell(fdt, sensor_node, "ipmi-entity-instance",
+                               sdr->entity_instance)));
+        _FDT((fdt_setprop_cell(fdt, sensor_node, "ipmi-sensor-type",
+                               sdr->sensor_type)));
+    }
+}
+
+/*
+ * HIOMAP protocol handler
+ */
+/* Command codes, matched against byte 2 of the request in hiomap_cmd() */
+#define HIOMAP_C_RESET 1
+#define HIOMAP_C_GET_INFO 2
+#define HIOMAP_C_GET_FLASH_INFO 3
+#define HIOMAP_C_CREATE_READ_WINDOW 4
+#define HIOMAP_C_CLOSE_WINDOW 5
+#define HIOMAP_C_CREATE_WRITE_WINDOW 6
+#define HIOMAP_C_MARK_DIRTY 7
+#define HIOMAP_C_FLUSH 8
+#define HIOMAP_C_ACK 9
+#define HIOMAP_C_ERASE 10
+#define HIOMAP_C_DEVICE_NAME 11
+#define HIOMAP_C_LOCK 12
+
+/* Sizes and offsets are exchanged in blocks of (1 << BLOCK_SHIFT) bytes */
+#define BLOCK_SHIFT 12 /* 4K */
+
+/*
+ * Convert a byte count to a count of (1 << BLOCK_SHIFT)-byte blocks,
+ * rounding down. The result is truncated to 16 bits.
+ */
+static uint16_t bytes_to_blocks(uint32_t bytes)
+{
+    return (uint16_t)(bytes >> BLOCK_SHIFT);
+}
+
+/* Convert a count of (1 << BLOCK_SHIFT)-byte blocks to a byte count. */
+static uint32_t blocks_to_bytes(uint16_t blocks)
+{
+    return (uint32_t)blocks << BLOCK_SHIFT;
+}
+
+/*
+ * Erase @size bytes of the PNOR, starting at byte @offset, by writing
+ * 0xFFFFFFFF words through the PNOR memory region.
+ *
+ * @offset and @size are derived from a guest HIOMAP_C_ERASE request, so
+ * the range is validated against the flash size before any write is
+ * dispatched. memory_region_dispatch_write() is called directly on the
+ * region here; NOTE(review): it presumably does not bound-check the
+ * address itself -- confirm.
+ *
+ * Returns 0 on success, -1 when the range does not fit in the PNOR or a
+ * write fails.
+ */
+static int hiomap_erase(PnvPnor *pnor, uint32_t offset, uint32_t size)
+{
+    MemTxResult result;
+    uint32_t i;
+
+    /* Expressed so that "offset + size" can never wrap around */
+    if (offset > pnor->size || size > pnor->size - offset) {
+        return -1;
+    }
+
+    for (i = 0; i < size / 4; i++) {
+        result = memory_region_dispatch_write(&pnor->mmio, offset + i * 4,
+                                              0xFFFFFFFF, MO_32,
+                                              MEMTXATTRS_UNSPECIFIED);
+        if (result != MEMTX_OK) {
+            return -1;
+        }
+    }
+    return 0;
+}
+
+/*
+ * IPMI handler for HIOMAP requests sent to the BMC simulator.
+ *
+ * @ibs:     the BMC simulator; its "pnor" link gives the flash device
+ * @cmd:     request bytes; cmd[2] is the HIOMAP command, cmd[3] is
+ *           echoed back right after it, later bytes are arguments
+ * @cmd_len: number of valid bytes in @cmd
+ * @rsp:     response buffer being filled
+ *
+ * Window commands map the whole PNOR at PNOR_SPI_OFFSET, read-only or
+ * writable. MARK_DIRTY, FLUSH and ACK are accepted and do nothing.
+ * Commands that are not handled are logged as guest errors.
+ *
+ * NOTE(review): cmd[3] is read unconditionally although the handler is
+ * registered with a length of 3 -- confirm requests always carry it.
+ */
+static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len,
+                       RspBuffer *rsp)
+{
+    PnvPnor *pnor = PNV_PNOR(object_property_get_link(OBJECT(ibs), "pnor",
+                                                      &error_abort));
+    uint32_t pnor_size = pnor->size;
+    uint32_t pnor_addr = PNOR_SPI_OFFSET;
+    bool readonly = false;
+
+    rsp_buffer_push(rsp, cmd[2]);
+    rsp_buffer_push(rsp, cmd[3]);
+
+    switch (cmd[2]) {
+    case HIOMAP_C_MARK_DIRTY:
+    case HIOMAP_C_FLUSH:
+    case HIOMAP_C_ACK:
+        break;
+
+    case HIOMAP_C_ERASE:
+        /*
+         * The offset and size, both in blocks and little endian, live
+         * in cmd[4..7]: refuse a request too short to contain them
+         * rather than erasing from bytes that were never supplied.
+         */
+        if (cmd_len < 8) {
+            rsp_buffer_set_error(rsp, IPMI_CC_REQUEST_DATA_LENGTH_INVALID);
+            break;
+        }
+        if (hiomap_erase(pnor, blocks_to_bytes(cmd[5] << 8 | cmd[4]),
+                        blocks_to_bytes(cmd[7] << 8 | cmd[6]))) {
+            rsp_buffer_set_error(rsp, IPMI_CC_UNSPECIFIED);
+        }
+        break;
+
+    case HIOMAP_C_GET_INFO:
+        rsp_buffer_push(rsp, 2);  /* Version 2 */
+        rsp_buffer_push(rsp, BLOCK_SHIFT); /* block size */
+        rsp_buffer_push(rsp, 0);  /* Timeout */
+        rsp_buffer_push(rsp, 0);  /* Timeout */
+        break;
+
+    case HIOMAP_C_GET_FLASH_INFO:
+        rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) & 0xFF);
+        rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) >> 8);
+        rsp_buffer_push(rsp, 0x01);  /* erase size */
+        rsp_buffer_push(rsp, 0x00);  /* erase size */
+        break;
+
+    case HIOMAP_C_CREATE_READ_WINDOW:
+        readonly = true;
+        /* Fall through */
+
+    case HIOMAP_C_CREATE_WRITE_WINDOW:
+        memory_region_set_readonly(&pnor->mmio, readonly);
+        memory_region_set_enabled(&pnor->mmio, true);
+
+        rsp_buffer_push(rsp, bytes_to_blocks(pnor_addr) & 0xFF);
+        rsp_buffer_push(rsp, bytes_to_blocks(pnor_addr) >> 8);
+        rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) & 0xFF);
+        rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) >> 8);
+        rsp_buffer_push(rsp, 0x00); /* offset */
+        rsp_buffer_push(rsp, 0x00); /* offset */
+        break;
+
+    case HIOMAP_C_CLOSE_WINDOW:
+        memory_region_set_enabled(&pnor->mmio, false);
+        break;
+
+    case HIOMAP_C_DEVICE_NAME:
+    case HIOMAP_C_RESET:
+    case HIOMAP_C_LOCK:
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "HIOMAP: unknown command %02X\n",
+                      cmd[2]);
+        break;
+    }
+}
+
+/* IPMI command number whose slot in hiomap_cmds[] holds hiomap_cmd() */
+#define HIOMAP 0x5a
+
+/*
+ * Command handler table indexed by IPMI command number; only the HIOMAP
+ * slot is populated. NOTE(review): the 3 is presumably the minimum
+ * request length for the command -- confirm against IPMICmdHandler.
+ */
+static const IPMICmdHandler hiomap_cmds[] = {
+ [HIOMAP] = { hiomap_cmd, 3 },
+};
+
+/* Net function descriptor registered by pnv_bmc_set_pnor() */
+static const IPMINetfn hiomap_netfn = {
+ .cmd_nums = ARRAY_SIZE(hiomap_cmds),
+ .cmd_handlers = hiomap_cmds
+};
+
+
+/*
+ * Attach @pnor to @bmc through a constant "pnor" link property and
+ * register the HIOMAP handlers under the OEM net function.
+ *
+ * This is a no-op when @bmc is not the internal simulator.
+ */
+void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor)
+{
+    Object *pnor_obj = OBJECT(pnor);
+
+    if (pnv_bmc_is_simulator(bmc)) {
+        object_ref(pnor_obj);
+        object_property_add_const_link(OBJECT(bmc), "pnor", pnor_obj);
+
+        /* Install the HIOMAP protocol handlers to access the PNOR */
+        ipmi_sim_register_netfn(IPMI_BMC_SIMULATOR(bmc), IPMI_NETFN_OEM,
+                                &hiomap_netfn);
+    }
+}
+
+/*
+ * Create and realize the machine BMC as an instance of the QEMU
+ * internal simulator, then attach @pnor to it. PowerNV uses the
+ * internal simulator but the BMC could also be external.
+ *
+ * A realize failure is fatal (&error_fatal).
+ */
+IPMIBmc *pnv_bmc_create(PnvPnor *pnor)
+{
+    Object *sim = object_new(TYPE_IPMI_BMC_SIMULATOR);
+    IPMIBmc *bmc;
+
+    qdev_realize(DEVICE(sim), NULL, &error_fatal);
+
+    bmc = IPMI_BMC(sim);
+    pnv_bmc_set_pnor(bmc, pnor);
+
+    return bmc;
+}
+
+/* State shared between pnv_bmc_find() and its bmc_find() callback */
+typedef struct ForeachArgs {
+ const char *name; /* QOM type name to look for */
+ Object *obj; /* first matching object, NULL until one is found */
+} ForeachArgs;
+
+/*
+ * Callback for object_child_foreach_recursive(): remember the first
+ * child that can be cast to args->name. Returns 1 when a second match
+ * is met, 0 otherwise.
+ */
+static int bmc_find(Object *child, void *opaque)
+{
+    ForeachArgs *search = opaque;
+
+    if (!object_dynamic_cast(child, search->name)) {
+        return 0;
+    }
+
+    if (search->obj) {
+        /* a match was already recorded: this is a duplicate */
+        return 1;
+    }
+
+    search->obj = child;
+    return 0;
+}
+
+/*
+ * Search the whole QOM tree for an object of type TYPE_IPMI_BMC.
+ *
+ * Returns the BMC if exactly one exists and NULL if there is none. When
+ * more than one is found, @errp is set and NULL is returned.
+ */
+IPMIBmc *pnv_bmc_find(Error **errp)
+{
+    ForeachArgs search = { TYPE_IPMI_BMC, NULL };
+
+    if (object_child_foreach_recursive(object_get_root(), bmc_find,
+                                       &search)) {
+        error_setg(errp, "machine should have only one BMC device. "
+                   "Use '-nodefaults'");
+        return NULL;
+    }
+
+    if (!search.obj) {
+        return NULL;
+    }
+    return IPMI_BMC(search.obj);
+}
diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c
new file mode 100644
index 000000000..19e8eb885
--- /dev/null
+++ b/hw/ppc/pnv_core.c
@@ -0,0 +1,441 @@
+/*
+ * QEMU PowerPC PowerNV CPU Core model
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public License
+ * as published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/reset.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_core.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/xics.h"
+#include "hw/qdev-properties.h"
+#include "helper_regs.h"
+
+/*
+ * Return the CPU type name matching the core @pc: the core class name
+ * minus PNV_CORE_TYPE_SUFFIX is wrapped in POWERPC_CPU_TYPE_NAME() and
+ * looked up, and the name of the class found is returned.
+ */
+static const char *pnv_core_cpu_typename(PnvCore *pc)
+{
+    ObjectClass *core_class = object_get_class(OBJECT(pc));
+    const char *core_type = object_class_get_name(core_class);
+    int model_len = strlen(core_type) - strlen(PNV_CORE_TYPE_SUFFIX);
+    char *lookup = g_strdup_printf(POWERPC_CPU_TYPE_NAME("%.*s"),
+                                   model_len, core_type);
+    const char *cpu_type;
+
+    cpu_type = object_class_get_name(object_class_by_name(lookup));
+    g_free(lookup);
+
+    return cpu_type;
+}
+
+/*
+ * Reset one thread of the core @pc, load the register state the
+ * firmware starts from, then reset the thread's interrupt presenter
+ * through the chip class.
+ */
+static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu)
+{
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(pc->chip);
+    CPUPPCState *env = &cpu->env;
+
+    cpu_reset(CPU(cpu));
+
+    /*
+     * the skiboot firmware elects a primary thread to initialize the
+     * system and it can be any.
+     */
+    env->gpr[3] = PNV_FDT_ADDR;
+    env->nip = 0x10;
+    env->msr |= MSR_HVB; /* Hypervisor mode */
+    env->spr[SPR_HRMOR] = pc->hrmor;
+    /* the MSR was modified above */
+    hreg_compute_hflags(env);
+
+    chip_class->intc_reset(pc->chip, cpu);
+}
+
+/*
+ * These values are read by the PowerNV HW monitors under Linux
+ */
+#define PNV_XSCOM_EX_DTS_RESULT0 0x50000
+#define PNV_XSCOM_EX_DTS_RESULT1 0x50001
+
+/*
+ * XSCOM read handler of a POWER8 core. The register is @addr >> 3; only
+ * the two DTS result registers return data, anything else is logged as
+ * unimplemented and reads as 0.
+ */
+static uint64_t pnv_core_power8_xscom_read(void *opaque, hwaddr addr,
+                                           unsigned int width)
+{
+    uint32_t reg = addr >> 3;
+
+    /* The result should be 38 C */
+    switch (reg) {
+    case PNV_XSCOM_EX_DTS_RESULT0:
+        return 0x26f024f023f0000ull;
+    case PNV_XSCOM_EX_DTS_RESULT1:
+        return 0x24f000000000000ull;
+    default:
+        break;
+    }
+
+    qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n",
+                  addr);
+    return 0;
+}
+
+/*
+ * XSCOM write handler of a POWER8 core: no register is writable, every
+ * access is only logged as unimplemented.
+ */
+static void pnv_core_power8_xscom_write(void *opaque, hwaddr addr, uint64_t val,
+ unsigned int width)
+{
+ qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx "\n",
+ addr);
+}
+
+/* POWER8 core XSCOM region: big endian, 8-byte accesses only */
+static const MemoryRegionOps pnv_core_power8_xscom_ops = {
+ .read = pnv_core_power8_xscom_read,
+ .write = pnv_core_power8_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+
+/*
+ * POWER9 core controls
+ */
+#define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP 0xf010d
+#define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR 0xf010a
+
+/*
+ * XSCOM read handler of a POWER9 core. The register is @addr >> 3: the
+ * DTS result registers return fixed data, the special wakeup registers
+ * read as 0, anything else is logged as unimplemented and reads as 0.
+ */
+static uint64_t pnv_core_power9_xscom_read(void *opaque, hwaddr addr,
+                                           unsigned int width)
+{
+    uint32_t reg = addr >> 3;
+
+    switch (reg) {
+    /* The result should be 38 C */
+    case PNV_XSCOM_EX_DTS_RESULT0:
+        return 0x26f024f023f0000ull;
+    case PNV_XSCOM_EX_DTS_RESULT1:
+        return 0x24f000000000000ull;
+    case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP:
+    case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR:
+        return 0x0;
+    default:
+        break;
+    }
+
+    qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n",
+                  addr);
+    return 0;
+}
+
+/*
+ * XSCOM write handler of a POWER9 core: writes to the special wakeup
+ * registers are silently ignored, any other write is logged as
+ * unimplemented.
+ */
+static void pnv_core_power9_xscom_write(void *opaque, hwaddr addr, uint64_t val,
+                                        unsigned int width)
+{
+    uint32_t reg = addr >> 3;
+
+    if (reg == PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP ||
+        reg == PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR) {
+        return;
+    }
+
+    qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx "\n",
+                  addr);
+}
+
+/* POWER9 core XSCOM region (also used for POWER10): big endian, 8-byte accesses only */
+static const MemoryRegionOps pnv_core_power9_xscom_ops = {
+ .read = pnv_core_power9_xscom_read,
+ .write = pnv_core_power9_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * Realize one thread of the core @pc: realize the CPU device, create
+ * its interrupt presenter through the chip class, set the default value
+ * of its PIR register and initialize its time base.
+ *
+ * On failure @errp is set and the remaining steps are skipped.
+ */
+static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp)
+{
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(pc->chip);
+    CPUPPCState *env = &cpu->env;
+    ppc_spr_t *pir_spr = &env->spr_cb[SPR_PIR];
+    Error *err = NULL;
+    int thread_index = 0; /* TODO: TCG supports only one thread */
+    int core_pir;
+
+    if (!qdev_realize(DEVICE(cpu), NULL, errp)) {
+        return;
+    }
+
+    chip_class->intc_create(pc->chip, cpu, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /*
+     * The PIR of a thread is the core PIR + the thread index. We will
+     * need to find a way to get the thread index when TCG supports
+     * more than 1. We could use the object name ?
+     */
+    core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort);
+    pir_spr->default_value = core_pir + thread_index;
+
+    /* Set time-base frequency to 512 MHz */
+    cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ);
+}
+
+/* Reset handler registered by pnv_core_realize(): reset every thread. */
+static void pnv_core_reset(void *dev)
+{
+    PnvCore *pc = PNV_CORE(dev);
+    CPUCore *cc = CPU_CORE(dev);
+    int t = 0;
+
+    while (t < cc->nr_threads) {
+        pnv_core_cpu_reset(pc, pc->threads[t]);
+        t++;
+    }
+}
+
+/*
+ * Realize a PowerNV core: create one CPU object per thread as a
+ * "thread[i]" child of the core, realize each of them, set up the core
+ * XSCOM region and register the core reset handler.
+ *
+ * The "chip" link must be set beforehand. If realizing a thread fails,
+ * all thread objects are released together with their PnvCPUState and
+ * the error is propagated to @errp.
+ */
+static void pnv_core_realize(DeviceState *dev, Error **errp)
+{
+    PnvCore *pc = PNV_CORE(OBJECT(dev));
+    PnvCoreClass *pcc = PNV_CORE_GET_CLASS(pc);
+    CPUCore *cc = CPU_CORE(OBJECT(dev));
+    const char *typename = pnv_core_cpu_typename(pc);
+    Error *local_err = NULL;
+    void *obj;
+    int i, j;
+    char name[32];
+
+    assert(pc->chip);
+
+    pc->threads = g_new(PowerPCCPU *, cc->nr_threads);
+    for (i = 0; i < cc->nr_threads; i++) {
+        PowerPCCPU *cpu;
+
+        obj = object_new(typename);
+        cpu = POWERPC_CPU(obj);
+
+        pc->threads[i] = cpu;
+
+        snprintf(name, sizeof(name), "thread[%d]", i);
+        object_property_add_child(OBJECT(pc), name, obj);
+
+        cpu->machine_data = g_new0(PnvCPUState, 1);
+
+        /* drop the object_new() reference now the child link is set */
+        object_unref(obj);
+    }
+
+    for (j = 0; j < cc->nr_threads; j++) {
+        pnv_core_cpu_realize(pc, pc->threads[j], &local_err);
+        if (local_err) {
+            goto err;
+        }
+    }
+
+    snprintf(name, sizeof(name), "xscom-core.%d", cc->core_id);
+    /* TODO: check PNV_XSCOM_EX_SIZE for p10 */
+    pnv_xscom_region_init(&pc->xscom_regs, OBJECT(dev), pcc->xscom_ops,
+                          pc, name, PNV_XSCOM_EX_SIZE);
+
+    qemu_register_reset(pnv_core_reset, pc);
+    return;
+
+err:
+    /* i is nr_threads here: every thread object has to be released */
+    while (--i >= 0) {
+        PowerPCCPU *cpu = pc->threads[i];
+
+        /*
+         * Free the per-thread state before the object goes away, in the
+         * same order as pnv_core_cpu_unrealize(), otherwise it leaks.
+         */
+        g_free(cpu->machine_data);
+        cpu->machine_data = NULL;
+        object_unparent(OBJECT(cpu));
+    }
+    g_free(pc->threads);
+    pc->threads = NULL;
+    error_propagate(errp, local_err);
+}
+
+/*
+ * Tear down one thread of the core @pc: destroy its interrupt presenter
+ * through the chip class, remove the vCPU, free the PnvCPUState hanging
+ * off cpu->machine_data and unparent the CPU object.
+ */
+static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu)
+{
+ PnvCPUState *pnv_cpu = pnv_cpu_state(cpu);
+ PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip);
+
+ pcc->intc_destroy(pc->chip, cpu);
+ cpu_remove_sync(CPU(cpu));
+ /* clear the pointer before the state it refers to is freed */
+ cpu->machine_data = NULL;
+ g_free(pnv_cpu);
+ object_unparent(OBJECT(cpu));
+}
+
+/*
+ * Unrealize a PowerNV core: unregister its reset handler, tear down
+ * each thread and free the thread array.
+ */
+static void pnv_core_unrealize(DeviceState *dev)
+{
+    PnvCore *pc = PNV_CORE(dev);
+    CPUCore *cc = CPU_CORE(dev);
+    int t = 0;
+
+    qemu_unregister_reset(pnv_core_reset, pc);
+
+    while (t < cc->nr_threads) {
+        pnv_core_cpu_unrealize(pc, pc->threads[t]);
+        t++;
+    }
+    g_free(pc->threads);
+}
+
+/*
+ * Core properties: "pir" is the base of the thread PIR values, "hrmor"
+ * is loaded in SPR_HRMOR at reset and "chip" links to the owning chip.
+ */
+static Property pnv_core_properties[] = {
+ DEFINE_PROP_UINT32("pir", PnvCore, pir, 0),
+ DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0),
+ DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class init of the POWER8 core types: select the POWER8 XSCOM handlers. */
+static void pnv_core_power8_class_init(ObjectClass *oc, void *data)
+{
+    PNV_CORE_CLASS(oc)->xscom_ops = &pnv_core_power8_xscom_ops;
+}
+
+/* Class init of the POWER9 core types: select the POWER9 XSCOM handlers. */
+static void pnv_core_power9_class_init(ObjectClass *oc, void *data)
+{
+    PNV_CORE_CLASS(oc)->xscom_ops = &pnv_core_power9_xscom_ops;
+}
+
+/* Class init of the POWER10 core types. */
+static void pnv_core_power10_class_init(ObjectClass *oc, void *data)
+{
+    /* TODO: Use the P9 XSCOMs for now on P10 */
+    PNV_CORE_CLASS(oc)->xscom_ops = &pnv_core_power9_xscom_ops;
+}
+
+/*
+ * Class init of the abstract core type: install the realize/unrealize
+ * hooks and the properties. Cores cannot be created by the user.
+ */
+static void pnv_core_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(oc);
+
+    device_class->user_creatable = false;
+    device_class->realize = pnv_core_realize;
+    device_class->unrealize = pnv_core_unrealize;
+    device_class_set_props(device_class, pnv_core_properties);
+}
+
+/*
+ * Expand to the TypeInfo initializer of a concrete core type named after
+ * @cpu_model, whose class init is pnv_core_<family>_class_init().
+ */
+#define DEFINE_PNV_CORE_TYPE(family, cpu_model) \
+ { \
+ .parent = TYPE_PNV_CORE, \
+ .name = PNV_CORE_TYPE_NAME(cpu_model), \
+ .class_init = pnv_core_##family##_class_init, \
+ }
+
+/* The abstract PowerNV core type followed by one concrete type per CPU model */
+static const TypeInfo pnv_core_infos[] = {
+ {
+ .name = TYPE_PNV_CORE,
+ .parent = TYPE_CPU_CORE,
+ .instance_size = sizeof(PnvCore),
+ .class_size = sizeof(PnvCoreClass),
+ .class_init = pnv_core_class_init,
+ .abstract = true,
+ },
+ DEFINE_PNV_CORE_TYPE(power8, "power8e_v2.1"),
+ DEFINE_PNV_CORE_TYPE(power8, "power8_v2.0"),
+ DEFINE_PNV_CORE_TYPE(power8, "power8nvl_v1.0"),
+ DEFINE_PNV_CORE_TYPE(power9, "power9_v2.0"),
+ DEFINE_PNV_CORE_TYPE(power10, "power10_v2.0"),
+};
+
+DEFINE_TYPES(pnv_core_infos)
+
+/*
+ * POWER9 Quads
+ */
+
+#define P9X_EX_NCU_SPEC_BAR 0x11010
+
+/*
+ * XSCOM read handler of a POWER9 quad. The register is @addr >> 3: the
+ * NCU special BAR of both EXs reads as 0, any other register is logged
+ * as unimplemented and reads as all ones.
+ */
+static uint64_t pnv_quad_xscom_read(void *opaque, hwaddr addr,
+                                    unsigned int width)
+{
+    uint32_t offset = addr >> 3;
+    uint64_t val = -1;
+
+    switch (offset) {
+    case P9X_EX_NCU_SPEC_BAR:
+    case P9X_EX_NCU_SPEC_BAR + 0x400: /* Second EX */
+        val = 0;
+        break;
+    default:
+        /* this is the read path: the message must not say "writing" */
+        qemu_log_mask(LOG_UNIMP, "%s: reading @0x%08x\n", __func__,
+                      offset);
+    }
+
+    return val;
+}
+
+/*
+ * XSCOM write handler of a POWER9 quad: writes to the NCU special BAR
+ * of either EX are silently ignored, any other write is logged as
+ * unimplemented.
+ */
+static void pnv_quad_xscom_write(void *opaque, hwaddr addr, uint64_t val,
+                                 unsigned int width)
+{
+    uint32_t reg = addr >> 3;
+    bool is_ncu_spec_bar = reg == P9X_EX_NCU_SPEC_BAR ||
+                           reg == P9X_EX_NCU_SPEC_BAR + 0x400; /* Second EX */
+
+    if (is_ncu_spec_bar) {
+        return;
+    }
+
+    qemu_log_mask(LOG_UNIMP, "%s: writing @0x%08x\n", __func__, reg);
+}
+
+/* POWER9 quad XSCOM region: big endian, 8-byte accesses only */
+static const MemoryRegionOps pnv_quad_xscom_ops = {
+ .read = pnv_quad_xscom_read,
+ .write = pnv_quad_xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/* Realize a quad: set up its XSCOM region, named after the quad id. */
+static void pnv_quad_realize(DeviceState *dev, Error **errp)
+{
+    PnvQuad *quad = PNV_QUAD(dev);
+    char region_name[32];
+
+    snprintf(region_name, sizeof(region_name), "xscom-quad.%d",
+             quad->quad_id);
+    pnv_xscom_region_init(&quad->xscom_regs, OBJECT(dev),
+                          &pnv_quad_xscom_ops, quad, region_name,
+                          PNV9_XSCOM_EQ_SIZE);
+}
+
+/* "quad-id" is used to name the XSCOM region of the quad */
+static Property pnv_quad_properties[] = {
+ DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class init of the quad type: install the realize hook and the
+ * properties. Quads cannot be created by the user.
+ */
+static void pnv_quad_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(oc);
+
+    device_class->user_creatable = false;
+    device_class->realize = pnv_quad_realize;
+    device_class_set_props(device_class, pnv_quad_properties);
+}
+
+/* QOM type of the POWER9 quad, a plain device */
+static const TypeInfo pnv_quad_info = {
+ .name = TYPE_PNV_QUAD,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvQuad),
+ .class_init = pnv_quad_class_init,
+};
+
+/* Registers the quad type only; the core types go through DEFINE_TYPES() */
+static void pnv_core_register_types(void)
+{
+ type_register_static(&pnv_quad_info);
+}
+
+type_init(pnv_core_register_types)
diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c
new file mode 100644
index 000000000..9a262629b
--- /dev/null
+++ b/hw/ppc/pnv_homer.c
@@ -0,0 +1,382 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of a few HOMER related registers
+ *
+ * Copyright (c) 2019, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "exec/hwaddr.h"
+#include "exec/memory.h"
+#include "sysemu/cpus.h"
+#include "hw/qdev-core.h"
+#include "hw/qdev-properties.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_homer.h"
+#include "hw/ppc/pnv_xscom.h"
+
+
+/*
+ * Tell whether @addr is one of the byte offsets core_max_base + i, for
+ * i from 0 to the number of cores of the chip, both included.
+ *
+ * NOTE(review): the inclusive upper bound accepts nr_cores + 1 entries;
+ * it is kept as is, confirm it is intended.
+ */
+static bool core_max_array(PnvHomer *homer, hwaddr addr)
+{
+    PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer);
+
+    if (addr < hmrc->core_max_base) {
+        return false;
+    }
+    return addr - hmrc->core_max_base <= homer->chip->nr_cores;
+}
+
+/* P8 Pstate table */
+
+/* Offsets compared with the access address in pnv_power8_homer_read() */
+#define PNV8_OCC_PSTATE_VERSION 0x1f8001
+#define PNV8_OCC_PSTATE_MIN 0x1f8003
+#define PNV8_OCC_PSTATE_VALID 0x1f8000
+#define PNV8_OCC_PSTATE_THROTTLE 0x1f8002
+#define PNV8_OCC_PSTATE_NOM 0x1f8004
+#define PNV8_OCC_PSTATE_TURBO 0x1f8005
+#define PNV8_OCC_PSTATE_ULTRA_TURBO 0x1f8006
+#define PNV8_OCC_PSTATE_DATA 0x1f8008
+#define PNV8_OCC_PSTATE_ID_ZERO 0x1f8010
+#define PNV8_OCC_PSTATE_ID_ONE 0x1f8018
+#define PNV8_OCC_PSTATE_ID_TWO 0x1f8020
+#define PNV8_OCC_VDD_VOLTAGE_IDENTIFIER 0x1f8012
+#define PNV8_OCC_VCS_VOLTAGE_IDENTIFIER 0x1f8013
+#define PNV8_OCC_PSTATE_ZERO_FREQUENCY 0x1f8014
+#define PNV8_OCC_PSTATE_ONE_FREQUENCY 0x1f801c
+#define PNV8_OCC_PSTATE_TWO_FREQUENCY 0x1f8024
+/* First offset of the core max array, see core_max_array() */
+#define PNV8_CORE_MAX_BASE 0x1f8810
+
+
+/*
+ * Read handler of the POWER8 HOMER region: returns canned values for
+ * the pstate table offsets, 1 for the entries of the core max array and
+ * 0 for everything else.
+ */
+static uint64_t pnv_power8_homer_read(void *opaque, hwaddr addr,
+                                      unsigned size)
+{
+    PnvHomer *homer = PNV_HOMER(opaque);
+    uint64_t val;
+
+    switch (addr) {
+    case PNV8_OCC_PSTATE_VERSION:
+    case PNV8_OCC_PSTATE_MIN:
+    case PNV8_OCC_PSTATE_ID_ZERO:
+        val = 0;
+        break;
+    case PNV8_OCC_PSTATE_VALID:
+    case PNV8_OCC_PSTATE_THROTTLE:
+    case PNV8_OCC_PSTATE_NOM:
+    case PNV8_OCC_PSTATE_TURBO:
+    case PNV8_OCC_PSTATE_ID_ONE:
+    case PNV8_OCC_VDD_VOLTAGE_IDENTIFIER:
+    case PNV8_OCC_VCS_VOLTAGE_IDENTIFIER:
+        val = 1;
+        break;
+    case PNV8_OCC_PSTATE_ULTRA_TURBO:
+    case PNV8_OCC_PSTATE_ID_TWO:
+        val = 2;
+        break;
+    case PNV8_OCC_PSTATE_DATA:
+        val = 0x1000000000000000;
+        break;
+    /* P8 frequency for 0, 1, and 2 pstates */
+    case PNV8_OCC_PSTATE_ZERO_FREQUENCY:
+    case PNV8_OCC_PSTATE_ONE_FREQUENCY:
+    case PNV8_OCC_PSTATE_TWO_FREQUENCY:
+        val = 3000;
+        break;
+    default:
+        /* pstate table core max array */
+        val = core_max_array(homer, addr) ? 1 : 0;
+        break;
+    }
+
+    return val;
+}
+
+/* Write handler of the POWER8 HOMER region: writes are discarded. */
+static void pnv_power8_homer_write(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size)
+{
+    /* callback function defined to homer write */
+}
+
+/* POWER8 HOMER region: big endian, accesses of 1 to 8 bytes */
+static const MemoryRegionOps pnv_power8_homer_ops = {
+ .read = pnv_power8_homer_read,
+ .write = pnv_power8_homer_write,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * PBA BAR register numbers, compared with (addr >> 3) in the PBA read
+ * handlers. Defined for P8 and reused by the P9 handler.
+ */
+#define PBA_BAR0 0x00
+#define PBA_BAR1 0x01
+#define PBA_BAR2 0x02
+#define PBA_BAR3 0x03
+#define PBA_BARMASK0 0x04
+#define PBA_BARMASK1 0x05
+#define PBA_BARMASK2 0x06
+#define PBA_BARMASK3 0x07
+
+/*
+ * XSCOM read handler of the POWER8 PBA registers. The register is
+ * @addr >> 3: BAR0/BARMASK0 describe the HOMER region, BAR3/BARMASK3
+ * the OCC common area. Any other register is logged as unimplemented
+ * and reads as 0.
+ */
+static uint64_t pnv_homer_power8_pba_read(void *opaque, hwaddr addr,
+                                          unsigned size)
+{
+    PnvHomer *homer = PNV_HOMER(opaque);
+    PnvChip *chip = homer->chip;
+    uint32_t reg = addr >> 3;
+    uint64_t val = 0;
+
+    switch (reg) {
+    case PBA_BAR0:
+        val = PNV_HOMER_BASE(chip);
+        break;
+    case PBA_BARMASK0: /* P8 homer region mask */
+        val = (PNV_HOMER_SIZE - 1) & 0x300000;
+        break;
+    case PBA_BAR3: /* P8 occ common area */
+        val = PNV_OCC_COMMON_AREA_BASE;
+        break;
+    case PBA_BARMASK3: /* P8 occ common area mask */
+        val = (PNV_OCC_COMMON_AREA_SIZE - 1) & 0x700000;
+        break;
+    default:
+        /* hex prefix is "0x" (digit zero), it used to be the letter O */
+        qemu_log_mask(LOG_UNIMP, "PBA: read to unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+    return val;
+}
+
+/*
+ * XSCOM write handler of the POWER8 PBA registers: no register is
+ * writable, every access is only logged as unimplemented.
+ */
+static void pnv_homer_power8_pba_write(void *opaque, hwaddr addr,
+                                       uint64_t val, unsigned size)
+{
+    /* hex prefix is "0x" (digit zero), it used to be the letter O */
+    qemu_log_mask(LOG_UNIMP, "PBA: write to unimplemented register: 0x%"
+                  HWADDR_PRIx "\n", addr >> 3);
+}
+
+/* POWER8 PBA XSCOM region: big endian, 8-byte accesses only */
+static const MemoryRegionOps pnv_homer_power8_pba_ops = {
+ .read = pnv_homer_power8_pba_read,
+ .write = pnv_homer_power8_pba_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * Class init of the POWER8 HOMER type: select the P8 handlers, region
+ * sizes and core max array base.
+ */
+static void pnv_homer_power8_class_init(ObjectClass *klass, void *data)
+{
+    PnvHomerClass *hmrc = PNV_HOMER_CLASS(klass);
+
+    /* HOMER memory region */
+    hmrc->homer_ops = &pnv_power8_homer_ops;
+    hmrc->homer_size = PNV_HOMER_SIZE;
+    hmrc->core_max_base = PNV8_CORE_MAX_BASE;
+
+    /* PBA XSCOM registers */
+    hmrc->pba_ops = &pnv_homer_power8_pba_ops;
+    hmrc->pba_size = PNV_XSCOM_PBA_SIZE;
+}
+
+/* Concrete POWER8 subtype of the abstract HOMER device */
+static const TypeInfo pnv_homer_power8_type_info = {
+ .name = TYPE_PNV8_HOMER,
+ .parent = TYPE_PNV_HOMER,
+ .instance_size = sizeof(PnvHomer),
+ .class_init = pnv_homer_power8_class_init,
+};
+
+/* P9 Pstate table */
+
+/* Offsets compared with the access address in pnv_power9_homer_read() */
+#define PNV9_OCC_PSTATE_ID_ZERO 0xe2018
+#define PNV9_OCC_PSTATE_ID_ONE 0xe2020
+#define PNV9_OCC_PSTATE_ID_TWO 0xe2028
+#define PNV9_OCC_PSTATE_DATA 0xe2000
+#define PNV9_OCC_PSTATE_DATA_AREA 0xe2008
+#define PNV9_OCC_PSTATE_MIN 0xe2003
+#define PNV9_OCC_PSTATE_NOM 0xe2004
+#define PNV9_OCC_PSTATE_TURBO 0xe2005
+#define PNV9_OCC_PSTATE_ULTRA_TURBO 0xe2818
+#define PNV9_OCC_MAX_PSTATE_ULTRA_TURBO 0xe2006
+#define PNV9_OCC_PSTATE_MAJOR_VERSION 0xe2001
+#define PNV9_OCC_OPAL_RUNTIME_DATA 0xe2b85
+#define PNV9_CHIP_HOMER_IMAGE_POINTER 0x200008
+#define PNV9_CHIP_HOMER_BASE 0x0
+#define PNV9_OCC_PSTATE_ZERO_FREQUENCY 0xe201c
+#define PNV9_OCC_PSTATE_ONE_FREQUENCY 0xe2024
+#define PNV9_OCC_PSTATE_TWO_FREQUENCY 0xe202c
+#define PNV9_OCC_ROLE_MASTER_OR_SLAVE 0xe2002
+/* First offset of the core max array, see core_max_array() */
+#define PNV9_CORE_MAX_BASE 0xe2819
+
+
+/*
+ * Read handler of the POWER9 HOMER region: returns canned values for
+ * the pstate table offsets, 1 for the entries of the core max array and
+ * 0 for everything else.
+ */
+static uint64_t pnv_power9_homer_read(void *opaque, hwaddr addr,
+                                      unsigned size)
+{
+    PnvHomer *homer = PNV_HOMER(opaque);
+    uint64_t val;
+
+    switch (addr) {
+    case PNV9_OCC_MAX_PSTATE_ULTRA_TURBO:
+    case PNV9_OCC_PSTATE_ID_ZERO:
+        val = 0;
+        break;
+    case PNV9_OCC_PSTATE_DATA:
+    case PNV9_OCC_ROLE_MASTER_OR_SLAVE:
+    case PNV9_OCC_PSTATE_NOM:
+    case PNV9_OCC_PSTATE_TURBO:
+    case PNV9_OCC_PSTATE_ID_ONE:
+    case PNV9_OCC_PSTATE_ULTRA_TURBO:
+    case PNV9_OCC_OPAL_RUNTIME_DATA:
+        val = 1;
+        break;
+    case PNV9_OCC_PSTATE_MIN:
+    case PNV9_OCC_PSTATE_ID_TWO:
+        val = 2;
+        break;
+    /* 3000 khz frequency for 0, 1, and 2 pstates */
+    case PNV9_OCC_PSTATE_ZERO_FREQUENCY:
+    case PNV9_OCC_PSTATE_ONE_FREQUENCY:
+    case PNV9_OCC_PSTATE_TWO_FREQUENCY:
+        val = 3000;
+        break;
+    case PNV9_OCC_PSTATE_MAJOR_VERSION:
+        val = 0x90;
+        break;
+    case PNV9_CHIP_HOMER_BASE:
+    case PNV9_OCC_PSTATE_DATA_AREA:
+    case PNV9_CHIP_HOMER_IMAGE_POINTER:
+        val = 0x1000000000000000;
+        break;
+    default:
+        /* pstate table core max array */
+        val = core_max_array(homer, addr) ? 1 : 0;
+        break;
+    }
+
+    return val;
+}
+
+/* Write handler of the POWER9 HOMER region: writes are discarded. */
+static void pnv_power9_homer_write(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size)
+{
+    /* callback function defined to homer write */
+}
+
+/* POWER9 HOMER region: big endian, accesses of 1 to 8 bytes */
+static const MemoryRegionOps pnv_power9_homer_ops = {
+ .read = pnv_power9_homer_read,
+ .write = pnv_power9_homer_write,
+ .valid.min_access_size = 1,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 1,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * XSCOM read handler of the POWER9 PBA registers. The register is
+ * @addr >> 3: BAR0/BARMASK0 describe the HOMER region, BAR2/BARMASK2
+ * the OCC common area. Any other register is logged as unimplemented
+ * and reads as 0.
+ */
+static uint64_t pnv_homer_power9_pba_read(void *opaque, hwaddr addr,
+                                          unsigned size)
+{
+    PnvHomer *homer = PNV_HOMER(opaque);
+    PnvChip *chip = homer->chip;
+    uint32_t reg = addr >> 3;
+    uint64_t val = 0;
+
+    switch (reg) {
+    case PBA_BAR0:
+        val = PNV9_HOMER_BASE(chip);
+        break;
+    case PBA_BARMASK0: /* P9 homer region mask */
+        val = (PNV9_HOMER_SIZE - 1) & 0x300000;
+        break;
+    case PBA_BAR2: /* P9 occ common area */
+        val = PNV9_OCC_COMMON_AREA_BASE;
+        break;
+    case PBA_BARMASK2: /* P9 occ common area size */
+        val = (PNV9_OCC_COMMON_AREA_SIZE - 1) & 0x700000;
+        break;
+    default:
+        /* hex prefix is "0x" (digit zero), it used to be the letter O */
+        qemu_log_mask(LOG_UNIMP, "PBA: read to unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+    return val;
+}
+
+/*
+ * XSCOM write handler of the POWER9 PBA registers: no register is
+ * writable, every access is only logged as unimplemented.
+ */
+static void pnv_homer_power9_pba_write(void *opaque, hwaddr addr,
+                                       uint64_t val, unsigned size)
+{
+    /* hex prefix is "0x" (digit zero), it used to be the letter O */
+    qemu_log_mask(LOG_UNIMP, "PBA: write to unimplemented register: 0x%"
+                  HWADDR_PRIx "\n", addr >> 3);
+}
+
+/* POWER9 PBA XSCOM region: big endian, 8-byte accesses only */
+static const MemoryRegionOps pnv_homer_power9_pba_ops = {
+ .read = pnv_homer_power9_pba_read,
+ .write = pnv_homer_power9_pba_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * Class init of the POWER9 HOMER type: select the P9 handlers, region
+ * sizes and core max array base.
+ */
+static void pnv_homer_power9_class_init(ObjectClass *klass, void *data)
+{
+    PnvHomerClass *hmrc = PNV_HOMER_CLASS(klass);
+
+    /* HOMER memory region */
+    hmrc->homer_ops = &pnv_power9_homer_ops;
+    hmrc->homer_size = PNV9_HOMER_SIZE;
+    hmrc->core_max_base = PNV9_CORE_MAX_BASE;
+
+    /* PBA XSCOM registers */
+    hmrc->pba_ops = &pnv_homer_power9_pba_ops;
+    hmrc->pba_size = PNV9_XSCOM_PBA_SIZE;
+}
+
+/* Concrete POWER9 subtype of the abstract HOMER device */
+static const TypeInfo pnv_homer_power9_type_info = {
+ .name = TYPE_PNV9_HOMER,
+ .parent = TYPE_PNV_HOMER,
+ .instance_size = sizeof(PnvHomer),
+ .class_init = pnv_homer_power9_class_init,
+};
+
+/*
+ * Realize a HOMER device: set up the PBA XSCOM region and the HOMER I/O
+ * region using the handlers and sizes of the device class. The "chip"
+ * link must be set beforehand.
+ */
+static void pnv_homer_realize(DeviceState *dev, Error **errp)
+{
+    PnvHomer *homer = PNV_HOMER(dev);
+    PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer);
+    Object *owner = OBJECT(dev);
+
+    assert(homer->chip);
+
+    /* PBA registers, reached through XSCOM */
+    pnv_xscom_region_init(&homer->pba_regs, owner, hmrc->pba_ops,
+                          homer, "xscom-pba", hmrc->pba_size);
+
+    /* homer region */
+    memory_region_init_io(&homer->regs, owner, hmrc->homer_ops, homer,
+                          "homer-main-memory", hmrc->homer_size);
+}
+
+/* "chip" links the HOMER to its chip; pnv_homer_realize() asserts it is set */
+static Property pnv_homer_properties[] = {
+ DEFINE_PROP_LINK("chip", PnvHomer, chip, TYPE_PNV_CHIP, PnvChip *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class init of the abstract HOMER type: install the realize hook, the
+ * description and the properties. The device cannot be created by the
+ * user.
+ */
+static void pnv_homer_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->desc = "PowerNV HOMER Memory";
+    device_class->user_creatable = false;
+    device_class->realize = pnv_homer_realize;
+    device_class_set_props(device_class, pnv_homer_properties);
+}
+
+/* Abstract base type of the P8 and P9 HOMER devices */
+static const TypeInfo pnv_homer_type_info = {
+ .name = TYPE_PNV_HOMER,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvHomer),
+ .class_init = pnv_homer_class_init,
+ .class_size = sizeof(PnvHomerClass),
+ .abstract = true,
+};
+
+/* Register the abstract HOMER type and its POWER8 and POWER9 subtypes */
+static void pnv_homer_register_types(void)
+{
+ type_register_static(&pnv_homer_type_info);
+ type_register_static(&pnv_homer_power8_type_info);
+ type_register_static(&pnv_homer_power9_type_info);
+}
+
+type_init(pnv_homer_register_types);
diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c
new file mode 100644
index 000000000..bcbca3db9
--- /dev/null
+++ b/hw/ppc/pnv_lpc.c
@@ -0,0 +1,853 @@
+/*
+ * QEMU PowerPC PowerNV LPC controller
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "target/ppc/cpu.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/irq.h"
+#include "hw/isa/isa.h"
+#include "hw/qdev-properties.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_lpc.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/fdt.h"
+
+#include <libfdt.h>
+
+/*
+ * ECCB register indices. The XSCOM handlers (pnv_lpc_xscom_read() and
+ * pnv_lpc_xscom_write()) select a register with ((addr >> 3) & 3).
+ */
+enum {
+ ECCB_CTL = 0,
+ ECCB_RESET = 1,
+ ECCB_STAT = 2,
+ ECCB_DATA = 3,
+};
+
+/*
+ * OPB Master LS registers
+ *
+ * Offsets decoded by opb_master_read() and opb_master_write().
+ * OPB_MASTER_IRQ_LPC is the bit pnv_lpc_eval_irqs() sets or clears in the
+ * IRQ input register for the LPC host controller line.
+ */
+#define OPB_MASTER_LS_ROUTE0 0x8
+#define OPB_MASTER_LS_ROUTE1 0xC
+#define OPB_MASTER_LS_IRQ_STAT 0x50
+#define OPB_MASTER_IRQ_LPC 0x00000800
+#define OPB_MASTER_LS_IRQ_MASK 0x54
+#define OPB_MASTER_LS_IRQ_POL 0x58
+#define OPB_MASTER_LS_IRQ_INPUT 0x5c
+
+/*
+ * LPC HC registers
+ *
+ * Register offsets are decoded by lpc_hc_read() and lpc_hc_write(); only
+ * FW_SEG_IDSEL, FW_RD_ACC_SIZE, IRQSER_CTRL, IRQMASK, IRQSTAT and
+ * ERROR_ADDRESS are handled there. The remaining definitions are values
+ * or bits of those registers: LPC_HC_FW_RD_4B is the initial
+ * FW_RD_ACC_SIZE value, LPC_HC_IRQSER_EN gates the interrupt evaluation
+ * and LPC_HC_IRQ_SERIRQ0 >> n is the IRQSTAT bit latched for ISA IRQ n.
+ */
+#define LPC_HC_FW_SEG_IDSEL 0x24
+#define LPC_HC_FW_RD_ACC_SIZE 0x28
+#define LPC_HC_FW_RD_1B 0x00000000
+#define LPC_HC_FW_RD_2B 0x01000000
+#define LPC_HC_FW_RD_4B 0x02000000
+#define LPC_HC_FW_RD_16B 0x04000000
+#define LPC_HC_FW_RD_128B 0x07000000
+#define LPC_HC_IRQSER_CTRL 0x30
+#define LPC_HC_IRQSER_EN 0x80000000
+#define LPC_HC_IRQSER_QMODE 0x40000000
+#define LPC_HC_IRQSER_START_MASK 0x03000000
+#define LPC_HC_IRQSER_START_4CLK 0x00000000
+#define LPC_HC_IRQSER_START_6CLK 0x01000000
+#define LPC_HC_IRQSER_START_8CLK 0x02000000
+#define LPC_HC_IRQMASK 0x34 /* same bit defs as LPC_HC_IRQSTAT */
+#define LPC_HC_IRQSTAT 0x38
+#define LPC_HC_IRQ_SERIRQ0 0x80000000 /* all bits down to ... */
+#define LPC_HC_IRQ_SERIRQ16 0x00008000 /* IRQ16=IOCHK#, IRQ2=SMI# */
+#define LPC_HC_IRQ_SERIRQ_ALL 0xffff8000
+#define LPC_HC_IRQ_LRESET 0x00000400
+#define LPC_HC_IRQ_SYNC_ABNORM_ERR 0x00000080
+#define LPC_HC_IRQ_SYNC_NORESP_ERR 0x00000040
+#define LPC_HC_IRQ_SYNC_NORM_ERR 0x00000020
+#define LPC_HC_IRQ_SYNC_TIMEOUT_ERR 0x00000010
+#define LPC_HC_IRQ_SYNC_TARG_TAR_ERR 0x00000008
+#define LPC_HC_IRQ_SYNC_BM_TAR_ERR 0x00000004
+#define LPC_HC_IRQ_SYNC_BM0_REQ 0x00000002
+#define LPC_HC_IRQ_SYNC_BM1_REQ 0x00000001
+#define LPC_HC_ERROR_ADDRESS 0x40
+
+/*
+ * Layout of the OPB address space. pnv_lpc_realize() maps the ISA IO, MEM
+ * and FW aliases at the LPC_*_OPB_ADDR windows and the OPB master and LPC
+ * HC register regions at LPC_OPB_REGS_OPB_ADDR and LPC_HC_REGS_OPB_ADDR.
+ * pnv_dt_lpc() advertises the same addresses in the device tree.
+ */
+#define LPC_OPB_SIZE 0x100000000ull
+
+/* Sizes of the ISA address spaces and of their windows in the OPB space */
+#define ISA_IO_SIZE 0x00010000
+#define ISA_MEM_SIZE 0x10000000
+#define ISA_FW_SIZE 0x10000000
+#define LPC_IO_OPB_ADDR 0xd0010000
+#define LPC_IO_OPB_SIZE 0x00010000
+#define LPC_MEM_OPB_ADDR 0xe0000000
+#define LPC_MEM_OPB_SIZE 0x10000000
+#define LPC_FW_OPB_ADDR 0xf0000000
+#define LPC_FW_OPB_SIZE 0x10000000
+
+/* Register blocks; the OPBA (arbiter) block is only described in the DT */
+#define LPC_OPB_REGS_OPB_ADDR 0xc0010000
+#define LPC_OPB_REGS_OPB_SIZE 0x00000060
+#define LPC_OPB_REGS_OPBA_ADDR 0xc0011000
+#define LPC_OPB_REGS_OPBA_SIZE 0x00000008
+#define LPC_HC_REGS_OPB_ADDR 0xc0012000
+#define LPC_HC_REGS_OPB_SIZE 0x00000100
+
+/*
+ * XSCOM device tree hook installed by the POWER8 class: adds an
+ * "isa@<pcba>" node under @xscom_offset with the LPC XSCOM register range,
+ * the address/size cell counts and the compatible strings. libfdt return
+ * codes are checked through _FDT(). Always returns 0.
+ */
+static int pnv_lpc_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset)
+{
+    /* Two NUL-separated compatible strings; sizeof() covers both */
+    const char compat[] = "ibm,power8-lpc\0ibm,lpc";
+    uint32_t pcba = PNV_XSCOM_LPC_BASE;
+    uint32_t reg[2];
+    char *node_name;
+    int node;
+
+    reg[0] = cpu_to_be32(pcba);
+    reg[1] = cpu_to_be32(PNV_XSCOM_LPC_SIZE);
+
+    node_name = g_strdup_printf("isa@%x", pcba);
+    node = fdt_add_subnode(fdt, xscom_offset, node_name);
+    _FDT(node);
+    g_free(node_name);
+
+    _FDT((fdt_setprop(fdt, node, "reg", reg, sizeof(reg))));
+    _FDT((fdt_setprop_cell(fdt, node, "#address-cells", 2)));
+    _FDT((fdt_setprop_cell(fdt, node, "#size-cells", 1)));
+    _FDT((fdt_setprop(fdt, node, "compatible", compat, sizeof(compat))));
+
+    return 0;
+}
+
+/*
+ * POWER9 only
+ *
+ * Populate the device tree with the LPC MMIO bus: an "lpcm-opb@<addr>"
+ * node under @root_offset covering @lpcm_addr/@lpcm_size, and below it the
+ * OPB master, OPB arbiter, LPC host controller and "lpc@0" nodes.
+ * libfdt return codes are checked through _FDT(). Always returns 0.
+ */
+int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset, uint64_t lpcm_addr,
+ uint64_t lpcm_size)
+{
+ /* Compatible lists are NUL-separated; sizeof() covers every entry */
+ const char compat[] = "ibm,power9-lpcm-opb\0simple-bus";
+ const char lpc_compat[] = "ibm,power9-lpc\0ibm,lpc";
+ char *name;
+ int offset, lpcm_offset;
+ /*
+ * "ranges" of the OPB bus node. Presumably two 4-cell entries
+ * (child address, parent address high/low, size), each covering half
+ * of the LPCM window - TODO confirm against the parent cell counts.
+ */
+ uint32_t opb_ranges[8] = { 0,
+ cpu_to_be32(lpcm_addr >> 32),
+ cpu_to_be32((uint32_t)lpcm_addr),
+ cpu_to_be32(lpcm_size / 2),
+ cpu_to_be32(lpcm_size / 2),
+ cpu_to_be32(lpcm_addr >> 32),
+ cpu_to_be32(lpcm_size / 2),
+ cpu_to_be32(lpcm_size / 2),
+ };
+ /* 64-bit address and 64-bit size, each split in two big-endian cells */
+ uint32_t opb_reg[4] = { cpu_to_be32(lpcm_addr >> 32),
+ cpu_to_be32((uint32_t)lpcm_addr),
+ cpu_to_be32(lpcm_size >> 32),
+ cpu_to_be32((uint32_t)lpcm_size),
+ };
+ /*
+ * "ranges" of the lpc@0 node: three entries of (2 child address cells,
+ * OPB address, size) for the MEM, IO and FW windows. The first child
+ * cell is 0, 1 and 3 respectively.
+ */
+ uint32_t lpc_ranges[12] = { 0, 0,
+ cpu_to_be32(LPC_MEM_OPB_ADDR),
+ cpu_to_be32(LPC_MEM_OPB_SIZE),
+ cpu_to_be32(1), 0,
+ cpu_to_be32(LPC_IO_OPB_ADDR),
+ cpu_to_be32(LPC_IO_OPB_SIZE),
+ cpu_to_be32(3), 0,
+ cpu_to_be32(LPC_FW_OPB_ADDR),
+ cpu_to_be32(LPC_FW_OPB_SIZE),
+ };
+ /* Scratch (address, size) pair reused for each register block node */
+ uint32_t reg[2];
+
+ /*
+ * OPB bus
+ */
+ name = g_strdup_printf("lpcm-opb@%"PRIx64, lpcm_addr);
+ lpcm_offset = fdt_add_subnode(fdt, root_offset, name);
+ _FDT(lpcm_offset);
+ g_free(name);
+
+ _FDT((fdt_setprop(fdt, lpcm_offset, "reg", opb_reg, sizeof(opb_reg))));
+ _FDT((fdt_setprop_cell(fdt, lpcm_offset, "#address-cells", 1)));
+ _FDT((fdt_setprop_cell(fdt, lpcm_offset, "#size-cells", 1)));
+ _FDT((fdt_setprop(fdt, lpcm_offset, "compatible", compat, sizeof(compat))));
+ _FDT((fdt_setprop_cell(fdt, lpcm_offset, "ibm,chip-id", chip->chip_id)));
+ _FDT((fdt_setprop(fdt, lpcm_offset, "ranges", opb_ranges,
+ sizeof(opb_ranges))));
+
+ /*
+ * OPB Master registers
+ */
+ name = g_strdup_printf("opb-master@%x", LPC_OPB_REGS_OPB_ADDR);
+ offset = fdt_add_subnode(fdt, lpcm_offset, name);
+ _FDT(offset);
+ g_free(name);
+
+ reg[0] = cpu_to_be32(LPC_OPB_REGS_OPB_ADDR);
+ reg[1] = cpu_to_be32(LPC_OPB_REGS_OPB_SIZE);
+ _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+ _FDT((fdt_setprop_string(fdt, offset, "compatible",
+ "ibm,power9-lpcm-opb-master")));
+
+ /*
+ * OPB arbiter registers
+ *
+ * NOTE(review): the node name below is spelled "opb-arbitrer" while the
+ * compatible string uses "arbiter" - confirm what firmware expects
+ * before changing either.
+ */
+ name = g_strdup_printf("opb-arbitrer@%x", LPC_OPB_REGS_OPBA_ADDR);
+ offset = fdt_add_subnode(fdt, lpcm_offset, name);
+ _FDT(offset);
+ g_free(name);
+
+ reg[0] = cpu_to_be32(LPC_OPB_REGS_OPBA_ADDR);
+ reg[1] = cpu_to_be32(LPC_OPB_REGS_OPBA_SIZE);
+ _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+ _FDT((fdt_setprop_string(fdt, offset, "compatible",
+ "ibm,power9-lpcm-opb-arbiter")));
+
+ /*
+ * LPC Host Controller registers
+ */
+ name = g_strdup_printf("lpc-controller@%x", LPC_HC_REGS_OPB_ADDR);
+ offset = fdt_add_subnode(fdt, lpcm_offset, name);
+ _FDT(offset);
+ g_free(name);
+
+ reg[0] = cpu_to_be32(LPC_HC_REGS_OPB_ADDR);
+ reg[1] = cpu_to_be32(LPC_HC_REGS_OPB_SIZE);
+ _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))));
+ _FDT((fdt_setprop_string(fdt, offset, "compatible",
+ "ibm,power9-lpc-controller")));
+
+ /*
+ * LPC bus node carrying the MEM/IO/FW "ranges"
+ */
+ name = g_strdup_printf("lpc@0");
+ offset = fdt_add_subnode(fdt, lpcm_offset, name);
+ _FDT(offset);
+ g_free(name);
+ _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2)));
+ _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1)));
+ _FDT((fdt_setprop(fdt, offset, "compatible", lpc_compat,
+ sizeof(lpc_compat))));
+ _FDT((fdt_setprop(fdt, offset, "ranges", lpc_ranges,
+ sizeof(lpc_ranges))));
+
+ return 0;
+}
+
+/*
+ * The OPB read/write handlers below should be shared with the P9 LPC
+ * Controller, which uses direct MMIOs.
+ *
+ * TODO: rework to use address_space_stq() and address_space_ldq()
+ * instead.
+ *
+ * opb_read() copies @sz bytes from OPB address @addr into @data and
+ * returns true when the access succeeded.
+ */
+static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data,
+                     int sz)
+{
+    MemTxResult res;
+
+    /* XXX Handle access size limits and FW read caching here */
+    res = address_space_read(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED,
+                             data, sz);
+    return res == MEMTX_OK;
+}
+
+/*
+ * Write @sz bytes from @data to OPB address @addr. Returns true when the
+ * access succeeded.
+ */
+static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data,
+                      int sz)
+{
+    MemTxResult res;
+
+    /* XXX Handle access size limits here */
+    res = address_space_write(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED,
+                              data, sz);
+    return res == MEMTX_OK;
+}
+
+/*
+ * ECCB_CTL fields, decoded by pnv_lpc_do_eccb(): direction, transfer size
+ * and OPB address.
+ */
+#define ECCB_CTL_READ PPC_BIT(15)
+#define ECCB_CTL_SZ_LSH (63 - 7)
+#define ECCB_CTL_SZ_MASK PPC_BITMASK(4, 7)
+#define ECCB_CTL_ADDR_MASK PPC_BITMASK(32, 63)
+
+/*
+ * ECCB_STAT fields.
+ *
+ * NOTE(review): ECCB_STAT_OP_ERR is defined as the same bit as
+ * ECCB_STAT_OP_DONE and is not used in this file - confirm the intended
+ * error bit before relying on it.
+ *
+ * The read data mask is built from a 64-bit constant: shifting the 32-bit
+ * 0xffffffff left by ECCB_STAT_RD_DATA_LSH would drop the upper bits.
+ */
+#define ECCB_STAT_OP_DONE PPC_BIT(52)
+#define ECCB_STAT_OP_ERR PPC_BIT(52)
+#define ECCB_STAT_RD_DATA_LSH (63 - 37)
+#define ECCB_STAT_RD_DATA_MASK (0xffffffffull << ECCB_STAT_RD_DATA_LSH)
+
+/*
+ * Execute one ECCB command, as written to the ECCB_CTL register.
+ *
+ * @cmd carries the direction (ECCB_CTL_READ), the transfer size in bytes
+ * (ECCB_CTL_SZ_MASK) and the OPB address (ECCB_CTL_ADDR_MASK). A read
+ * stores ECCB_STAT_OP_DONE plus the first four data bytes, or all ones on
+ * failure, in eccb_stat_reg. A write sends the bytes of eccb_data_reg and
+ * then sets ECCB_STAT_OP_DONE. A size larger than the bounce buffer is
+ * logged as a guest error and the command is dropped.
+ */
+static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd)
+{
+    /* XXX Check for magic bits at the top, addr size etc... */
+    unsigned int sz = (cmd & ECCB_CTL_SZ_MASK) >> ECCB_CTL_SZ_LSH;
+    uint32_t opb_addr = cmd & ECCB_CTL_ADDR_MASK;
+    /*
+     * Zero-filled: only data[0..3] is ever loaded from eccb_data_reg or
+     * packed into eccb_stat_reg, while sizes from 0 to 8 are accepted.
+     * Without the initializer a short read would report stale stack bytes
+     * to the guest and a long write would send them to the OPB bus.
+     */
+    uint8_t data[8] = { 0 };
+    bool success;
+
+    if (sz > sizeof(data)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+            "ECCB: invalid operation at @0x%08x size %d\n", opb_addr, sz);
+        return;
+    }
+
+    if (cmd & ECCB_CTL_READ) {
+        success = opb_read(lpc, opb_addr, data, sz);
+        if (success) {
+            /* Data bytes are packed most significant byte first */
+            lpc->eccb_stat_reg = ECCB_STAT_OP_DONE |
+                    (((uint64_t)data[0]) << 24 |
+                     ((uint64_t)data[1]) << 16 |
+                     ((uint64_t)data[2]) <<  8 |
+                     ((uint64_t)data[3])) << ECCB_STAT_RD_DATA_LSH;
+        } else {
+            /* A failed read still completes, returning all ones */
+            lpc->eccb_stat_reg = ECCB_STAT_OP_DONE |
+                    (0xffffffffull << ECCB_STAT_RD_DATA_LSH);
+        }
+    } else {
+        data[0] = lpc->eccb_data_reg >> 24;
+        data[1] = lpc->eccb_data_reg >> 16;
+        data[2] = lpc->eccb_data_reg >>  8;
+        data[3] = lpc->eccb_data_reg;
+
+        success = opb_write(lpc, opb_addr, data, sz);
+        lpc->eccb_stat_reg = ECCB_STAT_OP_DONE;
+    }
+    /* XXX Which error bit (if any) to signal OPB error ? */
+}
+
+/*
+ * XSCOM read handler for the ECCB registers. ECCB_STAT is cleared once it
+ * has been read, ECCB_DATA is returned in the upper 32 bits, and ECCB_CTL
+ * and ECCB_RESET read as 0.
+ */
+static uint64_t pnv_lpc_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+    uint32_t reg = addr >> 3;
+    uint64_t stat;
+
+    reg &= 3;
+
+    if (reg == ECCB_STAT) {
+        stat = lpc->eccb_stat_reg;
+        lpc->eccb_stat_reg = 0;
+        return stat;
+    }
+
+    if (reg == ECCB_DATA) {
+        return ((uint64_t)lpc->eccb_data_reg) << 32;
+    }
+
+    /* ECCB_CTL and ECCB_RESET */
+    return 0;
+}
+
+/*
+ * XSCOM write handler for the ECCB registers. A write to ECCB_CTL runs the
+ * command, a write to ECCB_DATA latches the upper 32 bits of @val, and
+ * writes to ECCB_RESET and ECCB_STAT are ignored.
+ */
+static void pnv_lpc_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+    uint32_t reg = addr >> 3;
+
+    reg &= 3;
+
+    if (reg == ECCB_CTL) {
+        pnv_lpc_do_eccb(lpc, val);
+    } else if (reg == ECCB_DATA) {
+        lpc->eccb_data_reg = val >> 32;
+    }
+    /* ECCB_RESET (XXXX) and ECCB_STAT: nothing to do */
+}
+
+/* XSCOM access to the ECCB registers: big-endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_lpc_xscom_ops = {
+    .read = pnv_lpc_xscom_read,
+    .write = pnv_lpc_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * MMIO read handler used by the POWER9 "lpcm" region: forwards 4-byte and
+ * 1-byte loads to the OPB address space. Any other size is logged as a
+ * guest error and reads as 0. A failed OPB access is logged as well.
+ */
+static uint64_t pnv_lpc_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+    uint32_t opb_addr = addr & ECCB_CTL_ADDR_MASK;
+    MemTxResult res;
+    uint64_t ret;
+
+    if (size == 4) {
+        ret = address_space_ldl(&lpc->opb_as, opb_addr,
+                                MEMTXATTRS_UNSPECIFIED, &res);
+    } else if (size == 1) {
+        ret = address_space_ldub(&lpc->opb_as, opb_addr,
+                                 MEMTXATTRS_UNSPECIFIED, &res);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "OPB read failed at @0x%"
+                      HWADDR_PRIx " invalid size %d\n", addr, size);
+        return 0;
+    }
+
+    if (res != MEMTX_OK) {
+        qemu_log_mask(LOG_GUEST_ERROR, "OPB read failed at @0x%"
+                      HWADDR_PRIx "\n", addr);
+    }
+
+    return ret;
+}
+
+/*
+ * MMIO write handler used by the POWER9 "lpcm" region: forwards 4-byte and
+ * 1-byte stores to the OPB address space. Any other size is logged as a
+ * guest error and dropped. A failed OPB access is logged as well.
+ */
+static void pnv_lpc_mmio_write(void *opaque, hwaddr addr,
+                               uint64_t val, unsigned size)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+    uint32_t opb_addr = addr & ECCB_CTL_ADDR_MASK;
+    MemTxResult res;
+
+    if (size == 4) {
+        address_space_stl(&lpc->opb_as, opb_addr, val,
+                          MEMTXATTRS_UNSPECIFIED, &res);
+    } else if (size == 1) {
+        address_space_stb(&lpc->opb_as, opb_addr, val,
+                          MEMTXATTRS_UNSPECIFIED, &res);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "OPB write failed at @0x%"
+                      HWADDR_PRIx " invalid size %d\n", addr, size);
+        return;
+    }
+
+    if (res != MEMTX_OK) {
+        qemu_log_mask(LOG_GUEST_ERROR, "OPB write failed at @0x%"
+                      HWADDR_PRIx "\n", addr);
+    }
+}
+
+/* Direct MMIO access to the OPB space: big-endian, 1 to 4 byte handlers. */
+static const MemoryRegionOps pnv_lpc_mmio_ops = {
+    .read = pnv_lpc_mmio_read,
+    .write = pnv_lpc_mmio_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 4,
+};
+
+/*
+ * Recompute the interrupt state: derive the LPC HC to OPB line from the
+ * HC status/mask registers (only while SerIRQ is enabled), latch unmasked
+ * OPB inputs into the OPB status register and forward the result to the
+ * PSI interrupt selected by the device class.
+ */
+static void pnv_lpc_eval_irqs(PnvLpcController *lpc)
+{
+    PnvLpcClass *plc = PNV_LPC_GET_CLASS(lpc);
+    uint32_t pending = 0;
+
+    /* Update LPC controller to OPB line */
+    if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) {
+        pending = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask;
+    }
+
+    /*
+     * The polarity register is not honored, it's pointless and unused
+     * anyway
+     */
+    if (pending != 0) {
+        lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC;
+    } else {
+        lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC;
+    }
+
+    /* Update OPB internal latch */
+    lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask;
+
+    /* Reflect the interrupt */
+    pnv_psi_irq_set(lpc->psi, plc->psi_irq, lpc->opb_irq_stat != 0);
+}
+
+/*
+ * Read handler for the LPC host controller registers. Unimplemented
+ * registers are logged and read as all ones.
+ */
+static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    switch (addr) {
+    case LPC_HC_FW_SEG_IDSEL:
+        return lpc->lpc_hc_fw_seg_idsel;
+    case LPC_HC_FW_RD_ACC_SIZE:
+        return lpc->lpc_hc_fw_rd_acc_size;
+    case LPC_HC_IRQSER_CTRL:
+        return lpc->lpc_hc_irqser_ctrl;
+    case LPC_HC_IRQMASK:
+        return lpc->lpc_hc_irqmask;
+    case LPC_HC_IRQSTAT:
+        return lpc->lpc_hc_irqstat;
+    case LPC_HC_ERROR_ADDRESS:
+        return lpc->lpc_hc_error_addr;
+    default:
+        qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+        return 0xfffffffffffffffful;
+    }
+}
+
+/*
+ * Write handler for the LPC host controller registers. Writes to the
+ * SerIRQ control, IRQ mask and IRQ status registers re-evaluate the
+ * interrupt state; IRQSTAT is write-one-to-clear. ERROR_ADDRESS writes are
+ * ignored and unimplemented registers are logged.
+ */
+static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val,
+                         unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    /* XXX Filter out reserved bits */
+
+    switch (addr) {
+    case LPC_HC_FW_SEG_IDSEL:
+        /*
+         * XXX Actually figure out how that works as this impact
+         * memory regions/aliases
+         */
+        lpc->lpc_hc_fw_seg_idsel = val;
+        return;
+    case LPC_HC_FW_RD_ACC_SIZE:
+        lpc->lpc_hc_fw_rd_acc_size = val;
+        return;
+    case LPC_HC_ERROR_ADDRESS:
+        return;
+    case LPC_HC_IRQSER_CTRL:
+        lpc->lpc_hc_irqser_ctrl = val;
+        break;
+    case LPC_HC_IRQMASK:
+        lpc->lpc_hc_irqmask = val;
+        break;
+    case LPC_HC_IRQSTAT:
+        lpc->lpc_hc_irqstat &= ~val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+        return;
+    }
+
+    /* Only the interrupt related registers reach this point */
+    pnv_lpc_eval_irqs(lpc);
+}
+
+/* LPC host controller registers: big-endian, 4-byte accesses only. */
+static const MemoryRegionOps lpc_hc_ops = {
+    .read = lpc_hc_read,
+    .write = lpc_hc_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * Read handler for the OPB master registers. Unimplemented registers are
+ * logged and read as all ones.
+ */
+static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    switch (addr) {
+    case OPB_MASTER_LS_ROUTE0: /* TODO */
+        return lpc->opb_irq_route0;
+    case OPB_MASTER_LS_ROUTE1: /* TODO */
+        return lpc->opb_irq_route1;
+    case OPB_MASTER_LS_IRQ_STAT:
+        return lpc->opb_irq_stat;
+    case OPB_MASTER_LS_IRQ_MASK:
+        return lpc->opb_irq_mask;
+    case OPB_MASTER_LS_IRQ_POL:
+        return lpc->opb_irq_pol;
+    case OPB_MASTER_LS_IRQ_INPUT:
+        return lpc->opb_irq_input;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OPBM: read on unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr);
+        return 0xfffffffffffffffful;
+    }
+}
+
+/*
+ * Write handler for the OPB master registers. Writes to the IRQ status
+ * (write-one-to-clear), mask and polarity registers re-evaluate the
+ * interrupt state. The IRQ input register is read only and unimplemented
+ * registers are logged.
+ */
+static void opb_master_write(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
+{
+    PnvLpcController *lpc = opaque;
+
+    switch (addr) {
+    case OPB_MASTER_LS_ROUTE0: /* TODO */
+        lpc->opb_irq_route0 = val;
+        return;
+    case OPB_MASTER_LS_ROUTE1: /* TODO */
+        lpc->opb_irq_route1 = val;
+        return;
+    case OPB_MASTER_LS_IRQ_INPUT:
+        /* Read only */
+        return;
+    case OPB_MASTER_LS_IRQ_STAT:
+        lpc->opb_irq_stat &= ~val;
+        break;
+    case OPB_MASTER_LS_IRQ_MASK:
+        lpc->opb_irq_mask = val;
+        break;
+    case OPB_MASTER_LS_IRQ_POL:
+        lpc->opb_irq_pol = val;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OPBM: write on unimplemented register: 0x%"
+                      HWADDR_PRIx " val=0x%08"PRIx64"\n", addr, val);
+        return;
+    }
+
+    /* Only the interrupt related registers reach this point */
+    pnv_lpc_eval_irqs(lpc);
+}
+
+/* OPB master registers: big-endian, 4-byte accesses only. */
+static const MemoryRegionOps opb_master_ops = {
+    .read = opb_master_read,
+    .write = opb_master_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * POWER8 realize handler: runs the parent realize first and, when it
+ * succeeds, initializes the XSCOM region backing the LPC registers.
+ */
+static void pnv_lpc_power8_realize(DeviceState *dev, Error **errp)
+{
+    PnvLpcController *lpc = PNV_LPC(dev);
+    PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev);
+    Error *err = NULL;
+
+    plc->parent_realize(dev, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* P8 uses a XSCOM region for LPC registers */
+    pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(lpc), &pnv_lpc_xscom_ops,
+                          lpc, "xscom-lpc", PNV_XSCOM_LPC_SIZE);
+}
+
+/*
+ * Class initializer for the POWER8 LPC controller: chains the POWER8
+ * realize handler in front of the parent one, selects the PSI interrupt
+ * and installs the XSCOM device tree hook.
+ */
+static void pnv_lpc_power8_class_init(ObjectClass *klass, void *data)
+{
+    PnvLpcClass *plc = PNV_LPC_CLASS(klass);
+    PnvXScomInterfaceClass *xscomc = PNV_XSCOM_INTERFACE_CLASS(klass);
+    DeviceClass *devc = DEVICE_CLASS(klass);
+
+    devc->desc = "PowerNV LPC Controller POWER8";
+    device_class_set_parent_realize(devc, pnv_lpc_power8_realize,
+                                    &plc->parent_realize);
+
+    plc->psi_irq = PSIHB_IRQ_LPC_I2C;
+
+    xscomc->dt_xscom = pnv_lpc_dt_xscom;
+}
+
+/* QOM type of the POWER8 LPC controller; implements the XSCOM interface. */
+static const TypeInfo pnv_lpc_power8_info = {
+    .parent     = TYPE_PNV_LPC,
+    .name       = TYPE_PNV8_LPC,
+    .class_init = pnv_lpc_power8_class_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_PNV_XSCOM_INTERFACE },
+        { }
+    }
+};
+
+/*
+ * POWER9 realize handler: runs the parent realize first and, when it
+ * succeeds, initializes the "lpcm" MMIO region. The region reuses the
+ * xscom_regs field of the controller.
+ */
+static void pnv_lpc_power9_realize(DeviceState *dev, Error **errp)
+{
+    PnvLpcController *lpc = PNV_LPC(dev);
+    PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev);
+    Error *err = NULL;
+
+    plc->parent_realize(dev, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    /* P9 uses a MMIO region */
+    memory_region_init_io(&lpc->xscom_regs, OBJECT(lpc), &pnv_lpc_mmio_ops,
+                          lpc, "lpcm", PNV9_LPCM_SIZE);
+}
+
+/*
+ * Class initializer for the POWER9 LPC controller: chains the POWER9
+ * realize handler in front of the parent one and selects the PSI
+ * interrupt.
+ */
+static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data)
+{
+    PnvLpcClass *plc = PNV_LPC_CLASS(klass);
+    DeviceClass *devc = DEVICE_CLASS(klass);
+
+    devc->desc = "PowerNV LPC Controller POWER9";
+    device_class_set_parent_realize(devc, pnv_lpc_power9_realize,
+                                    &plc->parent_realize);
+
+    plc->psi_irq = PSIHB9_IRQ_LPCHC;
+}
+
+/* QOM type of the POWER9 LPC controller. */
+static const TypeInfo pnv_lpc_power9_info = {
+    .parent     = TYPE_PNV_LPC,
+    .name       = TYPE_PNV9_LPC,
+    .class_init = pnv_lpc_power9_class_init,
+};
+
+/*
+ * Class initializer for the POWER10 LPC controller: only the description
+ * differs from the POWER9 parent class.
+ */
+static void pnv_lpc_power10_class_init(ObjectClass *klass, void *data)
+{
+    DEVICE_CLASS(klass)->desc = "PowerNV LPC Controller POWER10";
+}
+
+/* QOM type of the POWER10 LPC controller, derived from the POWER9 one. */
+static const TypeInfo pnv_lpc_power10_info = {
+    .parent     = TYPE_PNV9_LPC,
+    .name       = TYPE_PNV10_LPC,
+    .class_init = pnv_lpc_power10_class_init,
+};
+
+/*
+ * Common realize handler of the LPC controller. Asserts that the "psi"
+ * link is set, initializes the firmware read access size register, then
+ * builds the OPB address space: ISA IO/MEM/FW root regions, their alias
+ * windows in the OPB space, and the OPB master and LPC HC register
+ * regions.
+ */
+static void pnv_lpc_realize(DeviceState *dev, Error **errp)
+{
+ PnvLpcController *lpc = PNV_LPC(dev);
+
+ assert(lpc->psi);
+
+ /* Reg inits */
+ lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B;
+
+ /* Create address space and backing MR for the OPB bus */
+ memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", 0x100000000ull);
+ address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb");
+
+ /* Create ISA IO and Mem space regions which are the root of
+ * the ISA bus (ie, ISA address spaces). We don't create a
+ * separate one for FW which we alias to memory.
+ *
+ * NOTE(review): a separate "isa-fw" region IS created below, which
+ * contradicts the sentence above - confirm which one is intended.
+ */
+ memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE);
+ memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE);
+ memory_region_init(&lpc->isa_fw, OBJECT(dev), "isa-fw", ISA_FW_SIZE);
+
+ /* Create windows from the OPB space to the ISA space */
+ memory_region_init_alias(&lpc->opb_isa_io, OBJECT(dev), "lpc-isa-io",
+ &lpc->isa_io, 0, LPC_IO_OPB_SIZE);
+ memory_region_add_subregion(&lpc->opb_mr, LPC_IO_OPB_ADDR,
+ &lpc->opb_isa_io);
+ memory_region_init_alias(&lpc->opb_isa_mem, OBJECT(dev), "lpc-isa-mem",
+ &lpc->isa_mem, 0, LPC_MEM_OPB_SIZE);
+ memory_region_add_subregion(&lpc->opb_mr, LPC_MEM_OPB_ADDR,
+ &lpc->opb_isa_mem);
+ memory_region_init_alias(&lpc->opb_isa_fw, OBJECT(dev), "lpc-isa-fw",
+ &lpc->isa_fw, 0, LPC_FW_OPB_SIZE);
+ memory_region_add_subregion(&lpc->opb_mr, LPC_FW_OPB_ADDR,
+ &lpc->opb_isa_fw);
+
+ /* Create MMIO regions for LPC HC and OPB registers */
+ memory_region_init_io(&lpc->opb_master_regs, OBJECT(dev), &opb_master_ops,
+ lpc, "lpc-opb-master", LPC_OPB_REGS_OPB_SIZE);
+ memory_region_add_subregion(&lpc->opb_mr, LPC_OPB_REGS_OPB_ADDR,
+ &lpc->opb_master_regs);
+ memory_region_init_io(&lpc->lpc_hc_regs, OBJECT(dev), &lpc_hc_ops, lpc,
+ "lpc-hc", LPC_HC_REGS_OPB_SIZE);
+ memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR,
+ &lpc->lpc_hc_regs);
+}
+
+/*
+ * qdev properties of the LPC controller: "psi" is a link property of type
+ * TYPE_PNV_PSI stored in PnvLpcController::psi. pnv_lpc_realize() asserts
+ * that it has been set.
+ */
+static Property pnv_lpc_properties[] = {
+ DEFINE_PROP_LINK("psi", PnvLpcController, psi, TYPE_PNV_PSI, PnvPsi *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer of the LPC base type: installs the common realize
+ * handler and the qdev properties, sets the description and marks the
+ * device as not user creatable.
+ */
+static void pnv_lpc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *devc = DEVICE_CLASS(klass);
+
+    devc->desc = "PowerNV LPC Controller";
+    devc->user_creatable = false;
+    devc->realize = pnv_lpc_realize;
+    device_class_set_props(devc, pnv_lpc_properties);
+}
+
+/* QOM type description of the abstract LPC controller base device. */
+static const TypeInfo pnv_lpc_info = {
+    .abstract      = true,
+    .parent        = TYPE_DEVICE,
+    .name          = TYPE_PNV_LPC,
+    .class_size    = sizeof(PnvLpcClass),
+    .class_init    = pnv_lpc_class_init,
+    .instance_size = sizeof(PnvLpcController),
+};
+
+/* Register the abstract LPC type followed by the POWER8/9/10 variants. */
+static void pnv_lpc_register_types(void)
+{
+    static const TypeInfo *const lpc_types[] = {
+        &pnv_lpc_info,
+        &pnv_lpc_power8_info,
+        &pnv_lpc_power9_info,
+        &pnv_lpc_power10_info,
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(lpc_types) / sizeof(lpc_types[0]); i++) {
+        type_register_static(lpc_types[i]);
+    }
+}
+
+type_init(pnv_lpc_register_types)
+
+/*
+ * Without the built-in LPC interrupt deserializer, the ISA bus still
+ * needs a set of qirqs or things will go bad.
+ *
+ * Most machines using pre-Naples chips (without said deserializer)
+ * have a CPLD that collects the SerIRQ and raises them as a single
+ * level interrupt to the P8 chip. This handler models that: it tracks
+ * the level of every ISA IRQ in the machine's cpld_irqstate and, when
+ * the state changes, drives PSIHB_IRQ_EXTERNAL with "any IRQ asserted".
+ */
+static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level)
+{
+    PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
+    uint32_t previous = pnv->cpld_irqstate;
+    PnvLpcController *lpc = PNV_LPC(opaque);
+    uint32_t bit = 1u << n;
+
+    if (level) {
+        pnv->cpld_irqstate |= bit;
+    } else {
+        pnv->cpld_irqstate &= ~bit;
+    }
+
+    if (pnv->cpld_irqstate == previous) {
+        return;
+    }
+
+    pnv_psi_irq_set(lpc->psi, PSIHB_IRQ_EXTERNAL, pnv->cpld_irqstate != 0);
+}
+
+/*
+ * ISA IRQ handler used with the built-in SerIRQ decoder: a raised line is
+ * latched in the LPC HC IRQ status register and the interrupt state is
+ * re-evaluated. Lowering a line changes nothing.
+ */
+static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level)
+{
+    PnvLpcController *lpc = PNV_LPC(opaque);
+
+    /* The Naples HW latches the 1 levels, clearing is done by SW */
+    if (!level) {
+        return;
+    }
+
+    lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n;
+    pnv_lpc_eval_irqs(lpc);
+}
+
+/*
+ * Create the ISA bus on top of the controller's ISA memory and IO regions
+ * and wire ISA_NUM_IRQS interrupt lines to it.
+ *
+ * @use_cpld selects the CPLD style handler instead of the one based on the
+ * LPC HC IRQ status register. Returns the new bus, or NULL with @errp set
+ * when the bus could not be created.
+ */
+ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp)
+{
+    Error *err = NULL;
+    qemu_irq_handler irq_handler;
+    qemu_irq *isa_irqs;
+    ISABus *bus;
+
+    /*
+     * let isa_bus_new() create its own bridge on SysBus otherwise
+     * devices specified on the command line won't find the bus and
+     * will fail to create.
+     */
+    bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return NULL;
+    }
+
+    /*
+     * Not all variants have a working serial irq decoder. If not,
+     * handling of LPC interrupts becomes a platform issue (some
+     * platforms have a CPLD to do it).
+     */
+    irq_handler = use_cpld ? pnv_lpc_isa_irq_handler_cpld
+                           : pnv_lpc_isa_irq_handler;
+
+    isa_irqs = qemu_allocate_irqs(irq_handler, lpc, ISA_NUM_IRQS);
+    isa_bus_irqs(bus, isa_irqs);
+
+    return bus;
+}
diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c
new file mode 100644
index 000000000..5a716c256
--- /dev/null
+++ b/hw/ppc/pnv_occ.c
@@ -0,0 +1,302 @@
+/*
+ * QEMU PowerPC PowerNV Emulation of a few OCC related registers
+ *
+ * Copyright (c) 2015-2017, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "target/ppc/cpu.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/qdev-properties.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/ppc/pnv_occ.h"
+
+/*
+ * POWER8 OCC XSCOM register numbers, compared against (addr >> 3) by the
+ * POWER8 XSCOM handlers.
+ */
+#define OCB_OCI_OCCMISC 0x4020
+#define OCB_OCI_OCCMISC_AND 0x4021
+#define OCB_OCI_OCCMISC_OR 0x4022
+
+/*
+ * OCC sensors
+ *
+ * Addresses matched by pnv_occ_common_area_read(), which returns a fixed
+ * value for each of them and 0 for everything else.
+ */
+#define OCC_SENSOR_DATA_BLOCK_OFFSET 0x580000
+#define OCC_SENSOR_DATA_VALID 0x580001
+#define OCC_SENSOR_DATA_VERSION 0x580002
+#define OCC_SENSOR_DATA_READING_VERSION 0x580004
+#define OCC_SENSOR_DATA_NR_SENSORS 0x580008
+#define OCC_SENSOR_DATA_NAMES_OFFSET 0x580010
+#define OCC_SENSOR_DATA_READING_PING_OFFSET 0x580014
+#define OCC_SENSOR_DATA_READING_PONG_OFFSET 0x58000c
+#define OCC_SENSOR_DATA_NAME_LENGTH 0x58000d
+#define OCC_SENSOR_NAME_STRUCTURE_TYPE 0x580023
+#define OCC_SENSOR_LOC_CORE 0x580022
+#define OCC_SENSOR_LOC_GPU 0x580020
+#define OCC_SENSOR_TYPE_POWER 0x580003
+#define OCC_SENSOR_NAME 0x580005
+#define HWMON_SENSORS_MASK 0x58001e
+#define SLW_IMAGE_BASE 0x0
+
+/*
+ * Store a new OCCMISC value. Only the top 16 bits of @val are kept, and
+ * the most significant bit drives the PSI interrupt selected by the
+ * device class.
+ */
+static void pnv_occ_set_misc(PnvOCC *occ, uint64_t val)
+{
+    PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+    uint64_t kept = val & 0xffff000000000000ull;
+
+    occ->occmisc = kept;
+    pnv_psi_irq_set(occ->psi, poc->psi_irq, (kept >> 63) != 0);
+}
+
+/*
+ * XSCOM read handler for the POWER8 OCC registers.
+ *
+ * The register number is @addr >> 3. Only OCB_OCI_OCCMISC is readable;
+ * any other register is logged as unimplemented and reads as 0.
+ */
+static uint64_t pnv_occ_power8_xscom_read(void *opaque, hwaddr addr,
+                                          unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+    uint64_t val = 0;
+
+    switch (offset) {
+    case OCB_OCI_OCCMISC:
+        val = occ->occmisc;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+    return val;
+}
+
+/*
+ * XSCOM write handler for the POWER8 OCC registers.
+ *
+ * The register number is @addr >> 3. OCCMISC can be written directly or
+ * updated through its AND and OR registers; any other register is logged
+ * as unimplemented.
+ */
+static void pnv_occ_power8_xscom_write(void *opaque, hwaddr addr,
+                                       uint64_t val, unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+
+    switch (offset) {
+    case OCB_OCI_OCCMISC_AND:
+        pnv_occ_set_misc(occ, occ->occmisc & val);
+        break;
+    case OCB_OCI_OCCMISC_OR:
+        pnv_occ_set_misc(occ, occ->occmisc | val);
+        break;
+    case OCB_OCI_OCCMISC:
+        pnv_occ_set_misc(occ, val);
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+}
+
+/*
+ * Read handler of the OCC common area: returns a fixed value for a
+ * handful of known addresses and 0 for every other address.
+ */
+static uint64_t pnv_occ_common_area_read(void *opaque, hwaddr addr,
+                                         unsigned width)
+{
+    uint64_t val = 0;
+
+    switch (addr) {
+    /*
+     * occ-sensor sanity check that asserts the sensor
+     * header block
+     */
+    case OCC_SENSOR_DATA_BLOCK_OFFSET:
+    case OCC_SENSOR_DATA_VALID:
+    case OCC_SENSOR_DATA_VERSION:
+    case OCC_SENSOR_DATA_READING_VERSION:
+    case OCC_SENSOR_DATA_NR_SENSORS:
+    case OCC_SENSOR_DATA_NAMES_OFFSET:
+    case OCC_SENSOR_DATA_READING_PING_OFFSET:
+    case OCC_SENSOR_DATA_READING_PONG_OFFSET:
+    case OCC_SENSOR_NAME_STRUCTURE_TYPE:
+        val = 1;
+        break;
+    case OCC_SENSOR_DATA_NAME_LENGTH:
+        val = 0x30;
+        break;
+    case OCC_SENSOR_LOC_CORE:
+        val = 0x0040;
+        break;
+    case OCC_SENSOR_TYPE_POWER:
+        val = 0x0080;
+        break;
+    case OCC_SENSOR_NAME:
+        val = 0x1000;
+        break;
+    case HWMON_SENSORS_MASK:
+    case OCC_SENSOR_LOC_GPU:
+        val = 0x8e00;
+        break;
+    case SLW_IMAGE_BASE:
+        val = 0x1000000000000000;
+        break;
+    }
+
+    return val;
+}
+
+/* Write handler of the OCC common area: writes are accepted and discarded. */
+static void pnv_occ_common_area_write(void *opaque, hwaddr addr,
+                                      uint64_t val, unsigned width)
+{
+    /* callback function defined to occ common area write */
+}
+
+/* POWER8 OCC XSCOM registers: big-endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_occ_power8_xscom_ops = {
+    .read = pnv_occ_power8_xscom_read,
+    .write = pnv_occ_power8_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * OCC common area (SRAM) accessors: big-endian, 1 to 8 byte accesses.
+ * Not static: the symbol has external linkage.
+ */
+const MemoryRegionOps pnv_occ_sram_ops = {
+    .read = pnv_occ_common_area_read,
+    .write = pnv_occ_common_area_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * Class initializer for the POWER8 OCC model: selects the POWER8 XSCOM
+ * region size and ops and the PSI interrupt.
+ */
+static void pnv_occ_power8_class_init(ObjectClass *klass, void *data)
+{
+    PnvOCCClass *occ_class = PNV_OCC_CLASS(klass);
+
+    occ_class->psi_irq = PSIHB_IRQ_OCC;
+    occ_class->xscom_ops = &pnv_occ_power8_xscom_ops;
+    occ_class->xscom_size = PNV_XSCOM_OCC_SIZE;
+}
+
+/* QOM type description of the POWER8 OCC device (child of TYPE_PNV_OCC). */
+static const TypeInfo pnv_occ_power8_type_info = {
+    .parent        = TYPE_PNV_OCC,
+    .name          = TYPE_PNV8_OCC,
+    .class_init    = pnv_occ_power8_class_init,
+    .instance_size = sizeof(PnvOCC),
+};
+
+/*
+ * POWER9 OCC XSCOM register numbers, compared against (addr >> 3) by the
+ * POWER9 XSCOM handlers.
+ */
+#define P9_OCB_OCI_OCCMISC 0x6080
+#define P9_OCB_OCI_OCCMISC_CLEAR 0x6081
+#define P9_OCB_OCI_OCCMISC_OR 0x6082
+
+
+/*
+ * XSCOM read handler for the POWER9 OCC registers.
+ *
+ * The register number is @addr >> 3. Only P9_OCB_OCI_OCCMISC is readable;
+ * any other register is logged as unimplemented and reads as 0.
+ */
+static uint64_t pnv_occ_power9_xscom_read(void *opaque, hwaddr addr,
+                                          unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+    uint64_t val = 0;
+
+    switch (offset) {
+    case P9_OCB_OCI_OCCMISC:
+        val = occ->occmisc;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+    return val;
+}
+
+/*
+ * XSCOM write handler for the POWER9 OCC registers.
+ *
+ * The register number is @addr >> 3. OCCMISC can be written directly,
+ * ORed through its OR register or reset through its CLEAR register; any
+ * other register is logged as unimplemented.
+ */
+static void pnv_occ_power9_xscom_write(void *opaque, hwaddr addr,
+                                       uint64_t val, unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+
+    switch (offset) {
+    case P9_OCB_OCI_OCCMISC_CLEAR:
+        /*
+         * NOTE(review): the whole register is zeroed regardless of @val -
+         * confirm against the hardware specification.
+         */
+        pnv_occ_set_misc(occ, 0);
+        break;
+    case P9_OCB_OCI_OCCMISC_OR:
+        pnv_occ_set_misc(occ, occ->occmisc | val);
+        break;
+    case P9_OCB_OCI_OCCMISC:
+        pnv_occ_set_misc(occ, val);
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+}
+
+/* POWER9 OCC XSCOM registers: big-endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_occ_power9_xscom_ops = {
+    .read = pnv_occ_power9_xscom_read,
+    .write = pnv_occ_power9_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * Class initializer for the POWER9 OCC model: selects the POWER9 XSCOM
+ * region size and ops and the PSI interrupt.
+ */
+static void pnv_occ_power9_class_init(ObjectClass *klass, void *data)
+{
+    PnvOCCClass *occ_class = PNV_OCC_CLASS(klass);
+
+    occ_class->psi_irq = PSIHB9_IRQ_OCC;
+    occ_class->xscom_ops = &pnv_occ_power9_xscom_ops;
+    occ_class->xscom_size = PNV9_XSCOM_OCC_SIZE;
+}
+
+/* QOM type description of the POWER9 OCC device (child of TYPE_PNV_OCC). */
+static const TypeInfo pnv_occ_power9_type_info = {
+    .parent        = TYPE_PNV_OCC,
+    .name          = TYPE_PNV9_OCC,
+    .class_init    = pnv_occ_power9_class_init,
+    .instance_size = sizeof(PnvOCC),
+};
+
+/*
+ * Realize handler shared by all OCC models. Asserts that the "psi" link
+ * has been set, resets OCCMISC and initializes the XSCOM register region
+ * (size and ops from the device class) and the common area MMIO region.
+ */
+static void pnv_occ_realize(DeviceState *dev, Error **errp)
+{
+    PnvOCC *s = PNV_OCC(dev);
+    PnvOCCClass *occ_class = PNV_OCC_GET_CLASS(s);
+
+    assert(s->psi);
+
+    s->occmisc = 0;
+
+    /* XScom region for OCC registers */
+    pnv_xscom_region_init(&s->xscom_regs, OBJECT(dev), occ_class->xscom_ops,
+                          s, "xscom-occ", occ_class->xscom_size);
+
+    /* OCC common area mmio region for OCC SRAM registers */
+    memory_region_init_io(&s->sram_regs, OBJECT(dev), &pnv_occ_sram_ops, s,
+                          "occ-common-area", PNV_OCC_SENSOR_DATA_BLOCK_SIZE);
+}
+
+/*
+ * qdev properties of the OCC device: "psi" is a link property of type
+ * TYPE_PNV_PSI stored in PnvOCC::psi. pnv_occ_realize() asserts that it
+ * has been set.
+ */
+static Property pnv_occ_properties[] = {
+ DEFINE_PROP_LINK("psi", PnvOCC, psi, TYPE_PNV_PSI, PnvPsi *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer of the OCC base type: installs the common realize
+ * handler and the qdev properties, sets the description and marks the
+ * device as not user creatable.
+ */
+static void pnv_occ_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *devc = DEVICE_CLASS(klass);
+
+    devc->desc = "PowerNV OCC Controller";
+    devc->user_creatable = false;
+    devc->realize = pnv_occ_realize;
+    device_class_set_props(devc, pnv_occ_properties);
+}
+
+/* QOM type description of the abstract OCC base device. */
+static const TypeInfo pnv_occ_type_info = {
+    .abstract      = true,
+    .parent        = TYPE_DEVICE,
+    .name          = TYPE_PNV_OCC,
+    .class_size    = sizeof(PnvOCCClass),
+    .class_init    = pnv_occ_class_init,
+    .instance_size = sizeof(PnvOCC),
+};
+
+/* Register the abstract OCC type followed by its POWER8/POWER9 variants. */
+static void pnv_occ_register_types(void)
+{
+    static const TypeInfo *const occ_types[] = {
+        &pnv_occ_type_info,
+        &pnv_occ_power8_type_info,
+        &pnv_occ_power9_type_info,
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(occ_types) / sizeof(occ_types[0]); i++) {
+        type_register_static(occ_types[i]);
+    }
+}
+
+type_init(pnv_occ_register_types);
diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c
new file mode 100644
index 000000000..83ecccca2
--- /dev/null
+++ b/hw/ppc/pnv_pnor.c
@@ -0,0 +1,141 @@
+/*
+ * QEMU PowerNV PNOR simple model
+ *
+ * Copyright (c) 2015-2019, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/units.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/blockdev.h"
+#include "hw/loader.h"
+#include "hw/ppc/pnv_pnor.h"
+#include "hw/qdev-properties.h"
+#include "hw/qdev-properties-system.h"
+
+/*
+ * MMIO read handler: builds a value out of @size bytes of the flash image
+ * starting at @addr. The byte at the lowest address ends up in the most
+ * significant position of the result.
+ */
+static uint64_t pnv_pnor_read(void *opaque, hwaddr addr, unsigned size)
+{
+    PnvPnor *pnor = PNV_PNOR(opaque);
+    uint64_t value = 0;
+    unsigned shift = 8 * size;
+    unsigned n;
+
+    for (n = 0; n < size; n++) {
+        /* Each following byte lands 8 bits lower than the previous one */
+        shift -= 8;
+        value |= (uint64_t) pnor->storage[addr + n] << shift;
+    }
+
+    return value;
+}
+
+/*
+ * Write the part of the in-memory flash image covering
+ * [@offset, @offset + @size) back to the block backend. The range is
+ * widened to BDRV_SECTOR_SIZE boundaries. Nothing is done when there is
+ * no backend or when it is not writable; a failed write is only reported.
+ */
+static void pnv_pnor_update(PnvPnor *s, int offset, int size)
+{
+    int start;
+    int end;
+    int rc;
+
+    if (!s->blk || !blk_is_writable(s->blk)) {
+        return;
+    }
+
+    end = offset + size;
+    start = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE);
+    end = QEMU_ALIGN_UP(end, BDRV_SECTOR_SIZE);
+
+    rc = blk_pwrite(s->blk, start, s->storage + start, end - start, 0);
+    if (rc >= 0) {
+        return;
+    }
+
+    error_report("Could not update PNOR offset=0x%" PRIx32" : %s", start,
+                 strerror(-rc));
+}
+
+/*
+ * MMIO write handler: stores the @size low-order bytes of @data into the
+ * flash image at @addr, most significant byte first, then pushes the
+ * modified range to the backend through pnv_pnor_update().
+ */
+static void pnv_pnor_write(void *opaque, hwaddr addr, uint64_t data,
+                           unsigned size)
+{
+    PnvPnor *pnor = PNV_PNOR(opaque);
+    unsigned shift = 8 * size;
+    unsigned n;
+
+    for (n = 0; n < size; n++) {
+        shift -= 8;
+        pnor->storage[addr + n] = (data >> shift) & 0xFF;
+    }
+
+    pnv_pnor_update(pnor, addr, size);
+}
+
+/*
+ * Access handlers of the PNOR MMIO window. Accesses of 1 to 4 bytes are
+ * accepted and the region is declared big endian.
+ *
+ * TODO: Check endianness: skiboot is BIG, Aspeed AHB is LITTLE, flash
+ * is BIG.
+ */
+static const MemoryRegionOps pnv_pnor_ops = {
+ .read = pnv_pnor_read,
+ .write = pnv_pnor_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 1,
+ .max_access_size = 4,
+ },
+};
+
+/*
+ * Realize handler of the PNOR device.
+ *
+ * With a "drive" backend, the flash size is the backend length and the
+ * whole content is read into s->storage. Without one, a buffer of the
+ * "size" property is allocated and filled with 0xFF. An MMIO region of
+ * s->size bytes backed by pnv_pnor_ops is then created.
+ *
+ * Failures are reported through @errp, in which case the function returns
+ * before the MMIO region is created.
+ */
+static void pnv_pnor_realize(DeviceState *dev, Error **errp)
+{
+ PnvPnor *s = PNV_PNOR(dev);
+ int ret;
+
+ if (s->blk) {
+ /* Always request read access; request write only if the backend allows it */
+ uint64_t perm = BLK_PERM_CONSISTENT_READ |
+ (blk_supports_write_perm(s->blk) ? BLK_PERM_WRITE : 0);
+ ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp);
+ if (ret < 0) {
+ return;
+ }
+
+ /* The backend length overrides the "size" property */
+ s->size = blk_getlength(s->blk);
+ if (s->size <= 0) {
+ error_setg(errp, "failed to get flash size");
+ return;
+ }
+
+ s->storage = blk_blockalign(s->blk, s->size);
+
+ /*
+ * NOTE(review): s->storage is not released on this error path;
+ * confirm whether a failed realize should free it.
+ */
+ if (blk_pread(s->blk, 0, s->storage, s->size) != s->size) {
+ error_setg(errp, "failed to read the initial flash content");
+ return;
+ }
+ } else {
+ /* No backend: the flash content is all 0xFF */
+ s->storage = blk_blockalign(NULL, s->size);
+ memset(s->storage, 0xFF, s->size);
+ }
+
+ memory_region_init_io(&s->mmio, OBJECT(s), &pnv_pnor_ops, s,
+ TYPE_PNV_PNOR, s->size);
+}
+
+/*
+ * qdev properties: "size" is the flash size (128 MiB by default) and
+ * "drive" the optional block backend holding the flash content.
+ */
+static Property pnv_pnor_properties[] = {
+ DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB),
+ DEFINE_PROP_DRIVE("drive", PnvPnor, blk),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/* Class initializer: installs the PNOR properties and realize handler. */
+static void pnv_pnor_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    device_class_set_props(dev_class, pnv_pnor_properties);
+    dev_class->realize = pnv_pnor_realize;
+}
+
+/* QOM type record of the PNOR device, a child of TYPE_SYS_BUS_DEVICE. */
+static const TypeInfo pnv_pnor_info = {
+ .name = TYPE_PNV_PNOR,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .instance_size = sizeof(PnvPnor),
+ .class_init = pnv_pnor_class_init,
+};
+
+/* Register the PNOR type with QOM. */
+static void pnv_pnor_register_types(void)
+{
+ type_register_static(&pnv_pnor_info);
+}
+
+/* Hook the registration function into the type_init() mechanism */
+type_init(pnv_pnor_register_types)
diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
new file mode 100644
index 000000000..cd9a2c595
--- /dev/null
+++ b/hw/ppc/pnv_psi.c
@@ -0,0 +1,967 @@
+/*
+ * QEMU PowerPC PowerNV Processor Service Interface (PSI) model
+ *
+ * Copyright (c) 2015-2017, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "target/ppc/cpu.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "sysemu/reset.h"
+#include "qapi/error.h"
+#include "monitor/monitor.h"
+
+
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+#include "hw/qdev-properties.h"
+#include "hw/ppc/pnv_psi.h"
+
+#include <libfdt.h>
+
+#define PSIHB_XSCOM_FIR_RW 0x00
+#define PSIHB_XSCOM_FIR_AND 0x01
+#define PSIHB_XSCOM_FIR_OR 0x02
+#define PSIHB_XSCOM_FIRMASK_RW 0x03
+#define PSIHB_XSCOM_FIRMASK_AND 0x04
+#define PSIHB_XSCOM_FIRMASK_OR 0x05
+#define PSIHB_XSCOM_FIRACT0 0x06
+#define PSIHB_XSCOM_FIRACT1 0x07
+
+/* Host Bridge Base Address Register */
+#define PSIHB_XSCOM_BAR 0x0a
+#define PSIHB_BAR_EN 0x0000000000000001ull
+
+/* FSP Base Address Register */
+#define PSIHB_XSCOM_FSPBAR 0x0b
+
+/* PSI Host Bridge Control/Status Register */
+#define PSIHB_XSCOM_CR 0x0e
+#define PSIHB_CR_FSP_CMD_ENABLE 0x8000000000000000ull
+#define PSIHB_CR_FSP_MMIO_ENABLE 0x4000000000000000ull
+#define PSIHB_CR_FSP_IRQ_ENABLE 0x1000000000000000ull
+#define PSIHB_CR_FSP_ERR_RSP_ENABLE 0x0800000000000000ull
+#define PSIHB_CR_PSI_LINK_ENABLE 0x0400000000000000ull
+#define PSIHB_CR_FSP_RESET 0x0200000000000000ull
+#define PSIHB_CR_PSIHB_RESET 0x0100000000000000ull
+#define PSIHB_CR_PSI_IRQ 0x0000800000000000ull
+#define PSIHB_CR_FSP_IRQ 0x0000400000000000ull
+#define PSIHB_CR_FSP_LINK_ACTIVE 0x0000200000000000ull
+#define PSIHB_CR_IRQ_CMD_EXPECT 0x0000010000000000ull
+ /* and more ... */
+
+/* PSIHB Status / Error Mask Register */
+#define PSIHB_XSCOM_SEMR 0x0f
+
+/* XIVR, to signal interrupts to the CEC firmware. more XIVR below. */
+#define PSIHB_XSCOM_XIVR_FSP 0x10
+#define PSIHB_XIVR_SERVER_SH 40
+#define PSIHB_XIVR_SERVER_MSK (0xffffull << PSIHB_XIVR_SERVER_SH)
+#define PSIHB_XIVR_PRIO_SH 32
+#define PSIHB_XIVR_PRIO_MSK (0xffull << PSIHB_XIVR_PRIO_SH)
+#define PSIHB_XIVR_SRC_SH 29
+#define PSIHB_XIVR_SRC_MSK (0x7ull << PSIHB_XIVR_SRC_SH)
+#define PSIHB_XIVR_PENDING 0x01000000ull
+
+/* PSI Host Bridge Set Control/ Status Register */
+#define PSIHB_XSCOM_SCR 0x12
+
+/* PSI Host Bridge Clear Control/ Status Register */
+#define PSIHB_XSCOM_CCR 0x13
+
+/* DMA Upper Address Register */
+#define PSIHB_XSCOM_DMA_UPADD 0x14
+
+/* Interrupt Status */
+#define PSIHB_XSCOM_IRQ_STAT 0x15
+#define PSIHB_IRQ_STAT_OCC 0x0000001000000000ull
+#define PSIHB_IRQ_STAT_FSI 0x0000000800000000ull
+#define PSIHB_IRQ_STAT_LPCI2C 0x0000000400000000ull
+#define PSIHB_IRQ_STAT_LOCERR 0x0000000200000000ull
+#define PSIHB_IRQ_STAT_EXT 0x0000000100000000ull
+
+/* remaining XIVR */
+#define PSIHB_XSCOM_XIVR_OCC 0x16
+#define PSIHB_XSCOM_XIVR_FSI 0x17
+#define PSIHB_XSCOM_XIVR_LPCI2C 0x18
+#define PSIHB_XSCOM_XIVR_LOCERR 0x19
+#define PSIHB_XSCOM_XIVR_EXT 0x1a
+
+/* Interrupt Requester Source Compare Register */
+#define PSIHB_XSCOM_IRSN 0x1b
+#define PSIHB_IRSN_COMP_SH 45
+#define PSIHB_IRSN_COMP_MSK (0x7ffffull << PSIHB_IRSN_COMP_SH)
+#define PSIHB_IRSN_IRQ_MUX 0x0000000800000000ull
+#define PSIHB_IRSN_IRQ_RESET 0x0000000400000000ull
+#define PSIHB_IRSN_DOWNSTREAM_EN 0x0000000200000000ull
+#define PSIHB_IRSN_UPSTREAM_EN 0x0000000100000000ull
+#define PSIHB_IRSN_COMPMASK_SH 13
+#define PSIHB_IRSN_COMPMASK_MSK (0x7ffffull << PSIHB_IRSN_COMPMASK_SH)
+
+#define PSIHB_BAR_MASK 0x0003fffffff00000ull
+#define PSIHB_FSPBAR_MASK 0x0003ffff00000000ull
+
+#define PSIHB9_BAR_MASK 0x00fffffffff00000ull
+#define PSIHB9_FSPBAR_MASK 0x00ffffff00000000ull
+
+/*
+ * Convert an MMIO byte offset into an index of the regs[] array: the
+ * offset is divided by 8 and offset 0 maps to index PSIHB_XSCOM_BAR.
+ */
+#define PSIHB_REG(addr) (((addr) >> 3) + PSIHB_XSCOM_BAR)
+
+/*
+ * Update the PSI host bridge BAR register and remap the MMIO region
+ * accordingly: the region is unmapped if it was enabled and mapped again
+ * at the new address if the enable bit of @bar is set.
+ */
+static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar)
+{
+    MemoryRegion *sysmem = get_system_memory();
+    PnvPsiClass *psi_class = PNV_PSI_GET_CLASS(psi);
+    bool was_enabled = psi->regs[PSIHB_XSCOM_BAR] & PSIHB_BAR_EN;
+    bool enable = bar & PSIHB_BAR_EN;
+
+    /* Only the address bits of this chip generation and EN are kept */
+    psi->regs[PSIHB_XSCOM_BAR] = bar & (psi_class->bar_mask | PSIHB_BAR_EN);
+
+    /* Drop the previous mapping, if any ... */
+    if (was_enabled) {
+        memory_region_del_subregion(sysmem, &psi->regs_mr);
+    }
+
+    /* ... and install the new one when the BAR is enabled */
+    if (enable) {
+        memory_region_add_subregion(sysmem, bar & psi_class->bar_mask,
+                                    &psi->regs_mr);
+    }
+}
+
+/*
+ * Placeholder invoked when the FSP BAR or the FSP MMIO enable bit changes.
+ * It currently does nothing.
+ */
+static void pnv_psi_update_fsp_mr(PnvPsi *psi)
+{
+ /* TODO: Update FSP MR if/when we support FSP BAR */
+}
+
+/*
+ * Store a new value in the Control/Status register and react to the bits
+ * that changed: a toggle of FSP_MMIO_ENABLE refreshes the FSP region.
+ */
+static void pnv_psi_set_cr(PnvPsi *psi, uint64_t cr)
+{
+    uint64_t changed = psi->regs[PSIHB_XSCOM_CR] ^ cr;
+
+    psi->regs[PSIHB_XSCOM_CR] = cr;
+
+    if (changed & PSIHB_CR_FSP_MMIO_ENABLE) {
+        pnv_psi_update_fsp_mr(psi);
+    }
+}
+
+/*
+ * Handle a write to the IRSN register (POWER8): latch the implemented
+ * bits and derive the ICS interrupt number offset from the compare value.
+ */
+static void pnv_psi_set_irsn(PnvPsi *psi, uint64_t val)
+{
+    /*
+     * Bits kept in the register. The up/down enable bits, IRQ_MUX and
+     * the reset bit are stored but otherwise ignored by this model.
+     */
+    const uint64_t stored_bits = PSIHB_IRSN_COMP_MSK |
+                                 PSIHB_IRSN_IRQ_MUX |
+                                 PSIHB_IRSN_IRQ_RESET |
+                                 PSIHB_IRSN_DOWNSTREAM_EN |
+                                 PSIHB_IRSN_UPSTREAM_EN;
+    ICSState *ics = &PNV8_PSI(psi)->ics;
+
+    psi->regs[PSIHB_XSCOM_IRSN] = val & stored_bits;
+
+    /*
+     * The compare mask is ignored as well, the ICS emulation being too
+     * simple to make any use of it: the offset comes straight from the
+     * compare value.
+     */
+    ics->offset = (val & PSIHB_IRSN_COMP_MSK) >> PSIHB_IRSN_COMP_SH;
+}
+
+/*
+ * XIVR register of each POWER8 PSI interrupt, indexed by PSIHB_IRQ_*
+ * number.
+ *
+ * FSP and PSI interrupts are muxed under the same number.
+ */
+static const uint32_t xivr_regs[] = {
+ [PSIHB_IRQ_PSI] = PSIHB_XSCOM_XIVR_FSP,
+ [PSIHB_IRQ_FSP] = PSIHB_XSCOM_XIVR_FSP,
+ [PSIHB_IRQ_OCC] = PSIHB_XSCOM_XIVR_OCC,
+ [PSIHB_IRQ_FSI] = PSIHB_XSCOM_XIVR_FSI,
+ [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_XIVR_LPCI2C,
+ [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_XIVR_LOCERR,
+ [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_XIVR_EXT,
+};
+
+/*
+ * Register holding the status bit of each POWER8 PSI interrupt, indexed
+ * by PSIHB_IRQ_* number: CR for PSI and FSP, IRQ_STAT for the others.
+ */
+static const uint32_t stat_regs[] = {
+ [PSIHB_IRQ_PSI] = PSIHB_XSCOM_CR,
+ [PSIHB_IRQ_FSP] = PSIHB_XSCOM_CR,
+ [PSIHB_IRQ_OCC] = PSIHB_XSCOM_IRQ_STAT,
+ [PSIHB_IRQ_FSI] = PSIHB_XSCOM_IRQ_STAT,
+ [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_IRQ_STAT,
+ [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_IRQ_STAT,
+ [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_IRQ_STAT,
+};
+
+/*
+ * Status bit of each POWER8 PSI interrupt within the register given by
+ * stat_regs[], indexed by PSIHB_IRQ_* number.
+ */
+static const uint64_t stat_bits[] = {
+ [PSIHB_IRQ_PSI] = PSIHB_CR_PSI_IRQ,
+ [PSIHB_IRQ_FSP] = PSIHB_CR_FSP_IRQ,
+ [PSIHB_IRQ_OCC] = PSIHB_IRQ_STAT_OCC,
+ [PSIHB_IRQ_FSI] = PSIHB_IRQ_STAT_FSI,
+ [PSIHB_IRQ_LPC_I2C] = PSIHB_IRQ_STAT_LPCI2C,
+ [PSIHB_IRQ_LOCAL_ERR] = PSIHB_IRQ_STAT_LOCERR,
+ [PSIHB_IRQ_EXTERNAL] = PSIHB_IRQ_STAT_EXT,
+};
+
+/*
+ * Set the level of PSI interrupt @irq to @state, using the irq_set
+ * handler of the PSI class of @psi.
+ */
+void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state)
+{
+    PnvPsiClass *psi_class = PNV_PSI_GET_CLASS(psi);
+
+    psi_class->irq_set(psi, irq, state);
+}
+
+/*
+ * POWER8 implementation of the PSI irq_set class handler.
+ *
+ * Updates the status bit of interrupt @irq (a PSIHB_IRQ_* number), drives
+ * the qemu_irq line selected by the source field of the matching XIVR
+ * register and refreshes the emulated pending bit of that XIVR.
+ *
+ * An @irq outside of the lookup tables is logged as a guest error and
+ * ignored. If the XIVR source field selects a line that was not allocated
+ * (source >= PSI_NUM_INTERRUPTS), the status and pending bits are still
+ * maintained but no line is raised or lowered.
+ */
+static void pnv_psi_power8_irq_set(PnvPsi *psi, int irq, bool state)
+{
+    uint32_t xivr_reg;
+    uint32_t stat_reg;
+    uint32_t src;
+    bool src_valid;
+    bool masked;
+
+    /* irq indexes the lookup tables: reject anything outside of them */
+    if (irq < 0 || irq > PSIHB_IRQ_EXTERNAL) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
+        return;
+    }
+
+    xivr_reg = xivr_regs[irq];
+    stat_reg = stat_regs[irq];
+
+    src = (psi->regs[xivr_reg] & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH;
+
+    /*
+     * The source field is 3 bits wide and is stored as written by the
+     * guest, so it can select a line beyond the PSI_NUM_INTERRUPTS
+     * entries of psi->qirqs[]. Never index the array with such a value.
+     */
+    src_valid = src < PSI_NUM_INTERRUPTS;
+    if (!src_valid) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "PSI: XIVR source %" PRIu32 " out of range\n", src);
+    }
+
+    if (state) {
+        psi->regs[stat_reg] |= stat_bits[irq];
+        /* TODO: optimization, check mask here. That means
+         * re-evaluating when unmasking
+         */
+        if (src_valid) {
+            qemu_irq_raise(psi->qirqs[src]);
+        }
+    } else {
+        psi->regs[stat_reg] &= ~stat_bits[irq];
+
+        /* FSP and PSI are muxed so don't lower if either is still set */
+        if (stat_reg != PSIHB_XSCOM_CR ||
+            !(psi->regs[stat_reg] & (PSIHB_CR_PSI_IRQ | PSIHB_CR_FSP_IRQ))) {
+            if (src_valid) {
+                qemu_irq_lower(psi->qirqs[src]);
+            }
+        } else {
+            state = true;
+        }
+    }
+
+    /* Note about the emulation of the pending bit: This isn't
+     * entirely correct. The pending bit should be cleared when the
+     * EOI has been received. However, we don't have callbacks on EOI
+     * (especially not under KVM) so no way to emulate that properly,
+     * so instead we just set that bit as the logical "output" of the
+     * XIVR (ie pending & !masked)
+     *
+     * CLG: We could define a new ICS object with a custom eoi()
+     * handler to clear the pending bit. But I am not sure this would
+     * be useful for the software anyhow.
+     */
+    masked = (psi->regs[xivr_reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK;
+    if (state && !masked) {
+        psi->regs[xivr_reg] |= PSIHB_XIVR_PENDING;
+    } else {
+        psi->regs[xivr_reg] &= ~PSIHB_XIVR_PENDING;
+    }
+}
+
+/*
+ * Handle a write of @val to the XIVR register @reg (POWER8).
+ *
+ * The server, priority and source fields of @val are stored, the current
+ * pending bit being preserved, and the ICS entry selected by the source
+ * field is updated with the new server and priority.
+ *
+ * Note that the register is updated before the source number is checked:
+ * an unsupported source is logged and the ICS is left alone, but the
+ * value stays in the register.
+ */
+static void pnv_psi_set_xivr(PnvPsi *psi, uint32_t reg, uint64_t val)
+{
+ ICSState *ics = &PNV8_PSI(psi)->ics;
+ uint16_t server;
+ uint8_t prio;
+ uint8_t src;
+
+ /* Keep the pending bit, take the other fields from the written value */
+ psi->regs[reg] = (psi->regs[reg] & PSIHB_XIVR_PENDING) |
+ (val & (PSIHB_XIVR_SERVER_MSK |
+ PSIHB_XIVR_PRIO_MSK |
+ PSIHB_XIVR_SRC_MSK));
+ val = psi->regs[reg];
+ server = (val & PSIHB_XIVR_SERVER_MSK) >> PSIHB_XIVR_SERVER_SH;
+ prio = (val & PSIHB_XIVR_PRIO_MSK) >> PSIHB_XIVR_PRIO_SH;
+ src = (val & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH;
+
+ if (src >= PSI_NUM_INTERRUPTS) {
+ qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", src);
+ return;
+ }
+
+ /* Remove pending bit if the IRQ is masked */
+ if ((psi->regs[reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK) {
+ psi->regs[reg] &= ~PSIHB_XIVR_PENDING;
+ }
+
+ /* The low order 2 bits are the link pointer (Type II interrupts).
+ * Shift back to get a valid IRQ server.
+ */
+ server >>= 2;
+
+ /* Now because of source remapping, weird things can happen
+ * if you change the source number dynamically, our simple ICS
+ * doesn't deal with remapping. So we just poke a different
+ * ICS entry based on what source number was written. This will
+ * do for now but a more accurate implementation would instead
+ * use a fixed server/prio and a remapper of the generated irq.
+ */
+ ics_write_xive(ics, src, server, prio, prio);
+}
+
+/*
+ * Read the POWER8 PSI register at XSCOM register number @offset.
+ *
+ * Implemented registers return the content of regs[@offset]. Any other
+ * offset is logged as unimplemented and reads as all ones. @mmio is not
+ * used by this function.
+ */
+static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio)
+{
+ /* Value returned for registers which are not implemented */
+ uint64_t val = 0xffffffffffffffffull;
+
+ switch (offset) {
+ case PSIHB_XSCOM_FIR_RW:
+ case PSIHB_XSCOM_FIRACT0:
+ case PSIHB_XSCOM_FIRACT1:
+ case PSIHB_XSCOM_BAR:
+ case PSIHB_XSCOM_FSPBAR:
+ case PSIHB_XSCOM_CR:
+ case PSIHB_XSCOM_XIVR_FSP:
+ case PSIHB_XSCOM_XIVR_OCC:
+ case PSIHB_XSCOM_XIVR_FSI:
+ case PSIHB_XSCOM_XIVR_LPCI2C:
+ case PSIHB_XSCOM_XIVR_LOCERR:
+ case PSIHB_XSCOM_XIVR_EXT:
+ case PSIHB_XSCOM_IRQ_STAT:
+ case PSIHB_XSCOM_SEMR:
+ case PSIHB_XSCOM_DMA_UPADD:
+ case PSIHB_XSCOM_IRSN:
+ val = psi->regs[offset];
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "PSI: read at 0x%" PRIx32 "\n", offset);
+ }
+ return val;
+}
+
+/*
+ * Write @val to the POWER8 PSI register at XSCOM register number @offset.
+ *
+ * @mmio tells whether the access comes from the MMIO region (true) or
+ * from XSCOM (false); it only matters for the BAR register, which cannot
+ * be written through MMIO. Writes to unknown offsets are logged as
+ * unimplemented and dropped.
+ */
+static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val,
+ bool mmio)
+{
+ switch (offset) {
+ /* Plain storage registers */
+ case PSIHB_XSCOM_FIR_RW:
+ case PSIHB_XSCOM_FIRACT0:
+ case PSIHB_XSCOM_FIRACT1:
+ case PSIHB_XSCOM_SEMR:
+ case PSIHB_XSCOM_DMA_UPADD:
+ psi->regs[offset] = val;
+ break;
+ /* OR/AND aliases operating on the FIR register */
+ case PSIHB_XSCOM_FIR_OR:
+ psi->regs[PSIHB_XSCOM_FIR_RW] |= val;
+ break;
+ case PSIHB_XSCOM_FIR_AND:
+ psi->regs[PSIHB_XSCOM_FIR_RW] &= val;
+ break;
+ case PSIHB_XSCOM_BAR:
+ /* Only XSCOM can write this one */
+ if (!mmio) {
+ pnv_psi_set_bar(psi, val);
+ } else {
+ qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of BAR\n");
+ }
+ break;
+ case PSIHB_XSCOM_FSPBAR:
+ psi->regs[PSIHB_XSCOM_FSPBAR] = val & PSIHB_FSPBAR_MASK;
+ pnv_psi_update_fsp_mr(psi);
+ break;
+ case PSIHB_XSCOM_CR:
+ pnv_psi_set_cr(psi, val);
+ break;
+ /* Set and clear aliases of the CR register */
+ case PSIHB_XSCOM_SCR:
+ pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] | val);
+ break;
+ case PSIHB_XSCOM_CCR:
+ pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] & ~val);
+ break;
+ case PSIHB_XSCOM_XIVR_FSP:
+ case PSIHB_XSCOM_XIVR_OCC:
+ case PSIHB_XSCOM_XIVR_FSI:
+ case PSIHB_XSCOM_XIVR_LPCI2C:
+ case PSIHB_XSCOM_XIVR_LOCERR:
+ case PSIHB_XSCOM_XIVR_EXT:
+ pnv_psi_set_xivr(psi, offset, val);
+ break;
+ case PSIHB_XSCOM_IRQ_STAT:
+ /* Read only */
+ qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of IRQ_STAT\n");
+ break;
+ case PSIHB_XSCOM_IRSN:
+ pnv_psi_set_irsn(psi, val);
+ break;
+ default:
+ qemu_log_mask(LOG_UNIMP, "PSI: write at 0x%" PRIx32 "\n", offset);
+ }
+}
+
+/*
+ * The registers seen through the MMIO region are the XSCOM ones, the
+ * two numberings being related by: xscom = (mmio + 0x50) >> 3
+ *
+ * MMIO read handler of the POWER8 PSI: translate the offset and read the
+ * register, flagging the access as coming from MMIO.
+ */
+static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    uint32_t reg = PSIHB_REG(addr);
+
+    return pnv_psi_reg_read(opaque, reg, true);
+}
+
+/*
+ * MMIO write handler of the POWER8 PSI: translate the MMIO offset into a
+ * register number and write it, flagging the access as coming from MMIO.
+ */
+static void pnv_psi_mmio_write(void *opaque, hwaddr addr,
+                               uint64_t val, unsigned size)
+{
+    uint32_t reg = PSIHB_REG(addr);
+
+    pnv_psi_reg_write(opaque, reg, val, true);
+}
+
+/* MMIO access handlers of the POWER8 PSI: big endian, 8-byte accesses only. */
+static const MemoryRegionOps psi_mmio_ops = {
+ .read = pnv_psi_mmio_read,
+ .write = pnv_psi_mmio_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+};
+
+/*
+ * XSCOM read handler of the POWER8 PSI: the register number is the byte
+ * offset divided by 8.
+ */
+static uint64_t pnv_psi_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    uint32_t reg = addr >> 3;
+
+    return pnv_psi_reg_read(opaque, reg, false);
+}
+
+/*
+ * XSCOM write handler of the POWER8 PSI: the register number is the byte
+ * offset divided by 8.
+ */
+static void pnv_psi_xscom_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
+{
+    uint32_t reg = addr >> 3;
+
+    pnv_psi_reg_write(opaque, reg, val, false);
+}
+
+/* XSCOM access handlers of the POWER8 PSI: big endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_psi_xscom_ops = {
+ .read = pnv_psi_xscom_read,
+ .write = pnv_psi_xscom_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ }
+};
+
+/*
+ * Device reset handler: clears every register, then restores the BAR
+ * register to the "bar" value of the device with the enable bit set.
+ */
+static void pnv_psi_reset(DeviceState *dev)
+{
+    PnvPsi *self = PNV_PSI(dev);
+
+    memset(self->regs, 0x0, sizeof(self->regs));
+    self->regs[PSIHB_XSCOM_BAR] = PSIHB_BAR_EN | self->bar;
+}
+
+/* Reset callback registered at realize time: cold resets the device. */
+static void pnv_psi_reset_handler(void *dev)
+{
+    DeviceState *device = DEVICE(dev);
+
+    device_cold_reset(device);
+}
+
+/*
+ * Common tail of the realize handlers of the PSI models: maps the MMIO
+ * region at the default BAR and registers the reset callback.
+ */
+static void pnv_psi_realize(DeviceState *dev, Error **errp)
+{
+    PnvPsi *self = PNV_PSI(dev);
+    uint64_t default_bar = self->bar | PSIHB_BAR_EN;
+
+    /* The MMIO region is enabled at the address of the "bar" property */
+    pnv_psi_set_bar(self, default_bar);
+
+    qemu_register_reset(pnv_psi_reset_handler, dev);
+}
+
+/*
+ * Instance initializer of the POWER8 PSI: creates the embedded ICS child
+ * object and exposes its ICS_PROP_XICS property on the PSI object.
+ */
+static void pnv_psi_power8_instance_init(Object *obj)
+{
+    ICSState *ics = &PNV8_PSI(obj)->ics;
+
+    object_initialize_child(obj, "ics-psi", ics, TYPE_ICS);
+    object_property_add_alias(obj, ICS_PROP_XICS, OBJECT(ics),
+                              ICS_PROP_XICS);
+}
+
+/*
+ * XIVR registers in source number order; used at realize time to give
+ * each XIVR its default source number.
+ */
+static const uint8_t irq_to_xivr[] = {
+ PSIHB_XSCOM_XIVR_FSP,
+ PSIHB_XSCOM_XIVR_OCC,
+ PSIHB_XSCOM_XIVR_FSI,
+ PSIHB_XSCOM_XIVR_LPCI2C,
+ PSIHB_XSCOM_XIVR_LOCERR,
+ PSIHB_XSCOM_XIVR_EXT,
+};
+
+/*
+ * Realize handler of the POWER8 PSI: realizes the embedded ICS with
+ * PSI_NUM_INTERRUPTS sources, allocates the matching qemu_irq lines,
+ * creates the XSCOM and MMIO regions, gives the XIVR registers their
+ * default value and finishes with the common pnv_psi_realize().
+ */
+static void pnv_psi_power8_realize(DeviceState *dev, Error **errp)
+{
+ PnvPsi *psi = PNV_PSI(dev);
+ ICSState *ics = &PNV8_PSI(psi)->ics;
+ unsigned int i;
+
+ /* Create PSI interrupt control source */
+ if (!object_property_set_int(OBJECT(ics), "nr-irqs", PSI_NUM_INTERRUPTS,
+ errp)) {
+ return;
+ }
+ if (!qdev_realize(DEVICE(ics), NULL, errp)) {
+ return;
+ }
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ ics_set_irq_type(ics, i, true);
+ }
+
+ /* One qemu_irq line per ICS source, handled by ics_set_irq() */
+ psi->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs);
+
+ /* XSCOM region for PSI registers */
+ pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_xscom_ops,
+ psi, "xscom-psi", PNV_XSCOM_PSIHB_SIZE);
+
+ /* Initialize MMIO region */
+ memory_region_init_io(&psi->regs_mr, OBJECT(dev), &psi_mmio_ops, psi,
+ "psihb", PNV_PSIHB_SIZE);
+
+ /* Default sources in XIVR: all priority bits set, source number i */
+ for (i = 0; i < PSI_NUM_INTERRUPTS; i++) {
+ uint8_t xivr = irq_to_xivr[i];
+ psi->regs[xivr] = PSIHB_XIVR_PRIO_MSK |
+ ((uint64_t) i << PSIHB_XIVR_SRC_SH);
+ }
+
+ pnv_psi_realize(dev, errp);
+}
+
+/*
+ * dt_xscom handler of the PSI models: adds a "psihb@<pcba>" node under
+ * the XSCOM node at @xscom_offset of @fdt, with its "reg", cell sizes
+ * and "compatible" properties taken from the PSI class. Returns 0.
+ */
+static int pnv_psi_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset)
+{
+ PnvPsiClass *ppc = PNV_PSI_GET_CLASS(dev);
+ char *name;
+ int offset;
+ /* XSCOM base address and size of the PSI, as big endian cells */
+ uint32_t reg[] = {
+ cpu_to_be32(ppc->xscom_pcba),
+ cpu_to_be32(ppc->xscom_size)
+ };
+
+ name = g_strdup_printf("psihb@%x", ppc->xscom_pcba);
+ offset = fdt_add_subnode(fdt, xscom_offset, name);
+ _FDT(offset);
+ g_free(name);
+
+ _FDT(fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)));
+ _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", 2));
+ _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", 1));
+ /* compat holds several NUL separated strings, hence the explicit size */
+ _FDT(fdt_setprop(fdt, offset, "compatible", ppc->compat,
+ ppc->compat_size));
+ return 0;
+}
+
+/* qdev properties: "bar" and "fsp-bar" (PnvPsi.bar / PnvPsi.fsp_bar), 0 by default. */
+static Property pnv_psi_properties[] = {
+ DEFINE_PROP_UINT64("bar", PnvPsi, bar, 0),
+ DEFINE_PROP_UINT64("fsp-bar", PnvPsi, fsp_bar, 0),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initializer of the POWER8 PSI: realize handler, XSCOM window,
+ * BAR mask, interrupt handler and device tree "compatible" strings.
+ */
+static void pnv_psi_power8_class_init(ObjectClass *klass, void *data)
+{
+    /* Two NUL separated strings; sizeof() covers both of them */
+    static const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x";
+    PnvPsiClass *psi_class = PNV_PSI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->realize = pnv_psi_power8_realize;
+    dev_class->desc = "PowerNV PSI Controller POWER8";
+
+    psi_class->compat = compat;
+    psi_class->compat_size = sizeof(compat);
+    psi_class->irq_set = pnv_psi_power8_irq_set;
+    psi_class->bar_mask = PSIHB_BAR_MASK;
+    psi_class->xscom_size = PNV_XSCOM_PSIHB_SIZE;
+    psi_class->xscom_pcba = PNV_XSCOM_PSIHB_BASE;
+}
+
+/* QOM type record of the POWER8 PSI model, a child of TYPE_PNV_PSI. */
+static const TypeInfo pnv_psi_power8_info = {
+ .name = TYPE_PNV8_PSI,
+ .parent = TYPE_PNV_PSI,
+ .instance_size = sizeof(Pnv8Psi),
+ .instance_init = pnv_psi_power8_instance_init,
+ .class_init = pnv_psi_power8_class_init,
+};
+
+
+/* Common registers */
+
+#define PSIHB9_CR 0x20
+#define PSIHB9_SEMR 0x28
+
+/* P9 registers */
+
+#define PSIHB9_INTERRUPT_CONTROL 0x58
+#define PSIHB9_IRQ_METHOD PPC_BIT(0)
+#define PSIHB9_IRQ_RESET PPC_BIT(1)
+#define PSIHB9_ESB_CI_BASE 0x60
+#define PSIHB9_ESB_CI_64K PPC_BIT(1)
+#define PSIHB9_ESB_CI_ADDR_MASK PPC_BITMASK(8, 47)
+#define PSIHB9_ESB_CI_VALID PPC_BIT(63)
+#define PSIHB9_ESB_NOTIF_ADDR 0x68
+#define PSIHB9_ESB_NOTIF_ADDR_MASK PPC_BITMASK(8, 60)
+#define PSIHB9_ESB_NOTIF_VALID PPC_BIT(63)
+#define PSIHB9_IVT_OFFSET 0x70
+#define PSIHB9_IVT_OFF_SHIFT 32
+
+#define PSIHB9_IRQ_LEVEL 0x78 /* assertion */
+#define PSIHB9_IRQ_LEVEL_PSI PPC_BIT(0)
+#define PSIHB9_IRQ_LEVEL_OCC PPC_BIT(1)
+#define PSIHB9_IRQ_LEVEL_FSI PPC_BIT(2)
+#define PSIHB9_IRQ_LEVEL_LPCHC PPC_BIT(3)
+#define PSIHB9_IRQ_LEVEL_LOCAL_ERR PPC_BIT(4)
+#define PSIHB9_IRQ_LEVEL_GLOBAL_ERR PPC_BIT(5)
+#define PSIHB9_IRQ_LEVEL_TPM PPC_BIT(6)
+#define PSIHB9_IRQ_LEVEL_LPC_SIRQ1 PPC_BIT(7)
+#define PSIHB9_IRQ_LEVEL_LPC_SIRQ2 PPC_BIT(8)
+#define PSIHB9_IRQ_LEVEL_LPC_SIRQ3 PPC_BIT(9)
+#define PSIHB9_IRQ_LEVEL_LPC_SIRQ4 PPC_BIT(10)
+#define PSIHB9_IRQ_LEVEL_SBE_I2C PPC_BIT(11)
+#define PSIHB9_IRQ_LEVEL_DIO PPC_BIT(12)
+#define PSIHB9_IRQ_LEVEL_PSU PPC_BIT(13)
+#define PSIHB9_IRQ_LEVEL_I2C_C PPC_BIT(14)
+#define PSIHB9_IRQ_LEVEL_I2C_D PPC_BIT(15)
+#define PSIHB9_IRQ_LEVEL_I2C_E PPC_BIT(16)
+#define PSIHB9_IRQ_LEVEL_SBE PPC_BIT(19)
+
+#define PSIHB9_IRQ_STAT 0x80 /* P bit */
+#define PSIHB9_IRQ_STAT_PSI PPC_BIT(0)
+#define PSIHB9_IRQ_STAT_OCC PPC_BIT(1)
+#define PSIHB9_IRQ_STAT_FSI PPC_BIT(2)
+#define PSIHB9_IRQ_STAT_LPCHC PPC_BIT(3)
+#define PSIHB9_IRQ_STAT_LOCAL_ERR PPC_BIT(4)
+#define PSIHB9_IRQ_STAT_GLOBAL_ERR PPC_BIT(5)
+#define PSIHB9_IRQ_STAT_TPM PPC_BIT(6)
+#define PSIHB9_IRQ_STAT_LPC_SIRQ1 PPC_BIT(7)
+#define PSIHB9_IRQ_STAT_LPC_SIRQ2 PPC_BIT(8)
+#define PSIHB9_IRQ_STAT_LPC_SIRQ3 PPC_BIT(9)
+#define PSIHB9_IRQ_STAT_LPC_SIRQ4 PPC_BIT(10)
+#define PSIHB9_IRQ_STAT_SBE_I2C PPC_BIT(11)
+#define PSIHB9_IRQ_STAT_DIO PPC_BIT(12)
+#define PSIHB9_IRQ_STAT_PSU PPC_BIT(13)
+
+/*
+ * XiveNotifier handler of the POWER9 PSI: forwards the event of source
+ * @srcno with an 8-byte big endian store to the notification address
+ * programmed in the ESB_NOTIF_ADDR register.
+ *
+ * The stored data combines XIVE_TRIGGER_PQ, the interrupt offset held in
+ * the IVT_OFFSET register and @srcno. Nothing is done when the valid bit
+ * of the notification register is clear. A failed store is logged as a
+ * guest error, together with the address that was targeted.
+ */
+static void pnv_psi_notify(XiveNotifier *xf, uint32_t srcno)
+{
+    PnvPsi *psi = PNV_PSI(xf);
+    uint64_t notif_port = psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)];
+    /* The valid bit shares the register with the address: strip it */
+    uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID;
+    uint32_t offset =
+        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
+    uint64_t data = XIVE_TRIGGER_PQ | offset | srcno;
+    MemTxResult result;
+
+    if (!(notif_port & PSIHB9_ESB_NOTIF_VALID)) {
+        return;
+    }
+
+    address_space_stq_be(&address_space_memory, notify_addr, data,
+                         MEMTXATTRS_UNSPECIFIED, &result);
+    if (result != MEMTX_OK) {
+        /* Report the address actually used, not the raw register value */
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: trigger failed @%"
+                      HWADDR_PRIx "\n", __func__, notify_addr);
+    }
+}
+
+/*
+ * MMIO read handler of the POWER9 PSI. The switch works on the MMIO byte
+ * offset @addr while the value comes from regs[PSIHB_REG(@addr)]. Reads
+ * of other offsets are logged as guest errors and return all ones.
+ */
+static uint64_t pnv_psi_p9_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+ PnvPsi *psi = PNV_PSI(opaque);
+ uint32_t reg = PSIHB_REG(addr);
+ /* Value returned for offsets which are not handled */
+ uint64_t val = -1;
+
+ switch (addr) {
+ case PSIHB9_CR:
+ case PSIHB9_SEMR:
+ /* FSP stuff */
+ case PSIHB9_INTERRUPT_CONTROL:
+ case PSIHB9_ESB_CI_BASE:
+ case PSIHB9_ESB_NOTIF_ADDR:
+ case PSIHB9_IVT_OFFSET:
+ val = psi->regs[reg];
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "PSI: read at 0x%" PRIx64 "\n", addr);
+ }
+
+ return val;
+}
+
+/*
+ * MMIO write handler of the POWER9 PSI. The switch works on the MMIO
+ * byte offset @addr while values are stored in regs[PSIHB_REG(@addr)].
+ *
+ * Writes to CR and SEMR are accepted and dropped. Writes to other
+ * unhandled offsets are logged as guest errors.
+ */
+static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr,
+ uint64_t val, unsigned size)
+{
+ PnvPsi *psi = PNV_PSI(opaque);
+ Pnv9Psi *psi9 = PNV9_PSI(psi);
+ uint32_t reg = PSIHB_REG(addr);
+ MemoryRegion *sysmem = get_system_memory();
+
+ switch (addr) {
+ case PSIHB9_CR:
+ case PSIHB9_SEMR:
+ /* FSP stuff */
+ break;
+ case PSIHB9_INTERRUPT_CONTROL:
+ /* The reset bit resets the interrupt source before the value is stored */
+ if (val & PSIHB9_IRQ_RESET) {
+ device_cold_reset(DEVICE(&psi9->source));
+ }
+ psi->regs[reg] = val;
+ break;
+
+ case PSIHB9_ESB_CI_BASE:
+ /*
+ * Map or unmap the ESB MMIO region when the valid bit changes.
+ * NOTE(review): a write keeping the valid bit set but changing the
+ * address does not move the region - confirm this is intended.
+ */
+ if (!(val & PSIHB9_ESB_CI_VALID)) {
+ if (psi->regs[reg] & PSIHB9_ESB_CI_VALID) {
+ memory_region_del_subregion(sysmem, &psi9->source.esb_mmio);
+ }
+ } else {
+ if (!(psi->regs[reg] & PSIHB9_ESB_CI_VALID)) {
+ memory_region_add_subregion(sysmem,
+ val & ~PSIHB9_ESB_CI_VALID,
+ &psi9->source.esb_mmio);
+ }
+ }
+ psi->regs[reg] = val;
+ break;
+
+ case PSIHB9_ESB_NOTIF_ADDR:
+ psi->regs[reg] = val;
+ break;
+ case PSIHB9_IVT_OFFSET:
+ psi->regs[reg] = val;
+ break;
+ default:
+ qemu_log_mask(LOG_GUEST_ERROR, "PSI: write at 0x%" PRIx64 "\n", addr);
+ }
+}
+
+/* MMIO access handlers of the POWER9 PSI: big endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_psi_p9_mmio_ops = {
+ .read = pnv_psi_p9_mmio_read,
+ .write = pnv_psi_p9_mmio_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+};
+
+/*
+ * XSCOM read handler of the POWER9 PSI. No reads are expected: every
+ * access is logged as a guest error and returns all ones.
+ */
+static uint64_t pnv_psi_p9_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    const uint64_t all_ones = -1;
+
+    qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom read at 0x%" PRIx64 "\n", addr);
+
+    return all_ones;
+}
+
+/*
+ * XSCOM write handler of the POWER9 PSI. XSCOM is only used to program
+ * the BAR of the PSIHB MMIO region; any other write is logged as a guest
+ * error and dropped.
+ */
+static void pnv_psi_p9_xscom_write(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size)
+{
+    PnvPsi *self = PNV_PSI(opaque);
+
+    if ((addr >> 3) == PSIHB_XSCOM_BAR) {
+        pnv_psi_set_bar(self, val);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom write at 0x%" PRIx64 "\n",
+                      addr);
+    }
+}
+
+/* XSCOM access handlers of the POWER9 PSI: big endian, 8-byte accesses only. */
+static const MemoryRegionOps pnv_psi_p9_xscom_ops = {
+ .read = pnv_psi_p9_xscom_read,
+ .write = pnv_psi_p9_xscom_write,
+ .endianness = DEVICE_BIG_ENDIAN,
+ .valid = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ },
+ .impl = {
+ .min_access_size = 8,
+ .max_access_size = 8,
+ }
+};
+
+/*
+ * POWER9 implementation of the PSI irq_set class handler: records the
+ * level of interrupt @irq in the PSIHB9_IRQ_LEVEL register and forwards
+ * @state to the matching qemu_irq line.
+ *
+ * Valid interrupt numbers are 0 .. PSIHB9_NUM_IRQS - 1, the number of
+ * lines allocated by the POWER9 realize handler. Any other number is
+ * logged as a guest error and ignored, as is any request made while the
+ * IRQ_METHOD bit of the interrupt control register is set.
+ */
+static void pnv_psi_power9_irq_set(PnvPsi *psi, int irq, bool state)
+{
+    uint64_t irq_method = psi->regs[PSIHB_REG(PSIHB9_INTERRUPT_CONTROL)];
+
+    /* psi->qirqs[] has PSIHB9_NUM_IRQS entries: the bound is exclusive */
+    if (irq < 0 || irq >= PSIHB9_NUM_IRQS) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
+        return;
+    }
+
+    if (irq_method & PSIHB9_IRQ_METHOD) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: LSI IRQ method no supported\n");
+        return;
+    }
+
+    /* Update LSI levels */
+    if (state) {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] |= PPC_BIT(irq);
+    } else {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] &= ~PPC_BIT(irq);
+    }
+
+    qemu_set_irq(psi->qirqs[irq], state);
+}
+
+/*
+ * Reset handler of the POWER9 PSI: performs the common register reset,
+ * then unmaps the ESB MMIO region if it is currently mapped.
+ */
+static void pnv_psi_power9_reset(DeviceState *dev)
+{
+    Pnv9Psi *psi9 = PNV9_PSI(dev);
+    MemoryRegion *esb_mmio = &psi9->source.esb_mmio;
+
+    pnv_psi_reset(dev);
+
+    if (!memory_region_is_mapped(esb_mmio)) {
+        return;
+    }
+
+    memory_region_del_subregion(get_system_memory(), esb_mmio);
+}
+
+/*
+ * Instance initializer of the POWER9 PSI: creates the embedded XIVE
+ * source child object.
+ */
+static void pnv_psi_power9_instance_init(Object *obj)
+{
+    XiveSource *xsrc = &PNV9_PSI(obj)->source;
+
+    object_initialize_child(obj, "source", xsrc, TYPE_XIVE_SOURCE);
+}
+
+/*
+ * Realize handler of the POWER9 PSI: configures and realizes the
+ * embedded XIVE source with PSIHB9_NUM_IRQS LSI interrupts, allocates
+ * the matching qemu_irq lines, creates the XSCOM and MMIO regions and
+ * finishes with the common pnv_psi_realize().
+ */
+static void pnv_psi_power9_realize(DeviceState *dev, Error **errp)
+{
+ PnvPsi *psi = PNV_PSI(dev);
+ XiveSource *xsrc = &PNV9_PSI(psi)->source;
+ int i;
+
+ /* This is the only device with 4k ESB pages */
+ object_property_set_int(OBJECT(xsrc), "shift", XIVE_ESB_4K, &error_fatal);
+ object_property_set_int(OBJECT(xsrc), "nr-irqs", PSIHB9_NUM_IRQS,
+ &error_fatal);
+ /* The PSI object itself is the "xive" link target of the source */
+ object_property_set_link(OBJECT(xsrc), "xive", OBJECT(psi), &error_abort);
+ if (!qdev_realize(DEVICE(xsrc), NULL, errp)) {
+ return;
+ }
+
+ /* Every interrupt of the source is an LSI */
+ for (i = 0; i < xsrc->nr_irqs; i++) {
+ xive_source_irq_set_lsi(xsrc, i);
+ }
+
+ /* One qemu_irq line per source, handled by xive_source_set_irq() */
+ psi->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs);
+
+ /* XSCOM region for PSI registers */
+ pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_p9_xscom_ops,
+ psi, "xscom-psi", PNV9_XSCOM_PSIHB_SIZE);
+
+ /* MMIO region for PSI registers */
+ memory_region_init_io(&psi->regs_mr, OBJECT(dev), &pnv_psi_p9_mmio_ops, psi,
+ "psihb", PNV9_PSIHB_SIZE);
+
+ pnv_psi_realize(dev, errp);
+}
+
+/*
+ * Class initializer of the POWER9 PSI: realize and reset handlers, XSCOM
+ * window, BAR mask, interrupt handler, device tree "compatible" strings
+ * and the XIVE notifier hook.
+ */
+static void pnv_psi_power9_class_init(ObjectClass *klass, void *data)
+{
+    /* Two NUL separated strings; sizeof() covers both of them */
+    static const char compat[] = "ibm,power9-psihb-x\0ibm,psihb-x";
+    XiveNotifierClass *notifier_class = XIVE_NOTIFIER_CLASS(klass);
+    PnvPsiClass *psi_class = PNV_PSI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->reset = pnv_psi_power9_reset;
+    dev_class->realize = pnv_psi_power9_realize;
+    dev_class->desc = "PowerNV PSI Controller POWER9";
+
+    psi_class->compat = compat;
+    psi_class->compat_size = sizeof(compat);
+    psi_class->irq_set = pnv_psi_power9_irq_set;
+    psi_class->bar_mask = PSIHB9_BAR_MASK;
+    psi_class->xscom_size = PNV9_XSCOM_PSIHB_SIZE;
+    psi_class->xscom_pcba = PNV9_XSCOM_PSIHB_BASE;
+
+    notifier_class->notify = pnv_psi_notify;
+}
+
+/*
+ * QOM type record of the POWER9 PSI model, a child of TYPE_PNV_PSI which
+ * also implements the TYPE_XIVE_NOTIFIER interface.
+ */
+static const TypeInfo pnv_psi_power9_info = {
+ .name = TYPE_PNV9_PSI,
+ .parent = TYPE_PNV_PSI,
+ .instance_size = sizeof(Pnv9Psi),
+ .instance_init = pnv_psi_power9_instance_init,
+ .class_init = pnv_psi_power9_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_XIVE_NOTIFIER },
+ { },
+ },
+};
+
+/*
+ * Class initializer of the POWER10 PSI: only the description, the XSCOM
+ * window and the device tree "compatible" strings are set here.
+ */
+static void pnv_psi_power10_class_init(ObjectClass *klass, void *data)
+{
+    /* Two NUL separated strings; sizeof() covers both of them */
+    static const char compat[] = "ibm,power10-psihb-x\0ibm,psihb-x";
+    PnvPsiClass *psi_class = PNV_PSI_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->desc = "PowerNV PSI Controller POWER10";
+
+    psi_class->compat = compat;
+    psi_class->compat_size = sizeof(compat);
+    psi_class->xscom_size = PNV10_XSCOM_PSIHB_SIZE;
+    psi_class->xscom_pcba = PNV10_XSCOM_PSIHB_BASE;
+}
+
+/* QOM type record of the POWER10 PSI model, a child of TYPE_PNV9_PSI. */
+static const TypeInfo pnv_psi_power10_info = {
+ .name = TYPE_PNV10_PSI,
+ .parent = TYPE_PNV9_PSI,
+ .class_init = pnv_psi_power10_class_init,
+};
+
+/*
+ * Class initializer of the abstract PSI type: device tree handler of the
+ * XSCOM interface, properties, common reset handler; the device cannot
+ * be created by the user.
+ */
+static void pnv_psi_class_init(ObjectClass *klass, void *data)
+{
+    PnvXScomInterfaceClass *xscom_class = PNV_XSCOM_INTERFACE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    xscom_class->dt_xscom = pnv_psi_dt_xscom;
+
+    dev_class->user_creatable = false;
+    dev_class->reset = pnv_psi_reset;
+    dev_class->desc = "PowerNV PSI Controller";
+    device_class_set_props(dev_class, pnv_psi_properties);
+}
+
+/*
+ * QOM type record of the abstract PSI base type, which implements the
+ * TYPE_PNV_XSCOM_INTERFACE interface.
+ */
+static const TypeInfo pnv_psi_info = {
+ .name = TYPE_PNV_PSI,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(PnvPsi),
+ .class_init = pnv_psi_class_init,
+ .class_size = sizeof(PnvPsiClass),
+ .abstract = true,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_PNV_XSCOM_INTERFACE },
+ { }
+ }
+};
+
+/* Register the abstract PSI type and its POWER8, POWER9 and POWER10 models. */
+static void pnv_psi_register_types(void)
+{
+ type_register_static(&pnv_psi_info);
+ type_register_static(&pnv_psi_power8_info);
+ type_register_static(&pnv_psi_power9_info);
+ type_register_static(&pnv_psi_power10_info);
+}
+
+/* Hook the registration function into the type_init() mechanism */
+type_init(pnv_psi_register_types);
+
+/*
+ * Print the interrupt number range of the POWER9 PSI source on monitor
+ * @mon, followed by the state of the XIVE source itself. The first
+ * number is the offset held in the IVT_OFFSET register.
+ */
+void pnv_psi_pic_print_info(Pnv9Psi *psi9, Monitor *mon)
+{
+    PnvPsi *psi = PNV_PSI(psi9);
+    uint32_t base =
+        (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT);
+
+    monitor_printf(mon, "PSIHB Source %08x .. %08x\n",
+                   base, base + psi9->source.nr_irqs - 1);
+
+    xive_source_pic_print_info(&psi9->source, base, mon);
+}
diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c
new file mode 100644
index 000000000..9ce018dbc
--- /dev/null
+++ b/hw/ppc/pnv_xscom.c
@@ -0,0 +1,324 @@
+/*
+ * QEMU PowerPC PowerNV XSCOM bus
+ *
+ * Copyright (c) 2016, IBM Corporation.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "sysemu/hw_accel.h"
+#include "target/ppc/cpu.h"
+#include "hw/sysbus.h"
+
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/pnv.h"
+#include "hw/ppc/pnv_xscom.h"
+
+#include <libfdt.h>
+
+/*
+ * PRD registers (P8 and P9 IPOLL mask/status PCB addresses). Reads of
+ * these return 0 and writes are ignored by the default XSCOM handlers
+ * in this file.
+ */
+#define PRD_P8_IPOLL_REG_MASK 0x01020013
+#define PRD_P8_IPOLL_REG_STATUS 0x01020014
+#define PRD_P9_IPOLL_REG_MASK 0x000F0033
+#define PRD_P9_IPOLL_REG_STATUS 0x000F0034
+
+/*
+ * Report completion of an XSCOM access by OR-ing @hmer_bits into the
+ * HMER SPR of @cs.
+ *
+ * TODO: When the read/write comes from the monitor, NULL is passed
+ * for the cpu, and no CPU completion is generated.
+ */
+static void xscom_complete(CPUState *cs, uint64_t hmer_bits)
+{
+    CPUPPCState *env;
+
+    if (!cs) {
+        return;
+    }
+
+    env = &POWERPC_CPU(cs)->env;
+
+    /*
+     * TODO: Need a CPU helper to set HMER, also handle generation
+     * of HMIs
+     */
+    cpu_synchronize_state(cs);
+    env->spr[SPR_HMER] |= hmer_bits;
+}
+
+/* Translate an XSCOM MMIO address to a PCB address via the chip class hook. */
+static uint32_t pnv_xscom_pcba(PnvChip *chip, uint64_t addr)
+{
+    PnvChipClass *chip_class = PNV_CHIP_GET_CLASS(chip);
+
+    return chip_class->xscom_pcba(chip, addr);
+}
+
+/*
+ * Handle reads of a fixed set of PCB addresses before dispatching to
+ * the XSCOM address space.
+ *
+ * Returns the chip CFAM id for 0xf000f, 0 for the other listed
+ * registers, and -1 (all ones once converted to uint64_t) for any
+ * address that is not handled here; the caller uses that value as
+ * the "not handled" marker.
+ */
+static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba)
+{
+ switch (pcba) {
+ case 0xf000f:
+ return PNV_CHIP_GET_CLASS(chip)->chip_cfam_id;
+ case 0x18002: /* ECID2 */
+ return 0;
+
+ case 0x1010c00: /* PIBAM FIR */
+ case 0x1010c03: /* PIBAM FIR MASK */
+
+ /* PRD registers */
+ case PRD_P8_IPOLL_REG_MASK:
+ case PRD_P8_IPOLL_REG_STATUS:
+ case PRD_P9_IPOLL_REG_MASK:
+ case PRD_P9_IPOLL_REG_STATUS:
+
+ /* P9 xscom reset */
+ case 0x0090018: /* Receive status reg */
+ case 0x0090012: /* log register */
+ case 0x0090013: /* error register */
+
+ /* P8 xscom reset */
+ case 0x2020007: /* ADU stuff, log register */
+ case 0x2020009: /* ADU stuff, error register */
+ case 0x202000f: /* ADU stuff, receive status register*/
+ return 0;
+ case 0x2013f01: /* PBA stuff */
+ case 0x2013f05: /* PBA stuff */
+ return 0;
+ case 0x2013028: /* CAPP stuff */
+ case 0x201302a: /* CAPP stuff */
+ case 0x2013801: /* CAPP stuff */
+ case 0x2013802: /* CAPP stuff */
+
+ /* P9 CAPP regs */
+ case 0x2010841:
+ case 0x2010842:
+ case 0x201082a:
+ case 0x2010828:
+ case 0x4010841:
+ case 0x4010842:
+ case 0x401082a:
+ case 0x4010828:
+ return 0;
+ default:
+ /* Not handled here: let the caller dispatch to the address space */
+ return -1;
+ }
+}
+
+/*
+ * Filter writes to a fixed set of PCB addresses before dispatching to
+ * the XSCOM address space.
+ *
+ * Returns true when the write to @pcba is silently ignored (@val is
+ * discarded), false when the caller must forward it.
+ */
+static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val)
+{
+ /* We ignore writes to these */
+ switch (pcba) {
+ case 0xf000f: /* chip id is RO */
+ case 0x1010c00: /* PIBAM FIR */
+ case 0x1010c01: /* PIBAM FIR */
+ case 0x1010c02: /* PIBAM FIR */
+ case 0x1010c03: /* PIBAM FIR MASK */
+ case 0x1010c04: /* PIBAM FIR MASK */
+ case 0x1010c05: /* PIBAM FIR MASK */
+ /* P9 xscom reset */
+ case 0x0090018: /* Receive status reg */
+ case 0x0090012: /* log register */
+ case 0x0090013: /* error register */
+
+ /* P8 xscom reset */
+ case 0x2020007: /* ADU stuff, log register */
+ case 0x2020009: /* ADU stuff, error register */
+ case 0x202000f: /* ADU stuff, receive status register*/
+
+ case 0x2013028: /* CAPP stuff */
+ case 0x201302a: /* CAPP stuff */
+ case 0x2013801: /* CAPP stuff */
+ case 0x2013802: /* CAPP stuff */
+
+ /* P9 CAPP regs */
+ case 0x2010841:
+ case 0x2010842:
+ case 0x201082a:
+ case 0x2010828:
+ case 0x4010841:
+ case 0x4010842:
+ case 0x401082a:
+ case 0x4010828:
+
+ /* PRD registers (P8 and P9) */
+ case PRD_P8_IPOLL_REG_MASK:
+ case PRD_P8_IPOLL_REG_STATUS:
+ case PRD_P9_IPOLL_REG_MASK:
+ case PRD_P9_IPOLL_REG_STATUS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * MMIO read handler of the XSCOM window.
+ *
+ * The address is translated to a PCB address; registers known to
+ * xscom_read_default() are answered directly, everything else is
+ * loaded from the chip XSCOM address space at (pcba << 3). On a failed
+ * load the error is logged, HMER gets FAIL|DONE and 0 is returned;
+ * otherwise HMER gets DONE and the value is returned.
+ */
+static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width)
+{
+    PnvChip *chip = opaque;
+    uint32_t pcba = pnv_xscom_pcba(chip, addr);
+    MemTxResult result;
+    uint64_t data;
+
+    /* Handle some SCOMs here before dispatch */
+    data = xscom_read_default(chip, pcba);
+    if (data == -1) {
+        data = address_space_ldq(&chip->xscom_as, (uint64_t) pcba << 3,
+                                 MEMTXATTRS_UNSPECIFIED, &result);
+        if (result != MEMTX_OK) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XSCOM read failed at @0x%"
+                          HWADDR_PRIx " pcba=0x%08x\n", addr, pcba);
+            xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
+            return 0;
+        }
+    }
+
+    xscom_complete(current_cpu, HMER_XSCOM_DONE);
+    return data;
+}
+
+/*
+ * MMIO write handler of the XSCOM window.
+ *
+ * Writes filtered by xscom_write_default() are dropped; all others are
+ * stored to the chip XSCOM address space at (pcba << 3). On a failed
+ * store the error is logged and HMER gets FAIL|DONE; otherwise HMER
+ * gets DONE.
+ */
+static void xscom_write(void *opaque, hwaddr addr, uint64_t val,
+                        unsigned width)
+{
+    PnvChip *chip = opaque;
+    uint32_t pcba = pnv_xscom_pcba(chip, addr);
+    MemTxResult result;
+
+    /* Handle some SCOMs here before dispatch */
+    if (!xscom_write_default(chip, pcba, val)) {
+        address_space_stq(&chip->xscom_as, (uint64_t) pcba << 3, val,
+                          MEMTXATTRS_UNSPECIFIED, &result);
+        if (result != MEMTX_OK) {
+            qemu_log_mask(LOG_GUEST_ERROR, "XSCOM write failed at @0x%"
+                          HWADDR_PRIx " pcba=0x%08x data=0x%" PRIx64 "\n",
+                          addr, pcba, val);
+            xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE);
+            return;
+        }
+    }
+
+    xscom_complete(current_cpu, HMER_XSCOM_DONE);
+}
+
+/*
+ * Memory region callbacks of the XSCOM MMIO window: big-endian, and
+ * only 8-byte accesses are accepted and implemented.
+ */
+const MemoryRegionOps pnv_xscom_ops = {
+ .read = xscom_read,
+ .write = xscom_write,
+ .valid.min_access_size = 8,
+ .valid.max_access_size = 8,
+ .impl.min_access_size = 8,
+ .impl.max_access_size = 8,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/*
+ * Create the XSCOM regions of @chip:
+ *  - an MMIO region of @size bytes backed by pnv_xscom_ops, exported
+ *    as a sysbus MMIO region;
+ *  - a container region of the same size with its own address space
+ *    (chip->xscom_as), which the MMIO handlers dispatch into.
+ * All three objects share the name "xscom-<chip id in hex>".
+ * @errp is not used by this function.
+ */
+void pnv_xscom_realize(PnvChip *chip, uint64_t size, Error **errp)
+{
+    Object *owner = OBJECT(chip);
+    char *region_name = g_strdup_printf("xscom-%x", chip->chip_id);
+
+    memory_region_init_io(&chip->xscom_mmio, owner, &pnv_xscom_ops,
+                          chip, region_name, size);
+    sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip->xscom_mmio);
+
+    memory_region_init(&chip->xscom, owner, region_name, size);
+    address_space_init(&chip->xscom_as, &chip->xscom, region_name);
+
+    g_free(region_name);
+}
+
+/* QOM interface type implemented by devices sitting on the XSCOM bus. */
+static const TypeInfo pnv_xscom_interface_info = {
+ .name = TYPE_PNV_XSCOM_INTERFACE,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(PnvXScomInterfaceClass),
+};
+
+/* Register the XSCOM interface type with QOM. */
+static void pnv_xscom_register_types(void)
+{
+ type_register_static(&pnv_xscom_interface_info);
+}
+
+type_init(pnv_xscom_register_types)
+
+/* Context handed to xscom_dt_child() while walking the chip's children. */
+typedef struct ForeachPopulateArgs {
+ void *fdt; /* flattened device tree being populated */
+ int xscom_offset; /* offset of the xscom node in @fdt */
+} ForeachPopulateArgs;
+
+/*
+ * object_child_foreach_recursive() callback: when @child implements
+ * PnvXScomInterface, provides a dt_xscom hook and is realized, let it
+ * add its node under the xscom node. Always returns 0.
+ */
+static int xscom_dt_child(Object *child, void *opaque)
+{
+    ForeachPopulateArgs *args = opaque;
+    PnvXScomInterface *xd;
+    PnvXScomInterfaceClass *xc;
+
+    if (!object_dynamic_cast(child, TYPE_PNV_XSCOM_INTERFACE)) {
+        return 0;
+    }
+
+    xd = PNV_XSCOM_INTERFACE(child);
+    xc = PNV_XSCOM_INTERFACE_GET_CLASS(xd);
+
+    /* Only "realized" devices should be configured in the DT */
+    if (!xc->dt_xscom || !DEVICE(child)->realized) {
+        return 0;
+    }
+
+    _FDT((xc->dt_xscom(xd, args->fdt, args->xscom_offset)));
+    return 0;
+}
+
+/*
+ * Add the "xscom@<base>" node of @chip under @root_offset in @fdt, fill
+ * in its properties, then let every realized PnvXScomInterface child of
+ * the chip add its own sub-node. Always returns 0; the FDT calls are
+ * wrapped in _FDT().
+ *
+ * NOTE(review): @xscom_base and @xscom_size are stored into "reg"
+ * unmodified and the node name is built with be64_to_cpu(reg[0]), so
+ * callers are presumably expected to pass both values already
+ * converted to big-endian -- confirm against the callers.
+ */
+int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset,
+ uint64_t xscom_base, uint64_t xscom_size,
+ const char *compat, int compat_size)
+{
+ uint64_t reg[] = { xscom_base, xscom_size };
+ int xscom_offset;
+ ForeachPopulateArgs args;
+ char *name;
+
+ name = g_strdup_printf("xscom@%" PRIx64, be64_to_cpu(reg[0]));
+ xscom_offset = fdt_add_subnode(fdt, root_offset, name);
+ _FDT(xscom_offset);
+ g_free(name);
+ _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id)));
+ /*
+ * On P10, the xscom bus id has been deprecated and the chip id is
+ * calculated from the "Primary topology table index". See skiboot.
+ */
+ _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index",
+ chip->chip_id)));
+ _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1)));
+ _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1)));
+ _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg))));
+ _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat, compat_size)));
+ _FDT((fdt_setprop(fdt, xscom_offset, "scom-controller", NULL, 0)));
+
+ args.fdt = fdt;
+ args.xscom_offset = xscom_offset;
+
+ /*
+ * Loop on the whole object hierarchy to catch all
+ * PnvXScomInterface objects which can lie a bit deeper than the
+ * first layer.
+ */
+ object_child_foreach_recursive(OBJECT(chip), xscom_dt_child, &args);
+ return 0;
+}
+
+/*
+ * Map @mr into the chip XSCOM container. @offset is multiplied by 8
+ * (shifted left by 3) to obtain the offset inside the container.
+ */
+void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, MemoryRegion *mr)
+{
+    hwaddr region_offset = offset << 3;
+
+    memory_region_add_subregion(&chip->xscom, region_offset, mr);
+}
+
+/*
+ * Initialize @mr as an I/O region for an XSCOM device. @size is
+ * multiplied by 8 (shifted left by 3) to obtain the region size.
+ */
+void pnv_xscom_region_init(MemoryRegion *mr,
+                           Object *owner,
+                           const MemoryRegionOps *ops,
+                           void *opaque,
+                           const char *name,
+                           uint64_t size)
+{
+    uint64_t region_size = size << 3;
+
+    memory_region_init_io(mr, owner, ops, opaque, name, region_size);
+}
diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c
new file mode 100644
index 000000000..e8127599c
--- /dev/null
+++ b/hw/ppc/ppc.c
@@ -0,0 +1,1465 @@
+/*
+ * QEMU generic PowerPC hardware System Emulator
+ *
+ * Copyright (c) 2003-2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/ppc_e500.h"
+#include "qemu/timer.h"
+#include "sysemu/cpus.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "sysemu/kvm.h"
+#include "sysemu/runstate.h"
+#include "kvm_ppc.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+
+static void cpu_ppc_tb_stop (CPUPPCState *env);
+static void cpu_ppc_tb_start (CPUPPCState *env);
+
+/*
+ * Raise (@level != 0) or lower (@level == 0) the pending bit @n_IRQ of
+ * @cpu.
+ *
+ * Raising always asserts CPU_INTERRUPT_HARD; lowering de-asserts it
+ * only once no pending bit is left. KVM is told about the change only
+ * when the pending mask actually changed. The iothread lock is taken
+ * for the duration of the update unless the caller already holds it.
+ */
+void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level)
+{
+ CPUState *cs = CPU(cpu);
+ CPUPPCState *env = &cpu->env;
+ unsigned int old_pending;
+ bool locked = false;
+
+ /* We may already have the BQL if coming from the reset path */
+ if (!qemu_mutex_iothread_locked()) {
+ locked = true;
+ qemu_mutex_lock_iothread();
+ }
+
+ /* Snapshot used below to detect whether the mask really changed */
+ old_pending = env->pending_interrupts;
+
+ if (level) {
+ env->pending_interrupts |= 1 << n_IRQ;
+ cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+ } else {
+ env->pending_interrupts &= ~(1 << n_IRQ);
+ if (env->pending_interrupts == 0) {
+ cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
+ }
+ }
+
+ if (old_pending != env->pending_interrupts) {
+ kvmppc_set_interrupt(cpu, n_IRQ, level);
+ }
+
+
+ trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts,
+ CPU(cpu)->interrupt_request);
+
+ /* Only drop the lock if it was taken by this call */
+ if (locked) {
+ qemu_mutex_unlock_iothread();
+ }
+}
+
+/*
+ * PowerPC 6xx / 7xx internal IRQ controller.
+ *
+ * qemu_irq handler for the CPU input pins. The last level seen on each
+ * pin is kept in env->irq_input_state; a call that does not change the
+ * level of @pin is ignored.
+ */
+static void ppc6xx_set_irq(void *opaque, int pin, int level)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+    int prev_level;
+
+    trace_ppc_irq_set(env, pin, level);
+
+    prev_level = (env->irq_input_state >> pin) & 1;
+    /* Don't generate spurious events */
+    if (prev_level == (level != 0)) {
+        return;
+    }
+
+    switch (pin) {
+    case PPC6xx_INPUT_TBEN:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("time base", level);
+        if (level) {
+            cpu_ppc_tb_start(env);
+        } else {
+            cpu_ppc_tb_stop(env);
+        }
+        break;
+    case PPC6xx_INPUT_INT:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("external IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
+        break;
+    case PPC6xx_INPUT_SMI:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("SMI IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level);
+        break;
+    case PPC6xx_INPUT_MCP:
+        /*
+         * Negative edge sensitive.
+         * XXX: TODO: actual reaction may depends on HID0 status
+         *            603/604/740/750: check HID0[EMCP]
+         */
+        if (prev_level == 1 && level == 0) {
+            trace_ppc_irq_set_state("machine check", 1);
+            ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1);
+        }
+        break;
+    case PPC6xx_INPUT_CKSTP_IN:
+        /*
+         * Level sensitive - active low.
+         * XXX: TODO: relay the signal to CKSTP_OUT pin
+         * XXX: Note that the only way to restart the CPU is to reset it
+         */
+        if (level) {
+            trace_ppc_irq_cpu("stop");
+            cs->halted = 1;
+        }
+        break;
+    case PPC6xx_INPUT_HRESET:
+        /* Level sensitive - active low */
+        if (level) {
+            trace_ppc_irq_reset("CPU");
+            cpu_interrupt(cs, CPU_INTERRUPT_RESET);
+        }
+        break;
+    case PPC6xx_INPUT_SRESET:
+        trace_ppc_irq_set_state("RESET IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Remember the new level of the pin */
+    if (level) {
+        env->irq_input_state |= 1 << pin;
+    } else {
+        env->irq_input_state &= ~(1 << pin);
+    }
+}
+
+/* Allocate the PPC6xx_INPUT_NB input pins of @cpu, served by ppc6xx_set_irq(). */
+void ppc6xx_irq_init(PowerPCCPU *cpu)
+{
+    cpu->env.irq_inputs = (void **)qemu_allocate_irqs(&ppc6xx_set_irq, cpu,
+                                                      PPC6xx_INPUT_NB);
+}
+
+#if defined(TARGET_PPC64)
+/*
+ * PowerPC 970 internal IRQ controller.
+ *
+ * qemu_irq handler for the CPU input pins. The last level seen on each
+ * pin is kept in env->irq_input_state; a call that does not change the
+ * level of @pin is ignored.
+ */
+static void ppc970_set_irq(void *opaque, int pin, int level)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    int cur_level;
+
+    trace_ppc_irq_set(env, pin, level);
+
+    cur_level = (env->irq_input_state >> pin) & 1;
+    /* Don't generate spurious events */
+    if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) {
+        CPUState *cs = CPU(cpu);
+
+        switch (pin) {
+        case PPC970_INPUT_INT:
+            /* Level sensitive - active high */
+            trace_ppc_irq_set_state("external IRQ", level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
+            break;
+        case PPC970_INPUT_THINT:
+            /*
+             * Level sensitive - active high.
+             * This pin raises the thermal interrupt, so trace it as
+             * such rather than as the 6xx "SMI IRQ".
+             */
+            trace_ppc_irq_set_state("thermal IRQ", level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level);
+            break;
+        case PPC970_INPUT_MCP:
+            /*
+             * Negative edge sensitive.
+             * XXX: TODO: actual reaction may depends on HID0 status
+             *            603/604/740/750: check HID0[EMCP]
+             */
+            if (cur_level == 1 && level == 0) {
+                trace_ppc_irq_set_state("machine check", 1);
+                ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1);
+            }
+            break;
+        case PPC970_INPUT_CKSTP:
+            /* Level sensitive - active low */
+            /* XXX: TODO: relay the signal to CKSTP_OUT pin */
+            if (level) {
+                trace_ppc_irq_cpu("stop");
+                cs->halted = 1;
+            } else {
+                trace_ppc_irq_cpu("restart");
+                cs->halted = 0;
+                qemu_cpu_kick(cs);
+            }
+            break;
+        case PPC970_INPUT_HRESET:
+            /* Level sensitive - active low */
+            if (level) {
+                cpu_interrupt(cs, CPU_INTERRUPT_RESET);
+            }
+            break;
+        case PPC970_INPUT_SRESET:
+            trace_ppc_irq_set_state("RESET IRQ", level);
+            ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level);
+            break;
+        case PPC970_INPUT_TBEN:
+            trace_ppc_irq_set_state("TBEN IRQ", level);
+            /* XXX: TODO */
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        /* Remember the new level of the pin */
+        if (level) {
+            env->irq_input_state |= 1 << pin;
+        } else {
+            env->irq_input_state &= ~(1 << pin);
+        }
+    }
+}
+
+/* Allocate the PPC970_INPUT_NB input pins of @cpu, served by ppc970_set_irq(). */
+void ppc970_irq_init(PowerPCCPU *cpu)
+{
+    cpu->env.irq_inputs = (void **)qemu_allocate_irqs(&ppc970_set_irq, cpu,
+                                                      PPC970_INPUT_NB);
+}
+
+/*
+ * POWER7 internal IRQ controller.
+ *
+ * Only the external interrupt pin is supported; any other pin hits
+ * g_assert_not_reached().
+ */
+static void power7_set_irq(void *opaque, int pin, int level)
+{
+    PowerPCCPU *cpu = opaque;
+
+    trace_ppc_irq_set(&cpu->env, pin, level);
+
+    if (pin == POWER7_INPUT_INT) {
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("external IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
+    } else {
+        g_assert_not_reached();
+    }
+}
+
+/* Allocate the POWER7_INPUT_NB input pins of @cpu, served by power7_set_irq(). */
+void ppcPOWER7_irq_init(PowerPCCPU *cpu)
+{
+    cpu->env.irq_inputs = (void **)qemu_allocate_irqs(&power7_set_irq, cpu,
+                                                      POWER7_INPUT_NB);
+}
+
+/*
+ * POWER9 internal IRQ controller.
+ *
+ * Supports the external and the hypervisor virtualization interrupt
+ * pins; any other pin hits g_assert_not_reached().
+ */
+static void power9_set_irq(void *opaque, int pin, int level)
+{
+    PowerPCCPU *cpu = opaque;
+
+    trace_ppc_irq_set(&cpu->env, pin, level);
+
+    if (pin == POWER9_INPUT_INT) {
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("external IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
+    } else if (pin == POWER9_INPUT_HINT) {
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("HV external IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level);
+    } else {
+        g_assert_not_reached();
+    }
+}
+
+/* Allocate the POWER9_INPUT_NB input pins of @cpu, served by power9_set_irq(). */
+void ppcPOWER9_irq_init(PowerPCCPU *cpu)
+{
+    cpu->env.irq_inputs = (void **)qemu_allocate_irqs(&power9_set_irq, cpu,
+                                                      POWER9_INPUT_NB);
+}
+#endif /* defined(TARGET_PPC64) */
+
+/*
+ * Request a reset of the 40x core and record it in DBSR: bits
+ * 0x00000300 are cleared, then 0x00000100 is set.
+ */
+void ppc40x_core_reset(PowerPCCPU *cpu)
+{
+    CPUPPCState *env = &cpu->env;
+
+    qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC core\n");
+    cpu_interrupt(CPU(cpu), CPU_INTERRUPT_RESET);
+
+    env->spr[SPR_40x_DBSR] =
+        (env->spr[SPR_40x_DBSR] & ~0x00000300) | 0x00000100;
+}
+
+/*
+ * Request a reset of the 40x chip and record it in DBSR: bits
+ * 0x00000300 are cleared, then 0x00000200 is set.
+ */
+void ppc40x_chip_reset(PowerPCCPU *cpu)
+{
+    CPUPPCState *env = &cpu->env;
+
+    qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC chip\n");
+    cpu_interrupt(CPU(cpu), CPU_INTERRUPT_RESET);
+    /* XXX: TODO reset all internal peripherals */
+
+    env->spr[SPR_40x_DBSR] =
+        (env->spr[SPR_40x_DBSR] & ~0x00000300) | 0x00000200;
+}
+
+/*
+ * Log and request a guest-initiated system reset. @cpu is not used.
+ */
+void ppc40x_system_reset(PowerPCCPU *cpu)
+{
+ qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC system\n");
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+}
+
+/*
+ * Handle a write of @val to the 40x DBCR0 register: bits 29:28 select
+ * the reset to perform (0 none, 1 core, 2 chip, 3 system). The
+ * iothread lock is held while the reset is requested.
+ */
+void store_40x_dbcr0(CPUPPCState *env, uint32_t val)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    const uint32_t reset_req = (val >> 28) & 0x3;
+
+    qemu_mutex_lock_iothread();
+
+    if (reset_req == 0x1) {
+        /* Core reset */
+        ppc40x_core_reset(cpu);
+    } else if (reset_req == 0x2) {
+        /* Chip reset */
+        ppc40x_chip_reset(cpu);
+    } else if (reset_req == 0x3) {
+        /* System reset */
+        ppc40x_system_reset(cpu);
+    }
+    /* reset_req == 0x0: no action */
+
+    qemu_mutex_unlock_iothread();
+}
+
+/*
+ * PowerPC 40x internal IRQ controller.
+ *
+ * qemu_irq handler for the CPU input pins. The last level seen on each
+ * pin is kept in env->irq_input_state; a call that does not change the
+ * level of @pin is ignored.
+ */
+static void ppc40x_set_irq(void *opaque, int pin, int level)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+    int prev_level;
+
+    trace_ppc_irq_set(env, pin, level);
+
+    prev_level = (env->irq_input_state >> pin) & 1;
+    /* Don't generate spurious events */
+    if (prev_level == (level != 0)) {
+        return;
+    }
+
+    switch (pin) {
+    case PPC40x_INPUT_RESET_SYS:
+        if (level) {
+            trace_ppc_irq_reset("system");
+            ppc40x_system_reset(cpu);
+        }
+        break;
+    case PPC40x_INPUT_RESET_CHIP:
+        if (level) {
+            trace_ppc_irq_reset("chip");
+            ppc40x_chip_reset(cpu);
+        }
+        break;
+    case PPC40x_INPUT_RESET_CORE:
+        /* XXX: TODO: update DBSR[MRR] */
+        if (level) {
+            trace_ppc_irq_reset("core");
+            ppc40x_core_reset(cpu);
+        }
+        break;
+    case PPC40x_INPUT_CINT:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("critical IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level);
+        break;
+    case PPC40x_INPUT_INT:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("external IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
+        break;
+    case PPC40x_INPUT_HALT:
+        /* Level sensitive - active low */
+        if (level) {
+            trace_ppc_irq_cpu("stop");
+            cs->halted = 1;
+        } else {
+            trace_ppc_irq_cpu("restart");
+            cs->halted = 0;
+            qemu_cpu_kick(cs);
+        }
+        break;
+    case PPC40x_INPUT_DEBUG:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("debug pin", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Remember the new level of the pin */
+    if (level) {
+        env->irq_input_state |= 1 << pin;
+    } else {
+        env->irq_input_state &= ~(1 << pin);
+    }
+}
+
+/* Allocate the PPC40x_INPUT_NB input pins of @cpu, served by ppc40x_set_irq(). */
+void ppc40x_irq_init(PowerPCCPU *cpu)
+{
+    cpu->env.irq_inputs = (void **)qemu_allocate_irqs(&ppc40x_set_irq, cpu,
+                                                      PPC40x_INPUT_NB);
+}
+
+/*
+ * PowerPC E500 internal IRQ controller.
+ *
+ * qemu_irq handler for the CPU input pins. The last level seen on each
+ * pin is kept in env->irq_input_state; a call that does not change the
+ * level of @pin is ignored.
+ *
+ * NOTE(review): the MCK pin requests a system reset while the
+ * RESET_CORE pin raises the machine check interrupt; this mirrors the
+ * existing behaviour -- confirm it is intended.
+ */
+static void ppce500_set_irq(void *opaque, int pin, int level)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    int prev_level;
+
+    trace_ppc_irq_set(env, pin, level);
+
+    prev_level = (env->irq_input_state >> pin) & 1;
+    /* Don't generate spurious events */
+    if (prev_level == (level != 0)) {
+        return;
+    }
+
+    switch (pin) {
+    case PPCE500_INPUT_MCK:
+        if (level) {
+            trace_ppc_irq_reset("system");
+            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+        }
+        break;
+    case PPCE500_INPUT_RESET_CORE:
+        if (level) {
+            trace_ppc_irq_reset("core");
+            ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level);
+        }
+        break;
+    case PPCE500_INPUT_CINT:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("critical IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level);
+        break;
+    case PPCE500_INPUT_INT:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("core IRQ", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level);
+        break;
+    case PPCE500_INPUT_DEBUG:
+        /* Level sensitive - active high */
+        trace_ppc_irq_set_state("debug pin", level);
+        ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /* Remember the new level of the pin */
+    if (level) {
+        env->irq_input_state |= 1 << pin;
+    } else {
+        env->irq_input_state &= ~(1 << pin);
+    }
+}
+
+/* Allocate the PPCE500_INPUT_NB input pins of @cpu, served by ppce500_set_irq(). */
+void ppce500_irq_init(PowerPCCPU *cpu)
+{
+    cpu->env.irq_inputs = (void **)qemu_allocate_irqs(&ppce500_set_irq, cpu,
+                                                      PPCE500_INPUT_NB);
+}
+
+/*
+ * Enable or disable the E500 EPR capability on every CPU, and forward
+ * the setting to KVM when it is in use.
+ */
+void ppce500_set_mpic_proxy(bool enabled)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *pcpu = POWERPC_CPU(cs);
+
+        pcpu->env.mpic_proxy = enabled;
+        if (!kvm_enabled()) {
+            continue;
+        }
+        kvmppc_set_mpic_proxy(pcpu, enabled);
+    }
+}
+
+/*****************************************************************************/
+/* PowerPC time base and decrementer emulation */
+
+/*
+ * Convert the virtual clock @vmclk (ns) to time base ticks at
+ * tb_env->tb_freq and add @tb_offset.
+ */
+uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset)
+{
+    uint64_t ticks = muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND);
+
+    /* TB time in tb periods */
+    return ticks + tb_offset;
+}
+
+/*
+ * Read the time base. Under KVM the value stored in SPR_TBL is
+ * returned; otherwise the full 64-bit time base is computed from the
+ * virtual clock.
+ */
+uint64_t cpu_ppc_load_tbl(CPUPPCState *env)
+{
+    uint64_t now_tb;
+
+    if (kvm_enabled()) {
+        return env->spr[SPR_TBL];
+    }
+
+    now_tb = cpu_ppc_get_tb(env->tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                            env->tb_env->tb_offset);
+    trace_ppc_tb_load(now_tb);
+
+    return now_tb;
+}
+
+/* Compute the time base from the virtual clock and return its upper 32 bits. */
+static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env)
+{
+    uint64_t now_tb = cpu_ppc_get_tb(env->tb_env,
+                                     qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                                     env->tb_env->tb_offset);
+
+    trace_ppc_tb_load(now_tb);
+
+    return now_tb >> 32;
+}
+
+/*
+ * Read the upper half of the time base: SPR_TBU under KVM, otherwise
+ * the value computed from the virtual clock.
+ */
+uint32_t cpu_ppc_load_tbu(CPUPPCState *env)
+{
+    return kvm_enabled() ? env->spr[SPR_TBU] : _cpu_ppc_load_tbu(env);
+}
+
+/*
+ * Set *@tb_offsetp so that a time base read at virtual clock @vmclk
+ * returns @value.
+ */
+static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk,
+                                    int64_t *tb_offsetp, uint64_t value)
+{
+    uint64_t elapsed_ticks = muldiv64(vmclk, tb_env->tb_freq,
+                                      NANOSECONDS_PER_SECOND);
+
+    *tb_offsetp = value - elapsed_ticks;
+
+    trace_ppc_tb_store(value, *tb_offsetp);
+}
+
+/*
+ * Write the lower 32 bits of the time base, keeping the upper half.
+ *
+ * The virtual clock is sampled once and used both to read the current
+ * time base and to compute the new offset; sampling it twice would
+ * let the preserved half and the new offset refer to different
+ * instants.
+ */
+void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t tb;
+
+    tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->tb_offset);
+    tb &= 0xFFFFFFFF00000000ULL;
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, tb | (uint64_t)value);
+}
+
+/*
+ * Write the upper 32 bits of the time base, keeping the lower half.
+ *
+ * The virtual clock is sampled once and used both to read the current
+ * time base and to compute the new offset; sampling it twice would
+ * make the preserved lower half slip by the time elapsed between the
+ * two samples.
+ */
+static inline void _cpu_ppc_store_tbu(CPUPPCState *env, uint32_t value)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t tb;
+
+    tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->tb_offset);
+    tb &= 0x00000000FFFFFFFFULL;
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset,
+                     ((uint64_t)value << 32) | tb);
+}
+
+/* Public wrapper: write the upper 32 bits of the time base. */
+void cpu_ppc_store_tbu (CPUPPCState *env, uint32_t value)
+{
+ _cpu_ppc_store_tbu(env, value);
+}
+
+/* Read the alternate time base (full 64-bit value) from the virtual clock. */
+uint64_t cpu_ppc_load_atbl(CPUPPCState *env)
+{
+    uint64_t now_atb = cpu_ppc_get_tb(env->tb_env,
+                                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                                      env->tb_env->atb_offset);
+
+    trace_ppc_tb_load(now_atb);
+
+    return now_atb;
+}
+
+/* Read the upper 32 bits of the alternate time base. */
+uint32_t cpu_ppc_load_atbu(CPUPPCState *env)
+{
+    uint64_t now_atb = cpu_ppc_get_tb(env->tb_env,
+                                      qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                                      env->tb_env->atb_offset);
+
+    trace_ppc_tb_load(now_atb);
+
+    return now_atb >> 32;
+}
+
+/*
+ * Write the lower 32 bits of the alternate time base, keeping the
+ * upper half.
+ *
+ * The virtual clock is sampled once and used both to read the current
+ * value and to compute the new offset, so both refer to the same
+ * instant.
+ */
+void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t tb;
+
+    tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->atb_offset);
+    tb &= 0xFFFFFFFF00000000ULL;
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset,
+                     tb | (uint64_t)value);
+}
+
+/*
+ * Write the upper 32 bits of the alternate time base, keeping the
+ * lower half.
+ *
+ * The virtual clock is sampled once and used both to read the current
+ * value and to compute the new offset, so the preserved lower half
+ * does not slip.
+ */
+void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t tb;
+
+    tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->atb_offset);
+    tb &= 0x00000000FFFFFFFFULL;
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset,
+                     ((uint64_t)value << 32) | tb);
+}
+
+/* Read the VTB: virtual clock converted to ticks plus vtb_offset. */
+uint64_t cpu_ppc_load_vtb(CPUPPCState *env)
+{
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    return cpu_ppc_get_tb(env->tb_env, vmclk, env->tb_env->vtb_offset);
+}
+
+/* Write the VTB by recomputing vtb_offset against the virtual clock. */
+void cpu_ppc_store_vtb(CPUPPCState *env, uint64_t value)
+{
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    cpu_ppc_store_tb(env->tb_env, vmclk, &env->tb_env->vtb_offset, value);
+}
+
+/*
+ * Write the upper 40 bits of the time base from @value, keeping the
+ * current lower 24 bits.
+ *
+ * The masks are spelled with ULL: with a UL suffix, ~0xFFFFFF is only
+ * 32 bits wide on hosts where unsigned long is 32-bit, which would
+ * discard the upper 32 bits of @value. The virtual clock is sampled
+ * once so that the preserved bits and the new offset refer to the
+ * same instant.
+ */
+void cpu_ppc_store_tbu40(CPUPPCState *env, uint64_t value)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t tb;
+
+    tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->tb_offset);
+    tb &= 0xFFFFFFULL;
+    tb |= (value & ~0xFFFFFFULL);
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, tb);
+}
+
+/*
+ * Freeze the time base and the alternate time base at their current
+ * values. Once tb_freq is 0, reads return tb_offset / atb_offset
+ * unchanged.
+ */
+static void cpu_ppc_tb_stop(CPUPPCState *env)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t frozen_tb, frozen_atb, vmclk;
+
+    /* If the time base is already frozen, do nothing */
+    if (tb_env->tb_freq == 0) {
+        return;
+    }
+
+    vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    /* Sample both time bases at the same instant */
+    frozen_tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->tb_offset);
+    frozen_atb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->atb_offset);
+
+    /* Store them back (ie compute the current offsets) */
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, frozen_tb);
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset, frozen_atb);
+
+    /* Set the time base frequency to zero: the time bases are now frozen */
+    tb_env->tb_freq = 0;
+}
+
+/*
+ * Restart a frozen time base: restore tb_freq from decr_freq and
+ * recompute both offsets so that the time bases resume from the
+ * values they were frozen at.
+ */
+static void cpu_ppc_tb_start(CPUPPCState *env)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t frozen_tb, frozen_atb, vmclk;
+
+    /* If the time base is not frozen, do nothing */
+    if (tb_env->tb_freq != 0) {
+        return;
+    }
+
+    vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    /* While frozen, the offsets hold the time base values themselves */
+    frozen_tb = tb_env->tb_offset;
+    frozen_atb = tb_env->atb_offset;
+
+    /* Restore the tb frequency from the decrementer frequency */
+    tb_env->tb_freq = tb_env->decr_freq;
+
+    /* Store both values back against the running clock */
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, frozen_tb);
+    cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset, frozen_atb);
+}
+
+/*
+ * Returns true when PPC_DECR_UNDERFLOW_TRIGGERED is set and
+ * PPC_DECR_UNDERFLOW_LEVEL is clear in the time base flags.
+ */
+bool ppc_decr_clear_on_delivery(CPUPPCState *env)
+{
+    const int mask = PPC_DECR_UNDERFLOW_TRIGGERED | PPC_DECR_UNDERFLOW_LEVEL;
+
+    return (env->tb_env->flags & mask) == PPC_DECR_UNDERFLOW_TRIGGERED;
+}
+
+/*
+ * Compute a decrementer value from its expiry time @next (virtual
+ * clock, ns): the remaining time converted to decr_freq ticks. Once
+ * expired, BookE timers read 0 while the others read a negative
+ * count.
+ */
+static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    int64_t delta_ns = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    int64_t ticks;
+
+    if (delta_ns < 0) {
+        if (tb_env->flags & PPC_TIMER_BOOKE) {
+            ticks = 0;
+        } else {
+            ticks = -muldiv64(-delta_ns, tb_env->decr_freq,
+                              NANOSECONDS_PER_SECOND);
+        }
+    } else {
+        ticks = muldiv64(delta_ns, tb_env->decr_freq, NANOSECONDS_PER_SECOND);
+    }
+    trace_ppc_decr_load(ticks);
+
+    return ticks;
+}
+
+/*
+ * Read the decrementer. Under KVM the value stored in SPR_DECR is
+ * returned; otherwise it is computed from decr_next.
+ */
+target_ulong cpu_ppc_load_decr(CPUPPCState *env)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t remaining;
+
+    if (kvm_enabled()) {
+        return env->spr[SPR_DECR];
+    }
+
+    remaining = _cpu_ppc_load_decr(env, tb_env->decr_next);
+
+    /*
+     * If large decrementer is enabled then the decrementer is sign
+     * extended to 64 bits, otherwise it is a 32 bit value.
+     */
+    return (env->spr[SPR_LPCR] & LPCR_LD) ? remaining : (uint32_t)remaining;
+}
+
+/* Read the hypervisor decrementer, computed from hdecr_next. */
+target_ulong cpu_ppc_load_hdecr(CPUPPCState *env)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    uint64_t remaining = _cpu_ppc_load_decr(env, env->tb_env->hdecr_next);
+
+    /*
+     * If we have a large decrementer (POWER9 or later) then hdecr is sign
+     * extended to 64 bits, otherwise it is 32 bits.
+     */
+    return (pcc->lrg_decr_bits > 32) ? remaining : (uint32_t)remaining;
+}
+
+/* Read the PURR: virtual clock converted to ticks plus purr_offset. */
+uint64_t cpu_ppc_load_purr(CPUPPCState *env)
+{
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    return cpu_ppc_get_tb(env->tb_env, vmclk, env->tb_env->purr_offset);
+}
+
+/*
+ * When the decrementer expires, all we need to do is generate or queue
+ * a CPU exception: raise PPC_INTERRUPT_DECR on @cpu.
+ */
+static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu)
+{
+ /* Raise it */
+ trace_ppc_decr_excp("raise");
+ ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1);
+}
+
+/* Lower the decrementer interrupt (PPC_INTERRUPT_DECR) of @cpu. */
+static inline void cpu_ppc_decr_lower(PowerPCCPU *cpu)
+{
+ ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 0);
+}
+
+/*
+ * Raise the hypervisor decrementer interrupt on @cpu, unless
+ * env->resume_as_sreset is set.
+ */
+static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu)
+{
+    /* Raise it */
+    trace_ppc_decr_excp("raise HV");
+
+    /*
+     * The architecture specifies that we don't deliver HDEC
+     * interrupts in a PM state. Not only they don't cause a
+     * wakeup but they also get effectively discarded.
+     */
+    if (cpu->env.resume_as_sreset) {
+        return;
+    }
+
+    ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1);
+}
+
+/* Lower the hypervisor decrementer interrupt (PPC_INTERRUPT_HDECR) of @cpu. */
+static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu)
+{
+ ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0);
+}
+
+/*
+ * Common implementation of a decrementer write, shared by DECR and
+ * HDECR.
+ *
+ * @nextp:      where the next expiry time (virtual clock, ns) is stored
+ * @timer:      timer re-armed for the next expiry
+ * @raise_excp: called with @cpu to raise the interrupt
+ * @lower_excp: called with @cpu to lower the interrupt
+ * @decr:       decrementer value before the write
+ * @value:      value being written
+ * @nr_bits:    decrementer width used for the sign extension
+ *
+ * Nothing is done under KVM besides tracing.
+ *
+ * NOTE(review): when the interrupt is raised immediately, the function
+ * returns before *@nextp and @timer are updated, so a later
+ * decrementer read is still computed from the previous expiry --
+ * confirm this is intended.
+ * NOTE(review): the "value < 3" test and the expiry computation use
+ * the raw @value rather than the value truncated to @nr_bits --
+ * confirm callers never pass bits above @nr_bits.
+ */
+static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp,
+ QEMUTimer *timer,
+ void (*raise_excp)(void *),
+ void (*lower_excp)(PowerPCCPU *),
+ target_ulong decr, target_ulong value,
+ int nr_bits)
+{
+ CPUPPCState *env = &cpu->env;
+ ppc_tb_t *tb_env = env->tb_env;
+ uint64_t now, next;
+ int64_t signed_value;
+ int64_t signed_decr;
+
+ /* Truncate value to decr_width and sign extend for simplicity */
+ signed_value = sextract64(value, 0, nr_bits);
+ signed_decr = sextract64(decr, 0, nr_bits);
+
+ trace_ppc_decr_store(nr_bits, decr, value);
+
+ if (kvm_enabled()) {
+ /* KVM handles decrementer exceptions, we don't need our own timer */
+ return;
+ }
+
+ /*
+ * Going from 2 -> 1, 1 -> 0 or 0 -> -1 is the event to generate a DEC
+ * interrupt.
+ *
+ * If we get a really small DEC value, we can assume that by the time we
+ * handled it we should inject an interrupt already.
+ *
+ * On MSB level based DEC implementations the MSB always means the interrupt
+ * is pending, so raise it on those.
+ *
+ * On MSB edge based DEC implementations the MSB going from 0 -> 1 triggers
+ * an edge interrupt, so raise it here too.
+ */
+ if ((value < 3) ||
+ ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) ||
+ ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0
+ && signed_decr >= 0)) {
+ (*raise_excp)(cpu);
+ return;
+ }
+
+ /* On MSB level based systems a 0 for the MSB stops interrupt delivery */
+ if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) {
+ (*lower_excp)(cpu);
+ }
+
+ /* Calculate the next timer event */
+ now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq);
+ *nextp = next;
+
+ /* Adjust timer */
+ timer_mod(timer, next);
+}
+
+/*
+ * Store a new decrementer value using the DEC timer, its registered
+ * callback as the "raise" hook and cpu_ppc_decr_lower as the "lower" hook.
+ */
+static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr,
+                                       target_ulong value, int nr_bits)
+{
+    ppc_tb_t *tb = cpu->env.tb_env;
+    QEMUTimer *dec_timer = tb->decr_timer;
+
+    __cpu_ppc_store_decr(cpu, &tb->decr_next, dec_timer, dec_timer->cb,
+                         &cpu_ppc_decr_lower, decr, value, nr_bits);
+}
+
+/*
+ * Write @value to the decrementer of @env.  The register is treated as
+ * 32 bits wide unless LPCR_LD is set in SPR_LPCR, in which case the CPU
+ * class' lrg_decr_bits width is used.
+ */
+void cpu_ppc_store_decr(CPUPPCState *env, target_ulong value)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    int width = (env->spr[SPR_LPCR] & LPCR_LD) ? pcc->lrg_decr_bits : 32;
+    target_ulong old = cpu_ppc_load_decr(env);
+
+    _cpu_ppc_store_decr(cpu, old, value, width);
+}
+
+/* Decrementer timer callback; @opaque is the PowerPCCPU to signal. */
+static void cpu_ppc_decr_cb(void *opaque)
+{
+    cpu_ppc_decr_excp((PowerPCCPU *)opaque);
+}
+
+/*
+ * Store a new hypervisor decrementer value.  Does nothing when no HDEC
+ * timer exists for this CPU.
+ */
+static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr,
+                                        target_ulong value, int nr_bits)
+{
+    ppc_tb_t *tb = cpu->env.tb_env;
+    QEMUTimer *hdec_timer = tb->hdecr_timer;
+
+    if (hdec_timer == NULL) {
+        return;
+    }
+
+    __cpu_ppc_store_decr(cpu, &tb->hdecr_next, hdec_timer, hdec_timer->cb,
+                         &cpu_ppc_hdecr_lower, hdecr, value, nr_bits);
+}
+
+/*
+ * Write @value to the hypervisor decrementer of @env, always using the
+ * CPU class' lrg_decr_bits as the register width.
+ */
+void cpu_ppc_store_hdecr(CPUPPCState *env, target_ulong value)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    PowerPCCPUClass *cpu_class = POWERPC_CPU_GET_CLASS(cpu);
+    target_ulong old = cpu_ppc_load_hdecr(env);
+
+    _cpu_ppc_store_hdecr(cpu, old, value, cpu_class->lrg_decr_bits);
+}
+
+/* Hypervisor decrementer timer callback; @opaque is the PowerPCCPU. */
+static void cpu_ppc_hdecr_cb(void *opaque)
+{
+    cpu_ppc_hdecr_excp((PowerPCCPU *)opaque);
+}
+
+/*
+ * Set the PURR to @value by recomputing tb_env->purr_offset against the
+ * current virtual clock.
+ */
+void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value)
+{
+    ppc_tb_t *tb = env->tb_env;
+    uint64_t vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+
+    cpu_ppc_store_tb(tb, vmclk, &tb->purr_offset, value);
+}
+
+/*
+ * Clock setup callback: record @freq as both the timebase and the
+ * decrementer frequency, then reload DEC/HDEC and clear the PURR.
+ * @opaque is the CPUPPCState.
+ */
+static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq)
+{
+    CPUPPCState *env = opaque;
+    ppc_tb_t *tb = env->tb_env;
+    PowerPCCPU *cpu = env_archcpu(env);
+    const target_ulong all_ones = 0xFFFFFFFF;
+
+    tb->tb_freq = freq;
+    tb->decr_freq = freq;
+
+    /*
+     * There is a bug in Linux 2.4 kernels: if a decrementer exception is
+     * pending when it enables msr_ee at startup, it's not ready to handle
+     * it...  Hence both decrementers are loaded with their maximum 32-bit
+     * value.
+     */
+    _cpu_ppc_store_decr(cpu, all_ones, all_ones, 32);
+    _cpu_ppc_store_hdecr(cpu, all_ones, all_ones, 32);
+    cpu_ppc_store_purr(env, 0x0000000000000000ULL);
+}
+
+/*
+ * Snapshot the guest timebase of the first CPU into @tb, together with the
+ * host time of day and whether the VM is paused or being saved.
+ */
+static void timebase_save(PPCTimebase *tb)
+{
+    uint64_t host_ticks = cpu_get_host_ticks();
+    ppc_tb_t *tb_env = POWERPC_CPU(first_cpu)->env.tb_env;
+
+    if (tb_env == NULL) {
+        error_report("No timebase object");
+        return;
+    }
+
+    /* not used anymore, we keep it for compatibility */
+    tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST);
+
+    /*
+     * tb_offset is only expected to be changed by QEMU so
+     * there is no need to update it from KVM here
+     */
+    tb->guest_timebase = host_ticks + tb_env->tb_offset;
+
+    tb->runstate_paused = runstate_check(RUN_STATE_PAUSED) ||
+                          runstate_check(RUN_STATE_SAVE_VM);
+}
+
+/*
+ * Restore the guest timebase from @tb: compute the offset that makes the
+ * guest timebase continue from tb->guest_timebase relative to the current
+ * host tick counter, and apply it to every CPU (and to KVM).
+ */
+static void timebase_load(PPCTimebase *tb)
+{
+    CPUState *cpu;
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+    int64_t tb_off_adj, tb_off, tb_diff;
+    int64_t freq;
+
+    if (!first_ppc_cpu->env.tb_env) {
+        error_report("No timebase object");
+        return;
+    }
+
+    /*
+     * Keep the frequency signed: dividing a (possibly negative) int64_t
+     * difference by an unsigned long would convert the difference to
+     * unsigned on LP64 hosts and trace a bogus number of seconds.
+     */
+    freq = first_ppc_cpu->env.tb_env->tb_freq;
+
+    tb_off_adj = tb->guest_timebase - cpu_get_host_ticks();
+
+    tb_off = first_ppc_cpu->env.tb_env->tb_offset;
+    tb_diff = tb_off_adj - tb_off;
+    /* Guard the seconds computation against a zero timebase frequency */
+    trace_ppc_tb_adjust(tb_off, tb_off_adj, tb_diff,
+                        freq ? tb_diff / freq : 0);
+
+    /* Set new offset to all CPUs */
+    CPU_FOREACH(cpu) {
+        PowerPCCPU *pcpu = POWERPC_CPU(cpu);
+        pcpu->env.tb_env->tb_offset = tb_off_adj;
+        kvmppc_set_reg_tb_offset(pcpu, pcpu->env.tb_env->tb_offset);
+    }
+}
+
+/*
+ * VM run-state change handler: save the timebase into the PPCTimebase
+ * passed as @opaque when the VM stops, reload it when the VM resumes.
+ * @state is not used.
+ */
+void cpu_ppc_clock_vm_state_change(void *opaque, bool running,
+                                   RunState state)
+{
+    PPCTimebase *timebase = opaque;
+
+    if (!running) {
+        timebase_save(timebase);
+        return;
+    }
+
+    timebase_load(timebase);
+}
+
+/*
+ * When migrating a running guest, read the clock just
+ * before migration, so that the guest clock counts
+ * during the events between:
+ *
+ * * vm_stop()
+ * *
+ * * pre_save()
+ *
+ * This reduces clock difference on migration from 5s
+ * to 0.1s (when max_downtime == 5s), because sending the
+ * final pages of memory (which happens between vm_stop()
+ * and pre_save()) takes max_downtime.
+ */
+/* VMState pre_save hook for the PPCTimebase passed as @opaque; returns 0. */
+static int timebase_pre_save(void *opaque)
+{
+    PPCTimebase *timebase = opaque;
+
+    /* guest_timebase won't be overridden in case of paused guest or savevm */
+    if (timebase->runstate_paused) {
+        return 0;
+    }
+
+    timebase_save(timebase);
+    return 0;
+}
+
+const VMStateDescription vmstate_ppc_timebase = { /* migration description of PPCTimebase */
+ .name = "timebase",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .pre_save = timebase_pre_save, /* refreshes guest_timebase unless the guest was paused */
+ .fields = (VMStateField []) {
+ VMSTATE_UINT64(guest_timebase, PPCTimebase),
+ VMSTATE_INT64(time_of_the_day_ns, PPCTimebase), /* still transferred although timebase_save() marks it unused */
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* Set up (once) timebase frequency (in Hz) */
+/*
+ * Allocate the timebase state of @env, create the decrementer timer (and
+ * the hypervisor decrementer timer when env->has_hv_mode is set), program
+ * the initial frequency @freq and return the callback used to change it.
+ */
+clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq)
+{
+    PowerPCCPU *cpu = env_archcpu(env);
+    ppc_tb_t *tb = g_malloc0(sizeof(*tb));
+
+    env->tb_env = tb;
+
+    tb->flags = PPC_DECR_UNDERFLOW_TRIGGERED;
+    if (is_book3s_arch2x(env)) {
+        /* All Book3S 64bit CPUs implement level based DEC logic */
+        tb->flags |= PPC_DECR_UNDERFLOW_LEVEL;
+    }
+
+    /* Create new timer */
+    tb->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu);
+    tb->hdecr_timer = env->has_hv_mode
+        ? timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb, cpu)
+        : NULL;
+
+    cpu_ppc_set_tb_clk(env, freq);
+
+    return &cpu_ppc_set_tb_clk;
+}
+
+/* Specific helpers for POWER & PowerPC 601 RTC */
+/* RTCU store: forwarded unchanged to the timebase-upper store helper. */
+void cpu_ppc601_store_rtcu (CPUPPCState *env, uint32_t value)
+{
+    uint32_t rtcu = value;
+
+    _cpu_ppc_store_tbu(env, rtcu);
+}
+
+/* RTCU load: returns whatever the timebase-upper load helper returns. */
+uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env)
+{
+    uint32_t rtcu = _cpu_ppc_load_tbu(env);
+
+    return rtcu;
+}
+
+/* RTCL store: only the bits selected by mask 0x3FFFFF80 are written. */
+void cpu_ppc601_store_rtcl (CPUPPCState *env, uint32_t value)
+{
+    uint32_t masked = value & 0x3FFFFF80;
+
+    cpu_ppc_store_tbl(env, masked);
+}
+
+/* RTCL load: timebase-lower value restricted to mask 0x3FFFFF80. */
+uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env)
+{
+    return (uint32_t)(cpu_ppc_load_tbl(env) & 0x3FFFFF80);
+}
+
+/*****************************************************************************/
+/* PowerPC 40x timers */
+
+/* PIT, FIT & WDT */
+typedef struct ppc40x_timer_t ppc40x_timer_t;
+struct ppc40x_timer_t { /* per-CPU 40x timer state, reached through ppc_tb_t.opaque */
+ uint64_t pit_reload; /* PIT auto-reload value */
+ uint64_t fit_next; /* Tick for next FIT interrupt */
+ QEMUTimer *fit_timer; /* timer whose callback is cpu_4xx_fit_cb */
+ uint64_t wdt_next; /* Tick for next WDT interrupt */
+ QEMUTimer *wdt_timer; /* timer whose callback is cpu_4xx_wdt_cb */
+
+ /* 405 have the PIT, 440 have a DECR. */
+ unsigned int decr_excp; /* interrupt number passed to ppc_set_irq() on PIT expiry */
+};
+
+/* Fixed interval timer */
+/*
+ * FIT expiry callback; @opaque is the CPUPPCState.  Re-arms the timer for
+ * the next period, sets TSR bit 26 and raises PPC_INTERRUPT_FIT when TCR
+ * bit 23 is set.
+ */
+static void cpu_4xx_fit_cb (void *opaque)
+{
+    CPUPPCState *env = opaque;
+    PowerPCCPU *cpu = env_archcpu(env);
+    ppc_tb_t *tb_env = env->tb_env;
+    ppc40x_timer_t *timers = tb_env->opaque;
+    uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    /* TCR bits 25:24 select a period of 2^9, 2^13, 2^17 or 2^21 ticks */
+    unsigned int period_sel = (env->spr[SPR_40x_TCR] >> 24) & 0x3;
+    uint64_t period = (uint64_t)1 << (9 + 4 * period_sel);
+    uint64_t next = now + muldiv64(period, NANOSECONDS_PER_SECOND,
+                                   tb_env->tb_freq);
+    int irq_enabled = (int)((env->spr[SPR_40x_TCR] >> 23) & 0x1);
+
+    /* Always move forward, even if the period rounds down to 0 ns */
+    if (next == now) {
+        next++;
+    }
+    timer_mod(timers->fit_timer, next);
+
+    env->spr[SPR_40x_TSR] |= 1 << 26;
+    if (irq_enabled) {
+        ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1);
+    }
+    trace_ppc4xx_fit(irq_enabled,
+                     env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
+}
+
+/* Programmable interval timer */
+/*
+ * Start or stop the PIT (backed by tb_env->decr_timer).
+ *
+ * The PIT is stopped when the reload value is <= 1, when TCR bit 26 is
+ * clear, or when called from the expiry path (@is_excp) with TCR bit 22
+ * clear.  Otherwise it is armed pit_reload ticks from now; on the expiry
+ * path the distance between the previous deadline and now is added.
+ */
+static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp)
+{
+    ppc40x_timer_t *timers = tb_env->opaque;
+    target_ulong tcr = env->spr[SPR_40x_TCR];
+    bool bit26_set = (tcr >> 26) & 0x1;
+    bool rearm_on_expiry = (tcr >> 22) & 0x1;
+    uint64_t now, deadline;
+
+    if (timers->pit_reload <= 1 || !bit26_set ||
+        (is_excp && !rearm_on_expiry)) {
+        /* Stop PIT */
+        trace_ppc4xx_pit_stop();
+        timer_del(tb_env->decr_timer);
+        return;
+    }
+
+    trace_ppc4xx_pit_start(timers->pit_reload);
+    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    deadline = now + muldiv64(timers->pit_reload,
+                              NANOSECONDS_PER_SECOND, tb_env->decr_freq);
+    if (is_excp) {
+        deadline += tb_env->decr_next - now;
+    }
+    if (deadline == now) {
+        deadline++;
+    }
+    timer_mod(tb_env->decr_timer, deadline);
+    tb_env->decr_next = deadline;
+}
+
+/*
+ * PIT expiry callback; @opaque is the CPUPPCState.  Sets TSR bit 27,
+ * raises the configured interrupt when TCR bit 26 is set, then lets
+ * start_stop_pit() decide whether to re-arm.
+ */
+static void cpu_4xx_pit_cb (void *opaque)
+{
+    CPUPPCState *env = opaque;
+    PowerPCCPU *cpu = env_archcpu(env);
+    ppc_tb_t *tb_env = env->tb_env;
+    ppc40x_timer_t *timers = tb_env->opaque;
+
+    env->spr[SPR_40x_TSR] |= 1 << 27;
+    if ((env->spr[SPR_40x_TCR] >> 26) & 0x1) {
+        ppc_set_irq(cpu, timers->decr_excp, 1);
+    }
+
+    start_stop_pit(env, tb_env, 1);
+
+    trace_ppc4xx_pit((int)((env->spr[SPR_40x_TCR] >> 22) & 0x1),
+                     (int)((env->spr[SPR_40x_TCR] >> 26) & 0x1),
+                     env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR],
+                     timers->pit_reload);
+}
+
+/* Watchdog timer */
+/*
+ * Watchdog expiry callback; @opaque is the CPUPPCState.
+ *
+ * The action depends on TSR bits 31:30: values 0 and 1 re-arm the timer
+ * and set TSR bit 31; value 2 re-arms, sets TSR bit 30 and raises
+ * PPC_INTERRUPT_WDT when TCR bit 27 is set; value 3 copies TCR bits 29:28
+ * into TSR and performs the reset those bits select.
+ */
+static void cpu_4xx_wdt_cb (void *opaque)
+{
+    CPUPPCState *env = opaque;
+    PowerPCCPU *cpu = env_archcpu(env);
+    ppc_tb_t *tb_env = env->tb_env;
+    ppc40x_timer_t *timers = tb_env->opaque;
+    uint64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    /* TCR bits 31:30 select a period of 2^17, 2^21, 2^25 or 2^29 ticks */
+    unsigned int period_sel = (env->spr[SPR_40x_TCR] >> 30) & 0x3;
+    uint64_t period = (uint64_t)1 << (17 + 4 * period_sel);
+    uint64_t next = now + muldiv64(period, NANOSECONDS_PER_SECOND,
+                                   tb_env->decr_freq);
+
+    if (next == now) {
+        next++;
+    }
+    trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]);
+
+    switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) {
+    case 0x0:
+    case 0x1:
+        timer_mod(timers->wdt_timer, next);
+        timers->wdt_next = next;
+        env->spr[SPR_40x_TSR] |= 1U << 31;
+        break;
+    case 0x2:
+        timer_mod(timers->wdt_timer, next);
+        timers->wdt_next = next;
+        env->spr[SPR_40x_TSR] |= 1 << 30;
+        if ((env->spr[SPR_40x_TCR] >> 27) & 0x1) {
+            ppc_set_irq(cpu, PPC_INTERRUPT_WDT, 1);
+        }
+        break;
+    case 0x3:
+        env->spr[SPR_40x_TSR] &= ~0x30000000;
+        env->spr[SPR_40x_TSR] |= env->spr[SPR_40x_TCR] & 0x30000000;
+        switch ((env->spr[SPR_40x_TCR] >> 28) & 0x3) {
+        case 0x1: /* Core reset */
+            ppc40x_core_reset(cpu);
+            break;
+        case 0x2: /* Chip reset */
+            ppc40x_chip_reset(cpu);
+            break;
+        case 0x3: /* System reset */
+            ppc40x_system_reset(cpu);
+            break;
+        case 0x0:
+        default:
+            /* No reset */
+            break;
+        }
+    }
+}
+
+/* Record @val as the PIT reload value and (re)start or stop the PIT. */
+void store_40x_pit (CPUPPCState *env, target_ulong val)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    ppc40x_timer_t *timers = tb_env->opaque;
+
+    trace_ppc40x_store_pit(val);
+    timers->pit_reload = val;
+    start_stop_pit(env, tb_env, 0);
+}
+
+/* The PIT is read through the generic decrementer load helper. */
+target_ulong load_40x_pit (CPUPPCState *env)
+{
+    target_ulong pit = cpu_ppc_load_decr(env);
+
+    return pit;
+}
+
+/*
+ * Clock setup callback for 40x: store @freq as timebase and decrementer
+ * frequency.  @opaque is the CPUPPCState.
+ */
+static void ppc_40x_set_tb_clk (void *opaque, uint32_t freq)
+{
+    ppc_tb_t *tb = ((CPUPPCState *)opaque)->tb_env;
+
+    trace_ppc40x_set_tb_clk(freq);
+    tb->tb_freq = freq;
+    tb->decr_freq = freq;
+    /* XXX: we should also update all timers */
+}
+
+/*
+ * Allocate the timebase and 40x timer state of @env, create the PIT, FIT
+ * and watchdog timers and return the callback used to change the clock.
+ *
+ * @freq:      initial timebase/decrementer frequency
+ * @decr_excp: interrupt number raised when the PIT expires
+ */
+clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq,
+                                  unsigned int decr_excp)
+{
+    ppc_tb_t *tb_env;
+    ppc40x_timer_t *ppc40x_timer;
+
+    /*
+     * g_malloc0() aborts on allocation failure, so neither pointer can be
+     * NULL here and the timers are created unconditionally.
+     */
+    tb_env = g_malloc0(sizeof(ppc_tb_t));
+    env->tb_env = tb_env;
+    tb_env->flags = PPC_DECR_UNDERFLOW_TRIGGERED;
+    ppc40x_timer = g_malloc0(sizeof(ppc40x_timer_t));
+    tb_env->tb_freq = freq;
+    tb_env->decr_freq = freq;
+    tb_env->opaque = ppc40x_timer;
+    trace_ppc40x_timers_init(freq);
+
+    /* We use decr timer for PIT */
+    tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_pit_cb, env);
+    ppc40x_timer->fit_timer =
+        timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_fit_cb, env);
+    ppc40x_timer->wdt_timer =
+        timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_wdt_cb, env);
+    ppc40x_timer->decr_excp = decr_excp;
+
+    return &ppc_40x_set_tb_clk;
+}
+
+/*****************************************************************************/
+/* Embedded PowerPC Device Control Registers */
+typedef struct ppc_dcrn_t ppc_dcrn_t;
+struct ppc_dcrn_t { /* handlers registered for one DCR number */
+ dcr_read_cb dcr_read; /* NULL when the register cannot be read */
+ dcr_write_cb dcr_write; /* NULL when the register cannot be written */
+ void *opaque; /* passed back as first argument of both callbacks */
+};
+
+/* XXX: on 460, DCR addresses are 32 bits wide,
+ * using DCRIPR to get the 22 upper bits of the DCR address
+ */
+#define DCRN_NB 1024
+struct ppc_dcr_t { /* DCR dispatch table plus optional error hooks */
+ ppc_dcrn_t dcrn[DCRN_NB]; /* indexed by DCR number */
+ int (*read_error)(int dcrn); /* called for unhandled reads; may be NULL */
+ int (*write_error)(int dcrn); /* called for unhandled writes; may be NULL */
+};
+
+/*
+ * Read DCR @dcrn into *@valp.
+ *
+ * Returns 0 on success.  When @dcrn is out of range or has no read
+ * handler, returns the result of the read_error hook if one is set,
+ * otherwise -1; *@valp is left untouched in that case.
+ */
+int ppc_dcr_read (ppc_dcr_t *dcr_env, int dcrn, uint32_t *valp)
+{
+    if (dcrn >= 0 && dcrn < DCRN_NB) {
+        ppc_dcrn_t *slot = &dcr_env->dcrn[dcrn];
+
+        if (slot->dcr_read != NULL) {
+            *valp = (*slot->dcr_read)(slot->opaque, dcrn);
+            return 0;
+        }
+    }
+
+    if (dcr_env->read_error == NULL) {
+        return -1;
+    }
+    return (*dcr_env->read_error)(dcrn);
+}
+
+/*
+ * Write @val to DCR @dcrn.
+ *
+ * Returns 0 on success.  When @dcrn is out of range or has no write
+ * handler, returns the result of the write_error hook if one is set,
+ * otherwise -1.
+ */
+int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val)
+{
+    if (dcrn >= 0 && dcrn < DCRN_NB) {
+        ppc_dcrn_t *slot = &dcr_env->dcrn[dcrn];
+
+        if (slot->dcr_write != NULL) {
+            (*slot->dcr_write)(slot->opaque, dcrn, val);
+            return 0;
+        }
+    }
+
+    if (dcr_env->write_error == NULL) {
+        return -1;
+    }
+    return (*dcr_env->write_error)(dcrn);
+}
+
+/*
+ * Register read/write handlers for DCR @dcrn of @env.
+ *
+ * Returns 0 on success, -1 when the DCR environment is missing, @dcrn is
+ * out of range, or the slot already has an opaque pointer or a handler.
+ */
+int ppc_dcr_register (CPUPPCState *env, int dcrn, void *opaque,
+                      dcr_read_cb dcr_read, dcr_write_cb dcr_write)
+{
+    ppc_dcr_t *dcr_env = env->dcr_env;
+    ppc_dcrn_t *slot;
+
+    if (dcr_env == NULL || dcrn < 0 || dcrn >= DCRN_NB) {
+        return -1;
+    }
+
+    slot = &dcr_env->dcrn[dcrn];
+    if (slot->opaque != NULL || slot->dcr_read != NULL ||
+        slot->dcr_write != NULL) {
+        /* Slot already in use */
+        return -1;
+    }
+
+    slot->opaque = opaque;
+    slot->dcr_read = dcr_read;
+    slot->dcr_write = dcr_write;
+
+    return 0;
+}
+
+/*
+ * Allocate an empty DCR environment for @env and install the optional
+ * error hooks.  Always returns 0.
+ */
+int ppc_dcr_init (CPUPPCState *env, int (*read_error)(int dcrn),
+                  int (*write_error)(int dcrn))
+{
+    ppc_dcr_t *table = g_malloc0(sizeof(*table));
+
+    table->read_error = read_error;
+    table->write_error = write_error;
+    env->dcr_env = table;
+
+    return 0;
+}
+
+/*****************************************************************************/
+
+/* Return the default value registered for SPR_PIR on @cpu. */
+int ppc_cpu_pir(PowerPCCPU *cpu)
+{
+    return cpu->env.spr_cb[SPR_PIR].default_value;
+}
+
+/* Return the first CPU whose ppc_cpu_pir() equals @pir, or NULL. */
+PowerPCCPU *ppc_get_vcpu_by_pir(int pir)
+{
+    PowerPCCPU *match = NULL;
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *candidate = POWERPC_CPU(cs);
+
+        if (ppc_cpu_pir(candidate) != pir) {
+            continue;
+        }
+        match = candidate;
+        break;
+    }
+
+    return match;
+}
+
+/*
+ * Clear the recorded IRQ input state of @cpu and tell KVM that the
+ * external interrupt is deasserted.
+ */
+void ppc_irq_reset(PowerPCCPU *cpu)
+{
+    cpu->env.irq_input_state = 0;
+    kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0);
+}
diff --git a/hw/ppc/ppc405.h b/hw/ppc/ppc405.h
new file mode 100644
index 000000000..c58f73988
--- /dev/null
+++ b/hw/ppc/ppc405.h
@@ -0,0 +1,72 @@
+/*
+ * QEMU PowerPC 405 shared definitions
+ *
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef PPC405_H
+#define PPC405_H
+
+#include "hw/ppc/ppc4xx.h"
+
+/* Bootinfo as set-up by u-boot */
+typedef struct ppc4xx_bd_info_t ppc4xx_bd_info_t;
+/*
+ * Host-side copy of the board information block handed to the guest.
+ * The offsets noted on some fields are those already annotated in this
+ * file.
+ */
+struct ppc4xx_bd_info_t {
+    uint32_t bi_memstart;
+    uint32_t bi_memsize;
+    uint32_t bi_flashstart;
+    uint32_t bi_flashsize;
+    uint32_t bi_flashoffset; /* 0x10 */
+    uint32_t bi_sramstart;
+    uint32_t bi_sramsize;
+    uint32_t bi_bootflags;
+    uint32_t bi_ipaddr; /* 0x20 */
+    uint8_t  bi_enetaddr[6];
+    uint16_t bi_ethspeed;
+    uint32_t bi_intfreq;
+    uint32_t bi_busfreq; /* 0x30 */
+    uint32_t bi_baudrate;
+    uint8_t  bi_s_version[4];
+    uint8_t  bi_r_version[32];
+    uint32_t bi_procfreq;
+    uint32_t bi_plb_busfreq;
+    uint32_t bi_pci_busfreq;
+    uint8_t  bi_pci_enetaddr[6];
+    /* Second MAC address: six bytes, like bi_pci_enetaddr above */
+    uint8_t  bi_pci_enetaddr2[6];
+    uint32_t bi_opbfreq;
+    uint32_t bi_iic_fast[2];
+};
+
+/* PowerPC 405 core */
+ram_addr_t ppc405_set_bootinfo (CPUPPCState *env, ppc4xx_bd_info_t *bd,
+ uint32_t flags);
+
+void ppc4xx_plb_init(CPUPPCState *env);
+void ppc405_ebc_init(CPUPPCState *env);
+
+CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem,
+ MemoryRegion ram_memories[2],
+ hwaddr ram_bases[2],
+ hwaddr ram_sizes[2],
+ uint32_t sysclk, DeviceState **uicdev,
+ int do_init);
+
+#endif /* PPC405_H */
diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
new file mode 100644
index 000000000..972a7a4a3
--- /dev/null
+++ b/hw/ppc/ppc405_boards.c
@@ -0,0 +1,564 @@
+/*
+ * QEMU PowerPC 405 evaluation boards emulation
+ *
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "cpu.h"
+#include "hw/ppc/ppc.h"
+#include "hw/qdev-properties.h"
+#include "hw/sysbus.h"
+#include "ppc405.h"
+#include "hw/rtc/m48t59.h"
+#include "hw/block/flash.h"
+#include "sysemu/qtest.h"
+#include "sysemu/reset.h"
+#include "sysemu/block-backend.h"
+#include "hw/boards.h"
+#include "qemu/error-report.h"
+#include "hw/loader.h"
+#include "qemu/cutils.h"
+
+#define BIOS_FILENAME "ppc405_rom.bin"
+#define BIOS_SIZE (2 * MiB)
+
+#define KERNEL_LOAD_ADDR 0x00000000
+#define INITRD_LOAD_ADDR 0x01800000
+
+#define USE_FLASH_BIOS
+
+/*****************************************************************************/
+/* PPC405EP reference board (IBM) */
+/* Standalone board with:
+ * - PowerPC 405EP CPU
+ * - SDRAM (0x00000000)
+ * - Flash (0xFFF80000)
+ * - SRAM (0xFFF00000)
+ * - NVRAM (0xF0000000)
+ * - FPGA (0xF0300000)
+ */
+typedef struct ref405ep_fpga_t ref405ep_fpga_t;
+struct ref405ep_fpga_t { /* state of the two byte-wide FPGA registers */
+ uint8_t reg0; /* offset 0x0; writes are ignored (read only) */
+ uint8_t reg1; /* offset 0x1; read/write */
+};
+
+/*
+ * FPGA read handler: offsets 0 and 1 return reg0 and reg1, every other
+ * offset reads as 0.  @size is ignored.
+ */
+static uint64_t ref405ep_fpga_readb(void *opaque, hwaddr addr, unsigned size)
+{
+    const ref405ep_fpga_t *fpga = opaque;
+
+    if (addr == 0x0) {
+        return fpga->reg0;
+    }
+    if (addr == 0x1) {
+        return fpga->reg1;
+    }
+    return 0;
+}
+
+/*
+ * FPGA write handler: only offset 1 (reg1) is writable; offset 0 is read
+ * only and every other offset is ignored.  @size is ignored.
+ */
+static void ref405ep_fpga_writeb(void *opaque, hwaddr addr, uint64_t value,
+                                 unsigned size)
+{
+    ref405ep_fpga_t *fpga = opaque;
+
+    if (addr == 0x1) {
+        fpga->reg1 = value;
+    }
+}
+
+static const MemoryRegionOps ref405ep_fpga_ops = { /* MMIO callbacks of the FPGA region */
+ .read = ref405ep_fpga_readb,
+ .write = ref405ep_fpga_writeb,
+ .impl.min_access_size = 1, /* the handlers implement byte accesses only */
+ .impl.max_access_size = 1,
+ .valid.min_access_size = 1, /* guest accesses of 1 to 4 bytes are declared valid */
+ .valid.max_access_size = 4,
+ .endianness = DEVICE_BIG_ENDIAN,
+};
+
+/* Reset handler: reg0 = 0x00, reg1 = 0x0F.  @opaque is the FPGA state. */
+static void ref405ep_fpga_reset (void *opaque)
+{
+    ref405ep_fpga_t *state = opaque;
+
+    state->reg0 = 0x00;
+    state->reg1 = 0x0F;
+}
+
+/*
+ * Create the FPGA device: a 0x100-byte I/O region mapped at @base inside
+ * @sysmem, with its reset handler registered.
+ */
+static void ref405ep_fpga_init(MemoryRegion *sysmem, uint32_t base)
+{
+    MemoryRegion *region = g_new(MemoryRegion, 1);
+    ref405ep_fpga_t *state = g_malloc0(sizeof(*state));
+
+    memory_region_init_io(region, NULL, &ref405ep_fpga_ops, state,
+                          "fpga", 0x00000100);
+    memory_region_add_subregion(sysmem, base, region);
+    qemu_register_reset(&ref405ep_fpga_reset, state);
+}
+
+/*
+ * Machine init for the PPC405EP reference board.
+ *
+ * Creates the 405EP SoC with a single RAM bank, the SRAM, the BIOS (either
+ * a pflash drive or a ROM image), the FPGA and the NVRAM.  When a kernel
+ * is given, it is loaded at KERNEL_LOAD_ADDR together with an optional
+ * initrd, a board-info block and the command line, and the CPU registers
+ * r3-r7 and NIP are set up for a direct kernel boot.
+ *
+ * Exits the process on invalid RAM size or on any image load failure.
+ */
+static void ref405ep_init(MachineState *machine)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const char *bios_name = machine->firmware ?: BIOS_FILENAME;
+    const char *kernel_filename = machine->kernel_filename;
+    const char *kernel_cmdline = machine->kernel_cmdline;
+    const char *initrd_filename = machine->initrd_filename;
+    char *filename;
+    ppc4xx_bd_info_t bd;
+    CPUPPCState *env;
+    DeviceState *dev;
+    SysBusDevice *s;
+    MemoryRegion *bios;
+    MemoryRegion *sram = g_new(MemoryRegion, 1);
+    ram_addr_t bdloc;
+    MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
+    hwaddr ram_bases[2], ram_sizes[2];
+    target_ulong sram_size;
+    long bios_size;
+    target_ulong kernel_base, initrd_base;
+    long kernel_size, initrd_size;
+    int linux_boot;
+    int len;
+    DriveInfo *dinfo;
+    MemoryRegion *sysmem = get_system_memory();
+    DeviceState *uicdev;
+
+    if (machine->ram_size != mc->default_ram_size) {
+        char *sz = size_to_str(mc->default_ram_size);
+        error_report("Invalid RAM size, should be %s", sz);
+        g_free(sz);
+        exit(EXIT_FAILURE);
+    }
+
+    /* XXX: fix this */
+    memory_region_init_alias(&ram_memories[0], NULL, "ef405ep.ram.alias",
+                             machine->ram, 0, machine->ram_size);
+    ram_bases[0] = 0;
+    ram_sizes[0] = machine->ram_size;
+    /* Second bank is present but empty */
+    memory_region_init(&ram_memories[1], NULL, "ef405ep.ram1", 0);
+    ram_bases[1] = 0x00000000;
+    ram_sizes[1] = 0x00000000;
+    env = ppc405ep_init(sysmem, ram_memories, ram_bases, ram_sizes,
+                        33333333, &uicdev, kernel_filename == NULL ? 0 : 1);
+    /* allocate SRAM */
+    sram_size = 512 * KiB;
+    memory_region_init_ram(sram, NULL, "ef405ep.sram", sram_size,
+                           &error_fatal);
+    memory_region_add_subregion(sysmem, 0xFFF00000, sram);
+    /* allocate and load BIOS */
+#ifdef USE_FLASH_BIOS
+    dinfo = drive_get(IF_PFLASH, 0, 0);
+    if (dinfo) {
+        /* Flash is mapped so that it ends at the top of the 32-bit space */
+        bios_size = 8 * MiB;
+        pflash_cfi02_register((uint32_t)(-bios_size),
+                              "ef405ep.bios", bios_size,
+                              blk_by_legacy_dinfo(dinfo),
+                              64 * KiB, 1,
+                              2, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA,
+                              1);
+    } else
+#endif
+    {
+        bios = g_new(MemoryRegion, 1);
+        memory_region_init_rom(bios, NULL, "ef405ep.bios", BIOS_SIZE,
+                               &error_fatal);
+
+        filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+        if (filename) {
+            bios_size = load_image_size(filename,
+                                        memory_region_get_ram_ptr(bios),
+                                        BIOS_SIZE);
+            g_free(filename);
+            if (bios_size < 0) {
+                error_report("Could not load PowerPC BIOS '%s'", bios_name);
+                exit(1);
+            }
+            /* Round up to 4 KiB and map the ROM at the top of memory */
+            bios_size = (bios_size + 0xfff) & ~0xfff;
+            memory_region_add_subregion(sysmem, (uint32_t)(-bios_size), bios);
+        } else if (!qtest_enabled() || kernel_filename != NULL) {
+            error_report("Could not load PowerPC BIOS '%s'", bios_name);
+            exit(1);
+        } else {
+            /* Avoid an uninitialized variable warning */
+            bios_size = -1;
+        }
+    }
+    /* Register FPGA */
+    ref405ep_fpga_init(sysmem, 0xF0300000);
+    /* Register NVRAM */
+    dev = qdev_new("sysbus-m48t08");
+    qdev_prop_set_int32(dev, "base-year", 1968);
+    s = SYS_BUS_DEVICE(dev);
+    sysbus_realize_and_unref(s, &error_fatal);
+    sysbus_mmio_map(s, 0, 0xF0000000);
+    /* Load kernel */
+    linux_boot = (kernel_filename != NULL);
+    if (linux_boot) {
+        memset(&bd, 0, sizeof(bd));
+        bd.bi_memstart = 0x00000000;
+        bd.bi_memsize = machine->ram_size;
+        bd.bi_flashstart = -bios_size;
+        /*
+         * NOTE(review): the flash size is set to the same value as the
+         * flash start (-bios_size); this looks like it should be
+         * bios_size -- left unchanged, confirm against the guest ABI.
+         */
+        bd.bi_flashsize = -bios_size;
+        bd.bi_flashoffset = 0;
+        bd.bi_sramstart = 0xFFF00000;
+        bd.bi_sramsize = sram_size;
+        bd.bi_bootflags = 0;
+        bd.bi_intfreq = 133333333;
+        bd.bi_busfreq = 33333333;
+        bd.bi_baudrate = 115200;
+        bd.bi_s_version[0] = 'Q';
+        bd.bi_s_version[1] = 'M';
+        bd.bi_s_version[2] = 'U';
+        bd.bi_s_version[3] = '\0';
+        bd.bi_r_version[0] = 'Q';
+        bd.bi_r_version[1] = 'E';
+        bd.bi_r_version[2] = 'M';
+        bd.bi_r_version[3] = 'U';
+        bd.bi_r_version[4] = '\0';
+        bd.bi_procfreq = 133333333;
+        bd.bi_plb_busfreq = 33333333;
+        bd.bi_pci_busfreq = 33333333;
+        bd.bi_opbfreq = 33333333;
+        bdloc = ppc405_set_bootinfo(env, &bd, 0x00000001);
+        env->gpr[3] = bdloc;
+        kernel_base = KERNEL_LOAD_ADDR;
+        /* now we can load the kernel */
+        kernel_size = load_image_targphys(kernel_filename, kernel_base,
+                                          machine->ram_size - kernel_base);
+        if (kernel_size < 0) {
+            error_report("could not load kernel '%s'", kernel_filename);
+            exit(1);
+        }
+        printf("Load kernel size %ld at " TARGET_FMT_lx,
+               kernel_size, kernel_base);
+        /* load initrd */
+        if (initrd_filename) {
+            initrd_base = INITRD_LOAD_ADDR;
+            initrd_size = load_image_targphys(initrd_filename, initrd_base,
+                                              machine->ram_size - initrd_base);
+            if (initrd_size < 0) {
+                error_report("could not load initial ram disk '%s'",
+                             initrd_filename);
+                exit(1);
+            }
+        } else {
+            initrd_base = 0;
+            initrd_size = 0;
+        }
+        env->gpr[4] = initrd_base;
+        env->gpr[5] = initrd_size;
+        if (kernel_cmdline != NULL) {
+            len = strlen(kernel_cmdline);
+            /*
+             * Reserve a 256-byte aligned area below the board info that
+             * also holds the terminating NUL: len + 1 bytes are written,
+             * so rounding up len alone would let the NUL spill onto the
+             * first byte of the board info when len is a multiple of 256.
+             */
+            bdloc -= ((len + 1 + 255) & ~255);
+            cpu_physical_memory_write(bdloc, kernel_cmdline, len + 1);
+            env->gpr[6] = bdloc;
+            env->gpr[7] = bdloc + len;
+        } else {
+            env->gpr[6] = 0;
+            env->gpr[7] = 0;
+        }
+        env->nip = KERNEL_LOAD_ADDR;
+    }
+}
+
+/* Fill in the MachineClass of the "ref405ep" machine; @data is unused. */
+static void ref405ep_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+
+    machine_class->desc = "ref405ep";
+    machine_class->init = ref405ep_init;
+    machine_class->default_ram_id = "ef405ep.ram";
+    machine_class->default_ram_size = 0x08000000;
+}
+
+static const TypeInfo ref405ep_type = { /* QOM type registered by ppc405_machine_init() */
+ .name = MACHINE_TYPE_NAME("ref405ep"),
+ .parent = TYPE_MACHINE,
+ .class_init = ref405ep_class_init,
+};
+
+/*****************************************************************************/
+/* AMCC Taihu evaluation board */
+/* - PowerPC 405EP processor
+ * - SDRAM 128 MB at 0x00000000
+ * - Boot flash 2 MB at 0xFFE00000
+ * - Application flash 32 MB at 0xFC000000
+ * - 2 serial ports
+ * - 2 ethernet PHY
+ * - 1 USB 1.1 device 0x50000000
+ * - 1 LCD display 0x50100000
+ * - 1 CPLD 0x50100000
+ * - 1 I2C EEPROM
+ * - 1 I2C thermal sensor
+ * - a set of LEDs
+ * - bit-bang SPI port using GPIOs
+ * - 1 EBC interface connector 0 0x50200000
+ * - 1 cardbus controller + expansion slot.
+ * - 1 PCI expansion slot.
+ */
+typedef struct taihu_cpld_t taihu_cpld_t;
+struct taihu_cpld_t { /* state of the two byte-wide CPLD registers */
+ uint8_t reg0; /* offset 0x0; writes are ignored (read only) */
+ uint8_t reg1; /* offset 0x1; read/write */
+};
+
+/*
+ * CPLD read handler: offsets 0 and 1 return reg0 and reg1, every other
+ * offset reads as 0.  @size is ignored.
+ */
+static uint64_t taihu_cpld_read(void *opaque, hwaddr addr, unsigned size)
+{
+    const taihu_cpld_t *cpld = opaque;
+
+    if (addr == 0x0) {
+        return cpld->reg0;
+    }
+    if (addr == 0x1) {
+        return cpld->reg1;
+    }
+    return 0;
+}
+
+/*
+ * CPLD write handler: only offset 1 (reg1) is writable; offset 0 is read
+ * only and every other offset is ignored.  @size is ignored.
+ */
+static void taihu_cpld_write(void *opaque, hwaddr addr,
+                             uint64_t value, unsigned size)
+{
+    taihu_cpld_t *cpld = opaque;
+
+    if (addr == 0x1) {
+        cpld->reg1 = value;
+    }
+}
+
+static const MemoryRegionOps taihu_cpld_ops = { /* MMIO callbacks of the CPLD region */
+ .read = taihu_cpld_read,
+ .write = taihu_cpld_write,
+ .impl = { /* the handlers implement byte accesses only */
+ .min_access_size = 1,
+ .max_access_size = 1,
+ },
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+/* Reset handler: reg0 = 0x01, reg1 = 0x80.  @opaque is the CPLD state. */
+static void taihu_cpld_reset (void *opaque)
+{
+    taihu_cpld_t *state = opaque;
+
+    state->reg0 = 0x01;
+    state->reg1 = 0x80;
+}
+
+/*
+ * Create the CPLD device: a 0x100-byte I/O region mapped at @base inside
+ * @sysmem, with its reset handler registered.
+ */
+static void taihu_cpld_init(MemoryRegion *sysmem, uint32_t base)
+{
+    MemoryRegion *region = g_new(MemoryRegion, 1);
+    taihu_cpld_t *state = g_malloc0(sizeof(*state));
+
+    memory_region_init_io(region, NULL, &taihu_cpld_ops, state, "cpld", 0x100);
+    memory_region_add_subregion(sysmem, base, region);
+    qemu_register_reset(&taihu_cpld_reset, state);
+}
+
+static void taihu_405ep_init(MachineState *machine)
+{ /* Machine init for the Taihu board: SoC, two RAM banks, BIOS, flash, CPLD, optional kernel/initrd. */
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ const char *bios_name = machine->firmware ?: BIOS_FILENAME;
+ const char *kernel_filename = machine->kernel_filename;
+ const char *initrd_filename = machine->initrd_filename;
+ char *filename;
+ MemoryRegion *sysmem = get_system_memory();
+ MemoryRegion *bios;
+ MemoryRegion *ram_memories = g_new(MemoryRegion, 2);
+ hwaddr ram_bases[2], ram_sizes[2];
+ long bios_size;
+ target_ulong kernel_base, initrd_base;
+ long kernel_size, initrd_size;
+ int linux_boot;
+ int fl_idx; /* index of the next pflash drive to look up */
+ DriveInfo *dinfo;
+ DeviceState *uicdev;
+
+ if (machine->ram_size != mc->default_ram_size) { /* only the default RAM size is accepted */
+ char *sz = size_to_str(mc->default_ram_size);
+ error_report("Invalid RAM size, should be %s", sz);
+ g_free(sz);
+ exit(EXIT_FAILURE);
+ }
+
+ ram_bases[0] = 0; /* machine RAM is exposed as two aliases of 0x04000000 bytes each */
+ ram_sizes[0] = 0x04000000;
+ memory_region_init_alias(&ram_memories[0], NULL,
+ "taihu_405ep.ram-0", machine->ram, ram_bases[0],
+ ram_sizes[0]);
+ ram_bases[1] = 0x04000000;
+ ram_sizes[1] = 0x04000000;
+ memory_region_init_alias(&ram_memories[1], NULL,
+ "taihu_405ep.ram-1", machine->ram, ram_bases[1],
+ ram_sizes[1]);
+ ppc405ep_init(sysmem, ram_memories, ram_bases, ram_sizes,
+ 33333333, &uicdev, kernel_filename == NULL ? 0 : 1); /* last argument is 1 only when a kernel is given */
+ /* allocate and load BIOS */
+ fl_idx = 0;
+#if defined(USE_FLASH_BIOS)
+ dinfo = drive_get(IF_PFLASH, 0, fl_idx);
+ if (dinfo) {
+ bios_size = 2 * MiB;
+ pflash_cfi02_register(0xFFE00000,
+ "taihu_405ep.bios", bios_size,
+ blk_by_legacy_dinfo(dinfo),
+ 64 * KiB, 1,
+ 4, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA,
+ 1);
+ fl_idx++; /* the first drive is consumed by the boot flash */
+ } else
+#endif
+ {
+ bios = g_new(MemoryRegion, 1);
+ memory_region_init_rom(bios, NULL, "taihu_405ep.bios", BIOS_SIZE,
+ &error_fatal);
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (filename) {
+ bios_size = load_image_size(filename,
+ memory_region_get_ram_ptr(bios),
+ BIOS_SIZE);
+ g_free(filename);
+ if (bios_size < 0) {
+ error_report("Could not load PowerPC BIOS '%s'", bios_name);
+ exit(1);
+ }
+ bios_size = (bios_size + 0xfff) & ~0xfff; /* round up to a 4 KiB multiple */
+ memory_region_add_subregion(sysmem, (uint32_t)(-bios_size), bios); /* ROM ends at the top of the 32-bit space */
+ } else if (!qtest_enabled()) { /* a missing BIOS is tolerated only under qtest */
+ error_report("Could not load PowerPC BIOS '%s'", bios_name);
+ exit(1);
+ }
+ }
+ /* Register Linux flash */
+ dinfo = drive_get(IF_PFLASH, 0, fl_idx);
+ if (dinfo) {
+ bios_size = 32 * MiB;
+ pflash_cfi02_register(0xfc000000, "taihu_405ep.flash", bios_size,
+ blk_by_legacy_dinfo(dinfo),
+ 64 * KiB, 1,
+ 4, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA,
+ 1);
+ fl_idx++;
+ }
+ /* Register CPLD & LCD display */
+ taihu_cpld_init(sysmem, 0x50100000);
+ /* Load kernel */
+ linux_boot = (kernel_filename != NULL);
+ if (linux_boot) {
+ kernel_base = KERNEL_LOAD_ADDR;
+ /* now we can load the kernel */
+ kernel_size = load_image_targphys(kernel_filename, kernel_base,
+ machine->ram_size - kernel_base);
+ if (kernel_size < 0) {
+ error_report("could not load kernel '%s'", kernel_filename);
+ exit(1);
+ }
+ /* load initrd */
+ if (initrd_filename) {
+ initrd_base = INITRD_LOAD_ADDR;
+ initrd_size = load_image_targphys(initrd_filename, initrd_base,
+ machine->ram_size - initrd_base);
+ if (initrd_size < 0) {
+ error_report("could not load initial ram disk '%s'",
+ initrd_filename);
+ exit(1);
+ }
+ } else {
+ initrd_base = 0;
+ initrd_size = 0;
+ }
+ } else { /* these values are not read again within this function */
+ kernel_base = 0;
+ kernel_size = 0;
+ initrd_base = 0;
+ initrd_size = 0;
+ }
+}
+
+/* Fill in the MachineClass of the "taihu" machine; @data is unused. */
+static void taihu_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *machine_class = MACHINE_CLASS(oc);
+
+    machine_class->desc = "taihu";
+    machine_class->init = taihu_405ep_init;
+    machine_class->default_ram_id = "taihu_405ep.ram";
+    machine_class->default_ram_size = 0x08000000;
+}
+
+static const TypeInfo taihu_type = { /* QOM type registered by ppc405_machine_init() */
+ .name = MACHINE_TYPE_NAME("taihu"),
+ .parent = TYPE_MACHINE,
+ .class_init = taihu_class_init,
+};
+
+/* Register both PPC405 machine types defined in this file. */
+static void ppc405_machine_init(void)
+{
+    /* IBM reference board first, then the AMCC Taihu board */
+    type_register_static(&ref405ep_type);
+    type_register_static(&taihu_type);
+}
+
+type_init(ppc405_machine_init)
diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c
new file mode 100644
index 000000000..e632c408b
--- /dev/null
+++ b/hw/ppc/ppc405_uc.c
@@ -0,0 +1,1547 @@
+/*
+ * QEMU PowerPC 405 embedded processors emulation
+ *
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qapi/error.h"
+#include "cpu.h"
+#include "hw/ppc/ppc.h"
+#include "hw/i2c/ppc4xx_i2c.h"
+#include "hw/irq.h"
+#include "ppc405.h"
+#include "hw/char/serial.h"
+#include "qemu/timer.h"
+#include "sysemu/reset.h"
+#include "sysemu/sysemu.h"
+#include "exec/address-spaces.h"
+#include "hw/intc/ppc-uic.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+
+//#define DEBUG_OPBA
+//#define DEBUG_SDRAM
+//#define DEBUG_GPIO
+//#define DEBUG_SERIAL
+//#define DEBUG_OCM
+//#define DEBUG_GPT
+//#define DEBUG_CLOCKS
+//#define DEBUG_CLOCKS_LL
+
+ /*
+  * Serialize the board information record @bd into guest physical memory
+  * and return the guest address it was written to.
+  *
+  * The record is placed sizeof(struct ppc4xx_bd_info_t) bytes below the
+  * 16 MiB mark, or below the end of RAM when bd->bi_memsize is smaller
+  * than 16 MiB.  Multi-byte fields are stored big-endian at fixed offsets.
+  * Bit 0 of @flags selects whether bi_pci_enetaddr2 is emitted; the fields
+  * that follow it shift by six bytes accordingly.
+  */
+ ram_addr_t ppc405_set_bootinfo (CPUPPCState *env, ppc4xx_bd_info_t *bd,
+                                 uint32_t flags)
+ {
+     CPUState *cpu = env_cpu(env);
+     ram_addr_t base;
+     int k, off;
+
+     /* The bd structure lives at the top of (at most the first 16 MiB of) RAM */
+     if (bd->bi_memsize >= 0x01000000UL) {
+         base = 0x01000000UL - sizeof(struct ppc4xx_bd_info_t);
+     } else {
+         base = bd->bi_memsize - sizeof(struct ppc4xx_bd_info_t);
+     }
+
+     /* Memory and flash layout */
+     stl_be_phys(cpu->as, base + 0x00, bd->bi_memstart);
+     stl_be_phys(cpu->as, base + 0x04, bd->bi_memsize);
+     stl_be_phys(cpu->as, base + 0x08, bd->bi_flashstart);
+     stl_be_phys(cpu->as, base + 0x0C, bd->bi_flashsize);
+     stl_be_phys(cpu->as, base + 0x10, bd->bi_flashoffset);
+     stl_be_phys(cpu->as, base + 0x14, bd->bi_sramstart);
+     stl_be_phys(cpu->as, base + 0x18, bd->bi_sramsize);
+     stl_be_phys(cpu->as, base + 0x1C, bd->bi_bootflags);
+
+     /* Network identity */
+     stl_be_phys(cpu->as, base + 0x20, bd->bi_ipaddr);
+     for (k = 0; k < 6; k++) {
+         stb_phys(cpu->as, base + 0x24 + k, bd->bi_enetaddr[k]);
+     }
+     stw_be_phys(cpu->as, base + 0x2A, bd->bi_ethspeed);
+
+     /* Frequencies and console speed */
+     stl_be_phys(cpu->as, base + 0x2C, bd->bi_intfreq);
+     stl_be_phys(cpu->as, base + 0x30, bd->bi_busfreq);
+     stl_be_phys(cpu->as, base + 0x34, bd->bi_baudrate);
+
+     /* Version strings, copied byte by byte */
+     for (k = 0; k < 4; k++) {
+         stb_phys(cpu->as, base + 0x38 + k, bd->bi_s_version[k]);
+     }
+     for (k = 0; k < 32; k++) {
+         stb_phys(cpu->as, base + 0x3C + k, bd->bi_r_version[k]);
+     }
+
+     stl_be_phys(cpu->as, base + 0x5C, bd->bi_plb_busfreq);
+     stl_be_phys(cpu->as, base + 0x60, bd->bi_pci_busfreq);
+     for (k = 0; k < 6; k++) {
+         stb_phys(cpu->as, base + 0x64 + k, bd->bi_pci_enetaddr[k]);
+     }
+
+     /* From here on the offset depends on the optional second PCI MAC */
+     off = 0x6A;
+     if (flags & 0x00000001) {
+         for (k = 0; k < 6; k++, off++) {
+             stb_phys(cpu->as, base + off, bd->bi_pci_enetaddr2[k]);
+         }
+     }
+     stl_be_phys(cpu->as, base + off, bd->bi_opbfreq);
+     for (k = 0; k < 2; k++) {
+         off += 4;
+         stl_be_phys(cpu->as, base + off, bd->bi_iic_fast[k]);
+     }
+
+     return base;
+ }
+
+/*****************************************************************************/
+/* Shared peripherals */
+
+/*****************************************************************************/
+ /* Peripheral local bus arbiter */
+ /*
+  * DCR numbers routed to the PLB model.  Only the PLB0_* registers are
+  * decoded by the handlers; the remaining ones read as zero and ignore
+  * writes.
+  */
+ enum {
+     PLB3A0_ACR = 0x077,
+     PLB4A0_ACR = 0x081,
+     PLB0_BESR = 0x084,
+     PLB0_BEAR = 0x086,
+     PLB0_ACR = 0x087,
+     PLB4A1_ACR = 0x089,
+ };
+
+ /* Register file backing PLB0_ACR, PLB0_BEAR and PLB0_BESR. */
+ typedef struct ppc4xx_plb_t {
+     uint32_t acr;
+     uint32_t bear;
+     uint32_t besr;
+ } ppc4xx_plb_t;
+
+ /*
+  * DCR read handler for the PLB model.
+  * Returns the stored ACR/BEAR/BESR value for the PLB0_* registers and 0
+  * for every other DCR number routed here.
+  */
+ static uint32_t dcr_read_plb (void *opaque, int dcrn)
+ {
+     const ppc4xx_plb_t *regs = opaque;
+
+     switch (dcrn) {
+     case PLB0_ACR:
+         return regs->acr;
+     case PLB0_BEAR:
+         return regs->bear;
+     case PLB0_BESR:
+         return regs->besr;
+     default:
+         return 0;
+     }
+ }
+
+ /*
+  * DCR write handler for the PLB model.
+  * PLB0_ACR keeps only the top five bits of the value, PLB0_BESR is
+  * write-one-to-clear, PLB0_BEAR is read-only, and writes to any other
+  * DCR number are dropped.
+  */
+ static void dcr_write_plb (void *opaque, int dcrn, uint32_t val)
+ {
+     ppc4xx_plb_t *regs = opaque;
+
+     if (dcrn == PLB0_ACR) {
+         /*
+          * Bus priorities are not modelled, so the value is only stored
+          * for read-back.
+          */
+         regs->acr = val & 0xF8000000;
+     } else if (dcrn == PLB0_BESR) {
+         /* Each bit set in val clears the matching status bit */
+         regs->besr &= ~val;
+     }
+ }
+
+ /* Reset handler: clears all three PLB registers. */
+ static void ppc4xx_plb_reset (void *opaque)
+ {
+     ppc4xx_plb_t *regs = opaque;
+
+     regs->acr = regs->bear = regs->besr = 0x00000000;
+ }
+
+ /*
+  * Allocate the PLB model, hook its six DCRs to the read/write handlers
+  * and register its reset handler.
+  */
+ void ppc4xx_plb_init(CPUPPCState *env)
+ {
+     /* Registration order is kept as-is */
+     static const int plb_dcrs[] = {
+         PLB3A0_ACR, PLB4A0_ACR, PLB0_ACR, PLB0_BEAR, PLB0_BESR, PLB4A1_ACR,
+     };
+     ppc4xx_plb_t *regs = g_malloc0(sizeof(*regs));
+     size_t i;
+
+     for (i = 0; i < sizeof(plb_dcrs) / sizeof(plb_dcrs[0]); i++) {
+         ppc_dcr_register(env, plb_dcrs[i], regs,
+                          &dcr_read_plb, &dcr_write_plb);
+     }
+     qemu_register_reset(ppc4xx_plb_reset, regs);
+ }
+
+/*****************************************************************************/
+/* PLB to OPB bridge */
+ /* DCR numbers of the PLB to OPB bridge registers. */
+ enum {
+     POB0_BESR0 = 0x0A0,
+     POB0_BESR1 = 0x0A2,
+     POB0_BEAR = 0x0A4,
+ };
+
+ /* Register file backing the three POB0_* DCRs. */
+ typedef struct ppc4xx_pob_t {
+     uint32_t bear;
+     uint32_t besr0;
+     uint32_t besr1;
+ } ppc4xx_pob_t;
+
+ /*
+  * DCR read handler for the PLB to OPB bridge.
+  * Returns the stored BEAR/BESR0/BESR1 value, or 0 for an unknown DCR.
+  */
+ static uint32_t dcr_read_pob (void *opaque, int dcrn)
+ {
+     const ppc4xx_pob_t *regs = opaque;
+
+     switch (dcrn) {
+     case POB0_BEAR:
+         return regs->bear;
+     case POB0_BESR0:
+         return regs->besr0;
+     case POB0_BESR1:
+         return regs->besr1;
+     default:
+         return 0;
+     }
+ }
+
+ /*
+  * DCR write handler for the PLB to OPB bridge.
+  * Both BESR registers are write-one-to-clear; BEAR is read-only and
+  * writes to it (or to any other DCR) are ignored.
+  */
+ static void dcr_write_pob (void *opaque, int dcrn, uint32_t val)
+ {
+     ppc4xx_pob_t *regs = opaque;
+
+     if (dcrn == POB0_BESR0) {
+         regs->besr0 &= ~val;
+     } else if (dcrn == POB0_BESR1) {
+         regs->besr1 &= ~val;
+     }
+ }
+
+static void ppc4xx_pob_reset (void *opaque)
+{
+ ppc4xx_pob_t *pob;
+
+ pob = opaque;
+ /* No error */
+ pob->bear = 0x00000000;
+ pob->besr0 = 0x0000000;
+ pob->besr1 = 0x0000000;
+}
+
+ /*
+  * Allocate the PLB to OPB bridge model, hook its three DCRs to the
+  * read/write handlers and register its reset handler.
+  */
+ static void ppc4xx_pob_init(CPUPPCState *env)
+ {
+     static const int pob_dcrs[] = { POB0_BEAR, POB0_BESR0, POB0_BESR1 };
+     ppc4xx_pob_t *regs = g_malloc0(sizeof(*regs));
+     size_t i;
+
+     for (i = 0; i < sizeof(pob_dcrs) / sizeof(pob_dcrs[0]); i++) {
+         ppc_dcr_register(env, pob_dcrs[i], regs,
+                          &dcr_read_pob, &dcr_write_pob);
+     }
+     qemu_register_reset(ppc4xx_pob_reset, regs);
+ }
+
+/*****************************************************************************/
+ /* OPB arbiter */
+ /*
+  * OPB arbiter state: a two-byte MMIO window exposing cr at offset 0 and
+  * pr at offset 1.
+  */
+ typedef struct ppc4xx_opba_t {
+     MemoryRegion io;
+     uint8_t cr;
+     uint8_t pr;
+ } ppc4xx_opba_t;
+
+ /*
+  * Byte read from the OPB arbiter window.
+  * Offset 0 returns cr, offset 1 returns pr, anything else reads as 0.
+  */
+ static uint64_t opba_readb(void *opaque, hwaddr addr, unsigned size)
+ {
+     const ppc4xx_opba_t *arb = opaque;
+
+ #ifdef DEBUG_OPBA
+     printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
+ #endif
+     if (addr == 0x00) {
+         return arb->cr;
+     }
+     if (addr == 0x01) {
+         return arb->pr;
+     }
+     return 0x00;
+ }
+
+ /*
+  * Byte write to the OPB arbiter window.
+  * Offset 0 stores the top five bits of the value in cr, offset 1 stores
+  * the low byte in pr; writes to other offsets are ignored.
+  */
+ static void opba_writeb(void *opaque, hwaddr addr, uint64_t value,
+                         unsigned size)
+ {
+     ppc4xx_opba_t *opba;
+
+ #ifdef DEBUG_OPBA
+     /* value is 64 bits wide, so it must be printed with a 64-bit format */
+     printf("%s: addr " TARGET_FMT_plx " val %08" PRIx64 "\n", __func__, addr,
+            value);
+ #endif
+     opba = opaque;
+     switch (addr) {
+     case 0x00:
+         opba->cr = value & 0xF8;
+         break;
+     case 0x01:
+         opba->pr = value & 0xFF;
+         break;
+     default:
+         break;
+     }
+ }
+ /*
+  * MMIO callbacks for the OPB arbiter: the guest may issue 1 to 4 byte
+  * accesses, while the handlers themselves are declared byte-wide.
+  */
+ static const MemoryRegionOps opba_ops = {
+     .read = opba_readb,
+     .write = opba_writeb,
+     .endianness = DEVICE_BIG_ENDIAN,
+     .valid = {
+         .min_access_size = 1,
+         .max_access_size = 4,
+     },
+     .impl = {
+         .min_access_size = 1,
+         .max_access_size = 1,
+     },
+ };
+
+ /* Reset handler: cr = 0x00 (no dynamic priorities, park disabled), pr = 0x11. */
+ static void ppc4xx_opba_reset (void *opaque)
+ {
+     ppc4xx_opba_t *arb = opaque;
+
+     arb->pr = 0x11;
+     arb->cr = 0x00;
+ }
+
+ /*
+  * Create the OPB arbiter and map its two-byte register window at @base
+  * in the system memory region, then register its reset handler.
+  */
+ static void ppc4xx_opba_init(hwaddr base)
+ {
+     ppc4xx_opba_t *arb = g_malloc0(sizeof(*arb));
+
+ #ifdef DEBUG_OPBA
+     printf("%s: offset " TARGET_FMT_plx "\n", __func__, base);
+ #endif
+     memory_region_init_io(&arb->io, NULL, &opba_ops, arb, "opba", 0x002);
+     memory_region_add_subregion(get_system_memory(), base, &arb->io);
+     qemu_register_reset(ppc4xx_opba_reset, arb);
+ }
+
+/*****************************************************************************/
+/* Code decompression controller */
+/* XXX: TODO */
+
+/*****************************************************************************/
+/* Peripheral controller */
+ /*
+  * Peripheral controller state.  Registers are reached indirectly: the
+  * index written to EBC0_CFGADDR (kept in addr) selects which field a
+  * read of EBC0_CFGDATA returns.
+  */
+ typedef struct ppc4xx_ebc_t {
+     uint32_t addr;      /* currently selected indirect register index */
+     uint32_t bcr[8];    /* BnCR, indices 0x00-0x07 */
+     uint32_t bap[8];    /* BnAP, indices 0x10-0x17 */
+     uint32_t bear;      /* index 0x20 */
+     uint32_t besr0;     /* index 0x21 */
+     uint32_t besr1;     /* index 0x22 */
+     uint32_t cfg;       /* index 0x23 */
+ } ppc4xx_ebc_t;
+
+ /* DCR numbers of the indirect address/data register pair. */
+ enum {
+     EBC0_CFGADDR = 0x012,
+     EBC0_CFGDATA = 0x013,
+ };
+
+ /*
+  * DCR read handler for the peripheral controller.
+  * EBC0_CFGADDR returns the selected index.  EBC0_CFGDATA returns the
+  * register that index selects: BnCR for 0x00-0x07, BnAP for 0x10-0x17,
+  * then BEAR, BESR0, BESR1 and CFG for 0x20-0x23.  Unknown DCRs and
+  * unknown indices read as 0.
+  */
+ static uint32_t dcr_read_ebc (void *opaque, int dcrn)
+ {
+     const ppc4xx_ebc_t *ebc = opaque;
+
+     if (dcrn == EBC0_CFGADDR) {
+         return ebc->addr;
+     }
+     if (dcrn != EBC0_CFGDATA) {
+         return 0x00000000;
+     }
+
+     switch (ebc->addr) {
+     case 0x00 ... 0x07: /* B0CR - B7CR */
+         return ebc->bcr[ebc->addr];
+     case 0x10 ... 0x17: /* B0AP - B7AP */
+         return ebc->bap[ebc->addr - 0x10];
+     case 0x20: /* BEAR */
+         return ebc->bear;
+     case 0x21: /* BESR0 */
+         return ebc->besr0;
+     case 0x22: /* BESR1 */
+         return ebc->besr1;
+     case 0x23: /* CFG */
+         return ebc->cfg;
+     default:
+         return 0x00000000;
+     }
+ }
+
+ /*
+  * DCR write handler for the peripheral controller.
+  * Only the index register is writable: a write to EBC0_CFGADDR selects
+  * the indirect register.  Writes to EBC0_CFGDATA are accepted for every
+  * index (BnCR, BnAP, BEAR, BESR0, BESR1, CFG) but none of them is stored,
+  * and writes to any other DCR are ignored.
+  */
+ static void dcr_write_ebc (void *opaque, int dcrn, uint32_t val)
+ {
+     ppc4xx_ebc_t *ebc = opaque;
+
+     if (dcrn == EBC0_CFGADDR) {
+         ebc->addr = val;
+     }
+     /* EBC0_CFGDATA: data writes are deliberately dropped */
+ }
+
+ /*
+  * Reset handler for the peripheral controller.
+  * Clears the index register and the error status registers, loads the
+  * bank 0 defaults (B0AP = 0x7F8FFE80, B0CR = 0xFFE28000), zeroes banks
+  * 1-7 and sets CFG to 0x80400000.
+  */
+ static void ebc_reset (void *opaque)
+ {
+     ppc4xx_ebc_t *ebc;
+     int i;
+
+     ebc = opaque;
+     ebc->addr = 0x00000000;
+     ebc->bap[0] = 0x7F8FFE80;
+     ebc->bcr[0] = 0xFFE28000;
+     /*
+      * Start at bank 1: starting at 0 would immediately overwrite the
+      * bank 0 defaults assigned just above with zeroes.
+      */
+     for (i = 1; i < 8; i++) {
+         ebc->bap[i] = 0x00000000;
+         ebc->bcr[i] = 0x00000000;
+     }
+     ebc->besr0 = 0x00000000;
+     ebc->besr1 = 0x00000000;
+     ebc->cfg = 0x80400000;
+ }
+
+ /*
+  * Allocate the peripheral controller model, register its reset handler
+  * and hook the address/data DCR pair to the read/write handlers.
+  */
+ void ppc405_ebc_init(CPUPPCState *env)
+ {
+     ppc4xx_ebc_t *ctrl = g_malloc0(sizeof(*ctrl));
+
+     qemu_register_reset(&ebc_reset, ctrl);
+     ppc_dcr_register(env, EBC0_CFGADDR, ctrl, &dcr_read_ebc, &dcr_write_ebc);
+     ppc_dcr_register(env, EBC0_CFGDATA, ctrl, &dcr_read_ebc, &dcr_write_ebc);
+ }
+
+/*****************************************************************************/
+/* DMA controller */
+ /*
+  * DCR numbers of the DMA controller: five registers (CR, CT, DA, SA, SG)
+  * for each of the four channels, spaced 8 apart, followed by the shared
+  * SR, SGC, SLP and POL registers.
+  */
+ enum {
+     /* Channel 0 */
+     DMA0_CR0 = 0x100,
+     DMA0_CT0 = 0x101,
+     DMA0_DA0 = 0x102,
+     DMA0_SA0 = 0x103,
+     DMA0_SG0 = 0x104,
+     /* Channel 1 */
+     DMA0_CR1 = 0x108,
+     DMA0_CT1 = 0x109,
+     DMA0_DA1 = 0x10A,
+     DMA0_SA1 = 0x10B,
+     DMA0_SG1 = 0x10C,
+     /* Channel 2 */
+     DMA0_CR2 = 0x110,
+     DMA0_CT2 = 0x111,
+     DMA0_DA2 = 0x112,
+     DMA0_SA2 = 0x113,
+     DMA0_SG2 = 0x114,
+     /* Channel 3 */
+     DMA0_CR3 = 0x118,
+     DMA0_CT3 = 0x119,
+     DMA0_DA3 = 0x11A,
+     DMA0_SA3 = 0x11B,
+     DMA0_SG3 = 0x11C,
+     /* Shared registers */
+     DMA0_SR = 0x120,
+     DMA0_SGC = 0x123,
+     DMA0_SLP = 0x125,
+     DMA0_POL = 0x126,
+ };
+
+ /*
+  * DMA controller state: one interrupt line and one copy of each
+  * per-channel register for the four channels, plus the shared registers.
+  */
+ typedef struct ppc405_dma_t {
+     qemu_irq irqs[4];
+     uint32_t cr[4];
+     uint32_t ct[4];
+     uint32_t da[4];
+     uint32_t sa[4];
+     uint32_t sg[4];
+     uint32_t sr;
+     uint32_t sgc;
+     uint32_t slp;
+     uint32_t pol;
+ } ppc405_dma_t;
+
+ /* DMA DCR read stub: the controller is not modelled, every register reads 0. */
+ static uint32_t dcr_read_dma (void *opaque, int dcrn)
+ {
+     return 0;
+ }
+
+ /* DMA DCR write stub: the controller is not modelled, writes are dropped. */
+ static void dcr_write_dma (void *opaque, int dcrn, uint32_t val)
+ {
+ }
+
+ /*
+  * Reset handler for the DMA controller: every per-channel and shared
+  * register is cleared, except SLP which resets to 0x7C000000.
+  */
+ static void ppc405_dma_reset (void *opaque)
+ {
+     ppc405_dma_t *ctl = opaque;
+
+     /* Per-channel registers */
+     memset(ctl->cr, 0, sizeof(ctl->cr));
+     memset(ctl->ct, 0, sizeof(ctl->ct));
+     memset(ctl->da, 0, sizeof(ctl->da));
+     memset(ctl->sa, 0, sizeof(ctl->sa));
+     memset(ctl->sg, 0, sizeof(ctl->sg));
+
+     /* Shared registers */
+     ctl->sr = 0x00000000;
+     ctl->sgc = 0x00000000;
+     ctl->slp = 0x7C000000;
+     ctl->pol = 0x00000000;
+ }
+
+ /*
+  * Allocate the DMA controller model, remember its four interrupt lines,
+  * register its reset handler and hook all 24 DMA DCRs to the stub
+  * read/write handlers.
+  */
+ static void ppc405_dma_init(CPUPPCState *env, qemu_irq irqs[4])
+ {
+     /* Registration order is kept as-is */
+     static const int dma_dcrs[] = {
+         DMA0_CR0, DMA0_CT0, DMA0_DA0, DMA0_SA0, DMA0_SG0,
+         DMA0_CR1, DMA0_CT1, DMA0_DA1, DMA0_SA1, DMA0_SG1,
+         DMA0_CR2, DMA0_CT2, DMA0_DA2, DMA0_SA2, DMA0_SG2,
+         DMA0_CR3, DMA0_CT3, DMA0_DA3, DMA0_SA3, DMA0_SG3,
+         DMA0_SR, DMA0_SGC, DMA0_SLP, DMA0_POL,
+     };
+     ppc405_dma_t *ctl = g_malloc0(sizeof(*ctl));
+     size_t i;
+
+     memcpy(ctl->irqs, irqs, 4 * sizeof(qemu_irq));
+     qemu_register_reset(&ppc405_dma_reset, ctl);
+     for (i = 0; i < sizeof(dma_dcrs) / sizeof(dma_dcrs[0]); i++) {
+         ppc_dcr_register(env, dma_dcrs[i], ctl,
+                          &dcr_read_dma, &dcr_write_dma);
+     }
+ }
+
+/*****************************************************************************/
+/* GPIO */
+ /*
+  * GPIO controller state.  The register fields are placeholders: the MMIO
+  * handlers currently neither read nor write any of them.
+  */
+ typedef struct ppc405_gpio_t {
+     MemoryRegion io;
+     uint32_t or;
+     uint32_t tcr;
+     uint32_t osrh;
+     uint32_t osrl;
+     uint32_t tsrh;
+     uint32_t tsrl;
+     uint32_t odr;
+     uint32_t ir;
+     uint32_t rr1;
+     uint32_t isr1h;
+     uint32_t isr1l;
+ } ppc405_gpio_t;
+
+ /* GPIO MMIO read stub: optionally traces the access and always returns 0. */
+ static uint64_t ppc405_gpio_read(void *opaque, hwaddr addr, unsigned size)
+ {
+ #ifdef DEBUG_GPIO
+     printf("%s: addr " TARGET_FMT_plx " size %d\n", __func__, addr, size);
+ #endif
+
+     /* No register is modelled yet */
+     return 0;
+ }
+
+ /* GPIO MMIO write stub: optionally traces the access and discards the value. */
+ static void ppc405_gpio_write(void *opaque, hwaddr addr, uint64_t value,
+                               unsigned size)
+ {
+ #ifdef DEBUG_GPIO
+     /* value is 64 bits wide, so it must be printed with a 64-bit format */
+     printf("%s: addr " TARGET_FMT_plx " size %d val %08" PRIx64 "\n",
+            __func__, addr, size, value);
+ #endif
+ }
+
+ /* MMIO callbacks for the GPIO window (native endianness, stub handlers). */
+ static const MemoryRegionOps ppc405_gpio_ops = {
+     .endianness = DEVICE_NATIVE_ENDIAN,
+     .write = ppc405_gpio_write,
+     .read = ppc405_gpio_read,
+ };
+
+ /* GPIO reset handler: intentionally empty, no register state is modelled. */
+ static void ppc405_gpio_reset (void *opaque)
+ {
+ }
+
+ /*
+  * Create the GPIO controller and map its 0x38-byte register window at
+  * @base in the system memory region, then register its reset handler.
+  */
+ static void ppc405_gpio_init(hwaddr base)
+ {
+     ppc405_gpio_t *ctl = g_malloc0(sizeof(*ctl));
+
+ #ifdef DEBUG_GPIO
+     printf("%s: offset " TARGET_FMT_plx "\n", __func__, base);
+ #endif
+     memory_region_init_io(&ctl->io, NULL, &ppc405_gpio_ops, ctl, "pgio",
+                           0x038);
+     memory_region_add_subregion(get_system_memory(), base, &ctl->io);
+     qemu_register_reset(&ppc405_gpio_reset, ctl);
+ }
+
+/*****************************************************************************/
+/* On Chip Memory */
+ /* DCR numbers of the on-chip memory controller. */
+ enum {
+     OCM0_ISARC = 0x018,
+     OCM0_ISACNTL = 0x019,
+     OCM0_DSARC = 0x01A,
+     OCM0_DSACNTL = 0x01B,
+ };
+
+ /*
+  * On-chip memory state: the backing RAM region (isarc_ram), an alias of
+  * it used for the data-side window (dsarc_ram), and the address/control
+  * register pair of each side.
+  */
+ typedef struct ppc405_ocm_t {
+     MemoryRegion ram;
+     MemoryRegion isarc_ram;
+     MemoryRegion dsarc_ram;
+     uint32_t isarc;
+     uint32_t isacntl;
+     uint32_t dsarc;
+     uint32_t dsacntl;
+ } ppc405_ocm_t;
+
+ /*
+  * Bring the OCM mappings in the system memory region in line with a new
+  * set of register values.  @ocm still holds the previous values; the
+  * caller stores the new ones afterwards.
+  *
+  * A side is remapped when its address changed or bit 0x80000000 (enable)
+  * of its control register toggled.  The data-side alias is not mapped
+  * when it would land on the address of an enabled instruction-side
+  * window.
+  */
+ static void ocm_update_mappings (ppc405_ocm_t *ocm,
+                                  uint32_t isarc, uint32_t isacntl,
+                                  uint32_t dsarc, uint32_t dsacntl)
+ {
+ #ifdef DEBUG_OCM
+     printf("OCM update ISA %08" PRIx32 " %08" PRIx32 " (%08" PRIx32
+            " %08" PRIx32 ") DSA %08" PRIx32 " %08" PRIx32
+            " (%08" PRIx32 " %08" PRIx32 ")\n",
+            isarc, isacntl, dsarc, dsacntl,
+            ocm->isarc, ocm->isacntl, ocm->dsarc, ocm->dsacntl);
+ #endif
+     if (ocm->isarc != isarc ||
+         (ocm->isacntl & 0x80000000) != (isacntl & 0x80000000)) {
+         if (ocm->isacntl & 0x80000000) {
+             /* Unmap previously assigned memory region */
+ #ifdef DEBUG_OCM
+             printf("OCM unmap ISA %08" PRIx32 "\n", ocm->isarc);
+ #endif
+             memory_region_del_subregion(get_system_memory(), &ocm->isarc_ram);
+         }
+         if (isacntl & 0x80000000) {
+             /* Map new instruction memory region */
+ #ifdef DEBUG_OCM
+             printf("OCM map ISA %08" PRIx32 "\n", isarc);
+ #endif
+             memory_region_add_subregion(get_system_memory(), isarc,
+                                         &ocm->isarc_ram);
+         }
+     }
+     if (ocm->dsarc != dsarc ||
+         (ocm->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) {
+         /*
+          * The alias is not always mapped while the data side is enabled
+          * (see below), so test the region itself rather than inferring
+          * its state from register values: deleting an unmapped region or
+          * adding a mapped one trips an assertion in the memory core.
+          */
+         if (memory_region_is_mapped(&ocm->dsarc_ram)) {
+             /* Unmap previously assigned memory region */
+ #ifdef DEBUG_OCM
+             printf("OCM unmap DSA %08" PRIx32 "\n", ocm->dsarc);
+ #endif
+             memory_region_del_subregion(get_system_memory(),
+                                         &ocm->dsarc_ram);
+         }
+         if (dsacntl & 0x80000000) {
+             /* Do not stack the alias on top of the instruction window */
+             if (!(isacntl & 0x80000000) || dsarc != isarc) {
+                 /* Map new data memory region */
+ #ifdef DEBUG_OCM
+                 printf("OCM map DSA %08" PRIx32 "\n", dsarc);
+ #endif
+                 memory_region_add_subregion(get_system_memory(), dsarc,
+                                             &ocm->dsarc_ram);
+             }
+         }
+     }
+     /*
+      * NOTE(review): an instruction-side window enabled later at the
+      * address of an already mapped data-side alias still overlaps it;
+      * that case is not handled here.
+      */
+ }
+
+ /*
+  * DCR read handler for the on-chip memory controller: returns the stored
+  * value of the addressed register, or 0 for an unknown DCR.
+  */
+ static uint32_t dcr_read_ocm (void *opaque, int dcrn)
+ {
+     const ppc405_ocm_t *ocm = opaque;
+
+     switch (dcrn) {
+     case OCM0_ISARC:
+         return ocm->isarc;
+     case OCM0_ISACNTL:
+         return ocm->isacntl;
+     case OCM0_DSARC:
+         return ocm->dsarc;
+     case OCM0_DSACNTL:
+         return ocm->dsacntl;
+     default:
+         return 0;
+     }
+ }
+
+ /*
+  * DCR write handler for the on-chip memory controller.
+  * Address registers keep the top six bits of the value, control registers
+  * the top two.  The mappings are updated before the new values are
+  * stored, because ocm_update_mappings() compares them against the old
+  * ones.
+  */
+ static void dcr_write_ocm (void *opaque, int dcrn, uint32_t val)
+ {
+     ppc405_ocm_t *ocm;
+     uint32_t isarc, dsarc, isacntl, dsacntl;
+
+     ocm = opaque;
+     isarc = ocm->isarc;
+     dsarc = ocm->dsarc;
+     isacntl = ocm->isacntl;
+     dsacntl = ocm->dsacntl;
+     switch (dcrn) {
+     case OCM0_ISARC:
+         isarc = val & 0xFC000000;
+         break;
+     case OCM0_ISACNTL:
+         isacntl = val & 0xC0000000;
+         break;
+     case OCM0_DSARC:
+         /* Data-side writes must land in the data-side registers */
+         dsarc = val & 0xFC000000;
+         break;
+     case OCM0_DSACNTL:
+         dsacntl = val & 0xC0000000;
+         break;
+     default:
+         break;
+     }
+     ocm_update_mappings(ocm, isarc, isacntl, dsarc, dsacntl);
+     ocm->isarc = isarc;
+     ocm->dsarc = dsarc;
+     ocm->isacntl = isacntl;
+     ocm->dsacntl = dsacntl;
+ }
+
+ /*
+  * Reset handler for the on-chip memory controller: drops any existing
+  * mapping by switching to an all-zero register set, then stores it.
+  */
+ static void ocm_reset (void *opaque)
+ {
+     ppc405_ocm_t *ocm = opaque;
+     const uint32_t cleared = 0x00000000;
+
+     /* Must run before the stores: it compares against the old values */
+     ocm_update_mappings(ocm, cleared, cleared, cleared, cleared);
+     ocm->isarc = cleared;
+     ocm->dsarc = cleared;
+     ocm->isacntl = cleared;
+     ocm->dsacntl = cleared;
+ }
+
+ /*
+  * Allocate the on-chip memory model: a 4 KiB RAM region plus an alias of
+  * it for the data side.  Neither is mapped here; mapping happens when the
+  * guest programs the OCM DCRs.  Also registers the reset handler and the
+  * four DCRs.
+  */
+ static void ppc405_ocm_init(CPUPPCState *env)
+ {
+     static const int ocm_dcrs[] = {
+         OCM0_ISARC, OCM0_ISACNTL, OCM0_DSARC, OCM0_DSACNTL,
+     };
+     ppc405_ocm_t *mem = g_malloc0(sizeof(*mem));
+     size_t i;
+
+     /* XXX: Size is 4096 or 0x04000000 */
+     memory_region_init_ram(&mem->isarc_ram, NULL, "ppc405.ocm", 4 * KiB,
+                            &error_fatal);
+     memory_region_init_alias(&mem->dsarc_ram, NULL, "ppc405.dsarc",
+                              &mem->isarc_ram, 0, 4 * KiB);
+     qemu_register_reset(&ocm_reset, mem);
+     for (i = 0; i < sizeof(ocm_dcrs) / sizeof(ocm_dcrs[0]); i++) {
+         ppc_dcr_register(env, ocm_dcrs[i], mem,
+                          &dcr_read_ocm, &dcr_write_ocm);
+     }
+ }
+
+/*****************************************************************************/
+/* General purpose timers */
+ /*
+  * General purpose timer state: the time base is derived from the virtual
+  * clock plus tb_offset (nanoseconds) scaled by tb_freq; five compare
+  * channels each have an interrupt line, a compare value and a mask.
+  */
+ typedef struct ppc4xx_gpt_t {
+     MemoryRegion iomem;
+     int64_t tb_offset;
+     uint32_t tb_freq;
+     QEMUTimer *timer;
+     qemu_irq irqs[5];
+     uint32_t oe;        /* output enable */
+     uint32_t ol;        /* output level */
+     uint32_t im;        /* interrupt mask */
+     uint32_t is;        /* interrupt status */
+     uint32_t ie;        /* interrupt enable */
+     uint32_t comp[5];
+     uint32_t mask[5];
+ } ppc4xx_gpt_t;
+
+ /* Compare stub for channel @n: not implemented, always reports no match. */
+ static int ppc4xx_gpt_compare (ppc4xx_gpt_t *gpt, int n)
+ {
+     /* XXX: TODO */
+     return 0;
+ }
+
+ /* Output stub for channel @n: not implemented, @level is discarded. */
+ static void ppc4xx_gpt_set_output (ppc4xx_gpt_t *gpt, int n, int level)
+ {
+     /* XXX: TODO */
+ }
+
+ /*
+  * Refresh the five timer outputs.  Channel i is controlled by bit
+  * 0x80000000 >> i of the output-enable and output-level registers: an
+  * enabled channel is driven to its programmed level when the compare
+  * matches and to the inverse otherwise.
+  */
+ static void ppc4xx_gpt_set_outputs (ppc4xx_gpt_t *gpt)
+ {
+     int ch;
+
+     for (ch = 0; ch < 5; ch++) {
+         uint32_t bit = 0x80000000 >> ch;
+
+         if (!(gpt->oe & bit)) {
+             /* Output is disabled */
+             continue;
+         }
+         if (ppc4xx_gpt_compare(gpt, ch)) {
+             /* Comparison matched */
+             ppc4xx_gpt_set_output(gpt, ch, gpt->ol & bit);
+         } else {
+             /* Comparison did not match */
+             ppc4xx_gpt_set_output(gpt, ch, gpt->ol & bit ? 0 : 1);
+         }
+     }
+ }
+
+ /*
+  * Refresh the five timer interrupt lines.  Channel i uses bit
+  * 0x00008000 >> i; its line is raised when that bit is set in both the
+  * status and the mask register, and lowered otherwise.
+  */
+ static void ppc4xx_gpt_set_irqs (ppc4xx_gpt_t *gpt)
+ {
+     int ch;
+
+     for (ch = 0; ch < 5; ch++) {
+         uint32_t bit = 0x00008000 >> ch;
+
+         if (gpt->is & gpt->im & bit) {
+             qemu_irq_raise(gpt->irqs[ch]);
+         } else {
+             qemu_irq_lower(gpt->irqs[ch]);
+         }
+     }
+ }
+
+ /* Timer re-arm stub: not implemented, the QEMU timer is never scheduled here. */
+ static void ppc4xx_gpt_compute_timer (ppc4xx_gpt_t *gpt)
+ {
+     /* XXX: TODO */
+ }
+
+ /*
+  * MMIO read from the timer window.
+  * 0x00 returns the time base, 0x10-0x24 the output/interrupt registers
+  * (0x1C and 0x20 both return the interrupt status), 0x80-0x90 the compare
+  * values and 0xC0-0xD0 the compare masks.  Unknown offsets read as all
+  * ones.  The result is always truncated to 32 bits.
+  */
+ static uint64_t ppc4xx_gpt_read(void *opaque, hwaddr addr, unsigned size)
+ {
+     ppc4xx_gpt_t *gpt = opaque;
+     uint32_t val;
+
+ #ifdef DEBUG_GPT
+     printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr);
+ #endif
+     if (addr >= 0x80 && addr <= 0x90) {
+         /* Compare timer */
+         int idx = (addr - 0x80) >> 2;
+
+         val = gpt->comp[idx];
+     } else if (addr >= 0xC0 && addr <= 0xD0) {
+         /* Compare mask */
+         int idx = (addr - 0xC0) >> 2;
+
+         val = gpt->mask[idx];
+     } else {
+         switch (addr) {
+         case 0x00:
+             /* Time base counter */
+             val = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)
+                            + gpt->tb_offset,
+                            gpt->tb_freq, NANOSECONDS_PER_SECOND);
+             break;
+         case 0x10:
+             /* Output enable */
+             val = gpt->oe;
+             break;
+         case 0x14:
+             /* Output level */
+             val = gpt->ol;
+             break;
+         case 0x18:
+             /* Interrupt mask */
+             val = gpt->im;
+             break;
+         case 0x1C:
+         case 0x20:
+             /* Interrupt status */
+             val = gpt->is;
+             break;
+         case 0x24:
+             /* Interrupt enable */
+             val = gpt->ie;
+             break;
+         default:
+             val = -1;
+             break;
+         }
+     }
+
+     return val;
+ }
+
+ /*
+  * MMIO write to the timer window.
+  * 0x00 rebases the time base, 0x10/0x14 set output enable/level, 0x18 the
+  * interrupt mask, 0x1C sets and 0x20 clears interrupt status bits, 0x24
+  * sets interrupt enable, 0x80-0x90 the compare values and 0xC0-0xD0 the
+  * compare masks.  Each register keeps only the bits selected by its mask
+  * constant below; writes to other offsets are ignored.
+  */
+ static void ppc4xx_gpt_write(void *opaque, hwaddr addr, uint64_t value,
+                              unsigned size)
+ {
+     ppc4xx_gpt_t *gpt;
+     int idx;
+
+     /*
+      * Trace under DEBUG_GPT like the rest of the timer code; value is
+      * 64 bits wide and needs a 64-bit format.
+      */
+ #ifdef DEBUG_GPT
+     printf("%s: addr " TARGET_FMT_plx " val %08" PRIx64 "\n", __func__, addr,
+            value);
+ #endif
+     gpt = opaque;
+     switch (addr) {
+     case 0x00:
+         /* Time base counter */
+         gpt->tb_offset = muldiv64(value, NANOSECONDS_PER_SECOND, gpt->tb_freq)
+             - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+         ppc4xx_gpt_compute_timer(gpt);
+         break;
+     case 0x10:
+         /* Output enable */
+         gpt->oe = value & 0xF8000000;
+         ppc4xx_gpt_set_outputs(gpt);
+         break;
+     case 0x14:
+         /* Output level */
+         gpt->ol = value & 0xF8000000;
+         ppc4xx_gpt_set_outputs(gpt);
+         break;
+     case 0x18:
+         /* Interrupt mask */
+         gpt->im = value & 0x0000F800;
+         break;
+     case 0x1C:
+         /* Interrupt status set */
+         gpt->is |= value & 0x0000F800;
+         ppc4xx_gpt_set_irqs(gpt);
+         break;
+     case 0x20:
+         /* Interrupt status clear */
+         gpt->is &= ~(value & 0x0000F800);
+         ppc4xx_gpt_set_irqs(gpt);
+         break;
+     case 0x24:
+         /* Interrupt enable */
+         gpt->ie = value & 0x0000F800;
+         ppc4xx_gpt_set_irqs(gpt);
+         break;
+     case 0x80 ... 0x90:
+         /* Compare timer */
+         idx = (addr - 0x80) >> 2;
+         gpt->comp[idx] = value & 0xF8000000;
+         ppc4xx_gpt_compute_timer(gpt);
+         break;
+     case 0xC0 ... 0xD0:
+         /* Compare mask */
+         idx = (addr - 0xC0) >> 2;
+         gpt->mask[idx] = value & 0xF8000000;
+         ppc4xx_gpt_compute_timer(gpt);
+         break;
+     default:
+         break;
+     }
+ }
+
+ /* MMIO callbacks for the timer window: only 4-byte accesses are valid. */
+ static const MemoryRegionOps gpt_ops = {
+     .read = ppc4xx_gpt_read,
+     .write = ppc4xx_gpt_write,
+     .endianness = DEVICE_NATIVE_ENDIAN,
+     .valid = {
+         .min_access_size = 4,
+         .max_access_size = 4,
+     },
+ };
+
+ /*
+  * Timer expiry callback: refreshes the interrupt lines and outputs, then
+  * recomputes the next deadline.
+  */
+ static void ppc4xx_gpt_cb (void *opaque)
+ {
+     ppc4xx_gpt_t *timer_state = opaque;
+
+     ppc4xx_gpt_set_irqs(timer_state);
+     ppc4xx_gpt_set_outputs(timer_state);
+     ppc4xx_gpt_compute_timer(timer_state);
+ }
+
+ /*
+  * Reset handler for the timer block: cancels the pending QEMU timer and
+  * clears the output, interrupt, compare and mask registers.  The time
+  * base offset and frequency are left untouched.
+  */
+ static void ppc4xx_gpt_reset (void *opaque)
+ {
+     ppc4xx_gpt_t *gpt = opaque;
+
+     timer_del(gpt->timer);
+     gpt->oe = gpt->ol = 0x00000000;
+     gpt->im = gpt->is = gpt->ie = 0x00000000;
+     memset(gpt->comp, 0, sizeof(gpt->comp));
+     memset(gpt->mask, 0, sizeof(gpt->mask));
+ }
+
+ /*
+  * Create the timer block: stores the five interrupt lines, creates its
+  * virtual-clock timer, maps the 0xd4-byte register window at @base in the
+  * system memory region and registers the reset handler.
+  */
+ static void ppc4xx_gpt_init(hwaddr base, qemu_irq irqs[5])
+ {
+     ppc4xx_gpt_t *gpt = g_malloc0(sizeof(*gpt));
+     int line;
+
+     for (line = 0; line < 5; line++) {
+         gpt->irqs[line] = irqs[line];
+     }
+     gpt->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &ppc4xx_gpt_cb, gpt);
+ #ifdef DEBUG_GPT
+     printf("%s: offset " TARGET_FMT_plx "\n", __func__, base);
+ #endif
+     memory_region_init_io(&gpt->iomem, NULL, &gpt_ops, gpt, "gpt", 0x0d4);
+     memory_region_add_subregion(get_system_memory(), base, &gpt->iomem);
+     qemu_register_reset(ppc4xx_gpt_reset, gpt);
+ }
+
+/*****************************************************************************/
+/* PowerPC 405EP */
+/* CPU control */
+ /* DCR numbers of the 405EP CPU control registers. */
+ enum {
+     PPC405EP_CPC0_PLLMR0 = 0x0F0,
+     PPC405EP_CPC0_BOOT = 0x0F1,
+     PPC405EP_CPC0_EPCTL = 0x0F3,
+     PPC405EP_CPC0_PLLMR1 = 0x0F4,
+     PPC405EP_CPC0_UCR = 0x0F5,
+     PPC405EP_CPC0_SRR = 0x0F6,
+     PPC405EP_CPC0_JTAGID = 0x0F7,
+     PPC405EP_CPC0_PCI = 0x0F9,
+ #if 0
+     PPC405EP_CPC0_ER = xxx,
+     PPC405EP_CPC0_FR = xxx,
+     PPC405EP_CPC0_SR = xxx,
+ #endif
+ };
+
+ /* Indices into the clk_setup array, one per derived clock. */
+ enum {
+     PPC405EP_CPU_CLK = 0,
+     PPC405EP_PLB_CLK = 1,
+     PPC405EP_OPB_CLK = 2,
+     PPC405EP_EBC_CLK = 3,
+     PPC405EP_MAL_CLK = 4,
+     PPC405EP_PCI_CLK = 5,
+     PPC405EP_UART0_CLK = 6,
+     PPC405EP_UART1_CLK = 7,
+     PPC405EP_CLK_NB = 8,
+ };
+
+ /*
+  * 405EP CPU control state: the input clock, one clock-change callback
+  * slot per derived clock, and the DCR-visible registers.
+  */
+ typedef struct ppc405ep_cpc_t {
+     uint32_t sysclk;
+     clk_setup_t clk_setup[PPC405EP_CLK_NB];
+     uint32_t boot;
+     uint32_t epctl;
+     uint32_t pllmr[2];
+     uint32_t ucr;
+     uint32_t srr;
+     uint32_t jtagid;
+     uint32_t pci;
+     /* Clock and power management */
+     uint32_t er;
+     uint32_t fr;
+     uint32_t sr;
+ } ppc405ep_cpc_t;
+
+ /*
+  * Recompute every derived clock from sysclk, PLLMR0, PLLMR1 and UCR, and
+  * push the results through the clk_setup callbacks.
+  *
+  * The PLL is used when bit 0x80000000 of PLLMR1 is set and bit 0x40000000
+  * is clear; otherwise sysclk is passed through unchanged.  Bit 0 of the
+  * boot register is set when the PLL is used and cleared when bit
+  * 0x40000000 of PLLMR1 is set.  An out-of-range VCO frequency is only
+  * reported: the PLL is still treated as locked.
+  */
+ static void ppc405ep_compute_clocks (ppc405ep_cpc_t *cpc)
+ {
+     uint32_t freq[PPC405EP_CLK_NB];
+     uint64_t VCO_out = 0, PLL_out;
+     int M, D, i;
+
+     if (!(cpc->pllmr[1] & 0x80000000) || (cpc->pllmr[1] & 0x40000000)) {
+         /* PLL not in use: the input clock is passed straight through */
+         PLL_out = cpc->sysclk;
+         if (cpc->pllmr[1] & 0x40000000) {
+             /* Pretend the PLL is not locked */
+             cpc->boot &= ~0x00000001;
+         }
+     } else {
+         M = (((cpc->pllmr[1] >> 20) - 1) & 0xF) + 1; /* FBMUL */
+ #ifdef DEBUG_CLOCKS_LL
+         printf("FBMUL %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 20) & 0xF, M);
+ #endif
+         D = 8 - ((cpc->pllmr[1] >> 16) & 0x7); /* FWDA */
+ #ifdef DEBUG_CLOCKS_LL
+         printf("FWDA %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 16) & 0x7, D);
+ #endif
+         VCO_out = (uint64_t)cpc->sysclk * M * D;
+         if (VCO_out < 500000000UL || VCO_out > 1000000000UL) {
+             /* Error: reported only, the PLL is not unlocked */
+             printf("VCO out of range %" PRIu64 "\n", VCO_out);
+         }
+         PLL_out = VCO_out / D;
+         /* Pretend the PLL is locked */
+         cpc->boot |= 0x00000001;
+     }
+
+     /* Now, compute all other clocks */
+     D = ((cpc->pllmr[0] >> 20) & 0x3) + 1; /* CCDV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("CCDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 20) & 0x3, D);
+ #endif
+     freq[PPC405EP_CPU_CLK] = PLL_out / D;
+
+     D = ((cpc->pllmr[0] >> 16) & 0x3) + 1; /* CBDV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("CBDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 16) & 0x3, D);
+ #endif
+     freq[PPC405EP_PLB_CLK] = freq[PPC405EP_CPU_CLK] / D;
+
+     D = ((cpc->pllmr[0] >> 12) & 0x3) + 1; /* OPDV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("OPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 12) & 0x3, D);
+ #endif
+     freq[PPC405EP_OPB_CLK] = freq[PPC405EP_PLB_CLK] / D;
+
+     D = ((cpc->pllmr[0] >> 8) & 0x3) + 2; /* EPDV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("EPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 8) & 0x3, D);
+ #endif
+     freq[PPC405EP_EBC_CLK] = freq[PPC405EP_PLB_CLK] / D;
+
+     D = ((cpc->pllmr[0] >> 4) & 0x3) + 1; /* MPDV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("MPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 4) & 0x3, D);
+ #endif
+     freq[PPC405EP_MAL_CLK] = freq[PPC405EP_PLB_CLK] / D;
+
+     D = (cpc->pllmr[0] & 0x3) + 1; /* PPDV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("PPDV %01" PRIx32 " %d\n", cpc->pllmr[0] & 0x3, D);
+ #endif
+     freq[PPC405EP_PCI_CLK] = freq[PPC405EP_PLB_CLK] / D;
+
+     D = ((cpc->ucr - 1) & 0x7F) + 1; /* U0DIV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("U0DIV %01" PRIx32 " %d\n", cpc->ucr & 0x7F, D);
+ #endif
+     freq[PPC405EP_UART0_CLK] = PLL_out / D;
+
+     D = (((cpc->ucr >> 8) - 1) & 0x7F) + 1; /* U1DIV */
+ #ifdef DEBUG_CLOCKS_LL
+     printf("U1DIV %01" PRIx32 " %d\n", (cpc->ucr >> 8) & 0x7F, D);
+ #endif
+     freq[PPC405EP_UART1_CLK] = PLL_out / D;
+
+ #ifdef DEBUG_CLOCKS
+     printf("Setup PPC405EP clocks - sysclk %" PRIu32 " VCO %" PRIu64
+            " PLL out %" PRIu64 " Hz\n", cpc->sysclk, VCO_out, PLL_out);
+     printf("CPU %" PRIu32 " PLB %" PRIu32 " OPB %" PRIu32 " EBC %" PRIu32
+            " MAL %" PRIu32 " PCI %" PRIu32 " UART0 %" PRIu32
+            " UART1 %" PRIu32 "\n",
+            freq[PPC405EP_CPU_CLK], freq[PPC405EP_PLB_CLK],
+            freq[PPC405EP_OPB_CLK], freq[PPC405EP_EBC_CLK],
+            freq[PPC405EP_MAL_CLK], freq[PPC405EP_PCI_CLK],
+            freq[PPC405EP_UART0_CLK], freq[PPC405EP_UART1_CLK]);
+ #endif
+
+     /* Notify the consumers in index order: CPU, PLB, OPB, EBC, MAL, PCI, UARTs */
+     for (i = 0; i < PPC405EP_CLK_NB; i++) {
+         clk_setup(&cpc->clk_setup[i], freq[i]);
+     }
+ }
+
+ /*
+  * DCR read handler for the 405EP CPU control block: returns the stored
+  * value of the addressed register, or 0 for an unknown DCR.
+  */
+ static uint32_t dcr_read_epcpc (void *opaque, int dcrn)
+ {
+     const ppc405ep_cpc_t *cpc = opaque;
+
+     switch (dcrn) {
+     case PPC405EP_CPC0_PLLMR0:
+         return cpc->pllmr[0];
+     case PPC405EP_CPC0_BOOT:
+         return cpc->boot;
+     case PPC405EP_CPC0_EPCTL:
+         return cpc->epctl;
+     case PPC405EP_CPC0_PLLMR1:
+         return cpc->pllmr[1];
+     case PPC405EP_CPC0_UCR:
+         return cpc->ucr;
+     case PPC405EP_CPC0_SRR:
+         return cpc->srr;
+     case PPC405EP_CPC0_JTAGID:
+         return cpc->jtagid;
+     case PPC405EP_CPC0_PCI:
+         return cpc->pci;
+     default:
+         return 0;
+     }
+ }
+
+ /*
+  * DCR write handler for the 405EP CPU control block.
+  * BOOT and JTAGID are read-only.  EPCTL, UCR, SRR and PCI are stored
+  * (EPCTL and UCR after masking) with no further effect.  Writing either
+  * PLLMR register stores the masked value and recomputes all clocks.
+  */
+ static void dcr_write_epcpc (void *opaque, int dcrn, uint32_t val)
+ {
+     ppc405ep_cpc_t *cpc = opaque;
+
+     switch (dcrn) {
+     case PPC405EP_CPC0_PLLMR0:
+         cpc->pllmr[0] = val & 0x00633333;
+         break;
+     case PPC405EP_CPC0_PLLMR1:
+         cpc->pllmr[1] = val & 0xC0F73FFF;
+         break;
+     case PPC405EP_CPC0_EPCTL:
+         /* Don't care for now */
+         cpc->epctl = val & 0xC00000F3;
+         return;
+     case PPC405EP_CPC0_UCR:
+         /* UART control - don't care for now */
+         cpc->ucr = val & 0x003F7F7F;
+         return;
+     case PPC405EP_CPC0_SRR:
+         cpc->srr = val;
+         return;
+     case PPC405EP_CPC0_PCI:
+         cpc->pci = val;
+         return;
+     default:
+         /* BOOT and JTAGID are read-only; anything else is ignored */
+         return;
+     }
+
+     /* Only reached after a PLL mode register changed */
+     ppc405ep_compute_clocks(cpc);
+ }
+
+/*
+ * Reset callback for the PPC405EP CPC: loads the reset values of the
+ * CPC registers and recomputes the clocks from them.  jtagid and sysclk
+ * are left untouched.
+ */
+static void ppc405ep_cpc_reset (void *opaque)
+{
+    ppc405ep_cpc_t *state = opaque;
+
+    /* Registers with a non-zero reset value */
+    state->boot = 0x00000010; /* Boot from PCI - IIC EEPROM disabled */
+    state->pllmr[0] = 0x00011010;
+    state->pllmr[1] = 0x40000000;
+    state->srr = 0x00040000;
+
+    /* Registers that reset to zero */
+    state->epctl = 0x00000000;
+    state->ucr = 0x00000000;
+    state->pci = 0x00000000;
+    state->er = 0x00000000;
+    state->fr = 0x00000000;
+    state->sr = 0x00000000;
+
+    ppc405ep_compute_clocks(state);
+}
+
+/*
+ * Allocate the PPC405EP CPC state, copy in the caller's clock setup
+ * table, register the reset handler and hook the CPC0 DCRs up to the
+ * read/write handlers.
+ *
+ * XXX: sysclk should be between 25 and 100 MHz
+ */
+static void ppc405ep_cpc_init (CPUPPCState *env, clk_setup_t clk_setup[8],
+                               uint32_t sysclk)
+{
+    /* DCRs served by the CPC handlers, in registration order */
+    static const int cpc_dcrs[] = {
+        PPC405EP_CPC0_BOOT,
+        PPC405EP_CPC0_EPCTL,
+        PPC405EP_CPC0_PLLMR0,
+        PPC405EP_CPC0_PLLMR1,
+        PPC405EP_CPC0_UCR,
+        PPC405EP_CPC0_SRR,
+        PPC405EP_CPC0_JTAGID,
+        PPC405EP_CPC0_PCI,
+    };
+    ppc405ep_cpc_t *state = g_malloc0(sizeof(ppc405ep_cpc_t));
+    size_t i;
+
+    memcpy(state->clk_setup, clk_setup,
+           PPC405EP_CLK_NB * sizeof(clk_setup_t));
+    state->jtagid = 0x20267049;
+    state->sysclk = sysclk;
+    qemu_register_reset(&ppc405ep_cpc_reset, state);
+
+    for (i = 0; i < sizeof(cpc_dcrs) / sizeof(cpc_dcrs[0]); i++) {
+        ppc_dcr_register(env, cpc_dcrs[i], state,
+                         &dcr_read_epcpc, &dcr_write_epcpc);
+    }
+#if 0
+    ppc_dcr_register(env, PPC405EP_CPC0_ER, state,
+                     &dcr_read_epcpc, &dcr_write_epcpc);
+    ppc_dcr_register(env, PPC405EP_CPC0_FR, state,
+                     &dcr_read_epcpc, &dcr_write_epcpc);
+    ppc_dcr_register(env, PPC405EP_CPC0_SR, state,
+                     &dcr_read_epcpc, &dcr_write_epcpc);
+#endif
+}
+
+/*
+ * Instantiate a PPC405EP SoC: the CPU, its on-chip peripherals and the
+ * UIC interrupt controller.
+ *
+ * The created UIC device is handed back through *uicdevp; the return
+ * value is the CPU state of the new core.
+ */
+CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem,
+                           MemoryRegion ram_memories[2],
+                           hwaddr ram_bases[2],
+                           hwaddr ram_sizes[2],
+                           uint32_t sysclk, DeviceState **uicdevp,
+                           int do_init)
+{
+    clk_setup_t clk_setup[PPC405EP_CLK_NB], tlb_clk_setup;
+    qemu_irq dma_irqs[4], gpt_irqs[5], mal_irqs[4];
+    PowerPCCPU *cpu;
+    CPUPPCState *env;
+    DeviceState *uic;
+    SysBusDevice *uic_sbd;
+    qemu_irq *cpu_irqs;
+    int n;
+
+    memset(clk_setup, 0, sizeof(clk_setup));
+    /* init CPUs */
+    cpu = ppc4xx_init(POWERPC_CPU_TYPE_NAME("405ep"),
+                      &clk_setup[PPC405EP_CPU_CLK],
+                      &tlb_clk_setup, sysclk);
+    env = &cpu->env;
+    clk_setup[PPC405EP_CPU_CLK].cb = tlb_clk_setup.cb;
+    clk_setup[PPC405EP_CPU_CLK].opaque = tlb_clk_setup.opaque;
+    /* Internal devices init */
+    /* Memory mapped devices registers */
+    /* PLB arbiter */
+    ppc4xx_plb_init(env);
+    /* PLB to OPB bridge */
+    ppc4xx_pob_init(env);
+    /* OPB arbiter */
+    ppc4xx_opba_init(0xef600600);
+    /* Initialize timers */
+    ppc_booke_timers_init(cpu, sysclk, 0);
+    /* Universal interrupt controller */
+    uic = qdev_new(TYPE_PPC_UIC);
+    uic_sbd = SYS_BUS_DEVICE(uic);
+
+    object_property_set_link(OBJECT(uic), "cpu", OBJECT(cpu),
+                             &error_fatal);
+    sysbus_realize_and_unref(uic_sbd, &error_fatal);
+
+    /* Route the UIC outputs to the core's normal and critical inputs */
+    cpu_irqs = (qemu_irq *)env->irq_inputs;
+    sysbus_connect_irq(uic_sbd, PPCUIC_OUTPUT_INT,
+                       cpu_irqs[PPC40x_INPUT_INT]);
+    sysbus_connect_irq(uic_sbd, PPCUIC_OUTPUT_CINT,
+                       cpu_irqs[PPC40x_INPUT_CINT]);
+
+    *uicdevp = uic;
+
+    /* SDRAM controller */
+    /* XXX 405EP has no ECC interrupt */
+    ppc4xx_sdram_init(env, qdev_get_gpio_in(uic, 17), 2, ram_memories,
+                      ram_bases, ram_sizes, do_init);
+    /* External bus controller */
+    ppc405_ebc_init(env);
+    /* DMA controller: UIC inputs 5..8 */
+    for (n = 0; n < 4; n++) {
+        dma_irqs[n] = qdev_get_gpio_in(uic, 5 + n);
+    }
+    ppc405_dma_init(env, dma_irqs);
+    /* IIC controller */
+    sysbus_create_simple(TYPE_PPC4xx_I2C, 0xef600500,
+                         qdev_get_gpio_in(uic, 2));
+    /* GPIO */
+    ppc405_gpio_init(0xef600700);
+    /* Serial ports */
+    if (serial_hd(0) != NULL) {
+        serial_mm_init(address_space_mem, 0xef600300, 0,
+                       qdev_get_gpio_in(uic, 0),
+                       PPC_SERIAL_MM_BAUDBASE, serial_hd(0),
+                       DEVICE_BIG_ENDIAN);
+    }
+    if (serial_hd(1) != NULL) {
+        serial_mm_init(address_space_mem, 0xef600400, 0,
+                       qdev_get_gpio_in(uic, 1),
+                       PPC_SERIAL_MM_BAUDBASE, serial_hd(1),
+                       DEVICE_BIG_ENDIAN);
+    }
+    /* OCM */
+    ppc405_ocm_init(env);
+    /* GPT: UIC inputs 19..23 */
+    for (n = 0; n < 5; n++) {
+        gpt_irqs[n] = qdev_get_gpio_in(uic, 19 + n);
+    }
+    ppc4xx_gpt_init(0xef600000, gpt_irqs);
+    /* PCI */
+    /* Uses UIC IRQs 3, 16, 18 */
+    /* MAL: UIC inputs 11..14 */
+    for (n = 0; n < 4; n++) {
+        mal_irqs[n] = qdev_get_gpio_in(uic, 11 + n);
+    }
+    ppc4xx_mal_init(env, 4, 2, mal_irqs);
+    /* Ethernet */
+    /* Uses UIC IRQs 9, 15, 17 */
+    /* CPU control */
+    ppc405ep_cpc_init(env, clk_setup, sysclk);
+
+    return env;
+}
diff --git a/hw/ppc/ppc440.h b/hw/ppc/ppc440.h
new file mode 100644
index 000000000..7cef93612
--- /dev/null
+++ b/hw/ppc/ppc440.h
@@ -0,0 +1,27 @@
+/*
+ * QEMU PowerPC 440 shared definitions
+ *
+ * Copyright (c) 2012 François Revol
+ * Copyright (c) 2016-2018 BALATON Zoltan
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#ifndef PPC440_H
+#define PPC440_H
+
+#include "hw/ppc/ppc.h"
+
+void ppc4xx_l2sram_init(CPUPPCState *env);
+void ppc4xx_cpr_init(CPUPPCState *env);
+void ppc4xx_sdr_init(CPUPPCState *env);
+void ppc440_sdram_init(CPUPPCState *env, int nbanks,
+ MemoryRegion *ram_memories,
+ hwaddr *ram_bases, hwaddr *ram_sizes,
+ int do_init);
+void ppc4xx_ahb_init(CPUPPCState *env);
+void ppc4xx_dma_init(CPUPPCState *env, int dcr_base);
+void ppc460ex_pcie_init(CPUPPCState *env);
+
+#endif /* PPC440_H */
diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c
new file mode 100644
index 000000000..7fb620b9a
--- /dev/null
+++ b/hw/ppc/ppc440_bamboo.c
@@ -0,0 +1,307 @@
+/*
+ * QEMU PowerPC 440 Bamboo board emulation
+ *
+ * Copyright 2007 IBM Corporation.
+ * Authors:
+ * Jerone Young <jyoung5@us.ibm.com>
+ * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ * Hollis Blanchard <hollisb@us.ibm.com>
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qemu/error-report.h"
+#include "net/net.h"
+#include "hw/pci/pci.h"
+#include "hw/boards.h"
+#include "sysemu/kvm.h"
+#include "kvm_ppc.h"
+#include "sysemu/device_tree.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "hw/char/serial.h"
+#include "hw/ppc/ppc.h"
+#include "ppc405.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/reset.h"
+#include "hw/sysbus.h"
+#include "hw/intc/ppc-uic.h"
+#include "hw/qdev-properties.h"
+#include "qapi/error.h"
+
+#define BINARY_DEVICE_TREE_FILE "bamboo.dtb"
+
+/* from u-boot */
+#define KERNEL_ADDR 0x1000000
+#define FDT_ADDR 0x1800000
+#define RAMDISK_ADDR 0x1900000
+
+#define PPC440EP_PCI_CONFIG 0xeec00000
+#define PPC440EP_PCI_INTACK 0xeed00000
+#define PPC440EP_PCI_SPECIAL 0xeed00000
+#define PPC440EP_PCI_REGS 0xef400000
+#define PPC440EP_PCI_IO 0xe8000000
+#define PPC440EP_PCI_IOLEN 0x00010000
+
+#define PPC440EP_SDRAM_NR_BANKS 4
+
+static const ram_addr_t ppc440ep_sdram_bank_sizes[] = {
+ 256 * MiB, 128 * MiB, 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 0
+};
+
+static hwaddr entry;
+
+/*
+ * Load the stock Bamboo device tree blob, patch in the memory size,
+ * initrd range, kernel command line and CPU frequencies, and register
+ * the result as a ROM blob at guest address addr.
+ *
+ * Returns 0 on success, -1 if the blob cannot be found or loaded.
+ * Failures to set individual properties are only reported on stderr.
+ */
+static int bamboo_load_device_tree(hwaddr addr,
+                                     uint32_t ramsize,
+                                     hwaddr initrd_base,
+                                     hwaddr initrd_size,
+                                     const char *kernel_cmdline)
+{
+    uint32_t mem_reg_property[] = { 0, 0, cpu_to_be32(ramsize) };
+    uint32_t tb_freq, clock_freq;
+    char *dtb_path;
+    void *fdt;
+    int fdt_size;
+    int rc;
+
+    dtb_path = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE);
+    if (!dtb_path) {
+        return -1;
+    }
+    fdt = load_device_tree(dtb_path, &fdt_size);
+    g_free(dtb_path);
+    if (fdt == NULL) {
+        return -1;
+    }
+
+    /* Manipulate device tree in memory. */
+
+    rc = qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property,
+                          sizeof(mem_reg_property));
+    if (rc < 0) {
+        fprintf(stderr, "couldn't set /memory/reg\n");
+    }
+
+    rc = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start",
+                               initrd_base);
+    if (rc < 0) {
+        fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n");
+    }
+
+    rc = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end",
+                               (initrd_base + initrd_size));
+    if (rc < 0) {
+        fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n");
+    }
+
+    rc = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs",
+                                 kernel_cmdline);
+    if (rc < 0) {
+        fprintf(stderr, "couldn't set /chosen/bootargs\n");
+    }
+
+    /*
+     * Copy data from the host device tree into the guest. Since the guest
+     * can directly access the timebase without host involvement, we must
+     * expose the correct frequencies.  Without KVM both default to 400 MHz.
+     */
+    if (kvm_enabled()) {
+        tb_freq = kvmppc_get_tbfreq();
+        clock_freq = kvmppc_get_clockfreq();
+    } else {
+        tb_freq = 400000000;
+        clock_freq = 400000000;
+    }
+
+    qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency",
+                          clock_freq);
+    qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency",
+                          tb_freq);
+
+    rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
+    g_free(fdt);
+    return 0;
+}
+
+/*
+ * Create reset TLB entries for BookE, spanning the 32bit addr space.
+ *
+ * Entry 0 maps a 2 GiB window from va to pa; entry 1 is an identity
+ * mapping of the upper 2 GiB.  Both are valid with read/write/execute
+ * permission bits set and PID 0.
+ */
+static void mmubooke_create_initial_mapping(CPUPPCState *env,
+                                     target_ulong va,
+                                     hwaddr pa)
+{
+    ppcemb_tlb_t *low = &env->tlb.tlbe[0];
+    ppcemb_tlb_t *high = &env->tlb.tlbe[1];
+
+    /* Lower window: up to 0x80000000 */
+    low->EPN = va & TARGET_PAGE_MASK;
+    low->RPN = pa & TARGET_PAGE_MASK;
+    low->size = 1U << 31;
+    low->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4);
+    low->attr = 0;
+    low->PID = 0;
+
+    /* Upper window: up to 0xffffffff */
+    high->EPN = 0x80000000 & TARGET_PAGE_MASK;
+    high->RPN = 0x80000000 & TARGET_PAGE_MASK;
+    high->size = 1U << 31;
+    high->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4);
+    high->attr = 0;
+    high->PID = 0;
+}
+
+/*
+ * Machine reset handler: resets the CPU, then sets the stack pointer
+ * (r1), the device tree address (r3) and the entry point, and installs
+ * the initial TLB mapping.
+ */
+static void main_cpu_reset(void *opaque)
+{
+    PowerPCCPU *ppc = opaque;
+    CPUPPCState *state = &ppc->env;
+
+    cpu_reset(CPU(ppc));
+
+    state->nip = entry;
+    state->gpr[3] = FDT_ADDR;
+    state->gpr[1] = (16 * MiB) - 8;
+
+    /* Create a mapping for the kernel. */
+    mmubooke_create_initial_mapping(state, 0, 0);
+}
+
+/*
+ * Machine init for the Bamboo (PPC440EP) board.
+ *
+ * Creates the CPU, UIC, SDRAM controller, PCI host bridge, ISA I/O
+ * alias, serial ports and PCI NICs, then optionally loads a kernel
+ * (uImage first, ELF as fallback), an initrd and the device tree.
+ * Any fatal setup error is reported and terminates QEMU with exit(1).
+ */
+static void bamboo_init(MachineState *machine)
+{
+    const char *kernel_filename = machine->kernel_filename;
+    const char *kernel_cmdline = machine->kernel_cmdline;
+    const char *initrd_filename = machine->initrd_filename;
+    unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 };
+    MemoryRegion *address_space_mem = get_system_memory();
+    MemoryRegion *isa = g_new(MemoryRegion, 1);
+    MemoryRegion *ram_memories = g_new(MemoryRegion, PPC440EP_SDRAM_NR_BANKS);
+    hwaddr ram_bases[PPC440EP_SDRAM_NR_BANKS];
+    hwaddr ram_sizes[PPC440EP_SDRAM_NR_BANKS];
+    PCIBus *pcibus;
+    PowerPCCPU *cpu;
+    CPUPPCState *env;
+    target_long initrd_size = 0;
+    DeviceState *dev;
+    DeviceState *uicdev;
+    SysBusDevice *uicsbd;
+    int success;
+    int i;
+
+    cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+    env = &cpu->env;
+
+    if (env->mmu_model != POWERPC_MMU_BOOKE) {
+        error_report("MMU model %i not supported by this machine",
+                     env->mmu_model);
+        exit(1);
+    }
+
+    qemu_register_reset(main_cpu_reset, cpu);
+    ppc_booke_timers_init(cpu, 400000000, 0);
+    ppc_dcr_init(env, NULL, NULL);
+
+    /* interrupt controller */
+    uicdev = qdev_new(TYPE_PPC_UIC);
+    uicsbd = SYS_BUS_DEVICE(uicdev);
+
+    object_property_set_link(OBJECT(uicdev), "cpu", OBJECT(cpu),
+                             &error_fatal);
+    sysbus_realize_and_unref(uicsbd, &error_fatal);
+
+    sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_INT,
+                       ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]);
+    sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_CINT,
+                       ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]);
+
+    /* SDRAM controller */
+    memset(ram_bases, 0, sizeof(ram_bases));
+    memset(ram_sizes, 0, sizeof(ram_sizes));
+    ppc4xx_sdram_banks(machine->ram, PPC440EP_SDRAM_NR_BANKS, ram_memories,
+                       ram_bases, ram_sizes, ppc440ep_sdram_bank_sizes);
+    /* XXX 440EP's ECC interrupts are on UIC1, but we've only created UIC0. */
+    ppc4xx_sdram_init(env,
+                      qdev_get_gpio_in(uicdev, 14),
+                      PPC440EP_SDRAM_NR_BANKS, ram_memories,
+                      ram_bases, ram_sizes, 1);
+
+    /* PCI */
+    dev = sysbus_create_varargs(TYPE_PPC4xx_PCI_HOST_BRIDGE,
+                                PPC440EP_PCI_CONFIG,
+                                qdev_get_gpio_in(uicdev, pci_irq_nrs[0]),
+                                qdev_get_gpio_in(uicdev, pci_irq_nrs[1]),
+                                qdev_get_gpio_in(uicdev, pci_irq_nrs[2]),
+                                qdev_get_gpio_in(uicdev, pci_irq_nrs[3]),
+                                NULL);
+    pcibus = (PCIBus *)qdev_get_child_bus(dev, "pci.0");
+    if (!pcibus) {
+        error_report("couldn't create PCI controller");
+        exit(1);
+    }
+
+    memory_region_init_alias(isa, NULL, "isa_mmio",
+                             get_system_io(), 0, PPC440EP_PCI_IOLEN);
+    memory_region_add_subregion(get_system_memory(), PPC440EP_PCI_IO, isa);
+
+    if (serial_hd(0) != NULL) {
+        serial_mm_init(address_space_mem, 0xef600300, 0,
+                       qdev_get_gpio_in(uicdev, 0),
+                       PPC_SERIAL_MM_BAUDBASE, serial_hd(0),
+                       DEVICE_BIG_ENDIAN);
+    }
+    if (serial_hd(1) != NULL) {
+        serial_mm_init(address_space_mem, 0xef600400, 0,
+                       qdev_get_gpio_in(uicdev, 1),
+                       PPC_SERIAL_MM_BAUDBASE, serial_hd(1),
+                       DEVICE_BIG_ENDIAN);
+    }
+
+    /*
+     * Register network interfaces.  pcibus is known to be non-NULL here
+     * because the NULL case exited above.
+     */
+    for (i = 0; i < nb_nics; i++) {
+        /* There are no PCI NICs on the Bamboo board, but there are
+         * PCI slots, so we can pick whatever default model we want. */
+        pci_nic_init_nofail(&nd_table[i], pcibus, "e1000", NULL);
+    }
+
+    /* Load kernel. */
+    if (kernel_filename) {
+        hwaddr loadaddr = LOAD_UIMAGE_LOADADDR_INVALID;
+        success = load_uimage(kernel_filename, &entry, &loadaddr, NULL,
+                              NULL, NULL);
+        if (success < 0) {
+            /*
+             * Not a uImage: try ELF.  Only take the entry point when the
+             * load succeeded, so an unset elf_entry is never read.
+             */
+            uint64_t elf_entry = 0;
+
+            success = load_elf(kernel_filename, NULL, NULL, NULL, &elf_entry,
+                               NULL, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0);
+            if (success >= 0) {
+                entry = elf_entry;
+            }
+        }
+        /* XXX try again as binary */
+        if (success < 0) {
+            error_report("could not load kernel '%s'", kernel_filename);
+            exit(1);
+        }
+    }
+
+    /* Load initrd. */
+    if (initrd_filename) {
+        /*
+         * NOTE(review): the size limit underflows if ram_size is smaller
+         * than RAMDISK_ADDR - confirm whether such RAM sizes are reachable.
+         */
+        initrd_size = load_image_targphys(initrd_filename, RAMDISK_ADDR,
+                                          machine->ram_size - RAMDISK_ADDR);
+
+        if (initrd_size < 0) {
+            error_report("could not load ram disk '%s' at %x",
+                         initrd_filename, RAMDISK_ADDR);
+            exit(1);
+        }
+    }
+
+    /* If we're loading a kernel directly, we must load the device tree too. */
+    if (kernel_filename) {
+        if (bamboo_load_device_tree(FDT_ADDR, machine->ram_size, RAMDISK_ADDR,
+                                    initrd_size, kernel_cmdline) < 0) {
+            error_report("couldn't load device tree");
+            exit(1);
+        }
+    }
+}
+
+/* Fill in the machine class for the "bamboo" board. */
+static void bamboo_machine_init(MachineClass *mc)
+{
+    /* Board identity and entry point */
+    mc->init = bamboo_init;
+    mc->desc = "bamboo";
+
+    /* Defaults used when the user does not override them */
+    mc->default_ram_id = "ppc4xx.sdram";
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("440epb");
+}
+
+DEFINE_MACHINE("bamboo", bamboo_machine_init)
diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c
new file mode 100644
index 000000000..788d25514
--- /dev/null
+++ b/hw/ppc/ppc440_pcix.c
@@ -0,0 +1,538 @@
+/*
+ * Emulation of the ibm,plb-pcix PCI controller
+ * This is found in some 440 SoCs e.g. the 460EX.
+ *
+ * Copyright (c) 2016-2018 BALATON Zoltan
+ *
+ * Derived from ppc4xx_pci.c and pci-host/ppce500.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/irq.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/ppc4xx.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "trace.h"
+#include "qom/object.h"
+
+/*
+ * State of one outbound window (POM), as programmed through the
+ * PCIX0_POMn* registers and applied by ppc440_pcix_update_pom().
+ */
+struct PLBOutMap {
+ /* Address in system memory where the window is mapped (LAL/LAH) */
+ uint64_t la;
+ /* Offset into the PCI bus memory region that the window aliases */
+ uint64_t pcia;
+ /* Size/attribute register: bit 0 enables the window, rest gives size */
+ uint32_t sa;
+ /* Alias region, initialised and mapped while the window is enabled */
+ MemoryRegion mr;
+};
+
+/*
+ * State of one inbound window (PIM), as programmed through the
+ * PCIX0_PIMn* registers and applied by ppc440_pcix_update_pim().
+ */
+struct PLBInMap {
+ /* Size/attribute register: bit 0 enables the window, bits above 2 give size */
+ uint64_t sa;
+ /* Offset into system memory that the window aliases (LAL/LAH) */
+ uint64_t la;
+ /* Alias region, initialised and mapped into bm while the window is enabled */
+ MemoryRegion mr;
+};
+
+#define TYPE_PPC440_PCIX_HOST_BRIDGE "ppc440-pcix-host"
+OBJECT_DECLARE_SIMPLE_TYPE(PPC440PCIXState, PPC440_PCIX_HOST_BRIDGE)
+
+#define PPC440_PCIX_NR_POMS 3
+#define PPC440_PCIX_NR_PIMS 3
+
+/* Device state of the ppc440-pcix PCI host bridge. */
+struct PPC440PCIXState {
+ PCIHostState parent_obj;
+
+ /* PCI device created on the root bus at realize time */
+ PCIDevice *dev;
+ /* Outbound and inbound window state */
+ struct PLBOutMap pom[PPC440_PCIX_NR_POMS];
+ struct PLBInMap pim[PPC440_PCIX_NR_PIMS];
+ /* Value last written to PCIX0_STS */
+ uint32_t sts;
+ /* Single sysbus IRQ that all PCI interrupt pins are mapped to */
+ qemu_irq irq;
+ /* Address space handed to PCI devices via the IOMMU hook; rooted at bm */
+ AddressSpace bm_as;
+ MemoryRegion bm;
+
+ /* Sysbus MMIO container holding the config and internal register regions */
+ MemoryRegion container;
+ /* Internal register block */
+ MemoryRegion iomem;
+ /* PCI bus memory space */
+ MemoryRegion busmem;
+};
+
+#define PPC440_REG_BASE 0x80000
+#define PPC440_REG_SIZE 0xff
+
+#define PCIC0_CFGADDR 0x0
+#define PCIC0_CFGDATA 0x4
+
+#define PCIX0_POM0LAL 0x68
+#define PCIX0_POM0LAH 0x6c
+#define PCIX0_POM0SA 0x70
+#define PCIX0_POM0PCIAL 0x74
+#define PCIX0_POM0PCIAH 0x78
+#define PCIX0_POM1LAL 0x7c
+#define PCIX0_POM1LAH 0x80
+#define PCIX0_POM1SA 0x84
+#define PCIX0_POM1PCIAL 0x88
+#define PCIX0_POM1PCIAH 0x8c
+#define PCIX0_POM2SA 0x90
+
+#define PCIX0_PIM0SAL 0x98
+#define PCIX0_PIM0LAL 0x9c
+#define PCIX0_PIM0LAH 0xa0
+#define PCIX0_PIM1SA 0xa4
+#define PCIX0_PIM1LAL 0xa8
+#define PCIX0_PIM1LAH 0xac
+#define PCIX0_PIM2SAL 0xb0
+#define PCIX0_PIM2LAL 0xb4
+#define PCIX0_PIM2LAH 0xb8
+#define PCIX0_PIM0SAH 0xf8
+#define PCIX0_PIM2SAH 0xfc
+
+#define PCIX0_STS 0xe0
+
+#define PCI_ALL_SIZE (PPC440_REG_BASE + PPC440_REG_SIZE)
+
+/*
+ * Unmap mem from parent and unparent it, if it is currently mapped.
+ * Does nothing for a region that is not mapped.
+ */
+static void ppc440_pcix_clear_region(MemoryRegion *parent,
+                                     MemoryRegion *mem)
+{
+    if (!memory_region_is_mapped(mem)) {
+        return;
+    }
+
+    memory_region_del_subregion(parent, mem);
+    object_unparent(OBJECT(mem));
+}
+
+/*
+ * DMA mapping: rebuild inbound window idx from its current sa/la values.
+ *
+ * The old alias is always torn down first.  If the enable bit (bit 0 of
+ * sa) is clear nothing is mapped; otherwise an alias of system memory
+ * is mapped at offset 0 of the bus-master region with priority -1.
+ */
+static void ppc440_pcix_update_pim(PPC440PCIXState *s, int idx)
+{
+    struct PLBInMap *win = &s->pim[idx];
+    uint64_t len;
+    char *label;
+
+    /* Before we modify anything, unmap and destroy the region */
+    ppc440_pcix_clear_region(&s->bm, &win->mr);
+
+    if ((win->sa & 1) == 0) {
+        /* Not enabled, nothing to do */
+        return;
+    }
+
+    label = g_strdup_printf("PCI Inbound Window %d", idx);
+    len = ~(win->sa & ~7ULL) + 1;
+    memory_region_init_alias(&win->mr, OBJECT(s), label, get_system_memory(),
+                             win->la, len);
+    memory_region_add_subregion_overlap(&s->bm, 0, &win->mr, -1);
+    g_free(label);
+
+    trace_ppc440_pcix_update_pim(idx, len, win->la);
+}
+
+/*
+ * BAR mapping: rebuild outbound window idx from its current la/pcia/sa.
+ *
+ * The old alias is always torn down first.  If the enable bit (bit 0 of
+ * sa) is clear nothing is mapped; otherwise an alias into PCI bus memory
+ * at pcia is mapped into system memory at la.
+ */
+static void ppc440_pcix_update_pom(PPC440PCIXState *s, int idx)
+{
+    MemoryRegion *sysmem = get_system_memory();
+    struct PLBOutMap *win = &s->pom[idx];
+    uint32_t len;
+    char *label;
+
+    /* Before we modify anything, unmap and destroy the region */
+    ppc440_pcix_clear_region(sysmem, &win->mr);
+
+    if ((win->sa & 1) == 0) {
+        /* Not enabled, nothing to do */
+        return;
+    }
+
+    label = g_strdup_printf("PCI Outbound Window %d", idx);
+    len = ~(win->sa & 0xfffffffe) + 1;
+    if (len == 0) {
+        /* The computed size wrapped to zero; use the largest 32-bit size */
+        len = 0xffffffff;
+    }
+    memory_region_init_alias(&win->mr, OBJECT(s), label, &s->busmem,
+                             win->pcia, len);
+    memory_region_add_subregion(sysmem, win->la, &win->mr);
+    g_free(label);
+
+    trace_ppc440_pcix_update_pom(idx, len, win->la, win->pcia);
+}
+
+/* Replace the low 32 bits of reg with val, keeping the high half. */
+static inline uint64_t pcix_merge_lo32(uint64_t reg, uint64_t val)
+{
+    return (reg & 0xffffffff00000000ULL) | val;
+}
+
+/* Replace the high 32 bits of reg with val, keeping the low half. */
+static inline uint64_t pcix_merge_hi32(uint64_t reg, uint64_t val)
+{
+    return (reg & 0xffffffffULL) | (val << 32);
+}
+
+/*
+ * Write handler for the bridge's internal register block.
+ *
+ * Offsets in the PCI_VENDOR_ID..PCI_MAX_LAT range go to the host bridge
+ * device's config space.  Window registers update the stored window
+ * state and, except for POM2SA, remap the affected window.  Unknown
+ * offsets are logged as unimplemented.
+ */
+static void ppc440_pcix_reg_write4(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size)
+{
+    PPC440PCIXState *s = opaque;
+
+    trace_ppc440_pcix_reg_write(addr, val, size);
+    switch (addr) {
+    case PCI_VENDOR_ID ... PCI_MAX_LAT:
+        stl_le_p(s->dev->config + addr, val);
+        break;
+
+    /* Outbound window 0 */
+    case PCIX0_POM0LAL:
+        s->pom[0].la = pcix_merge_lo32(s->pom[0].la, val);
+        ppc440_pcix_update_pom(s, 0);
+        break;
+    case PCIX0_POM0LAH:
+        s->pom[0].la = pcix_merge_hi32(s->pom[0].la, val);
+        ppc440_pcix_update_pom(s, 0);
+        break;
+    case PCIX0_POM0SA:
+        s->pom[0].sa = val;
+        ppc440_pcix_update_pom(s, 0);
+        break;
+    case PCIX0_POM0PCIAL:
+        s->pom[0].pcia = pcix_merge_lo32(s->pom[0].pcia, val);
+        ppc440_pcix_update_pom(s, 0);
+        break;
+    case PCIX0_POM0PCIAH:
+        s->pom[0].pcia = pcix_merge_hi32(s->pom[0].pcia, val);
+        ppc440_pcix_update_pom(s, 0);
+        break;
+
+    /* Outbound window 1 */
+    case PCIX0_POM1LAL:
+        s->pom[1].la = pcix_merge_lo32(s->pom[1].la, val);
+        ppc440_pcix_update_pom(s, 1);
+        break;
+    case PCIX0_POM1LAH:
+        s->pom[1].la = pcix_merge_hi32(s->pom[1].la, val);
+        ppc440_pcix_update_pom(s, 1);
+        break;
+    case PCIX0_POM1SA:
+        s->pom[1].sa = val;
+        ppc440_pcix_update_pom(s, 1);
+        break;
+    case PCIX0_POM1PCIAL:
+        s->pom[1].pcia = pcix_merge_lo32(s->pom[1].pcia, val);
+        ppc440_pcix_update_pom(s, 1);
+        break;
+    case PCIX0_POM1PCIAH:
+        s->pom[1].pcia = pcix_merge_hi32(s->pom[1].pcia, val);
+        ppc440_pcix_update_pom(s, 1);
+        break;
+
+    /* Outbound window 2: value is stored only, no remap */
+    case PCIX0_POM2SA:
+        s->pom[2].sa = val;
+        break;
+
+    /* Inbound window 0 */
+    case PCIX0_PIM0SAL:
+        s->pim[0].sa = pcix_merge_lo32(s->pim[0].sa, val);
+        ppc440_pcix_update_pim(s, 0);
+        break;
+    case PCIX0_PIM0LAL:
+        s->pim[0].la = pcix_merge_lo32(s->pim[0].la, val);
+        ppc440_pcix_update_pim(s, 0);
+        break;
+    case PCIX0_PIM0LAH:
+        s->pim[0].la = pcix_merge_hi32(s->pim[0].la, val);
+        ppc440_pcix_update_pim(s, 0);
+        break;
+    case PCIX0_PIM0SAH:
+        s->pim[0].sa = pcix_merge_hi32(s->pim[0].sa, val);
+        ppc440_pcix_update_pim(s, 0);
+        break;
+
+    /* Inbound window 1 */
+    case PCIX0_PIM1SA:
+        s->pim[1].sa = val;
+        ppc440_pcix_update_pim(s, 1);
+        break;
+    case PCIX0_PIM1LAL:
+        s->pim[1].la = pcix_merge_lo32(s->pim[1].la, val);
+        ppc440_pcix_update_pim(s, 1);
+        break;
+    case PCIX0_PIM1LAH:
+        s->pim[1].la = pcix_merge_hi32(s->pim[1].la, val);
+        ppc440_pcix_update_pim(s, 1);
+        break;
+
+    /* Inbound window 2 */
+    case PCIX0_PIM2SAL:
+        s->pim[2].sa = pcix_merge_lo32(s->pim[2].sa, val);
+        ppc440_pcix_update_pim(s, 2);
+        break;
+    case PCIX0_PIM2LAL:
+        s->pim[2].la = pcix_merge_lo32(s->pim[2].la, val);
+        ppc440_pcix_update_pim(s, 2);
+        break;
+    case PCIX0_PIM2LAH:
+        s->pim[2].la = pcix_merge_hi32(s->pim[2].la, val);
+        ppc440_pcix_update_pim(s, 2);
+        break;
+    case PCIX0_PIM2SAH:
+        s->pim[2].sa = pcix_merge_hi32(s->pim[2].sa, val);
+        ppc440_pcix_update_pim(s, 2);
+        break;
+
+    case PCIX0_STS:
+        s->sts = val;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "%s: unhandled PCI internal register 0x%"HWADDR_PRIx"\n",
+                      __func__, addr);
+        break;
+    }
+}
+
+/*
+ * Read handler for the bridge's internal register block.
+ *
+ * Offsets in the PCI_VENDOR_ID..PCI_MAX_LAT range are read from the
+ * host bridge device's config space.  Window registers return the low
+ * or high 32 bits of the stored window state.  Unknown offsets are
+ * logged as unimplemented and read as 0.
+ */
+static uint64_t ppc440_pcix_reg_read4(void *opaque, hwaddr addr,
+                                      unsigned size)
+{
+    PPC440PCIXState *s = opaque;
+    uint32_t ret;
+
+    switch (addr) {
+    case PCI_VENDOR_ID ... PCI_MAX_LAT:
+        ret = ldl_le_p(s->dev->config + addr);
+        break;
+
+    /* Outbound window 0 */
+    case PCIX0_POM0LAL:
+        ret = (uint32_t)s->pom[0].la;
+        break;
+    case PCIX0_POM0LAH:
+        ret = (uint32_t)(s->pom[0].la >> 32);
+        break;
+    case PCIX0_POM0SA:
+        ret = s->pom[0].sa;
+        break;
+    case PCIX0_POM0PCIAL:
+        ret = (uint32_t)s->pom[0].pcia;
+        break;
+    case PCIX0_POM0PCIAH:
+        ret = (uint32_t)(s->pom[0].pcia >> 32);
+        break;
+
+    /* Outbound window 1 */
+    case PCIX0_POM1LAL:
+        ret = (uint32_t)s->pom[1].la;
+        break;
+    case PCIX0_POM1LAH:
+        ret = (uint32_t)(s->pom[1].la >> 32);
+        break;
+    case PCIX0_POM1SA:
+        ret = s->pom[1].sa;
+        break;
+    case PCIX0_POM1PCIAL:
+        ret = (uint32_t)s->pom[1].pcia;
+        break;
+    case PCIX0_POM1PCIAH:
+        ret = (uint32_t)(s->pom[1].pcia >> 32);
+        break;
+
+    /* Outbound window 2 */
+    case PCIX0_POM2SA:
+        ret = s->pom[2].sa;
+        break;
+
+    /* Inbound window 0 */
+    case PCIX0_PIM0SAL:
+        ret = (uint32_t)s->pim[0].sa;
+        break;
+    case PCIX0_PIM0SAH:
+        ret = (uint32_t)(s->pim[0].sa >> 32);
+        break;
+    case PCIX0_PIM0LAL:
+        ret = (uint32_t)s->pim[0].la;
+        break;
+    case PCIX0_PIM0LAH:
+        ret = (uint32_t)(s->pim[0].la >> 32);
+        break;
+
+    /* Inbound window 1 */
+    case PCIX0_PIM1SA:
+        ret = (uint32_t)s->pim[1].sa;
+        break;
+    case PCIX0_PIM1LAL:
+        ret = (uint32_t)s->pim[1].la;
+        break;
+    case PCIX0_PIM1LAH:
+        ret = (uint32_t)(s->pim[1].la >> 32);
+        break;
+
+    /* Inbound window 2 */
+    case PCIX0_PIM2SAL:
+        ret = (uint32_t)s->pim[2].sa;
+        break;
+    case PCIX0_PIM2SAH:
+        ret = (uint32_t)(s->pim[2].sa >> 32);
+        break;
+    case PCIX0_PIM2LAL:
+        ret = (uint32_t)s->pim[2].la;
+        break;
+    case PCIX0_PIM2LAH:
+        ret = (uint32_t)(s->pim[2].la >> 32);
+        break;
+
+    case PCIX0_STS:
+        ret = s->sts;
+        break;
+
+    default:
+        qemu_log_mask(LOG_UNIMP,
+                      "%s: invalid PCI internal register 0x%" HWADDR_PRIx "\n",
+                      __func__, addr);
+        ret = 0;
+        break;
+    }
+
+    trace_ppc440_pcix_reg_read(addr, ret);
+    return ret;
+}
+
+/*
+ * Access callbacks for the bridge's internal register block (the
+ * "pci.reg" region created in ppc440_pcix_realize); little-endian.
+ */
+static const MemoryRegionOps pci_reg_ops = {
+ .read = ppc440_pcix_reg_read4,
+ .write = ppc440_pcix_reg_write4,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * Device reset: unmap every outbound and inbound window, zero the
+ * window state, preset the high half of each inbound sa register to
+ * all ones and clear the status register.
+ */
+static void ppc440_pcix_reset(DeviceState *dev)
+{
+    PPC440PCIXState *s = PPC440_PCIX_HOST_BRIDGE(dev);
+    MemoryRegion *sysmem = get_system_memory();
+    int n;
+
+    /* The regions must be unmapped before their state is wiped below */
+    for (n = 0; n < PPC440_PCIX_NR_POMS; n++) {
+        ppc440_pcix_clear_region(sysmem, &s->pom[n].mr);
+    }
+    for (n = 0; n < PPC440_PCIX_NR_PIMS; n++) {
+        ppc440_pcix_clear_region(&s->bm, &s->pim[n].mr);
+    }
+
+    memset(s->pom, 0, sizeof(s->pom));
+    memset(s->pim, 0, sizeof(s->pim));
+
+    for (n = 0; n < PPC440_PCIX_NR_PIMS; n++) {
+        s->pim[n].sa = 0xffffffff00000000ULL;
+    }
+    s->sts = 0;
+}
+
+/*
+ * All four IRQ[ABCD] pins from all slots are tied to a single board
+ * IRQ, so our mapping function here maps everything to IRQ 0.
+ * The code in pci_change_irq_level() tracks the number of times
+ * the mapped IRQ is asserted and deasserted, so if multiple devices
+ * assert an IRQ at the same time the behaviour is correct.
+ *
+ * This may need further refactoring for boards that use multiple IRQ lines.
+ */
+static int ppc440_pcix_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    /* Every pin of every slot shares the one board interrupt line */
+    const int board_irq = 0;
+
+    trace_ppc440_pcix_map_irq(pci_dev->devfn, irq_num, board_irq);
+    return board_irq;
+}
+
+/*
+ * PCI bus IRQ callback: forward level to the bridge's output line.
+ * opaque points at that qemu_irq.  A negative irq_num is reported as an
+ * error and otherwise ignored.
+ */
+static void ppc440_pcix_set_irq(void *opaque, int irq_num, int level)
+{
+    qemu_irq *line = opaque;
+
+    trace_ppc440_pcix_set_irq(irq_num);
+    if (irq_num >= 0) {
+        qemu_set_irq(*line, level);
+    } else {
+        error_report("%s: PCI irq %d", __func__, irq_num);
+    }
+}
+
+/*
+ * IOMMU hook for the PCI bus: every device, whatever its devfn, gets
+ * the bridge's bus-master address space.
+ */
+static AddressSpace *ppc440_pcix_set_iommu(PCIBus *b, void *opaque, int devfn)
+{
+    return &((PPC440PCIXState *)opaque)->bm_as;
+}
+
+/*
+ * Some guests on sam460ex write all kinds of garbage here such as
+ * missing enable bit and low bits set and still expect this to work
+ * (apparently it does on real hardware because these boot there) so
+ * we have to override these ops here and fix it up
+ */
+/*
+ * Lenient write handler for the config address register: only 4-byte
+ * writes at offset 0 are accepted, the two low bits are cleared and the
+ * enable bit (bit 31) is forced on.  Anything else is ignored.
+ */
+static void pci_host_config_write(void *opaque, hwaddr addr,
+                                  uint64_t val, unsigned len)
+{
+    PCIHostState *host = opaque;
+
+    if (addr == 0 && len == 4) {
+        host->config_reg = (val & 0xfffffffcULL) | (1UL << 31);
+    }
+}
+
+/*
+ * Read handler for the config address register: returns the stored
+ * value truncated to 32 bits, regardless of addr and len.
+ */
+static uint64_t pci_host_config_read(void *opaque, hwaddr addr,
+                                     unsigned len)
+{
+    PCIHostState *host = opaque;
+
+    return (uint32_t)host->config_reg;
+}
+
+/*
+ * Access callbacks for the config address register ("pci-conf-idx"),
+ * using the lenient handlers defined above; little-endian.
+ */
+const MemoryRegionOps ppc440_pcix_host_conf_ops = {
+ .read = pci_host_config_read,
+ .write = pci_host_config_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * Realize the host bridge: create the PCI root bus and the bridge's own
+ * PCI device, set up the bus-master address space used for DMA, and
+ * expose the config and internal register regions through one sysbus
+ * MMIO container.
+ */
+static void ppc440_pcix_realize(DeviceState *dev, Error **errp)
+{
+    PCIHostState *host = PCI_HOST_BRIDGE(dev);
+    PPC440PCIXState *s = PPC440_PCIX_HOST_BRIDGE(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    /* Root bus with a single interrupt output */
+    sysbus_init_irq(sbd, &s->irq);
+    memory_region_init(&s->busmem, OBJECT(dev), "pci bus memory", UINT64_MAX);
+    host->bus = pci_register_root_bus(dev, NULL, ppc440_pcix_set_irq,
+                         ppc440_pcix_map_irq, &s->irq, &s->busmem,
+                         get_system_io(), PCI_DEVFN(0, 0), 1, TYPE_PCI_BUS);
+
+    s->dev = pci_create_simple(host->bus, PCI_DEVFN(0, 0),
+                               "ppc4xx-host-bridge");
+
+    /* Bus-master view: PCI bus memory, with inbound windows layered on it */
+    memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX);
+    memory_region_add_subregion(&s->bm, 0x0, &s->busmem);
+    address_space_init(&s->bm_as, &s->bm, "pci-bm");
+    pci_setup_iommu(host->bus, ppc440_pcix_set_iommu, s);
+
+    /* Register regions, all placed inside one container */
+    memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE);
+    memory_region_init_io(&host->conf_mem, OBJECT(s),
+                          &ppc440_pcix_host_conf_ops, host,
+                          "pci-conf-idx", 4);
+    memory_region_init_io(&host->data_mem, OBJECT(s), &pci_host_data_le_ops,
+                          host, "pci-conf-data", 4);
+    memory_region_init_io(&s->iomem, OBJECT(s), &pci_reg_ops, s,
+                          "pci.reg", PPC440_REG_SIZE);
+    memory_region_add_subregion(&s->container, PCIC0_CFGADDR, &host->conf_mem);
+    memory_region_add_subregion(&s->container, PCIC0_CFGDATA, &host->data_mem);
+    memory_region_add_subregion(&s->container, PPC440_REG_BASE, &s->iomem);
+    sysbus_init_mmio(sbd, &s->container);
+}
+
+/* Class initialiser: install the device's reset and realize callbacks. */
+static void ppc440_pcix_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->reset = ppc440_pcix_reset;
+    device_class->realize = ppc440_pcix_realize;
+}
+
+/* QOM type description: a PCI host bridge subtype with PPC440PCIXState. */
+static const TypeInfo ppc440_pcix_info = {
+ .name = TYPE_PPC440_PCIX_HOST_BRIDGE,
+ .parent = TYPE_PCI_HOST_BRIDGE,
+ .instance_size = sizeof(PPC440PCIXState),
+ .class_init = ppc440_pcix_class_init,
+};
+
+/* Register the type described by ppc440_pcix_info. */
+static void ppc440_pcix_register_types(void)
+{
+ type_register_static(&ppc440_pcix_info);
+}
+
+type_init(ppc440_pcix_register_types)
diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c
new file mode 100644
index 000000000..993e3ba95
--- /dev/null
+++ b/hw/ppc/ppc440_uc.c
@@ -0,0 +1,1377 @@
+/*
+ * QEMU PowerPC 440 embedded processors emulation
+ *
+ * Copyright (c) 2012 François Revol
+ * Copyright (c) 2016-2019 BALATON Zoltan
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "hw/irq.h"
+#include "exec/memory.h"
+#include "hw/ppc/ppc.h"
+#include "hw/qdev-properties.h"
+#include "hw/pci/pci.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/reset.h"
+#include "ppc440.h"
+#include "qom/object.h"
+
+/*****************************************************************************/
+/* L2 Cache as SRAM */
+/* FIXME:fix names */
+enum {
+ DCR_L2CACHE_BASE = 0x30,
+ DCR_L2CACHE_CFG = DCR_L2CACHE_BASE,
+ DCR_L2CACHE_CMD,
+ DCR_L2CACHE_ADDR,
+ DCR_L2CACHE_DATA,
+ DCR_L2CACHE_STAT,
+ DCR_L2CACHE_CVER,
+ DCR_L2CACHE_SNP0,
+ DCR_L2CACHE_SNP1,
+ DCR_L2CACHE_END = DCR_L2CACHE_SNP1,
+};
+
+/* base is 460ex-specific, cf. U-Boot, ppc4xx-isram.h */
+enum {
+ DCR_ISRAM0_BASE = 0x20,
+ DCR_ISRAM0_SB0CR = DCR_ISRAM0_BASE,
+ DCR_ISRAM0_SB1CR,
+ DCR_ISRAM0_SB2CR,
+ DCR_ISRAM0_SB3CR,
+ DCR_ISRAM0_BEAR,
+ DCR_ISRAM0_BESR0,
+ DCR_ISRAM0_BESR1,
+ DCR_ISRAM0_PMEG,
+ DCR_ISRAM0_CID,
+ DCR_ISRAM0_REVID,
+ DCR_ISRAM0_DPC,
+ DCR_ISRAM0_END = DCR_ISRAM0_DPC
+};
+
+enum {
+ DCR_ISRAM1_BASE = 0xb0,
+ DCR_ISRAM1_SB0CR = DCR_ISRAM1_BASE,
+ /* single bank */
+ DCR_ISRAM1_BEAR = DCR_ISRAM1_BASE + 0x04,
+ DCR_ISRAM1_BESR0,
+ DCR_ISRAM1_BESR1,
+ DCR_ISRAM1_PMEG,
+ DCR_ISRAM1_CID,
+ DCR_ISRAM1_REVID,
+ DCR_ISRAM1_DPC,
+ DCR_ISRAM1_END = DCR_ISRAM1_DPC
+};
+
+typedef struct ppc4xx_l2sram_t {
+ MemoryRegion bank[4];
+ uint32_t l2cache[8];
+ uint32_t isram0[11];
+} ppc4xx_l2sram_t;
+
+ #ifdef MAP_L2SRAM
+ /*
+  * Re-map the instruction and data SRAM regions in system memory when their
+  * base address or their enable bit (0x80000000 of the control value) differs
+  * from what is recorded in l2sram.
+  *
+  * NOTE(review): only built when MAP_L2SRAM is defined.  The body refers to
+  * members (isarc, isacntl, dsarc, dsacntl, isarc_ram, dsarc_ram) that the
+  * ppc4xx_l2sram_t declared in this file does not have, so it looks like it
+  * would not compile if enabled - confirm before defining MAP_L2SRAM.
+  */
+ static void l2sram_update_mappings(ppc4xx_l2sram_t *l2sram,
+ uint32_t isarc, uint32_t isacntl,
+ uint32_t dsarc, uint32_t dsacntl)
+ {
+ if (l2sram->isarc != isarc ||
+ (l2sram->isacntl & 0x80000000) != (isacntl & 0x80000000)) {
+ if (l2sram->isacntl & 0x80000000) {
+ /* Unmap previously assigned memory region */
+ memory_region_del_subregion(get_system_memory(),
+ &l2sram->isarc_ram);
+ }
+ if (isacntl & 0x80000000) {
+ /* Map new instruction memory region */
+ memory_region_add_subregion(get_system_memory(), isarc,
+ &l2sram->isarc_ram);
+ }
+ }
+ if (l2sram->dsarc != dsarc ||
+ (l2sram->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) {
+ if (l2sram->dsacntl & 0x80000000) {
+ /* Beware not to unmap the region we just mapped */
+ if (!(isacntl & 0x80000000) || l2sram->dsarc != isarc) {
+ /* Unmap previously assigned memory region */
+ memory_region_del_subregion(get_system_memory(),
+ &l2sram->dsarc_ram);
+ }
+ }
+ if (dsacntl & 0x80000000) {
+ /* Beware not to remap the region we just mapped */
+ if (!(isacntl & 0x80000000) || dsarc != isarc) {
+ /* Map new data memory region */
+ memory_region_add_subregion(get_system_memory(), dsarc,
+ &l2sram->dsarc_ram);
+ }
+ }
+ }
+ }
+ #endif
+
+ /*
+  * DCR read handler for the L2 cache / internal SRAM register file.
+  * L2 cache and ISRAM0 registers return their stored value; every other
+  * DCR number reads as zero.
+  */
+ static uint32_t dcr_read_l2sram(void *opaque, int dcrn)
+ {
+     ppc4xx_l2sram_t *regs = opaque;
+
+     if (dcrn >= DCR_L2CACHE_BASE && dcrn <= DCR_L2CACHE_END) {
+         return regs->l2cache[dcrn - DCR_L2CACHE_BASE];
+     }
+     if (dcrn >= DCR_ISRAM0_BASE && dcrn <= DCR_ISRAM0_END) {
+         return regs->isram0[dcrn - DCR_ISRAM0_BASE];
+     }
+
+     return 0;
+ }
+
+ /*
+  * DCR write handler for the L2 cache / internal SRAM register file.
+  *
+  * FIXME: Actually handle L2 cache mapping.  Until then writes to the
+  * L2 cache, ISRAM0 and ISRAM1 registers (and to any other DCR number)
+  * are discarded without touching the stored register values.
+  */
+ static void dcr_write_l2sram(void *opaque, int dcrn, uint32_t val)
+ {
+     /* Intentionally empty: no register is writable yet. */
+ }
+
+ /*
+  * Reset handler: zeroes the L2 cache and ISRAM0 register arrays, then
+  * sets the L2 cache STAT register to 0x80000000.
+  */
+ static void l2sram_reset(void *opaque)
+ {
+     ppc4xx_l2sram_t *regs = opaque;
+
+     memset(regs->isram0, 0, sizeof(regs->isram0));
+     memset(regs->l2cache, 0, sizeof(regs->l2cache));
+     regs->l2cache[DCR_L2CACHE_STAT - DCR_L2CACHE_BASE] = 0x80000000;
+ }
+
+ /*
+  * Allocate the L2 cache / SRAM state, create its four 64 KiB RAM banks,
+  * register the reset handler and hook the DCRs listed below to the
+  * l2sram read/write handlers.
+  */
+ void ppc4xx_l2sram_init(CPUPPCState *env)
+ {
+     /* XXX: Size is 4*64kB for 460ex, cf. U-Boot, ppc4xx-isram.h */
+     static const char *const bank_names[] = {
+         "ppc4xx.l2sram_bank0",
+         "ppc4xx.l2sram_bank1",
+         "ppc4xx.l2sram_bank2",
+         "ppc4xx.l2sram_bank3",
+     };
+     /* Same registers, in the same order, as the original call sequence */
+     static const int dcr_numbers[] = {
+         DCR_L2CACHE_CFG, DCR_L2CACHE_CMD, DCR_L2CACHE_ADDR, DCR_L2CACHE_DATA,
+         DCR_L2CACHE_STAT, DCR_L2CACHE_CVER, DCR_L2CACHE_SNP0, DCR_L2CACHE_SNP1,
+         DCR_ISRAM0_SB0CR, DCR_ISRAM0_SB1CR, DCR_ISRAM0_SB2CR, DCR_ISRAM0_SB3CR,
+         DCR_ISRAM0_PMEG, DCR_ISRAM0_DPC,
+         DCR_ISRAM1_SB0CR, DCR_ISRAM1_PMEG, DCR_ISRAM1_DPC,
+     };
+     ppc4xx_l2sram_t *state = g_malloc0(sizeof(*state));
+     size_t n;
+
+     for (n = 0; n < sizeof(bank_names) / sizeof(bank_names[0]); n++) {
+         memory_region_init_ram(&state->bank[n], NULL, bank_names[n],
+                                64 * KiB, &error_abort);
+     }
+     qemu_register_reset(&l2sram_reset, state);
+     for (n = 0; n < sizeof(dcr_numbers) / sizeof(dcr_numbers[0]); n++) {
+         ppc_dcr_register(env, dcr_numbers[n],
+                          state, &dcr_read_l2sram, &dcr_write_l2sram);
+     }
+ }
+
+/*****************************************************************************/
+/* Clocking Power on Reset */
+enum {
+ CPR0_CFGADDR = 0xC,
+ CPR0_CFGDATA = 0xD,
+
+ CPR0_PLLD = 0x060,
+ CPR0_PLBED = 0x080,
+ CPR0_OPBD = 0x0C0,
+ CPR0_PERD = 0x0E0,
+ CPR0_AHBD = 0x100,
+};
+
+typedef struct ppc4xx_cpr_t {
+ uint32_t addr;
+} ppc4xx_cpr_t;
+
+ /*
+  * DCR read handler for the clocking / power-on-reset block.
+  *
+  * CPR0_CFGADDR returns the latched indirect address.  CPR0_CFGDATA returns
+  * a fixed value chosen by that address (PLLD, PLBED, OPBD, PERD, AHBD) and
+  * zero for any other address.  Unknown DCR numbers read as zero.
+  *
+  * The constants are built from unsigned operands so that shifting into
+  * bit 31 (0xb5 << 24) does not overflow a signed int.
+  */
+ static uint32_t dcr_read_cpr(void *opaque, int dcrn)
+ {
+     ppc4xx_cpr_t *cpr = opaque;
+     uint32_t ret = 0;
+
+     switch (dcrn) {
+     case CPR0_CFGADDR:
+         ret = cpr->addr;
+         break;
+     case CPR0_CFGDATA:
+         switch (cpr->addr) {
+         case CPR0_PLLD:
+             ret = (0xb5U << 24) | (1U << 16) | (9U << 8);
+             break;
+         case CPR0_PLBED:
+             ret = (5U << 24);
+             break;
+         case CPR0_OPBD:
+             ret = (2U << 24);
+             break;
+         case CPR0_PERD:
+         case CPR0_AHBD:
+             ret = (1U << 24);
+             break;
+         default:
+             break;
+         }
+         break;
+     default:
+         break;
+     }
+
+     return ret;
+ }
+
+ /*
+  * DCR write handler for the clocking / power-on-reset block.
+  * Only the CPR0_CFGADDR latch is stored; writes to CPR0_CFGDATA and to
+  * any other DCR number are ignored.
+  */
+ static void dcr_write_cpr(void *opaque, int dcrn, uint32_t val)
+ {
+     ppc4xx_cpr_t *state = opaque;
+
+     if (dcrn == CPR0_CFGADDR) {
+         state->addr = val;
+     }
+ }
+
+ /* Reset handler: clears the latched CPR indirect address. */
+ static void ppc4xx_cpr_reset(void *opaque)
+ {
+     ((ppc4xx_cpr_t *)opaque)->addr = 0;
+ }
+
+ /*
+  * Allocate the CPR state, attach the CFGADDR/CFGDATA DCR pair to its
+  * handlers and register the reset handler.
+  */
+ void ppc4xx_cpr_init(CPUPPCState *env)
+ {
+     ppc4xx_cpr_t *state = g_malloc0(sizeof(*state));
+     int dcrn;
+
+     /* CPR0_CFGADDR and CPR0_CFGDATA are adjacent DCR numbers */
+     for (dcrn = CPR0_CFGADDR; dcrn <= CPR0_CFGDATA; dcrn++) {
+         ppc_dcr_register(env, dcrn, state, &dcr_read_cpr, &dcr_write_cpr);
+     }
+     qemu_register_reset(ppc4xx_cpr_reset, state);
+ }
+
+/*****************************************************************************/
+/* System DCRs */
+typedef struct ppc4xx_sdr_t ppc4xx_sdr_t;
+struct ppc4xx_sdr_t {
+ uint32_t addr;
+};
+
+enum {
+ SDR0_CFGADDR = 0x00e,
+ SDR0_CFGDATA,
+ SDR0_STRP0 = 0x020,
+ SDR0_STRP1,
+ SDR0_102 = 0x66,
+ SDR0_103,
+ SDR0_128 = 0x80,
+ SDR0_ECID3 = 0x083,
+ SDR0_DDR0 = 0x0e1,
+ SDR0_USB0 = 0x320,
+};
+
+enum {
+ PESDR0_LOOP = 0x303,
+ PESDR0_RCSSET,
+ PESDR0_RCSSTS,
+ PESDR0_RSTSTA = 0x310,
+ PESDR1_LOOP = 0x343,
+ PESDR1_RCSSET,
+ PESDR1_RCSSTS,
+ PESDR1_RSTSTA = 0x365,
+};
+
+#define SDR0_DDR0_DDRM_ENCODE(n) ((((unsigned long)(n)) & 0x03) << 29)
+#define SDR0_DDR0_DDRM_DDR1 0x20000000
+#define SDR0_DDR0_DDRM_DDR2 0x40000000
+
+ /*
+  * DCR read handler for the system DCRs (SDR0).
+  *
+  * SDR0_CFGADDR returns the latched indirect address.  SDR0_CFGDATA returns
+  * a fixed value selected by that address, or zero when the address is not
+  * one of those handled below.  Other DCR numbers read as zero.
+  *
+  * The SDR0_STRP1 value is built from unsigned operands so that 5 << 29
+  * does not overflow a signed int.
+  */
+ static uint32_t dcr_read_sdr(void *opaque, int dcrn)
+ {
+     ppc4xx_sdr_t *sdr = opaque;
+     uint32_t ret = 0;
+
+     switch (dcrn) {
+     case SDR0_CFGADDR:
+         ret = sdr->addr;
+         break;
+     case SDR0_CFGDATA:
+         switch (sdr->addr) {
+         case SDR0_STRP0:
+             ret = (0xb5 << 8) | (1 << 4) | 9;
+             break;
+         case SDR0_STRP1:
+             ret = (5U << 29) | (2U << 26) | (1U << 24);
+             break;
+         case SDR0_ECID3:
+             ret = 1 << 20; /* No Security/Kasumi support */
+             break;
+         case SDR0_DDR0:
+             ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1;
+             break;
+         case PESDR0_RCSSET:
+         case PESDR1_RCSSET:
+             ret = (1 << 24) | (1 << 16);
+             break;
+         case PESDR0_RCSSTS:
+         case PESDR1_RCSSTS:
+             ret = (1 << 16) | (1 << 12);
+             break;
+         case PESDR0_RSTSTA:
+         case PESDR1_RSTSTA:
+             ret = 1;
+             break;
+         case PESDR0_LOOP:
+         case PESDR1_LOOP:
+             ret = 1 << 12;
+             break;
+         default:
+             break;
+         }
+         break;
+     default:
+         break;
+     }
+
+     return ret;
+ }
+
+ /*
+  * DCR write handler for the system DCRs (SDR0).
+  * Only the SDR0_CFGADDR latch is stored; data written through
+  * SDR0_CFGDATA (whatever the selected address) and writes to any other
+  * DCR number are ignored.
+  */
+ static void dcr_write_sdr(void *opaque, int dcrn, uint32_t val)
+ {
+     ppc4xx_sdr_t *state = opaque;
+
+     if (dcrn == SDR0_CFGADDR) {
+         state->addr = val;
+     }
+ }
+
+ /* Reset handler: clears the latched SDR0 indirect address. */
+ static void sdr_reset(void *opaque)
+ {
+     ((ppc4xx_sdr_t *)opaque)->addr = 0;
+ }
+
+ /*
+  * Allocate the SDR0 state, register its reset handler and attach the
+  * DCRs listed below to the SDR read/write handlers.
+  */
+ void ppc4xx_sdr_init(CPUPPCState *env)
+ {
+     static const int dcr_numbers[] = {
+         SDR0_CFGADDR, SDR0_CFGDATA, SDR0_102, SDR0_103, SDR0_128, SDR0_USB0,
+     };
+     ppc4xx_sdr_t *state = g_malloc0(sizeof(*state));
+     size_t n;
+
+     qemu_register_reset(&sdr_reset, state);
+     for (n = 0; n < sizeof(dcr_numbers) / sizeof(dcr_numbers[0]); n++) {
+         ppc_dcr_register(env, dcr_numbers[n],
+                          state, &dcr_read_sdr, &dcr_write_sdr);
+     }
+ }
+
+/*****************************************************************************/
+/* SDRAM controller */
+typedef struct ppc440_sdram_t {
+ uint32_t addr;
+ int nbanks;
+ MemoryRegion containers[4]; /* used for clipping */
+ MemoryRegion *ram_memories;
+ hwaddr ram_bases[4];
+ hwaddr ram_sizes[4];
+ uint32_t bcr[4];
+} ppc440_sdram_t;
+
+enum {
+ SDRAM0_CFGADDR = 0x10,
+ SDRAM0_CFGDATA,
+ SDRAM_R0BAS = 0x40,
+ SDRAM_R1BAS,
+ SDRAM_R2BAS,
+ SDRAM_R3BAS,
+ SDRAM_CONF1HB = 0x45,
+ SDRAM_PLBADDULL = 0x4a,
+ SDRAM_CONF1LL = 0x4b,
+ SDRAM_CONFPATHB = 0x4f,
+ SDRAM_PLBADDUHB = 0x50,
+};
+
+ /*
+  * Build the bank configuration value for a RAM bank.
+  *
+  * The size must be one of the powers of two from 8 MiB to 4 GiB; it is
+  * turned into the mask in the low half-word.  The base address, shifted
+  * right by two, fills bits 0xffe00000 and bit 0 is set.  Any other size
+  * is reported with error_report() and yields 0.
+  */
+ static uint32_t sdram_bcr(hwaddr ram_base, hwaddr ram_size)
+ {
+     static const struct {
+         hwaddr size;
+         uint32_t mask;
+     } encodings[] = {
+         { 8 * MiB,   0xffc0 },
+         { 16 * MiB,  0xff80 },
+         { 32 * MiB,  0xff00 },
+         { 64 * MiB,  0xfe00 },
+         { 128 * MiB, 0xfc00 },
+         { 256 * MiB, 0xf800 },
+         { 512 * MiB, 0xf000 },
+         { 1 * GiB,   0xe000 },
+         { 2 * GiB,   0xc000 },
+         { 4 * GiB,   0x8000 },
+     };
+     size_t k;
+
+     for (k = 0; k < sizeof(encodings) / sizeof(encodings[0]); k++) {
+         if (encodings[k].size == ram_size) {
+             uint32_t value = encodings[k].mask;
+
+             value |= (ram_base >> 2) & 0xffe00000;
+             value |= 1;
+             return value;
+         }
+     }
+
+     error_report("invalid RAM size " TARGET_FMT_plx, ram_size);
+     return 0;
+ }
+
+ /*
+  * Decode the bank base address from a bank configuration value: the
+  * inverse of the "ram_base >> 2 & 0xffe00000" encoding used by sdram_bcr().
+  *
+  * The masked field is widened to hwaddr before shifting; a 32-bit shift
+  * would drop the two top address bits of bases at or above 4 GiB.
+  */
+ static inline hwaddr sdram_base(uint32_t bcr)
+ {
+     return (hwaddr)(bcr & 0xffe00000) << 2;
+ }
+
+ /*
+  * Decode the bank size from a bank configuration value: 8 MiB times
+  * (1024 minus the 10-bit field held in bits 6..15).
+  */
+ static uint64_t sdram_size(uint32_t bcr)
+ {
+     int units = 1024 - ((bcr >> 6) & 0x3ff);
+
+     return 8 * MiB * units;
+ }
+
+ /*
+  * Store a new configuration value for bank i and update its mapping.
+  *
+  * If the bank is currently enabled (bit 0 of the stored value), its
+  * container is first removed from system memory and torn down.  The new
+  * value is stored masked with 0xffe0ffc1.  When both 'enabled' and bit 0 of
+  * the new value are set, a container of the decoded size is created, the
+  * bank's RAM is placed at offset 0 inside it, and it is mapped at the
+  * decoded base address.
+  */
+ static void sdram_set_bcr(ppc440_sdram_t *sdram, int i,
+                           uint32_t bcr, int enabled)
+ {
+     MemoryRegion *window = &sdram->containers[i];
+
+     if (sdram->bcr[i] & 1) {
+         /* First unmap RAM if enabled */
+         memory_region_del_subregion(get_system_memory(), window);
+         memory_region_del_subregion(window, &sdram->ram_memories[i]);
+         object_unparent(OBJECT(window));
+     }
+
+     sdram->bcr[i] = bcr & 0xffe0ffc1;
+
+     if (!enabled || !(bcr & 1)) {
+         return;
+     }
+     memory_region_init(window, NULL, "sdram-containers", sdram_size(bcr));
+     memory_region_add_subregion(window, 0, &sdram->ram_memories[i]);
+     memory_region_add_subregion(get_system_memory(), sdram_base(bcr), window);
+ }
+
+ /*
+  * Program every bank from the stored base/size tables: banks with a
+  * non-zero size are enabled with the matching configuration value, the
+  * others are set to 0 and left disabled.
+  */
+ static void sdram_map_bcr(ppc440_sdram_t *sdram)
+ {
+     int bank;
+
+     for (bank = 0; bank < sdram->nbanks; bank++) {
+         hwaddr size = sdram->ram_sizes[bank];
+
+         if (size == 0) {
+             sdram_set_bcr(sdram, bank, 0, 0);
+             continue;
+         }
+         sdram_set_bcr(sdram, bank,
+                       sdram_bcr(sdram->ram_bases[bank], size), 1);
+     }
+ }
+
+ /*
+  * DCR read handler for the SDRAM controller.
+  *
+  * SDRAM_R0BAS..SDRAM_R3BAS return the configuration value computed from
+  * the bank's base and size (zero for an empty bank).  SDRAM0_CFGADDR
+  * returns the latched indirect address and SDRAM0_CFGDATA a fixed value
+  * selected by it.  Everything else reads as zero.
+  */
+ static uint32_t dcr_read_sdram(void *opaque, int dcrn)
+ {
+     ppc440_sdram_t *sdram = opaque;
+     uint32_t ret = 0;
+
+     if (dcrn >= SDRAM_R0BAS && dcrn <= SDRAM_R3BAS) {
+         int bank = dcrn - SDRAM_R0BAS;
+
+         if (sdram->ram_sizes[bank]) {
+             ret = sdram_bcr(sdram->ram_bases[bank], sdram->ram_sizes[bank]);
+         }
+     } else if (dcrn == SDRAM0_CFGADDR) {
+         ret = sdram->addr;
+     } else if (dcrn == SDRAM0_CFGDATA) {
+         switch (sdram->addr) {
+         case 0x14: /* SDRAM_MCSTAT (405EX) */
+         case 0x1F:
+             ret = 0x80000000;
+             break;
+         case 0x21: /* SDRAM_MCOPT2 */
+             ret = 0x08000000;
+             break;
+         case 0x40: /* SDRAM_MB0CF */
+             ret = 0x00008001;
+             break;
+         case 0x7A: /* SDRAM_DLCR */
+             ret = 0x02000000;
+             break;
+         case 0xE1: /* SDR0_DDR0 */
+             ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1;
+             break;
+         default:
+             break;
+         }
+     }
+     /* SDRAM_CONF1HB, CONF1LL, CONFPATHB, PLBADDULL, PLBADDUHB read as 0 */
+
+     return ret;
+ }
+
+ /*
+  * DCR write handler for the SDRAM controller.
+  * Only the SDRAM0_CFGADDR latch is stored; writes to the bank, CONF and
+  * PLBADD registers, to SDRAM0_CFGDATA and to unknown DCRs are ignored.
+  */
+ static void dcr_write_sdram(void *opaque, int dcrn, uint32_t val)
+ {
+     ppc440_sdram_t *state = opaque;
+
+     if (dcrn == SDRAM0_CFGADDR) {
+         state->addr = val;
+     }
+ }
+
+ /* Reset handler: clears the latched SDRAM indirect address. */
+ static void sdram_reset(void *opaque)
+ {
+     ((ppc440_sdram_t *)opaque)->addr = 0;
+ }
+
+ /*
+  * Create the SDRAM controller state and hook up its DCRs.
+  *
+  * env:          CPU state the DCRs are registered with
+  * nbanks:       number of entries in ram_memories/ram_bases/ram_sizes;
+  *               must not exceed the four banks the state can hold
+  * ram_memories: per-bank RAM regions (the pointer is kept, not copied)
+  * ram_bases:    per-bank base addresses (copied)
+  * ram_sizes:    per-bank sizes (copied)
+  * do_init:      when non-zero the banks are mapped immediately
+  */
+ void ppc440_sdram_init(CPUPPCState *env, int nbanks,
+                        MemoryRegion *ram_memories,
+                        hwaddr *ram_bases, hwaddr *ram_sizes,
+                        int do_init)
+ {
+     static const int bank_dcrs[] = {
+         SDRAM_R0BAS, SDRAM_R1BAS, SDRAM_R2BAS, SDRAM_R3BAS,
+         SDRAM_CONF1HB, SDRAM_PLBADDULL, SDRAM_CONF1LL,
+         SDRAM_CONFPATHB, SDRAM_PLBADDUHB,
+     };
+     ppc440_sdram_t *sdram;
+     size_t n;
+
+     sdram = g_malloc0(sizeof(*sdram));
+     /* The copies below would overrun the fixed-size bank arrays otherwise */
+     g_assert(nbanks >= 0 &&
+              (size_t)nbanks <= sizeof(sdram->ram_bases) /
+                                sizeof(sdram->ram_bases[0]));
+     sdram->nbanks = nbanks;
+     sdram->ram_memories = ram_memories;
+     memcpy(sdram->ram_bases, ram_bases, nbanks * sizeof(hwaddr));
+     memcpy(sdram->ram_sizes, ram_sizes, nbanks * sizeof(hwaddr));
+     qemu_register_reset(&sdram_reset, sdram);
+     ppc_dcr_register(env, SDRAM0_CFGADDR,
+                      sdram, &dcr_read_sdram, &dcr_write_sdram);
+     ppc_dcr_register(env, SDRAM0_CFGDATA,
+                      sdram, &dcr_read_sdram, &dcr_write_sdram);
+     if (do_init) {
+         sdram_map_bcr(sdram);
+     }
+
+     for (n = 0; n < sizeof(bank_dcrs) / sizeof(bank_dcrs[0]); n++) {
+         ppc_dcr_register(env, bank_dcrs[n],
+                          sdram, &dcr_read_sdram, &dcr_write_sdram);
+     }
+ }
+
+/*****************************************************************************/
+/* PLB to AHB bridge */
+enum {
+ AHB_TOP = 0xA4,
+ AHB_BOT = 0xA5,
+};
+
+typedef struct ppc4xx_ahb_t {
+ uint32_t top;
+ uint32_t bot;
+} ppc4xx_ahb_t;
+
+ /*
+  * DCR read handler for the PLB to AHB bridge: returns the stored TOP or
+  * BOT register, zero for any other DCR number.
+  */
+ static uint32_t dcr_read_ahb(void *opaque, int dcrn)
+ {
+     ppc4xx_ahb_t *bridge = opaque;
+
+     if (dcrn == AHB_TOP) {
+         return bridge->top;
+     }
+     if (dcrn == AHB_BOT) {
+         return bridge->bot;
+     }
+
+     return 0;
+ }
+
+ /*
+  * DCR write handler for the PLB to AHB bridge: stores the value in the
+  * TOP or BOT register and ignores any other DCR number.
+  */
+ static void dcr_write_ahb(void *opaque, int dcrn, uint32_t val)
+ {
+     ppc4xx_ahb_t *bridge = opaque;
+
+     if (dcrn == AHB_TOP) {
+         bridge->top = val;
+     } else if (dcrn == AHB_BOT) {
+         bridge->bot = val;
+     }
+ }
+
+static void ppc4xx_ahb_reset(void *opaque)
+{
+ ppc4xx_ahb_t *ahb = opaque;
+
+ /* No error */
+ ahb->top = 0;
+ ahb->bot = 0;
+}
+
+ /*
+  * Allocate the PLB to AHB bridge state, attach the AHB_TOP/AHB_BOT DCR
+  * pair to its handlers and register the reset handler.
+  */
+ void ppc4xx_ahb_init(CPUPPCState *env)
+ {
+     ppc4xx_ahb_t *bridge = g_malloc0(sizeof(*bridge));
+     int dcrn;
+
+     /* AHB_TOP and AHB_BOT are adjacent DCR numbers */
+     for (dcrn = AHB_TOP; dcrn <= AHB_BOT; dcrn++) {
+         ppc_dcr_register(env, dcrn, bridge, &dcr_read_ahb, &dcr_write_ahb);
+     }
+     qemu_register_reset(ppc4xx_ahb_reset, bridge);
+ }
+
+/*****************************************************************************/
+/* DMA controller */
+
+#define DMA0_CR_CE (1 << 31)
+#define DMA0_CR_PW (1 << 26 | 1 << 25)
+#define DMA0_CR_DAI (1 << 24)
+#define DMA0_CR_SAI (1 << 23)
+#define DMA0_CR_DEC (1 << 2)
+
+enum {
+ DMA0_CR = 0x00,
+ DMA0_CT,
+ DMA0_SAH,
+ DMA0_SAL,
+ DMA0_DAH,
+ DMA0_DAL,
+ DMA0_SGH,
+ DMA0_SGL,
+
+ DMA0_SR = 0x20,
+ DMA0_SGC = 0x23,
+ DMA0_SLP = 0x25,
+ DMA0_POL = 0x26,
+};
+
+typedef struct {
+ uint32_t cr;
+ uint32_t ct;
+ uint64_t sa;
+ uint64_t da;
+ uint64_t sg;
+} PPC4xxDmaChnl;
+
+typedef struct {
+ int base;
+ PPC4xxDmaChnl ch[4];
+ uint32_t sr;
+} PPC4xxDmaState;
+
+ /*
+  * DCR read handler for the DMA controller.
+  *
+  * Offsets 0x00..0x1f (relative to the controller's DCR base) address the
+  * four channels, eight registers each; 64-bit channel values are exposed
+  * as a high and a low word.  DMA0_SR returns the status register.  Any
+  * other offset is logged as unimplemented and reads as zero.
+  */
+ static uint32_t dcr_read_dma(void *opaque, int dcrn)
+ {
+     PPC4xxDmaState *dma = opaque;
+     int addr = dcrn - dma->base;
+     int chnl = addr / 8;
+     const PPC4xxDmaChnl *ch;
+
+     if (addr == DMA0_SR) {
+         return dma->sr;
+     }
+     if (addr < 0x00 || addr > 0x1f) {
+         qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n",
+                       __func__, dcrn, chnl, addr);
+         return 0;
+     }
+
+     ch = &dma->ch[chnl];
+     switch (addr % 8) {
+     case DMA0_CR:
+         return ch->cr;
+     case DMA0_CT:
+         return ch->ct;
+     case DMA0_SAH:
+         return ch->sa >> 32;
+     case DMA0_SAL:
+         return ch->sa;
+     case DMA0_DAH:
+         return ch->da >> 32;
+     case DMA0_DAL:
+         return ch->da;
+     case DMA0_SGH:
+         return ch->sg >> 32;
+     case DMA0_SGL:
+         return ch->sg;
+     }
+
+     return 0;
+ }
+
+ /*
+  * Run the memory-to-memory transfer programmed into one DMA channel.
+  *
+  * ch: channel whose count (low 16 bits of ct), source and destination
+  *     addresses are used
+  * cr: control value just written; selects the transfer width and whether
+  *     the source/destination addresses advance
+  *
+  * cpu_physical_memory_map() takes the wanted length in *plen and returns
+  * the length it could map, so both lengths are initialised to the
+  * transfer size first and the copy only happens when both buffers were
+  * mapped in full.  Otherwise nothing is copied and the buffers are
+  * released with an access length of zero.
+  */
+ static void ppc4xx_dma_run(const PPC4xxDmaChnl *ch, uint32_t cr)
+ {
+     int count = ch->ct & 0xffff;
+     int width, i, sidx, didx;
+     uint8_t *rptr, *wptr;
+     hwaddr rlen, wlen, xferlen;
+
+     if (!count) {
+         return;
+     }
+
+     sidx = didx = 0;
+     width = 1 << ((cr & DMA0_CR_PW) >> 25);
+     xferlen = count * width;
+     rlen = wlen = xferlen;
+     rptr = cpu_physical_memory_map(ch->sa, &rlen, false);
+     wptr = cpu_physical_memory_map(ch->da, &wlen, true);
+     if (rptr && rlen == xferlen && wptr && wlen == xferlen) {
+         if (!(cr & DMA0_CR_DEC) &&
+             cr & DMA0_CR_SAI && cr & DMA0_CR_DAI) {
+             /* optimise common case */
+             memmove(wptr, rptr, count * width);
+             sidx = didx = count * width;
+         } else {
+             /* do it the slow way */
+             for (sidx = didx = i = 0; i < count; i++) {
+                 uint64_t v = ldn_le_p(rptr + sidx, width);
+                 stn_le_p(wptr + didx, width, v);
+                 if (cr & DMA0_CR_SAI) {
+                     sidx += width;
+                 }
+                 if (cr & DMA0_CR_DAI) {
+                     didx += width;
+                 }
+             }
+         }
+     }
+     if (wptr) {
+         cpu_physical_memory_unmap(wptr, wlen, 1, didx);
+     }
+     if (rptr) {
+         cpu_physical_memory_unmap(rptr, rlen, 0, sidx);
+     }
+ }
+
+ /*
+  * DCR write handler for the DMA controller.
+  *
+  * Offsets 0x00..0x1f (relative to the controller's DCR base) address the
+  * four channels, eight registers each.  Writing a control value with
+  * DMA0_CR_CE set starts the channel's transfer at once.  Writing DMA0_SR
+  * clears the status bits that are set in val.  Any other offset is logged
+  * as unimplemented.
+  */
+ static void dcr_write_dma(void *opaque, int dcrn, uint32_t val)
+ {
+     PPC4xxDmaState *dma = opaque;
+     int addr = dcrn - dma->base;
+     int chnl = addr / 8;
+
+     switch (addr) {
+     case 0x00 ... 0x1f:
+         switch (addr % 8) {
+         case DMA0_CR:
+             dma->ch[chnl].cr = val;
+             if (val & DMA0_CR_CE) {
+                 ppc4xx_dma_run(&dma->ch[chnl], val);
+             }
+             break;
+         case DMA0_CT:
+             dma->ch[chnl].ct = val;
+             break;
+         case DMA0_SAH:
+             dma->ch[chnl].sa &= 0xffffffffULL;
+             dma->ch[chnl].sa |= (uint64_t)val << 32;
+             break;
+         case DMA0_SAL:
+             dma->ch[chnl].sa &= 0xffffffff00000000ULL;
+             dma->ch[chnl].sa |= val;
+             break;
+         case DMA0_DAH:
+             dma->ch[chnl].da &= 0xffffffffULL;
+             dma->ch[chnl].da |= (uint64_t)val << 32;
+             break;
+         case DMA0_DAL:
+             dma->ch[chnl].da &= 0xffffffff00000000ULL;
+             dma->ch[chnl].da |= val;
+             break;
+         case DMA0_SGH:
+             dma->ch[chnl].sg &= 0xffffffffULL;
+             dma->ch[chnl].sg |= (uint64_t)val << 32;
+             break;
+         case DMA0_SGL:
+             dma->ch[chnl].sg &= 0xffffffff00000000ULL;
+             dma->ch[chnl].sg |= val;
+             break;
+         }
+         break;
+     case DMA0_SR:
+         dma->sr &= ~val;
+         break;
+     default:
+         qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n",
+                       __func__, dcrn, chnl, addr);
+     }
+ }
+
+ /*
+  * Reset handler: clears all channel registers and the status register
+  * while leaving the controller's DCR base untouched.
+  */
+ static void ppc4xx_dma_reset(void *opaque)
+ {
+     PPC4xxDmaState *state = opaque;
+
+     memset(state->ch, 0, sizeof(state->ch));
+     state->sr = 0;
+ }
+
+ /*
+  * Create a DMA controller whose DCRs start at dcr_base: registers the
+  * reset handler, the eight registers of each of the four channels and the
+  * SR, SGC, SLP and POL registers.
+  */
+ void ppc4xx_dma_init(CPUPPCState *env, int dcr_base)
+ {
+     static const int global_regs[] = { DMA0_SR, DMA0_SGC, DMA0_SLP, DMA0_POL };
+     PPC4xxDmaState *state = g_malloc0(sizeof(*state));
+     int chnl, reg;
+     size_t n;
+
+     state->base = dcr_base;
+     qemu_register_reset(&ppc4xx_dma_reset, state);
+
+     for (chnl = 0; chnl < 4; chnl++) {
+         /* DMA0_CR..DMA0_SGL are the eight consecutive per-channel offsets */
+         for (reg = DMA0_CR; reg <= DMA0_SGL; reg++) {
+             ppc_dcr_register(env, dcr_base + chnl * 8 + reg,
+                              state, &dcr_read_dma, &dcr_write_dma);
+         }
+     }
+     for (n = 0; n < sizeof(global_regs) / sizeof(global_regs[0]); n++) {
+         ppc_dcr_register(env, dcr_base + global_regs[n],
+                          state, &dcr_read_dma, &dcr_write_dma);
+     }
+ }
+
+/*****************************************************************************/
+/* PCI Express controller */
+/* FIXME: This is not complete and does not work, only implemented partially
+ * to allow firmware and guests to find an empty bus. Cards should use PCI.
+ */
+#include "hw/pci/pcie_host.h"
+
+#define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
+OBJECT_DECLARE_SIMPLE_TYPE(PPC460EXPCIEState, PPC460EX_PCIE_HOST)
+
+struct PPC460EXPCIEState {
+ PCIExpressHost host;
+
+ MemoryRegion iomem;
+ qemu_irq irq[4];
+ int32_t dcrn_base;
+
+ uint64_t cfg_base;
+ uint32_t cfg_mask;
+ uint64_t msg_base;
+ uint32_t msg_mask;
+ uint64_t omr1_base;
+ uint64_t omr1_mask;
+ uint64_t omr2_base;
+ uint64_t omr2_mask;
+ uint64_t omr3_base;
+ uint64_t omr3_mask;
+ uint64_t reg_base;
+ uint32_t reg_mask;
+ uint32_t special;
+ uint32_t cfg;
+};
+
+#define DCRN_PCIE0_BASE 0x100
+#define DCRN_PCIE1_BASE 0x120
+
+enum {
+ PEGPL_CFGBAH = 0x0,
+ PEGPL_CFGBAL,
+ PEGPL_CFGMSK,
+ PEGPL_MSGBAH,
+ PEGPL_MSGBAL,
+ PEGPL_MSGMSK,
+ PEGPL_OMR1BAH,
+ PEGPL_OMR1BAL,
+ PEGPL_OMR1MSKH,
+ PEGPL_OMR1MSKL,
+ PEGPL_OMR2BAH,
+ PEGPL_OMR2BAL,
+ PEGPL_OMR2MSKH,
+ PEGPL_OMR2MSKL,
+ PEGPL_OMR3BAH,
+ PEGPL_OMR3BAL,
+ PEGPL_OMR3MSKH,
+ PEGPL_OMR3MSKL,
+ PEGPL_REGBAH,
+ PEGPL_REGBAL,
+ PEGPL_REGMSK,
+ PEGPL_SPECIAL,
+ PEGPL_CFG,
+};
+
+ /*
+  * DCR read handler for one PCI Express controller.
+  *
+  * The register is selected by the offset of dcrn from the controller's
+  * DCR base.  64-bit values are exposed as a high word (...H) and a low
+  * word (...L); unknown offsets read as zero.
+  *
+  * PEGPL_OMR2MSKL returns the low word of omr2_mask, matching what
+  * dcr_write_pcie() stores for that register (it used to return omr3_mask).
+  */
+ static uint32_t dcr_read_pcie(void *opaque, int dcrn)
+ {
+     PPC460EXPCIEState *state = opaque;
+     uint32_t ret = 0;
+
+     switch (dcrn - state->dcrn_base) {
+     case PEGPL_CFGBAH:
+         ret = state->cfg_base >> 32;
+         break;
+     case PEGPL_CFGBAL:
+         ret = state->cfg_base;
+         break;
+     case PEGPL_CFGMSK:
+         ret = state->cfg_mask;
+         break;
+     case PEGPL_MSGBAH:
+         ret = state->msg_base >> 32;
+         break;
+     case PEGPL_MSGBAL:
+         ret = state->msg_base;
+         break;
+     case PEGPL_MSGMSK:
+         ret = state->msg_mask;
+         break;
+     case PEGPL_OMR1BAH:
+         ret = state->omr1_base >> 32;
+         break;
+     case PEGPL_OMR1BAL:
+         ret = state->omr1_base;
+         break;
+     case PEGPL_OMR1MSKH:
+         ret = state->omr1_mask >> 32;
+         break;
+     case PEGPL_OMR1MSKL:
+         ret = state->omr1_mask;
+         break;
+     case PEGPL_OMR2BAH:
+         ret = state->omr2_base >> 32;
+         break;
+     case PEGPL_OMR2BAL:
+         ret = state->omr2_base;
+         break;
+     case PEGPL_OMR2MSKH:
+         ret = state->omr2_mask >> 32;
+         break;
+     case PEGPL_OMR2MSKL:
+         ret = state->omr2_mask;
+         break;
+     case PEGPL_OMR3BAH:
+         ret = state->omr3_base >> 32;
+         break;
+     case PEGPL_OMR3BAL:
+         ret = state->omr3_base;
+         break;
+     case PEGPL_OMR3MSKH:
+         ret = state->omr3_mask >> 32;
+         break;
+     case PEGPL_OMR3MSKL:
+         ret = state->omr3_mask;
+         break;
+     case PEGPL_REGBAH:
+         ret = state->reg_base >> 32;
+         break;
+     case PEGPL_REGBAL:
+         ret = state->reg_base;
+         break;
+     case PEGPL_REGMSK:
+         ret = state->reg_mask;
+         break;
+     case PEGPL_SPECIAL:
+         ret = state->special;
+         break;
+     case PEGPL_CFG:
+         ret = state->cfg;
+         break;
+     default:
+         break;
+     }
+
+     return ret;
+ }
+
+ /* Replace the upper 32 bits of *reg with val, keeping the lower half. */
+ static inline void pcie_dcr_set_hi(uint64_t *reg, uint32_t val)
+ {
+     *reg = ((uint64_t)val << 32) | (*reg & 0xffffffff);
+ }
+
+ /* Replace the lower 32 bits of *reg with val, keeping the upper half. */
+ static inline void pcie_dcr_set_lo(uint64_t *reg, uint32_t val)
+ {
+     *reg = (*reg & 0xffffffff00000000ULL) | val;
+ }
+
+ /*
+  * DCR write handler for one PCI Express controller.
+  *
+  * The register is selected by the offset of dcrn from the controller's
+  * DCR base; 64-bit values are written one 32-bit half at a time.  Writing
+  * PEGPL_CFGMSK also updates the MMCFG window: bit 0 of val is the enable
+  * flag and the two's complement of the remaining bits gives the size.
+  * Unknown offsets are ignored.
+  */
+ static void dcr_write_pcie(void *opaque, int dcrn, uint32_t val)
+ {
+     PPC460EXPCIEState *s = opaque;
+
+     switch (dcrn - s->dcrn_base) {
+     case PEGPL_CFGBAH:
+         pcie_dcr_set_hi(&s->cfg_base, val);
+         break;
+     case PEGPL_CFGBAL:
+         pcie_dcr_set_lo(&s->cfg_base, val);
+         break;
+     case PEGPL_CFGMSK: {
+         uint64_t size = ~(val & 0xfffffffe) + 1;
+
+         s->cfg_mask = val;
+         pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size);
+         break;
+     }
+     case PEGPL_MSGBAH:
+         pcie_dcr_set_hi(&s->msg_base, val);
+         break;
+     case PEGPL_MSGBAL:
+         pcie_dcr_set_lo(&s->msg_base, val);
+         break;
+     case PEGPL_MSGMSK:
+         s->msg_mask = val;
+         break;
+     case PEGPL_OMR1BAH:
+         pcie_dcr_set_hi(&s->omr1_base, val);
+         break;
+     case PEGPL_OMR1BAL:
+         pcie_dcr_set_lo(&s->omr1_base, val);
+         break;
+     case PEGPL_OMR1MSKH:
+         pcie_dcr_set_hi(&s->omr1_mask, val);
+         break;
+     case PEGPL_OMR1MSKL:
+         pcie_dcr_set_lo(&s->omr1_mask, val);
+         break;
+     case PEGPL_OMR2BAH:
+         pcie_dcr_set_hi(&s->omr2_base, val);
+         break;
+     case PEGPL_OMR2BAL:
+         pcie_dcr_set_lo(&s->omr2_base, val);
+         break;
+     case PEGPL_OMR2MSKH:
+         pcie_dcr_set_hi(&s->omr2_mask, val);
+         break;
+     case PEGPL_OMR2MSKL:
+         pcie_dcr_set_lo(&s->omr2_mask, val);
+         break;
+     case PEGPL_OMR3BAH:
+         pcie_dcr_set_hi(&s->omr3_base, val);
+         break;
+     case PEGPL_OMR3BAL:
+         pcie_dcr_set_lo(&s->omr3_base, val);
+         break;
+     case PEGPL_OMR3MSKH:
+         pcie_dcr_set_hi(&s->omr3_mask, val);
+         break;
+     case PEGPL_OMR3MSKL:
+         pcie_dcr_set_lo(&s->omr3_mask, val);
+         break;
+     case PEGPL_REGBAH:
+         pcie_dcr_set_hi(&s->reg_base, val);
+         break;
+     case PEGPL_REGBAL:
+         pcie_dcr_set_lo(&s->reg_base, val);
+         break;
+     case PEGPL_REGMSK:
+         s->reg_mask = val;
+         /*
+          * FIXME: how is size encoded?  The register window size
+          * (val == 0x7001 ? 4096 : ~(val & 0xfffffffe) + 1) is not used yet.
+          */
+         break;
+     case PEGPL_SPECIAL:
+         s->special = val;
+         break;
+     case PEGPL_CFG:
+         s->cfg = val;
+         break;
+     }
+ }
+
+ /* Forward the level of PCI interrupt irq_num to the matching output line. */
+ static void ppc460ex_set_irq(void *opaque, int irq_num, int level)
+ {
+     PPC460EXPCIEState *host = opaque;
+     qemu_irq line = host->irq[irq_num];
+
+     qemu_set_irq(line, level);
+ }
+
+ /*
+  * Realize handler of the PCIe host: derives the controller index from the
+  * "dcrn-base" property (an unknown base is reported through errp), then
+  * creates the I/O memory region, the four interrupt outputs and the root
+  * bus, all named after that index.
+  */
+ static void ppc460ex_pcie_realize(DeviceState *dev, Error **errp)
+ {
+     PPC460EXPCIEState *s = PPC460EX_PCIE_HOST(dev);
+     PCIHostState *pci = PCI_HOST_BRIDGE(dev);
+     char name[16];
+     int id, n;
+
+     if (s->dcrn_base == DCRN_PCIE0_BASE) {
+         id = 0;
+     } else if (s->dcrn_base == DCRN_PCIE1_BASE) {
+         id = 1;
+     } else {
+         error_setg(errp, "invalid PCIe DCRN base");
+         return;
+     }
+
+     snprintf(name, sizeof(name), "pcie%d-io", id);
+     memory_region_init(&s->iomem, OBJECT(s), name, UINT64_MAX);
+
+     for (n = 0; n < 4; n++) {
+         sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[n]);
+     }
+
+     snprintf(name, sizeof(name), "pcie.%d", id);
+     pci->bus = pci_register_root_bus(DEVICE(s), name, ppc460ex_set_irq,
+                                      pci_swizzle_map_irq_fn, s, &s->iomem,
+                                      get_system_io(), 0, 4, TYPE_PCIE_BUS);
+ }
+
+static Property ppc460ex_pcie_props[] = {
+ DEFINE_PROP_INT32("dcrn-base", PPC460EXPCIEState, dcrn_base, -1),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+ /*
+  * Class initialiser of the PCIe host: marks the device as a bridge,
+  * installs the realize handler and properties, and disables hotplug.
+  */
+ static void ppc460ex_pcie_class_init(ObjectClass *klass, void *data)
+ {
+     DeviceClass *device_class = DEVICE_CLASS(klass);
+
+     set_bit(DEVICE_CATEGORY_BRIDGE, device_class->categories);
+     device_class->realize = ppc460ex_pcie_realize;
+     device_class_set_props(device_class, ppc460ex_pcie_props);
+     device_class->hotpluggable = false;
+ }
+
+static const TypeInfo ppc460ex_pcie_host_info = {
+ .name = TYPE_PPC460EX_PCIE_HOST,
+ .parent = TYPE_PCIE_HOST_BRIDGE,
+ .instance_size = sizeof(PPC460EXPCIEState),
+ .class_init = ppc460ex_pcie_class_init,
+};
+
+static void ppc460ex_pcie_register(void)
+{
+ type_register_static(&ppc460ex_pcie_host_info);
+}
+
+type_init(ppc460ex_pcie_register)
+
+ /*
+  * Attach every register of one PCIe controller, PEGPL_CFGBAH through
+  * PEGPL_CFG, to the PCIe DCR handlers at the controller's DCR base.
+  */
+ static void ppc460ex_pcie_register_dcrs(PPC460EXPCIEState *s, CPUPPCState *env)
+ {
+     int reg;
+
+     /* The PEGPL_* offsets are consecutive, so a plain range covers them */
+     for (reg = PEGPL_CFGBAH; reg <= PEGPL_CFG; reg++) {
+         ppc_dcr_register(env, s->dcrn_base + reg, s,
+                          &dcr_read_pcie, &dcr_write_pcie);
+     }
+ }
+
+ /*
+  * Create and realize the two PCIe host controllers (DCR bases
+  * DCRN_PCIE0_BASE and DCRN_PCIE1_BASE) and register their DCRs with env.
+  */
+ void ppc460ex_pcie_init(CPUPPCState *env)
+ {
+     static const int dcrn_bases[] = { DCRN_PCIE0_BASE, DCRN_PCIE1_BASE };
+     size_t n;
+
+     for (n = 0; n < sizeof(dcrn_bases) / sizeof(dcrn_bases[0]); n++) {
+         DeviceState *host = qdev_new(TYPE_PPC460EX_PCIE_HOST);
+
+         qdev_prop_set_int32(host, "dcrn-base", dcrn_bases[n]);
+         sysbus_realize_and_unref(SYS_BUS_DEVICE(host), &error_fatal);
+         ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(host), env);
+     }
+ }
diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c
new file mode 100644
index 000000000..980c48944
--- /dev/null
+++ b/hw/ppc/ppc4xx_devs.c
@@ -0,0 +1,715 @@
+/*
+ * QEMU PowerPC 4xx embedded processors shared devices emulation
+ *
+ * Copyright (c) 2007 Jocelyn Mayer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "sysemu/reset.h"
+#include "cpu.h"
+#include "hw/irq.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/ppc4xx.h"
+#include "hw/intc/ppc-uic.h"
+#include "hw/qdev-properties.h"
+#include "qemu/log.h"
+#include "exec/address-spaces.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+/*#define DEBUG_UIC*/
+
+#ifdef DEBUG_UIC
+# define LOG_UIC(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__)
+#else
+# define LOG_UIC(...) do { } while (0)
+#endif
+
+/* Reset hook registered by ppc4xx_init(): @opaque is the PowerPCCPU. */
+static void ppc4xx_reset(void *opaque)
+{
+    cpu_reset(CPU((PowerPCCPU *)opaque));
+}
+
+/*****************************************************************************/
+/* Generic PowerPC 4xx processor instantiation */
+/*
+ * Create a CPU of type @cpu_type and fill in the two clock descriptors
+ * supplied by the caller. Also initialises the CPU's DCR space and
+ * registers a system reset handler for the CPU.
+ *
+ * Returns the new CPU.
+ */
+PowerPCCPU *ppc4xx_init(const char *cpu_type,
+                        clk_setup_t *cpu_clk, clk_setup_t *tb_clk,
+                        uint32_t sysclk)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cpu_create(cpu_type));
+    CPUPPCState *env = &cpu->env;
+
+    /* CPU clock: frequency changes are ignored, so no callback */
+    cpu_clk->opaque = env;
+    cpu_clk->cb = NULL;
+
+    /* Time base: set to sysclk, callback comes from the 40x timer code */
+    tb_clk->opaque = env;
+    tb_clk->cb = ppc_40x_timers_init(env, sysclk, PPC_INTERRUPT_PIT);
+
+    ppc_dcr_init(env, NULL, NULL);
+
+    /* Have the CPU reset together with the machine */
+    qemu_register_reset(ppc4xx_reset, cpu);
+
+    return cpu;
+}
+
+/*****************************************************************************/
+/* SDRAM controller */
+typedef struct ppc4xx_sdram_t ppc4xx_sdram_t;
+struct ppc4xx_sdram_t {
+ uint32_t addr; /* register index last written to SDRAM0_CFGADDR */
+ int nbanks; /* number of banks in use; the per-bank arrays hold 4 */
+ MemoryRegion containers[4]; /* used for clipping */
+ MemoryRegion *ram_memories; /* per-bank RAM regions, array passed to ppc4xx_sdram_init() */
+ hwaddr ram_bases[4]; /* per-bank base address, copied at init */
+ hwaddr ram_sizes[4]; /* per-bank size, copied at init; 0 = bank unused */
+ uint32_t besr0; /* SDRAM_BESR0 (0x00): guest writes clear the bits that are set in the value */
+ uint32_t besr1; /* SDRAM_BESR1 (0x08): same write-to-clear handling */
+ uint32_t bear; /* SDRAM_BEAR (0x10) */
+ uint32_t cfg; /* SDRAM_CFG (0x20): bit 0x80000000 enables the controller */
+ uint32_t status; /* SDRAM_STATUS (0x24): guest writes are ignored */
+ uint32_t rtr; /* SDRAM_RTR (0x30) */
+ uint32_t pmit; /* SDRAM_PMIT (0x34) */
+ uint32_t bcr[4]; /* SDRAM_B0CR..B3CR (0x40..0x4C): bit 0 is the bank enable */
+ uint32_t tr; /* SDRAM_TR (0x80): stored on write, reads return all ones */
+ uint32_t ecccfg; /* SDRAM_ECCCFG (0x94) */
+ uint32_t eccesr; /* SDRAM_ECCESR (0x98): irq is raised while this is non-zero */
+ qemu_irq irq; /* line driven by writes to SDRAM_ECCESR */
+};
+
+enum {
+ SDRAM0_CFGADDR = 0x010, /* DCR that selects which SDRAM register CFGDATA accesses */
+ SDRAM0_CFGDATA = 0x011, /* DCR that reads/writes the selected SDRAM register */
+};
+
+/* XXX: TOFIX: some patches have made this code become inconsistent:
+ * there are type inconsistencies, mixing hwaddr, target_ulong
+ * and uint32_t
+ */
+/*
+ * Build the bank configuration value for a bank at @ram_base of
+ * @ram_size bytes. Only power-of-two sizes from 4 MiB to 256 MiB are
+ * accepted; any other size is reported on stdout and yields 0.
+ */
+static uint32_t sdram_bcr (hwaddr ram_base,
+                           hwaddr ram_size)
+{
+    unsigned int code;
+
+    /* Size code n (bits 17..19) stands for a bank of 4 MiB << n */
+    for (code = 0; code <= 6; code++) {
+        if (ram_size == ((hwaddr)(4 * MiB) << code)) {
+            /* size code | base address bits | bank enable bit */
+            return (code << 17) | (ram_base & 0xFF800000) | 1;
+        }
+    }
+
+    printf("%s: invalid RAM size " TARGET_FMT_plx "\n", __func__,
+           ram_size);
+    return 0x00000000;
+}
+
+/* Extract the bank base address (bits 23..31) from a bank configuration. */
+static inline hwaddr sdram_base(uint32_t bcr)
+{
+    const uint32_t base_mask = 0xFF800000;
+
+    return bcr & base_mask;
+}
+
+/*
+ * Decode the bank size from bits 17..19 of a bank configuration:
+ * code n means 4 MiB << n, except code 7 which yields all ones.
+ */
+static target_ulong sdram_size (uint32_t bcr)
+{
+    const int code = (bcr >> 17) & 0x7;
+
+    if (code == 7) {
+        return (target_ulong)-1;
+    }
+
+    return (4 * MiB) << code;
+}
+
+/*
+ * Program bank @i with the configuration value @bcr.
+ *
+ * An existing mapping of the bank is torn down first. The value is then
+ * stored (masked with 0xFFDEE001) and, if the controller is @enabled and
+ * bit 0 of @bcr is set, the bank's RAM is mapped at sdram_base(@bcr)
+ * through a container of sdram_size(@bcr) bytes that clips it.
+ *
+ * Whether a mapping exists is read from the container region itself and
+ * not from bit 0 of the stored value: that bit is also stored for writes
+ * made while the controller is disabled, when nothing gets mapped, and
+ * deleting a subregion that was never added trips an assertion in the
+ * memory core.
+ */
+static void sdram_set_bcr(ppc4xx_sdram_t *sdram, int i,
+                          uint32_t bcr, int enabled)
+{
+    MemoryRegion *container = &sdram->containers[i];
+
+    if (memory_region_is_mapped(container)) {
+        /* Unmap RAM */
+#ifdef DEBUG_SDRAM
+        printf("%s: unmap RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n",
+               __func__, sdram_base(sdram->bcr[i]), sdram_size(sdram->bcr[i]));
+#endif
+        memory_region_del_subregion(get_system_memory(), container);
+        memory_region_del_subregion(container, &sdram->ram_memories[i]);
+        /* The container is initialised afresh on every mapping */
+        object_unparent(OBJECT(container));
+    }
+    sdram->bcr[i] = bcr & 0xFFDEE001;
+    if (enabled && (bcr & 0x00000001)) {
+#ifdef DEBUG_SDRAM
+        printf("%s: Map RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n",
+               __func__, sdram_base(bcr), sdram_size(bcr));
+#endif
+        memory_region_init(container, NULL, "sdram-containers",
+                           sdram_size(bcr));
+        memory_region_add_subregion(container, 0, &sdram->ram_memories[i]);
+        memory_region_add_subregion(get_system_memory(), sdram_base(bcr),
+                                    container);
+    }
+}
+
+/*
+ * (Re)program every bank from the base/size tables stored at init time:
+ * banks with a non-zero size are configured and mapped, the others get a
+ * zero configuration.
+ */
+static void sdram_map_bcr (ppc4xx_sdram_t *sdram)
+{
+    int bank;
+
+    for (bank = 0; bank < sdram->nbanks; bank++) {
+        const int populated = sdram->ram_sizes[bank] != 0;
+        uint32_t bcr = 0x00000000;
+
+        if (populated) {
+            bcr = sdram_bcr(sdram->ram_bases[bank], sdram->ram_sizes[bank]);
+        }
+        sdram_set_bcr(sdram, bank, bcr, populated);
+    }
+}
+
+/*
+ * Remove every bank from the system address space; called when the guest
+ * clears the controller enable bit in SDRAM_CFG.
+ *
+ * The region sdram_set_bcr() adds to system memory is the bank's clipping
+ * container, and the RAM region is a child of that container. Deleting
+ * ram_memories[i] from system memory directly therefore names a region
+ * that is not a subregion of it. Tear the banks down through
+ * sdram_set_bcr() instead, with the bank enable bit cleared and the
+ * controller treated as disabled, so unmapping mirrors mapping exactly.
+ *
+ * Side effect: bit 0 of each stored bank configuration ends up cleared.
+ * Re-enabling the controller goes through sdram_map_bcr(), which rebuilds
+ * the configuration from the base/size tables.
+ */
+static void sdram_unmap_bcr (ppc4xx_sdram_t *sdram)
+{
+    int i;
+
+    for (i = 0; i < sdram->nbanks; i++) {
+        sdram_set_bcr(sdram, i, sdram->bcr[i] & ~0x00000001U, 0);
+    }
+}
+
+/*
+ * DCR read handler for the SDRAM controller.
+ *
+ * SDRAM0_CFGADDR returns the currently selected register index.
+ * SDRAM0_CFGDATA returns the register selected by that index, or all ones
+ * for an index that is not implemented. Any other DCR reads as 0.
+ */
+static uint32_t dcr_read_sdram (void *opaque, int dcrn)
+{
+    ppc4xx_sdram_t *sdram = opaque;
+
+    if (dcrn == SDRAM0_CFGADDR) {
+        return sdram->addr;
+    }
+    if (dcrn != SDRAM0_CFGDATA) {
+        return 0x00000000;
+    }
+
+    switch (sdram->addr) {
+    case 0x00: /* SDRAM_BESR0 */
+        return sdram->besr0;
+    case 0x08: /* SDRAM_BESR1 */
+        return sdram->besr1;
+    case 0x10: /* SDRAM_BEAR */
+        return sdram->bear;
+    case 0x20: /* SDRAM_CFG */
+        return sdram->cfg;
+    case 0x24: /* SDRAM_STATUS */
+        return sdram->status;
+    case 0x30: /* SDRAM_RTR */
+        return sdram->rtr;
+    case 0x34: /* SDRAM_PMIT */
+        return sdram->pmit;
+    case 0x40: /* SDRAM_B0CR */
+    case 0x44: /* SDRAM_B1CR */
+    case 0x48: /* SDRAM_B2CR */
+    case 0x4C: /* SDRAM_B3CR */
+        /* The four bank registers are 4 apart */
+        return sdram->bcr[(sdram->addr - 0x40) >> 2];
+    case 0x80: /* SDRAM_TR */
+        return -1; /* ? */
+    case 0x94: /* SDRAM_ECCCFG */
+        return sdram->ecccfg;
+    case 0x98: /* SDRAM_ECCESR */
+        return sdram->eccesr;
+    default: /* Error */
+        return -1;
+    }
+}
+
+/*
+ * DCR write handler for the SDRAM controller.
+ *
+ * SDRAM0_CFGADDR selects a register; SDRAM0_CFGDATA writes @val to the
+ * selected one, applying that register's write mask and side effects.
+ * Writes to other DCRs, or with an unimplemented index selected, are
+ * dropped.
+ */
+static void dcr_write_sdram (void *opaque, int dcrn, uint32_t val)
+{
+    ppc4xx_sdram_t *sdram = opaque;
+    uint32_t toggled;
+
+    if (dcrn == SDRAM0_CFGADDR) {
+        sdram->addr = val;
+        return;
+    }
+    if (dcrn != SDRAM0_CFGDATA) {
+        return;
+    }
+
+    switch (sdram->addr) {
+    case 0x00: /* SDRAM_BESR0 */
+        sdram->besr0 &= ~val;
+        break;
+    case 0x08: /* SDRAM_BESR1 */
+        sdram->besr1 &= ~val;
+        break;
+    case 0x10: /* SDRAM_BEAR */
+        sdram->bear = val;
+        break;
+    case 0x20: /* SDRAM_CFG */
+        val &= 0xFFE00000;
+        /* Bits that differ between the old and the new configuration */
+        toggled = sdram->cfg ^ val;
+        if (toggled & val & 0x80000000) {
+#ifdef DEBUG_SDRAM
+            printf("%s: enable SDRAM controller\n", __func__);
+#endif
+            /* validate all RAM mappings */
+            sdram_map_bcr(sdram);
+            sdram->status &= ~0x80000000;
+        } else if (toggled & 0x80000000) {
+#ifdef DEBUG_SDRAM
+            printf("%s: disable SDRAM controller\n", __func__);
+#endif
+            /* invalidate all RAM mappings */
+            sdram_unmap_bcr(sdram);
+            sdram->status |= 0x80000000;
+        }
+        if (toggled & 0x40000000) {
+            /* This status bit follows the configuration bit */
+            sdram->status = (sdram->status & ~0x40000000) |
+                            (val & 0x40000000);
+        }
+        sdram->cfg = val;
+        break;
+    case 0x24: /* SDRAM_STATUS */
+        /* Read-only register */
+        break;
+    case 0x30: /* SDRAM_RTR */
+        sdram->rtr = val & 0x3FF80000;
+        break;
+    case 0x34: /* SDRAM_PMIT */
+        sdram->pmit = (val & 0xF8000000) | 0x07C00000;
+        break;
+    case 0x40: /* SDRAM_B0CR */
+    case 0x44: /* SDRAM_B1CR */
+    case 0x48: /* SDRAM_B2CR */
+    case 0x4C: /* SDRAM_B3CR */
+        /* The four bank registers are 4 apart */
+        sdram_set_bcr(sdram, (sdram->addr - 0x40) >> 2, val,
+                      sdram->cfg & 0x80000000);
+        break;
+    case 0x80: /* SDRAM_TR */
+        sdram->tr = val & 0x018FC01F;
+        break;
+    case 0x94: /* SDRAM_ECCCFG */
+        sdram->ecccfg = val & 0x00F00000;
+        break;
+    case 0x98: /* SDRAM_ECCESR */
+        val &= 0xFFF0F000;
+        /* The interrupt line changes only on a zero/non-zero transition */
+        if (sdram->eccesr == 0) {
+            if (val != 0) {
+                qemu_irq_raise(sdram->irq);
+            }
+        } else if (val == 0) {
+            qemu_irq_lower(sdram->irq);
+        }
+        sdram->eccesr = val;
+        break;
+    default: /* Error */
+        break;
+    }
+}
+
+/*
+ * Reset handler: put the controller registers back to their initial
+ * values. The bank configuration registers are not touched here.
+ */
+static void sdram_reset (void *opaque)
+{
+    ppc4xx_sdram_t *s = opaque;
+
+    /* Indirect register selector */
+    s->addr = 0x00000000;
+
+    /* Error reporting */
+    s->besr0 = 0x00000000; /* No error */
+    s->besr1 = 0x00000000; /* No error */
+    s->bear = 0x00000000;
+    s->eccesr = 0x00000000; /* No error */
+    s->ecccfg = 0x00000000; /* No ECC */
+
+    /* Refresh, power management and timing */
+    s->rtr = 0x05F00000;
+    s->pmit = 0x07C00000;
+    s->tr = 0x00854009;
+
+    /* We pre-initialize RAM banks */
+    s->status = 0x00000000;
+    s->cfg = 0x00800000;
+}
+
+/*
+ * Create the SDRAM controller model and register its two DCRs
+ * (SDRAM0_CFGADDR and SDRAM0_CFGDATA) in the DCR space of @env.
+ *
+ * @irq:          line driven by writes to the ECC error status register
+ * @nbanks:       number of valid entries in the three arrays below; the
+ *                model holds at most 4 banks
+ * @ram_memories: per-bank RAM regions; the pointer is kept, so the array
+ *                must stay valid for as long as the model is in use
+ * @ram_bases:    per-bank base addresses, copied
+ * @ram_sizes:    per-bank sizes, copied; 0 marks an unused bank
+ * @do_init:      when non-zero the banks are mapped immediately instead
+ *                of waiting for the guest to enable the controller
+ */
+void ppc4xx_sdram_init (CPUPPCState *env, qemu_irq irq, int nbanks,
+                        MemoryRegion *ram_memories,
+                        hwaddr *ram_bases,
+                        hwaddr *ram_sizes,
+                        int do_init)
+{
+    ppc4xx_sdram_t *sdram;
+
+    /*
+     * The per-bank tables have a fixed size: refuse a bank count that
+     * would make the copies below write past them.
+     */
+    assert(nbanks >= 0 && nbanks <= (int)ARRAY_SIZE(sdram->ram_bases));
+
+    /* Zero-filled, so the slots of unused banks keep base and size 0 */
+    sdram = g_malloc0(sizeof(*sdram));
+    sdram->irq = irq;
+    sdram->nbanks = nbanks;
+    sdram->ram_memories = ram_memories;
+    memcpy(sdram->ram_bases, ram_bases,
+           nbanks * sizeof(sdram->ram_bases[0]));
+    memcpy(sdram->ram_sizes, ram_sizes,
+           nbanks * sizeof(sdram->ram_sizes[0]));
+    qemu_register_reset(&sdram_reset, sdram);
+    ppc_dcr_register(env, SDRAM0_CFGADDR,
+                     sdram, &dcr_read_sdram, &dcr_write_sdram);
+    ppc_dcr_register(env, SDRAM0_CFGDATA,
+                     sdram, &dcr_read_sdram, &dcr_write_sdram);
+    if (do_init) {
+        sdram_map_bcr(sdram);
+    }
+}
+
+/*
+ * Split RAM between SDRAM banks.
+ *
+ * sdram_bank_sizes[] must be in descending order, that is sizes[i] > sizes[i+1]
+ * and must be 0-terminated.
+ *
+ * The 4xx SDRAM controller supports a small number of banks, and each bank
+ * must be one of a small set of sizes. The number of banks and the supported
+ * sizes varies by SoC.
+ */
+void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks,
+                        MemoryRegion ram_memories[],
+                        hwaddr ram_bases[], hwaddr ram_sizes[],
+                        const ram_addr_t sdram_bank_sizes[])
+{
+    const ram_addr_t total = memory_region_size(ram);
+    ram_addr_t remaining = total;
+    ram_addr_t next_base = 0;
+    ram_addr_t used;
+    GString *sizes;
+    int bank;
+    int k;
+
+    /* Hand out banks until RAM is used up or no bank is left */
+    for (bank = 0; bank < nr_banks && remaining != 0; bank++) {
+        char name[32];
+
+        /* The list is sorted largest first: take the first size that fits */
+        k = 0;
+        while (sdram_bank_sizes[k] != 0 && sdram_bank_sizes[k] > remaining) {
+            k++;
+        }
+        if (sdram_bank_sizes[k] == 0) {
+            /* Nothing fits into what is left: this bank stays unused */
+            continue;
+        }
+
+        ram_bases[bank] = next_base;
+        ram_sizes[bank] = sdram_bank_sizes[k];
+        next_base += sdram_bank_sizes[k];
+        remaining -= sdram_bank_sizes[k];
+
+        /* Each bank is an alias onto its slice of the RAM region */
+        snprintf(name, sizeof(name), "ppc4xx.sdram%d", bank);
+        memory_region_init_alias(&ram_memories[bank], NULL, name, ram,
+                                 ram_bases[bank], ram_sizes[bank]);
+    }
+
+    if (remaining == 0) {
+        return;
+    }
+
+    /* The RAM size cannot be covered: explain what is possible and quit */
+    used = total - remaining;
+    sizes = g_string_new(NULL);
+    for (k = 0; sdram_bank_sizes[k]; k++) {
+        g_string_append_printf(sizes, "%" PRIi64 "%s",
+                               sdram_bank_sizes[k] / MiB,
+                               sdram_bank_sizes[k + 1] ? ", " : "");
+    }
+    error_report("at most %d bank%s of %s MiB each supported",
+                 nr_banks, nr_banks == 1 ? "" : "s", sizes->str);
+    /* Suggest what was placed, or the smallest bank if nothing was */
+    error_printf("Possible valid RAM size: %" PRIi64 " MiB \n",
+                 used ? used / MiB : sdram_bank_sizes[k - 1] / MiB);
+
+    g_string_free(sizes, true);
+    exit(EXIT_FAILURE);
+}
+
+/*****************************************************************************/
+/* MAL */
+
+enum {
+ MAL0_CFG = 0x180,
+ MAL0_ESR = 0x181,
+ MAL0_IER = 0x182,
+ MAL0_TXCASR = 0x184,
+ MAL0_TXCARR = 0x185,
+ MAL0_TXEOBISR = 0x186,
+ MAL0_TXDEIR = 0x187,
+ MAL0_RXCASR = 0x190,
+ MAL0_RXCARR = 0x191,
+ MAL0_RXEOBISR = 0x192,
+ MAL0_RXDEIR = 0x193,
+ MAL0_TXCTP0R = 0x1A0, /* first of txcnum consecutive DCRs, one per TX channel */
+ MAL0_RXCTP0R = 0x1C0, /* first of rxcnum consecutive DCRs, one per RX channel */
+ MAL0_RCBS0 = 0x1E0, /* first of rxcnum consecutive DCRs, one per RX channel */
+ MAL0_RCBS1 = 0x1E1, /* not referenced by name in the handlers below; reached as MAL0_RCBS0 + 1 */
+};
+
+typedef struct ppc4xx_mal_t ppc4xx_mal_t;
+struct ppc4xx_mal_t {
+ qemu_irq irqs[4]; /* copied from the caller of ppc4xx_mal_init() */
+ uint32_t cfg; /* MAL0_CFG */
+ uint32_t esr; /* MAL0_ESR: guest writes clear the bits that are set in the value */
+ uint32_t ier; /* MAL0_IER */
+ uint32_t txcasr; /* MAL0_TXCASR */
+ uint32_t txcarr; /* MAL0_TXCARR */
+ uint32_t txeobisr; /* MAL0_TXEOBISR: write-to-clear */
+ uint32_t txdeir; /* MAL0_TXDEIR: write-to-clear */
+ uint32_t rxcasr; /* MAL0_RXCASR */
+ uint32_t rxcarr; /* MAL0_RXCARR */
+ uint32_t rxeobisr; /* MAL0_RXEOBISR: write-to-clear */
+ uint32_t rxdeir; /* MAL0_RXDEIR: write-to-clear */
+ uint32_t *txctpr; /* txcnum entries, DCRs from MAL0_TXCTP0R */
+ uint32_t *rxctpr; /* rxcnum entries, DCRs from MAL0_RXCTP0R */
+ uint32_t *rcbs; /* rxcnum entries, DCRs from MAL0_RCBS0; low 8 bits are kept on write */
+ uint8_t txcnum; /* number of TX channels (at most 32, asserted at init) */
+ uint8_t rxcnum; /* number of RX channels (at most 32, asserted at init) */
+};
+
+/*
+ * Reset handler, also run when the guest sets bit 0x80000000 in MAL0_CFG.
+ * Restores the configuration register and clears the status, interrupt
+ * enable and channel-active registers. txcarr, rxcarr and the per-channel
+ * tables are left as they are.
+ */
+static void ppc4xx_mal_reset(void *opaque)
+{
+    ppc4xx_mal_t *m = opaque;
+
+    m->cfg = 0x0007C000;
+    m->esr = 0x00000000;
+    m->ier = 0x00000000;
+
+    /* Transmit side */
+    m->txcasr = 0x00000000;
+    m->txeobisr = 0x00000000;
+    m->txdeir = 0x00000000;
+
+    /* Receive side */
+    m->rxcasr = 0x00000000;
+    m->rxeobisr = 0x00000000;
+    m->rxdeir = 0x00000000;
+}
+
+/*
+ * DCR read handler for the MAL. Returns the register behind @dcrn, or 0
+ * for a DCR number this model does not implement.
+ */
+static uint32_t dcr_read_mal(void *opaque, int dcrn)
+{
+    const ppc4xx_mal_t *mal = opaque;
+
+    /*
+     * Per-channel tables. They are tested from the highest DCR base down
+     * so that, should windows ever overlap, the same one wins as when
+     * each later match overrides the earlier ones.
+     */
+    if (dcrn >= MAL0_RCBS0 && dcrn < MAL0_RCBS0 + mal->rxcnum) {
+        return mal->rcbs[dcrn - MAL0_RCBS0];
+    }
+    if (dcrn >= MAL0_RXCTP0R && dcrn < MAL0_RXCTP0R + mal->rxcnum) {
+        return mal->rxctpr[dcrn - MAL0_RXCTP0R];
+    }
+    if (dcrn >= MAL0_TXCTP0R && dcrn < MAL0_TXCTP0R + mal->txcnum) {
+        return mal->txctpr[dcrn - MAL0_TXCTP0R];
+    }
+
+    /* Global registers */
+    switch (dcrn) {
+    case MAL0_CFG:
+        return mal->cfg;
+    case MAL0_ESR:
+        return mal->esr;
+    case MAL0_IER:
+        return mal->ier;
+    case MAL0_TXCASR:
+        return mal->txcasr;
+    case MAL0_TXCARR:
+        return mal->txcarr;
+    case MAL0_TXEOBISR:
+        return mal->txeobisr;
+    case MAL0_TXDEIR:
+        return mal->txdeir;
+    case MAL0_RXCASR:
+        return mal->rxcasr;
+    case MAL0_RXCARR:
+        return mal->rxcarr;
+    case MAL0_RXEOBISR:
+        return mal->rxeobisr;
+    case MAL0_RXDEIR:
+        return mal->rxdeir;
+    default:
+        return 0;
+    }
+}
+
+/* True when @dcrn lies in the window of @count DCRs starting at @base. */
+static inline bool mal_dcr_in_window(int dcrn, int base, int count)
+{
+    return dcrn >= base && dcrn < base + count;
+}
+
+/*
+ * DCR write handler for the MAL. Plain registers take @val through their
+ * write mask, status registers clear the bits that are set in @val, and
+ * the per-channel tables are indexed by the distance from their base DCR.
+ */
+static void dcr_write_mal(void *opaque, int dcrn, uint32_t val)
+{
+    ppc4xx_mal_t *mal = opaque;
+
+    switch (dcrn) {
+    case MAL0_CFG:
+        /* Bit 0x80000000 requests a reset before the value is latched */
+        if (val & 0x80000000) {
+            ppc4xx_mal_reset(mal);
+        }
+        mal->cfg = val & 0x00FFC087;
+        break;
+    case MAL0_IER:
+        mal->ier = val & 0x0000001F;
+        break;
+    case MAL0_TXCASR:
+        mal->txcasr = val & 0xF0000000;
+        break;
+    case MAL0_TXCARR:
+        mal->txcarr = val & 0xF0000000;
+        break;
+    case MAL0_RXCASR:
+        mal->rxcasr = val & 0xC0000000;
+        break;
+    case MAL0_RXCARR:
+        mal->rxcarr = val & 0xC0000000;
+        break;
+    /* Read/clear registers */
+    case MAL0_ESR:
+        mal->esr &= ~val;
+        break;
+    case MAL0_TXEOBISR:
+        mal->txeobisr &= ~val;
+        break;
+    case MAL0_TXDEIR:
+        mal->txdeir &= ~val;
+        break;
+    case MAL0_RXEOBISR:
+        mal->rxeobisr &= ~val;
+        break;
+    case MAL0_RXDEIR:
+        mal->rxdeir &= ~val;
+        break;
+    }
+
+    /* Per-channel tables */
+    if (mal_dcr_in_window(dcrn, MAL0_TXCTP0R, mal->txcnum)) {
+        mal->txctpr[dcrn - MAL0_TXCTP0R] = val;
+    }
+    if (mal_dcr_in_window(dcrn, MAL0_RXCTP0R, mal->rxcnum)) {
+        mal->rxctpr[dcrn - MAL0_RXCTP0R] = val;
+    }
+    if (mal_dcr_in_window(dcrn, MAL0_RCBS0, mal->rxcnum)) {
+        mal->rcbs[dcrn - MAL0_RCBS0] = val & 0x000000FF;
+    }
+}
+
+/*
+ * Create the MAL model with @txcnum transmit and @rxcnum receive channels
+ * (at most 32 each) and register all of its DCRs in the DCR space of
+ * @env. The four entries of @irqs are copied into the model.
+ */
+void ppc4xx_mal_init(CPUPPCState *env, uint8_t txcnum, uint8_t rxcnum,
+                     qemu_irq irqs[4])
+{
+    /* DCRs that exist once, whatever the channel counts are */
+    static const int global_dcrs[] = {
+        MAL0_CFG, MAL0_ESR, MAL0_IER,
+        MAL0_TXCASR, MAL0_TXCARR, MAL0_TXEOBISR, MAL0_TXDEIR,
+        MAL0_RXCASR, MAL0_RXCARR, MAL0_RXEOBISR, MAL0_RXDEIR,
+    };
+    ppc4xx_mal_t *mal;
+    int n;
+
+    assert(txcnum <= 32 && rxcnum <= 32);
+
+    mal = g_new0(ppc4xx_mal_t, 1);
+    mal->txcnum = txcnum;
+    mal->rxcnum = rxcnum;
+    mal->txctpr = g_new0(uint32_t, txcnum);
+    mal->rxctpr = g_new0(uint32_t, rxcnum);
+    mal->rcbs = g_new0(uint32_t, rxcnum);
+    memcpy(mal->irqs, irqs, sizeof(mal->irqs));
+
+    qemu_register_reset(&ppc4xx_mal_reset, mal);
+
+    for (n = 0; n < (int)ARRAY_SIZE(global_dcrs); n++) {
+        ppc_dcr_register(env, global_dcrs[n],
+                         mal, &dcr_read_mal, &dcr_write_mal);
+    }
+    /* One DCR per channel in each of the three tables */
+    for (n = 0; n < txcnum; n++) {
+        ppc_dcr_register(env, MAL0_TXCTP0R + n,
+                         mal, &dcr_read_mal, &dcr_write_mal);
+    }
+    for (n = 0; n < rxcnum; n++) {
+        ppc_dcr_register(env, MAL0_RXCTP0R + n,
+                         mal, &dcr_read_mal, &dcr_write_mal);
+    }
+    for (n = 0; n < rxcnum; n++) {
+        ppc_dcr_register(env, MAL0_RCBS0 + n,
+                         mal, &dcr_read_mal, &dcr_write_mal);
+    }
+}
diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c
new file mode 100644
index 000000000..304a29349
--- /dev/null
+++ b/hw/ppc/ppc4xx_pci.c
@@ -0,0 +1,389 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+/* This file implements emulation of the 32-bit PCI controller found in some
+ * 4xx SoCs, such as the 440EP. */
+
+#include "qemu/osdep.h"
+#include "hw/irq.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/ppc4xx.h"
+#include "migration/vmstate.h"
+#include "qemu/module.h"
+#include "sysemu/reset.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "trace.h"
+#include "qom/object.h"
+
+struct PCIMasterMap { /* one PLB Memory Map (PMM) register group */
+ uint32_t la; /* PCIL0_PMMnLA */
+ uint32_t ma; /* PCIL0_PMMnMA */
+ uint32_t pcila; /* PCIL0_PMMnPCILA */
+ uint32_t pciha; /* PCIL0_PMMnPCIHA */
+};
+
+struct PCITargetMap { /* one PCI Target Map (PTM) register pair */
+ uint32_t ms; /* PCIL0_PTMnMS */
+ uint32_t la; /* PCIL0_PTMnLA */
+};
+
+OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxPCIState, PPC4xx_PCI_HOST_BRIDGE)
+
+#define PPC4xx_PCI_NR_PMMS 3
+#define PPC4xx_PCI_NR_PTMS 2
+
+#define PPC4xx_PCI_NUM_DEVS 5
+
+struct PPC4xxPCIState {
+ PCIHostState parent_obj;
+
+ struct PCIMasterMap pmm[PPC4xx_PCI_NR_PMMS]; /* PMM0..PMM2 register values */
+ struct PCITargetMap ptm[PPC4xx_PCI_NR_PTMS]; /* PTM1 is ptm[0], PTM2 is ptm[1] */
+ qemu_irq irq[PPC4xx_PCI_NUM_DEVS]; /* sysbus IRQ outputs, indexed by the result of ppc4xx_pci_map_irq() */
+
+ MemoryRegion container; /* the sysbus MMIO region; holds the three regions set up at realize */
+ MemoryRegion iomem; /* "pci.reg": the PMM/PTM registers, placed at PCI_REG_BASE */
+};
+
+#define PCIC0_CFGADDR 0x0
+#define PCIC0_CFGDATA 0x4
+
+/* PLB Memory Map (PMM) registers specify which PLB addresses are translated to
+ * PCI accesses. */
+#define PCIL0_PMM0LA 0x0
+#define PCIL0_PMM0MA 0x4
+#define PCIL0_PMM0PCILA 0x8
+#define PCIL0_PMM0PCIHA 0xc
+#define PCIL0_PMM1LA 0x10
+#define PCIL0_PMM1MA 0x14
+#define PCIL0_PMM1PCILA 0x18
+#define PCIL0_PMM1PCIHA 0x1c
+#define PCIL0_PMM2LA 0x20
+#define PCIL0_PMM2MA 0x24
+#define PCIL0_PMM2PCILA 0x28
+#define PCIL0_PMM2PCIHA 0x2c
+
+/* PCI Target Map (PTM) registers specify which PCI addresses are translated to
+ * PLB accesses. */
+#define PCIL0_PTM1MS 0x30
+#define PCIL0_PTM1LA 0x34
+#define PCIL0_PTM2MS 0x38
+#define PCIL0_PTM2LA 0x3c
+#define PCI_REG_BASE 0x800000
+#define PCI_REG_SIZE 0x40
+
+#define PCI_ALL_SIZE (PCI_REG_BASE + PCI_REG_SIZE)
+
+/* Return the field that backs register @offset, or NULL if there is none. */
+static uint32_t *ppc4xx_pci_reg_field(PPC4xxPCIState *pci, hwaddr offset)
+{
+    switch (offset) {
+    case PCIL0_PMM0LA:
+        return &pci->pmm[0].la;
+    case PCIL0_PMM0MA:
+        return &pci->pmm[0].ma;
+    case PCIL0_PMM0PCILA:
+        return &pci->pmm[0].pcila;
+    case PCIL0_PMM0PCIHA:
+        return &pci->pmm[0].pciha;
+
+    case PCIL0_PMM1LA:
+        return &pci->pmm[1].la;
+    case PCIL0_PMM1MA:
+        return &pci->pmm[1].ma;
+    case PCIL0_PMM1PCILA:
+        return &pci->pmm[1].pcila;
+    case PCIL0_PMM1PCIHA:
+        return &pci->pmm[1].pciha;
+
+    case PCIL0_PMM2LA:
+        return &pci->pmm[2].la;
+    case PCIL0_PMM2MA:
+        return &pci->pmm[2].ma;
+    case PCIL0_PMM2PCILA:
+        return &pci->pmm[2].pcila;
+    case PCIL0_PMM2PCIHA:
+        return &pci->pmm[2].pciha;
+
+    case PCIL0_PTM1MS:
+        return &pci->ptm[0].ms;
+    case PCIL0_PTM1LA:
+        return &pci->ptm[0].la;
+    case PCIL0_PTM2MS:
+        return &pci->ptm[1].ms;
+    case PCIL0_PTM2LA:
+        return &pci->ptm[1].la;
+
+    default:
+        return NULL;
+    }
+}
+
+/*
+ * MMIO write handler for the PMM/PTM register block.
+ *
+ * The value is only stored. All target attempts at PCI configuration are
+ * otherwise ignored, which effectively assumes a bidirectional 1:1
+ * mapping of PLB and PCI space.
+ */
+static void ppc4xx_pci_reg_write4(void *opaque, hwaddr offset,
+                                  uint64_t value, unsigned size)
+{
+    uint32_t *field = ppc4xx_pci_reg_field(opaque, offset);
+
+    if (field == NULL) {
+        printf("%s: unhandled PCI internal register 0x%lx\n", __func__,
+               (unsigned long)offset);
+        return;
+    }
+
+    *field = value;
+}
+
+/*
+ * MMIO read handler for the PMM/PTM register block: returns the stored
+ * value of the register at @offset. An offset without a register behind
+ * it is reported on stdout and reads as 0.
+ */
+static uint64_t ppc4xx_pci_reg_read4(void *opaque, hwaddr offset,
+                                     unsigned size)
+{
+    const struct PPC4xxPCIState *pci = opaque;
+
+    switch (offset) {
+    /* PLB Memory Map 0 */
+    case PCIL0_PMM0LA:
+        return pci->pmm[0].la;
+    case PCIL0_PMM0MA:
+        return pci->pmm[0].ma;
+    case PCIL0_PMM0PCILA:
+        return pci->pmm[0].pcila;
+    case PCIL0_PMM0PCIHA:
+        return pci->pmm[0].pciha;
+
+    /* PLB Memory Map 1 */
+    case PCIL0_PMM1LA:
+        return pci->pmm[1].la;
+    case PCIL0_PMM1MA:
+        return pci->pmm[1].ma;
+    case PCIL0_PMM1PCILA:
+        return pci->pmm[1].pcila;
+    case PCIL0_PMM1PCIHA:
+        return pci->pmm[1].pciha;
+
+    /* PLB Memory Map 2 */
+    case PCIL0_PMM2LA:
+        return pci->pmm[2].la;
+    case PCIL0_PMM2MA:
+        return pci->pmm[2].ma;
+    case PCIL0_PMM2PCILA:
+        return pci->pmm[2].pcila;
+    case PCIL0_PMM2PCIHA:
+        return pci->pmm[2].pciha;
+
+    /* PCI Target Maps 1 and 2 */
+    case PCIL0_PTM1MS:
+        return pci->ptm[0].ms;
+    case PCIL0_PTM1LA:
+        return pci->ptm[0].la;
+    case PCIL0_PTM2MS:
+        return pci->ptm[1].ms;
+    case PCIL0_PTM2LA:
+        return pci->ptm[1].la;
+
+    default:
+        printf("%s: invalid PCI internal register 0x%lx\n", __func__,
+               (unsigned long)offset);
+        return 0;
+    }
+}
+
+static const MemoryRegionOps pci_reg_ops = { /* callbacks for the "pci.reg" region created at realize */
+ .read = ppc4xx_pci_reg_read4,
+ .write = ppc4xx_pci_reg_write4,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/* Reset handler: clear every PMM and PTM register. */
+static void ppc4xx_pci_reset(void *opaque)
+{
+    PPC4xxPCIState *s = opaque;
+
+    memset(s->ptm, 0, sizeof(s->ptm));
+    memset(s->pmm, 0, sizeof(s->pmm));
+}
+
+/* On Bamboo, all pins from each slot are tied to a single board IRQ. This
+ * may need further refactoring for other boards. */
+/*
+ * Map a device interrupt pin to a host bridge IRQ index. The pin number
+ * is not used: slot N (N > 0) maps to index N - 1, slot 0 to the last
+ * index.
+ */
+static int ppc4xx_pci_map_irq(PCIDevice *pci_dev, int irq_num)
+{
+    const int slot = PCI_SLOT(pci_dev->devfn);
+
+    trace_ppc4xx_pci_map_irq(pci_dev->devfn, irq_num, slot);
+
+    if (slot <= 0) {
+        return PPC4xx_PCI_NUM_DEVS - 1;
+    }
+    return slot - 1;
+}
+
+/*
+ * Drive host bridge IRQ @irq_num to @level. @opaque is the array of
+ * output lines handed to pci_register_root_bus() at realize.
+ */
+static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level)
+{
+    qemu_irq *lines = opaque;
+
+    trace_ppc4xx_pci_set_irq(irq_num);
+
+    /* The array has PPC4xx_PCI_NUM_DEVS entries */
+    assert(irq_num >= 0 && irq_num < PPC4xx_PCI_NUM_DEVS);
+    qemu_set_irq(lines[irq_num], level);
+}
+
+static const VMStateDescription vmstate_pci_master_map = { /* saved form of one struct PCIMasterMap */
+ .name = "pci_master_map",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) { /* all four registers, in struct declaration order */
+ VMSTATE_UINT32(la, struct PCIMasterMap),
+ VMSTATE_UINT32(ma, struct PCIMasterMap),
+ VMSTATE_UINT32(pcila, struct PCIMasterMap),
+ VMSTATE_UINT32(pciha, struct PCIMasterMap),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_pci_target_map = { /* saved form of one struct PCITargetMap */
+ .name = "pci_target_map",
+ .version_id = 0,
+ .minimum_version_id = 0,
+ .fields = (VMStateField[]) { /* both registers, in struct declaration order */
+ VMSTATE_UINT32(ms, struct PCITargetMap),
+ VMSTATE_UINT32(la, struct PCITargetMap),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+static const VMStateDescription vmstate_ppc4xx_pci = { /* saved state of the host bridge: the PMM and PTM arrays */
+ .name = "ppc4xx_pci",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT_ARRAY(pmm, PPC4xxPCIState, PPC4xx_PCI_NR_PMMS, 1, /* each element uses vmstate_pci_master_map */
+ vmstate_pci_master_map,
+ struct PCIMasterMap),
+ VMSTATE_STRUCT_ARRAY(ptm, PPC4xxPCIState, PPC4xx_PCI_NR_PTMS, 1, /* each element uses vmstate_pci_target_map */
+ vmstate_pci_target_map,
+ struct PCITargetMap),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/* XXX Interrupt acknowledge cycles not supported. */
+/*
+ * Realize the host bridge: export its IRQ lines, create the root PCI bus
+ * with the PCI-facing "ppc4xx-host-bridge" device in slot 0, and build
+ * the MMIO window holding the config address/data ports and the PMM/PTM
+ * register block.
+ */
+static void ppc4xx_pcihost_realize(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sysbus = SYS_BUS_DEVICE(dev);
+    PCIHostState *phb = PCI_HOST_BRIDGE(dev);
+    PPC4xxPCIState *pcihost = PPC4xx_PCI_HOST_BRIDGE(dev);
+    MemoryRegion *window = &pcihost->container;
+    Object *owner = OBJECT(pcihost);
+    int n;
+
+    /* One sysbus IRQ output per entry of the irq array */
+    for (n = 0; n < ARRAY_SIZE(pcihost->irq); n++) {
+        sysbus_init_irq(sysbus, &pcihost->irq[n]);
+    }
+
+    phb->bus = pci_register_root_bus(dev, NULL, ppc4xx_pci_set_irq,
+                                     ppc4xx_pci_map_irq, pcihost->irq,
+                                     get_system_memory(), get_system_io(),
+                                     0, ARRAY_SIZE(pcihost->irq),
+                                     TYPE_PCI_BUS);
+
+    pci_create_simple(phb->bus, 0, "ppc4xx-host-bridge");
+
+    /* XXX split into 2 memory regions, one for config space, one for regs */
+    memory_region_init(window, owner, "pci-container", PCI_ALL_SIZE);
+    memory_region_init_io(&phb->conf_mem, owner, &pci_host_conf_le_ops, phb,
+                          "pci-conf-idx", 4);
+    memory_region_init_io(&phb->data_mem, owner, &pci_host_data_le_ops, phb,
+                          "pci-conf-data", 4);
+    memory_region_init_io(&pcihost->iomem, owner, &pci_reg_ops, pcihost,
+                          "pci.reg", PCI_REG_SIZE);
+
+    /* Lay the three regions out inside the window */
+    memory_region_add_subregion(window, PCIC0_CFGADDR, &phb->conf_mem);
+    memory_region_add_subregion(window, PCIC0_CFGDATA, &phb->data_mem);
+    memory_region_add_subregion(window, PCI_REG_BASE, &pcihost->iomem);
+
+    sysbus_init_mmio(sysbus, window);
+    qemu_register_reset(ppc4xx_pci_reset, pcihost);
+}
+
+/* Class setup for the PCI-facing half of the host bridge. */
+static void ppc4xx_host_bridge_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+    /* Identity reported in PCI configuration space */
+    pdc->vendor_id = PCI_VENDOR_ID_IBM;
+    pdc->device_id = PCI_DEVICE_ID_IBM_440GX;
+    pdc->class_id = PCI_CLASS_BRIDGE_OTHER;
+
+    dc->desc = "Host bridge";
+    /*
+     * PCI-facing part of the host bridge, not usable without the
+     * host-facing part, which can't be device_add'ed, yet.
+     */
+    dc->user_creatable = false;
+}
+
+static const TypeInfo ppc4xx_host_bridge_info = { /* PCI-facing device created on the root bus at realize */
+ .name = "ppc4xx-host-bridge",
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(PCIDevice), /* no state beyond the base PCIDevice */
+ .class_init = ppc4xx_host_bridge_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { INTERFACE_CONVENTIONAL_PCI_DEVICE },
+ { },
+ },
+};
+
+/* Class setup for the host-facing (sysbus) half of the host bridge. */
+static void ppc4xx_pcihost_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->vmsd = &vmstate_ppc4xx_pci;
+    device_class->realize = ppc4xx_pcihost_realize;
+}
+
+static const TypeInfo ppc4xx_pcihost_info = { /* host-facing device, instance state is PPC4xxPCIState */
+ .name = TYPE_PPC4xx_PCI_HOST_BRIDGE,
+ .parent = TYPE_PCI_HOST_BRIDGE,
+ .instance_size = sizeof(PPC4xxPCIState),
+ .class_init = ppc4xx_pcihost_class_init,
+};
+
+static void ppc4xx_pci_register_types(void) /* registers both halves of the host bridge with the type system */
+{
+ type_register_static(&ppc4xx_pcihost_info);
+ type_register_static(&ppc4xx_host_bridge_info);
+}
+
+type_init(ppc4xx_pci_register_types)
diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c
new file mode 100644
index 000000000..10b643861
--- /dev/null
+++ b/hw/ppc/ppc_booke.c
@@ -0,0 +1,369 @@
+/*
+ * QEMU PowerPC Booke hardware System Emulator
+ *
+ * Copyright (c) 2011 AdaCore
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/ppc/ppc.h"
+#include "qemu/timer.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "hw/loader.h"
+#include "kvm_ppc.h"
+
+
+/*
+ * Timer Control Register
+ *
+ * Single-bit flags are written as (1U << n); multi-bit fields come as a
+ * *_SHIFT / *_MASK pair.
+ */
+
+#define TCR_WP_SHIFT 30 /* Watchdog Timer Period */
+#define TCR_WP_MASK (0x3U << TCR_WP_SHIFT)
+#define TCR_WRC_SHIFT 28 /* Watchdog Timer Reset Control */
+#define TCR_WRC_MASK (0x3U << TCR_WRC_SHIFT)
+#define TCR_WIE (1U << 27) /* Watchdog Timer Interrupt Enable */
+#define TCR_DIE (1U << 26) /* Decrementer Interrupt Enable */
+#define TCR_FP_SHIFT 24 /* Fixed-Interval Timer Period */
+#define TCR_FP_MASK (0x3U << TCR_FP_SHIFT)
+#define TCR_FIE (1U << 23) /* Fixed-Interval Timer Interrupt Enable */
+#define TCR_ARE (1U << 22) /* Auto-Reload Enable */
+
+/*
+ * Timer Control Register (e500 specific fields)
+ *
+ * These 4-bit extension fields are combined with the 2-bit FP/WP fields
+ * when the time base has PPC_TIMER_E500 set.
+ */
+
+#define TCR_E500_FPEXT_SHIFT 13 /* Fixed-Interval Timer Period Extension */
+#define TCR_E500_FPEXT_MASK (0xf << TCR_E500_FPEXT_SHIFT)
+#define TCR_E500_WPEXT_SHIFT 17 /* Watchdog Timer Period Extension */
+#define TCR_E500_WPEXT_MASK (0xf << TCR_E500_WPEXT_SHIFT)
+
+/* Timer Status Register */
+
+#define TSR_FIS (1U << 26) /* Fixed-Interval Timer Interrupt Status */
+#define TSR_DIS (1U << 27) /* Decrementer Interrupt Status */
+#define TSR_WRS_SHIFT 28 /* Watchdog Timer Reset Status */
+#define TSR_WRS_MASK (0x3U << TSR_WRS_SHIFT)
+#define TSR_WIS (1U << 30) /* Watchdog Timer Interrupt Status */
+#define TSR_ENW (1U << 31) /* Enable Next Watchdog Timer */
+
+/*
+ * Per-CPU state of the BookE fixed-interval (FIT) and watchdog (WDT)
+ * timers.  An instance is allocated in ppc_booke_timers_init() and stored
+ * in ppc_tb_t::opaque.
+ */
+typedef struct booke_timer_t booke_timer_t;
+struct booke_timer_t {
+
+ /* Expiry last programmed into fit_timer, in QEMU_CLOCK_VIRTUAL ns */
+ uint64_t fit_next;
+ QEMUTimer *fit_timer;
+
+ /* Expiry last programmed into wdt_timer, in QEMU_CLOCK_VIRTUAL ns */
+ uint64_t wdt_next;
+ QEMUTimer *wdt_timer;
+
+ /* NOTE(review): not referenced by the code in this file -- confirm use */
+ uint32_t flags;
+};
+
+/*
+ * Recompute the level of the decrementer, watchdog and fixed-interval
+ * timer interrupt lines.  A line is raised only while both its status bit
+ * in TSR and its enable bit in TCR are set.
+ */
+static void booke_update_irq(PowerPCCPU *cpu)
+{
+    CPUPPCState *env = &cpu->env;
+    const target_ulong tsr = env->spr[SPR_BOOKE_TSR];
+    const target_ulong tcr = env->spr[SPR_BOOKE_TCR];
+
+    ppc_set_irq(cpu, PPC_INTERRUPT_DECR, (tsr & TSR_DIS) && (tcr & TCR_DIE));
+    ppc_set_irq(cpu, PPC_INTERRUPT_WDT, (tsr & TSR_WIS) && (tcr & TCR_WIE));
+    ppc_set_irq(cpu, PPC_INTERRUPT_FIT, (tsr & TSR_FIS) && (tcr & TCR_FIE));
+}
+
+/*
+ * Return the position of the time base bit at which the Fixed-Interval
+ * Timer raises its interrupt, as selected by TCR[FP].
+ */
+static uint8_t booke_get_fit_target(CPUPPCState *env, ppc_tb_t *tb_env)
+{
+    const target_ulong tcr = env->spr[SPR_BOOKE_TCR];
+    uint8_t fp = (tcr & TCR_FP_MASK) >> TCR_FP_SHIFT;
+    uint32_t fpext;
+
+    if (!(tb_env->flags & PPC_TIMER_E500)) {
+        /* Not an e500: the bit position comes from the per-CPU table. */
+        return env->fit_period[fp];
+    }
+
+    /* e500: the period extension field supplies the upper selector bits. */
+    fpext = (tcr & TCR_E500_FPEXT_MASK) >> TCR_E500_FPEXT_SHIFT;
+    return 63 - (fp | fpext << 2);
+}
+
+/*
+ * Return the position of the time base bit at which the Watchdog Timer
+ * raises its interrupt, as selected by TCR[WP].
+ */
+static uint8_t booke_get_wdt_target(CPUPPCState *env, ppc_tb_t *tb_env)
+{
+    const target_ulong tcr = env->spr[SPR_BOOKE_TCR];
+    uint8_t wp = (tcr & TCR_WP_MASK) >> TCR_WP_SHIFT;
+    uint32_t wpext;
+
+    if (!(tb_env->flags & PPC_TIMER_E500)) {
+        /* Not an e500: the bit position comes from the per-CPU table. */
+        return env->wdt_period[wp];
+    }
+
+    /* e500: the period extension field supplies the upper selector bits. */
+    wpext = (tcr & TCR_E500_WPEXT_MASK) >> TCR_E500_WPEXT_SHIFT;
+    return 63 - (wp | wpext << 2);
+}
+
+/*
+ * (Re)program one of the fixed-period BookE timers (FIT or WDT).
+ *
+ * @env:        CPU state; supplies TSR and the time base (env->tb_env)
+ * @target_bit: position of the time base bit whose 0 -> 1 toggle fires
+ *              the timer
+ * @next:       out: absolute expiry that was programmed, in
+ *              QEMU_CLOCK_VIRTUAL nanoseconds
+ * @timer:      the QEMU timer to program
+ * @tsr_bit:    status bit of this timer in TSR (TSR_FIS or TSR_WIS)
+ *
+ * The timer is left untouched while @tsr_bit is still set in TSR, i.e.
+ * while the guest has not acknowledged the previous expiry.  The expiry
+ * is never scheduled less than one millisecond ahead, except for the
+ * "expires right now" case which is pushed out by one nanosecond.
+ */
+static void booke_update_fixed_timer(CPUPPCState *env,
+                                     uint8_t target_bit,
+                                     uint64_t *next,
+                                     QEMUTimer *timer,
+                                     int tsr_bit)
+{
+    ppc_tb_t *tb_env = env->tb_env;
+    uint64_t delta_tick, ticks = 0;
+    uint64_t tb;
+    uint64_t period;
+    uint64_t now;
+
+    if (env->spr[SPR_BOOKE_TSR] & tsr_bit) {
+        /*
+         * Don't arm the timer again when the guest has the current
+         * interrupt still pending. Wait for it to ack it.
+         */
+        return;
+    }
+
+    now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    tb = cpu_ppc_get_tb(tb_env, now, tb_env->tb_offset);
+    period = 1ULL << target_bit;
+    /* Ticks left until the bits below the target bit wrap around */
+    delta_tick = period - (tb & (period - 1));
+
+    /* the timer triggers only when the selected bit toggles from 0 to 1 */
+    if (tb & period) {
+        /* Bit is currently 1: it first has to drop to 0, one period more */
+        ticks = period;
+    }
+
+    if (ticks + delta_tick < ticks) {
+        /* Overflow, so assume the biggest number we can express. */
+        ticks = UINT64_MAX;
+    } else {
+        ticks += delta_tick;
+    }
+
+    *next = now + muldiv64(ticks, NANOSECONDS_PER_SECOND, tb_env->tb_freq);
+    if ((*next < now) || (*next > INT64_MAX)) {
+        /* Overflow, so assume the biggest number the qemu timer supports. */
+        *next = INT64_MAX;
+    }
+
+    /*
+     * XXX: If expire time is now. We can't run the callback because we don't
+     * have access to it. So we just set the timer one nanosecond later.
+     */
+    if (*next == now) {
+        (*next)++;
+    } else {
+        /*
+         * There's no point to fake any granularity that's more fine grained
+         * than milliseconds. Anything beyond that just overloads the system.
+         */
+        *next = MAX(*next, now + SCALE_MS);
+    }
+
+    /* Fire the next timer */
+    timer_mod(timer, *next);
+}
+
+/*
+ * Decrementer expiry callback: latch TSR[DIS], refresh the interrupt
+ * lines and, when auto-reload is enabled, reload the decrementer from
+ * DECAR.
+ */
+static void booke_decr_cb(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+
+    env->spr[SPR_BOOKE_TSR] |= TSR_DIS;
+    booke_update_irq(cpu);
+
+    if (!(env->spr[SPR_BOOKE_TCR] & TCR_ARE)) {
+        return;
+    }
+
+    /*
+     * Do not reload 0, it is already there.  It would just trigger the
+     * timer again and lead to an infinite loop.
+     */
+    if (env->spr[SPR_BOOKE_DECAR] == 0) {
+        return;
+    }
+
+    /* Auto Reload */
+    cpu_ppc_store_decr(env, env->spr[SPR_BOOKE_DECAR]);
+}
+
+/*
+ * Fixed-Interval Timer expiry callback: latch TSR[FIS], refresh the
+ * interrupt lines and hand the timer back to booke_update_fixed_timer().
+ */
+static void booke_fit_cb(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    ppc_tb_t *tb_env = env->tb_env;
+    booke_timer_t *timers = tb_env->opaque;
+
+    env->spr[SPR_BOOKE_TSR] |= TSR_FIS;
+    booke_update_irq(cpu);
+
+    booke_update_fixed_timer(env, booke_get_fit_target(env, tb_env),
+                             &timers->fit_next, timers->fit_timer, TSR_FIS);
+}
+
+/*
+ * Watchdog timer expiry callback.  It currently only refreshes the
+ * interrupt lines and hands the timer back to booke_update_fixed_timer();
+ * no watchdog status bit is set here.
+ */
+static void booke_wdt_cb(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    ppc_tb_t *tb_env = env->tb_env;
+    booke_timer_t *timers = tb_env->opaque;
+
+    /* TODO: There's lots of complicated stuff to do here */
+
+    booke_update_irq(cpu);
+
+    booke_update_fixed_timer(env, booke_get_wdt_target(env, tb_env),
+                             &timers->wdt_next, timers->wdt_timer, TSR_WIS);
+}
+
+/*
+ * Handle a write to the Timer Status Register.
+ *
+ * Every bit set in @val is cleared in TSR (write-one-to-clear).  The same
+ * mask is passed on to kvmppc_clear_tsr_bits().  When the FIT or WDT
+ * status bit is among the cleared bits, the corresponding timer is handed
+ * to booke_update_fixed_timer(); finally the interrupt lines are
+ * recomputed.
+ */
+void store_booke_tsr(CPUPPCState *env, target_ulong val)
+{
+ PowerPCCPU *cpu = env_archcpu(env);
+ ppc_tb_t *tb_env = env->tb_env;
+ booke_timer_t *booke_timer = tb_env->opaque;
+
+ /* Write-one-to-clear */
+ env->spr[SPR_BOOKE_TSR] &= ~val;
+ kvmppc_clear_tsr_bits(cpu, val);
+
+ if (val & TSR_FIS) {
+ booke_update_fixed_timer(env,
+ booke_get_fit_target(env, tb_env),
+ &booke_timer->fit_next,
+ booke_timer->fit_timer,
+ TSR_FIS);
+ }
+
+ if (val & TSR_WIS) {
+ booke_update_fixed_timer(env,
+ booke_get_wdt_target(env, tb_env),
+ &booke_timer->wdt_next,
+ booke_timer->wdt_timer,
+ TSR_WIS);
+ }
+
+ booke_update_irq(cpu);
+}
+
+/*
+ * Handle a write to the Timer Control Register.
+ *
+ * TCR is replaced by @val and kvmppc_set_tcr() is called.  Because the
+ * enable bits and the period selectors may have changed, the interrupt
+ * lines are recomputed and both the FIT and the WDT are handed to
+ * booke_update_fixed_timer() with their freshly computed target bits.
+ */
+void store_booke_tcr(CPUPPCState *env, target_ulong val)
+{
+ PowerPCCPU *cpu = env_archcpu(env);
+ ppc_tb_t *tb_env = env->tb_env;
+ booke_timer_t *booke_timer = tb_env->opaque;
+
+ env->spr[SPR_BOOKE_TCR] = val;
+ kvmppc_set_tcr(cpu);
+
+ booke_update_irq(cpu);
+
+ booke_update_fixed_timer(env,
+ booke_get_fit_target(env, tb_env),
+ &booke_timer->fit_next,
+ booke_timer->fit_timer,
+ TSR_FIS);
+
+ booke_update_fixed_timer(env,
+ booke_get_wdt_target(env, tb_env),
+ &booke_timer->wdt_next,
+ booke_timer->wdt_timer,
+ TSR_WIS);
+}
+
+/*
+ * Reset handler: write 0 to TCR, then clear every TSR bit by writing
+ * all-ones through the write-one-to-clear path.
+ */
+static void ppc_booke_timer_reset_handle(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+
+    store_booke_tcr(&cpu->env, 0);
+    store_booke_tsr(&cpu->env, -1);
+}
+
+/*
+ * VM run state change callback (run states are defined by
+ * "typedef enum RunState").
+ *
+ * When the VM goes back to running after a debug halt or a similarly long
+ * pause, the final watchdog expiry may have happened in the meantime.
+ * That would cause an exit to QEMU and the configured watchdog action
+ * would be taken.  To avoid this, the watchdog state is always cleared
+ * when the state changes to running.
+ */
+static void cpu_state_change_handler(void *opaque, bool running, RunState state)
+{
+    PowerPCCPU *cpu = opaque;
+
+    if (running) {
+        /* Clear the watchdog interrupt condition by clearing TSR. */
+        store_booke_tsr(&cpu->env, TSR_ENW | TSR_WIS | TSR_WRS_MASK);
+    }
+}
+
+/*
+ * Create the BookE time base and timers for @cpu.
+ *
+ * @cpu:   CPU that owns the timers
+ * @freq:  frequency used for both the time base and the decrementer
+ * @flags: extra PPC_TIMER_* flags; PPC_TIMER_BOOKE and
+ *         PPC_DECR_ZERO_TRIGGERED are always added
+ *
+ * Allocates the ppc_tb_t and the booke_timer_t, creates the decrementer,
+ * FIT and WDT timers on QEMU_CLOCK_VIRTUAL, and registers the run state
+ * change handler and the reset handler.
+ */
+void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags)
+{
+ ppc_tb_t *tb_env;
+ booke_timer_t *booke_timer;
+ int ret = 0;
+
+ tb_env = g_malloc0(sizeof(ppc_tb_t));
+ booke_timer = g_malloc0(sizeof(booke_timer_t));
+
+ cpu->env.tb_env = tb_env;
+ tb_env->flags = flags | PPC_TIMER_BOOKE | PPC_DECR_ZERO_TRIGGERED;
+
+ tb_env->tb_freq = freq;
+ tb_env->decr_freq = freq;
+ tb_env->opaque = booke_timer;
+ tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_decr_cb, cpu);
+
+ booke_timer->fit_timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_fit_cb, cpu);
+ booke_timer->wdt_timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_wdt_cb, cpu);
+
+ /* The result is only inspected by the (still empty) branch below */
+ ret = kvmppc_booke_watchdog_enable(cpu);
+
+ if (ret) {
+ /* TODO: Start the QEMU emulated watchdog if not running on KVM.
+ * Also start the QEMU emulated watchdog if KVM does not support
+ * emulated watchdog or somehow it is not enabled (supported but
+ * not enabled is though some bug and requires debugging :)).
+ */
+ }
+
+ qemu_add_vm_change_state_handler(cpu_state_change_handler, cpu);
+
+ qemu_register_reset(ppc_booke_timer_reset_handle, cpu);
+}
diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c
new file mode 100644
index 000000000..d57b19979
--- /dev/null
+++ b/hw/ppc/ppce500_spin.c
@@ -0,0 +1,209 @@
+/*
+ * QEMU PowerPC e500v2 ePAPR spinning code
+ *
+ * Copyright (C) 2011 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author: Alexander Graf, <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * This code is not really a device, but models an interface that usually
+ * firmware takes care of. It's used when QEMU plays the role of firmware.
+ *
+ * Specification:
+ *
+ * https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/module.h"
+#include "qemu/units.h"
+#include "hw/hw.h"
+#include "hw/sysbus.h"
+#include "sysemu/hw_accel.h"
+#include "e500.h"
+#include "qom/object.h"
+
+/* Number of spin table entries provided by the device */
+#define MAX_CPUS 32
+
+/*
+ * One spin table entry.  The struct is packed and its fields are accessed
+ * through the ld*_p()/st*_p() helpers.
+ */
+typedef struct spin_info {
+ /* Entry address; the CPU is released once bit 0 is clear */
+ uint64_t addr;
+ /* Value loaded into r3 of the released CPU */
+ uint64_t r3;
+ /* NOTE(review): not referenced by the code in this file */
+ uint32_t resv;
+ /* Entry index after reset; overwritten with the CPU's PIR on release */
+ uint32_t pir;
+ /* NOTE(review): not referenced by the code in this file */
+ uint64_t reserved;
+} QEMU_PACKED SpinInfo;
+
+#define TYPE_E500_SPIN "e500-spin"
+OBJECT_DECLARE_SIMPLE_TYPE(SpinState, E500_SPIN)
+
+/* Device state: one MMIO region backed by MAX_CPUS spin table entries. */
+struct SpinState {
+ SysBusDevice parent_obj;
+
+ /* MMIO window covering the whole spin[] array */
+ MemoryRegion iomem;
+ /* Spin table, indexed by CPU index */
+ SpinInfo spin[MAX_CPUS];
+};
+
+/*
+ * Device reset: put every spin table entry back into the "keep spinning"
+ * state (addr = 1) with pir and r3 set to the entry index.
+ */
+static void spin_reset(DeviceState *dev)
+{
+    SpinState *s = E500_SPIN(dev);
+    int cpu_nr;
+
+    for (cpu_nr = 0; cpu_nr < MAX_CPUS; cpu_nr++) {
+        stl_p(&s->spin[cpu_nr].pir, cpu_nr);
+        stq_p(&s->spin[cpu_nr].r3, cpu_nr);
+        stq_p(&s->spin[cpu_nr].addr, 1);
+    }
+}
+
+/*
+ * Fill the TLB entry returned by booke206_get_tlbm(env, 1, 0, 1) with a
+ * valid mapping of @len bytes from virtual address @va to physical
+ * address @pa, and mark the TLB dirty.
+ */
+static void mmubooke_create_initial_mapping(CPUPPCState *env,
+                                            target_ulong va,
+                                            hwaddr pa,
+                                            hwaddr len)
+{
+    ppcmas_tlb_t *entry = booke206_get_tlbm(env, 1, 0, 1);
+    hwaddr tsize = booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT;
+
+    entry->mas1 = MAS1_VALID | tsize;
+    entry->mas2 = (va & TARGET_PAGE_MASK) | MAS2_M;
+    /* Physical page plus all six MAS3 permission bits */
+    entry->mas7_3 = (pa & TARGET_PAGE_MASK)
+                    | MAS3_UR | MAS3_UW | MAS3_UX
+                    | MAS3_SR | MAS3_SW | MAS3_SX;
+    env->tlb_dirty = true;
+}
+
+/*
+ * Release a secondary CPU from the spin table.  Scheduled on the target
+ * CPU by spin_write() through run_on_cpu().
+ *
+ * @cs:   the CPU being released
+ * @data: host pointer to that CPU's SpinInfo entry
+ *
+ * The entry address is split into a 64 MiB aligned base, which is mapped
+ * at virtual address 0, and an offset within it, which becomes the new
+ * instruction pointer.
+ */
+static void spin_kick(CPUState *cs, run_on_cpu_data data)
+{
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+ SpinInfo *curspin = data.host_ptr;
+ hwaddr map_size = 64 * MiB;
+ hwaddr map_start;
+
+ cpu_synchronize_state(cs);
+ /* Publish the CPU's PIR in its spin table entry */
+ stl_p(&curspin->pir, env->spr[SPR_BOOKE_PIR]);
+ /* Entry point = offset of the entry address within the mapped window */
+ env->nip = ldq_p(&curspin->addr) & (map_size - 1);
+ env->gpr[3] = ldq_p(&curspin->r3);
+ env->gpr[4] = 0;
+ env->gpr[5] = 0;
+ env->gpr[6] = 0;
+ /* r7 carries the size of the initial mapping */
+ env->gpr[7] = map_size;
+ env->gpr[8] = 0;
+ env->gpr[9] = 0;
+
+ /* Map the map_size-aligned window containing the entry address at VA 0 */
+ map_start = ldq_p(&curspin->addr) & ~(map_size - 1);
+ mmubooke_create_initial_mapping(env, 0, map_start, map_size);
+
+ /* Mark the CPU runnable and wake it up */
+ cs->halted = 0;
+ cs->exception_index = -1;
+ cs->stopped = false;
+ qemu_cpu_kick(cs);
+}
+
+/*
+ * MMIO write handler for the spin table.
+ *
+ * The entry is selected by @addr / sizeof(SpinInfo).  Writes to entries
+ * of unknown CPUs and of CPU 0 are dropped.  Accesses of 1, 2 or 4 bytes
+ * are stored into the entry; afterwards the CPU is released through
+ * spin_kick() if bit 0 of the entry address is clear.
+ */
+static void spin_write(void *opaque, hwaddr addr, uint64_t value,
+                       unsigned len)
+{
+    SpinState *s = opaque;
+    int env_idx = addr / sizeof(SpinInfo);
+    CPUState *cpu = qemu_get_cpu(env_idx);
+    SpinInfo *curspin;
+    uint8_t *dst;
+
+    /* Unknown CPU, or the primary CPU which doesn't spin */
+    if (cpu == NULL || cpu->cpu_index == 0) {
+        return;
+    }
+
+    curspin = &s->spin[env_idx];
+    dst = (uint8_t *)curspin + addr % sizeof(SpinInfo);
+
+    if (len == 1) {
+        stb_p(dst, value);
+    } else if (len == 2) {
+        stw_p(dst, value);
+    } else if (len == 4) {
+        stl_p(dst, value);
+    }
+
+    if (!(ldq_p(&curspin->addr) & 1)) {
+        /* run CPU */
+        run_on_cpu(cpu, spin_kick, RUN_ON_CPU_HOST_PTR(curspin));
+    }
+}
+
+/*
+ * MMIO read handler for the spin table: return 1, 2 or 4 bytes at byte
+ * offset @addr of the table.  Any other access size is reported through
+ * hw_error().
+ */
+static uint64_t spin_read(void *opaque, hwaddr addr, unsigned len)
+{
+    SpinState *s = opaque;
+    uint8_t *src = (uint8_t *)s->spin + addr;
+
+    if (len == 1) {
+        return ldub_p(src);
+    }
+    if (len == 2) {
+        return lduw_p(src);
+    }
+    if (len == 4) {
+        return ldl_p(src);
+    }
+    hw_error("ppce500: unexpected %s with len = %u", __func__, len);
+}
+
+/* MMIO callbacks of the spin table region (big-endian device). */
+static const MemoryRegionOps spin_rw_ops = {
+    .endianness = DEVICE_BIG_ENDIAN,
+    .write      = spin_write,
+    .read       = spin_read,
+};
+
+/*
+ * Instance initializer: create the MMIO region that spans the whole spin
+ * table and export it as the device's sysbus MMIO region.
+ */
+static void ppce500_spin_initfn(Object *obj)
+{
+    SpinState *s = E500_SPIN(obj);
+
+    memory_region_init_io(&s->iomem, obj, &spin_rw_ops, s,
+                          "e500 spin pv device", sizeof(SpinInfo) * MAX_CPUS);
+    sysbus_init_mmio(SYS_BUS_DEVICE(obj), &s->iomem);
+}
+
+/* Class initializer: install the device reset callback. */
+static void ppce500_spin_class_init(ObjectClass *klass, void *data)
+{
+    DEVICE_CLASS(klass)->reset = spin_reset;
+}
+
+/* QOM type record for the e500 spin table device. */
+static const TypeInfo ppce500_spin_info = {
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .name          = TYPE_E500_SPIN,
+    .class_init    = ppce500_spin_class_init,
+    .instance_init = ppce500_spin_initfn,
+    .instance_size = sizeof(SpinState),
+};
+
+/* Register the e500 spin table QOM type. */
+static void ppce500_spin_register_types(void)
+{
+ type_register_static(&ppce500_spin_info);
+}
+
+type_init(ppce500_spin_register_types)
diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c
new file mode 100644
index 000000000..25a2e86b4
--- /dev/null
+++ b/hw/ppc/prep.c
@@ -0,0 +1,440 @@
+/*
+ * QEMU PPC PREP hardware System Emulator
+ *
+ * Copyright (c) 2003-2007 Jocelyn Mayer
+ * Copyright (c) 2017 Hervé Poussineau
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/rtc/m48t59.h"
+#include "hw/char/serial.h"
+#include "hw/block/fdc.h"
+#include "net/net.h"
+#include "hw/isa/isa.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/pci_host.h"
+#include "hw/ppc/ppc.h"
+#include "hw/boards.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "hw/loader.h"
+#include "hw/rtc/mc146818rtc.h"
+#include "hw/isa/pc87312.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/kvm.h"
+#include "sysemu/reset.h"
+#include "trace.h"
+#include "elf.h"
+#include "qemu/units.h"
+#include "kvm_ppc.h"
+
+/* SMP is not enabled, for now */
+#define MAX_CPUS 1
+
+#define MAX_IDE_BUS 2
+
+/* Base address at which the fw_cfg MMIO regions are mapped */
+#define CFG_ADDR 0xf0000510
+
+/* Guest physical addresses at which -kernel and -initrd images are loaded */
+#define KERNEL_LOAD_ADDR 0x01000000
+#define INITRD_LOAD_ADDR 0x01800000
+
+/* NVRAM size value stored in the Open Hack'Ware parameter block */
+#define NVRAM_SIZE 0x2000
+
+/*
+ * Boot-order change callback: store the first character of @boot_device
+ * in the fw_cfg boot device entry.  @opaque is the fw_cfg device.
+ */
+static void fw_cfg_boot_set(void *opaque, const char *boot_device,
+                            Error **errp)
+{
+    const char first_choice = boot_device[0];
+
+    fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, first_choice);
+}
+
+/* Machine reset handler: reset the CPU passed as @opaque. */
+static void ppc_prep_reset(void *opaque)
+{
+    cpu_reset(CPU(opaque));
+}
+
+
+/*****************************************************************************/
+/* NVRAM helpers */
+/* Read NVRAM location @addr through the device class's read method. */
+static inline uint32_t nvram_read(Nvram *nvram, uint32_t addr)
+{
+    return NVRAM_GET_CLASS(nvram)->read(nvram, addr);
+}
+
+/* Write @val to NVRAM location @addr through the class's write method. */
+static inline void nvram_write(Nvram *nvram, uint32_t addr, uint32_t val)
+{
+    NVRAM_GET_CLASS(nvram)->write(nvram, addr, val);
+}
+
+/* Store the single byte @value at NVRAM offset @addr. */
+static void NVRAM_set_byte(Nvram *nvram, uint32_t addr, uint8_t value)
+{
+    NVRAM_GET_CLASS(nvram)->write(nvram, addr, value);
+}
+
+/* Fetch the byte at NVRAM offset @addr (the value is truncated to 8 bits). */
+static uint8_t NVRAM_get_byte(Nvram *nvram, uint32_t addr)
+{
+    return NVRAM_GET_CLASS(nvram)->read(nvram, addr);
+}
+
+/* Store a 16-bit value at @addr, most significant byte first. */
+static void NVRAM_set_word(Nvram *nvram, uint32_t addr, uint16_t value)
+{
+    const uint32_t high = value >> 8;
+    const uint32_t low = value & 0xFF;
+
+    nvram_write(nvram, addr, high);
+    nvram_write(nvram, addr + 1, low);
+}
+
+/* Load a 16-bit value stored most significant byte first at @addr. */
+static uint16_t NVRAM_get_word(Nvram *nvram, uint32_t addr)
+{
+    /* Two statements keep the reads in order: @addr first, then @addr + 1 */
+    const uint32_t high = nvram_read(nvram, addr);
+    const uint32_t low = nvram_read(nvram, addr + 1);
+
+    return (high << 8) | low;
+}
+
+/* Store a 32-bit value at @addr, most significant byte first. */
+static void NVRAM_set_lword(Nvram *nvram, uint32_t addr, uint32_t value)
+{
+    uint32_t byte_nr;
+
+    for (byte_nr = 0; byte_nr < 4; byte_nr++) {
+        const unsigned shift = 24 - 8 * byte_nr;
+
+        nvram_write(nvram, addr + byte_nr, (value >> shift) & 0xFF);
+    }
+}
+
+/*
+ * Store @str as a NUL-terminated string in the @max byte NVRAM field that
+ * starts at @addr.
+ *
+ * At most @max - 1 characters are copied, so a string that does not fit is
+ * truncated instead of spilling into the byte after the field.  The last
+ * byte of the field is always set to NUL.  Nothing is written when @max
+ * is 0.
+ */
+static void NVRAM_set_string(Nvram *nvram, uint32_t addr, const char *str,
+                             uint32_t max)
+{
+    uint32_t i;
+
+    if (max == 0) {
+        /* No room at all, and addr + max - 1 would be outside the field */
+        return;
+    }
+
+    for (i = 0; i < max - 1 && str[i] != '\0'; i++) {
+        nvram_write(nvram, addr + i, str[i]);
+    }
+    /* Terminate right after the copied characters ... */
+    nvram_write(nvram, addr + i, '\0');
+    /* ... and make sure the field as a whole ends in NUL */
+    nvram_write(nvram, addr + max - 1, '\0');
+}
+
+/*
+ * Fold one 16-bit @value into the running checksum @prev and return the
+ * new checksum.  Only the low byte of (@prev ^ @value) is mixed in, one
+ * nibble at a time, on top of @prev shifted right by eight bits.
+ */
+static uint16_t NVRAM_crc_update (uint16_t prev, uint16_t value)
+{
+    const uint16_t mixed = prev ^ value;
+    const uint16_t low_nibble = mixed & 0x000F;
+    const uint16_t high_nibble = ((mixed >> 4) & 0x000F) ^ low_nibble;
+    uint16_t crc = prev >> 8;
+
+    crc ^= (low_nibble << 3) | (low_nibble << 8);
+    crc ^= high_nibble | (high_nibble << 7) | (high_nibble << 12);
+
+    return crc;
+}
+
+/*
+ * Compute the checksum of @count bytes of NVRAM starting at @start.
+ *
+ * The checksum starts at 0xFFFF.  For every byte offset below the even
+ * part of @count, the 16-bit word read at that offset is folded in; when
+ * @count is odd, the final byte is folded in as the high byte of a word.
+ */
+static uint16_t NVRAM_compute_crc (Nvram *nvram, uint32_t start, uint32_t count)
+{
+    const uint32_t even_count = count & ~1;
+    uint16_t crc = 0xFFFF;
+    uint32_t pos = 0;
+
+    while (pos != even_count) {
+        crc = NVRAM_crc_update(crc, NVRAM_get_word(nvram, start + pos));
+        pos++;
+    }
+    if (count & 1) {
+        crc = NVRAM_crc_update(crc, NVRAM_get_byte(nvram, start + pos) << 8);
+    }
+
+    return crc;
+}
+
+#define CMDLINE_ADDR 0x017ff000
+
+/*
+ * Fill the parameter block read by the Open Hack'Ware BIOS at the start
+ * of @nvram.
+ *
+ * Multi-byte values are stored most significant byte first.  When @cmdline
+ * is not NULL it is also copied into guest memory at CMDLINE_ADDR.  A
+ * checksum over the first 0xF8 bytes is stored at offset 0xFC.
+ *
+ * Always returns 0.
+ */
+static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size,
+ const char *arch,
+ uint32_t RAM_size, int boot_device,
+ uint32_t kernel_image, uint32_t kernel_size,
+ const char *cmdline,
+ uint32_t initrd_image, uint32_t initrd_size,
+ uint32_t NVRAM_image,
+ int width, int height, int depth)
+{
+ uint16_t crc;
+
+ /* Set parameters for Open Hack'Ware BIOS */
+ NVRAM_set_string(nvram, 0x00, "QEMU_BIOS", 16);
+ NVRAM_set_lword(nvram, 0x10, 0x00000002); /* structure v2 */
+ NVRAM_set_word(nvram, 0x14, NVRAM_size);
+ NVRAM_set_string(nvram, 0x20, arch, 16);
+ NVRAM_set_lword(nvram, 0x30, RAM_size);
+ NVRAM_set_byte(nvram, 0x34, boot_device);
+ NVRAM_set_lword(nvram, 0x38, kernel_image);
+ NVRAM_set_lword(nvram, 0x3C, kernel_size);
+ if (cmdline) {
+ /* XXX: put the cmdline in NVRAM too ? */
+ /* NOTE(review): the size argument assumes RAM_size > CMDLINE_ADDR */
+ pstrcpy_targphys("cmdline", CMDLINE_ADDR, RAM_size - CMDLINE_ADDR,
+ cmdline);
+ NVRAM_set_lword(nvram, 0x40, CMDLINE_ADDR);
+ NVRAM_set_lword(nvram, 0x44, strlen(cmdline));
+ } else {
+ NVRAM_set_lword(nvram, 0x40, 0);
+ NVRAM_set_lword(nvram, 0x44, 0);
+ }
+ NVRAM_set_lword(nvram, 0x48, initrd_image);
+ NVRAM_set_lword(nvram, 0x4C, initrd_size);
+ NVRAM_set_lword(nvram, 0x50, NVRAM_image);
+
+ NVRAM_set_word(nvram, 0x54, width);
+ NVRAM_set_word(nvram, 0x56, height);
+ NVRAM_set_word(nvram, 0x58, depth);
+ /* Checksum the block last, once every field is in place */
+ crc = NVRAM_compute_crc(nvram, 0x00, 0xF8);
+ NVRAM_set_word(nvram, 0xFC, crc);
+
+ return 0;
+}
+
+/*
+ * Bus walk callback: for an MC146818 RTC device, store the 16-bit
+ * checksum pointed to by @opaque in CMOS (low byte at 0x2e and 0x3e, high
+ * byte at 0x2f and 0x3f) and alias the RTC's "date" property as
+ * "rtc-time" on the machine object.  Other devices are left alone.
+ *
+ * Always returns 0.
+ */
+static int prep_set_cmos_checksum(DeviceState *dev, void *opaque)
+{
+    const uint16_t checksum = *(uint16_t *)opaque;
+    ISADevice *rtc;
+
+    if (!object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) {
+        return 0;
+    }
+
+    rtc = ISA_DEVICE(dev);
+    rtc_set_memory(rtc, 0x2e, checksum & 0xff);
+    rtc_set_memory(rtc, 0x3e, checksum & 0xff);
+    rtc_set_memory(rtc, 0x2f, checksum >> 8);
+    rtc_set_memory(rtc, 0x3f, checksum >> 8);
+
+    object_property_add_alias(qdev_get_machine(), "rtc-time", OBJECT(rtc),
+                              "date");
+    return 0;
+}
+
+/*
+ * Board initialization for the "40p" machine.
+ *
+ * In order: creates the CPU (only the 6xx bus input is accepted) and its
+ * time base, the raven PCI host, the i82378 PCI-ISA bridge, the memory
+ * controller and the RTC; adds the default peripherals when
+ * defaults_enabled(); creates the fw_cfg device; loads the optional
+ * kernel, initrd and command line; and finally publishes the boot
+ * parameters through fw_cfg (for OpenBIOS) and, when the m48t59 NVRAM
+ * exists, through NVRAM (for Open Hack'Ware).
+ *
+ * Unrecoverable errors terminate QEMU.
+ */
+static void ibm_40p_init(MachineState *machine)
+{
+ const char *bios_name = machine->firmware ?: "openbios-ppc";
+ CPUPPCState *env = NULL;
+ uint16_t cmos_checksum;
+ PowerPCCPU *cpu;
+ DeviceState *dev, *i82378_dev;
+ SysBusDevice *pcihost, *s;
+ Nvram *m48t59 = NULL;
+ PCIBus *pci_bus;
+ ISADevice *isa_dev;
+ ISABus *isa_bus;
+ void *fw_cfg;
+ int i;
+ uint32_t kernel_base = 0, initrd_base = 0;
+ long kernel_size = 0, initrd_size = 0;
+ char boot_device;
+
+ /* init CPU */
+ cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+ env = &cpu->env;
+ if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
+ error_report("only 6xx bus is supported on this machine");
+ exit(1);
+ }
+
+ if (env->flags & POWERPC_FLAG_RTC_CLK) {
+ /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */
+ cpu_ppc_tb_init(env, 7812500UL);
+ } else {
+ /* Set time-base frequency to 100 Mhz */
+ cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL);
+ }
+ qemu_register_reset(ppc_prep_reset, cpu);
+
+ /* PCI host */
+ dev = qdev_new("raven-pcihost");
+ qdev_prop_set_string(dev, "bios-name", bios_name);
+ qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE);
+ pcihost = SYS_BUS_DEVICE(dev);
+ object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev));
+ sysbus_realize_and_unref(pcihost, &error_fatal);
+ pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0"));
+ if (!pci_bus) {
+ error_report("could not create PCI host controller");
+ exit(1);
+ }
+
+ /* PCI -> ISA bridge */
+ i82378_dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(11, 0), "i82378"));
+ /* Bridge output 0 drives the CPU's external interrupt input */
+ qdev_connect_gpio_out(i82378_dev, 0,
+ cpu->env.irq_inputs[PPC6xx_INPUT_INT]);
+ sysbus_connect_irq(pcihost, 0, qdev_get_gpio_in(i82378_dev, 15));
+ isa_bus = ISA_BUS(qdev_get_child_bus(i82378_dev, "isa.0"));
+
+ /* Memory controller */
+ isa_dev = isa_new("rs6000-mc");
+ dev = DEVICE(isa_dev);
+ qdev_prop_set_uint32(dev, "ram-size", machine->ram_size);
+ isa_realize_and_unref(isa_dev, isa_bus, &error_fatal);
+
+ /* RTC */
+ isa_dev = isa_new(TYPE_MC146818_RTC);
+ dev = DEVICE(isa_dev);
+ qdev_prop_set_int32(dev, "base_year", 1900);
+ isa_realize_and_unref(isa_dev, isa_bus, &error_fatal);
+
+ /* initialize CMOS checksums */
+ cmos_checksum = 0x6aa9;
+ qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL,
+ &cmos_checksum);
+
+ /* add some more devices */
+ if (defaults_enabled()) {
+ /* Only in this case is m48t59 non-NULL for the NVRAM setup below */
+ m48t59 = NVRAM(isa_create_simple(isa_bus, "isa-m48t59"));
+
+ isa_dev = isa_new("cs4231a");
+ dev = DEVICE(isa_dev);
+ qdev_prop_set_uint32(dev, "iobase", 0x830);
+ qdev_prop_set_uint32(dev, "irq", 10);
+ isa_realize_and_unref(isa_dev, isa_bus, &error_fatal);
+
+ isa_dev = isa_new("pc87312");
+ dev = DEVICE(isa_dev);
+ qdev_prop_set_uint32(dev, "config", 12);
+ isa_realize_and_unref(isa_dev, isa_bus, &error_fatal);
+
+ isa_dev = isa_new("prep-systemio");
+ dev = DEVICE(isa_dev);
+ qdev_prop_set_uint32(dev, "ibm-planar-id", 0xfc);
+ qdev_prop_set_uint32(dev, "equipment", 0xc0);
+ isa_realize_and_unref(isa_dev, isa_bus, &error_fatal);
+
+ dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(1, 0),
+ "lsi53c810"));
+ lsi53c8xx_handle_legacy_cmdline(dev);
+ qdev_connect_gpio_out(dev, 0, qdev_get_gpio_in(i82378_dev, 13));
+
+ /* XXX: s3-trio at PCI_DEVFN(2, 0) */
+ pci_vga_init(pci_bus);
+
+ for (i = 0; i < nb_nics; i++) {
+ pci_nic_init_nofail(&nd_table[i], pci_bus, "pcnet",
+ i == 0 ? "3" : NULL);
+ }
+ }
+
+ /* Prepare firmware configuration for OpenBIOS */
+ dev = qdev_new(TYPE_FW_CFG_MEM);
+ fw_cfg = FW_CFG(dev);
+ qdev_prop_set_uint32(dev, "data_width", 1);
+ qdev_prop_set_bit(dev, "dma_enabled", false);
+ object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG,
+ OBJECT(fw_cfg));
+ s = SYS_BUS_DEVICE(dev);
+ sysbus_realize_and_unref(s, &error_fatal);
+ sysbus_mmio_map(s, 0, CFG_ADDR);
+ sysbus_mmio_map(s, 1, CFG_ADDR + 2);
+
+ if (machine->kernel_filename) {
+ /* load kernel */
+ kernel_base = KERNEL_LOAD_ADDR;
+ kernel_size = load_image_targphys(machine->kernel_filename,
+ kernel_base,
+ machine->ram_size - kernel_base);
+ if (kernel_size < 0) {
+ error_report("could not load kernel '%s'",
+ machine->kernel_filename);
+ exit(1);
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size);
+ /* load initrd */
+ if (machine->initrd_filename) {
+ initrd_base = INITRD_LOAD_ADDR;
+ initrd_size = load_image_targphys(machine->initrd_filename,
+ initrd_base,
+ machine->ram_size - initrd_base);
+ if (initrd_size < 0) {
+ error_report("could not load initial ram disk '%s'",
+ machine->initrd_filename);
+ exit(1);
+ }
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size);
+ }
+ if (machine->kernel_cmdline && *machine->kernel_cmdline) {
+ fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, CMDLINE_ADDR);
+ pstrcpy_targphys("cmdline", CMDLINE_ADDR, TARGET_PAGE_SIZE,
+ machine->kernel_cmdline);
+ fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA,
+ machine->kernel_cmdline);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE,
+ strlen(machine->kernel_cmdline) + 1);
+ }
+ /* NOTE(review): 'm' presumably selects the preloaded kernel -- confirm */
+ boot_device = 'm';
+ } else {
+ boot_device = machine->boot_order[0];
+ }
+
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus);
+ fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_PREP);
+
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_WIDTH, graphic_width);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height);
+ fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth);
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled());
+ if (kvm_enabled()) {
+ uint8_t *hypercall;
+
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
+ /* The buffer is handed to fw_cfg_add_bytes() and not freed here */
+ hypercall = g_malloc(16);
+ kvmppc_get_hypercall(env, hypercall, 16);
+ fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
+ } else {
+ fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND);
+ }
+ fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device);
+ qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
+
+ /* Prepare firmware configuration for Open Hack'Ware */
+ if (m48t59) {
+ PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", machine->ram_size,
+ boot_device,
+ kernel_base, kernel_size,
+ machine->kernel_cmdline,
+ initrd_base, initrd_size,
+ /* XXX: need an option to load a NVRAM image */
+ 0,
+ graphic_width, graphic_height, graphic_depth);
+ }
+}
+
+/*
+ * Machine class initializer for the "40p" machine: sets the description,
+ * the board init function and the machine defaults (single CPU, 128 MiB
+ * of RAM, SCSI block devices, boot order "c", a 604 CPU and the "std"
+ * display).
+ */
+static void ibm_40p_machine_init(MachineClass *mc)
+{
+    mc->desc = "IBM RS/6000 7020 (40p)";
+    mc->init = ibm_40p_init;
+    mc->max_cpus = 1;
+    mc->default_ram_size = 128 * MiB;
+    mc->block_default_type = IF_SCSI;
+    mc->default_boot_order = "c";
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("604");
+    mc->default_display = "std";
+}
+
+DEFINE_MACHINE("40p", ibm_40p_machine_init)
diff --git a/hw/ppc/prep_systemio.c b/hw/ppc/prep_systemio.c
new file mode 100644
index 000000000..b2bd78324
--- /dev/null
+++ b/hw/ppc/prep_systemio.c
@@ -0,0 +1,315 @@
+/*
+ * QEMU PReP System I/O emulation
+ *
+ * Copyright (c) 2017 Hervé Poussineau
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "hw/irq.h"
+#include "hw/isa/isa.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "exec/address-spaces.h"
+#include "qom/object.h"
+#include "qemu/error-report.h" /* for error_report() */
+#include "qemu/module.h"
+#include "sysemu/runstate.h"
+#include "cpu.h"
+#include "trace.h"
+
+#define TYPE_PREP_SYSTEMIO "prep-systemio"
+OBJECT_DECLARE_SIMPLE_TYPE(PrepSystemIoState, PREP_SYSTEMIO)
+
+/* Bit as defined in PowerPC Reference Platform v1.1, sect. 6.1.5, p. 132 */
+#define PREP_BIT(n) (1 << (7 - (n)))
+
+/*
+ * State of the PReP System I/O ISA device.  The port numbers in the
+ * trailing comments are the I/O ports through which each byte is accessed.
+ */
+struct PrepSystemIoState {
+ ISADevice parent_obj;
+ MemoryRegion ppc_parity_mem; /* 4-byte region mapped at 0xbfffeff0 */
+
+ qemu_irq non_contiguous_io_map_irq; /* GPIO output, follows port 0x0850 */
+ uint8_t sreset; /* 0x0092 */
+ uint8_t equipment; /* 0x080c */
+ uint8_t system_control; /* 0x081c */
+ uint8_t iomap_type; /* 0x0850 */
+ uint8_t ibm_planar_id; /* 0x0852 */
+ qemu_irq softreset_irq; /* driven with the soft-reset bit of port 0x0092 */
+ PortioList portio;
+};
+
+/* PORT 0092 -- Special Port 92 (Read/Write) */
+
+enum {
+    PORT0092_SOFTRESET = PREP_BIT(7),
+    PORT0092_LE_MODE = PREP_BIT(6),
+};
+
+/*
+ * Write handler for special port 0x92: latches the soft-reset bit, drives
+ * the soft-reset IRQ line with it, and pauses the VM when the guest requests
+ * little-endian mode, which is not implemented.
+ */
+static void prep_port0092_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PrepSystemIoState *sio = opaque;
+
+    trace_prep_systemio_write(addr, val);
+
+    sio->sreset = val & PORT0092_SOFTRESET;
+    qemu_set_irq(sio->softreset_irq, sio->sreset);
+
+    if (!(val & PORT0092_LE_MODE)) {
+        /* Big-endian mode requested: nothing to do */
+        return;
+    }
+
+    /* XXX Not supported yet */
+    error_report("little-endian mode not supported");
+    vm_stop(RUN_STATE_PAUSED);
+}
+
+/* Read handler for port 0x92: returns the latched soft-reset bit. */
+static uint32_t prep_port0092_read(void *opaque, uint32_t addr)
+{
+    PrepSystemIoState *sio = opaque;
+    uint32_t value = sio->sreset;
+
+    trace_prep_systemio_read(addr, value);
+    return value;
+}
+
+/* PORT 0808 -- Hardfile Light Register (Write Only) */
+
+enum {
+ PORT0808_HARDFILE_LIGHT_ON = PREP_BIT(7),
+};
+
+/* Write handler for port 0x808: the value is traced and otherwise ignored. */
+static void prep_port0808_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ trace_prep_systemio_write(addr, val);
+}
+
+/* PORT 0810 -- Password Protect 1 Register (Write Only) */
+
+/* reset by port 0x4D in the SIO */
+/* Write handler for port 0x810: the value is traced and otherwise ignored. */
+static void prep_port0810_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ trace_prep_systemio_write(addr, val);
+}
+
+/* PORT 0812 -- Password Protect 2 Register (Write Only) */
+
+/* reset by port 0x4D in the SIO */
+/* Write handler for port 0x812: the value is traced and otherwise ignored. */
+static void prep_port0812_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ trace_prep_systemio_write(addr, val);
+}
+
+/* PORT 0814 -- L2 Invalidate Register (Write Only) */
+
+/* Write handler for port 0x814: the value is traced and otherwise ignored. */
+static void prep_port0814_write(void *opaque, uint32_t addr, uint32_t val)
+{
+ trace_prep_systemio_write(addr, val);
+}
+
+/* PORT 0818 -- Reserved for Keylock (Read Only) */
+
+enum {
+    PORT0818_KEYLOCK_SIGNAL_HIGH = PREP_BIT(7),
+};
+
+/* Read handler for port 0x818: always reports 0 (keylock signal not set). */
+static uint32_t prep_port0818_read(void *opaque, uint32_t addr)
+{
+    const uint32_t keylock = 0;
+
+    trace_prep_systemio_read(addr, keylock);
+    return keylock;
+}
+
+/* PORT 080C -- Equipment */
+
+enum {
+    PORT080C_SCSIFUSE = PREP_BIT(1),
+    PORT080C_L2_COPYBACK = PREP_BIT(4),
+    PORT080C_L2_256 = PREP_BIT(5),
+    PORT080C_UPGRADE_CPU = PREP_BIT(6),
+    PORT080C_L2 = PREP_BIT(7),
+};
+
+/* Read handler for port 0x80c: returns the configured equipment byte. */
+static uint32_t prep_port080c_read(void *opaque, uint32_t addr)
+{
+    PrepSystemIoState *sio = opaque;
+    uint32_t value = sio->equipment;
+
+    trace_prep_systemio_read(addr, value);
+    return value;
+}
+
+/* PORT 081C -- System Control Register (Read/Write) */
+
+enum {
+    PORT081C_FLOPPY_MOTOR_INHIBIT = PREP_BIT(3),
+    PORT081C_MASK_TEA = PREP_BIT(2),
+    PORT081C_L2_UPDATE_INHIBIT = PREP_BIT(1),
+    PORT081C_L2_CACHEMISS_INHIBIT = PREP_BIT(0),
+};
+
+/*
+ * Write handler for port 0x81c: stores the written value in the system
+ * control register, keeping only the four bits defined above.
+ */
+static void prep_port081c_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    static const uint8_t writable_bits = PORT081C_FLOPPY_MOTOR_INHIBIT |
+                                         PORT081C_MASK_TEA |
+                                         PORT081C_L2_UPDATE_INHIBIT |
+                                         PORT081C_L2_CACHEMISS_INHIBIT;
+    PrepSystemIoState *sio = opaque;
+
+    trace_prep_systemio_write(addr, val);
+    sio->system_control = val & writable_bits;
+}
+
+/* Read handler for port 0x81c: returns the system control register. */
+static uint32_t prep_port081c_read(void *opaque, uint32_t addr)
+{
+    PrepSystemIoState *sio = opaque;
+    uint32_t value = sio->system_control;
+
+    trace_prep_systemio_read(addr, value);
+    return value;
+}
+
+/* System Board Identification */
+
+/* Read handler for port 0x852: returns the configured IBM planar ID. */
+static uint32_t prep_port0852_read(void *opaque, uint32_t addr)
+{
+    PrepSystemIoState *sio = opaque;
+    uint32_t value = sio->ibm_planar_id;
+
+    trace_prep_systemio_read(addr, value);
+    return value;
+}
+
+/* PORT 0850 -- I/O Map Type Register (Read/Write) */
+
+enum {
+    PORT0850_IOMAP_NONCONTIGUOUS = PREP_BIT(7),
+};
+
+/* Read handler for port 0x850: returns the I/O map type register. */
+static uint32_t prep_port0850_read(void *opaque, uint32_t addr)
+{
+    PrepSystemIoState *sio = opaque;
+    uint32_t value = sio->iomap_type;
+
+    trace_prep_systemio_read(addr, value);
+    return value;
+}
+
+/*
+ * Write handler for port 0x850: forwards the non-contiguous I/O map bit to
+ * the GPIO output and stores it as the new register value.
+ */
+static void prep_port0850_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    PrepSystemIoState *sio = opaque;
+    uint32_t map_bit = val & PORT0850_IOMAP_NONCONTIGUOUS;
+
+    trace_prep_systemio_write(addr, val);
+    qemu_set_irq(sio->non_contiguous_io_map_irq, map_bit);
+    sio->iomap_type = map_bit;
+}
+
+/*
+ * I/O port dispatch table for the device.  Entries are relative to the base
+ * passed to isa_register_portio_list() (0x0 in prep_systemio_realize()).
+ * Ports with only .read or only .write set have no handler for the other
+ * direction.
+ */
+static const MemoryRegionPortio ppc_io800_port_list[] = {
+ { 0x092, 1, 1, .read = prep_port0092_read,
+ .write = prep_port0092_write, },
+ { 0x808, 1, 1, .write = prep_port0808_write, },
+ { 0x80c, 1, 1, .read = prep_port080c_read, },
+ { 0x810, 1, 1, .write = prep_port0810_write, },
+ { 0x812, 1, 1, .write = prep_port0812_write, },
+ { 0x814, 1, 1, .write = prep_port0814_write, },
+ { 0x818, 1, 1, .read = prep_port0818_read },
+ { 0x81c, 1, 1, .read = prep_port081c_read,
+ .write = prep_port081c_write, },
+ { 0x850, 1, 1, .read = prep_port0850_read,
+ .write = prep_port0850_write, },
+ { 0x852, 1, 1, .read = prep_port0852_read, },
+ PORTIO_END_OF_LIST()
+};
+
+static uint64_t ppc_parity_error_readl(void *opaque, hwaddr addr,
+ unsigned int size)
+{
+ uint32_t val = 0;
+ trace_prep_systemio_read((unsigned int)addr, val);
+ return val;
+}
+
+/*
+ * Write handler for the parity-error region: writes are not supported and
+ * are only logged as a guest error.
+ */
+static void ppc_parity_error_writel(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__);
+}
+
+/* Access callbacks for the parity-error region; only 4-byte accesses are valid. */
+static const MemoryRegionOps ppc_parity_error_ops = {
+ .read = ppc_parity_error_readl,
+ .write = ppc_parity_error_writel,
+ .valid = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+};
+
+/*
+ * Realize the device: create the non-contiguous I/O map GPIO output and
+ * raise it (the I/O map register starts with the non-contiguous bit set),
+ * route soft resets to the first CPU's HRESET input, register the I/O
+ * ports and map the 4-byte parity-error region at 0xbfffeff0.
+ */
+static void prep_systemio_realize(DeviceState *dev, Error **errp)
+{
+    ISADevice *isadev = ISA_DEVICE(dev);
+    PrepSystemIoState *sio = PREP_SYSTEMIO(dev);
+    PowerPCCPU *boot_cpu;
+
+    /* I/O map type: non-contiguous by default */
+    qdev_init_gpio_out(dev, &sio->non_contiguous_io_map_irq, 1);
+    sio->iomap_type = PORT0850_IOMAP_NONCONTIGUOUS;
+    qemu_set_irq(sio->non_contiguous_io_map_irq,
+                 sio->iomap_type & PORT0850_IOMAP_NONCONTIGUOUS);
+
+    /* Soft reset goes to the hard-reset input of the first CPU */
+    boot_cpu = POWERPC_CPU(first_cpu);
+    sio->softreset_irq = boot_cpu->env.irq_inputs[PPC6xx_INPUT_HRESET];
+
+    isa_register_portio_list(isadev, &sio->portio, 0x0, ppc_io800_port_list,
+                             sio, "systemio800");
+
+    memory_region_init_io(&sio->ppc_parity_mem, OBJECT(dev),
+                          &ppc_parity_error_ops, sio, "ppc-parity", 0x4);
+    memory_region_add_subregion(get_system_memory(), 0xbfffeff0,
+                                &sio->ppc_parity_mem);
+}
+
+/*
+ * Migration description: covers the three guest-writable registers.  The
+ * equipment and planar ID bytes are set through device properties and are
+ * not listed here.
+ */
+static const VMStateDescription vmstate_prep_systemio = {
+ .name = "prep_systemio",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8(sreset, PrepSystemIoState),
+ VMSTATE_UINT8(system_control, PrepSystemIoState),
+ VMSTATE_UINT8(iomap_type, PrepSystemIoState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* Device properties: values returned by ports 0x852 and 0x80c (default 0). */
+static Property prep_systemio_properties[] = {
+ DEFINE_PROP_UINT8("ibm-planar-id", PrepSystemIoState, ibm_planar_id, 0),
+ DEFINE_PROP_UINT8("equipment", PrepSystemIoState, equipment, 0),
+ DEFINE_PROP_END_OF_LIST()
+};
+
+/* Class initializer: installs the realize hook, vmstate and properties. */
+static void prep_systemio_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->realize = prep_systemio_realize;
+    device_class->vmsd = &vmstate_prep_systemio;
+    device_class_set_props(device_class, prep_systemio_properties);
+}
+
+/* QOM type description of the PReP System I/O device; never modified. */
+static const TypeInfo prep_systemio800_info = {
+    .name = TYPE_PREP_SYSTEMIO,
+    .parent = TYPE_ISA_DEVICE,
+    .instance_size = sizeof(PrepSystemIoState),
+    .class_init = prep_systemio_class_initfn,
+};
+
+/* Registers the device type; hooked up through type_init() below. */
+static void prep_systemio_register_types(void)
+{
+ type_register_static(&prep_systemio800_info);
+}
+
+type_init(prep_systemio_register_types)
diff --git a/hw/ppc/rs6000_mc.c b/hw/ppc/rs6000_mc.c
new file mode 100644
index 000000000..c0bc212e9
--- /dev/null
+++ b/hw/ppc/rs6000_mc.c
@@ -0,0 +1,238 @@
+/*
+ * QEMU RS/6000 memory controller
+ *
+ * Copyright (c) 2017 Hervé Poussineau
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) version 3 or any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "hw/isa/isa.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "exec/address-spaces.h"
+#include "qapi/error.h"
+#include "trace.h"
+#include "qom/object.h"
+
+#define TYPE_RS6000MC "rs6000-mc"
+OBJECT_DECLARE_SIMPLE_TYPE(RS6000MCState, RS6000MC)
+
+/* State of the RS/6000 memory controller ISA device. */
+struct RS6000MCState {
+ ISADevice parent_obj;
+ /* see US patent 5,684,979 for details (expired 2001-11-04) */
+ uint32_t ram_size; /* "ram-size" property, in bytes */
+ bool autoconfigure; /* "auto-configure" property */
+ MemoryRegion simm[6]; /* one RAM region per populated SIMM socket */
+ unsigned int simm_size[6]; /* SIMM size in MB (0, 8 or 32) per socket */
+ uint32_t end_address[8]; /* 5-bit values programmed through port 0x820 */
+ uint8_t port0820_index; /* next end_address[] entry read at port 0x820 */
+ PortioList portio;
+};
+
+/* PORT 0803 -- SIMM ID Register (32/8 MB) (Read Only) */
+
+/*
+ * Read handler for port 0x803: returns a bitmap in which bit N is set when
+ * socket N holds a 32 MB SIMM.
+ */
+static uint32_t rs6000mc_port0803_read(void *opaque, uint32_t addr)
+{
+    RS6000MCState *mc = opaque;
+    uint32_t id_bits = 0;
+    int slot = 0;
+
+    while (slot < 6) {
+        if (mc->simm_size[slot] == 32) {
+            id_bits |= (1 << slot);
+        }
+        slot++;
+    }
+
+    trace_rs6000mc_id_read(addr, id_bits);
+    return id_bits;
+}
+
+/* PORT 0804 -- SIMM Presence Register (Read Only) */
+
+/*
+ * Read handler for port 0x804: returns a bitmap in which bit N is set when
+ * socket N is empty.  Reading also rewinds the port 0x820 read index.
+ */
+static uint32_t rs6000mc_port0804_read(void *opaque, uint32_t addr)
+{
+    RS6000MCState *mc = opaque;
+    uint32_t absent = 0xff;
+    int slot = 0;
+
+    while (slot < 6) {
+        if (mc->simm_size[slot] != 0) {
+            /* Socket populated: clear its "absent" bit */
+            absent &= ~(1 << slot);
+        }
+        slot++;
+    }
+    mc->port0820_index = 0;
+
+    trace_rs6000mc_presence_read(addr, absent);
+    return absent;
+}
+
+/* Memory Controller Size Programming Register */
+
+/*
+ * Read handler for port 0x820: returns the low 5 bits of the current
+ * end_address[] entry and advances the read index, wrapping after 8 entries.
+ */
+static uint32_t rs6000mc_port0820_read(void *opaque, uint32_t addr)
+{
+    RS6000MCState *mc = opaque;
+    uint8_t index = mc->port0820_index;
+    uint32_t value = mc->end_address[index] & 0x1f;
+
+    mc->port0820_index = (index + 1) & 7;
+    trace_rs6000mc_size_read(addr, value);
+    return value;
+}
+
+/*
+ * Write handler for port 0x820.  The value encodes an entry number in bits
+ * 7..5 and an end address (in units of 8 MiB) in bits 4..0.  The end address
+ * is always recorded; for entries 1..6 whose SIMM socket (entry - 1) is
+ * populated, the SIMM region is moved to the end address of the previous
+ * entry (0 for entry 1) and enabled unless the resulting size is zero.
+ */
+static void rs6000mc_port0820_write(void *opaque, uint32_t addr, uint32_t val)
+{
+    RS6000MCState *mc = opaque;
+    uint8_t entry = val >> 5;
+    uint32_t end = val & 0x1f;
+    uint32_t start;
+    MemoryRegion *simm;
+
+    trace_rs6000mc_size_write(addr, val);
+    mc->end_address[entry] = end;
+
+    if (entry == 0 || entry >= 7) {
+        /* No SIMM socket is associated with this entry */
+        return;
+    }
+    if (!mc->simm_size[entry - 1]) {
+        /* Socket is empty */
+        return;
+    }
+
+    start = (entry > 1) ? mc->end_address[entry - 1] : 0;
+    simm = &mc->simm[entry - 1];
+
+    memory_region_set_enabled(simm, end - start != 0);
+    memory_region_set_address(simm, start * 8 * MiB);
+}
+
+/* Read Memory Parity Error */
+
+enum {
+    PORT0841_NO_ERROR_DETECTED = 0x01,
+};
+
+/* Read handler for port 0x841: always reports that no parity error occurred. */
+static uint32_t rs6000mc_port0841_read(void *opaque, uint32_t addr)
+{
+    const uint32_t status = PORT0841_NO_ERROR_DETECTED;
+
+    trace_rs6000mc_parity_read(addr, status);
+    return status;
+}
+
+/*
+ * I/O port dispatch table for the memory controller, relative to the base
+ * passed to isa_register_portio_list() (0x0 in rs6000mc_realize()).
+ * Only port 0x820 has a write handler.
+ */
+static const MemoryRegionPortio rs6000mc_port_list[] = {
+ { 0x803, 1, 1, .read = rs6000mc_port0803_read },
+ { 0x804, 1, 1, .read = rs6000mc_port0804_read },
+ { 0x820, 1, 1, .read = rs6000mc_port0820_read,
+ .write = rs6000mc_port0820_write, },
+ { 0x841, 1, 1, .read = rs6000mc_port0841_read },
+ PORTIO_END_OF_LIST()
+};
+
+/*
+ * Realize the memory controller.
+ *
+ * The configured RAM size is distributed over the six SIMM sockets, which
+ * are filled in pairs of 32 MB or 8 MB modules.  One RAM region is created
+ * per populated socket and added to system memory at address 0 with the
+ * socket number as priority.  With "auto-configure" set, the regions are
+ * then enabled and laid out back to back starting at address 0.  Finally
+ * the I/O ports are registered.
+ *
+ * Sets @errp and returns early when the RAM size cannot be built from the
+ * available SIMM combinations, or when a RAM region cannot be created.
+ */
+static void rs6000mc_realize(DeviceState *dev, Error **errp)
+{
+    RS6000MCState *s = RS6000MC(dev);
+    int socket = 0;
+    unsigned int ram_size = s->ram_size / MiB;
+    Error *local_err = NULL;
+
+    while (socket < 6) {
+        if (ram_size >= 64) {
+            s->simm_size[socket] = 32;
+            s->simm_size[socket + 1] = 32;
+            ram_size -= 64;
+        } else if (ram_size >= 16) {
+            s->simm_size[socket] = 8;
+            s->simm_size[socket + 1] = 8;
+            ram_size -= 16;
+        } else {
+            /* Not enough memory */
+            break;
+        }
+        socket += 2;
+    }
+
+    /*
+     * Reject an unsupported size before creating any memory region, so that
+     * this error path does not leave SIMM regions mapped in system memory.
+     */
+    if (ram_size) {
+        /* unable to push all requested RAM in SIMMs */
+        error_setg(errp, "RAM size incompatible with this board. "
+                   "Try again with something else, like %" PRId64 " MB",
+                   s->ram_size / MiB - ram_size);
+        return;
+    }
+
+    for (socket = 0; socket < 6; socket++) {
+        if (s->simm_size[socket]) {
+            char name[] = "simm.?";
+            name[5] = socket + '0';
+            memory_region_init_ram(&s->simm[socket], OBJECT(dev), name,
+                                   s->simm_size[socket] * MiB, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
+            memory_region_add_subregion_overlap(get_system_memory(), 0,
+                                                &s->simm[socket], socket);
+        }
+    }
+
+    if (s->autoconfigure) {
+        uint32_t start_address = 0;
+        for (socket = 0; socket < 6; socket++) {
+            if (s->simm_size[socket]) {
+                memory_region_set_enabled(&s->simm[socket], true);
+                memory_region_set_address(&s->simm[socket], start_address);
+                start_address += memory_region_size(&s->simm[socket]);
+            }
+        }
+    }
+
+    isa_register_portio_list(ISA_DEVICE(dev), &s->portio, 0x0,
+                             rs6000mc_port_list, s, "rs6000mc");
+}
+
+/*
+ * Migration description: only the port 0x820 read index is listed.
+ * NOTE(review): end_address[] and the SIMM placement programmed through
+ * port 0x820 are not part of this description -- confirm this is intended.
+ */
+static const VMStateDescription vmstate_rs6000mc = {
+ .name = "rs6000-mc",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT8(port0820_index, RS6000MCState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Device properties: "ram-size" is in bytes (rs6000mc_realize() divides it
+ * by MiB) and defaults to 0; "auto-configure" defaults to true.
+ */
+static Property rs6000mc_properties[] = {
+ DEFINE_PROP_UINT32("ram-size", RS6000MCState, ram_size, 0),
+ DEFINE_PROP_BOOL("auto-configure", RS6000MCState, autoconfigure, true),
+ DEFINE_PROP_END_OF_LIST()
+};
+
+/* Class initializer: installs the realize hook, vmstate and properties. */
+static void rs6000mc_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *device_class = DEVICE_CLASS(klass);
+
+    device_class->realize = rs6000mc_realize;
+    device_class->vmsd = &vmstate_rs6000mc;
+    device_class_set_props(device_class, rs6000mc_properties);
+}
+
+/* QOM type description of the RS/6000 memory controller. */
+static const TypeInfo rs6000mc_info = {
+ .name = TYPE_RS6000MC,
+ .parent = TYPE_ISA_DEVICE,
+ .instance_size = sizeof(RS6000MCState),
+ .class_init = rs6000mc_class_initfn,
+};
+
+/* Registers the device type; hooked up through type_init() below. */
+static void rs6000mc_types(void)
+{
+ type_register_static(&rs6000mc_info);
+}
+
+type_init(rs6000mc_types)
diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c
new file mode 100644
index 000000000..0737234d6
--- /dev/null
+++ b/hw/ppc/sam460ex.c
@@ -0,0 +1,516 @@
+/*
+ * QEMU aCube Sam460ex board emulation
+ *
+ * Copyright (c) 2012 François Revol
+ * Copyright (c) 2016-2019 BALATON Zoltan
+ *
+ * This file is derived from hw/ppc440_bamboo.c,
+ * the copyright for that material belongs to the original owners.
+ *
+ * This work is licensed under the GNU GPL license version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/units.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/boards.h"
+#include "sysemu/kvm.h"
+#include "kvm_ppc.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/block-backend.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "exec/memory.h"
+#include "ppc440.h"
+#include "ppc405.h"
+#include "hw/block/flash.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/reset.h"
+#include "hw/sysbus.h"
+#include "hw/char/serial.h"
+#include "hw/i2c/ppc4xx_i2c.h"
+#include "hw/i2c/smbus_eeprom.h"
+#include "hw/usb/hcd-ehci.h"
+#include "hw/ppc/fdt.h"
+#include "hw/qdev-properties.h"
+#include "hw/intc/ppc-uic.h"
+
+#include <libfdt.h>
+
+#define BINARY_DEVICE_TREE_FILE "canyonlands.dtb"
+#define UBOOT_FILENAME "u-boot-sam460-20100605.bin"
+/* to extract the official U-Boot bin from the updater: */
+/* dd bs=1 skip=$(($(stat -c '%s' updater/updater-460) - 0x80000)) \
+ if=updater/updater-460 of=u-boot-sam460-20100605.bin */
+
+/* from Sam460 U-Boot include/configs/Sam460ex.h */
+#define FLASH_BASE 0xfff00000
+#define FLASH_BASE_H 0x4
+#define FLASH_SIZE (1 * MiB)
+#define UBOOT_LOAD_BASE 0xfff80000
+#define UBOOT_SIZE 0x00080000
+#define UBOOT_ENTRY 0xfffffffc
+
+/* from U-Boot */
+#define EPAPR_MAGIC (0x45504150)
+#define KERNEL_ADDR 0x1000000
+#define FDT_ADDR 0x1800000
+#define RAMDISK_ADDR 0x1900000
+
+/* Sam460ex IRQ MAP:
+ IRQ0 = ETH_INT
+ IRQ1 = FPGA_INT
+ IRQ2 = PCI_INT (PCIA, PCIB, PCIC, PCIB)
+ IRQ3 = FPGA_INT2
+ IRQ11 = RTC_INT
+ IRQ12 = SM502_INT
+*/
+
+#define CPU_FREQ 1150000000
+#define PLB_FREQ 230000000
+#define OPB_FREQ 115000000
+#define EBC_FREQ 115000000
+#define UART_FREQ 11059200
+#define SDRAM_NR_BANKS 4
+
+/* The SoC could also handle 4 GiB but firmware does not work with that. */
+/* Maybe it overflows a signed 32 bit number somewhere? */
+/*
+ * Bank sizes passed to ppc4xx_sdram_banks(), largest first.
+ * NOTE(review): the trailing 0 is presumably the list terminator -- confirm
+ * against ppc4xx_sdram_banks().
+ */
+static const ram_addr_t ppc460ex_sdram_bank_sizes[] = {
+ 2 * GiB, 1 * GiB, 512 * MiB, 256 * MiB, 128 * MiB, 64 * MiB,
+ 32 * MiB, 0
+};
+
+/* Boot parameters recorded at machine init and consumed at CPU reset. */
+struct boot_info {
+ uint32_t dt_base; /* address the device tree was loaded at */
+ uint32_t dt_size; /* size of the loaded device tree */
+ uint32_t entry; /* initial NIP; UBOOT_ENTRY when no kernel is loaded */
+};
+
+/*
+ * Register the boot flash and, when no pflash drive was supplied, load the
+ * default U-Boot image on top of its second half.
+ *
+ * Exits QEMU if the flash cannot be registered.  Always returns 0; the
+ * result of loading the default image is not checked.
+ */
+static int sam460ex_load_uboot(void)
+{
+    /*
+     * This first creates 1MiB of flash memory mapped at the end of
+     * the 32-bit address space (0xFFF00000..0xFFFFFFFF).
+     *
+     * If IF_PFLASH unit 0 is defined, the flash memory is initialized
+     * from that block backend.
+     *
+     * Else, it's initialized to zero. And then 512KiB of ROM get
+     * mapped on top of its second half (0xFFF80000..0xFFFFFFFF),
+     * initialized from u-boot-sam460-20100605.bin.
+     *
+     * This doesn't smell right.
+     *
+     * The physical hardware appears to have 512KiB flash memory.
+     *
+     * TODO Figure out what we really need here, and clean this up.
+     */
+    const hwaddr high_bits = (hwaddr)FLASH_BASE_H << 32;
+    DriveInfo *flash_drive = drive_get(IF_PFLASH, 0, 0);
+
+    if (!pflash_cfi01_register(FLASH_BASE | high_bits,
+                               "sam460ex.flash", FLASH_SIZE,
+                               flash_drive ? blk_by_legacy_dinfo(flash_drive)
+                                           : NULL,
+                               64 * KiB, 1, 0x89, 0x18, 0x0000, 0x0, 1)) {
+        error_report("Error registering flash memory");
+        /* XXX: return an error instead? */
+        exit(1);
+    }
+
+    if (flash_drive) {
+        /* Flash contents come from the user-supplied drive */
+        return 0;
+    }
+
+    /* No 'pflash' drive given: silently fall back to the default image */
+    rom_add_file_fixed(UBOOT_FILENAME, UBOOT_LOAD_BASE | high_bits, -1);
+
+    return 0;
+}
+
+/*
+ * Load the board's device tree blob, patch it for this VM and install it as
+ * a ROM blob at @addr.
+ *
+ * @addr: guest address the patched device tree is placed at
+ * @ramsize: RAM size written into the /memory "reg" property
+ * @initrd_base, @initrd_size: used for /chosen linux,initrd-start/-end
+ * @kernel_cmdline: written to /chosen "bootargs"
+ *
+ * Exits QEMU if the blob cannot be found or loaded.
+ * Returns the size of the loaded device tree.
+ */
+static int sam460ex_load_device_tree(hwaddr addr,
+ uint32_t ramsize,
+ hwaddr initrd_base,
+ hwaddr initrd_size,
+ const char *kernel_cmdline)
+{
+ uint32_t mem_reg_property[] = { 0, 0, cpu_to_be32(ramsize) };
+ char *filename;
+ int fdt_size;
+ void *fdt;
+ uint32_t tb_freq = CPU_FREQ;
+ uint32_t clock_freq = CPU_FREQ;
+ int offset;
+
+ filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE);
+ if (!filename) {
+ error_report("Couldn't find dtb file `%s'", BINARY_DEVICE_TREE_FILE);
+ exit(1);
+ }
+ fdt = load_device_tree(filename, &fdt_size);
+ if (!fdt) {
+ error_report("Couldn't load dtb file `%s'", filename);
+ g_free(filename);
+ exit(1);
+ }
+ g_free(filename);
+
+ /* Manipulate device tree in memory. */
+
+ qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property,
+ sizeof(mem_reg_property));
+
+ /* default FDT doesn't have a /chosen node... */
+ qemu_fdt_add_subnode(fdt, "/chosen");
+
+ qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", initrd_base);
+
+ qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end",
+ (initrd_base + initrd_size));
+
+ qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", kernel_cmdline);
+
+ /* Copy data from the host device tree into the guest. Since the guest can
+ * directly access the timebase without host involvement, we must expose
+ * the correct frequencies. */
+ if (kvm_enabled()) {
+ tb_freq = kvmppc_get_tbfreq();
+ clock_freq = kvmppc_get_clockfreq();
+ }
+
+ qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency",
+ clock_freq);
+ qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency",
+ tb_freq);
+
+ /* Remove cpm node if it exists (it is not emulated) */
+ offset = fdt_path_offset(fdt, "/cpm");
+ if (offset >= 0) {
+ _FDT(fdt_nop_node(fdt, offset));
+ }
+
+ /* set serial port clocks */
+ offset = fdt_node_offset_by_compatible(fdt, -1, "ns16550");
+ while (offset >= 0) {
+ _FDT(fdt_setprop_cell(fdt, offset, "clock-frequency", UART_FREQ));
+ offset = fdt_node_offset_by_compatible(fdt, offset, "ns16550");
+ }
+
+ /* some more clocks */
+ qemu_fdt_setprop_cell(fdt, "/plb", "clock-frequency",
+ PLB_FREQ);
+ qemu_fdt_setprop_cell(fdt, "/plb/opb", "clock-frequency",
+ OPB_FREQ);
+ qemu_fdt_setprop_cell(fdt, "/plb/opb/ebc", "clock-frequency",
+ EBC_FREQ);
+
+ /* Hand the patched blob to the ROM loader, then drop our copy */
+ rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
+ g_free(fdt);
+
+ return fdt_size;
+}
+
+/* Create reset TLB entries for BookE, mapping only the flash memory. */
+/*
+ * Fills TLB entry 0 with a 256 MiB read/write/execute mapping of effective
+ * address 0xf0000000, used when booting U-Boot.
+ */
+static void mmubooke_create_initial_mapping_uboot(CPUPPCState *env)
+{
+ ppcemb_tlb_t *tlb = &env->tlb.tlbe[0];
+
+ /* on reset the flash is mapped by a shadow TLB,
+ * but since we don't implement them we need to use
+ * the same values U-Boot will use to avoid a fault.
+ */
+ tlb->attr = 0;
+ tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4);
+ tlb->size = 0x10000000; /* up to 0xffffffff */
+ tlb->EPN = 0xf0000000 & TARGET_PAGE_MASK;
+ tlb->RPN = (0xf0000000 & TARGET_PAGE_MASK) | 0x4;
+ tlb->PID = 0;
+}
+
+/* Create reset TLB entries for BookE, spanning the 32bit addr space. */
+/*
+ * Fills TLB entry 0 with a 2 GiB read/write/execute mapping from effective
+ * address @va to physical address @pa (both truncated to a page boundary).
+ */
+static void mmubooke_create_initial_mapping(CPUPPCState *env,
+                                            target_ulong va,
+                                            hwaddr pa)
+{
+    ppcemb_tlb_t *tlb = &env->tlb.tlbe[0];
+
+    tlb->attr = 0;
+    tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4);
+    /*
+     * Unsigned constant: 1 << 31 overflows a signed int and would be
+     * sign-extended when stored into a 64-bit size field.
+     */
+    tlb->size = 1U << 31; /* up to 0x80000000 */
+    tlb->EPN = va & TARGET_PAGE_MASK;
+    tlb->RPN = pa & TARGET_PAGE_MASK;
+    tlb->PID = 0;
+}
+
+/*
+ * Reset handler for the CPU.  After the generic CPU reset, either jumps to
+ * the U-Boot entry point with the flash mapping in place, or sets up the
+ * registers and the initial mapping for a directly loaded kernel.
+ */
+static void main_cpu_reset(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    struct boot_info *info = env->load_info;
+
+    cpu_reset(CPU(cpu));
+
+    if (info->entry == UBOOT_ENTRY) {
+        /* No kernel was loaded: start in the firmware */
+        env->nip = UBOOT_ENTRY;
+        mmubooke_create_initial_mapping_uboot(env);
+        return;
+    }
+
+    /* We have a kernel to boot */
+    env->gpr[1] = (16 * MiB) - 8;
+    env->gpr[3] = FDT_ADDR;
+    env->nip = info->entry;
+
+    /* Create a mapping for the kernel. */
+    mmubooke_create_initial_mapping(env, 0, 0);
+    env->gpr[6] = tswap32(EPAPR_MAGIC);
+    env->gpr[7] = (16 * MiB) - 8; /* bi->ima_size; */
+}
+
+/*
+ * Board init for the Sam460ex machine: creates the CPU, the four cascaded
+ * UICs, the SDRAM controller, I2C buses with SPD EEPROM and RTC, the
+ * on-chip peripherals (EBC, CPR, AHB, SDR, MAL, DMA, L2 SRAM), USB, PCI
+ * and serial ports, then loads either U-Boot or a kernel (plus optional
+ * initrd and the device tree) and records the entry point for reset.
+ *
+ * Exits QEMU on unsupported CPU models or on load failures.
+ */
+static void sam460ex_init(MachineState *machine)
+{
+    MemoryRegion *address_space_mem = get_system_memory();
+    MemoryRegion *isa = g_new(MemoryRegion, 1);
+    MemoryRegion *ram_memories = g_new(MemoryRegion, SDRAM_NR_BANKS);
+    hwaddr ram_bases[SDRAM_NR_BANKS] = {0};
+    hwaddr ram_sizes[SDRAM_NR_BANKS] = {0};
+    MemoryRegion *l2cache_ram = g_new(MemoryRegion, 1);
+    DeviceState *uic[4];
+    qemu_irq mal_irqs[4];
+    int i;
+    PCIBus *pci_bus;
+    PowerPCCPU *cpu;
+    CPUPPCState *env;
+    I2CBus *i2c;
+    hwaddr entry = UBOOT_ENTRY;
+    target_long initrd_size = 0;
+    DeviceState *dev;
+    SysBusDevice *sbdev;
+    struct boot_info *boot_info;
+    uint8_t *spd_data;
+    int success;
+
+    cpu = POWERPC_CPU(cpu_create(machine->cpu_type));
+    env = &cpu->env;
+    if (env->mmu_model != POWERPC_MMU_BOOKE) {
+        error_report("Only MMU model BookE is supported by this machine.");
+        exit(1);
+    }
+
+    qemu_register_reset(main_cpu_reset, cpu);
+    boot_info = g_malloc0(sizeof(*boot_info));
+    env->load_info = boot_info;
+
+    ppc_booke_timers_init(cpu, CPU_FREQ, 0);
+    ppc_dcr_init(env, NULL, NULL);
+
+    /* PLB arbiter */
+    ppc4xx_plb_init(env);
+
+    /* interrupt controllers */
+    for (i = 0; i < ARRAY_SIZE(uic); i++) {
+        SysBusDevice *sbd;
+        /*
+         * UICs 1, 2 and 3 are cascaded through UIC 0.
+         * input_ints[n] is the interrupt number on UIC 0 which
+         * the INT output of UIC n is connected to. The CINT output
+         * of UIC n connects to input_ints[n] + 1.
+         * The entry in input_ints[] for UIC 0 is ignored, because UIC 0's
+         * INT and CINT outputs are connected to the CPU.
+         */
+        const int input_ints[] = { -1, 30, 10, 16 };
+
+        uic[i] = qdev_new(TYPE_PPC_UIC);
+        sbd = SYS_BUS_DEVICE(uic[i]);
+
+        qdev_prop_set_uint32(uic[i], "dcr-base", 0xc0 + i * 0x10);
+        object_property_set_link(OBJECT(uic[i]), "cpu", OBJECT(cpu),
+                                 &error_fatal);
+        sysbus_realize_and_unref(sbd, &error_fatal);
+
+        if (i == 0) {
+            sysbus_connect_irq(sbd, PPCUIC_OUTPUT_INT,
+                               ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]);
+            sysbus_connect_irq(sbd, PPCUIC_OUTPUT_CINT,
+                               ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]);
+        } else {
+            sysbus_connect_irq(sbd, PPCUIC_OUTPUT_INT,
+                               qdev_get_gpio_in(uic[0], input_ints[i]));
+            sysbus_connect_irq(sbd, PPCUIC_OUTPUT_CINT,
+                               qdev_get_gpio_in(uic[0], input_ints[i] + 1));
+        }
+    }
+
+    /* SDRAM controller */
+    /* put all RAM on first bank because board has one slot
+     * and firmware only checks that */
+    ppc4xx_sdram_banks(machine->ram, 1, ram_memories, ram_bases, ram_sizes,
+                       ppc460ex_sdram_bank_sizes);
+
+    /* FIXME: does 460EX have ECC interrupts? */
+    ppc440_sdram_init(env, SDRAM_NR_BANKS, ram_memories,
+                      ram_bases, ram_sizes, 1);
+
+    /* IIC controllers and devices */
+    dev = sysbus_create_simple(TYPE_PPC4xx_I2C, 0x4ef600700,
+                               qdev_get_gpio_in(uic[0], 2));
+    i2c = PPC4xx_I2C(dev)->bus;
+    /* SPD EEPROM on RAM module */
+    spd_data = spd_data_generate(ram_sizes[0] < 128 * MiB ? DDR : DDR2,
+                                 ram_sizes[0]);
+    spd_data[20] = 4; /* SO-DIMM module */
+    smbus_eeprom_init_one(i2c, 0x50, spd_data);
+    /* RTC */
+    i2c_slave_create_simple(i2c, "m41t80", 0x68);
+
+    dev = sysbus_create_simple(TYPE_PPC4xx_I2C, 0x4ef600800,
+                               qdev_get_gpio_in(uic[0], 3));
+
+    /* External bus controller */
+    ppc405_ebc_init(env);
+
+    /* CPR */
+    ppc4xx_cpr_init(env);
+
+    /* PLB to AHB bridge */
+    ppc4xx_ahb_init(env);
+
+    /* System DCRs */
+    ppc4xx_sdr_init(env);
+
+    /* MAL: its four interrupts are UIC 2 inputs 3..6 */
+    for (i = 0; i < ARRAY_SIZE(mal_irqs); i++) {
+        mal_irqs[i] = qdev_get_gpio_in(uic[2], 3 + i);
+    }
+    ppc4xx_mal_init(env, 4, 16, mal_irqs);
+
+    /* DMA */
+    ppc4xx_dma_init(env, 0x200);
+
+    /* 256K of L2 cache as memory */
+    ppc4xx_l2sram_init(env);
+    /* FIXME: remove this after fixing l2sram mapping in ppc440_uc.c? */
+    memory_region_init_ram(l2cache_ram, NULL, "ppc440.l2cache_ram", 256 * KiB,
+                           &error_abort);
+    memory_region_add_subregion(address_space_mem, 0x400000000LL, l2cache_ram);
+
+    /* USB */
+    sysbus_create_simple(TYPE_PPC4xx_EHCI, 0x4bffd0400,
+                         qdev_get_gpio_in(uic[2], 29));
+    dev = qdev_new("sysbus-ohci");
+    qdev_prop_set_string(dev, "masterbus", "usb-bus.0");
+    qdev_prop_set_uint32(dev, "num-ports", 6);
+    sbdev = SYS_BUS_DEVICE(dev);
+    sysbus_realize_and_unref(sbdev, &error_fatal);
+    sysbus_mmio_map(sbdev, 0, 0x4bffd0000);
+    sysbus_connect_irq(sbdev, 0, qdev_get_gpio_in(uic[2], 30));
+    usb_create_simple(usb_bus_find(-1), "usb-kbd");
+    usb_create_simple(usb_bus_find(-1), "usb-mouse");
+
+    /* PCI bus */
+    ppc460ex_pcie_init(env);
+    /* All PCI irqs are connected to the same UIC pin (cf. UBoot source) */
+    dev = sysbus_create_simple("ppc440-pcix-host", 0xc0ec00000,
+                               qdev_get_gpio_in(uic[1], 0));
+    pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0"));
+
+    memory_region_init_alias(isa, NULL, "isa_mmio", get_system_io(),
+                             0, 0x10000);
+    memory_region_add_subregion(get_system_memory(), 0xc08000000, isa);
+
+    /* PCI devices */
+    pci_create_simple(pci_bus, PCI_DEVFN(6, 0), "sm501");
+    /* SoC has a single SATA port but we don't emulate that yet
+     * However, firmware and usual clients have driver for SiI311x
+     * so add one for convenience by default */
+    if (defaults_enabled()) {
+        pci_create_simple(pci_bus, -1, "sii3112");
+    }
+
+    /* SoC has 4 UARTs
+     * but board has only one wired and two are present in fdt */
+    if (serial_hd(0) != NULL) {
+        serial_mm_init(address_space_mem, 0x4ef600300, 0,
+                       qdev_get_gpio_in(uic[1], 1),
+                       PPC_SERIAL_MM_BAUDBASE, serial_hd(0),
+                       DEVICE_BIG_ENDIAN);
+    }
+    if (serial_hd(1) != NULL) {
+        serial_mm_init(address_space_mem, 0x4ef600400, 0,
+                       qdev_get_gpio_in(uic[0], 1),
+                       PPC_SERIAL_MM_BAUDBASE, serial_hd(1),
+                       DEVICE_BIG_ENDIAN);
+    }
+
+    /* Load U-Boot image. */
+    if (!machine->kernel_filename) {
+        success = sam460ex_load_uboot();
+        if (success < 0) {
+            error_report("could not load firmware");
+            exit(1);
+        }
+    }
+
+    /* Load kernel. */
+    if (machine->kernel_filename) {
+        hwaddr loadaddr = LOAD_UIMAGE_LOADADDR_INVALID;
+        success = load_uimage(machine->kernel_filename, &entry, &loadaddr,
+                              NULL, NULL, NULL);
+        if (success < 0) {
+            uint64_t elf_entry;
+
+            /* Not a uImage: try again as ELF */
+            success = load_elf(machine->kernel_filename, NULL, NULL, NULL,
+                               &elf_entry, NULL, NULL, NULL,
+                               1, PPC_ELF_MACHINE, 0, 0);
+            entry = elf_entry;
+        }
+        /* XXX try again as binary */
+        if (success < 0) {
+            error_report("could not load kernel '%s'",
+                         machine->kernel_filename);
+            exit(1);
+        }
+    }
+
+    /* Load initrd. */
+    if (machine->initrd_filename) {
+        initrd_size = load_image_targphys(machine->initrd_filename,
+                                          RAMDISK_ADDR,
+                                          machine->ram_size - RAMDISK_ADDR);
+        if (initrd_size < 0) {
+            error_report("could not load ram disk '%s' at %x",
+                         machine->initrd_filename, RAMDISK_ADDR);
+            exit(1);
+        }
+    }
+
+    /* If we're loading a kernel directly, we must load the device tree too. */
+    if (machine->kernel_filename) {
+        int dt_size;
+
+        dt_size = sam460ex_load_device_tree(FDT_ADDR, machine->ram_size,
+                                            RAMDISK_ADDR, initrd_size,
+                                            machine->kernel_cmdline);
+
+        boot_info->dt_base = FDT_ADDR;
+        boot_info->dt_size = dt_size;
+    }
+
+    boot_info->entry = entry;
+}
+
+/*
+ * Class initializer for the "sam460ex" machine type: sets the description,
+ * the board init callback and the CPU/RAM defaults.
+ */
+static void sam460ex_machine_init(MachineClass *mc)
+{
+ mc->desc = "aCube Sam460ex";
+ mc->init = sam460ex_init;
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("460exb");
+ mc->default_ram_size = 512 * MiB;
+ mc->default_ram_id = "ppc4xx.sdram";
+}
+
+DEFINE_MACHINE("sam460ex", sam460ex_machine_init)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
new file mode 100644
index 000000000..3b5fd749b
--- /dev/null
+++ b/hw/ppc/spapr.c
@@ -0,0 +1,5136 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * Copyright (c) 2004-2007 Fabrice Bellard
+ * Copyright (c) 2007 Jocelyn Mayer
+ * Copyright (c) 2010 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-machine.h"
+#include "qapi/qapi-events-qdev.h"
+#include "qapi/visitor.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
+#include "sysemu/qtest.h"
+#include "sysemu/reset.h"
+#include "sysemu/runstate.h"
+#include "qemu/log.h"
+#include "hw/fw-path-provider.h"
+#include "elf.h"
+#include "net/net.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/cpus.h"
+#include "sysemu/hw_accel.h"
+#include "kvm_ppc.h"
+#include "migration/misc.h"
+#include "migration/qemu-file-types.h"
+#include "migration/global_state.h"
+#include "migration/register.h"
+#include "migration/blocker.h"
+#include "mmu-hash64.h"
+#include "mmu-book3s-v3.h"
+#include "cpu-models.h"
+#include "hw/core/cpu.h"
+
+#include "hw/ppc/ppc.h"
+#include "hw/loader.h"
+
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "hw/qdev-properties.h"
+#include "hw/pci-host/spapr.h"
+#include "hw/pci/msi.h"
+
+#include "hw/pci/pci.h"
+#include "hw/scsi/scsi.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "hw/virtio/vhost-scsi-common.h"
+
+#include "exec/ram_addr.h"
+#include "hw/usb.h"
+#include "qemu/config-file.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "hw/nmi.h"
+#include "hw/intc/intc.h"
+
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/mem/memory-device.h"
+#include "hw/ppc/spapr_tpm_proxy.h"
+#include "hw/ppc/spapr_nvdimm.h"
+#include "hw/ppc/spapr_numa.h"
+#include "hw/ppc/pef.h"
+
+#include "monitor/monitor.h"
+
+#include <libfdt.h>
+
+/* SLOF memory layout:
+ *
+ * The SLOF raw image is loaded at 0. It copies its romfs right below the
+ * flat device-tree, then positions SLOF itself 31M below that.
+ *
+ * So we set FW_OVERHEAD to 40MB, which should account for all of that
+ * and more.
+ *
+ * We load our kernel at 4M, leaving space for the initial SLOF image.
+ */
+#define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */
+#define FW_MAX_SIZE 0x400000 /* 4 MiB */
+#define FW_FILE_NAME "slof.bin"
+#define FW_FILE_NAME_VOF "vof.bin"
+#define FW_OVERHEAD 0x2800000 /* 40 MiB, see the layout note above */
+#define KERNEL_LOAD_ADDR FW_MAX_SIZE /* kernel is loaded at 4M */
+
+#define MIN_RMA_SLOF (128 * MiB)
+
+#define PHANDLE_INTC 0x00001111
+
+/* These two functions implement the VCPU id numbering: one to compute them
+ * all and one to identify thread 0 of a VCORE. Any change to the first one
+ * is likely to have an impact on the second one, so let's keep them close.
+ */
+static int spapr_vcpu_id(SpaprMachineState *spapr, int cpu_index)
+{
+    MachineState *ms = MACHINE(spapr);
+    unsigned int threads_per_core = ms->smp.threads;
+    unsigned int core, thread;
+
+    assert(spapr->vsmt);
+
+    /* Cores are spaced vsmt ids apart, threads are packed inside a core */
+    core = cpu_index / threads_per_core;
+    thread = cpu_index % threads_per_core;
+    return core * spapr->vsmt + thread;
+}
+static bool spapr_is_thread0_in_vcore(SpaprMachineState *spapr,
+                                      PowerPCCPU *cpu)
+{
+    int vcpu_id;
+
+    assert(spapr->vsmt);
+
+    /* Thread 0 of a virtual core has an id that is a multiple of vsmt */
+    vcpu_id = spapr_get_vcpu_id(cpu);
+    return !(vcpu_id % spapr->vsmt);
+}
+
+/*
+ * .needed callback of pre_2_10_vmstate_dummy_icp: unconditionally reports
+ * that the section is not needed. @opaque is not used.
+ */
+static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque)
+{
+ /* Dummy entries correspond to unused ICPState objects in older QEMUs,
+ * and newer QEMUs don't even have them. In both cases, we don't want
+ * to send anything on the wire.
+ */
+ return false;
+}
+
+/*
+ * Placeholder description registered under the name "icp/server". All of
+ * its fields are VMSTATE_UNUSED padding and its .needed callback always
+ * returns false.
+ */
+static const VMStateDescription pre_2_10_vmstate_dummy_icp = {
+ .name = "icp/server",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = pre_2_10_vmstate_dummy_icp_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UNUSED(4), /* uint32_t xirr */
+ VMSTATE_UNUSED(1), /* uint8_t pending_priority */
+ VMSTATE_UNUSED(1), /* uint8_t mfrr */
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Register the placeholder "icp/server" description. @i is handed over
+ * both as the second argument and, cast to a pointer, as the opaque value.
+ */
+static void pre_2_10_vmstate_register_dummy_icp(int i)
+{
+    void *opaque = (void *)(uintptr_t) i;
+
+    vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp, opaque);
+}
+
+/*
+ * Drop the placeholder "icp/server" description whose opaque value is @i
+ * cast to a pointer, i.e. the value used by the register helper.
+ */
+static void pre_2_10_vmstate_unregister_dummy_icp(int i)
+{
+    void *opaque = (void *)(uintptr_t) i;
+
+    vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp, opaque);
+}
+
+/*
+ * Upper bound for the server numbers: max_cpus scaled by vsmt and divided,
+ * rounding up, by the number of threads per core.
+ */
+int spapr_max_server_number(SpaprMachineState *spapr)
+{
+    MachineState *ms = MACHINE(spapr);
+    unsigned int threads_per_core = ms->smp.threads;
+
+    assert(spapr->vsmt);
+    return DIV_ROUND_UP(ms->smp.max_cpus * spapr->vsmt, threads_per_core);
+}
+
+/*
+ * Write the SMT related properties of a CPU node: "cpu-version" when a
+ * compat PVR is set, then the interrupt server and gserver lists covering
+ * @smt_threads consecutive ids starting at the vcpu id of @cpu.
+ *
+ * Returns the result of the last libfdt call; negative on error.
+ */
+static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu,
+                                  int smt_threads)
+{
+    uint32_t servers_prop[smt_threads];
+    uint32_t gservers_prop[smt_threads * 2];
+    int first_id = spapr_get_vcpu_id(cpu);
+    int t, rc;
+
+    if (cpu->compat_pvr) {
+        rc = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->compat_pvr);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    /* Build interrupt servers and gservers properties */
+    for (t = 0; t < smt_threads; t++) {
+        uint32_t server = cpu_to_be32(first_id + t);
+
+        servers_prop[t] = server;
+        /* Hack, direct the group queues back to cpu 0 */
+        gservers_prop[2 * t] = server;
+        gservers_prop[2 * t + 1] = 0;
+    }
+
+    rc = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s",
+                     servers_prop, sizeof(servers_prop));
+    if (rc < 0) {
+        return rc;
+    }
+
+    return fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s",
+                       gservers_prop, sizeof(gservers_prop));
+}
+
+/*
+ * Add the "ibm,pa-features" property to the CPU node at @offset.
+ *
+ * The table is chosen from the compatibility levels @cpu satisfies (2.06,
+ * 2.07, 3.00); nothing is written when none matches. Each table starts
+ * with a two byte header, so attribute byte N lives at index N + 2. A few
+ * bits are then adjusted according to CPU and machine state.
+ */
+static void spapr_dt_pa_features(SpaprMachineState *spapr,
+                                 PowerPCCPU *cpu,
+                                 void *fdt, int offset)
+{
+    uint8_t pa_features_206[] = { 6, 0,
+        0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 };
+    uint8_t pa_features_207[] = { 24, 0,
+        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
+        0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00 };
+    uint8_t pa_features_300[] = { 66, 0,
+        /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */
+        /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, SSO, 5: LE|CFAR|EB|LSQ */
+        0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */
+        /* 6: DS207 */
+        0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */
+        /* 16: Vector */
+        0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */
+        /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */
+        /* 24: Ext. Dec, 26: 64 bit ftrs, 28: PM ftrs */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */
+        /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */
+        0x80, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */
+        /* 36: SPR SO, 38: Copy/Paste, 40: Radix MMU */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 36 - 41 */
+        /* 42: PM, 44: PC RA, 46: SC vec'd */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */
+        /* 48: SIMD, 50: QP BFP, 52: String */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */
+        /* 54: DecFP, 56: DecI, 58: SHA */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */
+        /* 60: NM atomic, 62: RNG */
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */
+    };
+    uint8_t *pa_features = NULL;
+    size_t pa_size;
+
+    /* A later match overrides an earlier one, so the newest level wins */
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_06, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_206;
+        pa_size = sizeof(pa_features_206);
+    }
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_07, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_207;
+        pa_size = sizeof(pa_features_207);
+    }
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_300;
+        pa_size = sizeof(pa_features_300);
+    }
+    if (!pa_features) {
+        return;
+    }
+
+    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
+        /*
+         * Note: we keep CI large pages off by default because a 64K capable
+         * guest provisioned with large pages might otherwise try to map a qemu
+         * framebuffer (or other kind of memory mapped PCI BAR) using 64K pages
+         * even if that qemu runs on a 4k host.
+         * We add this bit back here if we are confident this is not an issue
+         */
+        pa_features[3] |= 0x20;
+    }
+    if ((spapr_get_cap(spapr, SPAPR_CAP_HTM) != 0) && pa_size > 24) {
+        pa_features[24] |= 0x80; /* Transactional memory support */
+    }
+    /* The guard must cover the index actually written below (40 + 2) */
+    if (spapr->cas_pre_isa3_guest && pa_size > 40 + 2) {
+        /* Workaround for broken kernels that attempt (guest) radix
+         * mode when they can't handle it, if they see the radix bit set
+         * in pa-features. So hide it from them. */
+        pa_features[40 + 2] &= ~0x80; /* Radix MMU */
+    }
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size)));
+}
+
+/*
+ * Size used for the first populated NUMA node: its memory rounded down to
+ * a power of two and capped at ram_size. Falls back to ram_size when no
+ * node has memory.
+ */
+static hwaddr spapr_node0_size(MachineState *machine)
+{
+    NodeInfo *nodes = machine->numa_state->nodes;
+    int nb_nodes = machine->numa_state->num_nodes;
+    int n;
+
+    for (n = 0; n < nb_nodes; n++) {
+        if (!nodes[n].node_mem) {
+            continue;
+        }
+        return MIN(pow2floor(nodes[n].node_mem), machine->ram_size);
+    }
+
+    return machine->ram_size;
+}
+
+/* Append @s1 to @s, terminating NUL byte included. */
+static void add_str(GString *s, const gchar *s1)
+{
+    gssize len_with_nul = strlen(s1) + 1;
+
+    g_string_append_len(s, s1, len_with_nul);
+}
+
+/*
+ * Create a "memory@<start>" node under the root describing the range
+ * [@start, @start + @size) and attach the associativity of @nodeid to it.
+ *
+ * Returns the offset of the new node. Failures of the libfdt calls are
+ * handled by the _FDT() wrapper.
+ */
+static int spapr_dt_memory_node(SpaprMachineState *spapr, void *fdt, int nodeid,
+                                hwaddr start, hwaddr size)
+{
+    char mem_name[32];
+    uint64_t mem_reg_property[2] = {
+        cpu_to_be64(start),
+        cpu_to_be64(size),
+    };
+    int off;
+
+    /* Bounded formatting so the name can never overrun mem_name */
+    snprintf(mem_name, sizeof(mem_name), "memory@%" HWADDR_PRIx, start);
+    off = fdt_add_subnode(fdt, 0, mem_name);
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                      sizeof(mem_reg_property))));
+    spapr_numa_write_associativity_dt(spapr, fdt, off, nodeid);
+    return off;
+}
+
+/*
+ * Look up the node of the DIMM in @list whose [addr, addr + size) range
+ * contains @addr. Returns -1 (as uint32_t) when no DIMM entry matches.
+ */
+static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr)
+{
+    MemoryDeviceInfoList *it;
+
+    for (it = list; it; it = it->next) {
+        MemoryDeviceInfo *dev = it->value;
+        PCDIMMDeviceInfo *dimm;
+
+        /* Only DIMM entries are considered */
+        if (!dev || dev->type != MEMORY_DEVICE_INFO_KIND_DIMM) {
+            continue;
+        }
+
+        dimm = dev->u.dimm.data;
+        if (addr < dimm->addr || addr >= (dimm->addr + dimm->size)) {
+            continue;
+        }
+        return dimm->node;
+    }
+
+    return -1;
+}
+
+/*
+ * One packed record of the "ibm,dynamic-memory-v2" property.
+ * spapr_get_drconf_cell() stores every field in big-endian byte order and
+ * spapr_dt_dynamic_memory_v2() copies the records verbatim into the
+ * property.
+ */
+struct sPAPRDrconfCellV2 {
+ uint32_t seq_lmbs; /* number of LMBs covered by this record */
+ uint64_t base_addr; /* start address of the first LMB */
+ uint32_t drc_index; /* spapr_drc_index() of the first LMB, or 0 */
+ uint32_t aa_index; /* callers pass the DIMM node, or -1 */
+ uint32_t flags; /* SPAPR_LMB_FLAGS_* bits */
+} QEMU_PACKED;
+
+/*
+ * Queue element wrapping one record while spapr_dt_dynamic_memory_v2()
+ * collects them; elements are allocated by spapr_get_drconf_cell().
+ */
+typedef struct DrconfCellQueue {
+ struct sPAPRDrconfCellV2 cell;
+ QSIMPLEQ_ENTRY(DrconfCellQueue) entry;
+} DrconfCellQueue;
+
+/*
+ * Allocate a zeroed queue element and fill its cell with the big-endian
+ * form of the arguments. The caller owns the result and must g_free() it.
+ */
+static DrconfCellQueue *
+spapr_get_drconf_cell(uint32_t seq_lmbs, uint64_t base_addr,
+                      uint32_t drc_index, uint32_t aa_index,
+                      uint32_t flags)
+{
+    DrconfCellQueue *qe = g_malloc0(sizeof(*qe));
+
+    qe->cell = (struct sPAPRDrconfCellV2) {
+        .seq_lmbs = cpu_to_be32(seq_lmbs),
+        .base_addr = cpu_to_be64(base_addr),
+        .drc_index = cpu_to_be32(drc_index),
+        .aa_index = cpu_to_be32(aa_index),
+        .flags = cpu_to_be32(flags),
+    };
+
+    return qe;
+}
+
+/*
+ * Build the "ibm,dynamic-memory-v2" property on the node at @offset.
+ *
+ * The property is a big-endian entry count followed by one packed record
+ * per range: one for everything below the device memory base, one per DIMM
+ * in @dimms, and one for every unoccupied gap of the device memory region.
+ *
+ * Returns 0 on success, -1 if the property could not be set.
+ */
+static int spapr_dt_dynamic_memory_v2(SpaprMachineState *spapr, void *fdt,
+                                      int offset, MemoryDeviceInfoList *dimms)
+{
+    MachineState *machine = MACHINE(spapr);
+    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
+    uint64_t dev_base = machine->device_memory->base;
+    uint64_t dev_end = dev_base +
+                       memory_region_size(&machine->device_memory->mr);
+    uint32_t nr_boot_lmbs = dev_base / lmb_size;
+    uint64_t next_free = dev_base;
+    uint32_t nr_cells = 0, prop_len;
+    uint8_t *prop, *wr;
+    int rc;
+    SpaprDrc *drc;
+    DrconfCellQueue *qe, *tmp;
+    MemoryDeviceInfoList *it;
+    QSIMPLEQ_HEAD(, DrconfCellQueue) cells = QSIMPLEQ_HEAD_INITIALIZER(cells);
+
+    /* Entry to cover RAM and the gap area */
+    qe = spapr_get_drconf_cell(nr_boot_lmbs, 0, 0, -1,
+                               SPAPR_LMB_FLAGS_RESERVED |
+                               SPAPR_LMB_FLAGS_DRC_INVALID);
+    QSIMPLEQ_INSERT_TAIL(&cells, qe, entry);
+    nr_cells++;
+
+    for (it = dimms; it; it = it->next) {
+        PCDIMMDeviceInfo *di;
+        uint64_t dimm_addr, dimm_size;
+        uint32_t dimm_node;
+
+        /*
+         * The NVDIMM area is hotpluggable after the NVDIMM is unplugged. The
+         * area is marked hotpluggable in the next iteration for the bigger
+         * chunk including the NVDIMM occupied area.
+         */
+        if (it->value->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) {
+            continue;
+        }
+
+        di = it->value->u.dimm.data;
+        dimm_addr = di->addr;
+        dimm_size = di->size;
+        dimm_node = di->node;
+
+        /* Entry for hot-pluggable area */
+        if (next_free < dimm_addr) {
+            drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, next_free / lmb_size);
+            g_assert(drc);
+            qe = spapr_get_drconf_cell((dimm_addr - next_free) / lmb_size,
+                                       next_free, spapr_drc_index(drc), -1, 0);
+            QSIMPLEQ_INSERT_TAIL(&cells, qe, entry);
+            nr_cells++;
+        }
+
+        /* Entry for DIMM */
+        drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, dimm_addr / lmb_size);
+        g_assert(drc);
+        qe = spapr_get_drconf_cell(dimm_size / lmb_size, dimm_addr,
+                                   spapr_drc_index(drc), dimm_node,
+                                   (SPAPR_LMB_FLAGS_ASSIGNED |
+                                    SPAPR_LMB_FLAGS_HOTREMOVABLE));
+        QSIMPLEQ_INSERT_TAIL(&cells, qe, entry);
+        nr_cells++;
+        next_free = dimm_addr + dimm_size;
+    }
+
+    /* Entry for remaining hotpluggable area */
+    if (next_free < dev_end) {
+        drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, next_free / lmb_size);
+        g_assert(drc);
+        qe = spapr_get_drconf_cell((dev_end - next_free) / lmb_size,
+                                   next_free, spapr_drc_index(drc), -1, 0);
+        QSIMPLEQ_INSERT_TAIL(&cells, qe, entry);
+        nr_cells++;
+    }
+
+    /* Serialise: entry count first, then the records in queue order */
+    prop_len = nr_cells * sizeof(struct sPAPRDrconfCellV2) + sizeof(uint32_t);
+    prop = wr = g_malloc0(prop_len);
+    *(uint32_t *)prop = cpu_to_be32(nr_cells);
+    wr += sizeof(nr_cells);
+
+    QSIMPLEQ_FOREACH_SAFE(qe, &cells, entry, tmp) {
+        memcpy(wr, &qe->cell, sizeof(qe->cell));
+        wr += sizeof(qe->cell);
+        QSIMPLEQ_REMOVE(&cells, qe, DrconfCellQueue, entry);
+        g_free(qe);
+    }
+
+    rc = fdt_setprop(fdt, offset, "ibm,dynamic-memory-v2", prop, prop_len);
+    g_free(prop);
+
+    return rc < 0 ? -1 : 0;
+}
+
+/*
+ * Build the "ibm,dynamic-memory" property on the node at @offset: a
+ * big-endian LMB count followed by one SPAPR_DR_LMB_LIST_ENTRY_SIZE-cell
+ * entry per LMB from address 0 up to the end of the device memory region.
+ *
+ * Returns 0 on success, -1 if the property could not be set.
+ */
+static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt,
+                                   int offset, MemoryDeviceInfoList *dimms)
+{
+    MachineState *machine = MACHINE(spapr);
+    int i, ret;
+    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
+    uint32_t device_lmb_start = machine->device_memory->base / lmb_size;
+    uint32_t nr_lmbs = (machine->device_memory->base +
+                        memory_region_size(&machine->device_memory->mr)) /
+                       lmb_size;
+    uint32_t *int_buf, *cell, buf_len;
+
+    /*
+     * Allocate enough buffer size to fit in ibm,dynamic-memory
+     */
+    buf_len = (nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1) * sizeof(uint32_t);
+    int_buf = g_malloc0(buf_len);
+    int_buf[0] = cpu_to_be32(nr_lmbs);
+    cell = int_buf + 1;
+
+    for (i = 0; i < nr_lmbs; i++, cell += SPAPR_DR_LMB_LIST_ENTRY_SIZE) {
+        uint64_t addr = i * lmb_size;
+        uint32_t drc_idx, node, flags;
+
+        if (i < device_lmb_start) {
+            /*
+             * LMB information for RMA, boot time RAM and gap b/n RAM and
+             * device memory region -- all these are marked as reserved
+             * and as having no valid DRC.
+             */
+            drc_idx = 0;
+            node = -1;
+            flags = SPAPR_LMB_FLAGS_RESERVED | SPAPR_LMB_FLAGS_DRC_INVALID;
+        } else {
+            SpaprDrc *drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, i);
+
+            g_assert(drc);
+            drc_idx = spapr_drc_index(drc);
+            node = spapr_pc_dimm_node(dimms, addr);
+            if (memory_region_present(get_system_memory(), addr)) {
+                flags = SPAPR_LMB_FLAGS_ASSIGNED;
+            } else {
+                flags = 0;
+            }
+        }
+
+        cell[0] = cpu_to_be32(addr >> 32);
+        cell[1] = cpu_to_be32(addr & 0xffffffff);
+        cell[2] = cpu_to_be32(drc_idx);
+        cell[3] = cpu_to_be32(0); /* reserved */
+        cell[4] = cpu_to_be32(node);
+        cell[5] = cpu_to_be32(flags);
+    }
+
+    ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len);
+    g_free(int_buf);
+
+    return ret < 0 ? -1 : 0;
+}
+
+/*
+ * Adds ibm,dynamic-reconfiguration-memory node.
+ * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation
+ * of this device tree node.
+ *
+ * Nothing is added when ram_size equals maxram_size. Returns 0 on success
+ * and a negative value if the node or one of its properties could not be
+ * created.
+ */
+static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
+                                                   void *fdt)
+{
+    MachineState *machine = MACHINE(spapr);
+    int ret, offset;
+    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
+    uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32),
+                                cpu_to_be32(lmb_size & 0xffffffff)};
+    MemoryDeviceInfoList *dimms = NULL;
+
+    /*
+     * Don't create the node if there is no device memory
+     */
+    if (machine->ram_size == machine->maxram_size) {
+        return 0;
+    }
+
+    offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory");
+    if (offset < 0) {
+        /* Report the real failure instead of a later bad-offset error */
+        return offset;
+    }
+
+    ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size,
+                      sizeof(prop_lmb_size));
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* ibm,dynamic-memory or ibm,dynamic-memory-v2 */
+    dimms = qmp_memory_device_list();
+    if (spapr_ovec_test(spapr->ov5_cas, OV5_DRMEM_V2)) {
+        ret = spapr_dt_dynamic_memory_v2(spapr, fdt, offset, dimms);
+    } else {
+        ret = spapr_dt_dynamic_memory(spapr, fdt, offset, dimms);
+    }
+    qapi_free_MemoryDeviceInfoList(dimms);
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    return spapr_numa_write_assoc_lookup_arrays(spapr, fdt, offset);
+}
+
+/*
+ * Emit the memory nodes for boot RAM, walking the NUMA nodes in order and
+ * splitting each node's share into naturally aligned power-of-two chunks.
+ * The first populated node starts with a dedicated node of rma_size.
+ * The dynamic reconfiguration node is added when OV5_DRCONF_MEMORY is set.
+ *
+ * Returns 0 on success or the non-zero result of
+ * spapr_dt_dynamic_reconfiguration_memory().
+ */
+static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt)
+{
+    MachineState *machine = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    NodeInfo *nodes = machine->numa_state->nodes;
+    int nb_nodes = machine->numa_state->num_nodes;
+    hwaddr next_start = 0;
+    int nid;
+
+    for (nid = 0; nid < nb_nodes; nid++) {
+        hwaddr remaining;
+
+        if (!nodes[nid].node_mem) {
+            continue;
+        }
+
+        /* Clamp the node's share to what is left of ram_size */
+        if (next_start >= machine->ram_size) {
+            remaining = 0;
+        } else {
+            remaining = nodes[nid].node_mem;
+            if (remaining > machine->ram_size - next_start) {
+                remaining = machine->ram_size - next_start;
+            }
+        }
+
+        if (!next_start) {
+            /* spapr_machine_init() checks for rma_size <= node0_size
+             * already */
+            spapr_dt_memory_node(spapr, fdt, nid, 0, spapr->rma_size);
+            next_start += spapr->rma_size;
+            remaining -= spapr->rma_size;
+        }
+
+        while (remaining) {
+            hwaddr chunk = pow2floor(remaining);
+
+            /* next_start != 0 here */
+            if (ctzl(next_start) < ctzl(chunk)) {
+                chunk = 1ULL << ctzl(next_start);
+            }
+
+            spapr_dt_memory_node(spapr, fdt, nid, next_start, chunk);
+            remaining -= chunk;
+            next_start += chunk;
+        }
+    }
+
+    /* Generate ibm,dynamic-reconfiguration-memory node if required */
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_DRCONF_MEMORY)) {
+        return 0;
+    }
+
+    g_assert(smc->dr_lmb_enabled);
+    return spapr_dt_dynamic_reconfiguration_memory(spapr, fdt);
+}
+
+/*
+ * Populate the CPU device tree node at @offset with the properties
+ * describing @cs. Failures of the libfdt calls are handled by _FDT().
+ */
+static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset,
+                         SpaprMachineState *spapr)
+{
+    MachineState *ms = MACHINE(spapr);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    int index = spapr_get_vcpu_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
+        : SPAPR_TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    unsigned int smp_threads = ms->smp.threads;
+    uint32_t vcpus_per_socket = smp_threads * ms->smp.cores;
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+    int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
+    SpaprDrc *drc;
+    int drc_index;
+    uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
+    int i;
+
+    /* The DRC index is only advertised when a CPU DRC exists for this id */
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index);
+    if (drc) {
+        drc_index = spapr_drc_index(drc);
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                           env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                           env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+                               pcc->l1_dcache_size)));
+    } else {
+        warn_report("Unknown L1 dcache size for cpu");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+                               pcc->l1_icache_size)));
+    } else {
+        warn_report("Unknown L1 icache size for cpu");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "slb-size",
+                           cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size",
+                           cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (ppc_has_spr(cpu, SPR_PURR)) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
+    }
+    /* SPURR is a separate register: test it, not PURR, for "ibm,spurr" */
+    if (ppc_has_spr(cpu, SPR_SPURR)) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
+    }
+
+    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                          segs, sizeof(segs))));
+    }
+
+    /* Advertise VSX (vector extensions) if available
+     *    1               == VMX / Altivec available
+     *    2               == VSX available
+     *
+     * Only CPUs for which we create core types in spapr_cpu_core.c
+     * are possible, and all of those have VMX */
+    if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2)));
+    } else {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1               == DFP available */
+    if (spapr_get_cap(spapr, SPAPR_CAP_DFP) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
+                                                      sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                          page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    spapr_dt_pa_features(spapr, cpu, fdt, offset);
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+                           cs->cpu_index / vcpus_per_socket)));
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
+                      pft_size_prop, sizeof(pft_size_prop))));
+
+    if (ms->numa_state->num_nodes > 1) {
+        _FDT(spapr_numa_fixup_cpu_dt(spapr, fdt, offset, cpu));
+    }
+
+    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
+
+    if (pcc->radix_page_info) {
+        for (i = 0; i < pcc->radix_page_info->count; i++) {
+            radix_AP_encodings[i] =
+                cpu_to_be32(pcc->radix_page_info->entries[i]);
+        }
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
+                          radix_AP_encodings,
+                          pcc->radix_page_info->count *
+                          sizeof(radix_AP_encodings[0]))));
+    }
+
+    /*
+     * We set this property to let the guest know that it can use the large
+     * decrementer and its width in bits.
+     */
+    if (spapr_get_cap(spapr, SPAPR_CAP_LARGE_DECREMENTER) != SPAPR_CAP_OFF) {
+        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
+                              pcc->lrg_decr_bits)));
+    }
+}
+
+/*
+ * Create the "cpus" node and one child per virtual core, i.e. for every
+ * CPU that is thread 0 of its vcore.
+ */
+static void spapr_dt_cpus(void *fdt, SpaprMachineState *spapr)
+{
+    CPUState **cpus = NULL;
+    CPUState *cs;
+    int nr_cpus = 0;
+    int cpus_offset;
+
+    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+    _FDT(cpus_offset);
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+
+    /*
+     * We walk the CPUs in reverse order to ensure that CPU DT nodes
+     * created by fdt_add_subnode() end up in the right order in FDT
+     * for the guest kernel to enumerate the CPUs correctly.
+     *
+     * The CPU list cannot be traversed in reverse order, so we first
+     * collect it into an array.
+     */
+    CPU_FOREACH(cs) {
+        cpus = g_renew(CPUState *, cpus, nr_cpus + 1);
+        cpus[nr_cpus++] = cs;
+    }
+
+    while (nr_cpus-- > 0) {
+        CPUState *cur = cpus[nr_cpus];
+        PowerPCCPU *cpu = POWERPC_CPU(cur);
+        int index = spapr_get_vcpu_id(cpu);
+        DeviceClass *dc = DEVICE_GET_CLASS(cur);
+        g_autofree char *nodename = NULL;
+        int offset;
+
+        if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
+            continue;
+        }
+
+        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+        _FDT(offset);
+        spapr_dt_cpu(cur, fdt, offset, spapr);
+    }
+
+    g_free(cpus);
+}
+
+/*
+ * Add "/ibm,platform-facilities" and its "ibm,random-v1" child.
+ * Returns 0 on success, -1 if a node or property could not be created.
+ */
+static int spapr_dt_rng(void *fdt)
+{
+    int facilities, rng;
+    int err;
+
+    facilities = qemu_fdt_add_subnode(fdt, "/ibm,platform-facilities");
+    if (facilities <= 0) {
+        return -1;
+    }
+    err = fdt_setprop_string(fdt, facilities, "device_type",
+                             "ibm,platform-facilities");
+    err |= fdt_setprop_cell(fdt, facilities, "#address-cells", 0x1);
+    err |= fdt_setprop_cell(fdt, facilities, "#size-cells", 0x0);
+
+    rng = fdt_add_subnode(fdt, facilities, "ibm,random-v1");
+    if (rng <= 0) {
+        return -1;
+    }
+    err |= fdt_setprop_string(fdt, rng, "compatible", "ibm,random");
+
+    /* Any failed property write above leaves err non-zero */
+    return err ? -1 : 0;
+}
+
+/*
+ * Create the "rtas" node: hypertas function lists, NUMA data, size and
+ * event properties, the LRDR capacity and finally the RTAS tokens.
+ */
+static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
+{
+    /* hcalls advertised unconditionally, in the order they are emitted */
+    static const char *const base_hcalls[] = {
+        "hcall-pft",
+        "hcall-term",
+        "hcall-dabr",
+        "hcall-interrupt",
+        "hcall-tce",
+        "hcall-vio",
+        "hcall-splpar",
+        "hcall-join",
+        "hcall-bulk",
+        "hcall-set-mode",
+        "hcall-sprg0",
+        "hcall-copy",
+        "hcall-debug",
+        "hcall-vphn",
+    };
+    MachineState *ms = MACHINE(spapr);
+    int rtas;
+    size_t n;
+    GString *hypertas = g_string_sized_new(256);
+    GString *qemu_hypertas = g_string_sized_new(256);
+    uint64_t max_device_addr = ms->device_memory->base +
+        memory_region_size(&ms->device_memory->mr);
+    uint32_t lrdr_capacity[] = {
+        cpu_to_be32(max_device_addr >> 32),
+        cpu_to_be32(max_device_addr & 0xffffffff),
+        cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32),
+        cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff),
+        cpu_to_be32(ms->smp.max_cpus / ms->smp.threads),
+    };
+
+    _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas"));
+
+    /* hypertas */
+    for (n = 0; n < ARRAY_SIZE(base_hcalls); n++) {
+        add_str(hypertas, base_hcalls[n]);
+    }
+    if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) {
+        add_str(hypertas, "hcall-rpt-invalidate");
+    }
+
+    add_str(qemu_hypertas, "hcall-memop1");
+
+    if (!kvm_enabled() || kvmppc_spapr_use_multitce()) {
+        add_str(hypertas, "hcall-multi-tce");
+    }
+
+    if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
+        add_str(hypertas, "hcall-hpt-resize");
+    }
+
+    _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions",
+                     hypertas->str, hypertas->len));
+    g_string_free(hypertas, TRUE);
+    _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions",
+                     qemu_hypertas->str, qemu_hypertas->len));
+    g_string_free(qemu_hypertas, TRUE);
+
+    spapr_numa_write_rtas_dt(spapr, fdt, rtas);
+
+    /*
+     * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
+     * and 16 bytes per CPU for system reset error log plus an extra 8 bytes.
+     *
+     * The system reset requirements are driven by existing Linux and PowerVM
+     * implementation which (contrary to PAPR) saves r3 in the error log
+     * structure like machine check, so Linux expects to find the saved r3
+     * value at the address in r3 upon FWNMI-enabled sreset interrupt (and
+     * does not look at the error value).
+     *
+     * System reset interrupts are not subject to interlock like machine
+     * check, so this memory area could be corrupted if the sreset is
+     * interrupted by a machine check (or vice versa) if it was shared. To
+     * prevent this, system reset uses per-CPU areas for the sreset save
+     * area. A system reset that interrupts a system reset handler could
+     * still overwrite this area, but Linux doesn't try to recover in that
+     * case anyway.
+     *
+     * The extra 8 bytes is required because Linux's FWNMI error log check
+     * is off-by-one.
+     *
+     * RTAS_MIN_SIZE is required for the RTAS blob itself.
+     */
+    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size",
+                          RTAS_MIN_SIZE +
+                          RTAS_ERROR_LOG_MAX +
+                          ms->smp.max_cpus * sizeof(uint64_t) * 2 +
+                          sizeof(uint64_t)));
+    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
+                          RTAS_ERROR_LOG_MAX));
+    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
+                          RTAS_EVENT_SCAN_RATE));
+
+    g_assert(msi_nonbroken);
+    _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0));
+
+    /*
+     * According to PAPR, rtas ibm,os-term does not guarantee a return
+     * back to the guest cpu.
+     *
+     * While an additional ibm,extended-os-term property indicates
+     * that rtas call return will always occur. Set this property.
+     */
+    _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0));
+
+    _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity",
+                     lrdr_capacity, sizeof(lrdr_capacity)));
+
+    spapr_dt_rtas_tokens(fdt, rtas);
+}
+
+/*
+ * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU
+ * and the XIVE features that the guest may request and thus the valid
+ * values for bytes 23..26 of option vector 5.
+ *
+ * The property is a list of (byte number, value) pairs written to the
+ * node at @chosen.
+ */
+static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt,
+                                          int chosen)
+{
+    PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu);
+    char val[2 * 4] = {
+        23, 0x00, /* XICS / XIVE mode */
+        24, 0x00, /* Hash/Radix, filled in below. */
+        25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */
+        26, 0x40, /* Radix options: GTSE == yes. */
+    };
+    char *xive_mode = &val[1];
+    char *mmu_mode = &val[3];
+
+    /* Interrupt mode offered by the machine's irq backend */
+    if (spapr->irq->xics && spapr->irq->xive) {
+        *xive_mode = SPAPR_OV5_XIVE_BOTH;
+    } else if (spapr->irq->xive) {
+        *xive_mode = SPAPR_OV5_XIVE_EXPLOIT;
+    } else {
+        assert(spapr->irq->xics);
+        *xive_mode = SPAPR_OV5_XIVE_LEGACY;
+    }
+
+    if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0,
+                          first_ppc_cpu->compat_pvr)) {
+        /*
+         * If we're in a pre POWER9 compat mode then the guest should
+         * do hash and use the legacy interrupt mode
+         */
+        *xive_mode = SPAPR_OV5_XIVE_LEGACY; /* XICS */
+        *mmu_mode = 0x00; /* Hash */
+        spapr_check_mmu_mode(false);
+    } else if (kvm_enabled()) {
+        if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) {
+            *mmu_mode = 0x80; /* OV5_MMU_BOTH */
+        } else if (kvmppc_has_cap_mmu_radix()) {
+            *mmu_mode = 0x40; /* OV5_MMU_RADIX_300 */
+        } else {
+            *mmu_mode = 0x00; /* Hash */
+        }
+    } else {
+        /* V3 MMU supports both hash and radix in tcg (with dynamic switching) */
+        *mmu_mode = 0xC0;
+    }
+
+    _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support",
+                     val, sizeof(val)));
+}
+
+/*
+ * Create the "chosen" node. The boot related properties (bootargs, initrd
+ * and kernel location, boot list, stdout path, ...) are only written when
+ * @reset is true; "ibm,architecture-vec-5" is always written.
+ */
+static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset)
+{
+    MachineState *machine = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+    int chosen;
+
+    _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen"));
+
+    if (reset) {
+        const char *boot_device = spapr->boot_device;
+        g_autofree char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
+        size_t cb = 0;
+        g_autofree char *bootlist = get_boot_devices_list(&cb);
+
+        if (machine->kernel_cmdline && machine->kernel_cmdline[0]) {
+            _FDT(fdt_setprop_string(fdt, chosen, "bootargs",
+                                    machine->kernel_cmdline));
+        }
+
+        if (spapr->initrd_size) {
+            _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start",
+                                  spapr->initrd_base));
+            _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end",
+                                  spapr->initrd_base + spapr->initrd_size));
+        }
+
+        if (spapr->kernel_size) {
+            uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr),
+                                  cpu_to_be64(spapr->kernel_size) };
+
+            _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel",
+                             &kprop, sizeof(kprop)));
+            if (spapr->kernel_le) {
+                _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0));
+            }
+        }
+        if (boot_menu) {
+            _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu)));
+        }
+        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width",
+                              graphic_width));
+        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height",
+                              graphic_height));
+        _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth",
+                              graphic_depth));
+
+        if (cb && bootlist) {
+            char *p;
+
+            /* The property uses spaces, not newlines, between entries */
+            for (p = bootlist; p < bootlist + cb; p++) {
+                if (*p == '\n') {
+                    *p = ' ';
+                }
+            }
+            _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist));
+        }
+
+        if (boot_device && boot_device[0]) {
+            _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device",
+                                    boot_device));
+        }
+
+        if (!spapr->has_graphics && stdout_path) {
+            /*
+             * "linux,stdout-path" and "stdout" properties are
+             * deprecated by linux kernel. New platforms should only
+             * use the "stdout-path" property. Set the new property
+             * and continue using older property to remain compatible
+             * with the existing firmware.
+             */
+            _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path",
+                                    stdout_path));
+            _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path));
+        }
+
+        /*
+         * We can deal with BAR reallocation just fine, advertise it
+         * to the guest
+         */
+        if (smc->linux_pci_probe) {
+            _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0));
+        }
+
+        spapr_dt_ov5_platform_support(spapr, fdt, chosen);
+
+        /* stdout_path and bootlist are released by g_autofree here */
+    }
+
+    _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5"));
+}
+
+/*
+ * Add the /hypervisor node. It is not part of PAPR: it is a hack that lets
+ * PR KVM work under pHyp with some guest co-operation.
+ */
+static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt)
+{
+    uint8_t insns[16];
+    int node;
+
+    _FDT(node = fdt_add_subnode(fdt, 0, "hypervisor"));
+
+    /* indicate KVM hypercall interface */
+    _FDT(fdt_setprop_string(fdt, node, "compatible", "linux,kvm"));
+
+    /*
+     * Older KVM versions with older guest kernels were broken with the
+     * magic page, so the hypercall instructions are only advertised when
+     * the fixup capability exists and the sequence could be fetched.
+     */
+    if (kvmppc_has_cap_fixup_hcalls() &&
+        !kvmppc_get_hypercall(first_cpu->env_ptr, insns, sizeof(insns))) {
+        _FDT(fdt_setprop(fdt, node, "hcall-instructions",
+                         insns, sizeof(insns)));
+    }
+}
+
+void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space)
+{ /*
+ * Build a complete flattened device tree for the machine in a freshly
+ * allocated, zero-filled buffer of @space bytes and return it. The
+ * buffer comes from g_malloc0(), so the caller releases it with g_free().
+ * @reset is forwarded to spapr_dt_chosen() and additionally gates the
+ * memory reserve map for the kernel and initrd. A negative status from
+ * the memory, rng, PHB or CPU DRC helpers is reported and ends the
+ * process with exit(1).
+ */
+ MachineState *machine = MACHINE(spapr);
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+ uint32_t root_drc_type_mask = 0;
+ int ret;
+ void *fdt;
+ SpaprPhbState *phb;
+ char *buf;
+
+ fdt = g_malloc0(space);
+ _FDT((fdt_create_empty_tree(fdt, space)));
+
+ /* Root node */
+ _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp"));
+ _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)"));
+ _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries"));
+
+ /* Guest UUID & Name */
+ buf = qemu_uuid_unparse_strdup(&qemu_uuid);
+ _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf));
+ if (qemu_uuid_set) {
+ _FDT(fdt_setprop_string(fdt, 0, "system-id", buf));
+ }
+ g_free(buf);
+
+ if (qemu_get_vm_name()) {
+ _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name",
+ qemu_get_vm_name()));
+ }
+
+ /* Host Model & Serial Number */
+ if (spapr->host_model) {
+ _FDT(fdt_setprop_string(fdt, 0, "host-model", spapr->host_model));
+ } else if (smc->broken_host_serial_model && kvmppc_get_host_model(&buf)) {
+ _FDT(fdt_setprop_string(fdt, 0, "host-model", buf));
+ g_free(buf); /* string handed back through &buf is released here */
+ }
+
+ if (spapr->host_serial) {
+ _FDT(fdt_setprop_string(fdt, 0, "host-serial", spapr->host_serial));
+ } else if (smc->broken_host_serial_model && kvmppc_get_host_serial(&buf)) {
+ _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf));
+ g_free(buf); /* string handed back through &buf is released here */
+ }
+
+ _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2));
+ _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2));
+
+ /* /interrupt controller */
+ spapr_irq_dt(spapr, spapr_max_server_number(spapr), fdt, PHANDLE_INTC);
+
+ ret = spapr_dt_memory(spapr, fdt);
+ if (ret < 0) {
+ error_report("couldn't setup memory nodes in fdt");
+ exit(1);
+ }
+
+ /* /vdevice */
+ spapr_dt_vdevice(spapr->vio_bus, fdt);
+
+ if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { /* only when an object of that type resolves */
+ ret = spapr_dt_rng(fdt);
+ if (ret < 0) {
+ error_report("could not set up rng device in the fdt");
+ exit(1);
+ }
+ }
+
+ QLIST_FOREACH(phb, &spapr->phbs, list) {
+ ret = spapr_dt_phb(spapr, phb, PHANDLE_INTC, fdt, NULL);
+ if (ret < 0) {
+ error_report("couldn't setup PCI devices in fdt");
+ exit(1);
+ }
+ }
+
+ spapr_dt_cpus(fdt, spapr);
+
+ /* ibm,drc-indexes and friends */
+ if (smc->dr_lmb_enabled) {
+ root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_LMB;
+ }
+ if (smc->dr_phb_enabled) {
+ root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PHB;
+ }
+ if (mc->nvdimm_supported) {
+ root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PMEM;
+ }
+ if (root_drc_type_mask) {
+ _FDT(spapr_dt_drc(fdt, 0, NULL, root_drc_type_mask));
+ }
+
+ if (mc->has_hotpluggable_cpus) {
+ int offset = fdt_path_offset(fdt, "/cpus"); /* NOTE(review): not checked here; presumably spapr_dt_drc() fails on a negative offset - confirm */
+ ret = spapr_dt_drc(fdt, offset, NULL, SPAPR_DR_CONNECTOR_TYPE_CPU);
+ if (ret < 0) {
+ error_report("Couldn't set up CPU DR device tree properties");
+ exit(1);
+ }
+ }
+
+ /* /event-sources */
+ spapr_dt_events(spapr, fdt);
+
+ /* /rtas */
+ spapr_dt_rtas(spapr, fdt);
+
+ /* /chosen */
+ spapr_dt_chosen(spapr, fdt, reset);
+
+ /* /hypervisor */
+ if (kvm_enabled()) {
+ spapr_dt_hypervisor(spapr, fdt);
+ }
+
+ /* Build memory reserve map */
+ if (reset) {
+ if (spapr->kernel_size) {
+ _FDT((fdt_add_mem_rsv(fdt, spapr->kernel_addr,
+ spapr->kernel_size)));
+ }
+ if (spapr->initrd_size) {
+ _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base,
+ spapr->initrd_size)));
+ }
+ }
+
+ /* NVDIMM devices */
+ if (mc->nvdimm_supported) {
+ spapr_dt_persistent_memory(spapr, fdt);
+ }
+
+ return fdt;
+}
+
+/*
+ * Address translation callback: keep the low 28 bits of @addr and rebase
+ * them onto the machine's kernel load address. @opaque is the
+ * SpaprMachineState.
+ */
+static uint64_t translate_kernel_address(void *opaque, uint64_t addr)
+{
+    SpaprMachineState *spapr = opaque;
+    uint64_t low_bits = addr & 0x0fffffff;
+
+    return low_bits + spapr->kernel_addr;
+}
+
+/*
+ * Handle a hypercall issued by @cpu. The hypercall number is taken from
+ * r3, the arguments start at r4, and the result is written back to r3.
+ * Calls made with MSR[PR]=1 are rejected with H_PRIVILEGE.
+ */
+static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp,
+                                    PowerPCCPU *cpu)
+{
+    /* msr_pr below expands in terms of a local named "env" */
+    CPUPPCState *env = &cpu->env;
+
+    /* The TCG path should also be holding the BQL at this point */
+    g_assert(qemu_mutex_iothread_locked());
+
+    if (!msr_pr) {
+        env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]);
+        return;
+    }
+
+    hcall_dprintf("Hypercall made with MSR[PR]=1\n");
+    env->gpr[3] = H_PRIVILEGE;
+}
+
+struct LPCRSyncState { /* argument bundle passed by pointer to do_lpcr_sync() */
+ target_ulong value; /* bits OR-ed into LPCR after the mask is applied */
+ target_ulong mask; /* bits cleared from LPCR before value is OR-ed in */
+};
+
+/*
+ * Per-CPU worker: replace the LPCR bits selected by the mask in the
+ * LPCRSyncState carried in @arg with the corresponding bits of its value.
+ */
+static void do_lpcr_sync(CPUState *cs, run_on_cpu_data arg)
+{
+    struct LPCRSyncState *req = arg.host_ptr;
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    target_ulong updated;
+
+    /* Refresh the register state before doing the read-modify-write */
+    cpu_synchronize_state(cs);
+
+    updated = (cpu->env.spr[SPR_LPCR] & ~req->mask) | req->value;
+    ppc_store_lpcr(cpu, updated);
+}
+
+/*
+ * On every CPU, clear the LPCR bits in @mask and then set the bits in
+ * @value, by running do_lpcr_sync() on each CPU in turn.
+ */
+void spapr_set_all_lpcrs(target_ulong value, target_ulong mask)
+{
+    struct LPCRSyncState req;
+    CPUState *cs;
+
+    req.value = value;
+    req.mask = mask;
+
+    CPU_FOREACH(cs) {
+        run_on_cpu(cs, do_lpcr_sync, RUN_ON_CPU_HOST_PTR(&req));
+    }
+}
+
+/*
+ * Fill @entry from the machine's partition table entry: dw1 is the stored
+ * value as is, dw0 carries only its PATE0_HR bit.
+ */
+static void spapr_get_pate(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    entry->dw1 = spapr->patb_entry;
+    /* Copy PATE1:GR into PATE0:HR */
+    entry->dw0 = spapr->patb_entry & PATE0_HR;
+}
+
+#define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2)) /* address of entry _i; each entry spans two uint64_t */
+#define HPTE_VALID(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID) /* non-zero when the first doubleword has HPTE64_V_VALID */
+#define HPTE_DIRTY(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY) /* non-zero when the first doubleword has HPTE64_V_HPTE_DIRTY */
+#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) /* clears HPTE64_V_HPTE_DIRTY in place */
+#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) /* sets HPTE64_V_HPTE_DIRTY in place */
+
+/*
+ * Return the cached fd used to read the kernel htab, opening it first when
+ * no valid fd is cached. A failed open is reported and the negative value
+ * is returned.
+ */
+static int get_htab_fd(SpaprMachineState *spapr)
+{
+    Error *open_err = NULL;
+
+    if (spapr->htab_fd < 0) {
+        spapr->htab_fd = kvmppc_get_htab_fd(false, 0, &open_err);
+        if (spapr->htab_fd < 0) {
+            error_report_err(open_err);
+        }
+    }
+
+    return spapr->htab_fd;
+}
+
+/*
+ * Close the cached htab fd if one is open and mark it as not open (-1).
+ */
+void close_htab_fd(SpaprMachineState *spapr)
+{
+    int fd = spapr->htab_fd;
+
+    spapr->htab_fd = -1;
+    if (fd >= 0) {
+        close(fd);
+    }
+}
+
+/*
+ * Return the number of PTE groups in the hash table minus one.
+ */
+static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+    hwaddr n_ptegs = HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64;
+
+    return n_ptegs - 1;
+}
+
+/*
+ * Encode the userspace hash table for KVM: the table's host address OR-ed
+ * with (htab_shift - 18). Returns 0 when there is no userspace table.
+ * Must only be called with KVM enabled.
+ */
+static target_ulong spapr_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    assert(kvm_enabled());
+
+    if (spapr->htab) {
+        return (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18);
+    }
+
+    return 0;
+}
+
+/*
+ * Give read access to @n HPTEs starting at index @ptex. With a QEMU-managed
+ * table the result points straight into it; otherwise the entries are read
+ * from KVM into a temporary buffer. Pair with spapr_unmap_hptes().
+ */
+static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp,
+                                                hwaddr ptex, int n)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+    ppc_hash_pte64_t *copy;
+
+    if (spapr->htab) {
+        /* HTAB is controlled by QEMU: point at the in-memory PTEG */
+        hwaddr byte_off = ptex * HASH_PTE_SIZE_64;
+
+        return (const ppc_hash_pte64_t *)(spapr->htab + byte_off);
+    }
+
+    /* HTAB is controlled by KVM: fetch into a temporary buffer */
+    copy = g_malloc(n * HASH_PTE_SIZE_64);
+    kvmppc_read_hptes(copy, ptex, n);
+    return copy;
+}
+
+/*
+ * Release a mapping obtained from spapr_map_hptes(). Only the temporary
+ * buffer used when there is no QEMU-managed table needs freeing.
+ */
+static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp,
+                              const ppc_hash_pte64_t *hptes,
+                              hwaddr ptex, int n)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+
+    if (spapr->htab) {
+        /* Nothing to do for qemu managed HPT */
+        return;
+    }
+
+    g_free((void *)hptes);
+}
+
+/*
+ * Write the HPTE at index @ptex. Without a QEMU-managed table the write is
+ * passed to KVM. Otherwise the two doublewords are stored in an order that
+ * depends on the valid bit, with a write barrier in between, to stay in
+ * step with the reading code in ppc_hash64_pteg_search().
+ */
+void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
+                      uint64_t pte0, uint64_t pte1)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(cpu->vhyp);
+    hwaddr dw0_off = ptex * HASH_PTE_SIZE_64;
+    hwaddr dw1_off = dw0_off + HPTE64_DW1;
+
+    if (!spapr->htab) {
+        kvmppc_write_hpte(ptex, pte0, pte1);
+        return;
+    }
+
+    if (pte0 & HPTE64_V_VALID) {
+        /* Becoming valid: PTE1 must be visible before PTE0 */
+        stq_p(spapr->htab + dw1_off, pte1);
+        smp_wmb();
+        stq_p(spapr->htab + dw0_off, pte0);
+    } else {
+        /* Being cleared: PTE0 must be visible before PTE1 */
+        stq_p(spapr->htab + dw0_off, pte0);
+        smp_wmb();
+        stq_p(spapr->htab + dw1_off, pte1);
+    }
+}
+
+/*
+ * Rewrite the byte at HPTE64_DW1_C of entry @ptex with the low byte of
+ * @pte1 plus bit 0x80. Reports an error and does nothing when there is no
+ * QEMU-managed hash table.
+ */
+static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+                             uint64_t pte1)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+    hwaddr byte_off;
+
+    if (!spapr->htab) {
+        /* There should always be a hash table when this is called */
+        error_report("spapr_hpte_set_c called with no hash table !");
+        return;
+    }
+
+    byte_off = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C;
+
+    /* The HW performs a non-atomic byte update */
+    stb_p(spapr->htab + byte_off, (pte1 & 0xff) | 0x80);
+}
+
+/*
+ * Rewrite the byte at HPTE64_DW1_R of entry @ptex with bits 8..15 of @pte1
+ * plus bit 0x01. Reports an error and does nothing when there is no
+ * QEMU-managed hash table.
+ */
+static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex,
+                             uint64_t pte1)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(vhyp);
+    hwaddr byte_off;
+
+    if (!spapr->htab) {
+        /* There should always be a hash table when this is called */
+        error_report("spapr_hpte_set_r called with no hash table !");
+        return;
+    }
+
+    byte_off = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R;
+
+    /* The HW performs a non-atomic byte update */
+    stb_p(spapr->htab + byte_off, ((pte1 >> 8) & 0xff) | 0x01);
+}
+
+/*
+ * Pick the hash table order (log2 of its size in bytes) for @ramsize bytes
+ * of RAM. The target is 1/128 of RAM rounded up to a power of two; PAPR
+ * recommends 1/64, which is much more than Linux guests need. The result
+ * is clamped to the architected range of 18..46.
+ */
+int spapr_hpt_shift_for_ramsize(uint64_t ramsize)
+{
+    int shift = ctz64(pow2ceil(ramsize)) - 7;
+
+    if (shift < 18) {
+        shift = 18; /* Minimum architected size */
+    }
+    if (shift > 46) {
+        shift = 46; /* Maximum architected size */
+    }
+
+    return shift;
+}
+
+/*
+ * Drop all hash table state: free the userspace table, zero the recorded
+ * order and close the cached htab fd.
+ */
+void spapr_free_hpt(SpaprMachineState *spapr)
+{
+    g_free(spapr->htab);
+
+    /* Leave the machine in the "no HPT" state */
+    spapr->htab_shift = 0;
+    spapr->htab = NULL;
+
+    close_htab_fd(spapr);
+}
+
+/*
+ * Discard any existing hash table and set up a new one of order @shift.
+ *
+ * kvmppc_reset_htab() decides where the table lives: a positive result
+ * means the kernel allocated it (it must match @shift), zero means it has
+ * to be allocated here in userspace, in which case every entry is marked
+ * dirty. On success the partition table entry is cleared and LPCR HR/UPRT
+ * are cleared on all CPUs, since a hash table means the guest is not radix.
+ *
+ * Returns 0 on success. On failure sets @errp and returns -EOPNOTSUPP,
+ * -ENOSPC (kernel picked a different order) or the negated errno.
+ */
+int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp)
+{
+    ERRP_GUARD();
+    long rc;
+
+    /* Clean up any HPT info from a previous boot */
+    spapr_free_hpt(spapr);
+
+    rc = kvmppc_reset_htab(shift);
+
+    if (rc == -EOPNOTSUPP) {
+        error_setg(errp, "HPT not supported in nested guests");
+        return -EOPNOTSUPP;
+    }
+
+    if (rc < 0) {
+        /*
+         * kernel-side HPT needed, but couldn't allocate one. Latch errno
+         * once so the reported and the returned code cannot diverge if a
+         * later call modifies it.
+         */
+        int saved_errno = errno;
+
+        error_setg_errno(errp, saved_errno,
+                         "Failed to allocate KVM HPT of order %d", shift);
+        error_append_hint(errp, "Try smaller maxmem?\n");
+        return -saved_errno;
+    } else if (rc > 0) {
+        /* kernel-side HPT allocated */
+        if (rc != shift) {
+            error_setg(errp,
+                       "Requested order %d HPT, but kernel allocated order %ld",
+                       shift, rc);
+            error_append_hint(errp, "Try smaller maxmem?\n");
+            return -ENOSPC;
+        }
+
+        spapr->htab_shift = shift;
+        spapr->htab = NULL;
+    } else {
+        /* kernel-side HPT not needed, allocate in userspace instead */
+        size_t size = 1ULL << shift;
+        size_t n_hptes = size / HASH_PTE_SIZE_64;
+        /* size_t index: an int would overflow for the largest orders */
+        size_t i;
+
+        spapr->htab = qemu_memalign(size, size);
+        memset(spapr->htab, 0, size);
+        spapr->htab_shift = shift;
+
+        for (i = 0; i < n_hptes; i++) {
+            DIRTY_HPTE(HPTE(spapr->htab, i));
+        }
+    }
+    /* We're setting up a hash table, so that means we're not radix */
+    spapr->patb_entry = 0;
+    spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT);
+    return 0;
+}
+
+/*
+ * Allocate the hash table for the current configuration. With HPT resizing
+ * disabled the table is sized for the maximum RAM size, otherwise for the
+ * RAM currently present (base plus plugged memory). Allocation failure is
+ * fatal. Under KVM the RMA must also fit within the VRMA limit for the
+ * chosen order, otherwise the process exits.
+ */
+void spapr_setup_hpt(SpaprMachineState *spapr)
+{
+    int hpt_shift;
+
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+        hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
+    } else {
+        uint64_t current_ram_size;
+
+        current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size();
+        hpt_shift = spapr_hpt_shift_for_ramsize(current_ram_size);
+    }
+    spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal);
+
+    if (kvm_enabled()) {
+        hwaddr vrma_limit = kvmppc_vrma_limit(spapr->htab_shift);
+
+        /* Check our RMA fits in the possible VRMA */
+        if (vrma_limit < spapr->rma_size) {
+            /* The closing parenthesis was missing from this message */
+            error_report("Unable to create %" HWADDR_PRIu
+                         "MiB RMA (VRMA only allows %" HWADDR_PRIu "MiB)",
+                         spapr->rma_size / MiB, vrma_limit / MiB);
+            exit(EXIT_FAILURE);
+        }
+    }
+}
+
+/*
+ * Exit with an error if the MMU mode the guest asked for (@guest_radix:
+ * radix, otherwise hash) cannot be provided. Only KVM imposes limits:
+ * radix needs the radix capability, and hash is unavailable when the host
+ * offers radix but not hash v3.
+ */
+void spapr_check_mmu_mode(bool guest_radix)
+{
+    if (!kvm_enabled()) {
+        return;
+    }
+
+    if (guest_radix) {
+        if (!kvmppc_has_cap_mmu_radix()) {
+            error_report("Guest requested unavailable MMU mode (radix).");
+            exit(EXIT_FAILURE);
+        }
+    } else if (kvmppc_has_cap_mmu_radix() && !kvmppc_has_cap_mmu_hash_v3()) {
+        error_report("Guest requested unavailable MMU mode (hash).");
+        exit(EXIT_FAILURE);
+    }
+}
+
+static void spapr_machine_reset(MachineState *machine)
+{ /*
+ * Machine reset handler. In order: resets PEF state and applies the
+ * capabilities, selects radix (KVM with radix available and a POWER9
+ * compatible CPU type) or sets up a hash table, resets all devices,
+ * replaces the negotiated option vector with an empty one, restores the
+ * maximum compat mode, resets interrupts, DRCs and pending events, then
+ * builds a fresh device tree, and resets the FWNMI state.
+ */
+ SpaprMachineState *spapr = SPAPR_MACHINE(machine);
+ PowerPCCPU *first_ppc_cpu;
+ hwaddr fdt_addr;
+ void *fdt;
+ int rc;
+
+ pef_kvm_reset(machine->cgs, &error_fatal);
+ spapr_caps_apply(spapr);
+
+ first_ppc_cpu = POWERPC_CPU(first_cpu);
+ if (kvm_enabled() && kvmppc_has_cap_mmu_radix() &&
+ ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
+ spapr->max_compat_pvr)) {
+ /*
+ * If using KVM with radix mode available, VCPUs can be started
+ * without a HPT because KVM will start them in radix mode.
+ * Set the GR bit in PATE so that we know there is no HPT.
+ */
+ spapr->patb_entry = PATE1_GR;
+ spapr_set_all_lpcrs(LPCR_HR | LPCR_UPRT, LPCR_HR | LPCR_UPRT);
+ } else {
+ spapr_setup_hpt(spapr);
+ }
+
+ qemu_devices_reset();
+
+ spapr_ovec_cleanup(spapr->ov5_cas); /* forget what the previous boot negotiated */
+ spapr->ov5_cas = spapr_ovec_new();
+
+ ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal);
+
+ /*
+ * This is fixing some of the default configuration of the XIVE
+ * devices. To be called after the reset of the machine devices.
+ */
+ spapr_irq_reset(spapr, &error_fatal);
+
+ /*
+ * There is no CAS under qtest. Simulate one to please the code that
+ * depends on spapr->ov5_cas. This is especially needed to test device
+ * unplug, so we do that before resetting the DRCs.
+ */
+ if (qtest_enabled()) {
+ spapr_ovec_cleanup(spapr->ov5_cas);
+ spapr->ov5_cas = spapr_ovec_clone(spapr->ov5);
+ }
+
+ /* DRC reset may cause a device to be unplugged. This will cause troubles
+ * if this device is used by another device (eg, a running vhost backend
+ * will crash QEMU if the DIMM holding the vring goes away). To avoid such
+ * situations, we reset DRCs after all devices have been reset.
+ */
+ spapr_drc_reset_all(spapr);
+
+ spapr_clear_pending_events(spapr);
+
+ /*
+ * We place the device tree just below either the top of the RMA,
+ * or just below 2GB, whichever is lower, so that it can be
+ * processed with 32-bit real mode code if necessary
+ */
+ fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE;
+
+ fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE);
+ if (spapr->vof) {
+ spapr_vof_reset(spapr, fdt, &error_fatal);
+ /*
+ * Do not pack the FDT as the client may change properties.
+ * VOF client does not expect the FDT so we do not load it to the VM.
+ */
+ } else {
+ rc = fdt_pack(fdt);
+ /* Should only fail if we've built a corrupted tree */
+ assert(rc == 0);
+
+ spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT,
+ 0, fdt_addr, 0);
+ cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); /* copy the packed tree into guest memory at fdt_addr */
+ }
+ qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
+
+ g_free(spapr->fdt_blob); /* release the tree kept from the previous reset */
+ spapr->fdt_size = fdt_totalsize(fdt);
+ spapr->fdt_initial_size = spapr->fdt_size;
+ spapr->fdt_blob = fdt; /* the machine now owns the new tree */
+
+ /* Set up the entry state */
+ first_ppc_cpu->env.gpr[5] = 0;
+
+ spapr->fwnmi_system_reset_addr = -1; /* -1 is the value spapr_fwnmi_needed() tests for */
+ spapr->fwnmi_machine_check_addr = -1;
+ spapr->fwnmi_machine_check_interlock = -1;
+
+ /* Signal all vCPUs waiting on this condition */
+ qemu_cond_broadcast(&spapr->fwnmi_machine_check_interlock_cond);
+
+ migrate_del_blocker(spapr->fwnmi_migration_blocker);
+}
+
+/*
+ * Create the "spapr-nvram" device on the VIO bus, backed by the first
+ * pflash drive when one is configured, and record it in spapr->nvram.
+ * Any failure is fatal.
+ */
+static void spapr_create_nvram(SpaprMachineState *spapr)
+{
+    DeviceState *nvram_dev = qdev_new("spapr-nvram");
+    DriveInfo *backing = drive_get(IF_PFLASH, 0, 0);
+
+    if (backing != NULL) {
+        qdev_prop_set_drive_err(nvram_dev, "drive",
+                                blk_by_legacy_dinfo(backing), &error_fatal);
+    }
+
+    qdev_realize_and_unref(nvram_dev, &spapr->vio_bus->bus, &error_fatal);
+
+    spapr->nvram = (struct SpaprNvram *)nvram_dev;
+}
+
+/*
+ * Initialise the embedded RTC as the machine's "rtc" child, realize it, and
+ * alias the machine property "rtc-time" to the RTC's "date" property.
+ * Any failure is fatal.
+ */
+static void spapr_rtc_create(SpaprMachineState *spapr)
+{
+    Object *machine_obj = OBJECT(spapr);
+
+    object_initialize_child_with_props(machine_obj, "rtc", &spapr->rtc,
+                                       sizeof(spapr->rtc), TYPE_SPAPR_RTC,
+                                       &error_fatal, NULL);
+    qdev_realize(DEVICE(&spapr->rtc), NULL, &error_fatal);
+
+    object_property_add_alias(machine_obj, "rtc-time",
+                              OBJECT(&spapr->rtc), "date");
+}
+
+/*
+ * Returns whether we want to use VGA or not. For the std, virtio and cirrus
+ * types this is whether pci_vga_init() created a device on @pci_bus; an
+ * unrecognised type sets @errp and yields false.
+ */
+static bool spapr_vga_init(PCIBus *pci_bus, Error **errp)
+{
+    switch (vga_interface_type) {
+    case VGA_STD:
+    case VGA_VIRTIO:
+    case VGA_CIRRUS:
+        return pci_vga_init(pci_bus) != NULL;
+    case VGA_DEVICE:
+        return true;
+    case VGA_NONE:
+        return false;
+    default:
+        break;
+    }
+
+    error_setg(errp,
+               "Unsupported VGA mode, only -vga std or -vga virtio is supported");
+    return false;
+}
+
+/*
+ * pre_load hook of vmstate_spapr: delegates to the capability code and
+ * passes its status through (0 on success).
+ */
+static int spapr_pre_load(void *opaque)
+{
+    return spapr_caps_pre_load(opaque);
+}
+
+/*
+ * post_load hook of vmstate_spapr. Runs the capability post-migration
+ * checks, imports the legacy RTC offset for streams older than version 3,
+ * reconfigures the v3 MMU under KVM when a partition table entry was
+ * received, and finally lets the interrupt code finish loading.
+ * Returns 0 on success or the first non-zero status encountered.
+ */
+static int spapr_post_load(void *opaque, int version_id)
+{
+    SpaprMachineState *spapr = opaque;
+    int err;
+
+    err = spapr_caps_post_migration(spapr);
+    if (err) {
+        return err;
+    }
+
+    if (version_id < 3) {
+        /*
+         * Earlier versions had no separate qdev for the PAPR RTC and kept
+         * the RTC offset directly in the machine state, so push the
+         * incoming offset into the RTC device.
+         */
+        err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset);
+        if (err) {
+            return err;
+        }
+    }
+
+    if (kvm_enabled() && spapr->patb_entry) {
+        PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+        bool radix = (spapr->patb_entry & PATE1_GR) != 0;
+        /* GTSE is sampled before the LPCRs are rewritten below */
+        bool gtse = (cpu->env.spr[SPR_LPCR] & LPCR_GTSE) != 0;
+
+        /*
+         * Update LPCR:HR and UPRT as they may not be set properly in
+         * the stream
+         */
+        spapr_set_all_lpcrs(radix ? (LPCR_HR | LPCR_UPRT) : 0,
+                            LPCR_HR | LPCR_UPRT);
+
+        if (kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry)) {
+            error_report("Process table config unsupported by the host");
+            return -EINVAL;
+        }
+    }
+
+    return spapr_irq_post_load(spapr, version_id);
+}
+
+/*
+ * pre_save hook of vmstate_spapr: delegates to the capability code and
+ * passes its status through (0 on success).
+ */
+static int spapr_pre_save(void *opaque)
+{
+    return spapr_caps_pre_save(opaque);
+}
+
+/* Field test: true for stream versions 2 and older. @opaque is unused. */
+static bool version_before_3(void *opaque, int version_id)
+{
+    return version_id <= 2;
+}
+
+/* The pending-events subsection is needed when the event queue has entries. */
+static bool spapr_pending_events_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+    bool queue_empty = QTAILQ_EMPTY(&spapr->pending_events);
+
+    return !queue_empty;
+}
+
+static const VMStateDescription vmstate_spapr_event_entry = { /* layout of one SpaprEventLogEntry; used as the element type of the pending_events queue */
+ .name = "spapr_event_log_entry",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(summary, SpaprEventLogEntry),
+ VMSTATE_UINT32(extended_length, SpaprEventLogEntry), /* listed before extended_log, which is sized by it */
+ VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, SpaprEventLogEntry, 0,
+ NULL, extended_length),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_spapr_pending_events = { /* subsection of vmstate_spapr carrying the pending event queue */
+ .name = "spapr_pending_events",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_pending_events_needed, /* true only when the queue is non-empty */
+ .fields = (VMStateField[]) {
+ VMSTATE_QTAILQ_V(pending_events, SpaprMachineState, 1,
+ vmstate_spapr_event_entry, SpaprEventLogEntry, next),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Decide whether the CAS-negotiated option vector must be migrated.
+ *
+ * Before SpaprOptionVector existed only OV5_FORM1_AFFINITY and
+ * OV5_DRCONF_MEMORY were handled. Both encode machine topology in the
+ * device tree in a way that lets an already-booted OS keep working with
+ * QEMU regardless of what was negotiated on the source. If those are the
+ * only options the machine offers, the negotiated vector can be left out
+ * of the stream, which keeps old->new and new->old migration working for
+ * pseries-2.7 and earlier, where nothing else is available.
+ *
+ * From QEMU 2.8 on, further negotiable options exist (pseries-2.8 defaults
+ * and explicit command-line parameters). Some of them, such as OV5_HP_EVT,
+ * require QEMU to know the negotiated value to keep working: memory unplug
+ * availability depends on whether OV5_HP_EVT was negotiated.
+ *
+ * So the vector is migrated whenever the machine offers anything beyond
+ * the options in the mask built below, which only affect boot-time
+ * behaviour.
+ */
+static bool spapr_ov5_cas_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+    SpaprOptionVector *exempt = spapr_ovec_new();
+    bool has_extra_options;
+
+    spapr_ovec_set(exempt, OV5_FORM1_AFFINITY);
+    spapr_ovec_set(exempt, OV5_DRCONF_MEMORY);
+    spapr_ovec_set(exempt, OV5_DRMEM_V2);
+
+    /* Extra information is needed if any bit lies outside the mask */
+    has_extra_options = !spapr_ovec_subset(spapr->ov5, exempt);
+
+    spapr_ovec_cleanup(exempt);
+
+    return has_extra_options;
+}
+
+static const VMStateDescription vmstate_spapr_ov5_cas = { /* subsection of vmstate_spapr carrying the CAS-negotiated option vector */
+ .name = "spapr_option_vector_ov5_cas",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_ov5_cas_needed, /* skipped when only the exempt options are offered */
+ .fields = (VMStateField[]) {
+ VMSTATE_STRUCT_POINTER_V(ov5_cas, SpaprMachineState, 1,
+ vmstate_spapr_ovec, SpaprOptionVector),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* The patb_entry subsection is needed when a partition table entry is set. */
+static bool spapr_patb_entry_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+
+    return spapr->patb_entry != 0;
+}
+
+static const VMStateDescription vmstate_spapr_patb_entry = { /* subsection of vmstate_spapr carrying the partition table entry */
+ .name = "spapr_patb_entry",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_patb_entry_needed, /* skipped while patb_entry is zero */
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(patb_entry, SpaprMachineState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* The irq_map subsection is needed when a map exists and has a bit set. */
+static bool spapr_irq_map_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+
+    if (!spapr->irq_map) {
+        return false;
+    }
+
+    return !bitmap_empty(spapr->irq_map, spapr->irq_map_nr);
+}
+
+static const VMStateDescription vmstate_spapr_irq_map = { /* subsection of vmstate_spapr carrying the IRQ allocation bitmap */
+ .name = "spapr_irq_map",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_irq_map_needed, /* skipped when the map is absent or has no bit set */
+ .fields = (VMStateField[]) {
+ VMSTATE_BITMAP(irq_map, SpaprMachineState, 0, irq_map_nr), /* irq_map_nr supplies the bitmap length */
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/* The dtb subsection is needed when the machine class enables DT updates. */
+static bool spapr_dtb_needed(void *opaque)
+{
+    return SPAPR_MACHINE_GET_CLASS(opaque)->update_dt_enabled;
+}
+
+/*
+ * pre_load hook of the dtb subsection: drop the current device tree blob
+ * and zero its size before the fields are loaded. Always returns 0.
+ */
+static int spapr_dtb_pre_load(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+
+    g_free(spapr->fdt_blob);
+    spapr->fdt_size = 0;
+    spapr->fdt_blob = NULL;
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_spapr_dtb = { /* subsection of vmstate_spapr carrying the machine's device tree blob */
+ .name = "spapr_dtb",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_dtb_needed, /* depends on the machine class's update_dt_enabled */
+ .pre_load = spapr_dtb_pre_load, /* frees the current blob before loading */
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(fdt_initial_size, SpaprMachineState),
+ VMSTATE_UINT32(fdt_size, SpaprMachineState), /* listed before fdt_blob, which is sized by it */
+ VMSTATE_VBUFFER_ALLOC_UINT32(fdt_blob, SpaprMachineState, 0, NULL,
+ fdt_size),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * The fwnmi subsection is needed when a machine check address is recorded,
+ * i.e. the field differs from the -1 assigned at machine reset.
+ */
+static bool spapr_fwnmi_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+    bool mc_addr_set = spapr->fwnmi_machine_check_addr != -1;
+
+    return mc_addr_set;
+}
+
+/*
+ * pre_save hook of the fwnmi subsection. Warns when the machine check
+ * interlock is held (not -1) at save time, meaning a machine check is
+ * still being handled. Never blocks the save: always returns 0.
+ */
+static int spapr_fwnmi_pre_save(void *opaque)
+{
+    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+
+    /*
+     * Check if machine check handling is in progress and print a
+     * warning message.
+     */
+    if (spapr->fwnmi_machine_check_interlock != -1) {
+        /* Note the space ending the first literal: the two are concatenated */
+        warn_report("A machine check is being handled during migration. The "
+                    "handler may run and log hardware error on the destination");
+    }
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_spapr_fwnmi = { /* subsection of vmstate_spapr carrying the FWNMI addresses and interlock */
+ .name = "spapr_fwnmi",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_fwnmi_needed, /* skipped while fwnmi_machine_check_addr is -1 */
+ .pre_save = spapr_fwnmi_pre_save, /* warns if a machine check is in flight */
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(fwnmi_system_reset_addr, SpaprMachineState),
+ VMSTATE_UINT64(fwnmi_machine_check_addr, SpaprMachineState),
+ VMSTATE_INT32(fwnmi_machine_check_interlock, SpaprMachineState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+static const VMStateDescription vmstate_spapr = { /* top-level migration description of SpaprMachineState */
+ .name = "spapr",
+ .version_id = 3,
+ .minimum_version_id = 1,
+ .pre_load = spapr_pre_load,
+ .post_load = spapr_post_load,
+ .pre_save = spapr_pre_save,
+ .fields = (VMStateField[]) {
+ /* used to be @next_irq */
+ VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4),
+
+ /* RTC offset */
+ VMSTATE_UINT64_TEST(rtc_offset, SpaprMachineState, version_before_3), /* only for versions < 3; spapr_post_load() hands it to the RTC */
+
+ VMSTATE_PPC_TIMEBASE_V(tb, SpaprMachineState, 2),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription*[]) { /* NULL-terminated list of subsections */
+ &vmstate_spapr_ov5_cas,
+ &vmstate_spapr_patb_entry,
+ &vmstate_spapr_pending_events,
+ &vmstate_spapr_cap_htm,
+ &vmstate_spapr_cap_vsx,
+ &vmstate_spapr_cap_dfp,
+ &vmstate_spapr_cap_cfpc,
+ &vmstate_spapr_cap_sbbc,
+ &vmstate_spapr_cap_ibs,
+ &vmstate_spapr_cap_hpt_maxpagesize,
+ &vmstate_spapr_irq_map,
+ &vmstate_spapr_cap_nested_kvm_hv,
+ &vmstate_spapr_dtb,
+ &vmstate_spapr_cap_large_decr,
+ &vmstate_spapr_cap_ccf_assist,
+ &vmstate_spapr_cap_fwnmi,
+ &vmstate_spapr_fwnmi,
+ &vmstate_spapr_cap_rpt_invalidate,
+ NULL
+ }
+};
+
+/*
+ * Start saving the hash table: write the header word (the table order, or
+ * -1 when there is no table) and, for a QEMU-managed table, rewind the
+ * save cursor and arm the first pass. Always returns 0.
+ */
+static int htab_save_setup(QEMUFile *f, void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+
+    /* "Iteration" header */
+    if (spapr->htab_shift) {
+        qemu_put_be32(f, spapr->htab_shift);
+    } else {
+        qemu_put_be32(f, -1);
+    }
+
+    if (spapr->htab) {
+        spapr->htab_save_index = 0;
+        spapr->htab_first_pass = true;
+    } else if (spapr->htab_shift) {
+        /* A table with no userspace copy must be KVM's */
+        assert(kvm_enabled());
+    }
+
+    return 0;
+}
+
+/*
+ * Emit one chunk: a header of (32-bit start index, 16-bit valid count,
+ * 16-bit invalid count) followed by the raw contents of the @n_valid
+ * entries starting at @chunkstart. Invalid entries carry no payload.
+ */
+static void htab_save_chunk(QEMUFile *f, SpaprMachineState *spapr,
+                            int chunkstart, int n_valid, int n_invalid)
+{
+    /* Header */
+    qemu_put_be32(f, chunkstart);
+    qemu_put_be16(f, n_valid);
+    qemu_put_be16(f, n_invalid);
+
+    /* Payload */
+    qemu_put_buffer(f, HPTE(spapr->htab, chunkstart),
+                    HASH_PTE_SIZE_64 * n_valid);
+}
+
+/*
+ * Terminate a section with an all-zero chunk header: 32-bit index 0,
+ * 16-bit valid count 0, 16-bit invalid count 0.
+ */
+static void htab_save_end_marker(QEMUFile *f)
+{
+    qemu_put_be32(f, 0); /* index */
+    qemu_put_be16(f, 0); /* n_valid */
+    qemu_put_be16(f, 0); /* n_invalid */
+}
+
+static void htab_save_first_pass(QEMUFile *f, SpaprMachineState *spapr,
+ int64_t max_ns)
+{ /*
+ * First pass over the QEMU-managed hash table, resuming at
+ * spapr->htab_save_index. Every visited entry has its dirty flag
+ * cleared; runs of valid entries are written with htab_save_chunk(),
+ * invalid entries are skipped without being sent. The walk stops at the
+ * end of the table, when the file's rate limit is hit, or - unless
+ * @max_ns is -1 - once more than @max_ns nanoseconds have elapsed after
+ * a chunk was written. On reaching the end the cursor is reset to 0 and
+ * htab_first_pass is cleared.
+ */
+ bool has_timeout = max_ns != -1;
+ int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
+ int index = spapr->htab_save_index;
+ int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+ assert(spapr->htab_first_pass);
+
+ do {
+ int chunkstart;
+
+ /* Consume invalid HPTEs */
+ while ((index < htabslots)
+ && !HPTE_VALID(HPTE(spapr->htab, index))) {
+ CLEAN_HPTE(HPTE(spapr->htab, index));
+ index++;
+ }
+
+ /* Consume valid HPTEs */
+ chunkstart = index;
+ while ((index < htabslots) && (index - chunkstart < USHRT_MAX) /* chunk counts are written as 16 bits */
+ && HPTE_VALID(HPTE(spapr->htab, index))) {
+ CLEAN_HPTE(HPTE(spapr->htab, index));
+ index++;
+ }
+
+ if (index > chunkstart) {
+ int n_valid = index - chunkstart;
+
+ htab_save_chunk(f, spapr, chunkstart, n_valid, 0);
+
+ if (has_timeout &&
+ (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
+ break;
+ }
+ }
+ } while ((index < htabslots) && !qemu_file_rate_limit(f));
+
+ if (index >= htabslots) {
+ assert(index == htabslots);
+ index = 0;
+ spapr->htab_first_pass = false;
+ }
+ spapr->htab_save_index = index; /* remember where to resume */
+}
+
+static int htab_save_later_pass(QEMUFile *f, SpaprMachineState *spapr,
+ int64_t max_ns)
+{ /*
+ * Subsequent pass over the QEMU-managed hash table: only entries whose
+ * dirty flag is set are sent, and the flag is cleared as they are
+ * consumed. Each chunk is a run of dirty valid entries followed by a
+ * run of dirty invalid ones. The walk wraps around the table and ends
+ * once every slot has been examined. A negative @max_ns marks the final
+ * pass, which ignores both the time budget and the file's rate limit.
+ * Returns 1 when all slots were examined and nothing was sent, else 0.
+ */
+ bool final = max_ns < 0;
+ int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64;
+ int examined = 0, sent = 0;
+ int index = spapr->htab_save_index;
+ int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+ assert(!spapr->htab_first_pass);
+
+ do {
+ int chunkstart, invalidstart;
+
+ /* Consume non-dirty HPTEs */
+ while ((index < htabslots)
+ && !HPTE_DIRTY(HPTE(spapr->htab, index))) {
+ index++;
+ examined++;
+ }
+
+ chunkstart = index;
+ /* Consume valid dirty HPTEs */
+ while ((index < htabslots) && (index - chunkstart < USHRT_MAX) /* chunk counts are written as 16 bits */
+ && HPTE_DIRTY(HPTE(spapr->htab, index))
+ && HPTE_VALID(HPTE(spapr->htab, index))) {
+ CLEAN_HPTE(HPTE(spapr->htab, index));
+ index++;
+ examined++;
+ }
+
+ invalidstart = index;
+ /* Consume invalid dirty HPTEs */
+ while ((index < htabslots) && (index - invalidstart < USHRT_MAX)
+ && HPTE_DIRTY(HPTE(spapr->htab, index))
+ && !HPTE_VALID(HPTE(spapr->htab, index))) {
+ CLEAN_HPTE(HPTE(spapr->htab, index));
+ index++;
+ examined++;
+ }
+
+ if (index > chunkstart) {
+ int n_valid = invalidstart - chunkstart;
+ int n_invalid = index - invalidstart;
+
+ htab_save_chunk(f, spapr, chunkstart, n_valid, n_invalid);
+ sent += index - chunkstart;
+
+ if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) {
+ break;
+ }
+ }
+
+ if (examined >= htabslots) {
+ break;
+ }
+
+ if (index >= htabslots) { /* wrap around to the start of the table */
+ assert(index == htabslots);
+ index = 0;
+ }
+ } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final));
+
+ if (index >= htabslots) {
+ assert(index == htabslots);
+ index = 0;
+ }
+
+ spapr->htab_save_index = index; /* remember where to resume */
+
+ return (examined >= htabslots) && (sent == 0) ? 1 : 0;
+}
+
+#define MAX_ITERATION_NS 5000000 /* 5 ms; time budget passed to the save passes by htab_save_iterate() */
+#define MAX_KVM_BUF_SIZE 2048 /* third argument to kvmppc_save_htab(); presumably a buffer size in bytes - confirm */
+
+/*
+ * One iteration of hash table saving, bounded by MAX_ITERATION_NS.
+ *
+ * Writes the iteration header (-1 when there is no table, in which case 1
+ * is returned right away; 0 otherwise), then saves from the KVM-managed
+ * table or runs the first/later pass over the QEMU-managed one, and ends
+ * with the end marker. Returns a negative value on failure, otherwise the
+ * status of the save step that ran (0 after a first pass).
+ */
+static int htab_save_iterate(QEMUFile *f, void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+    int rc = 0;
+
+    /* Iteration header */
+    if (!spapr->htab_shift) {
+        qemu_put_be32(f, -1);
+        return 1;
+    }
+    qemu_put_be32(f, 0);
+
+    if (spapr->htab) {
+        if (spapr->htab_first_pass) {
+            htab_save_first_pass(f, spapr, MAX_ITERATION_NS);
+        } else {
+            rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS);
+        }
+    } else {
+        int fd;
+
+        assert(kvm_enabled());
+
+        fd = get_htab_fd(spapr);
+        if (fd < 0) {
+            return fd;
+        }
+
+        rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    htab_save_end_marker(f);
+
+    return rc;
+}
+
+ /*
+  * Final (precopy completion) step of the "spapr/htab" stream.
+  *
+  * Same framing as htab_save_iterate(), but every pass is run with a
+  * time limit of -1.  Returns 0 on success or a negative value if the
+  * KVM htab fd cannot be obtained or the KVM save fails.
+  */
+ static int htab_save_complete(QEMUFile *f, void *opaque)
+ {
+     SpaprMachineState *spapr = opaque;
+
+     /* Iteration header */
+     if (!spapr->htab_shift) {
+         qemu_put_be32(f, -1);
+         return 0;
+     }
+     qemu_put_be32(f, 0);
+
+     if (spapr->htab) {
+         /* Finish an unfinished first pass, then flush what is dirty */
+         if (spapr->htab_first_pass) {
+             htab_save_first_pass(f, spapr, -1);
+         }
+         htab_save_later_pass(f, spapr, -1);
+     } else {
+         int htab_fd;
+         int status;
+
+         assert(kvm_enabled());
+
+         htab_fd = get_htab_fd(spapr);
+         if (htab_fd < 0) {
+             return htab_fd;
+         }
+
+         status = kvmppc_save_htab(f, htab_fd, MAX_KVM_BUF_SIZE, -1);
+         if (status < 0) {
+             return status;
+         }
+     }
+
+     /* End marker */
+     htab_save_end_marker(f);
+
+     return 0;
+ }
+
+ /*
+  * Load one "spapr/htab" section from the migration stream.
+  *
+  * The section starts with a 32-bit header:
+  *   -1        free the hash table,
+  *   non-zero  (re)allocate the hash table with that shift,
+  *   0         a sequence of chunks follows.
+  * Each chunk is (index, n_valid, n_invalid) followed by the valid HPTEs;
+  * an all-zero triple ends the sequence.
+  *
+  * The stream is untrusted: the chunk bounds are checked in 64-bit
+  * arithmetic so that a large index cannot wrap past the check, and the
+  * KVM htab fd (when one was opened) is closed on every exit path.
+  *
+  * Returns 0 on success, negative on failure.
+  */
+ static int htab_load(QEMUFile *f, void *opaque, int version_id)
+ {
+     SpaprMachineState *spapr = opaque;
+     uint32_t section_hdr;
+     int fd = -1;
+     int ret = 0;
+     Error *local_err = NULL;
+
+     if (version_id < 1 || version_id > 1) {
+         error_report("htab_load() bad version");
+         return -EINVAL;
+     }
+
+     section_hdr = qemu_get_be32(f);
+
+     if (section_hdr == -1) {
+         spapr_free_hpt(spapr);
+         return 0;
+     }
+
+     if (section_hdr) {
+         /* First section gives the htab size */
+         ret = spapr_reallocate_hpt(spapr, section_hdr, &local_err);
+         if (ret < 0) {
+             error_report_err(local_err);
+             return ret;
+         }
+         return 0;
+     }
+
+     if (!spapr->htab) {
+         assert(kvm_enabled());
+
+         fd = kvmppc_get_htab_fd(true, 0, &local_err);
+         if (fd < 0) {
+             error_report_err(local_err);
+             return fd;
+         }
+     }
+
+     while (true) {
+         uint32_t index;
+         uint16_t n_valid, n_invalid;
+         uint64_t chunk_end;
+
+         index = qemu_get_be32(f);
+         n_valid = qemu_get_be16(f);
+         n_invalid = qemu_get_be16(f);
+
+         if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) {
+             /* End of Stream */
+             break;
+         }
+
+         /* Widen before adding: index comes straight from the stream */
+         chunk_end = (uint64_t)index + n_valid + n_invalid;
+         if (chunk_end > (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) {
+             /* Bad index in stream */
+             error_report(
+                 "htab_load() bad index %" PRIu32 " (%" PRIu16 "+%" PRIu16
+                 " entries) in htab stream (htab_shift=%d)",
+                 index, n_valid, n_invalid, spapr->htab_shift);
+             ret = -EINVAL;
+             break;
+         }
+
+         if (spapr->htab) {
+             if (n_valid) {
+                 qemu_get_buffer(f, HPTE(spapr->htab, index),
+                                 HASH_PTE_SIZE_64 * n_valid);
+             }
+             if (n_invalid) {
+                 memset(HPTE(spapr->htab, index + n_valid), 0,
+                        HASH_PTE_SIZE_64 * n_invalid);
+             }
+         } else {
+             int rc;
+
+             assert(fd >= 0);
+
+             rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid,
+                                         &local_err);
+             if (rc < 0) {
+                 error_report_err(local_err);
+                 ret = rc;
+                 break;
+             }
+         }
+     }
+
+     /* fd is only valid when the table is KVM-owned; close it on all paths */
+     if (fd >= 0) {
+         close(fd);
+     }
+
+     return ret;
+ }
+
+ /* Migration cleanup hook: release the htab fd held for this machine. */
+ static void htab_save_cleanup(void *opaque)
+ {
+     SpaprMachineState *machine_state = opaque;
+
+     close_htab_fd(machine_state);
+ }
+
+ static SaveVMHandlers savevm_htab_handlers = { /* callbacks for the hash page table migration stream */
+ .save_setup = htab_save_setup,
+ .save_live_iterate = htab_save_iterate, /* each call is bounded by MAX_ITERATION_NS */
+ .save_live_complete_precopy = htab_save_complete, /* final pass, run with a time limit of -1 */
+ .save_cleanup = htab_save_cleanup, /* calls close_htab_fd() */
+ .load_state = htab_load, /* parses the header and chunks written by the save callbacks */
+ };
+
+ /*
+  * Boot-set callback: replace the machine's stored boot device string
+  * with a private copy of @boot_device.  @errp is not used.
+  */
+ static void spapr_boot_set(void *opaque, const char *boot_device, Error **errp)
+ {
+     SpaprMachineState *sms = SPAPR_MACHINE(opaque);
+
+     g_free(sms->boot_device);
+     sms->boot_device = g_strdup(boot_device);
+ }
+
+ /*
+  * Create one LMB DR connector per SPAPR_MEMORY_BLOCK_SIZE block of the
+  * range between ram_size and maxram_size.  The connector id is the
+  * block's address (relative to 0) divided by the block size.
+  */
+ static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr)
+ {
+     MachineState *machine = MACHINE(spapr);
+     const uint64_t block_size = SPAPR_MEMORY_BLOCK_SIZE;
+     uint32_t block_count =
+         (machine->maxram_size - machine->ram_size) / block_size;
+     int n;
+
+     for (n = 0; n < block_count; n++) {
+         uint64_t block_addr = n * block_size + machine->device_memory->base;
+
+         spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_LMB,
+                                block_addr / block_size);
+     }
+ }
+
+/*
+ * If RAM size, maxmem size and individual node mem sizes aren't aligned
+ * to SPAPR_MEMORY_BLOCK_SIZE(256MB), then refuse to start the guest
+ * since we can't support such unaligned sizes with DRCONF_MEMORY.
+ */
+static void spapr_validate_node_memory(MachineState *machine, Error **errp)
+{
+ int i;
+
+ if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
+ error_setg(errp, "Memory size 0x" RAM_ADDR_FMT
+ " is not aligned to %" PRIu64 " MiB",
+ machine->ram_size,
+ SPAPR_MEMORY_BLOCK_SIZE / MiB);
+ return;
+ }
+
+ if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) {
+ error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT
+ " is not aligned to %" PRIu64 " MiB",
+ machine->ram_size,
+ SPAPR_MEMORY_BLOCK_SIZE / MiB);
+ return;
+ }
+
+ for (i = 0; i < machine->numa_state->num_nodes; i++) {
+ if (machine->numa_state->nodes[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
+ error_setg(errp,
+ "Node %d memory size 0x%" PRIx64
+ " is not aligned to %" PRIu64 " MiB",
+ i, machine->numa_state->nodes[i].node_mem,
+ SPAPR_MEMORY_BLOCK_SIZE / MiB);
+ return;
+ }
+ }
+}
+
+ /*
+  * find cpu slot in machine->possible_cpus by core_id
+  *
+  * @id is divided by the number of threads per core to obtain the slot
+  * index.  Returns NULL when that index is outside possible_cpus;
+  * otherwise returns the slot and, if @idx is non-NULL, stores the index
+  * there.
+  */
+ static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
+ {
+     /*
+      * Keep the quotient unsigned: stored in an int, a large id would turn
+      * negative, slip past the bound check and index before the array.
+      */
+     uint32_t index = id / ms->smp.threads;
+
+     if (index >= (uint32_t)ms->possible_cpus->len) {
+         return NULL;
+     }
+     if (idx) {
+         *idx = index;
+     }
+     return &ms->possible_cpus->cpus[index];
+ }
+
+ /*
+  * Choose the VSMT mode for the machine and, under KVM, try to apply it.
+  *
+  * spapr->vsmt is kept when it was set by the user (non-zero on entry),
+  * otherwise it is derived from the threads/core count and the machine
+  * class.  Errors are reported through @errp; a KVM failure is downgraded
+  * to a warning when the host's current mode is still usable.
+  */
+ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
+ {
+     MachineState *ms = MACHINE(spapr);
+     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+     Error *local_err = NULL;
+     bool vsmt_user = !!spapr->vsmt;
+     int kvm_smt = kvmppc_smt_threads();
+     int ret;
+     unsigned int smp_threads = ms->smp.threads;
+
+     if (!kvm_enabled() && (smp_threads > 1)) {
+         error_setg(errp, "TCG cannot support more than 1 thread/core "
+                    "on a pseries machine");
+         return;
+     }
+     if (!is_power_of_2(smp_threads)) {
+         error_setg(errp, "Cannot support %u threads/core on a pseries "
+                    "machine because it must be a power of 2", smp_threads);
+         return;
+     }
+
+     /* Determine the VSMT mode to use: */
+     if (vsmt_user) {
+         if (spapr->vsmt < smp_threads) {
+             error_setg(errp, "Cannot support VSMT mode %u"
+                        " because it must be >= threads/core (%u)",
+                        spapr->vsmt, smp_threads);
+             return;
+         }
+         /* In this case, spapr->vsmt has been set by the command line */
+     } else if (!smc->smp_threads_vsmt) {
+         /*
+          * Default VSMT value is tricky, because we need it to be as
+          * consistent as possible (for migration), but this requires
+          * changing it for at least some existing cases.  We pick 8 as
+          * the value that we'd get with KVM on POWER8, the
+          * overwhelmingly common case in production systems.
+          */
+         spapr->vsmt = MAX(8, smp_threads);
+     } else {
+         spapr->vsmt = smp_threads;
+     }
+
+     /* KVM: If necessary, set the SMT mode: */
+     if (kvm_enabled() && (spapr->vsmt != kvm_smt)) {
+         ret = kvmppc_set_smt_threads(spapr->vsmt);
+         if (ret) {
+             /* Looks like KVM isn't able to change VSMT mode */
+             error_setg(&local_err,
+                        "Failed to set KVM's VSMT mode to %u (errno %d)",
+                        spapr->vsmt, ret);
+             /* We can live with that if the default one is big enough
+              * for the number of threads, and a submultiple of the one
+              * we want.  In this case we'll waste some vcpu ids, but
+              * behaviour will be correct */
+             if ((kvm_smt >= smp_threads) && ((spapr->vsmt % kvm_smt) == 0)) {
+                 warn_report_err(local_err);
+             } else {
+                 if (!vsmt_user) {
+                     error_append_hint(&local_err,
+                                       "On PPC, a VM with %u threads/core"
+                                       " on a host with %d threads/core"
+                                       " requires the use of VSMT mode %u.\n",
+                                       smp_threads, kvm_smt, spapr->vsmt);
+                 }
+                 kvmppc_error_append_smt_possible_hint(&local_err);
+                 error_propagate(errp, local_err);
+             }
+         }
+     }
+     /* else TCG: nothing to do currently */
+ }
+
+ /*
+  * Create the CPU DR connectors and the boot-time CPU cores.
+  *
+  * Exits with an error when the CPU counts are not compatible with the
+  * threads/core setting (hotplug-capable machines) or when hotplug is
+  * requested on a machine version that does not support it.
+  */
+ static void spapr_init_cpus(SpaprMachineState *spapr)
+ {
+     MachineState *machine = MACHINE(spapr);
+     MachineClass *mc = MACHINE_GET_CLASS(machine);
+     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+     const char *type = spapr_get_cpu_core_type(machine->cpu_type);
+     const CPUArchIdList *possible_cpus;
+     unsigned int smp_cpus = machine->smp.cpus;
+     unsigned int smp_threads = machine->smp.threads;
+     unsigned int max_cpus = machine->smp.max_cpus;
+     int boot_cores_nr = smp_cpus / smp_threads;
+     int i;
+
+     possible_cpus = mc->possible_cpu_arch_ids(machine);
+     if (mc->has_hotpluggable_cpus) {
+         if (smp_cpus % smp_threads) {
+             error_report("smp_cpus (%u) must be multiple of threads (%u)",
+                          smp_cpus, smp_threads);
+             exit(1);
+         }
+         if (max_cpus % smp_threads) {
+             error_report("max_cpus (%u) must be multiple of threads (%u)",
+                          max_cpus, smp_threads);
+             exit(1);
+         }
+     } else {
+         if (max_cpus != smp_cpus) {
+             error_report("This machine version does not support CPU hotplug");
+             exit(1);
+         }
+         boot_cores_nr = possible_cpus->len;
+     }
+
+     if (smc->pre_2_10_has_unused_icps) {
+         /* Reuses the function-scope index instead of shadowing it */
+         for (i = 0; i < spapr_max_server_number(spapr); i++) {
+             /* Dummy entries get deregistered when real ICPState objects
+              * are registered during CPU core hotplug.
+              */
+             pre_2_10_vmstate_register_dummy_icp(i);
+         }
+     }
+
+     for (i = 0; i < possible_cpus->len; i++) {
+         int core_id = i * smp_threads;
+
+         if (mc->has_hotpluggable_cpus) {
+             spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_CPU,
+                                    spapr_vcpu_id(spapr, core_id));
+         }
+
+         if (i < boot_cores_nr) {
+             Object *core = object_new(type);
+             int nr_threads = smp_threads;
+
+             /* Handle the partially filled core for older machine types */
+             if ((i + 1) * smp_threads >= smp_cpus) {
+                 nr_threads = smp_cpus - i * smp_threads;
+             }
+
+             object_property_set_int(core, "nr-threads", nr_threads,
+                                     &error_fatal);
+             object_property_set_int(core, CPU_CORE_PROP_CORE_ID, core_id,
+                                     &error_fatal);
+             qdev_realize(DEVICE(core), NULL, &error_fatal);
+
+             object_unref(core);
+         }
+     }
+ }
+
+ /*
+  * Instantiate and realize the PCI host bridge with index 0; any failure
+  * is fatal.  Returns the bridge as a PCIHostState.
+  */
+ static PCIHostState *spapr_create_default_phb(void)
+ {
+     DeviceState *phb_dev = qdev_new(TYPE_SPAPR_PCI_HOST_BRIDGE);
+
+     qdev_prop_set_uint32(phb_dev, "index", 0);
+     sysbus_realize_and_unref(SYS_BUS_DEVICE(phb_dev), &error_fatal);
+
+     return PCI_HOST_BRIDGE(phb_dev);
+ }
+
+ /*
+  * Compute the size of the guest's Real Mode Area.
+  *
+  * Starts from the RAM size and clamps it to the first NUMA node, to
+  * 1 TiB and, when set, to the machine class limit.  If the result is
+  * below MIN_RMA_SLOF an error is stored in @errp and 0 is returned.
+  */
+ static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
+ {
+     MachineState *machine = MACHINE(spapr);
+     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+     hwaddr rma_size = machine->ram_size;
+     hwaddr node0_size = spapr_node0_size(machine);
+
+     /* RMA has to fit in the first NUMA node */
+     rma_size = MIN(rma_size, node0_size);
+
+     /*
+      * VRMA access is via a special 1TiB SLB mapping, so the RMA can
+      * never exceed that
+      */
+     rma_size = MIN(rma_size, 1 * TiB);
+
+     /*
+      * Clamp the RMA size based on machine type.  This is for
+      * migration compatibility with older qemu versions, which limited
+      * the RMA size for complicated and mostly bad reasons.
+      */
+     if (smc->rma_limit) {
+         rma_size = MIN(rma_size, smc->rma_limit);
+     }
+
+     if (rma_size < MIN_RMA_SLOF) {
+         /*
+          * Print the minimum in decimal MiB; the previous format emitted a
+          * hex number followed by a literal "ldMiB".
+          */
+         error_setg(errp,
+                    "pSeries SLOF firmware requires >= %" PRIu64
+                    " MiB guest RMA (Real Mode Area memory)",
+                    (uint64_t)(MIN_RMA_SLOF / MiB));
+         return 0;
+     }
+
+     return rma_size;
+ }
+
+ /* Create one PMEM DR connector for each of the machine's RAM slots. */
+ static void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr)
+ {
+     MachineState *ms = MACHINE(spapr);
+     int slot = 0;
+
+     while (slot < ms->ram_slots) {
+         spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_PMEM, slot);
+         slot++;
+     }
+ }
+
+ /*
+  * pSeries LPAR / sPAPR hardware init
+  *
+  * Machine init hook: decides the HPT-resize and VSMT modes, sets up the
+  * interrupt controller and the option vectors, creates the CPUs, maps
+  * RAM and the hotplug memory region, creates the DR connectors and the
+  * VIO/PCI/USB devices, loads kernel, initrd and firmware, and registers
+  * the migration and boot handlers.  Unrecoverable configuration errors
+  * terminate QEMU (error_fatal or exit(1)).
+  */
+ static void spapr_machine_init(MachineState *machine)
+ {
+     SpaprMachineState *spapr = SPAPR_MACHINE(machine);
+     SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+     MachineClass *mc = MACHINE_GET_CLASS(machine);
+     const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME;
+     const char *bios_name = machine->firmware ?: bios_default;
+     const char *kernel_filename = machine->kernel_filename;
+     const char *initrd_filename = machine->initrd_filename;
+     PCIHostState *phb;
+     int i;
+     MemoryRegion *sysmem = get_system_memory();
+     long load_limit, fw_size;
+     char *filename;
+     Error *resize_hpt_err = NULL;
+
+     /*
+      * if Secure VM (PEF) support is configured, then initialize it
+      */
+     pef_kvm_init(machine->cgs, &error_fatal);
+
+     msi_nonbroken = true;
+
+     QLIST_INIT(&spapr->phbs);
+     QTAILQ_INIT(&spapr->pending_dimm_unplugs);
+
+     /* Determine capabilities to run with */
+     spapr_caps_init(spapr);
+
+     kvmppc_check_papr_resize_hpt(&resize_hpt_err);
+     if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
+         /*
+          * If the user explicitly requested a mode we should either
+          * supply it, or fail completely (which we do below).  But if
+          * it's not set explicitly, we reset our mode to something
+          * that works
+          */
+         if (resize_hpt_err) {
+             spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
+             error_free(resize_hpt_err);
+             resize_hpt_err = NULL;
+         } else {
+             spapr->resize_hpt = smc->resize_hpt_default;
+         }
+     }
+
+     assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT);
+
+     if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) {
+         /*
+          * User requested HPT resize, but this host can't supply it.  Bail out
+          */
+         error_report_err(resize_hpt_err);
+         exit(1);
+     }
+     error_free(resize_hpt_err);
+
+     spapr->rma_size = spapr_rma_size(spapr, &error_fatal);
+
+     /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
+     load_limit = MIN(spapr->rma_size, FDT_MAX_ADDR) - FW_OVERHEAD;
+
+     /*
+      * VSMT must be set in order to be able to compute VCPU ids, ie to
+      * call spapr_max_server_number() or spapr_vcpu_id().
+      */
+     spapr_set_vsmt_mode(spapr, &error_fatal);
+
+     /* Set up Interrupt Controller before we create the VCPUs */
+     spapr_irq_init(spapr, &error_fatal);
+
+     /* Set up containers for ibm,client-architecture-support negotiated options
+      */
+     spapr->ov5 = spapr_ovec_new();
+     spapr->ov5_cas = spapr_ovec_new();
+
+     if (smc->dr_lmb_enabled) {
+         spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY);
+         spapr_validate_node_memory(machine, &error_fatal);
+     }
+
+     spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY);
+
+     /* Do not advertise FORM2 NUMA support for pseries-6.1 and older */
+     if (!smc->pre_6_2_numa_affinity) {
+         spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY);
+     }
+
+     /* advertise support for dedicated HP event source to guests */
+     if (spapr->use_hotplug_event_source) {
+         spapr_ovec_set(spapr->ov5, OV5_HP_EVT);
+     }
+
+     /* advertise support for HPT resizing */
+     if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) {
+         spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE);
+     }
+
+     /* advertise support for ibm,dynamic-memory-v2 */
+     spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2);
+
+     /* advertise XIVE on POWER9 machines */
+     if (spapr->irq->xive) {
+         spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT);
+     }
+
+     /* init CPUs */
+     spapr_init_cpus(spapr);
+
+     spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine);
+
+     /* Init numa_assoc_array */
+     spapr_numa_associativity_init(spapr, machine);
+
+     if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) &&
+         ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0,
+                               spapr->max_compat_pvr)) {
+         spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_300);
+         /* KVM and TCG always allow GTSE with radix... */
+         spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE);
+     }
+     /* ... but not with hash (currently). */
+
+     if (kvm_enabled()) {
+         /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */
+         kvmppc_enable_logical_ci_hcalls();
+         kvmppc_enable_set_mode_hcall();
+
+         /* H_CLEAR_MOD/_REF are mandatory in PAPR, but off by default */
+         kvmppc_enable_clear_ref_mod_hcalls();
+
+         /* Enable H_PAGE_INIT */
+         kvmppc_enable_h_page_init();
+     }
+
+     /* map RAM */
+     memory_region_add_subregion(sysmem, 0, machine->ram);
+
+     /* always allocate the device memory information */
+     machine->device_memory = g_malloc0(sizeof(*machine->device_memory));
+
+     /* initialize hotplug memory address space */
+     if (machine->ram_size < machine->maxram_size) {
+         ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size;
+         /*
+          * Limit the number of hotpluggable memory slots to half the number
+          * slots that KVM supports, leaving the other half for PCI and other
+          * devices.  However ensure that number of slots doesn't drop below 32.
+          */
+         int max_memslots = kvm_enabled() ? kvm_get_max_memslots() / 2 :
+                            SPAPR_MAX_RAM_SLOTS;
+
+         if (max_memslots < SPAPR_MAX_RAM_SLOTS) {
+             max_memslots = SPAPR_MAX_RAM_SLOTS;
+         }
+         if (machine->ram_slots > max_memslots) {
+             error_report("Specified number of memory slots %"
+                          PRIu64" exceeds max supported %d",
+                          machine->ram_slots, max_memslots);
+             exit(1);
+         }
+
+         machine->device_memory->base = ROUND_UP(machine->ram_size,
+                                                 SPAPR_DEVICE_MEM_ALIGN);
+         memory_region_init(&machine->device_memory->mr, OBJECT(spapr),
+                            "device-memory", device_mem_size);
+         memory_region_add_subregion(sysmem, machine->device_memory->base,
+                                     &machine->device_memory->mr);
+     }
+
+     if (smc->dr_lmb_enabled) {
+         spapr_create_lmb_dr_connectors(spapr);
+     }
+
+     if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) {
+         /*
+          * Create the error string for live migration blocker.  The first
+          * literal ends with a space so the two sentences do not run
+          * together when the literals are concatenated.
+          */
+         error_setg(&spapr->fwnmi_migration_blocker,
+             "A machine check is being handled during migration. The handler "
+             "may run and log hardware error on the destination");
+     }
+
+     if (mc->nvdimm_supported) {
+         spapr_create_nvdimm_dr_connectors(spapr);
+     }
+
+     /* Set up RTAS event infrastructure */
+     spapr_events_init(spapr);
+
+     /* Set up the RTC RTAS interfaces */
+     spapr_rtc_create(spapr);
+
+     /* Set up VIO bus */
+     spapr->vio_bus = spapr_vio_bus_init();
+
+     for (i = 0; serial_hd(i); i++) {
+         spapr_vty_create(spapr->vio_bus, serial_hd(i));
+     }
+
+     /* We always have at least the nvram device on VIO */
+     spapr_create_nvram(spapr);
+
+     /*
+      * Setup hotplug / dynamic-reconfiguration connectors.  top-level
+      * connectors (described in root DT node's "ibm,drc-types" property)
+      * are pre-initialized here.  additional child connectors (such as
+      * connectors for a PHBs PCI slots) are added as needed during their
+      * parent's realization.
+      */
+     if (smc->dr_phb_enabled) {
+         for (i = 0; i < SPAPR_MAX_PHBS; i++) {
+             spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i);
+         }
+     }
+
+     /* Set up PCI */
+     spapr_pci_rtas_init();
+
+     phb = spapr_create_default_phb();
+
+     for (i = 0; i < nb_nics; i++) {
+         NICInfo *nd = &nd_table[i];
+
+         if (!nd->model) {
+             nd->model = g_strdup("spapr-vlan");
+         }
+
+         if (g_str_equal(nd->model, "spapr-vlan") ||
+             g_str_equal(nd->model, "ibmveth")) {
+             spapr_vlan_create(spapr->vio_bus, nd);
+         } else {
+             pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL);
+         }
+     }
+
+     for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) {
+         spapr_vscsi_create(spapr->vio_bus);
+     }
+
+     /* Graphics */
+     if (spapr_vga_init(phb->bus, &error_fatal)) {
+         spapr->has_graphics = true;
+         machine->usb |= defaults_enabled() && !machine->usb_disabled;
+     }
+
+     if (machine->usb) {
+         if (smc->use_ohci_by_default) {
+             pci_create_simple(phb->bus, -1, "pci-ohci");
+         } else {
+             pci_create_simple(phb->bus, -1, "nec-usb-xhci");
+         }
+
+         if (spapr->has_graphics) {
+             USBBus *usb_bus = usb_bus_find(-1);
+
+             usb_create_simple(usb_bus, "usb-kbd");
+             usb_create_simple(usb_bus, "usb-mouse");
+         }
+     }
+
+     if (kernel_filename) {
+         spapr->kernel_size = load_elf(kernel_filename, NULL,
+                                       translate_kernel_address, spapr,
+                                       NULL, NULL, NULL, NULL, 1,
+                                       PPC_ELF_MACHINE, 0, 0);
+         if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) {
+             spapr->kernel_size = load_elf(kernel_filename, NULL,
+                                           translate_kernel_address, spapr,
+                                           NULL, NULL, NULL, NULL, 0,
+                                           PPC_ELF_MACHINE, 0, 0);
+             spapr->kernel_le = spapr->kernel_size > 0;
+         }
+         if (spapr->kernel_size < 0) {
+             error_report("error loading %s: %s", kernel_filename,
+                          load_elf_strerror(spapr->kernel_size));
+             exit(1);
+         }
+
+         /* load initrd */
+         if (initrd_filename) {
+             /* Try to locate the initrd in the gap between the kernel
+              * and the firmware.  Add a bit of space just in case
+              */
+             spapr->initrd_base = (spapr->kernel_addr + spapr->kernel_size
+                                   + 0x1ffff) & ~0xffff;
+             spapr->initrd_size = load_image_targphys(initrd_filename,
+                                                      spapr->initrd_base,
+                                                      load_limit
+                                                      - spapr->initrd_base);
+             if (spapr->initrd_size < 0) {
+                 error_report("could not load initial ram disk '%s'",
+                              initrd_filename);
+                 exit(1);
+             }
+         }
+     }
+
+     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+     if (!filename) {
+         error_report("Could not find LPAR firmware '%s'", bios_name);
+         exit(1);
+     }
+     fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE);
+     if (fw_size <= 0) {
+         error_report("Could not load LPAR firmware '%s'", filename);
+         exit(1);
+     }
+     g_free(filename);
+
+     /* FIXME: Should register things through the MachineState's qdev
+      * interface, this is a legacy from the sPAPREnvironment structure
+      * which predated MachineState but had a similar function */
+     vmstate_register(NULL, 0, &vmstate_spapr, spapr);
+     register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1,
+                          &savevm_htab_handlers, spapr);
+
+     qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine));
+
+     qemu_register_boot_set(spapr_boot_set, spapr);
+
+     /*
+      * Nothing needs to be done to resume a suspended guest because
+      * suspending does not change the machine state, so no need for
+      * a ->wakeup method.
+      */
+     qemu_register_wakeup_support();
+
+     if (kvm_enabled()) {
+         /* to stop and start vmclock */
+         qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
+                                          &spapr->tb);
+
+         kvmppc_spapr_enable_inkernel_multitce();
+     }
+
+     qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond);
+     if (spapr->vof) {
+         spapr->vof->fw_size = fw_size; /* for claim() on itself */
+         spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client);
+     }
+ }
+
+ #define DEFAULT_KVM_TYPE "auto"
+ /*
+  * Translate the "kvm-type" string into the numeric KVM VM type:
+  * 0 for NULL or "auto", 1 for "hv", 2 for "pr".  Any other value is
+  * reported and QEMU exits.
+  *
+  * "hv" and "pr" are matched case-insensitively because upper-case
+  * spellings exist in the wild; "auto" was introduced as lower-case
+  * only, so it is matched exactly.  @machine is not used.
+  */
+ static int spapr_kvm_type(MachineState *machine, const char *vm_type)
+ {
+     if (vm_type == NULL || strcmp(vm_type, DEFAULT_KVM_TYPE) == 0) {
+         return 0;
+     }
+
+     if (g_ascii_strcasecmp(vm_type, "hv") == 0) {
+         return 1;
+     }
+
+     if (g_ascii_strcasecmp(vm_type, "pr") == 0) {
+         return 2;
+     }
+
+     error_report("Unknown kvm-type specified '%s'", vm_type);
+     exit(1);
+ }
+
+/*
+ * Implementation of an interface to adjust firmware path
+ * for the bootindex property handling.
+ */
+static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus,
+ DeviceState *dev)
+{
+#define CAST(type, obj, name) \
+ ((type *)object_dynamic_cast(OBJECT(obj), (name)))
+ SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE);
+ SpaprPhbState *phb = CAST(SpaprPhbState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE);
+ VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON);
+ PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE);
+
+ if (d) {
+ void *spapr = CAST(void, bus->parent, "spapr-vscsi");
+ VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI);
+ USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE);
+
+ if (spapr) {
+ /*
+ * Replace "channel@0/disk@0,0" with "disk@8000000000000000":
+ * In the top 16 bits of the 64-bit LUN, we use SRP luns of the form
+ * 0x8000 | (target << 8) | (bus << 5) | lun
+ * (see the "Logical unit addressing format" table in SAM5)
+ */
+ unsigned id = 0x8000 | (d->id << 8) | (d->channel << 5) | d->lun;
+ return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
+ (uint64_t)id << 48);
+ } else if (virtio) {
+ /*
+ * We use SRP luns of the form 01000000 | (target << 8) | lun
+ * in the top 32 bits of the 64-bit LUN
+ * Note: the quote above is from SLOF and it is wrong,
+ * the actual binding is:
+ * swap 0100 or 10 << or 20 << ( target lun-id -- srplun )
+ */
+ unsigned id = 0x1000000 | (d->id << 16) | d->lun;
+ if (d->lun >= 256) {
+ /* Use the LUN "flat space addressing method" */
+ id |= 0x4000;
+ }
+ return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
+ (uint64_t)id << 32);
+ } else if (usb) {
+ /*
+ * We use SRP luns of the form 01000000 | (usb-port << 16) | lun
+ * in the top 32 bits of the 64-bit LUN
+ */
+ unsigned usb_port = atoi(usb->port->path);
+ unsigned id = 0x1000000 | (usb_port << 16) | d->lun;
+ return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev),
+ (uint64_t)id << 32);
+ }
+ }
+
+ /*
+ * SLOF probes the USB devices, and if it recognizes that the device is a
+ * storage device, it changes its name to "storage" instead of "usb-host",
+ * and additionally adds a child node for the SCSI LUN, so the correct
+ * boot path in SLOF is something like .../storage@1/disk@xxx" instead.
+ */
+ if (strcmp("usb-host", qdev_fw_name(dev)) == 0) {
+ USBDevice *usbdev = CAST(USBDevice, dev, TYPE_USB_DEVICE);
+ if (usb_device_is_scsi_storage(usbdev)) {
+ return g_strdup_printf("storage@%s/disk", usbdev->port->path);
+ }
+ }
+
+ if (phb) {
+ /* Replace "pci" with "pci@800000020000000" */
+ return g_strdup_printf("pci@%"PRIX64, phb->buid);
+ }
+
+ if (vsc) {
+ /* Same logic as virtio above */
+ unsigned id = 0x1000000 | (vsc->target << 16) | vsc->lun;
+ return g_strdup_printf("disk@%"PRIX64, (uint64_t)id << 32);
+ }
+
+ if (g_str_equal("pci-bridge", qdev_fw_name(dev))) {
+ /* SLOF uses "pci" instead of "pci-bridge" for PCI bridges */
+ PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE);
+ return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn));
+ }
+
+ if (pcidev) {
+ return spapr_pci_fw_dev_name(pcidev);
+ }
+
+ return NULL;
+}
+
+ /* "kvm-type" getter: hand the caller its own copy of the stored string. */
+ static char *spapr_get_kvm_type(Object *obj, Error **errp)
+ {
+     return g_strdup(SPAPR_MACHINE(obj)->kvm_type);
+ }
+
+ /* "kvm-type" setter: drop the old string and keep a copy of @value. */
+ static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp)
+ {
+     SpaprMachineState *sms = SPAPR_MACHINE(obj);
+
+     g_free(sms->kvm_type);
+     sms->kvm_type = g_strdup(value);
+ }
+
+ /* "modern-hotplug-events" getter. */
+ static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp)
+ {
+     return SPAPR_MACHINE(obj)->use_hotplug_event_source;
+ }
+
+ /* "modern-hotplug-events" setter. */
+ static void spapr_set_modern_hotplug_events(Object *obj, bool value,
+                                             Error **errp)
+ {
+     SpaprMachineState *sms = SPAPR_MACHINE(obj);
+
+     sms->use_hotplug_event_source = value;
+ }
+
+ /*
+  * Getter behind the read-only "vfio-no-msix-emulation" property; the
+  * value is unconditionally true.
+  */
+ static bool spapr_get_msix_emulation(Object *obj, Error **errp)
+ {
+     const bool always = true;
+
+     return always;
+ }
+
+ /*
+  * "resize-hpt" getter: return a newly allocated name for the current
+  * mode.  A mode outside the four known values is a programming error.
+  */
+ static char *spapr_get_resize_hpt(Object *obj, Error **errp)
+ {
+     SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+     const char *mode_name = NULL;
+
+     switch (spapr->resize_hpt) {
+     case SPAPR_RESIZE_HPT_DEFAULT:
+         mode_name = "default";
+         break;
+     case SPAPR_RESIZE_HPT_DISABLED:
+         mode_name = "disabled";
+         break;
+     case SPAPR_RESIZE_HPT_ENABLED:
+         mode_name = "enabled";
+         break;
+     case SPAPR_RESIZE_HPT_REQUIRED:
+         mode_name = "required";
+         break;
+     }
+
+     if (!mode_name) {
+         g_assert_not_reached();
+     }
+     return g_strdup(mode_name);
+ }
+
+ /*
+  * "resize-hpt" setter: accept "default", "disabled", "enabled" or
+  * "required" (exact match); anything else sets an error in @errp and
+  * leaves the mode unchanged.
+  */
+ static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp)
+ {
+     SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+     if (!strcmp(value, "default")) {
+         spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT;
+         return;
+     }
+     if (!strcmp(value, "disabled")) {
+         spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED;
+         return;
+     }
+     if (!strcmp(value, "enabled")) {
+         spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED;
+         return;
+     }
+     if (!strcmp(value, "required")) {
+         spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED;
+         return;
+     }
+
+     error_setg(errp, "Bad value for \"resize-hpt\" property");
+ }
+
+ /* "x-vof" getter: true when a VOF state object is currently allocated. */
+ static bool spapr_get_vof(Object *obj, Error **errp)
+ {
+     return SPAPR_MACHINE(obj)->vof != NULL;
+ }
+
+ /*
+  * "x-vof" setter: any existing VOF state is cleaned up and freed first;
+  * a fresh zero-initialised one is allocated only when @value is true.
+  */
+ static void spapr_set_vof(Object *obj, bool value, Error **errp)
+ {
+     SpaprMachineState *sms = SPAPR_MACHINE(obj);
+
+     if (sms->vof != NULL) {
+         vof_cleanup(sms->vof);
+         g_free(sms->vof);
+         sms->vof = NULL;
+     }
+
+     if (value) {
+         sms->vof = g_malloc0(sizeof(*sms->vof));
+     }
+ }
+
+ /*
+  * "ic-mode" getter: map the selected IRQ backend to its name and return
+  * a newly allocated copy.  An unknown backend is a programming error.
+  */
+ static char *spapr_get_ic_mode(Object *obj, Error **errp)
+ {
+     SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+     const char *mode_name;
+
+     if (spapr->irq == &spapr_irq_xics_legacy) {
+         mode_name = "legacy";
+     } else if (spapr->irq == &spapr_irq_xics) {
+         mode_name = "xics";
+     } else if (spapr->irq == &spapr_irq_xive) {
+         mode_name = "xive";
+     } else if (spapr->irq == &spapr_irq_dual) {
+         mode_name = "dual";
+     } else {
+         g_assert_not_reached();
+     }
+
+     return g_strdup(mode_name);
+ }
+
+ /*
+  * "ic-mode" setter: select the xics, xive or dual IRQ backend.
+  *
+  * Rejected with an error when the machine class uses legacy IRQ
+  * allocation, or when @value is not one of the three names.
+  */
+ static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp)
+ {
+     SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+     if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+         error_setg(errp, "This machine only uses the legacy XICS backend, don't pass ic-mode");
+         return;
+     }
+
+     /* The legacy IRQ backend can not be set */
+     if (!strcmp(value, "xics")) {
+         spapr->irq = &spapr_irq_xics;
+         return;
+     }
+     if (!strcmp(value, "xive")) {
+         spapr->irq = &spapr_irq_xive;
+         return;
+     }
+     if (!strcmp(value, "dual")) {
+         spapr->irq = &spapr_irq_dual;
+         return;
+     }
+
+     error_setg(errp, "Bad value for \"ic-mode\" property");
+ }
+
+ /* "host-model" getter: return a copy of the stored string. */
+ static char *spapr_get_host_model(Object *obj, Error **errp)
+ {
+     return g_strdup(SPAPR_MACHINE(obj)->host_model);
+ }
+
+ /* "host-model" setter: free the old string and keep a copy of @value. */
+ static void spapr_set_host_model(Object *obj, const char *value, Error **errp)
+ {
+     SpaprMachineState *sms = SPAPR_MACHINE(obj);
+
+     g_free(sms->host_model);
+     sms->host_model = g_strdup(value);
+ }
+
+ /* "host-serial" getter: return a copy of the stored string. */
+ static char *spapr_get_host_serial(Object *obj, Error **errp)
+ {
+     return g_strdup(SPAPR_MACHINE(obj)->host_serial);
+ }
+
+ /* "host-serial" setter: free the old string and keep a copy of @value. */
+ static void spapr_set_host_serial(Object *obj, const char *value, Error **errp)
+ {
+     SpaprMachineState *sms = SPAPR_MACHINE(obj);
+
+     g_free(sms->host_serial);
+     sms->host_serial = g_strdup(value);
+ }
+ /*
+ * Instance initializer for the sPAPR machine object.
+ *
+ * Sets the per-instance defaults (htab_fd = -1, hotplug event source
+ * enabled, kvm_type = DEFAULT_KVM_TYPE, kernel_addr = KERNEL_LOAD_ADDR,
+ * irq taken from the machine class) and registers the machine
+ * properties together with their descriptions: kvm-type,
+ * modern-hotplug-events, max-cpu-compat, resize-hpt, vsmt,
+ * vfio-no-msix-emulation (getter only), kernel-addr, x-vof, ic-mode,
+ * host-model and host-serial.
+ */
+ static void spapr_instance_init(Object *obj)
+ {
+ SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ MachineState *ms = MACHINE(spapr);
+ MachineClass *mc = MACHINE_GET_CLASS(ms);
+
+ /*
+ * NVDIMM support went live in 5.1 without considering that, in
+ * other archs, the user needs to enable NVDIMM support with the
+ * 'nvdimm' machine option and the default behavior is NVDIMM
+ * support disabled. It is too late to roll back to the standard
+ * behavior without breaking 5.1 guests.
+ */
+ if (mc->nvdimm_supported) {
+ ms->nvdimms_state->is_enabled = true;
+ }
+
+ spapr->htab_fd = -1;
+ spapr->use_hotplug_event_source = true;
+ spapr->kvm_type = g_strdup(DEFAULT_KVM_TYPE);
+ object_property_add_str(obj, "kvm-type",
+ spapr_get_kvm_type, spapr_set_kvm_type);
+ object_property_set_description(obj, "kvm-type",
+ "Specifies the KVM virtualization mode (auto,"
+ " hv, pr). Defaults to 'auto'. This mode will use"
+ " any available KVM module loaded in the host,"
+ " where kvm_hv takes precedence if both kvm_hv and"
+ " kvm_pr are loaded.");
+ object_property_add_bool(obj, "modern-hotplug-events",
+ spapr_get_modern_hotplug_events,
+ spapr_set_modern_hotplug_events);
+ object_property_set_description(obj, "modern-hotplug-events",
+ "Use dedicated hotplug event mechanism in"
+ " place of standard EPOW events when possible"
+ " (required for memory hot-unplug support)");
+ ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr,
+ "Maximum permitted CPU compatibility mode");
+
+ object_property_add_str(obj, "resize-hpt",
+ spapr_get_resize_hpt, spapr_set_resize_hpt);
+ object_property_set_description(obj, "resize-hpt",
+ "Resizing of the Hash Page Table (enabled, disabled, required)");
+ object_property_add_uint32_ptr(obj, "vsmt",
+ &spapr->vsmt, OBJ_PROP_FLAG_READWRITE);
+ object_property_set_description(obj, "vsmt",
+ "Virtual SMT: KVM behaves as if this were"
+ " the host's SMT mode");
+
+ object_property_add_bool(obj, "vfio-no-msix-emulation",
+ spapr_get_msix_emulation, NULL);
+
+ object_property_add_uint64_ptr(obj, "kernel-addr",
+ &spapr->kernel_addr, OBJ_PROP_FLAG_READWRITE);
+ object_property_set_description(obj, "kernel-addr",
+ stringify(KERNEL_LOAD_ADDR)
+ " for -kernel is the default");
+ spapr->kernel_addr = KERNEL_LOAD_ADDR;
+
+ object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof);
+ object_property_set_description(obj, "x-vof",
+ "Enable Virtual Open Firmware (experimental)");
+
+ /* The machine class defines the default interrupt controller mode */
+ spapr->irq = smc->irq;
+ object_property_add_str(obj, "ic-mode", spapr_get_ic_mode,
+ spapr_set_ic_mode);
+ object_property_set_description(obj, "ic-mode",
+ "Specifies the interrupt controller mode (xics, xive, dual)");
+
+ object_property_add_str(obj, "host-model",
+ spapr_get_host_model, spapr_set_host_model);
+ object_property_set_description(obj, "host-model",
+ "Host model to advertise in guest device tree");
+ object_property_add_str(obj, "host-serial",
+ spapr_get_host_serial, spapr_set_host_serial);
+ object_property_set_description(obj, "host-serial",
+ "Host serial number to advertise in guest device tree");
+ }
+/*
+ * Frees the kvm_type string stored in the sPAPR machine state of @obj.
+ */
+static void spapr_machine_finalizefn(Object *obj)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+ g_free(spapr->kvm_type);
+}
+/*
+ * Deliver a system reset to the vCPU @cs. @arg is not used.
+ *
+ * When a FWNMI system reset address is registered (i.e. it is not -1),
+ * the current r3 is stored big-endian in a per-CPU slot of two 64-bit
+ * words located at offset RTAS_ERROR_LOG_MAX from the RTAS base (the
+ * second word is written as 0), r3 is pointed at that slot and, after
+ * ppc_cpu_do_system_reset(), NIP is set to the registered address. If the
+ * RTAS address cannot be obtained, the guest is reported as panicked and
+ * no reset is delivered.
+ */
+void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ PowerPCCPU *cpu = POWERPC_CPU(cs);
+ CPUPPCState *env = &cpu->env;
+
+ cpu_synchronize_state(cs);
+ /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */
+ if (spapr->fwnmi_system_reset_addr != -1) {
+ uint64_t rtas_addr, addr;
+
+ /* get rtas addr from fdt */
+ rtas_addr = spapr_get_rtas_addr();
+ if (!rtas_addr) {
+ qemu_system_guest_panicked(NULL);
+ return;
+ }
+
+ addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2;
+ stq_be_phys(&address_space_memory, addr, env->gpr[3]);
+ stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0);
+ env->gpr[3] = addr;
+ }
+ ppc_cpu_do_system_reset(cs);
+ if (spapr->fwnmi_system_reset_addr != -1) {
+ env->nip = spapr->fwnmi_system_reset_addr;
+ }
+}
+/*
+ * NMI handler: queues spapr_do_system_reset_on_cpu() asynchronously on
+ * every CPU. @n, @cpu_index and @errp are not used here.
+ */
+static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
+{
+ CPUState *cs;
+
+ CPU_FOREACH(cs) {
+ async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
+ }
+}
+/*
+ * Create the device tree memory node for the LMB behind @drc.
+ *
+ * The node describes one SPAPR_MEMORY_BLOCK_SIZE block whose address is
+ * the DRC index multiplied by SPAPR_MEMORY_BLOCK_SIZE; the NUMA node is
+ * read from the attached device's PC_DIMM_NODE_PROP property. The value
+ * returned by spapr_dt_memory_node() is stored in *@fdt_start_offset.
+ * Always returns 0; @errp is not used here.
+ */
+int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+ void *fdt, int *fdt_start_offset, Error **errp)
+{
+ uint64_t addr;
+ uint32_t node;
+
+ addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE;
+ node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP,
+ &error_abort);
+ *fdt_start_offset = spapr_dt_memory_node(spapr, fdt, node, addr,
+ SPAPR_MEMORY_BLOCK_SIZE);
+ return 0;
+}
+/*
+ * Attach @dev to every LMB DRC covering @size bytes starting at
+ * @addr_start, one DRC per SPAPR_MEMORY_BLOCK_SIZE. DRCs of devices that
+ * are not hotplugged are reset right after being attached. For hotplugged
+ * devices a single add-by-count hotplug request is raised; when
+ * @dedicated_hp_event_source is true the request also carries the index
+ * of the first DRC.
+ */
+static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size,
+ bool dedicated_hp_event_source)
+{
+ SpaprDrc *drc;
+ uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE;
+ int i;
+ uint64_t addr = addr_start;
+ bool hotplugged = spapr_drc_hotplugged(dev);
+
+ for (i = 0; i < nr_lmbs; i++) {
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+ addr / SPAPR_MEMORY_BLOCK_SIZE);
+ g_assert(drc);
+
+ /*
+ * memory_device_get_free_addr() provided a range of free addresses
+ * that doesn't overlap with any existing mapping at pre-plug. The
+ * corresponding LMB DRCs are thus assumed to be all attachable.
+ */
+ spapr_drc_attach(drc, dev);
+ if (!hotplugged) {
+ spapr_drc_reset(drc);
+ }
+ addr += SPAPR_MEMORY_BLOCK_SIZE;
+ }
+ /* send hotplug notification to the
+ * guest only in case of hotplugged memory
+ */
+ if (hotplugged) {
+ if (dedicated_hp_event_source) {
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+ addr_start / SPAPR_MEMORY_BLOCK_SIZE);
+ g_assert(drc);
+ spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
+ nr_lmbs,
+ spapr_drc_index(drc));
+ } else {
+ spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB,
+ nr_lmbs);
+ }
+ }
+}
+/*
+ * Plug handler for PC-DIMM and NVDIMM devices. After pc_dimm_plug(),
+ * regular DIMMs get their LMB DRCs attached through spapr_add_lmbs(),
+ * with the OV5_HP_EVT bit of ov5_cas selecting the dedicated hotplug
+ * event source; NVDIMMs are passed to spapr_add_nvdimm() together with
+ * their slot number.
+ */
+static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ SpaprMachineState *ms = SPAPR_MACHINE(hotplug_dev);
+ PCDIMMDevice *dimm = PC_DIMM(dev);
+ uint64_t size, addr;
+ int64_t slot;
+ bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
+
+ size = memory_device_get_region_size(MEMORY_DEVICE(dev), &error_abort);
+
+ pc_dimm_plug(dimm, MACHINE(ms));
+
+ if (!is_nvdimm) {
+ addr = object_property_get_uint(OBJECT(dimm),
+ PC_DIMM_ADDR_PROP, &error_abort);
+ spapr_add_lmbs(dev, addr, size,
+ spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT));
+ } else {
+ slot = object_property_get_int(OBJECT(dimm),
+ PC_DIMM_SLOT_PROP, &error_abort);
+ /* We should have valid slot number at this point */
+ g_assert(slot >= 0);
+ spapr_add_nvdimm(dev, slot);
+ }
+}
+/*
+ * Pre-plug validation for PC-DIMM and NVDIMM devices. Sets @errp and
+ * returns early when the machine class has dr_lmb_enabled unset, the
+ * region size cannot be read, spapr_nvdimm_validate() rejects an NVDIMM,
+ * a regular DIMM's size is not a multiple of SPAPR_MEMORY_BLOCK_SIZE, or
+ * spapr_check_pagesize() rejects the memory backend's page size.
+ * Otherwise the request is handed to pc_dimm_pre_plug().
+ */
+static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ const SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev);
+ SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
+ bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM);
+ PCDIMMDevice *dimm = PC_DIMM(dev);
+ Error *local_err = NULL;
+ uint64_t size;
+ Object *memdev;
+ hwaddr pagesize;
+
+ if (!smc->dr_lmb_enabled) {
+ error_setg(errp, "Memory hotplug not supported for this machine");
+ return;
+ }
+
+ size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ if (is_nvdimm) {
+ if (!spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, errp)) {
+ return;
+ }
+ } else if (size % SPAPR_MEMORY_BLOCK_SIZE) {
+ error_setg(errp, "Hotplugged memory size must be a multiple of "
+ "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB);
+ return;
+ }
+
+ memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP,
+ &error_abort);
+ pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(memdev));
+ if (!spapr_check_pagesize(spapr, pagesize, errp)) {
+ return;
+ }
+
+ pc_dimm_pre_plug(dimm, MACHINE(hotplug_dev), NULL, errp);
+}
+/*
+ * Entry of the machine's pending_dimm_unplugs list: tracks a DIMM whose
+ * unplug is in progress and how many of its LMBs remain to be released
+ * (nr_lmbs is decremented by spapr_lmb_release()).
+ */
+struct SpaprDimmState {
+ PCDIMMDevice *dimm;
+ uint32_t nr_lmbs;
+ QTAILQ_ENTRY(SpaprDimmState) next;
+};
+/*
+ * Look up the pending-unplug entry for @dimm in @s->pending_dimm_unplugs.
+ * Returns the matching entry; callers treat a NULL result as "no entry".
+ */
+static SpaprDimmState *spapr_pending_dimm_unplugs_find(SpaprMachineState *s,
+ PCDIMMDevice *dimm)
+{
+ SpaprDimmState *dimm_state = NULL;
+
+ QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) {
+ if (dimm_state->dimm == dimm) {
+ break;
+ }
+ }
+ return dimm_state;
+}
+/*
+ * Return the pending-unplug entry for @dimm, creating it with @nr_lmbs
+ * and inserting it at the head of the list if none exists yet. An
+ * existing entry is returned unchanged (its nr_lmbs is not updated).
+ */
+static SpaprDimmState *spapr_pending_dimm_unplugs_add(SpaprMachineState *spapr,
+ uint32_t nr_lmbs,
+ PCDIMMDevice *dimm)
+{
+ SpaprDimmState *ds = NULL;
+
+ /*
+ * If this request is for a DIMM whose removal had failed earlier
+ * (due to guest's refusal to remove the LMBs), we would have this
+ * dimm already in the pending_dimm_unplugs list. In that
+ * case don't add again.
+ */
+ ds = spapr_pending_dimm_unplugs_find(spapr, dimm);
+ if (!ds) {
+ ds = g_malloc0(sizeof(SpaprDimmState));
+ ds->nr_lmbs = nr_lmbs;
+ ds->dimm = dimm;
+ QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next);
+ }
+ return ds;
+}
+/*
+ * Unlink @dimm_state from the pending-unplug list and free it. The
+ * pointer must not be used by the caller afterwards.
+ */
+static void spapr_pending_dimm_unplugs_remove(SpaprMachineState *spapr,
+ SpaprDimmState *dimm_state)
+{
+ QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next);
+ g_free(dimm_state);
+}
+/*
+ * Rebuild the pending-unplug entry for @dimm: count the LMB DRCs in the
+ * DIMM's address range that still have a device attached and register
+ * that count through spapr_pending_dimm_unplugs_add().
+ */
+static SpaprDimmState *spapr_recover_pending_dimm_state(SpaprMachineState *ms,
+ PCDIMMDevice *dimm)
+{
+ SpaprDrc *drc;
+ uint64_t size = memory_device_get_region_size(MEMORY_DEVICE(dimm),
+ &error_abort);
+ uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+ uint32_t avail_lmbs = 0;
+ uint64_t addr_start, addr;
+ int i;
+
+ addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+ &error_abort);
+
+ addr = addr_start;
+ for (i = 0; i < nr_lmbs; i++) {
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+ addr / SPAPR_MEMORY_BLOCK_SIZE);
+ g_assert(drc);
+ if (drc->dev) {
+ avail_lmbs++;
+ }
+ addr += SPAPR_MEMORY_BLOCK_SIZE;
+ }
+
+ return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
+}
+
+/*
+ * Roll back a memory hot-unplug request for @dev.
+ *
+ * Does nothing when @dev is NULL. Otherwise the DIMM's pending-unplug
+ * entry (which must exist) is removed, unplug_requested is cleared on
+ * every LMB DRC in the DIMM's address range, and both the
+ * MEM_UNPLUG_ERROR and DEVICE_UNPLUG_GUEST_ERROR QAPI events are sent.
+ *
+ * @dev->id may be NULL for devices created without an id; an empty string
+ * is substituted wherever the id is formatted as text.
+ */
+void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev)
+{
+    SpaprDimmState *ds;
+    PCDIMMDevice *dimm;
+    SpaprDrc *drc;
+    uint32_t nr_lmbs;
+    uint64_t size, addr_start, addr;
+    g_autofree char *qapi_error = NULL;
+    int i;
+
+    if (!dev) {
+        return;
+    }
+
+    dimm = PC_DIMM(dev);
+    ds = spapr_pending_dimm_unplugs_find(spapr, dimm);
+
+    /*
+     * 'ds == NULL' would mean that the DIMM doesn't have a pending
+     * unplug state, but one of its DRC is marked as unplug_requested.
+     * This is bad and weird enough to g_assert() out.
+     */
+    g_assert(ds);
+
+    spapr_pending_dimm_unplugs_remove(spapr, ds);
+
+    size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort);
+    nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+
+    addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+                                          &error_abort);
+
+    addr = addr_start;
+    for (i = 0; i < nr_lmbs; i++) {
+        drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+                              addr / SPAPR_MEMORY_BLOCK_SIZE);
+        g_assert(drc);
+
+        drc->unplug_requested = false;
+        addr += SPAPR_MEMORY_BLOCK_SIZE;
+    }
+
+    /*
+     * Tell QAPI that something happened and the memory
+     * hotunplug wasn't successful. Keep sending
+     * MEM_UNPLUG_ERROR even while sending
+     * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of
+     * MEM_UNPLUG_ERROR is due.
+     */
+    /* Never hand a NULL pointer to a %s conversion. */
+    qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest "
+                                 "for device %s", dev->id ? : "");
+
+    qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error);
+
+    qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id,
+                                              dev->canonical_path);
+}
+
+/*
+ * Callback to be called during DRC release. Decrements the number of LMBs
+ * still pending for the DIMM; once that count reaches zero, the unplug
+ * handler chain is run and the device is unparented.
+ */
+void spapr_lmb_release(DeviceState *dev)
+{
+ HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
+ SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl);
+ SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
+
+ /* This information will get lost if a migration occurs
+ * during the unplug process. In this case recover it. */
+ if (ds == NULL) {
+ ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev));
+ g_assert(ds);
+ /* The DRC being examined by the caller at least must be counted */
+ g_assert(ds->nr_lmbs);
+ }
+
+ if (--ds->nr_lmbs) {
+ return;
+ }
+
+ /*
+ * Now that all the LMBs have been removed by the guest, call the
+ * unplug handler chain. This can never fail.
+ */
+ hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
+ object_unparent(OBJECT(dev));
+}
+/*
+ * Unplug handler for DIMMs: calls pc_dimm_unplug(), unrealizes @dev and
+ * drops the DIMM's pending-unplug entry, which must exist.
+ */
+static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
+ SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev));
+
+ /* We really shouldn't get this far without anything to unplug */
+ g_assert(ds);
+
+ pc_dimm_unplug(PC_DIMM(dev), MACHINE(hotplug_dev));
+ qdev_unrealize(dev);
+ spapr_pending_dimm_unplugs_remove(spapr, ds);
+}
+
+/*
+ * Start a hot-unplug of the DIMM @dev.
+ *
+ * NVDIMMs are rejected, as is a DIMM that already has a pending-unplug
+ * entry; both cases set @errp. Otherwise a pending-unplug entry counting
+ * all of the DIMM's LMBs is created, an unplug request is issued on each
+ * LMB DRC, and one remove-by-count hotplug request indexed by the first
+ * DRC is raised.
+ *
+ * @dev->id may be NULL for devices created without an id; an empty string
+ * is substituted when it is formatted into the error message.
+ */
+static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
+                                        DeviceState *dev, Error **errp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
+    PCDIMMDevice *dimm = PC_DIMM(dev);
+    uint32_t nr_lmbs;
+    uint64_t size, addr_start, addr;
+    int i;
+    SpaprDrc *drc;
+
+    if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) {
+        error_setg(errp, "nvdimm device hot unplug is not supported yet.");
+        return;
+    }
+
+    size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort);
+    nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+
+    addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+                                          &error_abort);
+
+    /*
+     * An existing pending dimm state for this DIMM means that there is an
+     * unplug operation in progress, waiting for the spapr_lmb_release
+     * callback to complete the job (BQL can't cover that far). In this case,
+     * bail out to avoid detaching DRCs that were already released.
+     */
+    if (spapr_pending_dimm_unplugs_find(spapr, dimm)) {
+        /* Never hand a NULL pointer to a %s conversion. */
+        error_setg(errp, "Memory unplug already in progress for device %s",
+                   dev->id ? : "");
+        return;
+    }
+
+    spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm);
+
+    addr = addr_start;
+    for (i = 0; i < nr_lmbs; i++) {
+        drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+                              addr / SPAPR_MEMORY_BLOCK_SIZE);
+        g_assert(drc);
+
+        spapr_drc_unplug_request(drc);
+        addr += SPAPR_MEMORY_BLOCK_SIZE;
+    }
+
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+                          addr_start / SPAPR_MEMORY_BLOCK_SIZE);
+    /* Same invariant as in the loop above; checked before dereferencing. */
+    g_assert(drc);
+    spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
+                                              nr_lmbs, spapr_drc_index(drc));
+}
+
+/*
+ * Callback to be called during DRC release. Runs the unplug handler chain
+ * of the device's hotplug handler and then unparents the device.
+ */
+void spapr_core_release(DeviceState *dev)
+{
+ HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
+
+ /* Call the unplug handler chain. This can never fail. */
+ hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
+ object_unparent(OBJECT(dev));
+}
+/*
+ * Unplug handler for CPU cores. On machine classes with
+ * pre_2_10_has_unused_icps set, a dummy ICP vmstate is registered for
+ * each thread of the core. The core's slot (which must exist) is then
+ * marked empty and the device is unrealized.
+ */
+static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ MachineState *ms = MACHINE(hotplug_dev);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms);
+ CPUCore *cc = CPU_CORE(dev);
+ CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL);
+
+ if (smc->pre_2_10_has_unused_icps) {
+ SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
+ int i;
+
+ for (i = 0; i < cc->nr_threads; i++) {
+ CPUState *cs = CPU(sc->threads[i]);
+
+ pre_2_10_vmstate_register_dummy_icp(cs->cpu_index);
+ }
+ }
+
+ assert(core_slot);
+ core_slot->cpu = NULL;
+ qdev_unrealize(dev);
+}
+/*
+ * Start a hot-unplug of the CPU core @dev. Sets @errp when the core has
+ * no slot or when it occupies slot index 0 (the boot core). Otherwise the
+ * core's DRC gets an unplug request if one is not already pending, and a
+ * remove-by-index hotplug request is raised unconditionally.
+ */
+static
+void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+ int index;
+ SpaprDrc *drc;
+ CPUCore *cc = CPU_CORE(dev);
+
+ if (!spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index)) {
+ error_setg(errp, "Unable to find CPU core with core-id: %d",
+ cc->core_id);
+ return;
+ }
+ if (index == 0) {
+ error_setg(errp, "Boot CPU core may not be unplugged");
+ return;
+ }
+
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU,
+ spapr_vcpu_id(spapr, cc->core_id));
+ g_assert(drc);
+
+ if (!spapr_drc_unplug_requested(drc)) {
+ spapr_drc_unplug_request(drc);
+ }
+
+ /*
+ * spapr_hotplug_req_remove_by_index is left unguarded, out of the
+ * "!spapr_drc_unplug_requested" check, to allow for multiple IRQ
+ * pulses removing the same CPU. Otherwise, in a failed hotunplug
+ * attempt (e.g. the kernel will refuse to remove the last online
+ * CPU), we will never attempt it again because unplug_requested
+ * will still be 'true' in that case.
+ */
+ spapr_hotplug_req_remove_by_index(drc);
+}
+
+/*
+ * Create the device tree node for the CPU core attached to @drc.
+ *
+ * The node is added under the root of @fdt and named "<fw_name>@<id>",
+ * where fw_name comes from the device class of the core's first thread
+ * and id is that thread's vcpu id printed in hex. On success the node
+ * offset is stored in *@fdt_start_offset and 0 is returned. If the node
+ * cannot be created, @errp is set and -1 is returned.
+ */
+int spapr_core_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+                           void *fdt, int *fdt_start_offset, Error **errp)
+{
+    SpaprCpuCore *core = SPAPR_CPU_CORE(drc->dev);
+    CPUState *cs = CPU(core->threads[0]);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    DeviceClass *dc = DEVICE_GET_CLASS(cs);
+    int id = spapr_get_vcpu_id(cpu);
+    g_autofree char *nodename = NULL;
+    int offset;
+
+    nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
+    offset = fdt_add_subnode(fdt, 0, nodename);
+    if (offset < 0) {
+        /* A negative value is a libfdt error code, not a node offset. */
+        error_setg(errp, "unable to create FDT node for CPU core %s",
+                   nodename);
+        return -1;
+    }
+
+    spapr_dt_cpu(cs, fdt, offset, spapr);
+
+    /*
+     * spapr_dt_cpu() does not fill the 'name' property in the
+     * CPU node. The function is called during boot process, before
+     * and after CAS, and overwriting the 'name' property written
+     * by SLOF is not allowed.
+     *
+     * Write it manually after spapr_dt_cpu(). This makes the hotplug
+     * CPUs more compatible with the coldplugged ones, which have
+     * the 'name' property. Linux Kernel also relies on this
+     * property to identify CPU nodes.
+     */
+    _FDT((fdt_setprop_string(fdt, offset, "name", nodename)));
+
+    *fdt_start_offset = offset;
+    return 0;
+}
+/*
+ * Plug handler for CPU cores. Attaches @dev to the core's DRC when one
+ * exists (raising a hotplug request for hotplugged cores, resetting the
+ * DRC otherwise), records the core in its CPU slot, aligns the
+ * compatibility mode of hotplugged threads with first_cpu, and on machine
+ * classes with pre_2_10_has_unused_icps unregisters the dummy ICP vmstate
+ * of each thread.
+ */
+static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+ MachineClass *mc = MACHINE_GET_CLASS(spapr);
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ SpaprCpuCore *core = SPAPR_CPU_CORE(OBJECT(dev));
+ CPUCore *cc = CPU_CORE(dev);
+ CPUState *cs;
+ SpaprDrc *drc;
+ CPUArchId *core_slot;
+ int index;
+ bool hotplugged = spapr_drc_hotplugged(dev);
+ int i;
+
+ core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
+ g_assert(core_slot); /* Already checked in spapr_core_pre_plug() */
+
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU,
+ spapr_vcpu_id(spapr, cc->core_id));
+
+ g_assert(drc || !mc->has_hotpluggable_cpus);
+
+ if (drc) {
+ /*
+ * spapr_core_pre_plug() already guarantees this is a brand new
+ * core being plugged into a free slot. Nothing should already
+ * be attached to the corresponding DRC.
+ */
+ spapr_drc_attach(drc, dev);
+
+ if (hotplugged) {
+ /*
+ * Send hotplug notification interrupt to the guest only
+ * in case of hotplugged CPUs.
+ */
+ spapr_hotplug_req_add_by_index(drc);
+ } else {
+ spapr_drc_reset(drc);
+ }
+ }
+
+ core_slot->cpu = OBJECT(dev);
+
+ /*
+ * Set compatibility mode to match the boot CPU, which was either set
+ * by the machine reset code or by CAS. This really shouldn't fail at
+ * this point.
+ */
+ if (hotplugged) {
+ for (i = 0; i < cc->nr_threads; i++) {
+ ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr,
+ &error_abort);
+ }
+ }
+
+ if (smc->pre_2_10_has_unused_icps) {
+ for (i = 0; i < cc->nr_threads; i++) {
+ cs = CPU(core->threads[i]);
+ pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index);
+ }
+ }
+}
+/*
+ * Pre-plug validation for CPU cores. Sets @errp and returns early when:
+ * the core is hotplugged on a machine without hotpluggable CPUs, its type
+ * differs from the machine's core type, its core id is not a multiple of
+ * the machine's threads-per-core, its thread count differs from
+ * threads-per-core on a machine with hotpluggable CPUs, no slot exists
+ * for the core id, or the slot is already populated. Otherwise the
+ * request is handed to numa_cpu_pre_plug().
+ */
+static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ MachineState *machine = MACHINE(OBJECT(hotplug_dev));
+ MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
+ CPUCore *cc = CPU_CORE(dev);
+ const char *base_core_type = spapr_get_cpu_core_type(machine->cpu_type);
+ const char *type = object_get_typename(OBJECT(dev));
+ CPUArchId *core_slot;
+ int index;
+ unsigned int smp_threads = machine->smp.threads;
+
+ if (dev->hotplugged && !mc->has_hotpluggable_cpus) {
+ error_setg(errp, "CPU hotplug not supported for this machine");
+ return;
+ }
+
+ if (strcmp(base_core_type, type)) {
+ error_setg(errp, "CPU core type should be %s", base_core_type);
+ return;
+ }
+
+ if (cc->core_id % smp_threads) {
+ error_setg(errp, "invalid core id %d", cc->core_id);
+ return;
+ }
+
+ /*
+ * In general we should have homogeneous threads-per-core, but old
+ * (pre hotplug support) machine types allow the last core to have
+ * reduced threads as a compatibility hack for when we allowed
+ * total vcpus not a multiple of threads-per-core.
+ */
+ if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) {
+ error_setg(errp, "invalid nr-threads %d, must be %d", cc->nr_threads,
+ smp_threads);
+ return;
+ }
+
+ core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index);
+ if (!core_slot) {
+ error_setg(errp, "core id %d out of range", cc->core_id);
+ return;
+ }
+
+ if (core_slot->cpu) {
+ error_setg(errp, "core %d already populated", cc->core_id);
+ return;
+ }
+
+ numa_cpu_pre_plug(core_slot, dev, errp);
+}
+/*
+ * Create the device tree node for the PHB attached to @drc. Looks up the
+ * interrupt controller phandle in spapr->fdt_blob, builds the PHB node in
+ * @fdt via spapr_dt_phb() (which receives @fdt_start_offset) and sets the
+ * node's "name" property to "pci". Returns 0 on success, or -1 when the
+ * phandle is not positive or the node cannot be created.
+ */
+int spapr_phb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+ void *fdt, int *fdt_start_offset, Error **errp)
+{
+ SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(drc->dev);
+ int intc_phandle;
+
+ intc_phandle = spapr_irq_get_phandle(spapr, spapr->fdt_blob, errp);
+ if (intc_phandle <= 0) {
+ return -1;
+ }
+
+ if (spapr_dt_phb(spapr, sphb, intc_phandle, fdt, fdt_start_offset)) {
+ error_setg(errp, "unable to create FDT node for PHB %d", sphb->index);
+ return -1;
+ }
+
+ /* generally SLOF creates these, for hotplug it's up to QEMU */
+ _FDT(fdt_setprop_string(fdt, *fdt_start_offset, "name", "pci"));
+
+ return 0;
+}
+/*
+ * Pre-plug validation for PCI host bridges. Returns false with @errp set
+ * when the PHB is hotplugged on a machine class with dr_phb_enabled
+ * unset, when its "index" was left at (uint32_t)-1, or when the DRC for
+ * that index already has a device attached. Otherwise returns the result
+ * of the machine class's phb_placement() hook, which fills in the PHB's
+ * BUID, window addresses and LIOBNs.
+ */
+static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+ SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ const unsigned windows_supported = spapr_phb_windows_supported(sphb);
+ SpaprDrc *drc;
+
+ if (dev->hotplugged && !smc->dr_phb_enabled) {
+ error_setg(errp, "PHB hotplug not supported for this machine");
+ return false;
+ }
+
+ if (sphb->index == (uint32_t)-1) {
+ error_setg(errp, "\"index\" for PAPR PHB is mandatory");
+ return false;
+ }
+
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index);
+ if (drc && drc->dev) {
+ error_setg(errp, "PHB %d already attached", sphb->index);
+ return false;
+ }
+
+ /*
+ * This will check that sphb->index doesn't exceed the maximum number of
+ * PHBs for the current machine type.
+ */
+ return
+ smc->phb_placement(spapr, sphb->index,
+ &sphb->buid, &sphb->io_win_addr,
+ &sphb->mem_win_addr, &sphb->mem64_win_addr,
+ windows_supported, sphb->dma_liobn,
+ &sphb->nv2_gpa_win_addr, &sphb->nv2_atsd_win_addr,
+ errp);
+}
+/*
+ * Plug handler for PCI host bridges. Does nothing when the machine class
+ * has dr_phb_enabled unset. Otherwise attaches @dev to the PHB's DRC and
+ * either raises a hotplug request (hotplugged PHB) or resets the DRC.
+ */
+static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
+ SpaprDrc *drc;
+ bool hotplugged = spapr_drc_hotplugged(dev);
+
+ if (!smc->dr_phb_enabled) {
+ return;
+ }
+
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index);
+ /* hotplug hooks should check it's enabled before getting this far */
+ assert(drc);
+
+ /* spapr_phb_pre_plug() already checked the DRC is attachable */
+ spapr_drc_attach(drc, dev);
+
+ if (hotplugged) {
+ spapr_hotplug_req_add_by_index(drc);
+ } else {
+ spapr_drc_reset(drc);
+ }
+}
+/*
+ * Runs the unplug handler chain of the PHB's hotplug handler and then
+ * unparents the device.
+ */
+void spapr_phb_release(DeviceState *dev)
+{
+ HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev);
+
+ hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
+ object_unparent(OBJECT(dev));
+}
+/*
+ * Unplug handler for PCI host bridges: unrealizes @dev. @hotplug_dev is
+ * not used.
+ */
+static void spapr_phb_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ qdev_unrealize(dev);
+}
+
+/*
+ * Start a hot-unplug of the PCI host bridge @dev. If no unplug is pending
+ * on the PHB's DRC, an unplug request is issued and a remove-by-index
+ * hotplug request is raised; otherwise @errp is set.
+ *
+ * @dev->id may be NULL for devices created without an id; an empty string
+ * is substituted when it is formatted into the error message.
+ */
+static void spapr_phb_unplug_request(HotplugHandler *hotplug_dev,
+                                     DeviceState *dev, Error **errp)
+{
+    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev);
+    SpaprDrc *drc;
+
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index);
+    assert(drc);
+
+    if (!spapr_drc_unplug_requested(drc)) {
+        spapr_drc_unplug_request(drc);
+        spapr_hotplug_req_remove_by_index(drc);
+    } else {
+        /* Never hand a NULL pointer to a %s conversion. */
+        error_setg(errp,
+                   "PCI Host Bridge unplug already in progress for device %s",
+                   dev->id ? : "");
+    }
+}
+/*
+ * Pre-plug validation for the TPM proxy: returns false with @errp set if
+ * the machine already has a TPM proxy, true otherwise. @dev is not used.
+ */
+static
+bool spapr_tpm_proxy_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
+ Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+
+ if (spapr->tpm_proxy != NULL) {
+ error_setg(errp, "Only one TPM proxy can be specified for this machine");
+ return false;
+ }
+
+ return true;
+}
+/*
+ * Plug handler for the TPM proxy: records @dev as the machine's TPM
+ * proxy. The machine must not have one yet.
+ */
+static void spapr_tpm_proxy_plug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+ SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(dev);
+
+ /* Already checked in spapr_tpm_proxy_pre_plug() */
+ g_assert(spapr->tpm_proxy == NULL);
+
+ spapr->tpm_proxy = tpm_proxy;
+}
+/*
+ * Unplug handler for the TPM proxy: unrealizes and unparents @dev, then
+ * clears the machine's TPM proxy pointer.
+ */
+static void spapr_tpm_proxy_unplug(HotplugHandler *hotplug_dev, DeviceState *dev)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev));
+
+ qdev_unrealize(dev);
+ object_unparent(OBJECT(dev));
+ spapr->tpm_proxy = NULL;
+}
+/*
+ * Machine-level plug dispatcher: routes @dev to the DIMM, CPU core, PHB
+ * or TPM proxy plug handler according to its type. Other device types
+ * are ignored. @errp is not used here.
+ */
+static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ spapr_memory_plug(hotplug_dev, dev);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+ spapr_core_plug(hotplug_dev, dev);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+ spapr_phb_plug(hotplug_dev, dev);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+ spapr_tpm_proxy_plug(hotplug_dev, dev);
+ }
+}
+/*
+ * Machine-level unplug dispatcher: routes @dev to the DIMM, CPU core, PHB
+ * or TPM proxy unplug handler according to its type. Other device types
+ * are ignored. @errp is not used here.
+ */
+static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ spapr_memory_unplug(hotplug_dev, dev);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+ spapr_core_unplug(hotplug_dev, dev);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+ spapr_phb_unplug(hotplug_dev, dev);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+ spapr_tpm_proxy_unplug(hotplug_dev, dev);
+ }
+}
+/*
+ * Returns true when memory hot-unplug may be requested: either the
+ * OV5_HP_EVT bit is set in spapr->ov5_cas, or ov5_cas is empty.
+ */
+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
+{
+ return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
+ /*
+ * CAS will process all pending unplug requests.
+ *
+ * HACK: a guest could theoretically have cleared all bits in OV5,
+ * but none of the guests we care for do.
+ */
+ spapr_ovec_empty(spapr->ov5_cas);
+}
+/*
+ * Machine-level unplug-request dispatcher. DIMMs, CPU cores and PHBs are
+ * forwarded to their unplug-request handler only when the corresponding
+ * capability check passes (spapr_memory_hot_unplug_supported(),
+ * has_hotpluggable_cpus, dr_phb_enabled); otherwise @errp is set. A TPM
+ * proxy is unplugged directly, with no request phase.
+ */
+static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ SpaprMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev));
+ MachineClass *mc = MACHINE_GET_CLASS(sms);
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ if (spapr_memory_hot_unplug_supported(sms)) {
+ spapr_memory_unplug_request(hotplug_dev, dev, errp);
+ } else {
+ error_setg(errp, "Memory hot unplug not supported for this guest");
+ }
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+ if (!mc->has_hotpluggable_cpus) {
+ error_setg(errp, "CPU hot unplug not supported on this machine");
+ return;
+ }
+ spapr_core_unplug_request(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+ if (!smc->dr_phb_enabled) {
+ error_setg(errp, "PHB hot unplug not supported on this machine");
+ return;
+ }
+ spapr_phb_unplug_request(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+ spapr_tpm_proxy_unplug(hotplug_dev, dev);
+ }
+}
+/*
+ * Machine-level pre-plug dispatcher: routes @dev to the DIMM, CPU core,
+ * PHB or TPM proxy pre-plug handler according to its type. Failures are
+ * reported through @errp only; the boolean results of the PHB and TPM
+ * proxy handlers are not examined here.
+ */
+static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev,
+ DeviceState *dev, Error **errp)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+ spapr_memory_pre_plug(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+ spapr_core_pre_plug(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+ spapr_phb_pre_plug(hotplug_dev, dev, errp);
+ } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+ spapr_tpm_proxy_pre_plug(hotplug_dev, dev, errp);
+ }
+}
+/*
+ * Select the hotplug handler for @dev. DIMMs, CPU cores, PHBs and TPM
+ * proxies are handled by the machine itself. A PCI device is handled by
+ * the parent of its root bus when that parent is an sPAPR PCI host
+ * bridge. Returns NULL in every other case.
+ */
+static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
+ DeviceState *dev)
+{
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE) ||
+ object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+ return HOTPLUG_HANDLER(machine);
+ }
+ if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+ PCIDevice *pcidev = PCI_DEVICE(dev);
+ PCIBus *root = pci_device_root_bus(pcidev);
+ SpaprPhbState *phb =
+ (SpaprPhbState *)object_dynamic_cast(OBJECT(BUS(root)->parent),
+ TYPE_SPAPR_PCI_HOST_BRIDGE);
+
+ if (phb) {
+ return HOTPLUG_HANDLER(phb);
+ }
+ }
+ return NULL;
+}
+/*
+ * Return the instance properties of the CPU slot that
+ * spapr_find_cpu_slot() reports for @cpu_index. The slot must exist.
+ */
+static CpuInstanceProperties
+spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
+{
+ CPUArchId *core_slot;
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+ /* make sure possible_cpu are initialized */
+ mc->possible_cpu_arch_ids(machine);
+ /* get CPU core slot containing thread that matches cpu_index */
+ core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
+ assert(core_slot);
+ return core_slot->props;
+}
+/*
+ * Default NUMA node for the CPU slot @idx: (idx / smp.cores) modulo the
+ * number of NUMA nodes.
+ */
+static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx)
+{
+ return idx / ms->smp.cores % ms->numa_state->num_nodes;
+}
+/*
+ * Return the machine's list of possible CPU core slots, building it on
+ * first use. The list has max_cpus / threads entries, or
+ * ceil(cpus / threads) entries when the machine class has no
+ * hotpluggable CPUs. Slot i gets arch_id and core_id i * threads. Exits
+ * the process if no sPAPR core type matches the machine's CPU type.
+ */
+static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
+{
+ int i;
+ unsigned int smp_threads = machine->smp.threads;
+ unsigned int smp_cpus = machine->smp.cpus;
+ const char *core_type;
+ int spapr_max_cores = machine->smp.max_cpus / smp_threads;
+ MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+ if (!mc->has_hotpluggable_cpus) {
+ spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads;
+ }
+ if (machine->possible_cpus) {
+ assert(machine->possible_cpus->len == spapr_max_cores);
+ return machine->possible_cpus;
+ }
+
+ core_type = spapr_get_cpu_core_type(machine->cpu_type);
+ if (!core_type) {
+ error_report("Unable to find sPAPR CPU Core definition");
+ exit(1);
+ }
+
+ machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+ sizeof(CPUArchId) * spapr_max_cores);
+ machine->possible_cpus->len = spapr_max_cores;
+ for (i = 0; i < machine->possible_cpus->len; i++) {
+ int core_id = i * smp_threads;
+
+ machine->possible_cpus->cpus[i].type = core_type;
+ machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
+ machine->possible_cpus->cpus[i].arch_id = core_id;
+ machine->possible_cpus->cpus[i].props.has_core_id = true;
+ machine->possible_cpus->cpus[i].props.core_id = core_id;
+ }
+ return machine->possible_cpus;
+}
+/*
+ * Compute the BUID, the @n_dma LIOBNs and the PIO, 32-bit MMIO, 64-bit
+ * MMIO and NVLink2 window addresses for the PHB number @index. Returns
+ * false with @errp set when @index is not below SPAPR_MAX_PHBS, true
+ * otherwise.
+ */
+static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
+ uint64_t *buid, hwaddr *pio,
+ hwaddr *mmio32, hwaddr *mmio64,
+ unsigned n_dma, uint32_t *liobns,
+ hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+{
+ /*
+ * New-style PHB window placement.
+ *
+ * Goals: Gives large (1TiB), naturally aligned 64-bit MMIO window
+ * for each PHB, in addition to 2GiB 32-bit MMIO and 64kiB PIO
+ * windows.
+ *
+ * Some guest kernels can't work with MMIO windows above 1<<46
+ * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
+ *
+ * 32TiB..(32TiB+1984kiB) contains the 64kiB PIO windows for each
+ * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the
+ * 2GiB 32-bit MMIO windows for each PHB. Then 33..64TiB has the
+ * 1TiB 64-bit MMIO windows for each PHB.
+ */
+ const uint64_t base_buid = 0x800000020000000ULL;
+ int i;
+
+ /* Sanity check natural alignments */
+ QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
+ QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
+ QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0);
+ QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0);
+ /* Sanity check bounds */
+ QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) >
+ SPAPR_PCI_MEM32_WIN_SIZE);
+ QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) >
+ SPAPR_PCI_MEM64_WIN_SIZE);
+
+ if (index >= SPAPR_MAX_PHBS) {
+ error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)",
+ SPAPR_MAX_PHBS - 1);
+ return false;
+ }
+
+ *buid = base_buid + index;
+ for (i = 0; i < n_dma; ++i) {
+ liobns[i] = SPAPR_PCI_LIOBN(index, i);
+ }
+
+ *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
+ *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
+ *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
+
+ *nv2gpa = SPAPR_PCI_NV2RAM64_WIN_BASE + index * SPAPR_PCI_NV2RAM64_WIN_SIZE;
+ *nv2atsd = SPAPR_PCI_NV2ATSD_WIN_BASE + index * SPAPR_PCI_NV2ATSD_WIN_SIZE;
+ return true;
+}
+/*
+ * Return the machine's ICS when ics_valid_irq() accepts @irq for it,
+ * NULL otherwise.
+ */
+static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(dev);
+
+ return ics_valid_irq(spapr->ics, irq) ? spapr->ics : NULL;
+}
+/*
+ * Forward a resend request to the machine's ICS via ics_resend().
+ */
+static void spapr_ics_resend(XICSFabric *dev)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(dev);
+
+ ics_resend(spapr->ics);
+}
+/*
+ * Return the ICP of the CPU whose vcpu id is @vcpu_id, or NULL when no
+ * such CPU exists. @xi is not used.
+ */
+static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id)
+{
+ PowerPCCPU *cpu = spapr_find_cpu(vcpu_id);
+
+ return cpu ? spapr_cpu_state(cpu)->icp : NULL;
+}
+/*
+ * Print interrupt controller information to @mon: the output of
+ * spapr_irq_print_info() followed by a line stating whether the irqchip
+ * is in-kernel or emulated.
+ */
+static void spapr_pic_print_info(InterruptStatsProvider *obj,
+ Monitor *mon)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+
+ spapr_irq_print_info(spapr, mon);
+ monitor_printf(mon, "irqchip: %s\n",
+ kvm_irqchip_in_kernel() ? "in-kernel" : "emulated");
+}
+
+/*
+ * This is a XIVE only operation
+ *
+ * Delegates the NVT match to the presenter of the machine's active
+ * interrupt controller and returns its result. A negative count is passed
+ * through unchanged; a count of zero is logged as a guest error and still
+ * returned.
+ */
+static int spapr_match_nvt(XiveFabric *xfb, uint8_t format,
+ uint8_t nvt_blk, uint32_t nvt_idx,
+ bool cam_ignore, uint8_t priority,
+ uint32_t logic_serv, XiveTCTXMatch *match)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(xfb);
+ XivePresenter *xptr = XIVE_PRESENTER(spapr->active_intc);
+ XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr);
+ int count;
+
+ count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore,
+ priority, logic_serv, match);
+ if (count < 0) {
+ return count;
+ }
+
+ /*
+ * When we implement the save and restore of the thread interrupt
+ * contexts in the enter/exit CPU handlers of the machine and the
+ * escalations in QEMU, we should be able to handle non dispatched
+ * vCPUs.
+ *
+ * Until this is done, the sPAPR machine should find at least one
+ * matching context always.
+ */
+ if (count == 0) {
+ qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is not dispatched\n",
+ nvt_blk, nvt_idx);
+ }
+
+ return count;
+}
+/*
+ * Return the vcpu id stored in @cpu.
+ */
+int spapr_get_vcpu_id(PowerPCCPU *cpu)
+{
+ return cpu->vcpu_id;
+}
+/*
+ * Derive the vcpu id for @cpu_index with spapr_vcpu_id() and store it in
+ * @cpu. Returns false with @errp set (including a hint) when KVM is
+ * enabled and rejects that id; returns true otherwise.
+ */
+bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ MachineState *ms = MACHINE(spapr);
+ int vcpu_id;
+
+ vcpu_id = spapr_vcpu_id(spapr, cpu_index);
+
+ if (kvm_enabled() && !kvm_vcpu_id_is_valid(vcpu_id)) {
+ error_setg(errp, "Can't create CPU with id %d in KVM", vcpu_id);
+ error_append_hint(errp, "Adjust the number of cpus to %d "
+ "or try to raise the number of threads per core\n",
+ vcpu_id * ms->smp.threads / spapr->vsmt);
+ return false;
+ }
+
+ cpu->vcpu_id = vcpu_id;
+ return true;
+}
+
+/*
+ * Walk all CPUs and return the first one whose vCPU id equals @vcpu_id,
+ * or NULL if there is none.
+ */
+PowerPCCPU *spapr_find_cpu(int vcpu_id)
+{
+    CPUState *iter;
+
+    CPU_FOREACH(iter) {
+        PowerPCCPU *candidate = POWERPC_CPU(iter);
+
+        if (spapr_get_vcpu_id(candidate) != vcpu_id) {
+            continue;
+        }
+        return candidate;
+    }
+
+    return NULL;
+}
+
+/*
+ * Virtual hypervisor hook run when @cpu starts executing: clears the prod
+ * flag and, if a VPA is registered, bumps the VPA dispatch counter so that
+ * it ends up even.
+ */
+static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    CPUState *cs;
+    hwaddr counter_addr;
+    uint32_t dispatch;
+
+    /* These are only called by TCG, KVM maintains dispatch state */
+
+    spapr_cpu->prod = false;
+    if (!spapr_cpu->vpa_addr) {
+        return;
+    }
+
+    cs = CPU(cpu);
+    counter_addr = spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER;
+
+    dispatch = ldl_be_phys(cs->as, counter_addr) + 1;
+    if (dispatch & 1) {
+        /* Still odd after the increment: the guest-visible value was off */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "VPA: incorrect dispatch counter value for "
+                      "dispatched partition %u, correcting.\n", dispatch);
+        dispatch++;
+    }
+    stl_be_phys(cs->as, counter_addr, dispatch);
+}
+
+/*
+ * Virtual hypervisor hook run when @cpu stops executing: if a VPA is
+ * registered, bumps the VPA dispatch counter so that it ends up odd.
+ */
+static void spapr_cpu_exec_exit(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    CPUState *cs;
+    hwaddr counter_addr;
+    uint32_t dispatch;
+
+    if (!spapr_cpu->vpa_addr) {
+        return;
+    }
+
+    cs = CPU(cpu);
+    counter_addr = spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER;
+
+    dispatch = ldl_be_phys(cs->as, counter_addr) + 1;
+    if (!(dispatch & 1)) {
+        /* Still even after the increment: the guest-visible value was off */
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "VPA: incorrect dispatch counter value for "
+                      "preempted partition %u, correcting.\n", dispatch);
+        dispatch++;
+    }
+    stl_be_phys(cs->as, counter_addr, dispatch);
+}
+
+/*
+ * Class initializer of the abstract sPAPR machine type.
+ *
+ * Installs the MachineClass callbacks and defaults, the handlers for each
+ * QOM interface the type implements (firmware path provider, NMI, hotplug
+ * handler, virtual hypervisor, XICS/XIVE fabric, interrupt stats, VOF) and
+ * the default capability values. The versioned machine types override
+ * individual settings from their own class_options functions.
+ */
+static void spapr_machine_class_init(ObjectClass *oc, void *data)
+{
+ MachineClass *mc = MACHINE_CLASS(oc);
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
+ FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
+ NMIClass *nc = NMI_CLASS(oc);
+ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+ PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+ XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+ InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
+ XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc);
+ VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc);
+
+ mc->desc = "pSeries Logical Partition (PAPR compliant)";
+ mc->ignore_boot_device_suffixes = true;
+
+ /*
+ * We set up the default / latest behaviour here. The class_init
+ * functions for the specific versioned machine types can override
+ * these details for backwards compatibility
+ */
+ mc->init = spapr_machine_init;
+ mc->reset = spapr_machine_reset;
+ mc->block_default_type = IF_SCSI;
+
+ /*
+ * Setting max_cpus to INT32_MAX. Both KVM and TCG max_cpus values
+ * should be limited by the host capability instead of hardcoded.
+ * max_cpus for KVM guests will be checked in kvm_init(), and TCG
+ * guests are welcome to have as many CPUs as the host is capable
+ * of emulating.
+ */
+ mc->max_cpus = INT32_MAX;
+
+ mc->no_parallel = 1;
+ mc->default_boot_order = "";
+ mc->default_ram_size = 512 * MiB;
+ mc->default_ram_id = "ppc_spapr.ram";
+ mc->default_display = "std";
+ mc->kvm_type = spapr_kvm_type;
+ machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
+ mc->pci_allow_0_address = true;
+ assert(!mc->get_hotplug_handler);
+ mc->get_hotplug_handler = spapr_get_hotplug_handler;
+ hc->pre_plug = spapr_machine_device_pre_plug;
+ hc->plug = spapr_machine_device_plug;
+ mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
+ mc->get_default_cpu_node_id = spapr_get_default_cpu_node_id;
+ mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
+ hc->unplug_request = spapr_machine_device_unplug_request;
+ hc->unplug = spapr_machine_device_unplug;
+
+ smc->dr_lmb_enabled = true;
+ smc->update_dt_enabled = true;
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
+ mc->has_hotpluggable_cpus = true;
+ mc->nvdimm_supported = true;
+ smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
+ fwc->get_dev_path = spapr_get_fw_dev_path;
+ nc->nmi_monitor_handler = spapr_nmi;
+ smc->phb_placement = spapr_phb_placement;
+ vhc->hypercall = emulate_spapr_hypercall;
+ vhc->hpt_mask = spapr_hpt_mask;
+ vhc->map_hptes = spapr_map_hptes;
+ vhc->unmap_hptes = spapr_unmap_hptes;
+ vhc->hpte_set_c = spapr_hpte_set_c;
+ vhc->hpte_set_r = spapr_hpte_set_r;
+ vhc->get_pate = spapr_get_pate;
+ vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr;
+ vhc->cpu_exec_enter = spapr_cpu_exec_enter;
+ vhc->cpu_exec_exit = spapr_cpu_exec_exit;
+ xic->ics_get = spapr_ics_get;
+ xic->ics_resend = spapr_ics_resend;
+ xic->icp_get = spapr_icp_get;
+ ispc->print_info = spapr_pic_print_info;
+ /* Force NUMA node memory size to be a multiple of
+ * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
+ * in which LMBs are represented and hot-added
+ */
+ mc->numa_mem_align_shift = 28;
+ mc->auto_enable_numa = true;
+
+ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
+ smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND;
+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND;
+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND;
+ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */
+ smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
+ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON;
+ smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF;
+ spapr_caps_add_properties(smc);
+ smc->irq = &spapr_irq_dual;
+ smc->dr_phb_enabled = true;
+ smc->linux_pci_probe = true;
+ smc->smp_threads_vsmt = true;
+ smc->nr_xirqs = SPAPR_NR_XIRQS;
+ xfc->match_nvt = spapr_match_nvt;
+ vmc->client_architecture_support = spapr_vof_client_architecture_support;
+ vmc->quiesce = spapr_vof_quiesce;
+ vmc->setprop = spapr_vof_setprop;
+}
+
+/*
+ * QOM type description of the abstract base sPAPR machine. The concrete
+ * machines are the versioned child types created by DEFINE_SPAPR_MACHINE().
+ */
+static const TypeInfo spapr_machine_info = {
+ .name = TYPE_SPAPR_MACHINE,
+ .parent = TYPE_MACHINE,
+ .abstract = true,
+ .instance_size = sizeof(SpaprMachineState),
+ .instance_init = spapr_instance_init,
+ .instance_finalize = spapr_machine_finalizefn,
+ .class_size = sizeof(SpaprMachineClass),
+ .class_init = spapr_machine_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_FW_PATH_PROVIDER },
+ { TYPE_NMI },
+ { TYPE_HOTPLUG_HANDLER },
+ { TYPE_PPC_VIRTUAL_HYPERVISOR },
+ { TYPE_XICS_FABRIC },
+ { TYPE_INTERRUPT_STATS_PROVIDER },
+ { TYPE_XIVE_FABRIC },
+ { TYPE_VOF_MACHINE_IF },
+ { }
+ },
+};
+
+/*
+ * Extra class settings for the newest versioned machine only: it gets the
+ * "pseries" alias and is flagged as the default machine.
+ */
+static void spapr_machine_latest_class_options(MachineClass *mc)
+{
+ mc->alias = "pseries";
+ mc->is_default = true;
+}
+
+/*
+ * Define and register one versioned machine type as a child of
+ * TYPE_SPAPR_MACHINE, named MACHINE_TYPE_NAME("pseries-" verstr).
+ *
+ * A function spapr_machine_<suffix>_class_options() must already exist; the
+ * generated class_init calls it and, when @latest is true, also applies
+ * spapr_machine_latest_class_options().
+ */
+#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest) \
+ static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \
+ void *data) \
+ { \
+ MachineClass *mc = MACHINE_CLASS(oc); \
+ spapr_machine_##suffix##_class_options(mc); \
+ if (latest) { \
+ spapr_machine_latest_class_options(mc); \
+ } \
+ } \
+ static const TypeInfo spapr_machine_##suffix##_info = { \
+ .name = MACHINE_TYPE_NAME("pseries-" verstr), \
+ .parent = TYPE_SPAPR_MACHINE, \
+ .class_init = spapr_machine_##suffix##_class_init, \
+ }; \
+ static void spapr_machine_register_##suffix(void) \
+ { \
+ type_register(&spapr_machine_##suffix##_info); \
+ } \
+ type_init(spapr_machine_register_##suffix)
+
+/*
+ * pseries-6.2
+ *
+ * Latest version: nothing to override. Older versions chain to this
+ * function before applying their own compatibility settings.
+ */
+static void spapr_machine_6_2_class_options(MachineClass *mc)
+{
+ /* Defaults for the latest behaviour inherited from the base class */
+}
+
+DEFINE_SPAPR_MACHINE(6_2, "6.2", true);
+
+/*
+ * pseries-6.1
+ *
+ * 6.2 options plus the 6.1 hardware compat properties, the pre-6.2 NUMA
+ * affinity flag and prefer_sockets for SMP.
+ */
+static void spapr_machine_6_1_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_6_2_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len);
+ smc->pre_6_2_numa_affinity = true;
+ mc->smp_props.prefer_sockets = true;
+}
+
+DEFINE_SPAPR_MACHINE(6_1, "6.1", false);
+
+/*
+ * pseries-6.0
+ *
+ * 6.1 options plus the 6.0 hardware compat properties.
+ */
+static void spapr_machine_6_0_class_options(MachineClass *mc)
+{
+ spapr_machine_6_1_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len);
+}
+
+DEFINE_SPAPR_MACHINE(6_0, "6.0", false);
+
+/*
+ * pseries-5.2
+ *
+ * 6.0 options plus the 5.2 hardware compat properties.
+ */
+static void spapr_machine_5_2_class_options(MachineClass *mc)
+{
+ spapr_machine_6_0_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len);
+}
+
+DEFINE_SPAPR_MACHINE(5_2, "5.2", false);
+
+/*
+ * pseries-5.1
+ *
+ * 5.2 options plus the 5.1 hardware compat properties and the pre-5.2
+ * NUMA associativity flag.
+ */
+static void spapr_machine_5_1_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_5_2_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_5_1, hw_compat_5_1_len);
+ smc->pre_5_2_numa_associativity = true;
+}
+
+DEFINE_SPAPR_MACHINE(5_1, "5.1", false);
+
+/*
+ * pseries-5.0
+ *
+ * 5.1 options plus the 5.0 hardware compat properties, the PHB
+ * "pre-5.1-associativity" property, numa_mem_supported and the pre-5.1
+ * associativity reference points flag.
+ */
+static void spapr_machine_5_0_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" },
+ };
+
+ spapr_machine_5_1_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_5_0, hw_compat_5_0_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ mc->numa_mem_supported = true;
+ smc->pre_5_1_assoc_refpoints = true;
+}
+
+DEFINE_SPAPR_MACHINE(5_0, "5.0", false);
+
+/*
+ * pseries-4.2
+ *
+ * 5.0 options plus the 4.2 hardware compat properties; CCF-assist and
+ * FWNMI capabilities default to off, the RMA limit is 16 GiB and NVDIMM
+ * is not supported.
+ */
+static void spapr_machine_4_2_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_5_0_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len);
+ smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+ smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF;
+ smc->rma_limit = 16 * GiB;
+ mc->nvdimm_supported = false;
+}
+
+DEFINE_SPAPR_MACHINE(4_2, "4.2", false);
+
+/*
+ * pseries-4.1
+ *
+ * 4.2 options plus the 4.1 hardware compat properties; restricts the PHB
+ * "pgsz" property and turns off linux_pci_probe and smp_threads_vsmt.
+ */
+static void spapr_machine_4_1_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ /* Only allow 4kiB and 64kiB IOMMU pagesizes */
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" },
+ };
+
+ spapr_machine_4_2_class_options(mc);
+ smc->linux_pci_probe = false;
+ smc->smp_threads_vsmt = false;
+ compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+}
+
+DEFINE_SPAPR_MACHINE(4_1, "4.1", false);
+
+/*
+ * pseries-4.0
+ */
+
+/*
+ * PHB placement for pseries-4.0 and older: same as spapr_phb_placement(),
+ * except that on success the two NVLink2 addresses are forced to zero.
+ */
+static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index,
+                              uint64_t *buid, hwaddr *pio,
+                              hwaddr *mmio32, hwaddr *mmio64,
+                              unsigned n_dma, uint32_t *liobns,
+                              hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+{
+    bool placed = spapr_phb_placement(spapr, index, buid, pio, mmio32, mmio64,
+                                      n_dma, liobns, nv2gpa, nv2atsd, errp);
+
+    if (placed) {
+        *nv2gpa = 0;
+        *nv2atsd = 0;
+    }
+    return placed;
+}
+/*
+ * Class options for pseries-4.0: 4.1 options plus the 4.0 hardware compat
+ * properties, the 4.0 PHB placement, the XICS-only interrupt backend and
+ * the pre-4.1 migration flag.
+ */
+static void spapr_machine_4_0_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_4_1_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len);
+ smc->phb_placement = phb_placement_4_0;
+ smc->irq = &spapr_irq_xics;
+ smc->pre_4_1_migration = true;
+}
+
+DEFINE_SPAPR_MACHINE(4_0, "4.0", false);
+
+/*
+ * pseries-3.1
+ *
+ * 4.0 options plus the 3.1 hardware compat properties. The default CPU is
+ * power8_v2.0, device tree updates and PHB dynamic reconfiguration are
+ * off, the broken host serial/model behaviour is kept, the CFPC/SBBC/IBS
+ * capabilities default to "broken" and the large decrementer is off.
+ */
+static void spapr_machine_3_1_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_4_0_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len);
+
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0");
+ smc->update_dt_enabled = false;
+ smc->dr_phb_enabled = false;
+ smc->broken_host_serial_model = true;
+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN;
+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
+ smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
+}
+
+DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
+
+/*
+ * pseries-3.0
+ *
+ * 3.1 options plus the 3.0 hardware compat properties; uses the legacy
+ * IRQ allocation scheme with 0x400 XICS IRQs and the legacy XICS backend.
+ */
+
+static void spapr_machine_3_0_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_3_1_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len);
+
+ smc->legacy_irq_allocation = true;
+ smc->nr_xirqs = 0x400;
+ smc->irq = &spapr_irq_xics_legacy;
+}
+
+DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
+
+/*
+ * pseries-2.12
+ *
+ * 3.0 options plus the 2.12 hardware compat properties and the pre-3.0
+ * migration properties for CPUs and CPU cores. The hpt-max-page-size
+ * default is left at 0 here and chosen later (see the comment below).
+ */
+static void spapr_machine_2_12_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" },
+ { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" },
+ };
+
+ spapr_machine_3_0_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_2_12, hw_compat_2_12_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+ /* We depend on kvm_enabled() to choose a default value for the
+ * hpt-max-page-size capability. Of course we can't do it here
+ * because this is too early and the HW accelerator isn't initialized
+ * yet. Postpone this to machine init (see default_caps_with_cpu()).
+ */
+ smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
+}
+
+DEFINE_SPAPR_MACHINE(2_12, "2.12", false);
+
+/*
+ * pseries-2.12-sxxm: the 2.12 options with different CFPC, SBBC and IBS
+ * capability defaults (workaround, workaround and fixed-ccd respectively).
+ */
+static void spapr_machine_2_12_sxxm_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_2_12_class_options(mc);
+ smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND;
+ smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND;
+ smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD;
+}
+
+DEFINE_SPAPR_MACHINE(2_12_sxxm, "2.12-sxxm", false);
+
+/*
+ * pseries-2.11
+ *
+ * 2.12 options plus the 2.11 hardware compat properties; the HTM
+ * capability defaults to on.
+ */
+
+static void spapr_machine_2_11_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_2_12_class_options(mc);
+ smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON;
+ compat_props_add(mc->compat_props, hw_compat_2_11, hw_compat_2_11_len);
+}
+
+DEFINE_SPAPR_MACHINE(2_11, "2.11", false);
+
+/*
+ * pseries-2.10
+ *
+ * 2.11 options plus the 2.10 hardware compat properties.
+ */
+
+static void spapr_machine_2_10_class_options(MachineClass *mc)
+{
+ spapr_machine_2_11_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_2_10, hw_compat_2_10_len);
+}
+
+DEFINE_SPAPR_MACHINE(2_10, "2.10", false);
+
+/*
+ * pseries-2.9
+ *
+ * 2.10 options plus the 2.9 hardware compat properties and the CPU
+ * "pre-2.10-migration" property; keeps the unused ICPs and disables HPT
+ * resizing by default.
+ */
+
+static void spapr_machine_2_9_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" },
+ };
+
+ spapr_machine_2_10_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ smc->pre_2_10_has_unused_icps = true;
+ smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
+}
+
+DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
+
+/*
+ * pseries-2.8
+ *
+ * 2.9 options plus the 2.8 hardware compat properties; turns off the PHB
+ * PCIe extended configuration space and uses a NUMA memory alignment
+ * shift of 23 instead of 28.
+ */
+
+static void spapr_machine_2_8_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" },
+ };
+
+ spapr_machine_2_9_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_2_8, hw_compat_2_8_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ mc->numa_mem_align_shift = 23;
+}
+
+DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
+
+/*
+ * pseries-2.7
+ */
+
+/*
+ * Legacy PHB placement used by pseries-2.7 and older.
+ *
+ * PHB 0 starts at the top of RAM (including the device memory region when
+ * maxram_size exceeds ram_size) rounded up to 1 TiB; each further PHB
+ * follows at 64 GiB intervals. Within a PHB's slot the PIO window is at
+ * +2 GiB and the 32-bit MMIO window at +2.5 GiB.
+ *
+ * Outputs: *buid, *pio, *mmio32 and liobns[0..n_dma-1] are filled in;
+ * *mmio64 is deliberately left untouched; *nv2gpa and *nv2atsd are zeroed.
+ *
+ * Returns true on success, or false with @errp set when @index is above
+ * 255 (no output is written in that case).
+ */
+static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index,
+                              uint64_t *buid, hwaddr *pio,
+                              hwaddr *mmio32, hwaddr *mmio64,
+                              unsigned n_dma, uint32_t *liobns,
+                              hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+{
+    /* Legacy PHB placement for pseries-2.7 and earlier machine types */
+    const uint64_t base_buid = 0x800000020000000ULL;
+    const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */
+    const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */
+    const hwaddr pio_offset = 0x80000000; /* 2 GiB */
+    const uint32_t max_index = 255;
+    const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */
+
+    uint64_t ram_top = MACHINE(spapr)->ram_size;
+    hwaddr phb0_base, phb_base;
+    unsigned i; /* same type as n_dma: no signed/unsigned comparison */
+
+    /* Do we have device memory? */
+    if (MACHINE(spapr)->maxram_size > ram_top) {
+        /* Can't just use maxram_size, because there may be an
+         * alignment gap between normal and device memory regions
+         */
+        ram_top = MACHINE(spapr)->device_memory->base +
+            memory_region_size(&MACHINE(spapr)->device_memory->mr);
+    }
+
+    phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment);
+
+    if (index > max_index) {
+        error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)",
+                   max_index);
+        return false;
+    }
+
+    *buid = base_buid + index;
+    for (i = 0; i < n_dma; ++i) {
+        liobns[i] = SPAPR_PCI_LIOBN(index, i);
+    }
+
+    phb_base = phb0_base + index * phb_spacing;
+    *pio = phb_base + pio_offset;
+    *mmio32 = phb_base + mmio_offset;
+    /*
+     * We don't set the 64-bit MMIO window, relying on the PHB's
+     * fallback behaviour of automatically splitting a large "32-bit"
+     * window into contiguous 32-bit and 64-bit windows
+     */
+
+    *nv2gpa = 0;
+    *nv2atsd = 0;
+    return true;
+}
+
+/*
+ * Class options for pseries-2.7: 2.8 options plus the 2.7 hardware compat
+ * properties, PHB window-size and pre-2.8 migration properties, power7_v2.3
+ * as default CPU, modern hotplug events off by default and the legacy PHB
+ * placement.
+ */
+static void spapr_machine_2_7_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000", },
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0", },
+ { TYPE_POWERPC_CPU, "pre-2.8-migration", "on", },
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on", },
+ };
+
+ spapr_machine_2_8_class_options(mc);
+ mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3");
+ mc->default_machine_opts = "modern-hotplug-events=off";
+ compat_props_add(mc->compat_props, hw_compat_2_7, hw_compat_2_7_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ smc->phb_placement = phb_placement_2_7;
+}
+
+DEFINE_SPAPR_MACHINE(2_7, "2.7", false);
+
+/*
+ * pseries-2.6
+ *
+ * 2.7 options plus the 2.6 hardware compat properties; PHB "ddw" is off
+ * and CPUs are not hotpluggable.
+ */
+
+static void spapr_machine_2_6_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" },
+ };
+
+ spapr_machine_2_7_class_options(mc);
+ mc->has_hotpluggable_cpus = false;
+ compat_props_add(mc->compat_props, hw_compat_2_6, hw_compat_2_6_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+}
+
+DEFINE_SPAPR_MACHINE(2_6, "2.6", false);
+
+/*
+ * pseries-2.5
+ *
+ * 2.6 options plus the 2.5 hardware compat properties; spapr-vlan RX
+ * buffer pools are off and use_ohci_by_default is set.
+ */
+
+static void spapr_machine_2_5_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+ static GlobalProperty compat[] = {
+ { "spapr-vlan", "use-rx-buffer-pools", "off" },
+ };
+
+ spapr_machine_2_6_class_options(mc);
+ smc->use_ohci_by_default = true;
+ compat_props_add(mc->compat_props, hw_compat_2_5, hw_compat_2_5_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+}
+
+DEFINE_SPAPR_MACHINE(2_5, "2.5", false);
+
+/*
+ * pseries-2.4
+ *
+ * 2.5 options plus the 2.4 hardware compat properties; dr_lmb_enabled is
+ * turned off.
+ */
+
+static void spapr_machine_2_4_class_options(MachineClass *mc)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+ spapr_machine_2_5_class_options(mc);
+ smc->dr_lmb_enabled = false;
+ compat_props_add(mc->compat_props, hw_compat_2_4, hw_compat_2_4_len);
+}
+
+DEFINE_SPAPR_MACHINE(2_4, "2.4", false);
+
+/*
+ * pseries-2.3
+ *
+ * 2.4 options plus the 2.3 hardware compat properties; PHB
+ * "dynamic-reconfiguration" is off.
+ */
+
+static void spapr_machine_2_3_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { "spapr-pci-host-bridge", "dynamic-reconfiguration", "off" },
+ };
+ spapr_machine_2_4_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_2_3, hw_compat_2_3_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+}
+DEFINE_SPAPR_MACHINE(2_3, "2.3", false);
+
+/*
+ * pseries-2.2
+ *
+ * 2.3 options plus the 2.2 hardware compat properties and a PHB
+ * "mem_win_size" of 0x20000000; the default machine options additionally
+ * set suppress-vmdesc=on.
+ */
+
+static void spapr_machine_2_2_class_options(MachineClass *mc)
+{
+ static GlobalProperty compat[] = {
+ { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0x20000000" },
+ };
+
+ spapr_machine_2_3_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_2_2, hw_compat_2_2_len);
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ mc->default_machine_opts = "modern-hotplug-events=off,suppress-vmdesc=on";
+}
+DEFINE_SPAPR_MACHINE(2_2, "2.2", false);
+
+/*
+ * pseries-2.1
+ *
+ * 2.2 options plus the 2.1 hardware compat properties.
+ */
+
+static void spapr_machine_2_1_class_options(MachineClass *mc)
+{
+ spapr_machine_2_2_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len);
+}
+DEFINE_SPAPR_MACHINE(2_1, "2.1", false);
+
+/* Register the abstract base sPAPR machine type with QOM. */
+static void spapr_machine_register_types(void)
+{
+ type_register_static(&spapr_machine_info);
+}
+
+type_init(spapr_machine_register_types)
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
new file mode 100644
index 000000000..ed7c077a0
--- /dev/null
+++ b/hw/ppc/spapr_caps.c
@@ -0,0 +1,944 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition capabilities handling
+ *
+ * Copyright (c) 2017 David Gibson, Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "sysemu/hw_accel.h"
+#include "exec/ram_addr.h"
+#include "target/ppc/cpu.h"
+#include "target/ppc/mmu-hash64.h"
+#include "cpu-models.h"
+#include "kvm_ppc.h"
+#include "migration/vmstate.h"
+#include "sysemu/tcg.h"
+
+#include "hw/ppc/spapr.h"
+
+/*
+ * Set of string values accepted by a custom-string capability. The string
+ * accessors in this file map a capability's numeric value to vals[value]
+ * and back, so a value is simply an index into vals[].
+ */
+typedef struct SpaprCapPossible {
+ int num; /* size of vals array below */
+ const char *help; /* help text for vals */
+ /*
+ * Note:
+ * - because of the way compatibility is determined vals MUST be ordered
+ * such that later options are a superset of all preceding options.
+ * - the order of vals must be preserved, that is their index is important,
+ * however vals may be added to the end of the list so long as the above
+ * point is observed
+ */
+ const char *vals[];
+} SpaprCapPossible;
+
+/*
+ * Description of one sPAPR capability: its name and description, its slot
+ * (index) in the capability arrays, the property accessors and type, and
+ * optional hooks.
+ * NOTE(review): the code that consumes the hooks is outside this view.
+ */
+typedef struct SpaprCapabilityInfo {
+ const char *name;
+ const char *description;
+ int index;
+
+ /* Getter and Setter Function Pointers */
+ ObjectPropertyAccessor *get;
+ ObjectPropertyAccessor *set;
+ const char *type;
+ /* Possible values if this is a custom string type */
+ SpaprCapPossible *possible;
+ /* Make sure the virtual hardware can support this capability */
+ void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp);
+ void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu,
+ uint8_t val, Error **errp);
+ bool (*migrate_needed)(void *opaque);
+} SpaprCapabilityInfo;
+
+/*
+ * Property getter for boolean capabilities: reports true exactly when the
+ * capability's current value is SPAPR_CAP_ON. @opaque is the capability's
+ * SpaprCapabilityInfo.
+ */
+static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    SpaprCapabilityInfo *info = opaque;
+    bool enabled = (spapr_get_cap(SPAPR_MACHINE(obj), info->index)
+                    == SPAPR_CAP_ON);
+
+    visit_type_bool(v, name, &enabled, errp);
+}
+
+/*
+ * Property setter for boolean capabilities: on a successful visit, marks
+ * the capability as set on the command line and stores SPAPR_CAP_ON or
+ * SPAPR_CAP_OFF as its effective value. Nothing is changed if the visit
+ * fails.
+ */
+static void spapr_cap_set_bool(Object *obj, Visitor *v, const char *name,
+                               void *opaque, Error **errp)
+{
+    SpaprCapabilityInfo *info = opaque;
+    SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+    bool requested;
+
+    if (!visit_type_bool(v, name, &requested, errp)) {
+        return;
+    }
+
+    spapr->cmd_line_caps[info->index] = true;
+    if (requested) {
+        spapr->eff.caps[info->index] = SPAPR_CAP_ON;
+    } else {
+        spapr->eff.caps[info->index] = SPAPR_CAP_OFF;
+    }
+}
+
+
+/*
+ * Property getter for custom-string capabilities: reports the entry of the
+ * capability's possible-values table selected by its current value, or
+ * sets @errp if the value is outside the table.
+ */
+static void spapr_cap_get_string(Object *obj, Visitor *v, const char *name,
+                                 void *opaque, Error **errp)
+{
+    SpaprCapabilityInfo *info = opaque;
+    SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+    uint8_t idx = spapr_get_cap(spapr, info->index);
+    char *text;
+
+    if (idx >= info->possible->num) {
+        error_setg(errp, "Invalid value (%d) for cap-%s", idx, info->name);
+        return;
+    }
+
+    /* The visitor takes a char **, so hand it a private copy */
+    text = g_strdup(info->possible->vals[idx]);
+    visit_type_str(v, name, &text, errp);
+    g_free(text);
+}
+
+/*
+ * Property setter for custom-string capabilities.
+ *
+ * "?" reports the capability's help text through @errp. Otherwise the
+ * string is matched case-insensitively against the possible-values table;
+ * on a match the capability is marked as set on the command line and the
+ * matching index becomes its effective value. An unknown string sets
+ * @errp.
+ */
+static void spapr_cap_set_string(Object *obj, Visitor *v, const char *name,
+                                 void *opaque, Error **errp)
+{
+    SpaprCapabilityInfo *info = opaque;
+    SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+    uint8_t idx;
+    char *mode;
+
+    if (!visit_type_str(v, name, &mode, errp)) {
+        return;
+    }
+
+    if (strcmp(mode, "?") == 0) {
+        error_setg(errp, "%s", info->possible->help);
+        g_free(mode);
+        return;
+    }
+
+    for (idx = 0; idx < info->possible->num; idx++) {
+        if (strcasecmp(mode, info->possible->vals[idx]) == 0) {
+            spapr->cmd_line_caps[info->index] = true;
+            spapr->eff.caps[info->index] = idx;
+            g_free(mode);
+            return;
+        }
+    }
+
+    error_setg(errp, "Invalid capability mode \"%s\" for cap-%s", mode,
+               info->name);
+    g_free(mode);
+}
+
+/*
+ * Property getter for page-size capabilities: the stored value is a shift
+ * count, reported to the visitor as a size of 1 << shift.
+ */
+static void spapr_cap_get_pagesize(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    SpaprCapabilityInfo *info = opaque;
+    uint8_t shift = spapr_get_cap(SPAPR_MACHINE(obj), info->index);
+    uint64_t bytes = 1ULL << shift;
+
+    visit_type_size(v, name, &bytes, errp);
+}
+
+/*
+ * Property setter for page-size capabilities: accepts a size that is a
+ * power of two and stores its shift count as the effective value, marking
+ * the capability as set on the command line. Any other size sets @errp.
+ */
+static void spapr_cap_set_pagesize(Object *obj, Visitor *v, const char *name,
+                                   void *opaque, Error **errp)
+{
+    SpaprCapabilityInfo *info = opaque;
+    SpaprMachineState *spapr = SPAPR_MACHINE(obj);
+    uint64_t bytes;
+    uint8_t shift;
+
+    if (!visit_type_size(v, name, &bytes, errp)) {
+        return;
+    }
+
+    if (!is_power_of_2(bytes)) {
+        error_setg(errp, "cap-%s must be a power of 2", info->name);
+        return;
+    }
+
+    shift = ctz64(bytes);
+    spapr->eff.caps[info->index] = shift;
+    spapr->cmd_line_caps[info->index] = true;
+}
+
+/*
+ * Apply hook for cap-htm: when the capability is requested, fail under TCG
+ * and under a KVM that lacks HTM support. A zero value is accepted
+ * unconditionally.
+ */
+static void cap_htm_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    const char *problem = NULL;
+
+    if (!val) {
+        /* TODO: We don't support disabling htm yet */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        problem = "No Transactional Memory support in TCG";
+    } else if (kvm_enabled() && !kvmppc_has_cap_htm()) {
+        problem = "KVM implementation does not support Transactional Memory";
+    }
+
+    if (problem) {
+        error_setg(errp, "%s", problem);
+        error_append_hint(errp, "Try appending -machine cap-htm=off\n");
+    }
+}
+
+/*
+ * Apply hook for cap-vsx: when the capability is requested, the first CPU
+ * must advertise PPC2_VSX, otherwise @errp is set. A zero value is
+ * accepted unconditionally.
+ */
+static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    CPUPPCState *env = &POWERPC_CPU(first_cpu)->env;
+
+    if (!val) {
+        /* TODO: We don't support disabling vsx yet */
+        return;
+    }
+
+    /* Allowable CPUs in spapr_cpu_core.c should already have gotten
+     * rid of anything that doesn't do VMX */
+    g_assert(env->insns_flags & PPC_ALTIVEC);
+
+    if (env->insns_flags2 & PPC2_VSX) {
+        return;
+    }
+
+    error_setg(errp, "VSX support not available");
+    error_append_hint(errp, "Try appending -machine cap-vsx=off\n");
+}
+
+/*
+ * Apply hook for cap-dfp: when the capability is requested, the first CPU
+ * must advertise PPC2_DFP, otherwise @errp is set. A zero value is
+ * accepted unconditionally.
+ */
+static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    CPUPPCState *env = &POWERPC_CPU(first_cpu)->env;
+
+    if (!val) {
+        /* TODO: We don't support disabling dfp yet */
+        return;
+    }
+
+    if (env->insns_flags2 & PPC2_DFP) {
+        return;
+    }
+
+    error_setg(errp, "DFP support not available");
+    error_append_hint(errp, "Try appending -machine cap-dfp=off\n");
+}
+
+/* Value names for cap-cfpc, in increasing order; the index is the value. */
+SpaprCapPossible cap_cfpc_possible = {
+ .num = 3,
+ .vals = {"broken", "workaround", "fixed"},
+ .help = "broken - no protection, workaround - workaround available,"
+ " fixed - fixed in hardware",
+};
+
+/*
+ * Apply hook for cap-cfpc. Under TCG any non-zero level only produces a
+ * warning. Under KVM a level above what KVM reports is an error, with a
+ * hint naming the highest level KVM supports.
+ */
+static void cap_safe_cache_apply(SpaprMachineState *spapr, uint8_t val,
+                                 Error **errp)
+{
+    ERRP_GUARD();
+    uint8_t kvm_val = kvmppc_get_cap_safe_cache();
+
+    if (tcg_enabled() && val) {
+        /* TCG only supports broken, allow other values and print a warning */
+        warn_report("TCG doesn't support requested feature, cap-cfpc=%s",
+                    cap_cfpc_possible.vals[val]);
+        return;
+    }
+
+    if (!kvm_enabled() || val <= kvm_val) {
+        return;
+    }
+
+    error_setg(errp,
+               "Requested safe cache capability level not supported by KVM");
+    error_append_hint(errp, "Try appending -machine cap-cfpc=%s\n",
+                      cap_cfpc_possible.vals[kvm_val]);
+}
+
+/* Value names for cap-sbbc, in increasing order; the index is the value. */
+SpaprCapPossible cap_sbbc_possible = {
+ .num = 3,
+ .vals = {"broken", "workaround", "fixed"},
+ .help = "broken - no protection, workaround - workaround available,"
+ " fixed - fixed in hardware",
+};
+
+/*
+ * Apply hook for cap-sbbc. Under TCG any non-zero level only produces a
+ * warning. Under KVM a level above what KVM reports is an error, with a
+ * hint naming the highest level KVM supports.
+ */
+static void cap_safe_bounds_check_apply(SpaprMachineState *spapr, uint8_t val,
+                                        Error **errp)
+{
+    ERRP_GUARD();
+    uint8_t kvm_val = kvmppc_get_cap_safe_bounds_check();
+
+    if (tcg_enabled() && val) {
+        /* TCG only supports broken, allow other values and print a warning */
+        warn_report("TCG doesn't support requested feature, cap-sbbc=%s",
+                    cap_sbbc_possible.vals[val]);
+        return;
+    }
+
+    if (!kvm_enabled() || val <= kvm_val) {
+        return;
+    }
+
+    error_setg(errp,
+"Requested safe bounds check capability level not supported by KVM");
+    error_append_hint(errp, "Try appending -machine cap-sbbc=%s\n",
+                      cap_sbbc_possible.vals[kvm_val]);
+}
+
+/* Value names for cap-ibs, in increasing order; the index is the value. */
+SpaprCapPossible cap_ibs_possible = {
+ .num = 5,
+ /* Note workaround only maintained for compatibility */
+ .vals = {"broken", "workaround", "fixed-ibs", "fixed-ccd", "fixed-na"},
+ .help = "broken - no protection, workaround - count cache flush"
+ ", fixed-ibs - indirect branch serialisation,"
+ " fixed-ccd - cache count disabled,"
+ " fixed-na - fixed in hardware (no longer applicable)",
+};
+
+/*
+ * Apply hook for cap-ibs. Under TCG any non-zero level only produces a
+ * warning. Under KVM a level above what KVM reports is an error, with a
+ * hint naming the highest level KVM supports.
+ */
+static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr,
+                                           uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    uint8_t kvm_val = kvmppc_get_cap_safe_indirect_branch();
+
+    if (tcg_enabled() && val) {
+        /* TCG only supports broken, allow other values and print a warning */
+        warn_report("TCG doesn't support requested feature, cap-ibs=%s",
+                    cap_ibs_possible.vals[val]);
+        return;
+    }
+
+    if (!kvm_enabled() || val <= kvm_val) {
+        return;
+    }
+
+    error_setg(errp,
+"Requested safe indirect branch capability level not supported by KVM");
+    error_append_hint(errp, "Try appending -machine cap-ibs=%s\n",
+                      cap_ibs_possible.vals[kvm_val]);
+}
+
+/*
+ * Text listing the three tristate value names.
+ * NOTE(review): presumably appended to capability descriptions; the use
+ * sites are outside this view.
+ */
+#define VALUE_DESC_TRISTATE " (broken, workaround, fixed)"
+
+/*
+ * Check that backing pages of size @pagesize can support the configured
+ * hpt-max-page-size.
+ *
+ * Always succeeds unless KVM needs host-contiguous pages for the HPT; in
+ * that case the guest maximum page size must not exceed @pagesize.
+ * Returns true on success, false with @errp set otherwise.
+ */
+bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
+                          Error **errp)
+{
+    hwaddr guest_max = (1ULL << spapr->eff.caps[SPAPR_CAP_HPT_MAXPAGESIZE]);
+
+    if (kvmppc_hpt_needs_host_contiguous_pages() && guest_max > pagesize) {
+        error_setg(errp,
+                   "Can't support %"HWADDR_PRIu" kiB guest pages with %"
+                   HWADDR_PRIu" kiB host pages with this KVM implementation",
+                   guest_max >> 10, pagesize >> 10);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Apply hook for hpt-max-page-size (@val is a shift count): rejects
+ * anything below 4kiB, warns below 64kiB, then checks the value against
+ * the minimum RAM page size via spapr_check_pagesize().
+ */
+static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr,
+                                      uint8_t val, Error **errp)
+{
+    if (val < 12) {
+        error_setg(errp, "Require at least 4kiB hpt-max-page-size");
+        return;
+    }
+
+    if (val < 16) {
+        warn_report("Many guests require at least 64kiB hpt-max-page-size");
+    }
+
+    spapr_check_pagesize(spapr, qemu_minrampagesize(), errp);
+}
+
+/*
+ * migrate_needed hook for hpt-max-page-size: true unless the machine class
+ * of @opaque has pre_4_1_migration set.
+ */
+static bool cap_hpt_maxpagesize_migrate_needed(void *opaque)
+{
+ return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration;
+}
+
+/*
+ * Page size filter callback. @opaque points to an unsigned holding the
+ * maximum allowed page shift.
+ *
+ * Returns true if a page of shift @pshift inside a segment of base shift
+ * @seg_pshift may be used by the guest, false otherwise.
+ */
+static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift,
+                              uint32_t pshift)
+{
+    unsigned limit = *((unsigned *)opaque);
+    bool within_limit;
+    bool allowed_combination;
+
+    assert(pshift >= seg_pshift);
+
+    /* Don't allow the guest to use pages bigger than the configured
+     * maximum size */
+    within_limit = (pshift <= limit);
+
+    /* For whatever reason, KVM doesn't allow multiple pagesizes
+     * within a segment, *except* for the case of 16M pages in a 4k or
+     * 64k segment. Always exclude other cases, so that TCG and KVM
+     * guests see a consistent environment */
+    allowed_combination = (pshift == seg_pshift) || (pshift == 24);
+
+    return within_limit && allowed_combination;
+}
+
+/*
+ * Filter the hash MMU page size table of @cpu in place.
+ *
+ * @cb is invoked as cb(@opaque, segment_shift, page_shift) for every
+ * populated encoding; encodings for which it returns false are dropped.
+ * Surviving encodings are packed to the front of their row, and surviving
+ * rows are packed to the front of the table, so that the table never
+ * contains an empty row followed by a populated one.  Everything after
+ * the surviving entries is zeroed.
+ *
+ * If no surviving page has a shift of at least 16, the
+ * PPC_HASH64_CI_LARGEPAGE flag is cleared.
+ */
+static void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu,
+                                        bool (*cb)(void *, uint32_t, uint32_t),
+                                        void *opaque)
+{
+    PPCHash64Options *opts = cpu->hash64_opts;
+    bool ci_largepage = false;
+    int n = 0; /* number of segment rows kept so far */
+    int i;
+
+    assert(opts);
+
+    for (i = 0; i < ARRAY_SIZE(opts->sps); i++) {
+        PPCHash64SegmentPageSizes *sps = &opts->sps[i];
+        int j;
+        int m = 0; /* number of encodings kept in this row */
+
+        assert(n <= i);
+
+        /* A zero segment shift marks the end of the populated rows */
+        if (!sps->page_shift) {
+            break;
+        }
+
+        for (j = 0; j < ARRAY_SIZE(sps->enc); j++) {
+            PPCHash64PageSize *ps = &sps->enc[j];
+
+            assert(m <= j);
+            if (!ps->page_shift) {
+                break;
+            }
+
+            if (cb(opaque, sps->page_shift, ps->page_shift)) {
+                if (ps->page_shift >= 16) {
+                    ci_largepage = true;
+                }
+                sps->enc[m++] = *ps;
+            }
+        }
+
+        /* Clear rest of the row */
+        for (j = m; j < ARRAY_SIZE(sps->enc); j++) {
+            memset(&sps->enc[j], 0, sizeof(sps->enc[j]));
+        }
+
+        if (m) {
+            /*
+             * Move the row down over any fully filtered rows before it;
+             * otherwise the final clear below would wipe this row and
+             * keep an empty one instead.
+             */
+            if (n != i) {
+                opts->sps[n] = *sps;
+            }
+            n++;
+        }
+    }
+
+    /* Clear the rest of the table */
+    for (i = n; i < ARRAY_SIZE(opts->sps); i++) {
+        memset(&opts->sps[i], 0, sizeof(opts->sps[i]));
+    }
+
+    if (!ci_largepage) {
+        opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
+    }
+}
+
+/*
+ * Per-CPU apply hook for cap-hpt-max-page-size: filters the CPU's hash
+ * page size table with spapr_pagesize_cb(), using @val as the maximum
+ * page shift.
+ */
+static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr,
+                                          PowerPCCPU *cpu,
+                                          uint8_t val, Error **errp)
+{
+    unsigned limit = val;
+
+    ppc_hash64_filter_pagesizes(cpu, spapr_pagesize_cb, &limit);
+}
+
+/*
+ * Machine-level apply hook for cap-nested-hv.
+ *
+ * Nothing is done when the capability is off.  Under TCG the request is
+ * always an error.  Under KVM the CPU must pass the ISA 3.00 compat check
+ * and KVM must both advertise and accept the capability.
+ */
+static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr,
+                                    uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+
+    if (!val) {
+        /* capability disabled by default */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        error_setg(errp, "No Nested KVM-HV support in TCG");
+        error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n");
+        return;
+    }
+
+    if (!kvm_enabled()) {
+        return;
+    }
+
+    if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
+                          spapr->max_compat_pvr)) {
+        error_setg(errp, "Nested KVM-HV only supported on POWER9");
+        error_append_hint(errp,
+                          "Try appending -machine max-cpu-compat=power9\n");
+        return;
+    }
+
+    if (!kvmppc_has_cap_nested_kvm_hv()) {
+        error_setg(errp,
+                   "KVM implementation does not support Nested KVM-HV");
+        error_append_hint(errp,
+                          "Try appending -machine cap-nested-hv=off\n");
+        return;
+    }
+
+    if (kvmppc_set_cap_nested_kvm_hv(val) < 0) {
+        error_setg(errp, "Error enabling cap-nested-hv with KVM");
+        error_append_hint(errp,
+                          "Try appending -machine cap-nested-hv=off\n");
+    }
+}
+
+/*
+ * Machine-level apply hook for cap-large-decr.
+ *
+ * Under TCG the CPU must pass the ISA 3.00 compat check.  Under KVM the
+ * host must report a large decrementer whose width matches the CPU
+ * model's lrg_decr_bits.
+ */
+static void cap_large_decr_apply(SpaprMachineState *spapr,
+                                 uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    int kvm_nr_bits;
+
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    if (tcg_enabled()) {
+        if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0,
+                              spapr->max_compat_pvr)) {
+            error_setg(errp, "Large decrementer only supported on POWER9");
+            error_append_hint(errp, "Try -cpu POWER9\n");
+        }
+        return;
+    }
+
+    if (!kvm_enabled()) {
+        return;
+    }
+
+    kvm_nr_bits = kvmppc_get_cap_large_decr();
+    if (!kvm_nr_bits) {
+        error_setg(errp, "No large decrementer support");
+        error_append_hint(errp,
+                          "Try appending -machine cap-large-decr=off\n");
+    } else if (pcc->lrg_decr_bits != kvm_nr_bits) {
+        error_setg(errp,
+                   "KVM large decrementer size (%d) differs to model (%d)",
+                   kvm_nr_bits, pcc->lrg_decr_bits);
+        error_append_hint(errp,
+                          "Try appending -machine cap-large-decr=off\n");
+    }
+}
+
+/*
+ * Per-CPU apply hook for cap-large-decr.
+ *
+ * Under KVM, asks the kernel to switch the large decrementer to @val and
+ * reports an error if that fails.  In all cases the LPCR_LD bit of the
+ * CPU's LPCR is then made to match @val.
+ */
+static void cap_large_decr_cpu_apply(SpaprMachineState *spapr,
+                                     PowerPCCPU *cpu,
+                                     uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    CPUPPCState *env = &cpu->env;
+    target_ulong lpcr = env->spr[SPR_LPCR];
+
+    if (kvm_enabled() && kvmppc_enable_cap_large_decr(cpu, val)) {
+        error_setg(errp, "No large decrementer support");
+        error_append_hint(errp,
+                          "Try appending -machine cap-large-decr=off\n");
+    }
+
+    /* Start from LD cleared, then set it again if the cap is enabled */
+    lpcr &= ~LPCR_LD;
+    if (val) {
+        lpcr |= LPCR_LD;
+    }
+    ppc_store_lpcr(cpu, lpcr);
+}
+
+/*
+ * Machine-level apply hook for cap-ccf-assist.
+ *
+ * TCG accepts the request with a warning.  KVM rejects a level above what
+ * the host reports, unless the host's indirect branch mitigation is
+ * "count cache disabled".
+ */
+static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
+                                 Error **errp)
+{
+    ERRP_GUARD();
+    uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist();
+    uint8_t kvm_ibs;
+
+    if (tcg_enabled() && val) {
+        /* TCG doesn't implement anything here, but allow with a warning */
+        warn_report("TCG doesn't support requested feature, cap-ccf-assist=on");
+        return;
+    }
+
+    if (!kvm_enabled() || val <= kvm_val) {
+        return;
+    }
+
+    kvm_ibs = kvmppc_get_cap_safe_indirect_branch();
+    if (kvm_ibs == SPAPR_CAP_FIXED_CCD) {
+        /*
+         * If we don't have CCF assist on the host, the assist
+         * instruction is a harmless no-op.  It won't correctly
+         * implement the cache count flush *but* if we have
+         * count-cache-disabled in the host, that flush is
+         * unnecessary.  So, specifically allow this case.  This
+         * allows us to have better performance on POWER9 DD2.3,
+         * while still working on POWER9 DD2.2 and POWER8 host
+         * cpus.
+         */
+        return;
+    }
+
+    error_setg(errp,
+               "Requested count cache flush assist capability level not supported by KVM");
+    error_append_hint(errp, "Try appending -machine cap-ccf-assist=off\n");
+}
+
+/*
+ * Machine-level apply hook for cap-fwnmi: when enabled under KVM, fail
+ * if kvmppc_get_fwnmi() reports no FWNMI support.
+ */
+static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
+                            Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    if (kvm_enabled() && !kvmppc_get_fwnmi()) {
+        error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts(FWNMI) not supported by KVM.");
+        error_append_hint(errp, "Try appending -machine cap-fwnmi=off\n");
+    }
+}
+
+/*
+ * Machine-level apply hook for cap-rpt-invalidate.
+ *
+ * Always an error under TCG.  Under KVM it requires the radix MMU
+ * capability and H_RPT_INVALIDATE support, and enables the hcall when
+ * both are present.
+ */
+static void cap_rpt_invalidate_apply(SpaprMachineState *spapr,
+                                     uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!val) {
+        /* capability disabled by default */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        error_setg(errp, "No H_RPT_INVALIDATE support in TCG");
+        error_append_hint(errp,
+                          "Try appending -machine cap-rpt-invalidate=off\n");
+        return;
+    }
+
+    if (!kvm_enabled()) {
+        return;
+    }
+
+    if (!kvmppc_has_cap_mmu_radix()) {
+        error_setg(errp, "H_RPT_INVALIDATE only supported on Radix");
+        return;
+    }
+
+    if (!kvmppc_has_cap_rpt_invalidate()) {
+        error_setg(errp,
+                   "KVM implementation does not support H_RPT_INVALIDATE");
+        error_append_hint(errp,
+                          "Try appending -machine cap-rpt-invalidate=off\n");
+        return;
+    }
+
+    kvmppc_enable_h_rpt_invalidate();
+}
+
+/*
+ * Table of all sPAPR machine capabilities, indexed by SPAPR_CAP_* id.
+ *
+ * Each entry provides the property name (exposed as "cap-<name>"), its
+ * description and type, the property accessors, and the hook run by
+ * spapr_caps_apply().  Entries may also provide a per-CPU hook
+ * (.cpu_apply) run by spapr_caps_cpu_apply() and a .migrate_needed
+ * predicate consulted by the migration subsections.
+ */
+SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
+    [SPAPR_CAP_HTM] = {
+        .name = "htm",
+        .description = "Allow Hardware Transactional Memory (HTM)",
+        .index = SPAPR_CAP_HTM,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_htm_apply,
+    },
+    [SPAPR_CAP_VSX] = {
+        .name = "vsx",
+        .description = "Allow Vector Scalar Extensions (VSX)",
+        .index = SPAPR_CAP_VSX,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_vsx_apply,
+    },
+    [SPAPR_CAP_DFP] = {
+        .name = "dfp",
+        .description = "Allow Decimal Floating Point (DFP)",
+        .index = SPAPR_CAP_DFP,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_dfp_apply,
+    },
+    [SPAPR_CAP_CFPC] = {
+        .name = "cfpc",
+        .description = "Cache Flush on Privilege Change" VALUE_DESC_TRISTATE,
+        .index = SPAPR_CAP_CFPC,
+        .get = spapr_cap_get_string,
+        .set = spapr_cap_set_string,
+        .type = "string",
+        .possible = &cap_cfpc_possible,
+        .apply = cap_safe_cache_apply,
+    },
+    [SPAPR_CAP_SBBC] = {
+        .name = "sbbc",
+        .description = "Speculation Barrier Bounds Checking" VALUE_DESC_TRISTATE,
+        .index = SPAPR_CAP_SBBC,
+        .get = spapr_cap_get_string,
+        .set = spapr_cap_set_string,
+        .type = "string",
+        .possible = &cap_sbbc_possible,
+        .apply = cap_safe_bounds_check_apply,
+    },
+    [SPAPR_CAP_IBS] = {
+        .name = "ibs",
+        /* Keep a space after every comma once the literals are joined */
+        .description =
+            "Indirect Branch Speculation (broken, workaround, fixed-ibs, "
+            "fixed-ccd, fixed-na)",
+        .index = SPAPR_CAP_IBS,
+        .get = spapr_cap_get_string,
+        .set = spapr_cap_set_string,
+        .type = "string",
+        .possible = &cap_ibs_possible,
+        .apply = cap_safe_indirect_branch_apply,
+    },
+    [SPAPR_CAP_HPT_MAXPAGESIZE] = {
+        .name = "hpt-max-page-size",
+        .description = "Maximum page size for Hash Page Table guests",
+        .index = SPAPR_CAP_HPT_MAXPAGESIZE,
+        .get = spapr_cap_get_pagesize,
+        .set = spapr_cap_set_pagesize,
+        .type = "int",
+        .apply = cap_hpt_maxpagesize_apply,
+        .cpu_apply = cap_hpt_maxpagesize_cpu_apply,
+        .migrate_needed = cap_hpt_maxpagesize_migrate_needed,
+    },
+    [SPAPR_CAP_NESTED_KVM_HV] = {
+        .name = "nested-hv",
+        .description = "Allow Nested KVM-HV",
+        .index = SPAPR_CAP_NESTED_KVM_HV,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_nested_kvm_hv_apply,
+    },
+    [SPAPR_CAP_LARGE_DECREMENTER] = {
+        .name = "large-decr",
+        .description = "Allow Large Decrementer",
+        .index = SPAPR_CAP_LARGE_DECREMENTER,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_large_decr_apply,
+        .cpu_apply = cap_large_decr_cpu_apply,
+    },
+    [SPAPR_CAP_CCF_ASSIST] = {
+        .name = "ccf-assist",
+        .description = "Count Cache Flush Assist via HW Instruction",
+        .index = SPAPR_CAP_CCF_ASSIST,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_ccf_assist_apply,
+    },
+    [SPAPR_CAP_FWNMI] = {
+        .name = "fwnmi",
+        .description = "Implements PAPR FWNMI option",
+        .index = SPAPR_CAP_FWNMI,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_fwnmi_apply,
+    },
+    [SPAPR_CAP_RPT_INVALIDATE] = {
+        .name = "rpt-invalidate",
+        .description = "Allow H_RPT_INVALIDATE",
+        .index = SPAPR_CAP_RPT_INVALIDATE,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_rpt_invalidate_apply,
+    },
+};
+
+/*
+ * Compute the default capability set for @spapr when running @cputype.
+ *
+ * Starts from the machine class defaults and downgrades each capability
+ * whose minimum ISA level fails the compat check for the CPU type.  When
+ * the class default for hpt-max-page-size is 0, a value is derived here
+ * instead.
+ */
+static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
+                                               const char *cputype)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    SpaprCapabilities caps = smc->default_caps;
+
+    /* Needs ISA 3.00 */
+    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00,
+                               0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
+    }
+
+    /* Needs ISA 2.07 */
+    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_07,
+                               0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
+        caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN;
+    }
+
+    /* Needs ISA 2.06+ */
+    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_06_PLUS,
+                               0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
+    }
+
+    /* Needs ISA 2.06 */
+    if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_06,
+                               0, spapr->max_compat_pvr)) {
+        caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_OFF;
+        caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_OFF;
+        caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
+    }
+
+    /* This is for pseries-2.12 and older */
+    if (smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] == 0) {
+        /* 34: allow everything up to 16GiB, i.e. everything */
+        uint8_t mps = kvmppc_hpt_needs_host_contiguous_pages()
+                      ? ctz64(qemu_minrampagesize())
+                      : 34;
+
+        caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps;
+    }
+
+    return caps;
+}
+
+/*
+ * Migration pre_load hook: seed the incoming capability set with the
+ * defaults, so that values actually received can be told apart later.
+ * Always returns 0.
+ */
+int spapr_caps_pre_load(void *opaque)
+{
+    SpaprMachineState *s = opaque;
+
+    s->mig = s->def;
+
+    return 0;
+}
+
+/*
+ * Migration pre_save hook: copy the effective capability set into the
+ * set that is put on the wire.  Always returns 0.
+ */
+int spapr_caps_pre_save(void *opaque)
+{
+    SpaprMachineState *s = opaque;
+
+    s->mig = s->eff;
+
+    return 0;
+}
+
+/*
+ * Compare the capabilities of the migration source with the effective
+ * ones on this destination.
+ *
+ * This has to be called from the top-level spapr post_load, not the caps
+ * specific one.  Otherwise it wouldn't be called when the source caps are
+ * all defaults, which could still conflict with overridden caps on the
+ * destination.
+ *
+ * Returns 0 if every source level is at most the destination level,
+ * -EINVAL otherwise.  A lower source level only produces a warning.
+ */
+int spapr_caps_post_migration(SpaprMachineState *spapr)
+{
+    SpaprCapabilities dstcaps = spapr->eff;
+    SpaprCapabilities srccaps =
+        default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
+    int ret = 0;
+    int i;
+
+    /* If not default value then assume came in with the migration */
+    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+        if (spapr->mig.caps[i] != spapr->def.caps[i]) {
+            srccaps.caps[i] = spapr->mig.caps[i];
+        }
+    }
+
+    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+        SpaprCapabilityInfo *info = &capability_table[i];
+
+        if (srccaps.caps[i] > dstcaps.caps[i]) {
+            error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)",
+                         info->name, srccaps.caps[i], dstcaps.caps[i]);
+            ret = -EINVAL;
+        } else if (srccaps.caps[i] < dstcaps.caps[i]) {
+            warn_report("cap-%s lower level (%d) in incoming stream than on destination (%d)",
+                        info->name, srccaps.caps[i], dstcaps.caps[i]);
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Used to generate the migration field and needed function for a spapr cap.
+ *
+ * The generated spapr_cap_<sname>_needed() sends the subsection only when
+ * all of the following hold:
+ *  - the capability's optional migrate_needed hook, if any, agrees;
+ *  - the capability was set on the command line;
+ *  - its effective value differs from the default.
+ *
+ * The conditional must be parenthesized: "?:" binds more loosely than
+ * "&&", so without the parentheses a capability with a migrate_needed
+ * hook would skip the other two tests.
+ */
+#define SPAPR_CAP_MIG_STATE(sname, cap)                 \
+static bool spapr_cap_##sname##_needed(void *opaque)    \
+{                                                       \
+    SpaprMachineState *spapr = opaque;                  \
+    bool (*needed)(void *opaque) =                      \
+        capability_table[cap].migrate_needed;           \
+                                                        \
+    return (needed ? needed(opaque) : true) &&          \
+           spapr->cmd_line_caps[cap] &&                 \
+           (spapr->eff.caps[cap] !=                     \
+            spapr->def.caps[cap]);                      \
+}                                                       \
+                                                        \
+const VMStateDescription vmstate_spapr_cap_##sname = {  \
+    .name = "spapr/cap/" #sname,                        \
+    .version_id = 1,                                    \
+    .minimum_version_id = 1,                            \
+    .needed = spapr_cap_##sname##_needed,               \
+    .fields = (VMStateField[]) {                        \
+        VMSTATE_UINT8(mig.caps[cap],                    \
+                      SpaprMachineState),               \
+        VMSTATE_END_OF_LIST()                           \
+    },                                                  \
+}
+
+/*
+ * One migration subsection (vmstate_spapr_cap_<name>) and its needed
+ * predicate per capability listed in capability_table.
+ */
+SPAPR_CAP_MIG_STATE(htm, SPAPR_CAP_HTM);
+SPAPR_CAP_MIG_STATE(vsx, SPAPR_CAP_VSX);
+SPAPR_CAP_MIG_STATE(dfp, SPAPR_CAP_DFP);
+SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC);
+SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC);
+SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS);
+SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
+SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
+SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
+SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI);
+SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE);
+
+/*
+ * Record the default capability values for this machine and CPU type,
+ * and use them as effective values for every capability that was not
+ * given on the command line.
+ */
+void spapr_caps_init(SpaprMachineState *spapr)
+{
+    /* Compute the actual set of caps we should run with */
+    SpaprCapabilities defaults =
+        default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type);
+    int i;
+
+    for (i = 0; i < SPAPR_CAP_NUM; i++) {
+        bool user_set = spapr->cmd_line_caps[i];
+
+        /* Always remember the default ... */
+        spapr->def.caps[i] = defaults.caps[i];
+
+        /* ... but only apply it when the user did not choose a value */
+        if (!user_set) {
+            spapr->eff.caps[i] = defaults.caps[i];
+        }
+    }
+}
+
+/*
+ * Run the machine-level apply hook of every capability with its
+ * effective value.  Errors are routed to error_fatal.
+ */
+void spapr_caps_apply(SpaprMachineState *spapr)
+{
+    int cap;
+
+    for (cap = 0; cap < SPAPR_CAP_NUM; cap++) {
+        /*
+         * If the apply function can't set the desired level and thinks it's
+         * fatal, it should cause that.
+         */
+        capability_table[cap].apply(spapr, spapr->eff.caps[cap],
+                                    &error_fatal);
+    }
+}
+
+/*
+ * Run the per-CPU apply hook, where one exists, of every capability on
+ * @cpu with the capability's effective value.  Errors are routed to
+ * error_fatal.
+ */
+void spapr_caps_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu)
+{
+    int cap;
+
+    for (cap = 0; cap < SPAPR_CAP_NUM; cap++) {
+        SpaprCapabilityInfo *info = &capability_table[cap];
+
+        if (!info->cpu_apply) {
+            continue;
+        }
+
+        /*
+         * If the apply function can't set the desired level and thinks it's
+         * fatal, it should cause that.
+         */
+        info->cpu_apply(spapr, cpu, spapr->eff.caps[cap], &error_fatal);
+    }
+}
+
+/*
+ * Register a "cap-<name>" class property, with its description, on the
+ * machine class for every entry of capability_table.
+ */
+void spapr_caps_add_properties(SpaprMachineClass *smc)
+{
+    ObjectClass *klass = OBJECT_CLASS(smc);
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(capability_table); i++) {
+        SpaprCapabilityInfo *cap = &capability_table[i];
+        char *prop = g_strdup_printf("cap-%s", cap->name);
+        char *help = g_strdup_printf("%s", cap->description);
+
+        object_class_property_add(klass, prop, cap->type,
+                                  cap->get, cap->set,
+                                  NULL, cap);
+        object_class_property_set_description(klass, prop, help);
+
+        g_free(help);
+        g_free(prop);
+    }
+}
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
new file mode 100644
index 000000000..58e7341cb
--- /dev/null
+++ b/hw/ppc/spapr_cpu_core.c
@@ -0,0 +1,391 @@
+/*
+ * sPAPR CPU core device, acts as container of CPU thread devices.
+ *
+ * Copyright (C) 2016 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/cpu/core.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/spapr.h"
+#include "qapi/error.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "target/ppc/kvm_ppc.h"
+#include "hw/ppc/ppc.h"
+#include "target/ppc/mmu-hash64.h"
+#include "sysemu/numa.h"
+#include "sysemu/reset.h"
+#include "sysemu/hw_accel.h"
+#include "qemu/error-report.h"
+
+/*
+ * Reset one vCPU to its sPAPR start state: generic CPU reset, LPCR/PSSCR/
+ * AMOR setup, cleared VPA registration state, per-CPU capabilities, MMU
+ * check and interrupt controller presenter reset.
+ */
+static void spapr_reset_vcpu(PowerPCCPU *cpu)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    CPUPPCState *env = &cpu->env;
+    target_ulong lpcr;
+
+    cpu_reset(CPU(cpu));
+
+    env->spr[SPR_HIOR] = 0;
+
+    /*
+     * Set emulated LPCR to not send interrupts to hypervisor. Note that
+     * under KVM, the actual HW LPCR will be set differently by KVM itself,
+     * the settings below ensure proper operations with TCG in absence of
+     * a real hypervisor.
+     *
+     * Disable Power-saving mode Exit Cause exceptions for the CPU, so
+     * we don't get spurious wakeups before an RTAS start-cpu call.
+     * For the same reason, set PSSCR_EC.
+     */
+    lpcr = env->spr[SPR_LPCR];
+    lpcr &= ~(LPCR_VPM1 | LPCR_ISL | LPCR_KBV | pcc->lpcr_pm);
+    lpcr |= LPCR_LPES0 | LPCR_LPES1;
+    env->spr[SPR_PSSCR] |= PSSCR_EC;
+    ppc_store_lpcr(cpu, lpcr);
+
+    /* Set a full AMOR so guest can use the AMR as it sees fit */
+    env->spr[SPR_AMOR] = 0xffffffffffffffffull;
+
+    /* Forget any VPA, SLB shadow and dispatch trace log registration */
+    spapr_cpu->vpa_addr = 0;
+    spapr_cpu->slb_shadow_addr = 0;
+    spapr_cpu->slb_shadow_size = 0;
+    spapr_cpu->dtl_addr = 0;
+    spapr_cpu->dtl_size = 0;
+
+    spapr_caps_cpu_apply(spapr, cpu);
+
+    kvm_check_mmu(cpu, &error_fatal);
+
+    spapr_irq_cpu_intc_reset(spapr, cpu);
+}
+
+/*
+ * Prepare @cpu to start executing at @nip with r1, r3 and r4 preloaded
+ * from @r1, @r3 and @r4: mark it online and un-halted, and re-enable the
+ * power-saving exit cause bits in its LPCR.
+ */
+void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
+                               target_ulong r1, target_ulong r3,
+                               target_ulong r4)
+{
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    CPUPPCState *env = &cpu->env;
+    CPUState *cs = CPU(cpu);
+
+    /* Entry point and initial register contents */
+    env->nip = nip;
+    env->gpr[1] = r1;
+    env->gpr[3] = r3;
+    env->gpr[4] = r4;
+
+    kvmppc_set_reg_ppc_online(cpu, 1);
+    cs->halted = 0;
+
+    /* Enable Power-saving mode Exit Cause exceptions */
+    ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
+}
+
+/*
+ * Return the sPAPR CPU core type name matching @cpu_type, i.e. the CPU
+ * type name with POWERPC_CPU_TYPE_SUFFIX replaced by the sPAPR core type
+ * naming, or NULL if no such class is registered.
+ */
+const char *spapr_get_cpu_core_type(const char *cpu_type)
+{
+    /* Length of the model part, without the CPU type suffix */
+    int len = strlen(cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
+    char *core_type = g_strdup_printf(SPAPR_CPU_CORE_TYPE_NAME("%.*s"),
+                                      len, cpu_type);
+    ObjectClass *oc = object_class_by_name(core_type);
+
+    g_free(core_type);
+
+    return oc ? object_class_get_name(oc) : NULL;
+}
+
+/* The SLB shadow subsection is only needed for a non-zero address. */
+static bool slb_shadow_needed(void *opaque)
+{
+    SpaprCpuState *state = opaque;
+
+    if (state->slb_shadow_addr == 0) {
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Migration subsection with the SLB shadow address and size; sent only
+ * when slb_shadow_needed() returns true.
+ */
+static const VMStateDescription vmstate_spapr_cpu_slb_shadow = {
+ .name = "spapr_cpu/vpa/slb_shadow",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = slb_shadow_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(slb_shadow_addr, SpaprCpuState),
+ VMSTATE_UINT64(slb_shadow_size, SpaprCpuState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/* The DTL subsection is only needed for a non-zero address. */
+static bool dtl_needed(void *opaque)
+{
+    SpaprCpuState *state = opaque;
+
+    if (state->dtl_addr == 0) {
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Migration subsection with the DTL address and size; sent only when
+ * dtl_needed() returns true.
+ */
+static const VMStateDescription vmstate_spapr_cpu_dtl = {
+ .name = "spapr_cpu/vpa/dtl",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = dtl_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(dtl_addr, SpaprCpuState),
+ VMSTATE_UINT64(dtl_size, SpaprCpuState),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/* The VPA subsection is only needed for a non-zero address. */
+static bool vpa_needed(void *opaque)
+{
+    SpaprCpuState *state = opaque;
+
+    if (state->vpa_addr == 0) {
+        return false;
+    }
+    return true;
+}
+
+/*
+ * Migration subsection with the VPA address; sent only when vpa_needed()
+ * returns true.  The SLB shadow and DTL subsections are nested below it.
+ */
+static const VMStateDescription vmstate_spapr_cpu_vpa = {
+ .name = "spapr_cpu/vpa",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = vpa_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(vpa_addr, SpaprCpuState),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_spapr_cpu_slb_shadow,
+ &vmstate_spapr_cpu_dtl,
+ NULL
+ }
+};
+
+/*
+ * Top-level migration description for the per-vCPU sPAPR state.  It has
+ * no fields of its own; all data lives in the VPA subsection.
+ */
+static const VMStateDescription vmstate_spapr_cpu_state = {
+ .name = "spapr_cpu",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_spapr_cpu_vpa,
+ NULL
+ }
+};
+
+/*
+ * Undo spapr_realize_vcpu(): unregister the migration state (unless the
+ * core uses pre-3.0 migration), destroy the interrupt presenter and
+ * unrealize the CPU device.
+ */
+static void spapr_unrealize_vcpu(PowerPCCPU *cpu, SpaprCpuCore *sc)
+{
+    DeviceState *cpu_dev = DEVICE(cpu);
+
+    if (!sc->pre_3_0_migration) {
+        vmstate_unregister(NULL, &vmstate_spapr_cpu_state, cpu->machine_data);
+    }
+
+    spapr_irq_cpu_intc_destroy(SPAPR_MACHINE(qdev_get_machine()), cpu);
+    qdev_unrealize(cpu_dev);
+}
+
+/*
+ * Device reset callback of the core: resets every thread of the core.
+ * Called when CPUs are hot-plugged.
+ */
+static void spapr_cpu_core_reset(DeviceState *dev)
+{
+    SpaprCpuCore *core = SPAPR_CPU_CORE(dev);
+    CPUCore *cc = CPU_CORE(dev);
+    int thread;
+
+    for (thread = 0; thread < cc->nr_threads; thread++) {
+        spapr_reset_vcpu(core->threads[thread]);
+    }
+}
+
+/*
+ * Reset handler registered with qemu_register_reset(); called by the
+ * machine reset.  @opaque is the core being reset.
+ */
+static void spapr_cpu_core_reset_handler(void *opaque)
+{
+    DeviceState *dev = opaque;
+
+    spapr_cpu_core_reset(dev);
+}
+
+/*
+ * Release the per-vCPU sPAPR state attached to @cpu and detach the CPU
+ * object from its parent.
+ */
+static void spapr_delete_vcpu(PowerPCCPU *cpu)
+{
+    SpaprCpuState *state = spapr_cpu_state(cpu);
+
+    /* Drop the reference from the CPU before freeing the state */
+    cpu->machine_data = NULL;
+    g_free(state);
+
+    object_unparent(OBJECT(cpu));
+}
+
+/*
+ * Tear down a core: unrealize and delete each thread that was created,
+ * free the thread array and drop the reset handler.
+ */
+static void spapr_cpu_core_unrealize(DeviceState *dev)
+{
+    SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
+    CPUCore *cc = CPU_CORE(dev);
+    int i;
+
+    for (i = 0; i < cc->nr_threads; i++) {
+        PowerPCCPU *cpu = sc->threads[i];
+
+        if (!cpu) {
+            continue;
+        }
+
+        /*
+         * Since we can get here from the error path of
+         * spapr_cpu_core_realize(), make sure we only unrealize
+         * vCPUs that have already been realized.
+         */
+        if (object_property_get_bool(OBJECT(cpu), "realized",
+                                     &error_abort)) {
+            spapr_unrealize_vcpu(cpu, sc);
+        }
+        spapr_delete_vcpu(cpu);
+    }
+
+    g_free(sc->threads);
+    qemu_unregister_reset(spapr_cpu_core_reset_handler, sc);
+}
+
+/*
+ * Realize one vCPU of a core and hook it up to the sPAPR machine:
+ * timebase, virtual hypervisor, PAPR mode, interrupt presenter and
+ * (unless the core uses pre-3.0 migration) its migration state.
+ *
+ * Returns true on success, false with @errp set on failure.
+ */
+static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               SpaprCpuCore *sc, Error **errp)
+{
+    DeviceState *cpu_dev = DEVICE(cpu);
+    CPUState *cs = CPU(cpu);
+
+    if (!qdev_realize(cpu_dev, NULL, errp)) {
+        return false;
+    }
+
+    /* Set time-base frequency to 512 MHz */
+    cpu_ppc_tb_init(&cpu->env, SPAPR_TIMEBASE_FREQ);
+
+    cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr));
+    kvmppc_set_papr(cpu);
+
+    if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) {
+        /* Roll back the realize done above */
+        qdev_unrealize(cpu_dev);
+        return false;
+    }
+
+    if (sc->pre_3_0_migration) {
+        return true;
+    }
+
+    vmstate_register(NULL, cs->cpu_index, &vmstate_spapr_cpu_state,
+                     cpu->machine_data);
+    return true;
+}
+
+/*
+ * Create (but do not realize) thread @i of core @sc.
+ *
+ * The new CPU starts powered off, gets its index and vCPU id from the
+ * core id plus @i, is added as child "thread[i]" of the core and receives
+ * a zeroed SpaprCpuState.
+ *
+ * Returns the CPU, or NULL with @errp set if the vCPU id is rejected.
+ */
+static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp)
+{
+    SpaprCpuCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(sc);
+    CPUCore *cc = CPU_CORE(sc);
+    g_autoptr(Object) obj = object_new(scc->cpu_type);
+    g_autofree char *id = NULL;
+    CPUState *cs = CPU(obj);
+    PowerPCCPU *cpu = POWERPC_CPU(obj);
+
+    /*
+     * All CPUs start halted. CPU0 is unhalted from the machine level reset code
+     * and the rest are explicitly started up by the guest using an RTAS call.
+     */
+    cs->start_powered_off = true;
+    cs->cpu_index = cc->core_id + i;
+
+    if (!spapr_set_vcpu_id(cpu, cs->cpu_index, errp)) {
+        return NULL;
+    }
+
+    cpu->node_id = sc->node_id;
+
+    id = g_strdup_printf("thread[%d]", i);
+    object_property_add_child(OBJECT(sc), id, obj);
+
+    cpu->machine_data = g_new0(SpaprCpuState, 1);
+
+    return cpu;
+}
+
+/*
+ * Realize a core: register its reset handler, then create and realize
+ * each of its threads.  Any failure tears the whole core down again via
+ * spapr_cpu_core_unrealize().
+ */
+static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
+{
+    /*
+     * We don't use SPAPR_MACHINE() in order to exit gracefully if the user
+     * tries to add a sPAPR CPU core to a non-pseries machine.
+     */
+    SpaprMachineState *spapr =
+        (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(),
+                                                  TYPE_SPAPR_MACHINE);
+    SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
+    CPUCore *cc = CPU_CORE(OBJECT(dev));
+    int i;
+
+    if (!spapr) {
+        error_setg(errp, TYPE_SPAPR_CPU_CORE " needs a pseries machine");
+        return;
+    }
+
+    qemu_register_reset(spapr_cpu_core_reset_handler, sc);
+    sc->threads = g_new0(PowerPCCPU *, cc->nr_threads);
+
+    for (i = 0; i < cc->nr_threads; i++) {
+        PowerPCCPU *cpu = spapr_create_vcpu(sc, i, errp);
+
+        /* Store first, so that unrealize sees a partially set up thread */
+        sc->threads[i] = cpu;
+
+        if (!cpu || !spapr_realize_vcpu(cpu, spapr, sc, errp)) {
+            spapr_cpu_core_unrealize(dev);
+            return;
+        }
+    }
+}
+
+/*
+ * qdev properties of the sPAPR CPU core: the NUMA node id (unset by
+ * default) and the pre-3.0 migration compatibility switch (off by
+ * default).
+ */
+static Property spapr_cpu_core_properties[] = {
+ DEFINE_PROP_INT32("node-id", SpaprCpuCore, node_id, CPU_UNSET_NUMA_NODE_ID),
+ DEFINE_PROP_BOOL("pre-3.0-migration", SpaprCpuCore, pre_3_0_migration,
+ false),
+ DEFINE_PROP_END_OF_LIST()
+};
+
+/*
+ * Class initializer shared by all concrete sPAPR core types.  @data is
+ * the CPU type name the core instantiates for its threads.
+ */
+static void spapr_cpu_core_class_init(ObjectClass *oc, void *data)
+{
+    SpaprCpuCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc);
+    DeviceClass *dc = DEVICE_CLASS(oc);
+
+    scc->cpu_type = data;
+
+    dc->realize = spapr_cpu_core_realize;
+    dc->unrealize = spapr_cpu_core_unrealize;
+    dc->reset = spapr_cpu_core_reset;
+    device_class_set_props(dc, spapr_cpu_core_properties);
+}
+
+/*
+ * Expands to a TypeInfo initializer for the sPAPR core type of
+ * @cpu_model.  The matching CPU type name is handed to
+ * spapr_cpu_core_class_init() through .class_data.
+ */
+#define DEFINE_SPAPR_CPU_CORE_TYPE(cpu_model) \
+ { \
+ .parent = TYPE_SPAPR_CPU_CORE, \
+ .class_data = (void *) POWERPC_CPU_TYPE_NAME(cpu_model), \
+ .class_init = spapr_cpu_core_class_init, \
+ .name = SPAPR_CPU_CORE_TYPE_NAME(cpu_model), \
+ }
+
+/*
+ * QOM types registered by this file: the abstract sPAPR CPU core base
+ * type followed by one concrete core type per supported CPU model.  The
+ * "host" core type is only built when CONFIG_KVM is defined.
+ */
+static const TypeInfo spapr_cpu_core_type_infos[] = {
+ {
+ .name = TYPE_SPAPR_CPU_CORE,
+ .parent = TYPE_CPU_CORE,
+ .abstract = true,
+ .instance_size = sizeof(SpaprCpuCore),
+ .class_size = sizeof(SpaprCpuCoreClass),
+ },
+ DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power8e_v2.1"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power8nvl_v1.0"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"),
+ DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"),
+#ifdef CONFIG_KVM
+ DEFINE_SPAPR_CPU_CORE_TYPE("host"),
+#endif
+};
+
+DEFINE_TYPES(spapr_cpu_core_type_infos)
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
new file mode 100644
index 000000000..f8ac0a10d
--- /dev/null
+++ b/hw/ppc/spapr_drc.c
@@ -0,0 +1,1326 @@
+/*
+ * QEMU SPAPR Dynamic Reconfiguration Connector Implementation
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Authors:
+ * Michael Roth <mdroth@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qnull.h"
+#include "qemu/cutils.h"
+#include "hw/ppc/spapr_drc.h"
+#include "qom/object.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-qdev.h"
+#include "qapi/visitor.h"
+#include "qemu/error-report.h"
+#include "hw/ppc/spapr.h" /* for RTAS return codes */
+#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */
+#include "hw/ppc/spapr_nvdimm.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/reset.h"
+#include "trace.h"
+
+#define DRC_CONTAINER_PATH "/dr-connector" /* QOM container in which drc_realize() adds one alias per DRC, named by hex DRC index */
+#define DRC_INDEX_TYPE_SHIFT 28 /* bit position of the class typeshift inside a DRC index */
+#define DRC_INDEX_ID_MASK ((1ULL << DRC_INDEX_TYPE_SHIFT) - 1) /* low 28 bits of a DRC index hold the DRC id */
+
+/* Return the type of @drc as a single bit placed at the class's typeshift. */
+SpaprDrcType spapr_drc_type(SpaprDrc *drc)
+{
+    return 1 << SPAPR_DR_CONNECTOR_GET_CLASS(drc)->typeshift;
+}
+
+/*
+ * Compute the DRC index of @drc.
+ *
+ * There is no mandated layout for a DRC index; it only has to be globally
+ * unique.  Bare-metal encodes the DRC type in the index, so the same
+ * scheme is used here: class typeshift in the top bits, DRC id below.
+ */
+uint32_t spapr_drc_index(SpaprDrc *drc)
+{
+    SpaprDrcClass *klass = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+    return (drc->id & DRC_INDEX_ID_MASK)
+           | (klass->typeshift << DRC_INDEX_TYPE_SHIFT);
+}
+
+/*
+ * Finish an unplug: hand the attached device to the class release hook,
+ * then drop every piece of per-device state held by the DRC (pending
+ * unplug flag, cached FDT fragment, "device" link property and pointer).
+ */
+static void spapr_drc_release(SpaprDrc *drc)
+{
+    SPAPR_DR_CONNECTOR_GET_CLASS(drc)->release(drc->dev);
+
+    /* forget the cached device tree fragment */
+    g_free(drc->fdt);
+    drc->fdt = NULL;
+    drc->fdt_start_offset = 0;
+
+    drc->unplug_requested = false;
+
+    /* detach the device from the connector */
+    object_property_del(OBJECT(drc), "device");
+    drc->dev = NULL;
+}
+
+/*
+ * Isolate handler for physical DRCs.
+ *
+ * POWERON: nothing to do.  UNISOLATE: not allowed.  CONFIGURED: move to
+ * POWERON and, if an unplug was requested, finalize it now.
+ * Returns an RTAS status code.
+ */
+static uint32_t drc_isolate_physical(SpaprDrc *drc)
+{
+    if (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON) {
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    }
+    if (drc->state == SPAPR_DRC_STATE_PHYSICAL_UNISOLATE) {
+        return RTAS_OUT_PARAM_ERROR; /* not allowed */
+    }
+    if (drc->state != SPAPR_DRC_STATE_PHYSICAL_CONFIGURED) {
+        g_assert_not_reached();
+    }
+
+    drc->state = SPAPR_DRC_STATE_PHYSICAL_POWERON;
+
+    if (!drc->unplug_requested) {
+        return RTAS_OUT_SUCCESS;
+    }
+
+    /* the guest has let go of the device: complete the pending unplug */
+    trace_spapr_drc_set_isolation_state_finalizing(spapr_drc_index(drc));
+    spapr_drc_release(drc);
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Unisolate handler for physical DRCs.
+ *
+ * UNISOLATE/CONFIGURED: nothing to do.  POWERON: requires an attached
+ * device; on success the DRC enters UNISOLATE and the configure-connector
+ * cursor is rewound to the start of the FDT fragment.
+ * Returns an RTAS status code.
+ */
+static uint32_t drc_unisolate_physical(SpaprDrc *drc)
+{
+    if (drc->state == SPAPR_DRC_STATE_PHYSICAL_UNISOLATE ||
+        drc->state == SPAPR_DRC_STATE_PHYSICAL_CONFIGURED) {
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    }
+    if (drc->state != SPAPR_DRC_STATE_PHYSICAL_POWERON) {
+        g_assert_not_reached();
+    }
+
+    /*
+     * A non-existent resource, or one in an 'UNUSABLE' allocation state,
+     * cannot be unisolated (PAPR 2.7, 13.5.3.5).
+     */
+    if (!drc->dev) {
+        return RTAS_OUT_NO_SUCH_INDICATOR;
+    }
+
+    drc->ccs_depth = 0;
+    drc->ccs_offset = drc->fdt_start_offset;
+    drc->state = SPAPR_DRC_STATE_PHYSICAL_UNISOLATE;
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Isolate handler for logical DRCs.
+ *
+ * AVAILABLE/UNUSABLE: nothing to do.  UNISOLATE: not allowed.
+ * CONFIGURED: move to AVAILABLE, except for LMBs without a pending unplug.
+ * Returns an RTAS status code.
+ */
+static uint32_t drc_isolate_logical(SpaprDrc *drc)
+{
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_AVAILABLE ||
+        drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) {
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    }
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_UNISOLATE) {
+        return RTAS_OUT_PARAM_ERROR; /* not allowed */
+    }
+    if (drc->state != SPAPR_DRC_STATE_LOGICAL_CONFIGURED) {
+        g_assert_not_reached();
+    }
+
+    /*
+     * Refuse to ISOLATE an LMB DRC whose LMB is not part of a DIMM that
+     * is marked for removal.
+     *
+     * The guest userspace tool drmgr, which drives memory hotplug/unplug,
+     * answers a drc-count based hot unplug request by trying to remove
+     * some set of 'removable' LMBs.  When the LMB it picked does not
+     * belong to a DIMM that is really being unplugged, the isolation
+     * request has to fail here.
+     */
+    if (!drc->unplug_requested
+        && spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
+        return RTAS_OUT_HW_ERROR;
+    }
+
+    drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE;
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Unisolate handler for logical DRCs.
+ *
+ * UNISOLATE/CONFIGURED: succeeds; if an unplug was pending the request is
+ * treated as the guest refusing the removal.  UNUSABLE: not allowed.
+ * AVAILABLE: enter UNISOLATE and rewind the configure-connector cursor.
+ * Returns an RTAS status code.
+ */
+static uint32_t drc_unisolate_logical(SpaprDrc *drc)
+{
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_UNISOLATE ||
+        drc->state == SPAPR_DRC_STATE_LOGICAL_CONFIGURED) {
+        /*
+         * Unisolating a logical DRC that was marked for unplug
+         * means that the kernel is refusing the removal.
+         */
+        if (drc->unplug_requested && drc->dev) {
+            DeviceState *rejected = drc->dev;
+
+            if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) {
+                SpaprMachineState *machine =
+                    SPAPR_MACHINE(qdev_get_machine());
+
+                spapr_memory_unplug_rollback(machine, rejected);
+            }
+
+            drc->unplug_requested = false;
+
+            if (rejected->id) {
+                error_report("Device hotunplug rejected by the guest "
+                             "for device %s", rejected->id);
+            }
+
+            qapi_event_send_device_unplug_guest_error(!!rejected->id,
+                                                      rejected->id,
+                                                      rejected->canonical_path);
+        }
+
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    }
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) {
+        return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */
+    }
+    if (drc->state != SPAPR_DRC_STATE_LOGICAL_AVAILABLE) {
+        g_assert_not_reached();
+    }
+
+    /* Move to AVAILABLE state should have ensured device was present */
+    g_assert(drc->dev);
+
+    drc->ccs_depth = 0;
+    drc->ccs_offset = drc->fdt_start_offset;
+    drc->state = SPAPR_DRC_STATE_LOGICAL_UNISOLATE;
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Allocation-state handler: move a logical DRC from UNUSABLE to AVAILABLE.
+ * Every other logical state is already usable and succeeds unchanged.
+ * Returns an RTAS status code.
+ */
+static uint32_t drc_set_usable(SpaprDrc *drc)
+{
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_AVAILABLE ||
+        drc->state == SPAPR_DRC_STATE_LOGICAL_UNISOLATE ||
+        drc->state == SPAPR_DRC_STATE_LOGICAL_CONFIGURED) {
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    }
+    if (drc->state != SPAPR_DRC_STATE_LOGICAL_UNUSABLE) {
+        g_assert_not_reached();
+    }
+
+    /*
+     * Without a resource/device behind the DRC there is no allocation
+     * state consistent with 'USABLE' to put it in.  PAPR 2.7, 13.5.3.4
+     * documents RTAS return code -3 / "no such indicator" for this case.
+     */
+    if (!drc->dev) {
+        return RTAS_OUT_NO_SUCH_INDICATOR;
+    }
+
+    /*
+     * Don't allow the guest to move a device away from UNUSABLE
+     * state when we want to unplug it
+     */
+    if (drc->unplug_requested) {
+        return RTAS_OUT_NO_SUCH_INDICATOR;
+    }
+
+    drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE;
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Allocation-state handler: move a logical DRC from AVAILABLE to UNUSABLE
+ * and finalize a pending unplug, if any.  UNISOLATE/CONFIGURED are
+ * rejected; UNUSABLE succeeds unchanged.  Returns an RTAS status code.
+ */
+static uint32_t drc_set_unusable(SpaprDrc *drc)
+{
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) {
+        return RTAS_OUT_SUCCESS; /* Nothing to do */
+    }
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_UNISOLATE ||
+        drc->state == SPAPR_DRC_STATE_LOGICAL_CONFIGURED) {
+        return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */
+    }
+    if (drc->state != SPAPR_DRC_STATE_LOGICAL_AVAILABLE) {
+        g_assert_not_reached();
+    }
+
+    drc->state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE;
+
+    if (!drc->unplug_requested) {
+        return RTAS_OUT_SUCCESS;
+    }
+
+    /* the guest gave the resource back: complete the pending unplug */
+    trace_spapr_drc_set_allocation_state_finalizing(spapr_drc_index(drc));
+    spapr_drc_release(drc);
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Build the human-readable name of @drc: the class's drc_name_prefix
+ * followed by the DRC id in decimal.
+ *
+ * Returns a newly allocated string; the caller releases it with g_free().
+ */
+static char *spapr_drc_name(SpaprDrc *drc)
+{
+    SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+    /* human-readable name for a DRC to encode into the DT
+     * description. this is mainly only used within a guest in place
+     * of the unique DRC index.
+     *
+     * in the case of VIO/PCI devices, it corresponds to a "location
+     * code" that maps a logical device/function (DRC index) to a
+     * physical (or virtual in the case of VIO) location in the system
+     * by chaining together the "location label" for each
+     * encapsulating component.
+     *
+     * since this is more to do with diagnosing physical hardware
+     * issues than guest compatibility, we choose location codes/DRC
+     * names that adhere to the documented format, but avoid encoding
+     * the entire topology information into the label/code, instead
+     * just using the location codes based on the labels for the
+     * endpoints (VIO/PCI adaptor connectors), which is basically just
+     * "C" followed by an integer ID.
+     *
+     * DRC names as documented by PAPR+ v2.7, 13.5.2.4
+     * location codes as documented by PAPR+ v2.7, 12.3.1.5
+     */
+
+    /*
+     * drc->id is a uint32_t: format it as unsigned so that ids with the
+     * top bit set do not show up as negative numbers in the name.
+     */
+    return g_strdup_printf("%s%" PRIu32, drck->drc_name_prefix, drc->id);
+}
+
+/*
+ * dr-entity-sense sensor value
+ * returned via get-sensor-state RTAS calls
+ * as expected by state diagram in PAPR+ 2.7, 13.4
+ * based on the current allocation/indicator/power states
+ * for the DR connector.
+ */
+/*
+ * dr-entity-sense for physical DRCs: PRESENT when a device is attached,
+ * EMPTY otherwise.
+ */
+static SpaprDREntitySense physical_entity_sense(SpaprDrc *drc)
+{
+    /*
+     * All PCI devices are assumed to sit in a 'live insertion' power
+     * domain, where QEMU rather than the guest manages the power state
+     * automatically.  Present, non-PCI resources are unaffected by power
+     * state.
+     */
+    if (drc->dev) {
+        return SPAPR_DR_ENTITY_SENSE_PRESENT;
+    }
+
+    return SPAPR_DR_ENTITY_SENSE_EMPTY;
+}
+
+/*
+ * dr-entity-sense for logical DRCs: UNUSABLE in the UNUSABLE state,
+ * PRESENT in every other logical state (which all require a device).
+ */
+static SpaprDREntitySense logical_entity_sense(SpaprDrc *drc)
+{
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) {
+        return SPAPR_DR_ENTITY_SENSE_UNUSABLE;
+    }
+
+    if (drc->state == SPAPR_DRC_STATE_LOGICAL_AVAILABLE ||
+        drc->state == SPAPR_DRC_STATE_LOGICAL_UNISOLATE ||
+        drc->state == SPAPR_DRC_STATE_LOGICAL_CONFIGURED) {
+        g_assert(drc->dev);
+        return SPAPR_DR_ENTITY_SENSE_PRESENT;
+    }
+
+    g_assert_not_reached();
+}
+
+/* QOM getter for the "index" property: visits the DRC index as a uint32. */
+static void prop_get_index(Object *obj, Visitor *v, const char *name,
+                           void *opaque, Error **errp)
+{
+    uint32_t index = spapr_drc_index(SPAPR_DR_CONNECTOR(obj));
+
+    visit_type_uint32(v, name, &index, errp);
+}
+
+/*
+ * QOM getter for the "fdt" property.
+ *
+ * Visits null when the DRC has no FDT fragment.  Otherwise walks the
+ * fragment from drc->fdt_start_offset until the node opened there is
+ * closed again: each node is visited as a struct and each property as a
+ * list of uint8 values.
+ *
+ * On a visitor error, every struct/list that was successfully started is
+ * ended before returning, as the visitor API requires; the error itself
+ * is reported through @errp by the failing visit call.
+ */
+static void prop_get_fdt(Object *obj, Visitor *v, const char *name,
+                         void *opaque, Error **errp)
+{
+    SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj);
+    QNull *null = NULL;
+    int fdt_offset_next, fdt_offset, fdt_depth;
+    void *fdt;
+
+    if (!drc->fdt) {
+        visit_type_null(v, NULL, &null, errp);
+        qobject_unref(null);
+        return;
+    }
+
+    fdt = drc->fdt;
+    fdt_offset = drc->fdt_start_offset;
+    fdt_depth = 0; /* number of structs currently open in the visitor */
+
+    do {
+        /* distinct names: do not shadow the @name parameter */
+        const char *node_name, *prop_name;
+        const struct fdt_property *prop;
+        int prop_len = 0;
+        uint32_t tag;
+        bool ok;
+        int i;
+
+        tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next);
+        switch (tag) {
+        case FDT_BEGIN_NODE:
+            node_name = fdt_get_name(fdt, fdt_offset, NULL);
+            if (!visit_start_struct(v, node_name, NULL, 0, errp)) {
+                goto unwind;
+            }
+            /* count the struct only once it is really open */
+            fdt_depth++;
+            break;
+        case FDT_END_NODE:
+            /* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */
+            g_assert(fdt_depth > 0);
+            ok = visit_check_struct(v, errp);
+            visit_end_struct(v, NULL);
+            fdt_depth--;
+            if (!ok) {
+                goto unwind;
+            }
+            break;
+        case FDT_PROP:
+            prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len);
+            /* fdt_next_tag() just reported a property at this offset */
+            g_assert(prop);
+            prop_name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
+            if (!visit_start_list(v, prop_name, NULL, 0, errp)) {
+                goto unwind;
+            }
+            ok = true;
+            for (i = 0; ok && i < prop_len; i++) {
+                ok = visit_type_uint8(v, NULL, (uint8_t *)&prop->data[i],
+                                      errp);
+            }
+            if (ok) {
+                ok = visit_check_list(v, errp);
+            }
+            /* the list was started, so it is ended on every path */
+            visit_end_list(v, NULL);
+            if (!ok) {
+                goto unwind;
+            }
+            break;
+        default:
+            error_report("device FDT in unexpected state: %" PRIu32, tag);
+            abort();
+        }
+        fdt_offset = fdt_offset_next;
+    } while (fdt_depth != 0);
+
+    return;
+
+unwind:
+    /* close the structs still open so the visitor can release its state */
+    while (fdt_depth-- > 0) {
+        visit_end_struct(v, NULL);
+    }
+}
+
+/*
+ * Attach device @d to an empty DRC and expose it through a "device" link
+ * property.  The DRC must have no device and be in the LOGICAL_UNUSABLE
+ * or PHYSICAL_POWERON state.
+ */
+void spapr_drc_attach(SpaprDrc *drc, DeviceState *d)
+{
+    Object *drc_obj = OBJECT(drc);
+
+    trace_spapr_drc_attach(spapr_drc_index(drc));
+
+    g_assert(!drc->dev);
+    g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE)
+             || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON));
+
+    drc->dev = d;
+
+    /* the link is typed after the attached device and points at drc->dev */
+    object_property_add_link(drc_obj, "device",
+                             object_get_typename(OBJECT(drc->dev)),
+                             (Object **)(&drc->dev),
+                             NULL, 0);
+}
+
+/*
+ * Flag the attached device for removal.  If the DRC is already in its
+ * class's empty state the device is released right away; otherwise the
+ * release is left pending.
+ */
+void spapr_drc_unplug_request(SpaprDrc *drc)
+{
+    SpaprDrcClass *klass = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+    trace_spapr_drc_unplug_request(spapr_drc_index(drc));
+
+    g_assert(drc->dev);
+
+    drc->unplug_requested = true;
+
+    if (drc->state == klass->empty_state) {
+        spapr_drc_release(drc);
+        return;
+    }
+
+    trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc));
+}
+
+/*
+ * Reset @drc.  A pending unplug is completed first; the DRC then goes to
+ * the class's ready state if a device is still attached, or to its empty
+ * state otherwise.
+ *
+ * Returns true when a pending unplug was completed by this call.
+ */
+bool spapr_drc_reset(SpaprDrc *drc)
+{
+    SpaprDrcClass *klass = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+    bool released;
+
+    trace_spapr_drc_reset(spapr_drc_index(drc));
+
+    /*
+     * Right at reset it is safe to assume that a device pending removal
+     * can be removed.
+     */
+    released = drc->unplug_requested;
+    if (released) {
+        spapr_drc_release(drc);
+    }
+
+    if (!drc->dev) {
+        drc->state = klass->empty_state;
+        drc->ccs_offset = -1;
+        drc->ccs_depth = -1;
+        return released;
+    }
+
+    /* A device present at reset is ready to go, same as coldplugged */
+    drc->state = klass->ready_state;
+    /*
+     * Rewind the cursor so the FDT fragment can be sent again through a
+     * configure-connector call if the guest asks for it.
+     */
+    drc->ccs_offset = drc->fdt_start_offset;
+    drc->ccs_depth = 0;
+
+    return released;
+}
+
+/* .needed predicate of the "spapr_drc/unplug_requested" subsection. */
+static bool spapr_drc_unplug_requested_needed(void *opaque)
+{
+    SpaprDrc *drc = opaque;
+
+    return spapr_drc_unplug_requested(drc);
+}
+
+static const VMStateDescription vmstate_spapr_drc_unplug_requested = { /* listed as a subsection of vmstate_spapr_drc */
+ .name = "spapr_drc/unplug_requested",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_drc_unplug_requested_needed, /* predicate: true while an unplug request is pending */
+ .fields = (VMStateField []) {
+ VMSTATE_BOOL(unplug_requested, SpaprDrc),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * .needed predicate of vmstate_spapr_drc: tells whether the DRC state has
+ * to be migrated (or the DRC reset at CAS).
+ */
+static bool spapr_drc_needed(void *opaque)
+{
+    SpaprDrc *drc = opaque;
+    SpaprDrcClass *klass = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+    /*
+     * With no device plugged in, there is neither state to migrate nor a
+     * reason to reset the DRC at CAS.
+     */
+    if (!drc->dev) {
+        return false;
+    }
+
+    /*
+     * State differs from the expected long-term state, which is the same
+     * as the coldplugged initial state: it must be migrated / reset.
+     */
+    if (drc->state != klass->ready_state) {
+        return true;
+    }
+
+    /* Otherwise only a pending unplug request makes it necessary. */
+    return spapr_drc_unplug_requested(drc);
+}
+
+static const VMStateDescription vmstate_spapr_drc = { /* registered per DRC by drc_realize() */
+ .name = "spapr_drc",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_drc_needed, /* predicate: device attached and state not ready_state, or unplug pending */
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(state, SpaprDrc),
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_spapr_drc_unplug_requested,
+ NULL
+ }
+};
+
+/*
+ * Realize a DRC: publish it as DRC_CONTAINER_PATH/<hex index> and register
+ * its migration state.
+ *
+ * The alias is managed in realize/unrealize because the guest talks to
+ * the DRC through RTAS calls that carry the global DRC index, and lookups
+ * rely on that path existing in the composition tree.  Removing the alias
+ * therefore makes the DRC inaccessible to the guest.
+ */
+static void drc_realize(DeviceState *d, Error **errp)
+{
+    SpaprDrc *drc = SPAPR_DR_CONNECTOR(d);
+    const uint32_t index = spapr_drc_index(drc);
+    const char *component;
+    Object *container;
+    gchar *alias;
+
+    trace_spapr_drc_realize(index);
+
+    container = container_get(object_get_root(), DRC_CONTAINER_PATH);
+    alias = g_strdup_printf("%x", index);
+    component = object_get_canonical_path_component(OBJECT(drc));
+    trace_spapr_drc_realize_child(index, component);
+    object_property_add_alias(container, alias, drc->owner, component);
+    g_free(alias);
+
+    vmstate_register(VMSTATE_IF(drc), index, &vmstate_spapr_drc, drc);
+    trace_spapr_drc_realize_complete(index);
+}
+
+/*
+ * Undo drc_realize(): unregister the migration state and delete the
+ * DRC_CONTAINER_PATH/<hex index> alias.
+ */
+static void drc_unrealize(DeviceState *d)
+{
+    SpaprDrc *drc = SPAPR_DR_CONNECTOR(d);
+    const uint32_t index = spapr_drc_index(drc);
+    Object *container;
+    gchar *alias;
+
+    trace_spapr_drc_unrealize(index);
+    vmstate_unregister(VMSTATE_IF(drc), &vmstate_spapr_drc, drc);
+
+    container = container_get(object_get_root(), DRC_CONTAINER_PATH);
+    alias = g_strdup_printf("%x", index);
+    object_property_del(container, alias);
+    g_free(alias);
+}
+
+/*
+ * Create a DRC of QOM type @type with id @id, add it to @owner as child
+ * "dr-connector[<index>]" and realize it.  The reference obtained from
+ * object_new() is dropped once the child property has been added.
+ */
+SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type,
+                                 uint32_t id)
+{
+    Object *obj = object_new(type);
+    SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj);
+    char *child_name;
+
+    drc->owner = owner;
+    drc->id = id;
+
+    child_name = g_strdup_printf("dr-connector[%"PRIu32"]",
+                                 spapr_drc_index(drc));
+    object_property_add_child(owner, child_name, OBJECT(drc));
+    object_unref(OBJECT(drc));
+
+    qdev_realize(DEVICE(drc), NULL, NULL);
+    g_free(child_name);
+
+    return drc;
+}
+
+/*
+ * Instance initializer: start in the class's empty state and add the
+ * "id", "index" and "fdt" properties.
+ */
+static void spapr_dr_connector_instance_init(Object *obj)
+{
+    SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj);
+
+    drc->state = SPAPR_DR_CONNECTOR_GET_CLASS(drc)->empty_state;
+
+    object_property_add_uint32_ptr(obj, "id", &drc->id, OBJ_PROP_FLAG_READ);
+    /* getter-only properties */
+    object_property_add(obj, "index", "uint32", prop_get_index,
+                        NULL, NULL, NULL);
+    object_property_add(obj, "fdt", "struct", prop_get_fdt,
+                        NULL, NULL, NULL);
+}
+
+/* Class initializer of the base DR connector type. */
+static void spapr_dr_connector_class_init(ObjectClass *k, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(k);
+
+    /*
+     * Reason: a DR connector has to be wired to either the machine or a
+     * PHB, which is done in spapr_dr_connector_new().
+     */
+    dev_class->user_creatable = false;
+
+    dev_class->unrealize = drc_unrealize;
+    dev_class->realize = drc_realize;
+}
+
+/*
+ * .needed predicate of vmstate_spapr_drc_physical: the indicator needs
+ * migrating only when it differs from the value implied by device
+ * presence (ACTIVE with a device, INACTIVE without).
+ */
+static bool drc_physical_needed(void *opaque)
+{
+    SpaprDrcPhysical *drcp = (SpaprDrcPhysical *)opaque;
+    SpaprDrc *drc = SPAPR_DR_CONNECTOR(drcp);
+
+    if (drc->dev) {
+        return drcp->dr_indicator != SPAPR_DR_INDICATOR_ACTIVE;
+    }
+
+    return drcp->dr_indicator != SPAPR_DR_INDICATOR_INACTIVE;
+}
+
+static const VMStateDescription vmstate_spapr_drc_physical = { /* registered per physical DRC by realize_physical() */
+ .name = "spapr_drc/physical",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = drc_physical_needed, /* predicate: indicator differs from the value implied by device presence */
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(dr_indicator, SpaprDrcPhysical),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+/*
+ * Reset handler of physical DRCs: the indicator becomes ACTIVE when a
+ * device is attached and INACTIVE otherwise.
+ */
+static void drc_physical_reset(void *opaque)
+{
+    SpaprDrc *drc = SPAPR_DR_CONNECTOR(opaque);
+    SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(drc);
+
+    drcp->dr_indicator = drc->dev ? SPAPR_DR_INDICATOR_ACTIVE
+                                  : SPAPR_DR_INDICATOR_INACTIVE;
+}
+
+/*
+ * Realize a physical DRC: run the common drc_realize(), then register the
+ * physical-only migration state and the reset handler.  An error from the
+ * common part is propagated to @errp and nothing else is registered.
+ */
+static void realize_physical(DeviceState *d, Error **errp)
+{
+    SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(d);
+    Error *err = NULL;
+
+    drc_realize(d, &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    vmstate_register(VMSTATE_IF(drcp),
+                     spapr_drc_index(SPAPR_DR_CONNECTOR(drcp)),
+                     &vmstate_spapr_drc_physical, drcp);
+    qemu_register_reset(drc_physical_reset, drcp);
+}
+
+/*
+ * Unrealize a physical DRC: common drc_unrealize() first, then drop the
+ * physical-only migration state and the reset handler.
+ */
+static void unrealize_physical(DeviceState *d)
+{
+    SpaprDrcPhysical *physical = SPAPR_DRC_PHYSICAL(d);
+
+    drc_unrealize(d);
+
+    vmstate_unregister(VMSTATE_IF(physical), &vmstate_spapr_drc_physical,
+                       physical);
+    qemu_unregister_reset(drc_physical_reset, physical);
+}
+
+/*
+ * Class initializer of the abstract physical DRC type: installs the
+ * physical realize/unrealize wrappers, the physical state handlers and
+ * the ready/empty states.
+ */
+static void spapr_drc_physical_class_init(ObjectClass *k, void *data)
+{
+    SpaprDrcClass *drc_class = SPAPR_DR_CONNECTOR_CLASS(k);
+    DeviceClass *dev_class = DEVICE_CLASS(k);
+
+    /* qdev hooks */
+    dev_class->realize = realize_physical;
+    dev_class->unrealize = unrealize_physical;
+
+    /* state machine */
+    drc_class->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON;
+    drc_class->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED;
+    drc_class->isolate = drc_isolate_physical;
+    drc_class->unisolate = drc_unisolate_physical;
+    drc_class->dr_entity_sense = physical_entity_sense;
+}
+
+/*
+ * Class initializer of the abstract logical DRC type: installs the
+ * logical state handlers and the ready/empty states.
+ */
+static void spapr_drc_logical_class_init(ObjectClass *k, void *data)
+{
+    SpaprDrcClass *drc_class = SPAPR_DR_CONNECTOR_CLASS(k);
+
+    drc_class->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE;
+    drc_class->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED;
+    drc_class->isolate = drc_isolate_logical;
+    drc_class->unisolate = drc_unisolate_logical;
+    drc_class->dr_entity_sense = logical_entity_sense;
+}
+
+/* Class initializer of the CPU DRC type: identity strings and callbacks. */
+static void spapr_drc_cpu_class_init(ObjectClass *k, void *data)
+{
+    SpaprDrcClass *drc_class = SPAPR_DR_CONNECTOR_CLASS(k);
+
+    /* identity */
+    drc_class->typename = "CPU";
+    drc_class->drc_name_prefix = "CPU ";
+    drc_class->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU;
+
+    /* callbacks */
+    drc_class->dt_populate = spapr_core_dt_populate;
+    drc_class->release = spapr_core_release;
+}
+
+/* Class initializer of the PCI DRC type: identity strings and callbacks. */
+static void spapr_drc_pci_class_init(ObjectClass *k, void *data)
+{
+    SpaprDrcClass *drc_class = SPAPR_DR_CONNECTOR_CLASS(k);
+
+    /* identity */
+    drc_class->typename = "28";
+    drc_class->drc_name_prefix = "C";
+    drc_class->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI;
+
+    /* callbacks */
+    drc_class->dt_populate = spapr_pci_dt_populate;
+    drc_class->release = spapr_phb_remove_pci_device_cb;
+}
+
+/* Class initializer of the LMB DRC type: identity strings and callbacks. */
+static void spapr_drc_lmb_class_init(ObjectClass *k, void *data)
+{
+    SpaprDrcClass *drc_class = SPAPR_DR_CONNECTOR_CLASS(k);
+
+    /* identity */
+    drc_class->typename = "MEM";
+    drc_class->drc_name_prefix = "LMB ";
+    drc_class->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB;
+
+    /* callbacks */
+    drc_class->dt_populate = spapr_lmb_dt_populate;
+    drc_class->release = spapr_lmb_release;
+}
+
+/* Class initializer of the PHB DRC type: identity strings and callbacks. */
+static void spapr_drc_phb_class_init(ObjectClass *k, void *data)
+{
+    SpaprDrcClass *drc_class = SPAPR_DR_CONNECTOR_CLASS(k);
+
+    /* identity */
+    drc_class->typename = "PHB";
+    drc_class->drc_name_prefix = "PHB ";
+    drc_class->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB;
+
+    /* callbacks */
+    drc_class->dt_populate = spapr_phb_dt_populate;
+    drc_class->release = spapr_phb_release;
+}
+
+/*
+ * Class initializer of the PMEM DRC type: identity strings and callbacks.
+ * This class registers no release callback.
+ */
+static void spapr_drc_pmem_class_init(ObjectClass *k, void *data)
+{
+    SpaprDrcClass *drc_class = SPAPR_DR_CONNECTOR_CLASS(k);
+
+    /* identity */
+    drc_class->typename = "PMEM";
+    drc_class->drc_name_prefix = "PMEM ";
+    drc_class->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM;
+
+    /* callbacks */
+    drc_class->dt_populate = spapr_pmem_dt_populate;
+    drc_class->release = NULL;
+}
+
+static const TypeInfo spapr_dr_connector_info = { /* base DR connector type; parent of the physical and logical types */
+ .name = TYPE_SPAPR_DR_CONNECTOR,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(SpaprDrc),
+ .instance_init = spapr_dr_connector_instance_init,
+ .class_size = sizeof(SpaprDrcClass),
+ .class_init = spapr_dr_connector_class_init,
+ .abstract = true, /* not instantiable itself */
+};
+
+static const TypeInfo spapr_drc_physical_info = { /* abstract physical DRC type; parent of the PCI DRC type */
+ .name = TYPE_SPAPR_DRC_PHYSICAL,
+ .parent = TYPE_SPAPR_DR_CONNECTOR,
+ .instance_size = sizeof(SpaprDrcPhysical), /* instance struct differs from the base SpaprDrc */
+ .class_init = spapr_drc_physical_class_init,
+ .abstract = true,
+};
+
+static const TypeInfo spapr_drc_logical_info = { /* abstract logical DRC type; parent of the CPU, LMB, PHB and PMEM DRC types */
+ .name = TYPE_SPAPR_DRC_LOGICAL,
+ .parent = TYPE_SPAPR_DR_CONNECTOR,
+ .class_init = spapr_drc_logical_class_init,
+ .abstract = true,
+};
+
+static const TypeInfo spapr_drc_cpu_info = { /* CPU DRC type, a logical DRC */
+ .name = TYPE_SPAPR_DRC_CPU,
+ .parent = TYPE_SPAPR_DRC_LOGICAL,
+ .class_init = spapr_drc_cpu_class_init,
+};
+
+static const TypeInfo spapr_drc_pci_info = { /* PCI DRC type, the only physical DRC declared here */
+ .name = TYPE_SPAPR_DRC_PCI,
+ .parent = TYPE_SPAPR_DRC_PHYSICAL,
+ .class_init = spapr_drc_pci_class_init,
+};
+
+static const TypeInfo spapr_drc_lmb_info = { /* LMB DRC type, a logical DRC */
+ .name = TYPE_SPAPR_DRC_LMB,
+ .parent = TYPE_SPAPR_DRC_LOGICAL,
+ .class_init = spapr_drc_lmb_class_init,
+};
+
+static const TypeInfo spapr_drc_phb_info = { /* PHB DRC type, a logical DRC */
+ .name = TYPE_SPAPR_DRC_PHB,
+ .parent = TYPE_SPAPR_DRC_LOGICAL,
+ .instance_size = sizeof(SpaprDrc), /* NOTE(review): same size the base type declares; the sibling logical types omit this field */
+ .class_init = spapr_drc_phb_class_init,
+};
+
+static const TypeInfo spapr_drc_pmem_info = { /* PMEM DRC type, a logical DRC */
+ .name = TYPE_SPAPR_DRC_PMEM,
+ .parent = TYPE_SPAPR_DRC_LOGICAL,
+ .class_init = spapr_drc_pmem_class_init,
+};
+
+/* helper functions for external users */
+
+/*
+ * Look up a DRC by its global index through the
+ * DRC_CONTAINER_PATH/<hex index> path.  Returns NULL when the path does
+ * not resolve.
+ */
+SpaprDrc *spapr_drc_by_index(uint32_t index)
+{
+    gchar *path = g_strdup_printf("%s/%x", DRC_CONTAINER_PATH, index);
+    Object *found = object_resolve_path(path, NULL);
+
+    g_free(path);
+
+    if (!found) {
+        return NULL;
+    }
+
+    return SPAPR_DR_CONNECTOR(found);
+}
+
+/*
+ * Look up a DRC by QOM type name and per-type id.  The index is composed
+ * the same way spapr_drc_index() does: class typeshift in the top bits,
+ * id in the low bits.
+ */
+SpaprDrc *spapr_drc_by_id(const char *type, uint32_t id)
+{
+    ObjectClass *oc = object_class_by_name(type);
+    SpaprDrcClass *klass = SPAPR_DR_CONNECTOR_CLASS(oc);
+
+    return spapr_drc_by_index((klass->typeshift << DRC_INDEX_TYPE_SHIFT)
+                              | (id & DRC_INDEX_ID_MASK));
+}
+
+/**
+ * spapr_dt_drc
+ *
+ * @fdt: libfdt device tree
+ * @offset: offset of the DT node in which to generate the properties
+ * @owner: parent Object/DeviceState for which to generate DRC
+ * descriptions
+ * @drc_type_mask: mask of SpaprDrcType values corresponding
+ * to the types of DRCs to generate entries for
+ *
+ * generate OF properties to describe DRC topology/indices to guests
+ *
+ * as documented in PAPR+ v2.1, 13.5.2
+ */
+int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask)
+{
+    Object *container;
+    ObjectProperty *prop;
+    ObjectPropertyIterator iter;
+    GArray *indexes, *power_domains;
+    GString *names, *types;
+    uint32_t count = 0;
+    int ret;
+
+    /*
+     * This should really be only called once per node since it overwrites
+     * the OF properties if they already exist.
+     */
+    g_assert(!fdt_get_property(fdt, offset, "ibm,drc-indexes", NULL));
+
+    /*
+     * Every property starts with a 32-bit element count.  The count is
+     * only known after walking all matching DRCs, so reserve its slot in
+     * each buffer now and fill it in afterwards.
+     */
+    indexes = g_array_new(false, true, sizeof(uint32_t));
+    indexes = g_array_set_size(indexes, 1);
+    power_domains = g_array_new(false, true, sizeof(uint32_t));
+    power_domains = g_array_set_size(power_domains, 1);
+    names = g_string_new(NULL);
+    names = g_string_set_size(names, sizeof(uint32_t));
+    types = g_string_new(NULL);
+    types = g_string_set_size(types, sizeof(uint32_t));
+
+    /*
+     * Aliases for all DRConnector objects are rooted in the QOM
+     * composition tree at DRC_CONTAINER_PATH.
+     */
+    container = container_get(object_get_root(), DRC_CONTAINER_PATH);
+
+    object_property_iter_init(&iter, container);
+    while ((prop = object_property_iter_next(&iter))) {
+        SpaprDrc *drc;
+        char *name;
+        uint32_t index_be, power_domain_be;
+
+        if (!strstart(prop->type, "link<", NULL)) {
+            continue;
+        }
+
+        drc = SPAPR_DR_CONNECTOR(object_property_get_link(container,
+                                                          prop->name,
+                                                          &error_abort));
+
+        /* skip DRCs of another owner or of a type outside the mask */
+        if ((owner && (drc->owner != owner))
+            || (spapr_drc_type(drc) & drc_type_mask) == 0) {
+            continue;
+        }
+
+        count++;
+
+        /* ibm,drc-indexes */
+        index_be = cpu_to_be32(spapr_drc_index(drc));
+        g_array_append_val(indexes, index_be);
+
+        /* ibm,drc-power-domains */
+        power_domain_be = cpu_to_be32(-1);
+        g_array_append_val(power_domains, power_domain_be);
+
+        /* ibm,drc-names: NUL-terminated entries */
+        name = spapr_drc_name(drc);
+        names = g_string_append(names, name);
+        names = g_string_append_c(names, '\0');
+        g_free(name);
+
+        /* ibm,drc-types: NUL-terminated entries */
+        types = g_string_append(types,
+                                SPAPR_DR_CONNECTOR_GET_CLASS(drc)->typename);
+        types = g_string_append_c(types, '\0');
+    }
+
+    /* fill in the count slots reserved at the start of each buffer */
+    g_array_index(indexes, uint32_t, 0) = cpu_to_be32(count);
+    g_array_index(power_domains, uint32_t, 0) = cpu_to_be32(count);
+    *(uint32_t *)names->str = cpu_to_be32(count);
+    *(uint32_t *)types->str = cpu_to_be32(count);
+
+    ret = fdt_setprop(fdt, offset, "ibm,drc-indexes",
+                      indexes->data, indexes->len * sizeof(uint32_t));
+    if (ret) {
+        error_report("Couldn't create ibm,drc-indexes property");
+        goto cleanup;
+    }
+
+    ret = fdt_setprop(fdt, offset, "ibm,drc-power-domains",
+                      power_domains->data,
+                      power_domains->len * sizeof(uint32_t));
+    if (ret) {
+        error_report("Couldn't finalize ibm,drc-power-domains property");
+        goto cleanup;
+    }
+
+    ret = fdt_setprop(fdt, offset, "ibm,drc-names", names->str, names->len);
+    if (ret) {
+        error_report("Couldn't finalize ibm,drc-names property");
+        goto cleanup;
+    }
+
+    ret = fdt_setprop(fdt, offset, "ibm,drc-types", types->str, types->len);
+    if (ret) {
+        error_report("Couldn't finalize ibm,drc-types property");
+    }
+
+cleanup:
+    g_array_free(indexes, true);
+    g_array_free(power_domains, true);
+    g_string_free(names, true);
+    g_string_free(types, true);
+
+    return ret;
+}
+
+/*
+ * Reset every DRC linked under DRC_CONTAINER_PATH.  Whenever a reset
+ * completes an unplug, the walk starts over from the beginning.
+ */
+void spapr_drc_reset_all(SpaprMachineState *spapr)
+{
+    Object *container = container_get(object_get_root(), DRC_CONTAINER_PATH);
+    bool rescan;
+
+    do {
+        ObjectPropertyIterator iter;
+        ObjectProperty *prop;
+
+        rescan = false;
+        object_property_iter_init(&iter, container);
+
+        while ((prop = object_property_iter_next(&iter))) {
+            Object *target;
+
+            if (!strstart(prop->type, "link<", NULL)) {
+                continue;
+            }
+            target = object_property_get_link(container, prop->name,
+                                              &error_abort);
+
+            /*
+             * This will complete any pending plug/unplug requests.
+             * In case of a unplugged PHB or PCI bridge, this will
+             * cause some DRCs to be destroyed and thus potentially
+             * invalidate the iterator.
+             */
+            if (spapr_drc_reset(SPAPR_DR_CONNECTOR(target))) {
+                rescan = true;
+                break;
+            }
+        }
+    } while (rescan);
+}
+
+/*
+ * RTAS calls
+ */
+
+/*
+ * set-indicator backend for the isolation state: dispatch to the class's
+ * isolate/unisolate handler of the DRC with index @idx.
+ * Returns an RTAS status code.
+ */
+static uint32_t rtas_set_isolation_state(uint32_t idx, uint32_t state)
+{
+    SpaprDrc *drc = spapr_drc_by_index(idx);
+    SpaprDrcClass *klass;
+
+    if (!drc) {
+        return RTAS_OUT_NO_SUCH_INDICATOR;
+    }
+
+    trace_spapr_drc_set_isolation_state(spapr_drc_index(drc), state);
+
+    klass = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+    if (state == SPAPR_DR_ISOLATION_STATE_ISOLATED) {
+        return klass->isolate(drc);
+    }
+    if (state == SPAPR_DR_ISOLATION_STATE_UNISOLATED) {
+        return klass->unisolate(drc);
+    }
+
+    return RTAS_OUT_PARAM_ERROR;
+}
+
+/*
+ * set-indicator backend for the allocation state.  Only logical DRCs
+ * accept it; anything else yields "no such indicator".
+ * Returns an RTAS status code.
+ */
+static uint32_t rtas_set_allocation_state(uint32_t idx, uint32_t state)
+{
+    SpaprDrc *drc = spapr_drc_by_index(idx);
+
+    if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_LOGICAL)) {
+        return RTAS_OUT_NO_SUCH_INDICATOR;
+    }
+
+    trace_spapr_drc_set_allocation_state(spapr_drc_index(drc), state);
+
+    if (state == SPAPR_DR_ALLOCATION_STATE_USABLE) {
+        return drc_set_usable(drc);
+    }
+    if (state == SPAPR_DR_ALLOCATION_STATE_UNUSABLE) {
+        return drc_set_unusable(drc);
+    }
+
+    return RTAS_OUT_PARAM_ERROR;
+}
+
+/*
+ * set-indicator backend for the DR indicator.  Only physical DRCs carry
+ * one; the new value must be one of the four known indicator states.
+ * Returns an RTAS status code.
+ */
+static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state)
+{
+    SpaprDrc *drc = spapr_drc_by_index(idx);
+    bool known_state;
+
+    if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_PHYSICAL)) {
+        return RTAS_OUT_NO_SUCH_INDICATOR;
+    }
+
+    known_state = state == SPAPR_DR_INDICATOR_INACTIVE
+                  || state == SPAPR_DR_INDICATOR_ACTIVE
+                  || state == SPAPR_DR_INDICATOR_IDENTIFY
+                  || state == SPAPR_DR_INDICATOR_ACTION;
+    if (!known_state) {
+        return RTAS_OUT_PARAM_ERROR; /* bad state parameter */
+    }
+
+    trace_spapr_drc_set_dr_indicator(idx, state);
+    SPAPR_DRC_PHYSICAL(drc)->dr_indicator = state;
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * RTAS set-indicator: expects 3 arguments (indicator type, index, state)
+ * and 1 return slot.  Dispatches on the indicator type and stores the
+ * resulting status in return slot 0.
+ */
+static void rtas_set_indicator(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               uint32_t token,
+                               uint32_t nargs, target_ulong args,
+                               uint32_t nret, target_ulong rets)
+{
+    uint32_t status;
+
+    if (nargs != 3 || nret != 1) {
+        status = RTAS_OUT_PARAM_ERROR;
+    } else {
+        uint32_t type = rtas_ld(args, 0);
+        uint32_t idx = rtas_ld(args, 1);
+        uint32_t state = rtas_ld(args, 2);
+
+        switch (type) {
+        case RTAS_SENSOR_TYPE_ISOLATION_STATE:
+            status = rtas_set_isolation_state(idx, state);
+            break;
+        case RTAS_SENSOR_TYPE_DR:
+            status = rtas_set_dr_indicator(idx, state);
+            break;
+        case RTAS_SENSOR_TYPE_ALLOCATION_STATE:
+            status = rtas_set_allocation_state(idx, state);
+            break;
+        default:
+            status = RTAS_OUT_NOT_SUPPORTED;
+            break;
+        }
+    }
+
+    rtas_st(rets, 0, status);
+}
+
+/*
+ * RTAS get-sensor-state: expects 2 arguments (sensor type, index) and 2
+ * return slots.  Only the entity-sense sensor is implemented.  Return
+ * slot 0 receives the status, slot 1 the sensor state (0 on any failure).
+ */
+static void rtas_get_sensor_state(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args, uint32_t nret,
+                                  target_ulong rets)
+{
+    uint32_t status = RTAS_OUT_SUCCESS;
+    uint32_t sense = 0;
+
+    if (nargs != 2 || nret != 2) {
+        status = RTAS_OUT_PARAM_ERROR;
+    } else {
+        uint32_t sensor_type = rtas_ld(args, 0);
+        uint32_t sensor_index = rtas_ld(args, 1);
+
+        if (sensor_type != RTAS_SENSOR_TYPE_ENTITY_SENSE) {
+            /* currently only DR-related sensors are implemented */
+            trace_spapr_rtas_get_sensor_state_not_supported(sensor_index,
+                                                            sensor_type);
+            status = RTAS_OUT_NOT_SUPPORTED;
+        } else {
+            SpaprDrc *drc = spapr_drc_by_index(sensor_index);
+
+            if (!drc) {
+                trace_spapr_rtas_get_sensor_state_invalid(sensor_index);
+                status = RTAS_OUT_PARAM_ERROR;
+            } else {
+                SpaprDrcClass *klass = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+                sense = klass->dr_entity_sense(drc);
+            }
+        }
+    }
+
+    rtas_st(rets, 0, status);
+    rtas_st(rets, 1, sense);
+}
+
+/* configure-connector work area offsets, int32_t units for field
+ * indexes, bytes for field offset/len values.
+ *
+ * as documented by PAPR+ v2.7, 13.5.3.5
+ */
+#define CC_IDX_NODE_NAME_OFFSET 2 /* field index, int32_t units */
+#define CC_IDX_PROP_NAME_OFFSET 2 /* field index, int32_t units; same slot as the node name offset */
+#define CC_IDX_PROP_LEN 3 /* field index, int32_t units */
+#define CC_IDX_PROP_DATA_OFFSET 4 /* field index, int32_t units */
+#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4) /* = 20: byte offset just past the indexed int32_t fields */
+#define CC_WA_LEN 4096 /* work-area length in bytes; configure_connector_st() clamps writes to it */
+
+ /*
+  * Copy @len bytes from @buf into the configure-connector work area that
+  * starts at guest address @addr, at byte offset @offset.
+  *
+  * The write is clamped so that it never extends past CC_WA_LEN bytes from
+  * the start of the work area. An @offset at or beyond CC_WA_LEN writes
+  * nothing: without that check the unsigned subtraction below would wrap
+  * and the clamp would no longer bound the copy.
+  */
+ static void configure_connector_st(target_ulong addr, target_ulong offset,
+                                    const void *buf, size_t len)
+ {
+     if (offset >= CC_WA_LEN) {
+         return;
+     }
+
+     cpu_physical_memory_write(ppc64_phys_to_real(addr + offset),
+                               buf, MIN(len, CC_WA_LEN - offset));
+ }
+
+ /*
+  * RTAS "ibm,configure-connector" handler.
+  *
+  * Expects two arguments and one return slot. The work-area address is
+  * assembled from argument 0 (low 32 bits) and argument 1 (high 32 bits);
+  * the first word of the work area holds the DRC index.
+  *
+  * Each call walks the DRC's device-tree fragment from drc->ccs_offset
+  * until one actionable tag is found, writes the node name or the property
+  * name/length/value into the work area, and returns the matching
+  * SPAPR_DR_CC_RESPONSE_* code in return slot 0. When the outermost node
+  * is closed the DRC moves to its class ready_state and the walk is rewound
+  * so the fragment can be sent again.
+  */
+ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
+                                          SpaprMachineState *spapr,
+                                          uint32_t token, uint32_t nargs,
+                                          target_ulong args, uint32_t nret,
+                                          target_ulong rets)
+ {
+     uint64_t wa_addr;
+     uint64_t wa_offset;
+     uint32_t drc_index;
+     SpaprDrc *drc;
+     SpaprDrcClass *drck;
+     SpaprDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE;
+     int rc;
+
+     if (nargs != 2 || nret != 1) {
+         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+         return;
+     }
+
+     wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0);
+
+     drc_index = rtas_ld(wa_addr, 0);
+     drc = spapr_drc_by_index(drc_index);
+     if (!drc) {
+         trace_spapr_rtas_ibm_configure_connector_invalid(drc_index);
+         rc = RTAS_OUT_PARAM_ERROR;
+         goto out;
+     }
+
+     if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE)
+         && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE)
+         && (drc->state != SPAPR_DRC_STATE_LOGICAL_CONFIGURED)
+         && (drc->state != SPAPR_DRC_STATE_PHYSICAL_CONFIGURED)) {
+         /*
+          * Need to unisolate the device before configuring
+          * or it should already be in configured state to
+          * allow configure-connector be called repeatedly.
+          */
+         rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE;
+         goto out;
+     }
+
+     drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+
+     /*
+      * This indicates that the kernel is reconfiguring a LMB due to
+      * a failed hotunplug. Rollback the DIMM unplug process.
+      */
+     if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB &&
+         drc->unplug_requested) {
+         spapr_memory_unplug_rollback(spapr, drc->dev);
+     }
+
+     if (!drc->fdt) {
+         /* First call for this DRC: build the fragment and start the walk. */
+         void *fdt;
+         int fdt_size;
+
+         fdt = create_device_tree(&fdt_size);
+
+         if (drck->dt_populate(drc, spapr, fdt, &drc->fdt_start_offset,
+                               NULL)) {
+             g_free(fdt);
+             rc = SPAPR_DR_CC_RESPONSE_ERROR;
+             goto out;
+         }
+
+         drc->fdt = fdt;
+         drc->ccs_offset = drc->fdt_start_offset;
+         drc->ccs_depth = 0;
+     }
+
+     do {
+         uint32_t tag;
+         const char *name;
+         const struct fdt_property *prop;
+         int fdt_offset_next, prop_len;
+
+         tag = fdt_next_tag(drc->fdt, drc->ccs_offset, &fdt_offset_next);
+
+         switch (tag) {
+         case FDT_BEGIN_NODE:
+             drc->ccs_depth++;
+             name = fdt_get_name(drc->fdt, drc->ccs_offset, NULL);
+
+             /* provide the name of the next OF node */
+             wa_offset = CC_VAL_DATA_OFFSET;
+             rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset);
+             configure_connector_st(wa_addr, wa_offset, name, strlen(name) + 1);
+             resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD;
+             break;
+         case FDT_END_NODE:
+             drc->ccs_depth--;
+             if (drc->ccs_depth == 0) {
+                 /* done sending the device tree, move to configured state */
+                 trace_spapr_drc_set_configured(spapr_drc_index(drc));
+                 drc->state = drck->ready_state;
+                 /*
+                  * Ensure that we are able to send the FDT fragment
+                  * again via configure-connector call if the guest requests.
+                  */
+                 drc->ccs_offset = drc->fdt_start_offset;
+                 drc->ccs_depth = 0;
+                 fdt_offset_next = drc->fdt_start_offset;
+                 resp = SPAPR_DR_CC_RESPONSE_SUCCESS;
+             } else {
+                 resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT;
+             }
+             break;
+         case FDT_PROP:
+             prop = fdt_get_property_by_offset(drc->fdt, drc->ccs_offset,
+                                               &prop_len);
+             name = fdt_string(drc->fdt, fdt32_to_cpu(prop->nameoff));
+
+             /* provide the name of the next OF property */
+             wa_offset = CC_VAL_DATA_OFFSET;
+             rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset);
+             configure_connector_st(wa_addr, wa_offset, name, strlen(name) + 1);
+
+             /*
+              * provide the length and value of the OF property. data gets
+              * placed immediately after NULL terminator of the OF property's
+              * name string
+              */
+             wa_offset += strlen(name) + 1;
+             rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len);
+             rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset);
+             configure_connector_st(wa_addr, wa_offset, prop->data, prop_len);
+             resp = SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
+             break;
+         case FDT_END:
+             /* ran off the end of the tree before the fragment was closed */
+             resp = SPAPR_DR_CC_RESPONSE_ERROR;
+             break;
+         default:
+             /* keep seeking for an actionable tag */
+             break;
+         }
+         if (drc->ccs_offset >= 0) {
+             drc->ccs_offset = fdt_offset_next;
+         }
+     } while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE);
+
+     rc = resp;
+ out:
+     rtas_st(rets, 0, rc);
+ }
+
+ /*
+ * Type-init hook: registers the DR connector types listed below and the
+ * three RTAS calls implemented in this file (set-indicator,
+ * get-sensor-state and ibm,configure-connector).
+ */
+ static void spapr_drc_register_types(void)
+ {
+ type_register_static(&spapr_dr_connector_info);
+ type_register_static(&spapr_drc_physical_info);
+ type_register_static(&spapr_drc_logical_info);
+ type_register_static(&spapr_drc_cpu_info);
+ type_register_static(&spapr_drc_pci_info);
+ type_register_static(&spapr_drc_lmb_info);
+ type_register_static(&spapr_drc_phb_info);
+ type_register_static(&spapr_drc_pmem_info);
+
+ spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator",
+ rtas_set_indicator);
+ spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state",
+ rtas_get_sensor_state);
+ spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector",
+ rtas_ibm_configure_connector);
+ }
+ type_init(spapr_drc_register_types)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
new file mode 100644
index 000000000..630e86282
--- /dev/null
+++ b/hw/ppc/spapr_events.c
@@ -0,0 +1,1082 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * RTAS events handling
+ *
+ * Copyright (c) 2012 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/runstate.h"
+
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "hw/pci/pci.h"
+#include "hw/irq.h"
+#include "hw/pci-host/spapr.h"
+#include "hw/ppc/spapr_drc.h"
+#include "qemu/help_option.h"
+#include "qemu/bcd.h"
+#include "qemu/main-loop.h"
+#include "hw/ppc/spapr_ovec.h"
+#include <libfdt.h>
+#include "migration/blocker.h"
+
+#define RTAS_LOG_VERSION_MASK 0xff000000
+#define RTAS_LOG_VERSION_6 0x06000000
+#define RTAS_LOG_SEVERITY_MASK 0x00e00000
+#define RTAS_LOG_SEVERITY_ALREADY_REPORTED 0x00c00000
+#define RTAS_LOG_SEVERITY_FATAL 0x00a00000
+#define RTAS_LOG_SEVERITY_ERROR 0x00800000
+#define RTAS_LOG_SEVERITY_ERROR_SYNC 0x00600000
+#define RTAS_LOG_SEVERITY_WARNING 0x00400000
+#define RTAS_LOG_SEVERITY_EVENT 0x00200000
+#define RTAS_LOG_SEVERITY_NO_ERROR 0x00000000
+#define RTAS_LOG_DISPOSITION_MASK 0x00180000
+#define RTAS_LOG_DISPOSITION_FULLY_RECOVERED 0x00000000
+#define RTAS_LOG_DISPOSITION_LIMITED_RECOVERY 0x00080000
+#define RTAS_LOG_DISPOSITION_NOT_RECOVERED 0x00100000
+#define RTAS_LOG_OPTIONAL_PART_PRESENT 0x00040000
+#define RTAS_LOG_INITIATOR_MASK 0x0000f000
+#define RTAS_LOG_INITIATOR_UNKNOWN 0x00000000
+#define RTAS_LOG_INITIATOR_CPU 0x00001000
+#define RTAS_LOG_INITIATOR_PCI 0x00002000
+#define RTAS_LOG_INITIATOR_MEMORY 0x00004000
+#define RTAS_LOG_INITIATOR_HOTPLUG 0x00006000
+#define RTAS_LOG_TARGET_MASK 0x00000f00
+#define RTAS_LOG_TARGET_UNKNOWN 0x00000000
+#define RTAS_LOG_TARGET_CPU 0x00000100
+#define RTAS_LOG_TARGET_PCI 0x00000200
+#define RTAS_LOG_TARGET_MEMORY 0x00000400
+#define RTAS_LOG_TARGET_HOTPLUG 0x00000600
+#define RTAS_LOG_TYPE_MASK 0x000000ff
+#define RTAS_LOG_TYPE_OTHER 0x00000000
+#define RTAS_LOG_TYPE_RETRY 0x00000001
+#define RTAS_LOG_TYPE_TCE_ERR 0x00000002
+#define RTAS_LOG_TYPE_INTERN_DEV_FAIL 0x00000003
+#define RTAS_LOG_TYPE_TIMEOUT 0x00000004
+#define RTAS_LOG_TYPE_DATA_PARITY 0x00000005
+#define RTAS_LOG_TYPE_ADDR_PARITY 0x00000006
+#define RTAS_LOG_TYPE_CACHE_PARITY 0x00000007
+#define RTAS_LOG_TYPE_ADDR_INVALID 0x00000008
+#define RTAS_LOG_TYPE_ECC_UNCORR 0x00000009
+#define RTAS_LOG_TYPE_ECC_CORR 0x0000000a
+#define RTAS_LOG_TYPE_EPOW 0x00000040
+#define RTAS_LOG_TYPE_HOTPLUG 0x000000e5
+
+ /*
+ * Fixed header of an RTAS error log: a summary word built from the
+ * RTAS_LOG_* flags above, followed by the byte length of the extended log.
+ */
+ struct rtas_error_log {
+ uint32_t summary;
+ uint32_t extended_length;
+ } QEMU_PACKED;
+
+ /*
+ * Version 6 extended log header. b0 and b2 are flag bytes whose bit
+ * values are defined inline below; company holds a four-byte company tag.
+ */
+ struct rtas_event_log_v6 {
+ uint8_t b0;
+ #define RTAS_LOG_V6_B0_VALID 0x80
+ #define RTAS_LOG_V6_B0_UNRECOVERABLE_ERROR 0x40
+ #define RTAS_LOG_V6_B0_RECOVERABLE_ERROR 0x20
+ #define RTAS_LOG_V6_B0_DEGRADED_OPERATION 0x10
+ #define RTAS_LOG_V6_B0_PREDICTIVE_ERROR 0x08
+ #define RTAS_LOG_V6_B0_NEW_LOG 0x04
+ #define RTAS_LOG_V6_B0_BIGENDIAN 0x02
+ uint8_t _resv1;
+ uint8_t b2;
+ #define RTAS_LOG_V6_B2_POWERPC_FORMAT 0x80
+ #define RTAS_LOG_V6_B2_LOG_FORMAT_MASK 0x0f
+ #define RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT 0x0e
+ uint8_t _resv2[9];
+ uint32_t company;
+ #define RTAS_LOG_V6_COMPANY_IBM 0x49424d00 /* IBM<null> */
+ } QEMU_PACKED;
+
+ /*
+ * Common header that starts every v6 log section: a two-byte section id,
+ * the section length, then version, subtype and creator component id.
+ */
+ struct rtas_event_log_v6_section_header {
+ uint16_t section_id;
+ uint16_t section_length;
+ uint8_t section_version;
+ uint8_t section_subtype;
+ uint16_t creator_component_id;
+ } QEMU_PACKED;
+
+ /*
+ * "Main-A" section (id "PH"): creation date/time in BCD, creator id,
+ * number of sections in the log and the platform log id (plid).
+ */
+ struct rtas_event_log_v6_maina {
+ #define RTAS_LOG_V6_SECTION_ID_MAINA 0x5048 /* PH */
+ struct rtas_event_log_v6_section_header hdr;
+ uint32_t creation_date; /* BCD: YYYYMMDD */
+ uint32_t creation_time; /* BCD: HHMMSS00 */
+ uint8_t _platform1[8];
+ char creator_id;
+ uint8_t _resv1[2];
+ uint8_t section_count;
+ uint8_t _resv2[4];
+ uint8_t _platform2[8];
+ uint32_t plid;
+ uint8_t _platform3[4];
+ } QEMU_PACKED;
+
+ /*
+ * "Main-B" section (id "UH"): subsystem id, event severity and subtype,
+ * plus action flags.
+ */
+ struct rtas_event_log_v6_mainb {
+ #define RTAS_LOG_V6_SECTION_ID_MAINB 0x5548 /* UH */
+ struct rtas_event_log_v6_section_header hdr;
+ uint8_t subsystem_id;
+ uint8_t _platform1;
+ uint8_t event_severity;
+ uint8_t event_subtype;
+ uint8_t _platform2[4];
+ uint8_t _resv1[2];
+ uint16_t action_flags;
+ uint8_t _resv2[4];
+ } QEMU_PACKED;
+
+ /*
+ * EPOW section (id "EP"). sensor_value carries one of the
+ * RTAS_LOG_V6_EPOW_ACTION_* codes, event_modifier one of the
+ * *_MODIFIER_* codes and extended_modifier one of the *_XMODIFIER_* codes.
+ */
+ struct rtas_event_log_v6_epow {
+ #define RTAS_LOG_V6_SECTION_ID_EPOW 0x4550 /* EP */
+ struct rtas_event_log_v6_section_header hdr;
+ uint8_t sensor_value;
+ #define RTAS_LOG_V6_EPOW_ACTION_RESET 0
+ #define RTAS_LOG_V6_EPOW_ACTION_WARN_COOLING 1
+ #define RTAS_LOG_V6_EPOW_ACTION_WARN_POWER 2
+ #define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN 3
+ #define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_HALT 4
+ #define RTAS_LOG_V6_EPOW_ACTION_MAIN_ENCLOSURE 5
+ #define RTAS_LOG_V6_EPOW_ACTION_POWER_OFF 7
+ uint8_t event_modifier;
+ #define RTAS_LOG_V6_EPOW_MODIFIER_NORMAL 1
+ #define RTAS_LOG_V6_EPOW_MODIFIER_ON_UPS 2
+ #define RTAS_LOG_V6_EPOW_MODIFIER_CRITICAL 3
+ #define RTAS_LOG_V6_EPOW_MODIFIER_TEMPERATURE 4
+ uint8_t extended_modifier;
+ #define RTAS_LOG_V6_EPOW_XMODIFIER_SYSTEM_WIDE 0
+ #define RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC 1
+ uint8_t _resv;
+ uint64_t reason_code;
+ } QEMU_PACKED;
+
+ /* Complete extended log for an EPOW event: v6 header, Main-A, Main-B, EPOW. */
+ struct epow_extended_log {
+ struct rtas_event_log_v6 v6hdr;
+ struct rtas_event_log_v6_maina maina;
+ struct rtas_event_log_v6_mainb mainb;
+ struct rtas_event_log_v6_epow epow;
+ } QEMU_PACKED;
+
+ /*
+ * Identifies the target of a hotplug event. Which member is meaningful is
+ * selected by the RTAS_LOG_V6_HP_ID_* value stored alongside it: a DRC
+ * index, a count, a (count, index) pair, or a name.
+ */
+ union drc_identifier {
+ uint32_t index;
+ uint32_t count;
+ struct {
+ uint32_t count;
+ uint32_t index;
+ } count_indexed;
+ char name[1];
+ } QEMU_PACKED;
+
+ /*
+ * Hotplug section (id "HP"): resource type, action (add/remove), the kind
+ * of identifier used and the identifier itself.
+ */
+ struct rtas_event_log_v6_hp {
+ #define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */
+ struct rtas_event_log_v6_section_header hdr;
+ uint8_t hotplug_type;
+ #define RTAS_LOG_V6_HP_TYPE_CPU 1
+ #define RTAS_LOG_V6_HP_TYPE_MEMORY 2
+ #define RTAS_LOG_V6_HP_TYPE_SLOT 3
+ #define RTAS_LOG_V6_HP_TYPE_PHB 4
+ #define RTAS_LOG_V6_HP_TYPE_PCI 5
+ #define RTAS_LOG_V6_HP_TYPE_PMEM 6
+ uint8_t hotplug_action;
+ #define RTAS_LOG_V6_HP_ACTION_ADD 1
+ #define RTAS_LOG_V6_HP_ACTION_REMOVE 2
+ uint8_t hotplug_identifier;
+ #define RTAS_LOG_V6_HP_ID_DRC_NAME 1
+ #define RTAS_LOG_V6_HP_ID_DRC_INDEX 2
+ #define RTAS_LOG_V6_HP_ID_DRC_COUNT 3
+ #define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4
+ uint8_t reserved;
+ union drc_identifier drc_id;
+ } QEMU_PACKED;
+
+ /* Complete extended log for a hotplug event: v6 header, Main-A, Main-B, HP. */
+ struct hp_extended_log {
+ struct rtas_event_log_v6 v6hdr;
+ struct rtas_event_log_v6_maina maina;
+ struct rtas_event_log_v6_mainb mainb;
+ struct rtas_event_log_v6_hp hp;
+ } QEMU_PACKED;
+
+ /*
+ * Machine-check section (id "MC"): error type and sub-type plus the
+ * effective and logical addresses associated with the error.
+ */
+ struct rtas_event_log_v6_mc {
+ #define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */
+ struct rtas_event_log_v6_section_header hdr;
+ uint32_t fru_id;
+ uint32_t proc_id;
+ uint8_t error_type;
+ #define RTAS_LOG_V6_MC_TYPE_UE 0
+ #define RTAS_LOG_V6_MC_TYPE_SLB 1
+ #define RTAS_LOG_V6_MC_TYPE_ERAT 2
+ #define RTAS_LOG_V6_MC_TYPE_TLB 4
+ #define RTAS_LOG_V6_MC_TYPE_D_CACHE 5
+ #define RTAS_LOG_V6_MC_TYPE_I_CACHE 7
+ uint8_t sub_err_type;
+ #define RTAS_LOG_V6_MC_UE_INDETERMINATE 0
+ #define RTAS_LOG_V6_MC_UE_IFETCH 1
+ #define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2
+ #define RTAS_LOG_V6_MC_UE_LOAD_STORE 3
+ #define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+ #define RTAS_LOG_V6_MC_SLB_PARITY 0
+ #define RTAS_LOG_V6_MC_SLB_MULTIHIT 1
+ #define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2
+ #define RTAS_LOG_V6_MC_ERAT_PARITY 1
+ #define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2
+ #define RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3
+ #define RTAS_LOG_V6_MC_TLB_PARITY 1
+ #define RTAS_LOG_V6_MC_TLB_MULTIHIT 2
+ #define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3
+ /*
+ * Per PAPR,
+ * For UE error type, set bit 1 of sub_err_type to indicate effective addr is
+ * provided. For other error types (SLB/ERAT/TLB), set bit 0 to indicate
+ * same.
+ * NOTE(review): given the values below, bits appear to be numbered from
+ * the most significant end of the byte (bit 0 = 0x80, bit 1 = 0x40).
+ */
+ #define RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED 0x40
+ #define RTAS_LOG_V6_MC_EA_ADDR_PROVIDED 0x80
+ uint8_t reserved_1[6];
+ uint64_t effective_address;
+ uint64_t logical_address;
+ } QEMU_PACKED;
+
+ /* Complete extended log for a machine check: v6 header plus the MC section. */
+ struct mc_extended_log {
+ struct rtas_event_log_v6 v6hdr;
+ struct rtas_event_log_v6_mc mc;
+ } QEMU_PACKED;
+
+ /*
+ * One decode rule for a machine check that is not flagged as load/store:
+ * the rule matches when (SRR1 & srr1_mask) == srr1_value, and supplies the
+ * error type/sub-type and the initiator/severity summary bits to report.
+ */
+ struct MC_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ bool nip_valid; /* nip is a valid indicator of faulting address */
+ uint8_t error_type;
+ uint8_t error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+ };
+
+ /*
+ * SRR1-based decode rules, scanned in order; the first matching entry is
+ * used. All entries share the same mask and differ only in the expected
+ * value.
+ */
+ static const struct MC_ierror_table mc_ierror_table[] = {
+ { 0x00000000081c0000, 0x0000000000040000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000000081c0000, 0x0000000000080000, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000000081c0000, 0x00000000000c0000, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000000081c0000, 0x0000000000100000, true,
+ RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000000081c0000, 0x0000000000140000, true,
+ RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000000081c0000, 0x0000000000180000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+ /*
+ * One decode rule for a load/store machine check: the rule matches when
+ * any bit of dsisr_value is set in DSISR, and supplies the error
+ * type/sub-type and the initiator/severity summary bits to report.
+ */
+ struct MC_derror_table {
+ unsigned long dsisr_value;
+ bool dar_valid; /* dar is a valid indicator of faulting address */
+ uint8_t error_type;
+ uint8_t error_subtype;
+ unsigned int initiator;
+ unsigned int severity;
+ };
+
+ /*
+ * DSISR-based decode rules, scanned in order; the first entry whose bit
+ * is set wins, so the order of the entries is a priority order (see the
+ * "Before PARITY" note on the SLB multihit entry).
+ */
+ static const struct MC_derror_table mc_derror_table[] = {
+ { 0x00008000, false,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00004000, true,
+ RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000800, true,
+ RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000400, true,
+ RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000080, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, },
+ { 0x00000100, true,
+ RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY,
+ RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } };
+
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
+ /*
+  * Event classes that can have an interrupt source. The numeric value of
+  * each class is also its bit position in the 32-bit event mask, counted
+  * from the most significant bit (see EVENT_CLASS_MASK).
+  */
+ typedef enum EventClass {
+     EVENT_CLASS_INTERNAL_ERRORS = 0,
+     EVENT_CLASS_EPOW = 1,
+     EVENT_CLASS_RESERVED = 2,
+     EVENT_CLASS_HOT_PLUG = 3,
+     EVENT_CLASS_IO = 4,
+     EVENT_CLASS_MAX
+ } EventClassIndex;
+ /*
+  * Mask bit for an event class. The shift is done on an unsigned constant
+  * because class 0 selects bit 31, which does not fit in a signed int; the
+  * argument is parenthesized so that expressions expand correctly.
+  */
+ #define EVENT_CLASS_MASK(index) (1U << (31 - (index)))
+
+ /*
+ * Device-tree node name for each event class, indexed by EventClassIndex.
+ * EVENT_CLASS_RESERVED has no entry and is therefore NULL.
+ */
+ static const char * const event_names[EVENT_CLASS_MAX] = {
+ [EVENT_CLASS_INTERNAL_ERRORS] = "internal-errors",
+ [EVENT_CLASS_EPOW] = "epow-events",
+ [EVENT_CLASS_HOT_PLUG] = "hot-plug-events",
+ [EVENT_CLASS_IO] = "ibm,io-events",
+ };
+
+ /*
+ * Per-class event source: the irq it signals on, its EVENT_CLASS_MASK bit,
+ * and whether an irq has been registered for the class.
+ */
+ struct SpaprEventSource {
+ int irq;
+ uint32_t mask;
+ bool enabled;
+ };
+
+ /*
+  * Allocate a zero-filled array with one SpaprEventSource per event class;
+  * every source therefore starts out disabled.
+  */
+ static SpaprEventSource *spapr_event_sources_new(void)
+ {
+     SpaprEventSource *sources = g_new0(SpaprEventSource, EVENT_CLASS_MAX);
+
+     return sources;
+ }
+
+ /*
+  * Bind @irq to event class @index and mark the class enabled.
+  *
+  * Asserts that @event_sources is non-NULL, that @index names a valid
+  * class (the same bound spapr_event_sources_get_source() checks) and that
+  * the class has not been registered before.
+  */
+ static void spapr_event_sources_register(SpaprEventSource *event_sources,
+                                          EventClassIndex index, int irq)
+ {
+     /* we only support 1 irq per event class at the moment */
+     g_assert(event_sources);
+     g_assert(index < EVENT_CLASS_MAX);
+     g_assert(!event_sources[index].enabled);
+     event_sources[index].irq = irq;
+     event_sources[index].mask = EVENT_CLASS_MASK(index);
+     event_sources[index].enabled = true;
+ }
+
+ /*
+  * Return the source slot for event class @index. Asserts that the index
+  * is within range and that the array exists; the slot may be disabled.
+  */
+ static const SpaprEventSource *
+ spapr_event_sources_get_source(SpaprEventSource *event_sources,
+                                EventClassIndex index)
+ {
+     const SpaprEventSource *slot;
+
+     g_assert(index < EVENT_CLASS_MAX);
+     g_assert(event_sources);
+
+     slot = event_sources + index;
+     return slot;
+ }
+
+ /*
+ * Add the "event-sources" node to @fdt.
+ *
+ * For every enabled event class a child node named after the class is
+ * created with an "interrupts" property, and the class contributes an
+ * (irq, 1) pair to the parent's "interrupt-ranges" property. Disabled
+ * classes are skipped.
+ */
+ void spapr_dt_events(SpaprMachineState *spapr, void *fdt)
+ {
+ uint32_t irq_ranges[EVENT_CLASS_MAX * 2];
+ int i, count = 0, event_sources;
+ SpaprEventSource *events = spapr->event_sources;
+
+ g_assert(events);
+
+ _FDT(event_sources = fdt_add_subnode(fdt, 0, "event-sources"));
+
+ for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) {
+ int node_offset;
+ uint32_t interrupts[2];
+ const SpaprEventSource *source =
+ spapr_event_sources_get_source(events, i);
+ const char *source_name = event_names[i];
+
+ if (!source->enabled) {
+ continue;
+ }
+
+ spapr_dt_irq(interrupts, source->irq, false);
+
+ _FDT(node_offset = fdt_add_subnode(fdt, event_sources, source_name));
+ _FDT(fdt_setprop(fdt, node_offset, "interrupts", interrupts,
+ sizeof(interrupts)));
+
+ /* two cells per enabled class: first interrupt cell, then a count of 1 */
+ irq_ranges[count++] = interrupts[0];
+ irq_ranges[count++] = cpu_to_be32(1);
+ }
+
+ _FDT((fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0)));
+ _FDT((fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2)));
+ _FDT((fdt_setprop(fdt, event_sources, "interrupt-ranges",
+ irq_ranges, count * sizeof(uint32_t))));
+ }
+
+ /*
+ * Map an RTAS log type to the event source that signals it.
+ *
+ * Hotplug logs use the hot-plug source when the OV5_HP_EVT option is set
+ * in spapr->ov5_cas; otherwise they share the EPOW source. EPOW logs use
+ * the EPOW source. Any other log type yields NULL.
+ */
+ static const SpaprEventSource *
+ rtas_event_log_to_source(SpaprMachineState *spapr, int log_type)
+ {
+ const SpaprEventSource *source;
+
+ g_assert(spapr->event_sources);
+
+ switch (log_type) {
+ case RTAS_LOG_TYPE_HOTPLUG:
+ source = spapr_event_sources_get_source(spapr->event_sources,
+ EVENT_CLASS_HOT_PLUG);
+ if (spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)) {
+ g_assert(source->enabled);
+ break;
+ }
+ /* fall through back to epow for legacy hotplug interrupt source */
+ case RTAS_LOG_TYPE_EPOW:
+ source = spapr_event_sources_get_source(spapr->event_sources,
+ EVENT_CLASS_EPOW);
+ break;
+ default:
+ source = NULL;
+ }
+
+ return source;
+ }
+
+ /*
+  * Return the irq used to signal logs of type @log_type. Asserts that the
+  * type maps to a source and that the source is enabled.
+  */
+ static int rtas_event_log_to_irq(SpaprMachineState *spapr, int log_type)
+ {
+     const SpaprEventSource *src = rtas_event_log_to_source(spapr, log_type);
+
+     g_assert(src);
+     g_assert(src->enabled);
+
+     return src->irq;
+ }
+
+ /* Extract the RTAS_LOG_TYPE_* field from a queued entry's summary word. */
+ static uint32_t spapr_event_log_entry_type(SpaprEventLogEntry *entry)
+ {
+     uint32_t log_type = entry->summary & RTAS_LOG_TYPE_MASK;
+
+     return log_type;
+ }
+
+ /* Append @entry to the tail of the machine's pending event list. */
+ static void rtas_event_log_queue(SpaprMachineState *spapr,
+ SpaprEventLogEntry *entry)
+ {
+ QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next);
+ }
+
+ /*
+ * Remove and return the first pending entry whose source mask intersects
+ * @event_mask, or NULL when no pending entry matches. The caller receives
+ * the removed entry; nothing here frees it.
+ */
+ static SpaprEventLogEntry *rtas_event_log_dequeue(SpaprMachineState *spapr,
+ uint32_t event_mask)
+ {
+ SpaprEventLogEntry *entry = NULL;
+
+ QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
+ const SpaprEventSource *source =
+ rtas_event_log_to_source(spapr,
+ spapr_event_log_entry_type(entry));
+
+ g_assert(source);
+ if (source->mask & event_mask) {
+ break;
+ }
+ }
+
+ /* relies on the iteration leaving entry NULL when it ran to the end */
+ if (entry) {
+ QTAILQ_REMOVE(&spapr->pending_events, entry, next);
+ }
+
+ return entry;
+ }
+
+ /*
+  * Report whether any pending entry belongs to a source whose mask
+  * intersects @event_mask. The pending list is not modified.
+  */
+ static bool rtas_event_log_contains(SpaprMachineState *spapr, uint32_t event_mask)
+ {
+     SpaprEventLogEntry *entry = NULL;
+
+     QTAILQ_FOREACH(entry, &spapr->pending_events, next) {
+         const SpaprEventSource *source =
+             rtas_event_log_to_source(spapr,
+                                      spapr_event_log_entry_type(entry));
+
+         /*
+          * rtas_event_log_to_source() returns NULL for unknown log types;
+          * check before dereferencing, as rtas_event_log_dequeue() does.
+          */
+         g_assert(source);
+         if (source->mask & event_mask) {
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+static uint32_t next_plid;
+
+ /*
+  * Fill in the flag bytes and company tag of a v6 log header: valid, new,
+  * big-endian log in PowerPC platform-event format, company "IBM". Other
+  * fields are left untouched.
+  */
+ static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr)
+ {
+     const uint8_t byte0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG
+                           | RTAS_LOG_V6_B0_BIGENDIAN;
+     const uint8_t byte2 = RTAS_LOG_V6_B2_POWERPC_FORMAT
+                           | RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT;
+
+     v6hdr->b0 = byte0;
+     v6hdr->b2 = byte2;
+     v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM);
+ }
+
+ /*
+  * Fill in the Main-A section of an event log.
+  *
+  * Sets the section id and length, stamps the creation date and time (BCD,
+  * read from the machine's RTC), marks the creator as the hypervisor,
+  * records @section_count and assigns the next platform log id.
+  *
+  * All multi-byte fields are stored big-endian, matching the BIGENDIAN
+  * flag set in the v6 header; this includes plid, which was previously
+  * stored in host byte order.
+  */
+ static void spapr_init_maina(SpaprMachineState *spapr,
+                              struct rtas_event_log_v6_maina *maina,
+                              int section_count)
+ {
+     struct tm tm;
+     int year;
+
+     maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA);
+     maina->hdr.section_length = cpu_to_be16(sizeof(*maina));
+     /* FIXME: section version, subtype and creator id? */
+     spapr_rtc_read(&spapr->rtc, &tm, NULL);
+     year = tm.tm_year + 1900;
+     maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24)
+                                        | (to_bcd(year % 100) << 16)
+                                        | (to_bcd(tm.tm_mon + 1) << 8)
+                                        | to_bcd(tm.tm_mday));
+     maina->creation_time = cpu_to_be32((to_bcd(tm.tm_hour) << 24)
+                                        | (to_bcd(tm.tm_min) << 16)
+                                        | (to_bcd(tm.tm_sec) << 8));
+     maina->creator_id = 'H'; /* Hypervisor */
+     maina->section_count = section_count;
+     maina->plid = cpu_to_be32(next_plid++);
+ }
+
+ /*
+ * Notifier callback that turns a powerdown request into an EPOW event.
+ *
+ * Builds a log entry with a "system shutdown" EPOW section, queues it on
+ * the machine's pending event list and pulses the EPOW interrupt. Neither
+ * @n nor @opaque is used.
+ */
+ static void spapr_powerdown_req(Notifier *n, void *opaque)
+ {
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ SpaprEventLogEntry *entry;
+ struct rtas_event_log_v6 *v6hdr;
+ struct rtas_event_log_v6_maina *maina;
+ struct rtas_event_log_v6_mainb *mainb;
+ struct rtas_event_log_v6_epow *epow;
+ struct epow_extended_log *new_epow;
+
+ entry = g_new(SpaprEventLogEntry, 1);
+ /* zero-filled, so fields not set below stay 0 */
+ new_epow = g_malloc0(sizeof(*new_epow));
+ entry->extended_log = new_epow;
+
+ v6hdr = &new_epow->v6hdr;
+ maina = &new_epow->maina;
+ mainb = &new_epow->mainb;
+ epow = &new_epow->epow;
+
+ entry->summary = RTAS_LOG_VERSION_6
+ | RTAS_LOG_SEVERITY_EVENT
+ | RTAS_LOG_DISPOSITION_NOT_RECOVERED
+ | RTAS_LOG_OPTIONAL_PART_PRESENT
+ | RTAS_LOG_TYPE_EPOW;
+ entry->extended_length = sizeof(*new_epow);
+
+ spapr_init_v6hdr(v6hdr);
+ spapr_init_maina(spapr, maina, 3 /* Main-A, Main-B and EPOW */);
+
+ mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
+ mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
+ /* FIXME: section version, subtype and creator id? */
+ mainb->subsystem_id = 0xa0; /* External environment */
+ mainb->event_severity = 0x00; /* Informational / non-error */
+ mainb->event_subtype = 0xd0; /* Normal shutdown */
+
+ epow->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_EPOW);
+ epow->hdr.section_length = cpu_to_be16(sizeof(*epow));
+ epow->hdr.section_version = 2; /* includes extended modifier */
+ /* FIXME: section subtype and creator id? */
+ epow->sensor_value = RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN;
+ epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
+ epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
+
+ rtas_event_log_queue(spapr, entry);
+
+ qemu_irq_pulse(spapr_qirq(spapr,
+ rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_EPOW)));
+ }
+
+ /*
+ * Build and queue a hotplug event log, then pulse the matching interrupt.
+ *
+ * @hp_id: RTAS_LOG_V6_HP_ID_* value saying which member of @drc_id is valid
+ * @hp_action: RTAS_LOG_V6_HP_ACTION_ADD or RTAS_LOG_V6_HP_ACTION_REMOVE
+ * @drc_type: connector type, translated to an RTAS_LOG_V6_HP_TYPE_* value;
+ * types without a translation trip an assertion
+ * @drc_id: identifier in host byte order; converted to big-endian here
+ */
+ static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
+ SpaprDrcType drc_type,
+ union drc_identifier *drc_id)
+ {
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ SpaprEventLogEntry *entry;
+ struct hp_extended_log *new_hp;
+ struct rtas_event_log_v6 *v6hdr;
+ struct rtas_event_log_v6_maina *maina;
+ struct rtas_event_log_v6_mainb *mainb;
+ struct rtas_event_log_v6_hp *hp;
+
+ entry = g_new(SpaprEventLogEntry, 1);
+ /* zero-filled, so fields not set below stay 0 */
+ new_hp = g_malloc0(sizeof(struct hp_extended_log));
+ entry->extended_log = new_hp;
+
+ v6hdr = &new_hp->v6hdr;
+ maina = &new_hp->maina;
+ mainb = &new_hp->mainb;
+ hp = &new_hp->hp;
+
+ entry->summary = RTAS_LOG_VERSION_6
+ | RTAS_LOG_SEVERITY_EVENT
+ | RTAS_LOG_DISPOSITION_NOT_RECOVERED
+ | RTAS_LOG_OPTIONAL_PART_PRESENT
+ | RTAS_LOG_INITIATOR_HOTPLUG
+ | RTAS_LOG_TYPE_HOTPLUG;
+ entry->extended_length = sizeof(*new_hp);
+
+ spapr_init_v6hdr(v6hdr);
+ spapr_init_maina(spapr, maina, 3 /* Main-A, Main-B, HP */);
+
+ mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
+ mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
+ mainb->subsystem_id = 0x80; /* NOTE(review): was labelled "External environment", the label the EPOW path gives 0xa0 - confirm meaning of 0x80 */
+ mainb->event_severity = 0x00; /* Informational / non-error */
+ mainb->event_subtype = 0x00; /* NOTE(review): was labelled "Normal shutdown", which the EPOW path encodes as 0xd0 - confirm meaning of 0x00 */
+
+ hp->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_HOTPLUG);
+ hp->hdr.section_length = cpu_to_be16(sizeof(*hp));
+ hp->hdr.section_version = 1; /* hotplug section version */
+ hp->hotplug_action = hp_action;
+ hp->hotplug_identifier = hp_id;
+
+ switch (drc_type) {
+ case SPAPR_DR_CONNECTOR_TYPE_PCI:
+ hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI;
+ break;
+ case SPAPR_DR_CONNECTOR_TYPE_LMB:
+ hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY;
+ break;
+ case SPAPR_DR_CONNECTOR_TYPE_CPU:
+ hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU;
+ break;
+ case SPAPR_DR_CONNECTOR_TYPE_PHB:
+ hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PHB;
+ break;
+ case SPAPR_DR_CONNECTOR_TYPE_PMEM:
+ hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PMEM;
+ break;
+ default:
+ /* we shouldn't be signaling hotplug events for resources
+ * that don't support them
+ */
+ g_assert(false);
+ return;
+ }
+
+ /* any other hp_id value (e.g. DRC_NAME) leaves drc_id zeroed */
+ if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) {
+ hp->drc_id.count = cpu_to_be32(drc_id->count);
+ } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) {
+ hp->drc_id.index = cpu_to_be32(drc_id->index);
+ } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) {
+ /* we should not be using count_indexed value unless the guest
+ * supports dedicated hotplug event source
+ */
+ g_assert(spapr_memory_hot_unplug_supported(spapr));
+ hp->drc_id.count_indexed.count =
+ cpu_to_be32(drc_id->count_indexed.count);
+ hp->drc_id.count_indexed.index =
+ cpu_to_be32(drc_id->count_indexed.index);
+ }
+
+ rtas_event_log_queue(spapr, entry);
+
+ qemu_irq_pulse(spapr_qirq(spapr,
+ rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_HOTPLUG)));
+ }
+
+ /* Queue a hotplug "add" event that identifies @drc by its DRC index. */
+ void spapr_hotplug_req_add_by_index(SpaprDrc *drc)
+ {
+     SpaprDrcType kind = spapr_drc_type(drc);
+     union drc_identifier ident = { .index = spapr_drc_index(drc) };
+
+     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
+                             RTAS_LOG_V6_HP_ACTION_ADD, kind, &ident);
+ }
+
+ /* Queue a hotplug "remove" event that identifies @drc by its DRC index. */
+ void spapr_hotplug_req_remove_by_index(SpaprDrc *drc)
+ {
+     SpaprDrcType kind = spapr_drc_type(drc);
+     union drc_identifier ident = { .index = spapr_drc_index(drc) };
+
+     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
+                             RTAS_LOG_V6_HP_ACTION_REMOVE, kind, &ident);
+ }
+
+ /* Queue a hotplug "add" event for @count resources of type @drc_type. */
+ void spapr_hotplug_req_add_by_count(SpaprDrcType drc_type,
+                                     uint32_t count)
+ {
+     union drc_identifier ident = { .count = count };
+
+     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
+                             RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &ident);
+ }
+
+ /* Queue a hotplug "remove" event for @count resources of type @drc_type. */
+ void spapr_hotplug_req_remove_by_count(SpaprDrcType drc_type,
+                                        uint32_t count)
+ {
+     union drc_identifier ident = { .count = count };
+
+     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
+                             RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &ident);
+ }
+
+ /*
+  * Queue a hotplug "add" event identified by a (@count, @index) pair for
+  * resources of type @drc_type.
+  */
+ void spapr_hotplug_req_add_by_count_indexed(SpaprDrcType drc_type,
+                                             uint32_t count, uint32_t index)
+ {
+     union drc_identifier ident = {
+         .count_indexed = { .count = count, .index = index },
+     };
+
+     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED,
+                             RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &ident);
+ }
+
+ /*
+  * Queue a hotplug "remove" event identified by a (@count, @index) pair
+  * for resources of type @drc_type.
+  */
+ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
+                                                uint32_t count, uint32_t index)
+ {
+     union drc_identifier ident = {
+         .count_indexed = { .count = count, .index = index },
+     };
+
+     spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED,
+                             RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &ident);
+ }
+
+ /*
+  * Set the "effective address provided" flag in the machine-check
+  * sub-error type. UE errors use one flag bit and SLB/ERAT/TLB errors
+  * another; any other error type is left unchanged.
+  */
+ static void spapr_mc_set_ea_provided_flag(struct mc_extended_log *ext_elog)
+ {
+     const uint8_t kind = ext_elog->mc.error_type;
+
+     if (kind == RTAS_LOG_V6_MC_TYPE_UE) {
+         ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED;
+     } else if (kind == RTAS_LOG_V6_MC_TYPE_SLB ||
+                kind == RTAS_LOG_V6_MC_TYPE_ERAT ||
+                kind == RTAS_LOG_V6_MC_TYPE_TLB) {
+         ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_EA_ADDR_PROVIDED;
+     }
+ }
+
+ /*
+ * Classify a machine check and return the RTAS log summary word for it.
+ *
+ * When SRR1 flags a load/store error, DSISR is matched against
+ * mc_derror_table; otherwise SRR1 is matched against mc_ierror_table. The
+ * first matching entry sets the error type/sub-type in @ext_elog, records
+ * DAR or NIP as the effective address when the entry marks it valid, and
+ * contributes the initiator and severity bits. With no match the summary
+ * only gains the CPU initiator bit and @ext_elog is left unchanged.
+ *
+ * @recovered selects the disposition bits of the summary.
+ */
+ static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered,
+ struct mc_extended_log *ext_elog)
+ {
+ int i;
+ CPUPPCState *env = &cpu->env;
+ uint32_t summary;
+ uint64_t dsisr = env->spr[SPR_DSISR];
+
+ summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT;
+ if (recovered) {
+ summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED;
+ } else {
+ summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED;
+ }
+
+ if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) {
+ for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) {
+ if (!(dsisr & mc_derror_table[i].dsisr_value)) {
+ continue;
+ }
+
+ ext_elog->mc.error_type = mc_derror_table[i].error_type;
+ ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype;
+ if (mc_derror_table[i].dar_valid) {
+ ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]);
+ spapr_mc_set_ea_provided_flag(ext_elog);
+ }
+
+ summary |= mc_derror_table[i].initiator
+ | mc_derror_table[i].severity;
+
+ return summary;
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) {
+ if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) !=
+ mc_ierror_table[i].srr1_value) {
+ continue;
+ }
+
+ ext_elog->mc.error_type = mc_ierror_table[i].error_type;
+ ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype;
+ if (mc_ierror_table[i].nip_valid) {
+ ext_elog->mc.effective_address = cpu_to_be64(env->nip);
+ spapr_mc_set_ea_provided_flag(ext_elog);
+ }
+
+ summary |= mc_ierror_table[i].initiator
+ | mc_ierror_table[i].severity;
+
+ return summary;
+ }
+ }
+
+ /* no table entry matched */
+ summary |= RTAS_LOG_INITIATOR_CPU;
+ return summary;
+ }
+
+ /*
+ * Build the machine-check error log and deliver it to the guest via FWNMI.
+ *
+ * The log is written at rtas_addr + RTAS_ERROR_LOG_OFFSET as: the saved
+ * value of r3, the fixed rtas_error_log header, then the extended log.
+ * r3 is then pointed at that location and the FWNMI machine-check handler
+ * registered by the guest is invoked.
+ *
+ * If the RTAS address cannot be found nothing is delivered: an
+ * unrecovered error is reported as a guest panic, a recovered one only
+ * produces a warning.
+ */
+ static void spapr_mce_dispatch_elog(SpaprMachineState *spapr, PowerPCCPU *cpu,
+ bool recovered)
+ {
+ CPUState *cs = CPU(cpu);
+ CPUPPCState *env = &cpu->env;
+ uint64_t rtas_addr;
+ struct rtas_error_log log;
+ struct mc_extended_log *ext_elog;
+ uint32_t summary;
+
+ ext_elog = g_malloc0(sizeof(*ext_elog));
+ summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog);
+
+ log.summary = cpu_to_be32(summary);
+ log.extended_length = cpu_to_be32(sizeof(*ext_elog));
+
+ spapr_init_v6hdr(&ext_elog->v6hdr);
+ ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC);
+ ext_elog->mc.hdr.section_length =
+ cpu_to_be16(sizeof(struct rtas_event_log_v6_mc));
+ ext_elog->mc.hdr.section_version = 1;
+
+ /* get rtas addr from fdt */
+ rtas_addr = spapr_get_rtas_addr();
+ if (!rtas_addr) {
+ if (!recovered) {
+ error_report(
+ "FWNMI: Unable to deliver machine check to guest: rtas_addr not found.");
+ qemu_system_guest_panicked(NULL);
+ } else {
+ warn_report(
+ "FWNMI: Unable to deliver machine check to guest: rtas_addr not found. "
+ "Machine check recovered.");
+ }
+ g_free(ext_elog);
+ return;
+ }
+
+ /*
+ * By taking the interlock, we assume that the MCE will be
+ * delivered to the guest. CAUTION: don't add anything that could
+ * prevent the MCE to be delivered after this line, otherwise the
+ * guest won't be able to release the interlock and ultimately
+ * hang/crash?
+ */
+ spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
+
+ /* layout in guest memory: saved r3, fixed header, extended log */
+ stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
+ env->gpr[3]);
+ cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+ sizeof(env->gpr[3]), &log, sizeof(log));
+ cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
+ sizeof(env->gpr[3]) + sizeof(log), ext_elog,
+ sizeof(*ext_elog));
+ g_free(ext_elog);
+
+ env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET;
+
+ ppc_cpu_do_fwnmi_machine_check(cs, spapr->fwnmi_machine_check_addr);
+ }
+
+ /*
+ * Entry point for delivering a machine check to the guest on @cpu.
+ *
+ * When no FWNMI handler is registered (fwnmi_machine_check_addr == -1) the
+ * event is raised as a plain POWERPC_EXCP_MCHECK interrupt. Otherwise the
+ * function waits for the FWNMI interlock to become free, tries to add a
+ * migration blocker and dispatches the error log.
+ *
+ * Delivery is abandoned if the interlock is already held by @cpu itself
+ * (nested machine check) or if fwnmi_machine_check_addr is found to be -1
+ * after waking up from the wait. On a nested machine check, @recovered
+ * selects between the error path (guest flagged as panicked) and a warning.
+ */
+ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
+ {
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ CPUState *cs = CPU(cpu);
+ int ret;
+
+ if (spapr->fwnmi_machine_check_addr == -1) {
+ /* Non-FWNMI case, deliver it like an architected CPU interrupt. */
+ cs->exception_index = POWERPC_EXCP_MCHECK;
+ ppc_cpu_do_interrupt(cs);
+ return;
+ }
+
+ /* Wait for FWNMI interlock. */
+ while (spapr->fwnmi_machine_check_interlock != -1) {
+ /*
+ * Check whether the same CPU got machine check error
+ * while still handling the mc error (i.e., before
+ * that CPU called "ibm,nmi-interlock")
+ */
+ if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
+ if (!recovered) {
+ error_report(
+"FWNMI: Unable to deliver machine check to guest: nested machine check.");
+ qemu_system_guest_panicked(NULL);
+ } else {
+ warn_report(
+"FWNMI: Unable to deliver machine check to guest: nested machine check. "
+"Machine check recovered.");
+ }
+ return;
+ }
+ qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
+ if (spapr->fwnmi_machine_check_addr == -1) {
+ /*
+ * If the machine was reset while waiting for the interlock,
+ * abort the delivery. The machine check applies to a context
+ * that no longer exists, so it wouldn't make sense to deliver
+ * it now.
+ */
+ return;
+ }
+ }
+
+ /*
+ * Try to block migration while FWNMI is being handled, so the
+ * machine check handler runs where the information passed to it
+ * actually makes sense. This shouldn't actually block migration,
+ * only delay it slightly, assuming migration is retried. If the
+ * attempt to block fails, carry on. Unfortunately, it always
+ * fails when running with -only-migrate. A proper interface to
+ * delay migration completion for a bit could avoid that.
+ */
+ ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL);
+ if (ret == -EBUSY) {
+ warn_report("Received a fwnmi while migration was in progress");
+ }
+
+ spapr_mce_dispatch_elog(spapr, cpu, recovered);
+ }
+
+ /*
+  * RTAS "check-exception" handler.
+  *
+  * Dequeues one pending event matching the mask in args[2] and copies it
+  * (big-endian header followed by the extended log) into the guest buffer
+  * whose address is args[4]. At most args[5] bytes are written, so an event
+  * larger than the guest buffer is truncated instead of overrunning it.
+  *
+  * The status word stored in rets[0] is RTAS_OUT_PARAM_ERROR for an
+  * unexpected nargs/nret, RTAS_OUT_NO_ERRORS_FOUND when no event matched and
+  * RTAS_OUT_SUCCESS otherwise.
+  */
+ static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                             uint32_t token, uint32_t nargs,
+                             target_ulong args,
+                             uint32_t nret, target_ulong rets)
+ {
+     uint32_t mask, buf, len, event_len, hdr_len;
+     SpaprEventLogEntry *event;
+     struct rtas_error_log header;
+     int i;
+
+     if ((nargs < 6) || (nargs > 7) || nret != 1) {
+         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+         return;
+     }
+
+     mask = rtas_ld(args, 2);
+     buf = rtas_ld(args, 4);
+     len = rtas_ld(args, 5);
+
+     event = rtas_event_log_dequeue(spapr, mask);
+     if (!event) {
+         goto out_no_events;
+     }
+
+     /* Never copy more than the event holds ... */
+     event_len = event->extended_length + sizeof(header);
+     if (event_len < len) {
+         len = event_len;
+     }
+
+     header.summary = cpu_to_be32(event->summary);
+     header.extended_length = cpu_to_be32(event->extended_length);
+
+     /* ... nor more than the guest said its buffer can take. */
+     hdr_len = len < sizeof(header) ? len : sizeof(header);
+     cpu_physical_memory_write(buf, &header, hdr_len);
+     if (len > hdr_len) {
+         cpu_physical_memory_write(buf + hdr_len, event->extended_log,
+                                   len - hdr_len);
+     }
+     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+     g_free(event->extended_log);
+     g_free(event);
+
+     /* according to PAPR+, the IRQ must be left asserted, or re-asserted, if
+      * there are still pending events to be fetched via check-exception. We
+      * do the latter here, since our code relies on edge-triggered
+      * interrupts.
+      */
+     for (i = 0; i < EVENT_CLASS_MAX; i++) {
+         if (rtas_event_log_contains(spapr, EVENT_CLASS_MASK(i))) {
+             const SpaprEventSource *source =
+                 spapr_event_sources_get_source(spapr->event_sources, i);
+
+             g_assert(source->enabled);
+             qemu_irq_pulse(spapr_qirq(spapr, source->irq));
+         }
+     }
+
+     return;
+
+ out_no_events:
+     rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
+ }
+
+ /*
+  * RTAS "event-scan" handler.
+  *
+  * Expects exactly 4 arguments and 1 return word, otherwise reports
+  * RTAS_OUT_PARAM_ERROR. For every event class that still has a pending
+  * entry, the IRQ of the corresponding source is pulsed; the call itself
+  * always reports RTAS_OUT_NO_ERRORS_FOUND.
+  */
+ static void event_scan(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                        uint32_t token, uint32_t nargs,
+                        target_ulong args,
+                        uint32_t nret, target_ulong rets)
+ {
+     int class_id;
+
+     if (nargs != 4 || nret != 1) {
+         rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+         return;
+     }
+
+     for (class_id = 0; class_id < EVENT_CLASS_MAX; class_id++) {
+         const SpaprEventSource *src;
+
+         if (!rtas_event_log_contains(spapr, EVENT_CLASS_MASK(class_id))) {
+             continue;
+         }
+
+         src = spapr_event_sources_get_source(spapr->event_sources, class_id);
+         g_assert(src->enabled);
+         qemu_irq_pulse(spapr_qirq(spapr, src->irq));
+     }
+
+     rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
+ }
+
+ /* Drop every entry of spapr->pending_events, freeing each extended log. */
+ void spapr_clear_pending_events(SpaprMachineState *spapr)
+ {
+     SpaprEventLogEntry *ev;
+
+     /* Pop from the head until the queue is empty. */
+     while ((ev = QTAILQ_FIRST(&spapr->pending_events)) != NULL) {
+         QTAILQ_REMOVE(&spapr->pending_events, ev, next);
+         g_free(ev->extended_log);
+         g_free(ev);
+     }
+ }
+
+ /*
+  * Remove and free only those pending events whose type is
+  * RTAS_LOG_TYPE_HOTPLUG; all other queued events are left untouched.
+  */
+ void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr)
+ {
+     SpaprEventLogEntry *ev, *following;
+
+     QTAILQ_FOREACH_SAFE(ev, &spapr->pending_events, next, following) {
+         if (spapr_event_log_entry_type(ev) != RTAS_LOG_TYPE_HOTPLUG) {
+             continue;
+         }
+
+         QTAILQ_REMOVE(&spapr->pending_events, ev, next);
+         g_free(ev->extended_log);
+         g_free(ev);
+     }
+ }
+
+ /*
+ * Set up the event infrastructure of the machine: claim the EPOW IRQ (and,
+ * when spapr->use_hotplug_event_source is set, the hotplug IRQ), create
+ * the event source table, initialise the pending event queue, register the
+ * powerdown notifier and register the "check-exception" and "event-scan"
+ * RTAS calls.
+ *
+ * With legacy_irq_allocation the IRQ numbers come from
+ * spapr_irq_findone() instead of the fixed SPAPR_IRQ_* values. IRQ lookup
+ * and claim calls are passed &error_fatal.
+ */
+ void spapr_events_init(SpaprMachineState *spapr)
+ {
+ int epow_irq = SPAPR_IRQ_EPOW;
+
+ if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+ epow_irq = spapr_irq_findone(spapr, &error_fatal);
+ }
+
+ spapr_irq_claim(spapr, epow_irq, false, &error_fatal);
+
+ QTAILQ_INIT(&spapr->pending_events);
+
+ spapr->event_sources = spapr_event_sources_new();
+
+ spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW,
+ epow_irq);
+
+ /* NOTE: if machine supports modern/dedicated hotplug event source,
+ * we add it to the device-tree unconditionally. This means we may
+ * have cases where the source is enabled in QEMU, but unused by the
+ * guest because it does not support modern hotplug events, so we
+ * take care to rely on checking for negotiation of OV5_HP_EVT option
+ * before attempting to use it to signal events, rather than simply
+ * checking that it's enabled.
+ */
+ if (spapr->use_hotplug_event_source) {
+ int hp_irq = SPAPR_IRQ_HOTPLUG;
+
+ if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+ hp_irq = spapr_irq_findone(spapr, &error_fatal);
+ }
+
+ spapr_irq_claim(spapr, hp_irq, false, &error_fatal);
+
+ spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG,
+ hp_irq);
+ }
+
+ /* Powerdown requests are routed to spapr_powerdown_req(). */
+ spapr->epow_notifier.notify = spapr_powerdown_req;
+ qemu_register_powerdown_notifier(&spapr->epow_notifier);
+ spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception",
+ check_exception);
+ spapr_rtas_register(RTAS_EVENT_SCAN, "event-scan", event_scan);
+ }
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
new file mode 100644
index 000000000..222c1b6bb
--- /dev/null
+++ b/hw/ppc/spapr_hcall.c
@@ -0,0 +1,1557 @@
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "sysemu/hw_accel.h"
+#include "sysemu/runstate.h"
+#include "qemu/log.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "exec/exec-all.h"
+#include "helper_regs.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "mmu-hash64.h"
+#include "cpu-models.h"
+#include "trace.h"
+#include "kvm_ppc.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/spapr_ovec.h"
+#include "hw/ppc/spapr_numa.h"
+#include "mmu-book3s-v3.h"
+#include "hw/mem/memory-device.h"
+
+ /*
+  * Tell whether @addr lies in guest RAM: either below machine->ram_size or
+  * inside the device memory region described by machine->device_memory.
+  */
+ bool is_ram_address(SpaprMachineState *spapr, hwaddr addr)
+ {
+     MachineState *ms = MACHINE(spapr);
+     DeviceMemoryState *devmem = ms->device_memory;
+
+     if (addr < ms->ram_size) {
+         return true;
+     }
+
+     /* Offset test is only evaluated once addr >= base, so it cannot wrap. */
+     return addr >= devmem->base &&
+            (addr - devmem->base) < memory_region_size(&devmem->mr);
+ }
+
+ /*
+  * Translate the return value of the KVM resize-HPT ioctl()s into a PAPR
+  * hypercall return code.
+  *
+  * Positive values are bucketed by order of magnitude into the
+  * H_LONG_BUSY_ORDER_* codes, zero is H_SUCCESS, and the known negative
+  * errno values map to dedicated codes; anything else is H_HARDWARE.
+  */
+ static target_ulong resize_hpt_convert_rc(int ret)
+ {
+     if (ret > 0) {
+         if (ret >= 100000) {
+             return H_LONG_BUSY_ORDER_100_SEC;
+         }
+         if (ret >= 10000) {
+             return H_LONG_BUSY_ORDER_10_SEC;
+         }
+         if (ret >= 1000) {
+             return H_LONG_BUSY_ORDER_1_SEC;
+         }
+         if (ret >= 100) {
+             return H_LONG_BUSY_ORDER_100_MSEC;
+         }
+         if (ret >= 10) {
+             return H_LONG_BUSY_ORDER_10_MSEC;
+         }
+         return H_LONG_BUSY_ORDER_1_MSEC;
+     }
+
+     switch (ret) {
+     case 0:
+         return H_SUCCESS;
+     case -EPERM:
+         return H_AUTHORITY;
+     case -EINVAL:
+         return H_PARAMETER;
+     case -ENXIO:
+         return H_CLOSED;
+     case -ENOSPC:
+         return H_PTEG_FULL;
+     case -EBUSY:
+         return H_BUSY;
+     case -ENOMEM:
+         return H_NO_MEM;
+     default:
+         break;
+     }
+
+     return H_HARDWARE;
+ }
+
+ /*
+ * H_RESIZE_HPT_PREPARE hypercall: args[0] is the flags word (must be 0),
+ * args[1] the requested HPT shift.
+ *
+ * Returns H_AUTHORITY when HPT resizing is disabled, H_NOT_AVAILABLE when
+ * spapr->htab_shift is 0, H_PARAMETER for non-zero flags or a non-zero
+ * shift outside 18..46, and H_RESOURCE when the shift is more than one
+ * above what spapr_hpt_shift_for_ramsize() gives for the current RAM size.
+ * Otherwise the request is passed to KVM; if KVM answers -ENOSYS the
+ * software MMU implementation is used, unless KVM is enabled, in which
+ * case H_HARDWARE is returned.
+ */
+ static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+ {
+ target_ulong flags = args[0];
+ int shift = args[1];
+ uint64_t current_ram_size;
+ int rc;
+
+ if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+ return H_AUTHORITY;
+ }
+
+ if (!spapr->htab_shift) {
+ /* Radix guest, no HPT */
+ return H_NOT_AVAILABLE;
+ }
+
+ trace_spapr_h_resize_hpt_prepare(flags, shift);
+
+ if (flags != 0) {
+ return H_PARAMETER;
+ }
+
+ /* A shift of 0 is let through; any other value must be within 18..46. */
+ if (shift && ((shift < 18) || (shift > 46))) {
+ return H_PARAMETER;
+ }
+
+ current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size();
+
+ /* We only allow the guest to allocate an HPT one order above what
+ * we'd normally give them (to stop a small guest claiming a huge
+ * chunk of resources in the HPT) */
+ if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) {
+ return H_RESOURCE;
+ }
+
+ rc = kvmppc_resize_hpt_prepare(cpu, flags, shift);
+ if (rc != -ENOSYS) {
+ return resize_hpt_convert_rc(rc);
+ }
+
+ /* -ENOSYS while KVM is in use: there is nothing to fall back to. */
+ if (kvm_enabled()) {
+ return H_HARDWARE;
+ }
+
+ return softmmu_resize_hpt_prepare(cpu, spapr, shift);
+ }
+
+ /*
+  * run_on_cpu() worker: synchronise the state of @cs and push its sregs to
+  * KVM. A failure is reported and terminates the process with status 1.
+  */
+ static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data)
+ {
+     int rc;
+
+     cpu_synchronize_state(cs);
+
+     rc = kvmppc_put_books_sregs(POWERPC_CPU(cs));
+     if (rc >= 0) {
+         return;
+     }
+
+     error_report("failed to push sregs to KVM: %s", strerror(-rc));
+     exit(1);
+ }
+
+ /*
+  * Push the sregs of every vCPU to KVM. Does nothing unless KVM is enabled
+  * and spapr->htab is set.
+  */
+ void push_sregs_to_kvm_pr(SpaprMachineState *spapr)
+ {
+     CPUState *cs;
+
+     /*
+      * This is a hack for the benefit of KVM PR - it abuses the SDR1
+      * slot in kvm_sregs to communicate the userspace address of the
+      * HPT
+      */
+     if (kvm_enabled() && spapr->htab) {
+         CPU_FOREACH(cs) {
+             run_on_cpu(cs, do_push_sregs_to_kvm_pr, RUN_ON_CPU_NULL);
+         }
+     }
+ }
+
+ /*
+ * H_RESIZE_HPT_COMMIT hypercall: args[0] is the flags word, args[1] the
+ * HPT shift to switch to.
+ *
+ * Returns H_AUTHORITY when HPT resizing is disabled and H_NOT_AVAILABLE
+ * when spapr->htab_shift is 0. The request is first passed to KVM; on
+ * success the new shift is recorded in spapr->htab_shift. If KVM answers
+ * -ENOSYS the software MMU implementation is used, unless KVM is enabled,
+ * in which case H_HARDWARE is returned.
+ */
+ static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+ {
+ target_ulong flags = args[0];
+ target_ulong shift = args[1];
+ int rc;
+
+ if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) {
+ return H_AUTHORITY;
+ }
+
+ if (!spapr->htab_shift) {
+ /* Radix guest, no HPT */
+ return H_NOT_AVAILABLE;
+ }
+
+ trace_spapr_h_resize_hpt_commit(flags, shift);
+
+ rc = kvmppc_resize_hpt_commit(cpu, flags, shift);
+ if (rc != -ENOSYS) {
+ /* From here on rc holds a hypercall return code, not an errno. */
+ rc = resize_hpt_convert_rc(rc);
+ if (rc == H_SUCCESS) {
+ /* Need to set the new htab_shift in the machine state */
+ spapr->htab_shift = shift;
+ }
+ return rc;
+ }
+
+ /* -ENOSYS while KVM is in use: there is nothing to fall back to. */
+ if (kvm_enabled()) {
+ return H_HARDWARE;
+ }
+
+ return softmmu_resize_hpt_commit(cpu, spapr, flags, shift);
+ }
+
+
+
+ /* H_SET_SPRG0 hypercall: store args[0] into SPRG0 of the calling vCPU. */
+ static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode, target_ulong *args)
+ {
+     CPUPPCState *env = &cpu->env;
+
+     /* Synchronise the register state before modifying it. */
+     cpu_synchronize_state(CPU(cpu));
+     env->spr[SPR_SPRG0] = args[0];
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SET_DABR hypercall: load args[0] into DABR.
+  *
+  * Returns H_HARDWARE when the CPU has no DABR. When DABRX exists it is set
+  * to 0x3; when it does not, a value with bit 0x4 clear is refused with
+  * H_RESERVED_DABR.
+  */
+ static target_ulong h_set_dabr(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                target_ulong opcode, target_ulong *args)
+ {
+     CPUPPCState *env = &cpu->env;
+
+     if (!ppc_has_spr(cpu, SPR_DABR)) {
+         return H_HARDWARE; /* DABR register not available */
+     }
+     cpu_synchronize_state(CPU(cpu));
+
+     if (!ppc_has_spr(cpu, SPR_DABRX)) {
+         if ((args[0] & 0x4) == 0) { /* Breakpoint Translation set? */
+             return H_RESERVED_DABR;
+         }
+     } else {
+         env->spr[SPR_DABRX] = 0x3; /* Use Problem and Privileged state */
+     }
+
+     env->spr[SPR_DABR] = args[0];
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SET_XDABR hypercall: load args[0] into DABR and args[1] into DABRX.
+  *
+  * Returns H_HARDWARE when either register is missing and H_PARAMETER when
+  * the DABRX value has bits outside the low nibble, has the hypervisor bit
+  * set, or has neither the kernel nor the user bit set.
+  */
+ static target_ulong h_set_xdabr(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode, target_ulong *args)
+ {
+     target_ulong dabr = args[0];
+     target_ulong dabrx = args[1];
+     CPUPPCState *env = &cpu->env;
+
+     if (!ppc_has_spr(cpu, SPR_DABR) || !ppc_has_spr(cpu, SPR_DABRX)) {
+         return H_HARDWARE;
+     }
+
+     if ((dabrx & ~0xfULL) != 0) {
+         return H_PARAMETER;
+     }
+     if ((dabrx & H_DABRX_HYPERVISOR) != 0) {
+         return H_PARAMETER;
+     }
+     if ((dabrx & (H_DABRX_KERNEL | H_DABRX_USER)) == 0) {
+         return H_PARAMETER;
+     }
+
+     cpu_synchronize_state(CPU(cpu));
+     env->spr[SPR_DABRX] = dabrx;
+     env->spr[SPR_DABR] = dabr;
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_PAGE_INIT hypercall: initialise the page at args[1] according to the
+  * flags in args[0].
+  *
+  * With H_COPY_PAGE the page at args[2] is copied to the destination, else
+  * with H_ZERO_PAGE the destination is cleared. H_ICACHE_SYNCHRONIZE and
+  * H_ICACHE_INVALIDATE trigger cache maintenance under KVM and a
+  * translation block flush otherwise.
+  *
+  * Returns H_PARAMETER for unknown flag bits, for an address that is not
+  * page-aligned guest RAM, or when a whole page could not be mapped. Every
+  * mapping obtained here is released again on all paths, including a
+  * mapping that came back shorter than a page.
+  */
+ static target_ulong h_page_init(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode, target_ulong *args)
+ {
+     target_ulong flags = args[0];
+     hwaddr dst = args[1];
+     hwaddr src = args[2];
+     hwaddr len = TARGET_PAGE_SIZE;
+     uint8_t *pdst, *psrc;
+     target_long ret = H_SUCCESS;
+
+     if (flags & ~(H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE
+                   | H_COPY_PAGE | H_ZERO_PAGE)) {
+         qemu_log_mask(LOG_UNIMP, "h_page_init: Bad flags (" TARGET_FMT_lx ")\n",
+                       flags);
+         return H_PARAMETER;
+     }
+
+     /* Map-in destination */
+     if (!is_ram_address(spapr, dst) || (dst & ~TARGET_PAGE_MASK) != 0) {
+         return H_PARAMETER;
+     }
+     pdst = cpu_physical_memory_map(dst, &len, true);
+     if (!pdst) {
+         return H_PARAMETER;
+     }
+     if (len != TARGET_PAGE_SIZE) {
+         /* Short mapping: release it, nothing has been written. */
+         cpu_physical_memory_unmap(pdst, len, true, 0);
+         return H_PARAMETER;
+     }
+
+     if (flags & H_COPY_PAGE) {
+         /* Separate length so a short source map cannot clobber 'len'. */
+         hwaddr src_len = TARGET_PAGE_SIZE;
+
+         /* Map-in source, copy to destination, and unmap source again */
+         if (!is_ram_address(spapr, src) || (src & ~TARGET_PAGE_MASK) != 0) {
+             ret = H_PARAMETER;
+             goto unmap_out;
+         }
+         psrc = cpu_physical_memory_map(src, &src_len, false);
+         if (!psrc || src_len != TARGET_PAGE_SIZE) {
+             if (psrc) {
+                 cpu_physical_memory_unmap(psrc, src_len, false, 0);
+             }
+             ret = H_PARAMETER;
+             goto unmap_out;
+         }
+         memcpy(pdst, psrc, len);
+         cpu_physical_memory_unmap(psrc, src_len, false, src_len);
+     } else if (flags & H_ZERO_PAGE) {
+         memset(pdst, 0, len);          /* Just clear the destination page */
+     }
+
+     if (kvm_enabled() && (flags & H_ICACHE_SYNCHRONIZE) != 0) {
+         kvmppc_dcbst_range(cpu, pdst, len);
+     }
+     if (flags & (H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE)) {
+         if (kvm_enabled()) {
+             kvmppc_icbi_range(cpu, pdst, len);
+         } else {
+             tb_flush(CPU(cpu));
+         }
+     }
+
+ unmap_out:
+     cpu_physical_memory_unmap(pdst, TARGET_PAGE_SIZE, true, len);
+     return ret;
+ }
+
+#define FLAGS_REGISTER_VPA 0x0000200000000000ULL
+#define FLAGS_REGISTER_DTL 0x0000400000000000ULL
+#define FLAGS_REGISTER_SLBSHADOW 0x0000600000000000ULL
+#define FLAGS_DEREGISTER_VPA 0x0000a00000000000ULL
+#define FLAGS_DEREGISTER_DTL 0x0000c00000000000ULL
+#define FLAGS_DEREGISTER_SLBSHADOW 0x0000e00000000000ULL
+
+ /*
+ * Register the Virtual Processor Area at guest address @vpa for @cpu.
+ *
+ * Returns H_HARDWARE for address 0. Returns H_PARAMETER when @vpa is not a
+ * multiple of env->dcache_line_size, when the 16-bit size read from
+ * @vpa + 4 is below VPA_MIN_SIZE, or when the area would cross a 4096-byte
+ * boundary. On success the address is recorded and VPA_SHARED_PROC_VAL is
+ * ORed into the byte at VPA_SHARED_PROC_OFFSET inside the area.
+ */
+ static target_ulong register_vpa(PowerPCCPU *cpu, target_ulong vpa)
+ {
+ CPUState *cs = CPU(cpu);
+ CPUPPCState *env = &cpu->env;
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+ uint16_t size;
+ uint8_t tmp;
+
+ if (vpa == 0) {
+ hcall_dprintf("Can't cope with registering a VPA at logical 0\n");
+ return H_HARDWARE;
+ }
+
+ if (vpa % env->dcache_line_size) {
+ return H_PARAMETER;
+ }
+ /* FIXME: bounds check the address */
+
+ /* The size is a big-endian 16-bit field at offset 4 of the area. */
+ size = lduw_be_phys(cs->as, vpa + 0x4);
+
+ if (size < VPA_MIN_SIZE) {
+ return H_PARAMETER;
+ }
+
+ /* VPA is not allowed to cross a page boundary */
+ if ((vpa / 4096) != ((vpa + size - 1) / 4096)) {
+ return H_PARAMETER;
+ }
+
+ spapr_cpu->vpa_addr = vpa;
+
+ /* Read-modify-write of the shared-processor byte in guest memory. */
+ tmp = ldub_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET);
+ tmp |= VPA_SHARED_PROC_VAL;
+ stb_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp);
+
+ return H_SUCCESS;
+ }
+
+ /*
+  * Forget the VPA of @cpu. Refused with H_RESOURCE while an SLB shadow
+  * buffer or a dispatch trace log is still registered. @vpa is unused.
+  */
+ static target_ulong deregister_vpa(PowerPCCPU *cpu, target_ulong vpa)
+ {
+     SpaprCpuState *state = spapr_cpu_state(cpu);
+
+     if (state->slb_shadow_addr || state->dtl_addr) {
+         return H_RESOURCE;
+     }
+
+     state->vpa_addr = 0;
+     return H_SUCCESS;
+ }
+
+ /*
+ * Register an SLB shadow buffer at guest address @addr for @cpu.
+ *
+ * Returns H_HARDWARE for address 0, H_PARAMETER when the 32-bit size read
+ * from @addr + 4 is below 8 or when the buffer would cross a 4096-byte
+ * boundary, and H_RESOURCE when no VPA is registered for @cpu. On success
+ * the address and size are recorded.
+ */
+ static target_ulong register_slb_shadow(PowerPCCPU *cpu, target_ulong addr)
+ {
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+ uint32_t size;
+
+ if (addr == 0) {
+ hcall_dprintf("Can't cope with SLB shadow at logical 0\n");
+ return H_HARDWARE;
+ }
+
+ /* The size is a big-endian 32-bit field at offset 4 of the buffer. */
+ size = ldl_be_phys(CPU(cpu)->as, addr + 0x4);
+ if (size < 0x8) {
+ return H_PARAMETER;
+ }
+
+ /* The buffer must not cross a 4096-byte boundary. */
+ if ((addr / 4096) != ((addr + size - 1) / 4096)) {
+ return H_PARAMETER;
+ }
+
+ /* Registration requires an already registered VPA. */
+ if (!spapr_cpu->vpa_addr) {
+ return H_RESOURCE;
+ }
+
+ spapr_cpu->slb_shadow_addr = addr;
+ spapr_cpu->slb_shadow_size = size;
+
+ return H_SUCCESS;
+ }
+
+ /*
+  * Forget the SLB shadow buffer of @cpu by clearing its recorded address
+  * and size. Always succeeds; @addr is unused.
+  */
+ static target_ulong deregister_slb_shadow(PowerPCCPU *cpu, target_ulong addr)
+ {
+     SpaprCpuState *state = spapr_cpu_state(cpu);
+
+     state->slb_shadow_size = 0;
+     state->slb_shadow_addr = 0;
+
+     return H_SUCCESS;
+ }
+
+ /*
+ * Register a dispatch trace log buffer at guest address @addr for @cpu.
+ *
+ * Returns H_HARDWARE for address 0, H_PARAMETER when the 32-bit size read
+ * from @addr + 4 is below 48, and H_RESOURCE when no VPA is registered for
+ * @cpu. On success the address and size are recorded.
+ */
+ static target_ulong register_dtl(PowerPCCPU *cpu, target_ulong addr)
+ {
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+ uint32_t size;
+
+ if (addr == 0) {
+ hcall_dprintf("Can't cope with DTL at logical 0\n");
+ return H_HARDWARE;
+ }
+
+ /* The size is a big-endian 32-bit field at offset 4 of the buffer. */
+ size = ldl_be_phys(CPU(cpu)->as, addr + 0x4);
+
+ if (size < 48) {
+ return H_PARAMETER;
+ }
+
+ /* Registration requires an already registered VPA. */
+ if (!spapr_cpu->vpa_addr) {
+ return H_RESOURCE;
+ }
+
+ spapr_cpu->dtl_addr = addr;
+ spapr_cpu->dtl_size = size;
+
+ return H_SUCCESS;
+ }
+
+ /*
+  * Forget the dispatch trace log of @cpu by clearing its recorded address
+  * and size. Always succeeds; @addr is unused.
+  */
+ static target_ulong deregister_dtl(PowerPCCPU *cpu, target_ulong addr)
+ {
+     SpaprCpuState *state = spapr_cpu_state(cpu);
+
+     state->dtl_size = 0;
+     state->dtl_addr = 0;
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_REGISTER_VPA hypercall: args[0] selects the operation, args[1] the
+  * target processor and args[2] the guest address to (de)register.
+  *
+  * Returns H_PARAMETER when the processor cannot be found or the flags
+  * value is not one of the FLAGS_* operations; otherwise the result of the
+  * selected helper.
+  */
+ static target_ulong h_register_vpa(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                    target_ulong opcode, target_ulong *args)
+ {
+     target_ulong op = args[0];
+     target_ulong procno = args[1];
+     target_ulong addr = args[2];
+     PowerPCCPU *target_cpu = spapr_find_cpu(procno);
+
+     if (!target_cpu) {
+         return H_PARAMETER;
+     }
+
+     switch (op) {
+     case FLAGS_REGISTER_VPA:
+         return register_vpa(target_cpu, addr);
+     case FLAGS_DEREGISTER_VPA:
+         return deregister_vpa(target_cpu, addr);
+     case FLAGS_REGISTER_SLBSHADOW:
+         return register_slb_shadow(target_cpu, addr);
+     case FLAGS_DEREGISTER_SLBSHADOW:
+         return deregister_slb_shadow(target_cpu, addr);
+     case FLAGS_REGISTER_DTL:
+         return register_dtl(target_cpu, addr);
+     case FLAGS_DEREGISTER_DTL:
+         return deregister_dtl(target_cpu, addr);
+     default:
+         return H_PARAMETER;
+     }
+ }
+
+ /*
+ * H_CEDE hypercall: set MSR[EE] on the calling vCPU and halt it until it
+ * has work to do.
+ *
+ * If a prod is pending it is consumed and the call returns immediately.
+ * The vCPU is only halted when cpu_has_work() reports nothing to do.
+ * Always returns H_SUCCESS.
+ */
+ static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+ {
+ CPUPPCState *env = &cpu->env;
+ CPUState *cs = CPU(cpu);
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+
+ /* Set MSR[EE] and recompute the hflags that depend on the MSR. */
+ env->msr |= (1ULL << MSR_EE);
+ hreg_compute_hflags(env);
+
+ if (spapr_cpu->prod) {
+ spapr_cpu->prod = false;
+ return H_SUCCESS;
+ }
+
+ if (!cpu_has_work(cs)) {
+ cs->halted = 1;
+ cs->exception_index = EXCP_HLT;
+ cs->exit_request = 1;
+ }
+
+ return H_SUCCESS;
+ }
+
+/*
+ * Confer to self, aka join. Cede could use the same pattern as well, if
+ * EXCP_HLT can be changed to ECXP_HALTED.
+ */
+static target_ulong h_confer_self(PowerPCCPU *cpu)
+{
+ CPUState *cs = CPU(cpu);
+ SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+
+ if (spapr_cpu->prod) {
+ spapr_cpu->prod = false;
+ return H_SUCCESS;
+ }
+ cs->halted = 1;
+ cs->exception_index = EXCP_HALTED;
+ cs->exit_request = 1;
+
+ return H_SUCCESS;
+}
+
+ /*
+ * H_JOIN hypercall: halt the calling vCPU until it is prodded.
+ *
+ * Returns H_BAD_MODE when the caller has MSR[EE] set and H_CONTINUE when
+ * every other vCPU is already halted with MSR[EE] clear (the caller would
+ * be the last one running). Otherwise behaves like a confer-to-self.
+ */
+ static target_ulong h_join(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+ {
+ CPUPPCState *env = &cpu->env;
+ CPUState *cs;
+ bool last_unjoined = true;
+
+ if (env->msr & (1ULL << MSR_EE)) {
+ return H_BAD_MODE;
+ }
+
+ /*
+ * Must not join the last CPU running. Interestingly, no such restriction
+ * for H_CONFER-to-self, but that is probably not intended to be used
+ * when H_JOIN is available.
+ */
+ CPU_FOREACH(cs) {
+ PowerPCCPU *c = POWERPC_CPU(cs);
+ CPUPPCState *e = &c->env;
+ if (c == cpu) {
+ continue;
+ }
+
+ /* Don't have a way to indicate joined, so use halted && MSR[EE]=0 */
+ if (!cs->halted || (e->msr & (1ULL << MSR_EE))) {
+ last_unjoined = false;
+ break;
+ }
+ }
+ if (last_unjoined) {
+ return H_CONTINUE;
+ }
+
+ return h_confer_self(cpu);
+ }
+
+ /*
+ * H_CONFER hypercall: args[0] is the target vCPU (-1 for all others),
+ * args[1] the dispatch counter the caller observed for the target.
+ *
+ * Returns H_PARAMETER for an unknown target. A confer to the caller itself
+ * is handled by h_confer_self(). For any other target the call returns
+ * H_SUCCESS without yielding when the target has no VPA, when the supplied
+ * dispatch counter is even, or when it differs from the counter read from
+ * the target's VPA. In all remaining cases the calling vCPU yields.
+ */
+ static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+ {
+ target_long target = args[0];
+ uint32_t dispatch = args[1];
+ CPUState *cs = CPU(cpu);
+ SpaprCpuState *spapr_cpu;
+
+ /*
+ * -1 means confer to all other CPUs without dispatch counter check,
+ * otherwise it's a targeted confer.
+ */
+ if (target != -1) {
+ PowerPCCPU *target_cpu = spapr_find_cpu(target);
+ uint32_t target_dispatch;
+
+ if (!target_cpu) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * target == self is a special case, we wait until prodded, without
+ * dispatch counter check.
+ */
+ if (cpu == target_cpu) {
+ return h_confer_self(cpu);
+ }
+
+ spapr_cpu = spapr_cpu_state(target_cpu);
+ if (!spapr_cpu->vpa_addr || ((dispatch & 1) == 0)) {
+ return H_SUCCESS;
+ }
+
+ /* The counter is a big-endian 32-bit field inside the target's VPA. */
+ target_dispatch = ldl_be_phys(cs->as,
+ spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER);
+ if (target_dispatch != dispatch) {
+ return H_SUCCESS;
+ }
+
+ /*
+ * The targeted confer does not do anything special beyond yielding
+ * the current vCPU, but even this should be better than nothing.
+ * At least for single-threaded tcg, it gives the target a chance to
+ * run before we run again. Multi-threaded tcg does not really do
+ * anything with EXCP_YIELD yet.
+ */
+ }
+
+ cs->exception_index = EXCP_YIELD;
+ cs->exit_request = 1;
+ cpu_loop_exit(cs);
+
+ return H_SUCCESS;
+ }
+
+ /*
+  * H_PROD hypercall: mark the vCPU named by args[0] as prodded, clear its
+  * halted state and kick it. Returns H_PARAMETER when the target does not
+  * resolve to a CPU.
+  */
+ static target_ulong h_prod(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                            target_ulong opcode, target_ulong *args)
+ {
+     target_long target = args[0];
+     PowerPCCPU *target_cpu = spapr_find_cpu(target);
+     CPUState *target_cs = CPU(target_cpu);
+
+     if (target_cs == NULL) {
+         return H_PARAMETER;
+     }
+
+     spapr_cpu_state(target_cpu)->prod = true;
+     target_cs->halted = 0;
+     qemu_cpu_kick(target_cs);
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_RTAS hypercall: args[0] is the guest address of an RTAS argument
+  * buffer made of 32-bit words: token, nargs, nret, then the arguments
+  * immediately followed by the return slots. The call is forwarded to
+  * spapr_rtas_call().
+  */
+ static target_ulong h_rtas(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                            target_ulong opcode, target_ulong *args)
+ {
+     target_ulong rtas_r3 = args[0];
+     uint32_t token = rtas_ld(rtas_r3, 0);
+     uint32_t nargs = rtas_ld(rtas_r3, 1);
+     uint32_t nret = rtas_ld(rtas_r3, 2);
+     /* The three header words occupy the first 12 bytes. */
+     target_ulong args_addr = rtas_r3 + 12;
+     target_ulong rets_addr = args_addr + 4 * nargs;
+
+     return spapr_rtas_call(cpu, spapr, token, nargs, args_addr,
+                            nret, rets_addr);
+ }
+
+ /*
+  * H_LOGICAL_CI_LOAD / H_LOGICAL_CACHE_LOAD: read args[0] bytes (1, 2, 4 or
+  * 8) from the guest address in args[1] and return the value in args[0].
+  * Any other size yields H_PARAMETER and leaves args[] untouched.
+  */
+ static target_ulong h_logical_load(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                    target_ulong opcode, target_ulong *args)
+ {
+     CPUState *cs = CPU(cpu);
+     target_ulong width = args[0];
+     target_ulong addr = args[1];
+     uint64_t value;
+
+     switch (width) {
+     case 1:
+         value = ldub_phys(cs->as, addr);
+         break;
+     case 2:
+         value = lduw_phys(cs->as, addr);
+         break;
+     case 4:
+         value = ldl_phys(cs->as, addr);
+         break;
+     case 8:
+         value = ldq_phys(cs->as, addr);
+         break;
+     default:
+         return H_PARAMETER;
+     }
+
+     args[0] = value;
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_LOGICAL_CI_STORE / H_LOGICAL_CACHE_STORE: write the low args[0] bytes
+  * (1, 2, 4 or 8) of args[2] to the guest address in args[1]. Any other
+  * size yields H_PARAMETER and nothing is written.
+  */
+ static target_ulong h_logical_store(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                     target_ulong opcode, target_ulong *args)
+ {
+     CPUState *cs = CPU(cpu);
+     target_ulong width = args[0];
+     target_ulong addr = args[1];
+     target_ulong value = args[2];
+
+     switch (width) {
+     case 1:
+         stb_phys(cs->as, addr, value);
+         break;
+     case 2:
+         stw_phys(cs->as, addr, value);
+         break;
+     case 4:
+         stl_phys(cs->as, addr, value);
+         break;
+     case 8:
+         stq_phys(cs->as, addr, value);
+         break;
+     default:
+         return H_PARAMETER;
+     }
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_LOGICAL_MEMOP hypercall: copy (op 0) or copy-and-invert (op 1)
+  * args[3] elements of size 1 << args[2] bytes from the guest address in
+  * args[1] to the one in args[0].
+  *
+  * Returns H_PARAMETER for an element size code above 3, a count above
+  * 0x80000000, an op above 1 or addresses that are not aligned to the
+  * element size. The size code is validated before it is used as a shift
+  * count, so a guest-supplied value can never cause an out-of-range shift.
+  * When the destination starts inside the source range the copy runs
+  * backwards so that overlapping data is not overwritten before it is read.
+  */
+ static target_ulong h_logical_memop(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                     target_ulong opcode, target_ulong *args)
+ {
+     CPUState *cs = CPU(cpu);
+
+     target_ulong dst   = args[0]; /* Destination address */
+     target_ulong src   = args[1]; /* Source address */
+     target_ulong esize = args[2]; /* Element size (0=1,1=2,2=4,3=8) */
+     target_ulong count = args[3]; /* Element count */
+     target_ulong op    = args[4]; /* 0 = copy, 1 = invert */
+     uint64_t tmp;
+     unsigned int mask;
+     int step;
+
+     /* Must come first: esize is guest-controlled and used as a shift count */
+     if (esize > 3) {
+         return H_PARAMETER;
+     }
+     mask = (1U << esize) - 1;
+     step = 1 << esize;
+
+     if (count > 0x80000000) {
+         return H_PARAMETER;
+     }
+
+     if ((dst & mask) || (src & mask) || (op > 1)) {
+         return H_PARAMETER;
+     }
+
+     /* Overlap with dst inside the source range: walk from the end */
+     if (dst >= src && dst < (src + (count << esize))) {
+         dst = dst + ((count - 1) << esize);
+         src = src + ((count - 1) << esize);
+         step = -step;
+     }
+
+     while (count--) {
+         switch (esize) {
+         case 0:
+             tmp = ldub_phys(cs->as, src);
+             break;
+         case 1:
+             tmp = lduw_phys(cs->as, src);
+             break;
+         case 2:
+             tmp = ldl_phys(cs->as, src);
+             break;
+         case 3:
+             tmp = ldq_phys(cs->as, src);
+             break;
+         default:
+             /* Not reachable after the range check above */
+             return H_PARAMETER;
+         }
+         if (op == 1) {
+             tmp = ~tmp;
+         }
+         switch (esize) {
+         case 0:
+             stb_phys(cs->as, dst, tmp);
+             break;
+         case 1:
+             stw_phys(cs->as, dst, tmp);
+             break;
+         case 2:
+             stl_phys(cs->as, dst, tmp);
+             break;
+         case 3:
+             stq_phys(cs->as, dst, tmp);
+             break;
+         }
+         dst = dst + step;
+         src = src + step;
+     }
+
+     return H_SUCCESS;
+ }
+
+ /* H_LOGICAL_ICBI hypercall: accepted as a no-op, always returns H_SUCCESS. */
+ static target_ulong h_logical_icbi(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+ {
+ /* Nothing to do on emulation, KVM will trap this in the kernel */
+ return H_SUCCESS;
+ }
+
+ /* H_LOGICAL_DCBF hypercall: accepted as a no-op, always returns H_SUCCESS. */
+ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+ {
+ /* Nothing to do on emulation, KVM will trap this in the kernel */
+ return H_SUCCESS;
+ }
+
+ /*
+  * H_SET_MODE, endianness resource: switch interrupt endianness
+  * (LPCR[ILE]) on all CPUs and notify the PCI VGA code.
+  *
+  * @value1 and @value2 must both be zero (H_P3 / H_P4 otherwise). @mflags
+  * must be H_SET_MODE_ENDIAN_BIG or H_SET_MODE_ENDIAN_LITTLE, anything else
+  * yields H_UNSUPPORTED_FLAG.
+  */
+ static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu,
+                                            SpaprMachineState *spapr,
+                                            target_ulong mflags,
+                                            target_ulong value1,
+                                            target_ulong value2)
+ {
+     if (value1) {
+         return H_P3;
+     }
+     if (value2) {
+         return H_P4;
+     }
+
+     if (mflags == H_SET_MODE_ENDIAN_BIG) {
+         spapr_set_all_lpcrs(0, LPCR_ILE);
+         spapr_pci_switch_vga(spapr, true);
+     } else if (mflags == H_SET_MODE_ENDIAN_LITTLE) {
+         spapr_set_all_lpcrs(LPCR_ILE, LPCR_ILE);
+         spapr_pci_switch_vga(spapr, false);
+     } else {
+         return H_UNSUPPORTED_FLAG;
+     }
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SET_MODE, address translation mode resource: program the LPCR[AIL]
+  * field of all CPUs from @mflags.
+  *
+  * Returns H_P2 when the CPU lacks PPC2_ISA207S, H_P3 / H_P4 when @value1 /
+  * @value2 are non-zero, and H_UNSUPPORTED_FLAG for reserved AIL values:
+  * 1 always, 2 on ISA v3.1 CPUs, and anything above 3, which would not fit
+  * the AIL field and would otherwise spill into neighbouring LPCR bits.
+  */
+ static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu,
+                                                         target_ulong mflags,
+                                                         target_ulong value1,
+                                                         target_ulong value2)
+ {
+     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+
+     if (!(pcc->insns_flags2 & PPC2_ISA207S)) {
+         return H_P2;
+     }
+     if (value1) {
+         return H_P3;
+     }
+     if (value2) {
+         return H_P4;
+     }
+
+     if (mflags > 3) {
+         /* Does not fit the AIL field */
+         return H_UNSUPPORTED_FLAG;
+     }
+
+     if (mflags == 1) {
+         /* AIL=1 is reserved in POWER8/POWER9/POWER10 */
+         return H_UNSUPPORTED_FLAG;
+     }
+
+     if (mflags == 2 && (pcc->insns_flags2 & PPC2_ISA310)) {
+         /* AIL=2 is reserved in POWER10 (ISA v3.1) */
+         return H_UNSUPPORTED_FLAG;
+     }
+
+     spapr_set_all_lpcrs(mflags << LPCR_AIL_SHIFT, LPCR_AIL);
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SET_MODE hypercall: dispatch on the resource in args[1]. args[0]
+  * carries the mode flags, args[2] and args[3] the two values. Unknown
+  * resources yield H_P2.
+  */
+ static target_ulong h_set_mode(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                target_ulong opcode, target_ulong *args)
+ {
+     target_ulong mflags = args[0];
+     target_ulong resource = args[1];
+
+     if (resource == H_SET_MODE_RESOURCE_LE) {
+         return h_set_mode_resource_le(cpu, spapr, mflags, args[2], args[3]);
+     }
+     if (resource == H_SET_MODE_RESOURCE_ADDR_TRANS_MODE) {
+         return h_set_mode_resource_addr_trans_mode(cpu, mflags,
+                                                    args[2], args[3]);
+     }
+
+     return H_P2;
+ }
+
+ /*
+ * H_CLEAN_SLB hypercall: not implemented. Logs the call under LOG_UNIMP
+ * and returns H_FUNCTION.
+ */
+ static target_ulong h_clean_slb(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+ {
+ qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n",
+ opcode, " (H_CLEAN_SLB)");
+ return H_FUNCTION;
+ }
+
+ /*
+ * H_INVALIDATE_PID hypercall: not implemented. Logs the call under
+ * LOG_UNIMP and returns H_FUNCTION.
+ */
+ static target_ulong h_invalidate_pid(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+ {
+ qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n",
+ opcode, " (H_INVALIDATE_PID)");
+ return H_FUNCTION;
+ }
+
+ /*
+ * Allocate or free the hash page table when the PATE1_GR bit differs
+ * between the old and the new partition table entry.
+ *
+ * @patbe_old: entry in effect before the change
+ * @patbe_new: entry about to be installed
+ */
+ static void spapr_check_setup_free_hpt(SpaprMachineState *spapr,
+ uint64_t patbe_old, uint64_t patbe_new)
+ {
+ /*
+ * We have 4 Options:
+ * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing
+ * HASH->RADIX : Free HPT
+ * RADIX->HASH : Allocate HPT
+ * NOTHING->HASH : Allocate HPT
+ * Note: NOTHING implies the case where we said the guest could choose
+ * later and so assumed radix and now it's called H_REG_PROC_TBL
+ */
+
+ if ((patbe_old & PATE1_GR) == (patbe_new & PATE1_GR)) {
+ /* We assume RADIX, so this catches all the "Do Nothing" cases */
+ } else if (!(patbe_old & PATE1_GR)) {
+ /* HASH->RADIX : Free HPT */
+ spapr_free_hpt(spapr);
+ } else if (!(patbe_new & PATE1_GR)) {
+ /* RADIX->HASH || NOTHING->HASH : Allocate HPT */
+ spapr_setup_hpt(spapr);
+ }
+ return;
+ }
+
+#define FLAGS_MASK 0x01FULL
+#define FLAG_MODIFY 0x10
+#define FLAG_REGISTER 0x08
+#define FLAG_RADIX 0x04
+#define FLAG_HASH_PROC_TBL 0x02
+#define FLAG_GTSE 0x01
+
+ /*
+ * H_REGISTER_PROC_TBL hypercall: args[0] holds the FLAG_* bits, args[1]
+ * the process table address, args[2] the page size and args[3] the table
+ * size.
+ *
+ * Depending on the flags a new radix or hash (SLB) process table is
+ * registered, the current one is deregistered, or the current registration
+ * is kept. The resulting entry is stored in spapr->patb_entry, the HPT is
+ * allocated or freed if the radix bit changed, and the UPRT, HR and GTSE
+ * bits of the LPCR are updated on all CPUs. Under KVM the result of
+ * kvmppc_configure_v3_mmu() is returned.
+ *
+ * Returns H_PARAMETER for reserved flag bits, for the unsupported
+ * hash-with-segment-tables mode and for a radix flag that does not match
+ * the current entry when not modifying; H_P2, H_P3 or H_P4 when the
+ * address, page size or table size is rejected.
+ */
+ static target_ulong h_register_process_table(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong opcode,
+ target_ulong *args)
+ {
+ target_ulong flags = args[0];
+ target_ulong proc_tbl = args[1];
+ target_ulong page_size = args[2];
+ target_ulong table_size = args[3];
+ target_ulong update_lpcr = 0;
+ uint64_t cproc;
+
+ if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */
+ return H_PARAMETER;
+ }
+ if (flags & FLAG_MODIFY) {
+ if (flags & FLAG_REGISTER) {
+ if (flags & FLAG_RADIX) { /* Register new RADIX process table */
+ if (proc_tbl & 0xfff || proc_tbl >> 60) {
+ return H_P2;
+ } else if (page_size) {
+ return H_P3;
+ } else if (table_size > 24) {
+ return H_P4;
+ }
+ cproc = PATE1_GR | proc_tbl | table_size;
+ } else { /* Register new HPT process table */
+ if (flags & FLAG_HASH_PROC_TBL) { /* Hash with Segment Tables */
+ /* TODO - Not Supported */
+ /* Technically caused by flag bits => H_PARAMETER */
+ return H_PARAMETER;
+ } else { /* Hash with SLB */
+ if (proc_tbl >> 38) {
+ return H_P2;
+ } else if (page_size & ~0x7) {
+ return H_P3;
+ } else if (table_size > 24) {
+ return H_P4;
+ }
+ }
+ cproc = (proc_tbl << 25) | page_size << 5 | table_size;
+ }
+
+ } else { /* Deregister current process table */
+ /*
+ * Set to benign value: (current GR) | 0. This allows
+ * deregistration in KVM to succeed even if the radix bit
+ * in flags doesn't match the radix bit in the old PATE.
+ */
+ cproc = spapr->patb_entry & PATE1_GR;
+ }
+ } else { /* Maintain current registration */
+ if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATE1_GR)) {
+ /* Technically caused by flag bits => H_PARAMETER */
+ return H_PARAMETER; /* Existing Process Table Mismatch */
+ }
+ cproc = spapr->patb_entry;
+ }
+
+ /* Check if we need to setup OR free the hpt */
+ spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc);
+
+ spapr->patb_entry = cproc; /* Save new process table */
+
+ /* Update the UPRT, HR and GTSE bits in the LPCR for all cpus */
+ if (flags & FLAG_RADIX) /* Radix must use process tables, also set HR */
+ update_lpcr |= (LPCR_UPRT | LPCR_HR);
+ else if (flags & FLAG_HASH_PROC_TBL) /* Hash with process tables */
+ update_lpcr |= LPCR_UPRT;
+ if (flags & FLAG_GTSE) /* Guest translation shootdown enable */
+ update_lpcr |= LPCR_GTSE;
+
+ spapr_set_all_lpcrs(update_lpcr, LPCR_UPRT | LPCR_HR | LPCR_GTSE);
+
+ if (kvm_enabled()) {
+ return kvmppc_configure_v3_mmu(cpu, flags & FLAG_RADIX,
+ flags & FLAG_GTSE, cproc);
+ }
+ return H_SUCCESS;
+ }
+
+#define H_SIGNAL_SYS_RESET_ALL -1
+#define H_SIGNAL_SYS_RESET_ALLBUTSELF -2
+
+ /*
+  * H_SIGNAL_SYS_RESET hypercall: run spapr_do_system_reset_on_cpu() on the
+  * vCPU named by args[0], or on a group of vCPUs when args[0] is negative:
+  * H_SIGNAL_SYS_RESET_ALL targets every vCPU and
+  * H_SIGNAL_SYS_RESET_ALLBUTSELF every vCPU except the caller.
+  *
+  * Returns H_PARAMETER for an unknown vCPU or any other negative target.
+  */
+ static target_ulong h_signal_sys_reset(PowerPCCPU *cpu,
+                                        SpaprMachineState *spapr,
+                                        target_ulong opcode, target_ulong *args)
+ {
+     target_long target = args[0];
+     CPUState *cs;
+     bool skip_self;
+
+     if (target >= 0) {
+         /* Unicast */
+         cs = CPU(spapr_find_cpu(target));
+         if (!cs) {
+             return H_PARAMETER;
+         }
+         run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
+         return H_SUCCESS;
+     }
+
+     /* Broadcast */
+     if (target < H_SIGNAL_SYS_RESET_ALLBUTSELF) {
+         return H_PARAMETER;
+     }
+     skip_self = (target == H_SIGNAL_SYS_RESET_ALLBUTSELF);
+
+     CPU_FOREACH(cs) {
+         PowerPCCPU *other = POWERPC_CPU(cs);
+
+         if (skip_self && other == cpu) {
+             continue;
+         }
+         run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL);
+     }
+
+     return H_SUCCESS;
+ }
+
+ /*
+ * Scan the guest-supplied PVR table for client-architecture-support.
+ *
+ * @max_compat: passed through to ppc_check_compat() for each candidate
+ * @addr: in/out guest address of the table; advanced by 8 bytes for every
+ * record read, including the terminator
+ * @raw_mode_supported: set to true if some record matched the PVR of @cpu
+ * under its mask, false otherwise
+ *
+ * Each record is a big-endian (mask, pvr) pair of 32-bit words. At most
+ * 512 records are read; a record whose pvr has bits set outside its mask
+ * ends the scan.
+ *
+ * Returns either a logical PVR or zero if none was found
+ */
+ static uint32_t cas_check_pvr(PowerPCCPU *cpu, uint32_t max_compat,
+ target_ulong *addr, bool *raw_mode_supported)
+ {
+ bool explicit_match = false; /* Matched the CPU's real PVR */
+ uint32_t best_compat = 0;
+ int i;
+
+ /*
+ * We scan the supplied table of PVRs looking for two things
+ * 1. Is our real CPU PVR in the list?
+ * 2. What's the "best" listed logical PVR
+ */
+ for (i = 0; i < 512; ++i) {
+ uint32_t pvr, pvr_mask;
+
+ pvr_mask = ldl_be_phys(&address_space_memory, *addr);
+ pvr = ldl_be_phys(&address_space_memory, *addr + 4);
+ *addr += 8;
+
+ if (~pvr_mask & pvr) {
+ break; /* Terminator record */
+ }
+
+ if ((cpu->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask)) {
+ explicit_match = true;
+ } else {
+ /* Keep this PVR as the best candidate if ppc_check_compat() accepts it. */
+ if (ppc_check_compat(cpu, pvr, best_compat, max_compat)) {
+ best_compat = pvr;
+ }
+ }
+ }
+
+ *raw_mode_supported = explicit_match;
+
+ /* Parsing finished */
+ trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat);
+
+ return best_compat;
+ }
+
+/*
+ * Common part of ibm,client-architecture-support (CAS) negotiation.
+ *
+ * @vec is the address of the guest's PVR table, immediately followed by the
+ * option vectors; @fdt_bufsize is passed on to spapr_build_fdt().
+ *
+ * Returns H_MULTI_THREADS_ACTIVE if a vCPU other than @cpu is not halted,
+ * H_HARDWARE if no usable PVR/compat mode could be negotiated, H_PARAMETER
+ * if option vector 1 or 5 is missing, and H_SUCCESS otherwise. Several
+ * invalid guest requests terminate QEMU through exit() instead of returning.
+ *
+ * On success spapr->fdt_blob is replaced by a freshly built (unpacked) FDT
+ * and spapr->fdt_size / spapr->fdt_initial_size are updated to match.
+ */
+static
+target_ulong do_client_architecture_support(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ target_ulong vec,
+ target_ulong fdt_bufsize)
+{
+ target_ulong ov_table; /* Working address in data buffer */
+ uint32_t cas_pvr;
+ SpaprOptionVector *ov1_guest, *ov5_guest;
+ bool guest_radix;
+ bool raw_mode_supported = false;
+ bool guest_xive;
+ CPUState *cs;
+ void *fdt;
+ uint32_t max_compat = spapr->max_compat_pvr;
+
+ /* CAS is supposed to be called early when only the boot vCPU is active. */
+ CPU_FOREACH(cs) {
+ if (cs == CPU(cpu)) {
+ continue;
+ }
+ if (!cs->halted) {
+ warn_report("guest has multiple active vCPUs at CAS, which is not allowed");
+ return H_MULTI_THREADS_ACTIVE;
+ }
+ }
+
+ /* cas_check_pvr() advances vec past the PVR table it consumes */
+ cas_pvr = cas_check_pvr(cpu, max_compat, &vec, &raw_mode_supported);
+ if (!cas_pvr && (!raw_mode_supported || max_compat)) {
+ /*
+ * We couldn't find a suitable compatibility mode, and either
+ * the guest doesn't support "raw" mode for this CPU, or "raw"
+ * mode is disabled because a maximum compat mode is set.
+ */
+ error_report("Couldn't negotiate a suitable PVR during CAS");
+ return H_HARDWARE;
+ }
+
+ /* Update CPUs */
+ if (cpu->compat_pvr != cas_pvr) {
+ Error *local_err = NULL;
+
+ if (ppc_set_compat_all(cas_pvr, &local_err) < 0) {
+ /* We fail to set compat mode (likely because running with KVM PR),
+ * but maybe we can fallback to raw mode if the guest supports it.
+ */
+ if (!raw_mode_supported) {
+ error_report_err(local_err);
+ return H_HARDWARE;
+ }
+ error_free(local_err);
+ }
+ }
+
+ /* For the future use: here @ov_table points to the first option vector */
+ ov_table = vec;
+
+ ov1_guest = spapr_ovec_parse_vector(ov_table, 1);
+ if (!ov1_guest) {
+ warn_report("guest didn't provide option vector 1");
+ return H_PARAMETER;
+ }
+ ov5_guest = spapr_ovec_parse_vector(ov_table, 5);
+ if (!ov5_guest) {
+ /* Vector 1 was already parsed; release it before bailing out */
+ spapr_ovec_cleanup(ov1_guest);
+ warn_report("guest didn't provide option vector 5");
+ return H_PARAMETER;
+ }
+ if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) {
+ error_report("guest requested hash and radix MMU, which is invalid.");
+ exit(EXIT_FAILURE);
+ }
+ if (spapr_ovec_test(ov5_guest, OV5_XIVE_BOTH)) {
+ error_report("guest requested an invalid interrupt mode");
+ exit(EXIT_FAILURE);
+ }
+
+ guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300);
+
+ guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT);
+
+ /*
+ * HPT resizing is a bit of a special case, because when enabled
+ * we assume an HPT guest will support it until it says it
+ * doesn't, instead of assuming it won't support it until it says
+ * it does. Strictly speaking that approach could break for
+ * guests which don't make a CAS call, but those are so old we
+ * don't care about them. Without that assumption we'd have to
+ * make at least a temporary allocation of an HPT sized for max
+ * memory, which could be impossibly difficult under KVM HV if
+ * maxram is large.
+ */
+ if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) {
+ int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size);
+
+ if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) {
+ error_report(
+ "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required");
+ exit(1);
+ }
+
+ if (spapr->htab_shift < maxshift) {
+ /* Guest doesn't know about HPT resizing, so we
+ * pre-emptively resize for the maximum permitted RAM. At
+ * the point this is called, nothing should have been
+ * entered into the existing HPT */
+ spapr_reallocate_hpt(spapr, maxshift, &error_fatal);
+ push_sregs_to_kvm_pr(spapr);
+ }
+ }
+
+ /* NOTE: there are actually a number of ov5 bits where input from the
+ * guest is always zero, and the platform/QEMU enables them independently
+ * of guest input. To model these properly we'd want some sort of mask,
+ * but since they only currently apply to memory migration as defined
+ * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need
+ * to worry about this for now.
+ */
+
+ /* full range of negotiated ov5 capabilities */
+ spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest);
+ spapr_ovec_cleanup(ov5_guest);
+
+ spapr_check_mmu_mode(guest_radix);
+
+ /* Vector 1 is only needed for the ISA 3.00 bit; release it right after */
+ spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00);
+ spapr_ovec_cleanup(ov1_guest);
+
+ /*
+ * Check for NUMA affinity conditions now that we know which NUMA
+ * affinity the guest will use.
+ */
+ spapr_numa_associativity_check(spapr);
+
+ /*
+ * Ensure the guest asks for an interrupt mode we support;
+ * otherwise terminate the boot.
+ */
+ if (guest_xive) {
+ if (!spapr->irq->xive) {
+ error_report(
+"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property");
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ if (!spapr->irq->xics) {
+ error_report(
+"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ spapr_irq_update_active_intc(spapr);
+
+ /*
+ * Process all pending hot-plug/unplug requests now. An updated full
+ * rendered FDT will be returned to the guest.
+ */
+ spapr_drc_reset_all(spapr);
+ spapr_clear_pending_hotplug_events(spapr);
+
+ /*
+ * If spapr_machine_reset() did not set up a HPT but one is necessary
+ * (because the guest isn't going to use radix) then set it up here.
+ */
+ if ((spapr->patb_entry & PATE1_GR) && !guest_radix) {
+ /* legacy hash or new hash: */
+ spapr_setup_hpt(spapr);
+ }
+
+ /* Replace the cached FDT with one reflecting the negotiated state */
+ fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize);
+ g_free(spapr->fdt_blob);
+ spapr->fdt_size = fdt_totalsize(fdt);
+ spapr->fdt_initial_size = spapr->fdt_size;
+ spapr->fdt_blob = fdt;
+
+ return H_SUCCESS;
+}
+
+/*
+ * KVMPPC_H_CAS handler. args[0] is the option vector address (converted with
+ * ppc64_phys_to_real()), args[1] the guest buffer receiving the result and
+ * args[2] the size of that buffer.
+ *
+ * When negotiation succeeds, the packed FDT is written to the guest buffer
+ * preceded by a SpaprDeviceTreeUpdateHeader. A buffer too small to hold even
+ * the header terminates QEMU.
+ */
+static target_ulong h_client_architecture_support(PowerPCCPU *cpu,
+                                                  SpaprMachineState *spapr,
+                                                  target_ulong opcode,
+                                                  target_ulong *args)
+{
+    SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 };
+    target_ulong vec = ppc64_phys_to_real(args[0]);
+    target_ulong fdt_buf = args[1];
+    target_ulong fdt_bufsize = args[2];
+    target_ulong ret;
+
+    if (fdt_bufsize < sizeof(hdr)) {
+        error_report("SLOF provided insufficient CAS buffer "
+                     TARGET_FMT_lu " (min: %zu)", fdt_bufsize, sizeof(hdr));
+        exit(EXIT_FAILURE);
+    }
+
+    /* The header takes up the start of the buffer; the FDT gets the rest */
+    fdt_bufsize -= sizeof(hdr);
+
+    ret = do_client_architecture_support(cpu, spapr, vec, fdt_bufsize);
+    if (ret != H_SUCCESS) {
+        return ret;
+    }
+
+    _FDT((fdt_pack(spapr->fdt_blob)));
+    spapr->fdt_size = fdt_totalsize(spapr->fdt_blob);
+    spapr->fdt_initial_size = spapr->fdt_size;
+
+    cpu_physical_memory_write(fdt_buf, &hdr, sizeof(hdr));
+    cpu_physical_memory_write(fdt_buf + sizeof(hdr), spapr->fdt_blob,
+                              spapr->fdt_size);
+    trace_spapr_cas_continue(spapr->fdt_size + sizeof(hdr));
+
+    return ret;
+}
+
+/*
+ * CAS entry point used by the VOF client interface: runs the common
+ * negotiation with an FDT size limit of FDT_MAX_SIZE, then finalizes the
+ * device tree held in spapr->fdt_blob. Returns the negotiation status.
+ */
+target_ulong spapr_vof_client_architecture_support(MachineState *ms,
+                                                   CPUState *cs,
+                                                   target_ulong ovec_addr)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    target_ulong rc;
+
+    rc = do_client_architecture_support(cpu, spapr, ovec_addr, FDT_MAX_SIZE);
+
+    /*
+     * This adds stdout and generates phandles for boottime and CAS FDTs.
+     * Updating the FDT here is fine because
+     * do_client_architecture_support() does not pack it.
+     */
+    spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob);
+
+    return rc;
+}
+
+/*
+ * H_GET_CPU_CHARACTERISTICS: translate the machine's CFPC, SBBC, IBS and
+ * CCF_ASSIST capability settings into the "characteristics" and "behaviour"
+ * bit masks, returned to the guest in args[0] and args[1] respectively.
+ * Always returns H_SUCCESS.
+ */
+static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
+                                              SpaprMachineState *spapr,
+                                              target_ulong opcode,
+                                              target_ulong *args)
+{
+    uint64_t characteristics = H_CPU_CHAR_HON_BRANCH_HINTS &
+                               ~H_CPU_CHAR_THR_RECONF_TRIG;
+    uint64_t behaviour = H_CPU_BEHAV_FAVOUR_SECURITY;
+    uint8_t safe_cache = spapr_get_cap(spapr, SPAPR_CAP_CFPC);
+    uint8_t safe_bounds_check = spapr_get_cap(spapr, SPAPR_CAP_SBBC);
+    uint8_t safe_indirect_branch = spapr_get_cap(spapr, SPAPR_CAP_IBS);
+    uint8_t count_cache_flush_assist = spapr_get_cap(spapr,
+                                                     SPAPR_CAP_CCF_ASSIST);
+
+    /* Cache flush on privilege change (CFPC) */
+    if (safe_cache == SPAPR_CAP_WORKAROUND) {
+        characteristics |= H_CPU_CHAR_L1D_FLUSH_ORI30;
+        characteristics |= H_CPU_CHAR_L1D_FLUSH_TRIG2;
+        characteristics |= H_CPU_CHAR_L1D_THREAD_PRIV;
+        behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR;
+    } else if (safe_cache == SPAPR_CAP_FIXED) {
+        behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY;
+        behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS;
+    } else {
+        /* broken */
+        assert(safe_cache == SPAPR_CAP_BROKEN);
+        behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR;
+    }
+
+    /* Speculation barrier bounds checking (SBBC) */
+    if (safe_bounds_check == SPAPR_CAP_WORKAROUND) {
+        characteristics |= H_CPU_CHAR_SPEC_BAR_ORI31;
+        behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+    } else if (safe_bounds_check == SPAPR_CAP_FIXED) {
+        /* Nothing to advertise */
+    } else {
+        /* broken */
+        assert(safe_bounds_check == SPAPR_CAP_BROKEN);
+        behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR;
+    }
+
+    /* Indirect branch speculation (IBS) */
+    if (safe_indirect_branch == SPAPR_CAP_FIXED_NA) {
+        /* Nothing to advertise */
+    } else if (safe_indirect_branch == SPAPR_CAP_FIXED_CCD) {
+        characteristics |= H_CPU_CHAR_CACHE_COUNT_DIS;
+    } else if (safe_indirect_branch == SPAPR_CAP_FIXED_IBS) {
+        characteristics |= H_CPU_CHAR_BCCTRL_SERIALISED;
+    } else if (safe_indirect_branch == SPAPR_CAP_WORKAROUND) {
+        behaviour |= H_CPU_BEHAV_FLUSH_COUNT_CACHE;
+        if (count_cache_flush_assist) {
+            characteristics |= H_CPU_CHAR_BCCTR_FLUSH_ASSIST;
+        }
+    } else {
+        /* broken */
+        assert(safe_indirect_branch == SPAPR_CAP_BROKEN);
+    }
+
+    args[0] = characteristics;
+    args[1] = behaviour;
+    return H_SUCCESS;
+}
+
+/*
+ * KVMPPC_H_UPDATE_DT: replace the cached device tree with the one the guest
+ * placed at args[0] (converted with ppc64_phys_to_real()).
+ *
+ * Returns H_SUCCESS without touching the cached tree when the machine class
+ * has update_dt_enabled unset. Returns H_PARAMETER if the guest tree is more
+ * than twice spapr->fdt_initial_size or fails fdt_check_full(). Otherwise
+ * the copy becomes spapr->fdt_blob and H_SUCCESS is returned.
+ */
+static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                target_ulong opcode, target_ulong *args)
+{
+    target_ulong dt = ppc64_phys_to_real(args[0]);
+    struct fdt_header hdr = { 0 };
+    unsigned cb;
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    void *fdt;
+
+    /* Only the header is read at first, to learn the claimed total size */
+    cpu_physical_memory_read(dt, &hdr, sizeof(hdr));
+    cb = fdt32_to_cpu(hdr.totalsize);
+
+    if (!smc->update_dt_enabled) {
+        return H_SUCCESS;
+    }
+
+    /* Check that the fdt did not grow out of proportion */
+    if (cb > spapr->fdt_initial_size * 2) {
+        trace_spapr_update_dt_failed_size(spapr->fdt_initial_size, cb,
+                                          fdt32_to_cpu(hdr.magic));
+        return H_PARAMETER;
+    }
+
+    fdt = g_malloc0(cb);
+    cpu_physical_memory_read(dt, fdt, cb);
+
+    /* Check the fdt consistency */
+    if (fdt_check_full(fdt, cb)) {
+        trace_spapr_update_dt_failed_check(spapr->fdt_initial_size, cb,
+                                           fdt32_to_cpu(hdr.magic));
+        /* The rejected copy is not kept anywhere: release it */
+        g_free(fdt);
+        return H_PARAMETER;
+    }
+
+    /* Ownership of the validated copy moves to the machine state */
+    g_free(spapr->fdt_blob);
+    spapr->fdt_size = cb;
+    spapr->fdt_blob = fdt;
+    trace_spapr_update_dt(cb);
+
+    return H_SUCCESS;
+}
+
+/*
+ * Hypercall dispatch tables, filled by spapr_register_hypercall() and read
+ * by spapr_hypercall(). The PAPR and SVM tables are indexed by opcode / 4
+ * (relative to SVM_HCALL_BASE for SVM); the KVMPPC table is indexed by
+ * opcode - KVMPPC_HCALL_BASE. An empty (NULL) slot means "not registered".
+ */
+static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
+static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1];
+static spapr_hcall_fn svm_hypercall_table[(SVM_HCALL_MAX - SVM_HCALL_BASE) / 4 + 1];
+
+/*
+ * Install @fn as the handler for hypercall @opcode.
+ *
+ * The opcode must fall in the PAPR range (<= MAX_HCALL_OPCODE), the SVM
+ * range or the KVMPPC range; PAPR and SVM opcodes must be multiples of 4.
+ * Violating any of this, or registering the same opcode twice, trips an
+ * assertion.
+ */
+void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
+{
+    spapr_hcall_fn *slot;
+    bool is_papr = opcode <= MAX_HCALL_OPCODE;
+    bool is_svm = !is_papr &&
+                  opcode >= SVM_HCALL_BASE && opcode <= SVM_HCALL_MAX;
+
+    if (is_papr) {
+        assert((opcode & 0x3) == 0);
+
+        slot = &papr_hypercall_table[opcode / 4];
+    } else if (is_svm) {
+        /* we only have SVM-related hcall numbers assigned in multiples of 4 */
+        assert((opcode & 0x3) == 0);
+
+        slot = &svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
+    } else {
+        /* Anything else has to be a KVMPPC-specific opcode */
+        assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX));
+
+        slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+    }
+
+    /* Each opcode may be claimed only once */
+    assert(!(*slot));
+    *slot = fn;
+}
+
+/*
+ * Dispatch hypercall @opcode to its registered handler and return the
+ * handler's result. If the opcode lies outside every table, or its slot is
+ * empty, the call is logged as unimplemented and H_FUNCTION is returned.
+ */
+target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
+                             target_ulong *args)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    spapr_hcall_fn handler = NULL;
+
+    if ((opcode <= MAX_HCALL_OPCODE) && ((opcode & 0x3) == 0)) {
+        handler = papr_hypercall_table[opcode / 4];
+    } else if ((opcode >= SVM_HCALL_BASE) && (opcode <= SVM_HCALL_MAX)) {
+        handler = svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
+    } else if ((opcode >= KVMPPC_HCALL_BASE) &&
+               (opcode <= KVMPPC_HCALL_MAX)) {
+        handler = kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+    }
+
+    if (handler) {
+        return handler(cpu, spapr, opcode, args);
+    }
+
+    qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x" TARGET_FMT_lx "\n",
+                  opcode);
+    return H_FUNCTION;
+}
+
+#ifndef CONFIG_TCG
+/*
+ * Stand-in handler used when CONFIG_TCG is not defined: it is registered
+ * for the opcodes listed below and must never actually be invoked.
+ */
+static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    g_assert_not_reached();
+}
+
+/* Register h_softmmu() for the hcall-pft and hcall-bulk opcodes */
+static void hypercall_register_softmmu(void)
+{
+    static const target_ulong softmmu_hcalls[] = {
+        /* hcall-pft */
+        H_ENTER,
+        H_REMOVE,
+        H_PROTECT,
+        H_READ,
+
+        /* hcall-bulk */
+        H_BULK_REMOVE,
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(softmmu_hcalls) / sizeof(softmmu_hcalls[0]); i++) {
+        spapr_register_hypercall(softmmu_hcalls[i], h_softmmu);
+    }
+}
+#else
+/* With CONFIG_TCG defined nothing is registered here */
+static void hypercall_register_softmmu(void)
+{
+    /* DO NOTHING */
+}
+#endif
+
+/*
+ * Module initializer: registers every hypercall handler implemented in this
+ * file, in the order of the table below, after the softmmu placeholders.
+ */
+static void hypercall_register_types(void)
+{
+    static const struct {
+        target_ulong opcode;
+        spapr_hcall_fn fn;
+    } hcalls[] = {
+        /* hcall-hpt-resize */
+        { H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare },
+        { H_RESIZE_HPT_COMMIT, h_resize_hpt_commit },
+
+        /* hcall-splpar */
+        { H_REGISTER_VPA, h_register_vpa },
+        { H_CEDE, h_cede },
+        { H_CONFER, h_confer },
+        { H_PROD, h_prod },
+
+        /* hcall-join */
+        { H_JOIN, h_join },
+
+        { H_SIGNAL_SYS_RESET, h_signal_sys_reset },
+
+        /* processor register resource access h-calls */
+        { H_SET_SPRG0, h_set_sprg0 },
+        { H_SET_DABR, h_set_dabr },
+        { H_SET_XDABR, h_set_xdabr },
+        { H_PAGE_INIT, h_page_init },
+        { H_SET_MODE, h_set_mode },
+
+        /* In Memory Table MMU h-calls */
+        { H_CLEAN_SLB, h_clean_slb },
+        { H_INVALIDATE_PID, h_invalidate_pid },
+        { H_REGISTER_PROC_TBL, h_register_process_table },
+
+        /* hcall-get-cpu-characteristics */
+        { H_GET_CPU_CHARACTERISTICS, h_get_cpu_characteristics },
+
+        /*
+         * "debugger" hcalls (also used by SLOF). Note: We do -not-
+         * differentiate here between the "CI" and the "CACHE" variants,
+         * they will use whatever mapping attributes qemu is using. When
+         * using KVM, the kernel will enforce the attributes more strongly
+         */
+        { H_LOGICAL_CI_LOAD, h_logical_load },
+        { H_LOGICAL_CI_STORE, h_logical_store },
+        { H_LOGICAL_CACHE_LOAD, h_logical_load },
+        { H_LOGICAL_CACHE_STORE, h_logical_store },
+        { H_LOGICAL_ICBI, h_logical_icbi },
+        { H_LOGICAL_DCBF, h_logical_dcbf },
+        { KVMPPC_H_LOGICAL_MEMOP, h_logical_memop },
+
+        /* qemu/KVM-PPC specific hcalls */
+        { KVMPPC_H_RTAS, h_rtas },
+
+        /* ibm,client-architecture-support support */
+        { KVMPPC_H_CAS, h_client_architecture_support },
+
+        { KVMPPC_H_UPDATE_DT, h_update_dt },
+    };
+    size_t i;
+
+    hypercall_register_softmmu();
+
+    for (i = 0; i < sizeof(hcalls) / sizeof(hcalls[0]); i++) {
+        spapr_register_hypercall(hcalls[i].opcode, hcalls[i].fn);
+    }
+}
+
+type_init(hypercall_register_types)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
new file mode 100644
index 000000000..db0107185
--- /dev/null
+++ b/hw/ppc/spapr_iommu.c
@@ -0,0 +1,718 @@
+/*
+ * QEMU sPAPR IOMMU (TCE) code
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/log.h"
+#include "qemu/module.h"
+#include "sysemu/kvm.h"
+#include "kvm_ppc.h"
+#include "migration/vmstate.h"
+#include "sysemu/dma.h"
+#include "trace.h"
+
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+
+#include <libfdt.h>
+
+/*
+ * Access permission carried in the two low bits of a TCE; extracted with
+ * "tce & SPAPR_TCE_RW" by spapr_tce_iommu_access_flags().
+ */
+enum SpaprTceAccess {
+ SPAPR_TCE_FAULT = 0,
+ SPAPR_TCE_RO = 1,
+ SPAPR_TCE_WO = 2,
+ SPAPR_TCE_RW = 3,
+};
+
+/* Size in bytes of an IOMMU page with the given shift, and the matching mask */
+#define IOMMU_PAGE_SIZE(shift) (1ULL << (shift))
+#define IOMMU_PAGE_MASK(shift) (~(IOMMU_PAGE_SIZE(shift) - 1))
+
+/* Realized TCE tables; spapr_tce_find_by_liobn() searches it by LIOBN */
+static QLIST_HEAD(, SpaprTceTable) spapr_tce_tables;
+
+/*
+ * Look up the TCE table registered under @liobn. Returns NULL if no table
+ * matches or if @liobn has any of its upper 32 bits set.
+ */
+SpaprTceTable *spapr_tce_find_by_liobn(target_ulong liobn)
+{
+    SpaprTceTable *cur;
+
+    if ((liobn & 0xFFFFFFFF00000000ULL) != 0) {
+        hcall_dprintf("Request for out-of-bounds LIOBN 0x" TARGET_FMT_lx "\n",
+                      liobn);
+        return NULL;
+    }
+
+    /* QLIST_FOREACH leaves the cursor NULL when the list is exhausted */
+    QLIST_FOREACH(cur, &spapr_tce_tables, list) {
+        if ((uint32_t)liobn == cur->liobn) {
+            break;
+        }
+    }
+
+    return cur;
+}
+
+/* Map the permission bits of @tce to the corresponding IOMMUAccessFlags */
+static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce)
+{
+    static const IOMMUAccessFlags perm_for_access[] = {
+        [SPAPR_TCE_FAULT] = IOMMU_NONE,
+        [SPAPR_TCE_RO] = IOMMU_RO,
+        [SPAPR_TCE_WO] = IOMMU_WO,
+        [SPAPR_TCE_RW] = IOMMU_RW,
+    };
+
+    /* The mask keeps the index within the four entries above */
+    return perm_for_access[tce & SPAPR_TCE_RW];
+}
+
+/*
+ * Allocate a TCE table of @nb_table entries. With KVM enabled the table is
+ * first requested from kvmppc_create_spapr_tce(), which may also fill *fd.
+ * If that yields nothing, or KVM is not in use, *fd is set to -1 and a
+ * zeroed table is allocated with g_new0().
+ */
+static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
+                                       uint32_t page_shift,
+                                       uint64_t bus_offset,
+                                       uint32_t nb_table,
+                                       int *fd,
+                                       bool need_vfio)
+{
+    uint64_t *tbl = kvm_enabled()
+        ? kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table,
+                                  fd, need_vfio)
+        : NULL;
+
+    if (tbl == NULL) {
+        *fd = -1;
+        tbl = g_new0(uint64_t, nb_table);
+    }
+
+    trace_spapr_iommu_new_table(liobn, tbl, *fd);
+
+    return tbl;
+}
+
+/*
+ * Release a table obtained from spapr_tce_alloc_table(). With KVM enabled,
+ * kvmppc_remove_spapr_tce() is tried first; g_free() is used when KVM is
+ * not enabled or that call reports a non-zero result.
+ */
+static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
+{
+    if (kvm_enabled() &&
+        kvmppc_remove_spapr_tce(table, fd, nb_table) == 0) {
+        return;
+    }
+
+    g_free(table);
+}
+
+/*
+ * IOMMU translate callback. Called from RCU critical section.
+ *
+ * If @addr falls within the table, the result describes the page found in
+ * the matching TCE. Otherwise a no-permission entry covering the whole
+ * address space is returned.
+ */
+static IOMMUTLBEntry spapr_tce_translate_iommu(IOMMUMemoryRegion *iommu,
+                                               hwaddr addr,
+                                               IOMMUAccessFlags flag,
+                                               int iommu_idx)
+{
+    SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
+    hwaddr index = addr >> tcet->page_shift;
+    IOMMUTLBEntry entry = {
+        .target_as = &address_space_memory,
+        .iova = 0,
+        .translated_addr = 0,
+        .addr_mask = ~(hwaddr)0,
+        .perm = IOMMU_NONE,
+    };
+
+    /* Only look at the table when the index is in bounds */
+    if (index < tcet->nb_table) {
+        hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+        uint64_t tce = tcet->table[index];
+
+        entry.iova = addr & page_mask;
+        entry.translated_addr = tce & page_mask;
+        entry.addr_mask = ~page_mask;
+        entry.perm = spapr_tce_iommu_access_flags(tce);
+    }
+    trace_spapr_iommu_xlate(tcet->liobn, addr, entry.translated_addr,
+                            entry.perm, entry.addr_mask);
+
+    return entry;
+}
+
+/*
+ * IOMMU replay callback: translate every page of the region, one minimum
+ * page size at a time, and notify @n for each translation whose permission
+ * is not IOMMU_NONE. Does nothing while tcet->skipping_replay is set.
+ */
+static void spapr_tce_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
+{
+    MemoryRegion *mr = MEMORY_REGION(iommu_mr);
+    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr);
+    SpaprTceTable *tcet = container_of(iommu_mr, SpaprTceTable, iommu);
+    hwaddr addr = 0;
+    hwaddr step;
+
+    if (tcet->skipping_replay) {
+        return;
+    }
+
+    step = memory_region_iommu_get_min_page_size(iommu_mr);
+
+    while (addr < memory_region_size(mr)) {
+        IOMMUTLBEntry iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE,
+                                              n->iommu_idx);
+
+        if (iotlb.perm != IOMMU_NONE) {
+            n->notify(n, &iotlb);
+        }
+
+        /*
+         * if (2^64 - MR size) < granularity, it's possible to get an
+         * infinite loop here.  This should catch such a wraparound.
+         */
+        if ((addr + step) < addr) {
+            break;
+        }
+        addr += step;
+    }
+}
+
+/*
+ * VMState pre_save hook: point the migration fields (mig_table,
+ * mig_nb_table) at the live table so they are what gets saved.
+ * Always returns 0.
+ */
+static int spapr_tce_table_pre_save(void *opaque)
+{
+    SpaprTceTable *tbl = SPAPR_TCE_TABLE(opaque);
+
+    tbl->mig_nb_table = tbl->nb_table;
+    tbl->mig_table = tbl->table;
+
+    trace_spapr_iommu_pre_save(tbl->liobn, tbl->mig_nb_table,
+                               tbl->bus_offset, tbl->page_shift);
+
+    return 0;
+}
+
+/* IOMMU callback: the minimum page size is the table's configured page size */
+static uint64_t spapr_tce_get_min_page_size(IOMMUMemoryRegion *iommu)
+{
+    SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu);
+
+    return IOMMU_PAGE_SIZE(tbl->page_shift);
+}
+
+/*
+ * IOMMU attribute callback. Only IOMMU_ATTR_SPAPR_TCE_FD is handled, and
+ * only when kvmppc_has_cap_spapr_vfio() reports the capability: the table's
+ * fd is then stored through @data (read as an int pointer) and 0 is
+ * returned. Every other request returns -EINVAL.
+ */
+static int spapr_tce_get_attr(IOMMUMemoryRegion *iommu,
+                              enum IOMMUMemoryRegionAttr attr, void *data)
+{
+    SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu);
+
+    if (attr != IOMMU_ATTR_SPAPR_TCE_FD || !kvmppc_has_cap_spapr_vfio()) {
+        return -EINVAL;
+    }
+
+    *(int *) data = tbl->fd;
+    return 0;
+}
+
+/*
+ * IOMMU callback invoked when the set of notifier flags changes from @old
+ * to @new.
+ *
+ * Device-IOTLB unmap notifiers are rejected with -EINVAL and an error in
+ * @errp. Otherwise the table's "need VFIO" state is switched on when the
+ * first notifier appears and off when the last one goes away, and 0 is
+ * returned.
+ */
+static int spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
+                                         IOMMUNotifierFlag old,
+                                         IOMMUNotifierFlag new,
+                                         Error **errp)
+{
+    struct SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu);
+
+    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
+        /* Message used to read "spart_tce", a typo of the subsystem name */
+        error_setg(errp, "spapr_tce does not support dev-iotlb yet");
+        return -EINVAL;
+    }
+
+    if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) {
+        spapr_tce_set_need_vfio(tbl, true);
+    } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) {
+        spapr_tce_set_need_vfio(tbl, false);
+    }
+    return 0;
+}
+
+/*
+ * VMState post_load hook: bring the live table in line with the migrated
+ * one.
+ *
+ * If the migrated entry count differs from the current one the table is
+ * disabled; if the migration stream carried entries and the table is
+ * (now) disabled, it is re-enabled with the migrated entry count. The
+ * migrated entries are then copied in and the temporary migration buffer
+ * is released. Always returns 0.
+ */
+static int spapr_tce_table_post_load(void *opaque, int version_id)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(opaque);
+    uint32_t old_nb_table = tcet->nb_table;
+    uint64_t old_bus_offset = tcet->bus_offset;
+    uint32_t old_page_shift = tcet->page_shift;
+
+    if (tcet->vdev) {
+        spapr_vio_set_bypass(tcet->vdev, tcet->bypass);
+    }
+
+    if (tcet->mig_nb_table != tcet->nb_table) {
+        spapr_tce_table_disable(tcet);
+    }
+
+    if (tcet->mig_nb_table) {
+        if (!tcet->nb_table) {
+            /* Geometry was captured above, before the disable reset it */
+            spapr_tce_table_enable(tcet, old_page_shift, old_bus_offset,
+                                   tcet->mig_nb_table);
+        }
+
+        memcpy(tcet->table, tcet->mig_table,
+               tcet->nb_table * sizeof(tcet->table[0]));
+
+        /*
+         * mig_table is filled through VMSTATE_VARRAY_UINT32_ALLOC, i.e. it
+         * is allocated by the migration code (presumably with the GLib
+         * allocator), so release it with g_free() rather than free().
+         */
+        g_free(tcet->mig_table);
+        tcet->mig_table = NULL;
+    }
+
+    trace_spapr_iommu_post_load(tcet->liobn, old_nb_table, tcet->nb_table,
+                                tcet->bus_offset, tcet->page_shift);
+
+    return 0;
+}
+
+/*
+ * Subsection predicate: the "spapr_iommu_ex" subsection is only needed
+ * when the table has a non-zero bus offset or a page shift other than 0xC.
+ */
+static bool spapr_tce_table_ex_needed(void *opaque)
+{
+    SpaprTceTable *tbl = opaque;
+
+    if (tbl->bus_offset) {
+        return true;
+    }
+    return tbl->page_shift != 0xC;
+}
+
+/*
+ * Optional migration subsection carrying the table geometry (bus offset
+ * and page shift). It is sent only when spapr_tce_table_ex_needed()
+ * returns true.
+ */
+static const VMStateDescription vmstate_spapr_tce_table_ex = {
+ .name = "spapr_iommu_ex",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = spapr_tce_table_ex_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(bus_offset, SpaprTceTable),
+ VMSTATE_UINT32(page_shift, SpaprTceTable),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Migration description of a TCE table. The table contents travel through
+ * the mig_nb_table / mig_table pair, which spapr_tce_table_pre_save() fills
+ * in on the source and spapr_tce_table_post_load() consumes on the
+ * destination.
+ */
+static const VMStateDescription vmstate_spapr_tce_table = {
+ .name = "spapr_iommu",
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .pre_save = spapr_tce_table_pre_save,
+ .post_load = spapr_tce_table_post_load,
+ .fields = (VMStateField []) {
+ /* Sanity check */
+ VMSTATE_UINT32_EQUAL(liobn, SpaprTceTable, NULL),
+
+ /* IOMMU state */
+ VMSTATE_UINT32(mig_nb_table, SpaprTceTable),
+ VMSTATE_BOOL(bypass, SpaprTceTable),
+ /* mig_table is an array of mig_nb_table uint64_t entries */
+ VMSTATE_VARRAY_UINT32_ALLOC(mig_table, SpaprTceTable, mig_nb_table, 0,
+ vmstate_info_uint64, uint64_t),
+
+ VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription*[]) {
+ &vmstate_spapr_tce_table_ex,
+ NULL
+ }
+};
+
+/*
+ * Realize a TCE table device: reset its fd/VFIO state, create the root
+ * container region and the (initially empty) IOMMU region, add the table
+ * to the global list and register its migration state under its LIOBN.
+ */
+static void spapr_tce_table_realize(DeviceState *dev, Error **errp)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev);
+    Object *owner = OBJECT(tcet);
+    gchar *root_name = g_strdup_printf("tce-root-%x", tcet->liobn);
+    gchar *iommu_name = g_strdup_printf("tce-iommu-%x", tcet->liobn);
+
+    tcet->fd = -1;
+    tcet->need_vfio = false;
+
+    memory_region_init(&tcet->root, owner, root_name, UINT64_MAX);
+    memory_region_init_iommu(&tcet->iommu, sizeof(tcet->iommu),
+                             TYPE_SPAPR_IOMMU_MEMORY_REGION,
+                             owner, iommu_name, 0);
+
+    g_free(root_name);
+    g_free(iommu_name);
+
+    QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
+
+    vmstate_register(VMSTATE_IF(tcet), tcet->liobn, &vmstate_spapr_tce_table,
+                     tcet);
+}
+
+/*
+ * Record whether the table must be usable by VFIO. @need_vfio must differ
+ * from the current setting (asserted).
+ *
+ * Nothing more is done when VFIO is no longer needed, or when the table
+ * already has an fd and kvmppc_has_cap_spapr_vfio() reports the capability.
+ * Otherwise a new table is allocated, the current contents are copied over
+ * and the old table is freed.
+ */
+void spapr_tce_set_need_vfio(SpaprTceTable *tcet, bool need_vfio)
+{
+    size_t nbytes = tcet->nb_table * sizeof(uint64_t);
+    uint64_t *prev;
+    int newfd = -1;
+
+    g_assert(need_vfio != tcet->need_vfio);
+
+    tcet->need_vfio = need_vfio;
+
+    if (!need_vfio) {
+        return;
+    }
+    if (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio()) {
+        return;
+    }
+
+    prev = tcet->table;
+
+    tcet->table = spapr_tce_alloc_table(tcet->liobn,
+                                        tcet->page_shift,
+                                        tcet->bus_offset,
+                                        tcet->nb_table,
+                                        &newfd,
+                                        need_vfio);
+    memcpy(tcet->table, prev, nbytes);
+
+    /* The old table is still described by the old fd at this point */
+    spapr_tce_free_table(prev, tcet->fd, tcet->nb_table);
+
+    tcet->fd = newfd;
+}
+
+/*
+ * Create and realize a TCE table for @liobn as a child object of @owner
+ * named "tce-table-<liobn in hex>". Returns NULL (after reporting an
+ * error) if a table with that LIOBN already exists.
+ */
+SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
+{
+    SpaprTceTable *tbl;
+    gchar *child_name;
+
+    if (spapr_tce_find_by_liobn(liobn) != NULL) {
+        error_report("Attempted to create TCE table with duplicate"
+                " LIOBN 0x%x", liobn);
+        return NULL;
+    }
+
+    tbl = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
+    tbl->liobn = liobn;
+
+    child_name = g_strdup_printf("tce-table-%x", liobn);
+    object_property_add_child(OBJECT(owner), child_name, OBJECT(tbl));
+    g_free(child_name);
+    /* Drop the reference taken by object_new() now that the child is added */
+    object_unref(OBJECT(tbl));
+
+    qdev_realize(DEVICE(tbl), NULL, NULL);
+
+    return tbl;
+}
+
+/*
+ * Enable a TCE table: record its geometry, allocate @nb_table entries,
+ * size the IOMMU region to nb_table << page_shift bytes and map it into
+ * the root region at @bus_offset. A table that is already enabled is left
+ * untouched and a warning is emitted.
+ */
+void spapr_tce_table_enable(SpaprTceTable *tcet,
+                            uint32_t page_shift, uint64_t bus_offset,
+                            uint32_t nb_table)
+{
+    MemoryRegion *iommu_mr;
+
+    if (tcet->nb_table != 0) {
+        warn_report("trying to enable already enabled TCE table");
+        return;
+    }
+
+    tcet->bus_offset = bus_offset;
+    tcet->page_shift = page_shift;
+    tcet->nb_table = nb_table;
+    tcet->table = spapr_tce_alloc_table(tcet->liobn, page_shift, bus_offset,
+                                        nb_table, &tcet->fd, tcet->need_vfio);
+
+    iommu_mr = MEMORY_REGION(&tcet->iommu);
+    memory_region_set_size(iommu_mr,
+                           (uint64_t)tcet->nb_table << tcet->page_shift);
+    memory_region_add_subregion(&tcet->root, tcet->bus_offset, iommu_mr);
+}
+
+/*
+ * Disable a TCE table: unmap and shrink its IOMMU region to zero, free the
+ * entries and reset the geometry fields. A table that has no entries is
+ * left as it is.
+ */
+void spapr_tce_table_disable(SpaprTceTable *tcet)
+{
+    MemoryRegion *iommu_mr;
+
+    if (tcet->nb_table == 0) {
+        return;
+    }
+
+    iommu_mr = MEMORY_REGION(&tcet->iommu);
+    memory_region_del_subregion(&tcet->root, iommu_mr);
+    memory_region_set_size(iommu_mr, 0);
+
+    spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table);
+
+    tcet->table = NULL;
+    tcet->fd = -1;
+    tcet->nb_table = 0;
+    tcet->page_shift = 0;
+    tcet->bus_offset = 0;
+}
+
+/*
+ * Undo spapr_tce_table_realize(): unregister the migration state, drop the
+ * table from the global list and disable it.
+ */
+static void spapr_tce_table_unrealize(DeviceState *dev)
+{
+    SpaprTceTable *tbl = SPAPR_TCE_TABLE(dev);
+
+    vmstate_unregister(VMSTATE_IF(tbl), &vmstate_spapr_tce_table, tbl);
+    QLIST_REMOVE(tbl, list);
+    spapr_tce_table_disable(tbl);
+}
+
+/*
+ * Return the table's root memory region, i.e. the container into which the
+ * IOMMU region is mapped while the table is enabled.
+ */
+MemoryRegion *spapr_tce_get_iommu(SpaprTceTable *tcet)
+{
+    MemoryRegion *root = &tcet->root;
+
+    return root;
+}
+
+/* Device reset: clear every entry of an enabled table */
+static void spapr_tce_reset(DeviceState *dev)
+{
+    SpaprTceTable *tbl = SPAPR_TCE_TABLE(dev);
+
+    if (!tbl->nb_table) {
+        return;
+    }
+
+    memset(tbl->table, 0, tbl->nb_table * sizeof(uint64_t));
+}
+
+/*
+ * Store @tce in the entry covering @ioba and notify IOMMU listeners.
+ *
+ * The entry index is (ioba - bus_offset) >> page_shift. Returns
+ * H_PARAMETER if that index is outside the table, H_SUCCESS otherwise.
+ * The notification is a MAP event when the TCE grants any access and an
+ * UNMAP event when it grants none.
+ */
+static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba,
+                                target_ulong tce)
+{
+    IOMMUTLBEvent event;
+    hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+    unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
+
+    if (index >= tcet->nb_table) {
+        hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
+                      TARGET_FMT_lx "\n", ioba);
+        return H_PARAMETER;
+    }
+
+    tcet->table[index] = tce;
+
+    /* This line used to end in ',' and chain into the next assignment */
+    event.entry.target_as = &address_space_memory;
+    event.entry.iova = (ioba - tcet->bus_offset) & page_mask;
+    event.entry.translated_addr = tce & page_mask;
+    event.entry.addr_mask = ~page_mask;
+    event.entry.perm = spapr_tce_iommu_access_flags(tce);
+    event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP;
+    memory_region_notify_iommu(&tcet->iommu, 0, event);
+
+    return H_SUCCESS;
+}
+
+/*
+ * H_PUT_TCE_INDIRECT: store up to 512 TCEs, read as big-endian 64-bit
+ * values from the guest list at args[2], into consecutive pages of the
+ * table args[0], starting at I/O bus address args[1]. args[3] is the count.
+ *
+ * Returns H_PARAMETER for an unknown LIOBN, a count above 512, a list
+ * address with SPAPR_TCE_PAGE_MASK bits set, or a count of zero; otherwise
+ * the status of the last put_tce_emu() call (the loop stops at the first
+ * failure).
+ */
+static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
+                                       SpaprMachineState *spapr,
+                                       target_ulong opcode, target_ulong *args)
+{
+    int i;
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong ioba1 = ioba;
+    target_ulong tce_list = args[2];
+    target_ulong npages = args[3];
+    target_ulong ret = H_PARAMETER, tce = 0;
+    SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
+    CPUState *cs = CPU(cpu);
+    hwaddr page_mask, page_size;
+
+    if (!tcet || (npages > 512) || (tce_list & SPAPR_TCE_PAGE_MASK)) {
+        return H_PARAMETER;
+    }
+
+    page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+    page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
+    ioba &= page_mask;
+
+    i = 0;
+    while (i < npages) {
+        tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong));
+
+        ret = put_tce_emu(tcet, ioba, tce);
+        if (ret) {
+            break;
+        }
+        ++i;
+        ioba += page_size;
+    }
+
+    /* Trace last successful or the first problematic entry */
+    if (i) {
+        i--;
+    }
+    if (SPAPR_IS_PCI_LIOBN(liobn)) {
+        trace_spapr_iommu_pci_indirect(liobn, ioba1, tce_list, i, tce, ret);
+    } else {
+        trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, tce, ret);
+    }
+    return ret;
+}
+
+/*
+ * H_STUFF_TCE: store the same TCE value args[2] into args[3] consecutive
+ * pages of the table args[0], starting at I/O bus address args[1].
+ *
+ * Returns H_PARAMETER for an unknown LIOBN, a count larger than the table,
+ * or a count of zero; otherwise the status of the last put_tce_emu() call
+ * (the loop stops at the first failure).
+ */
+static target_ulong h_stuff_tce(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                target_ulong opcode, target_ulong *args)
+{
+    int i;
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce_value = args[2];
+    target_ulong npages = args[3];
+    target_ulong ret = H_PARAMETER;
+    SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
+    hwaddr page_mask, page_size;
+
+    if (!tcet || npages > tcet->nb_table) {
+        return H_PARAMETER;
+    }
+
+    page_mask = IOMMU_PAGE_MASK(tcet->page_shift);
+    page_size = IOMMU_PAGE_SIZE(tcet->page_shift);
+    ioba &= page_mask;
+
+    i = 0;
+    while (i < npages) {
+        ret = put_tce_emu(tcet, ioba, tce_value);
+        if (ret) {
+            break;
+        }
+        ++i;
+        ioba += page_size;
+    }
+
+    /* Note: ioba has been advanced by the loop when it is traced here */
+    if (SPAPR_IS_PCI_LIOBN(liobn)) {
+        trace_spapr_iommu_pci_stuff(liobn, ioba, tce_value, npages, ret);
+    } else {
+        trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret);
+    }
+
+    return ret;
+}
+
+/*
+ * H_PUT_TCE: store TCE args[2] for the page containing I/O bus address
+ * args[1] in the table args[0]. Returns H_PARAMETER if the LIOBN is
+ * unknown, otherwise the status of put_tce_emu().
+ */
+static target_ulong h_put_tce(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce = args[2];
+    target_ulong ret = H_PARAMETER;
+    SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
+
+    if (tcet) {
+        /* Round the address down to its page before storing */
+        ioba &= IOMMU_PAGE_MASK(tcet->page_shift);
+        ret = put_tce_emu(tcet, ioba, tce);
+    }
+
+    if (SPAPR_IS_PCI_LIOBN(liobn)) {
+        trace_spapr_iommu_pci_put(liobn, ioba, tce, ret);
+    } else {
+        trace_spapr_iommu_put(liobn, ioba, tce, ret);
+    }
+
+    return ret;
+}
+
+/*
+ * Read the TCE covering @ioba into *tce. Returns H_PARAMETER, leaving *tce
+ * untouched, if the entry index is outside the table; H_SUCCESS otherwise.
+ */
+static target_ulong get_tce_emu(SpaprTceTable *tcet, target_ulong ioba,
+                                target_ulong *tce)
+{
+    unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift;
+
+    if (index < tcet->nb_table) {
+        *tce = tcet->table[index];
+        return H_SUCCESS;
+    }
+
+    hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x"
+                  TARGET_FMT_lx "\n", ioba);
+    return H_PARAMETER;
+}
+
+/*
+ * H_GET_TCE: read the TCE for the page containing I/O bus address args[1]
+ * in the table args[0]; on success the value is returned in args[0].
+ * Returns H_PARAMETER if the LIOBN is unknown, otherwise the status of
+ * get_tce_emu().
+ */
+static target_ulong h_get_tce(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    target_ulong liobn = args[0];
+    target_ulong ioba = args[1];
+    target_ulong tce = 0;
+    target_ulong ret = H_PARAMETER;
+    SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn);
+
+    if (tcet) {
+        /* Round the address down to its page before the lookup */
+        ioba &= IOMMU_PAGE_MASK(tcet->page_shift);
+
+        ret = get_tce_emu(tcet, ioba, &tce);
+        if (ret == 0) {
+            args[0] = tce;
+        }
+    }
+
+    if (SPAPR_IS_PCI_LIOBN(liobn)) {
+        trace_spapr_iommu_pci_get(liobn, ioba, ret, tce);
+    } else {
+        trace_spapr_iommu_get(liobn, ioba, ret, tce);
+    }
+
+    return ret;
+}
+
+/*
+ * Describe a DMA window in device tree node @node_off of @fdt: sets
+ * "ibm,#dma-address-cells" and "ibm,#dma-size-cells" to 2 and stores the
+ * five big-endian cells { liobn, window high, window low, 0, size } under
+ * @propname. Returns 0 on success or the negative libfdt error of the
+ * first call that failed.
+ */
+int spapr_dma_dt(void *fdt, int node_off, const char *propname,
+                 uint32_t liobn, uint64_t window, uint32_t size)
+{
+    uint32_t dma_prop[5] = {
+        cpu_to_be32(liobn),
+        cpu_to_be32(window >> 32),
+        cpu_to_be32(window & 0xFFFFFFFF),
+        0, /* window size is 32 bits */
+        cpu_to_be32(size),
+    };
+    int rc;
+
+    rc = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2);
+    if (rc < 0) {
+        return rc;
+    }
+
+    rc = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2);
+    if (rc < 0) {
+        return rc;
+    }
+
+    rc = fdt_setprop(fdt, node_off, propname, dma_prop, sizeof(dma_prop));
+
+    return rc < 0 ? rc : 0;
+}
+
+/*
+ * Describe the DMA window of @tcet (starting at 0, nb_table << page_shift
+ * bytes long) through spapr_dma_dt(). A NULL table is not an error: nothing
+ * is written and 0 is returned.
+ */
+int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
+                      SpaprTceTable *tcet)
+{
+    return tcet
+        ? spapr_dma_dt(fdt, node_off, propname,
+                       tcet->liobn, 0, tcet->nb_table << tcet->page_shift)
+        : 0;
+}
+
+/*
+ * Class initializer of the TCE table device: installs the device hooks,
+ * initializes the global table list and registers the hcall-tce handlers.
+ */
+static void spapr_tce_table_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    QLIST_INIT(&spapr_tce_tables);
+
+    dc->realize = spapr_tce_table_realize;
+    dc->unrealize = spapr_tce_table_unrealize;
+    dc->reset = spapr_tce_reset;
+    /* Reason: This is just an internal device for handling the hypercalls */
+    dc->user_creatable = false;
+
+    /* hcall-tce */
+    spapr_register_hypercall(H_PUT_TCE, h_put_tce);
+    spapr_register_hypercall(H_GET_TCE, h_get_tce);
+    spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect);
+    spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce);
+}
+
+/*
+ * QOM type description of the TCE table device. Declared const like the
+ * other TypeInfo objects in this file: it is only ever passed to
+ * type_register_static() and never modified.
+ */
+static const TypeInfo spapr_tce_table_info = {
+    .name = TYPE_SPAPR_TCE_TABLE,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(SpaprTceTable),
+    .class_init = spapr_tce_table_class_init,
+};
+
+/* Class initializer of the sPAPR IOMMU memory region: installs its callbacks */
+static void spapr_iommu_memory_region_class_init(ObjectClass *klass, void *data)
+{
+    IOMMUMemoryRegionClass *iommu_class = IOMMU_MEMORY_REGION_CLASS(klass);
+
+    iommu_class->get_min_page_size = spapr_tce_get_min_page_size;
+    iommu_class->get_attr = spapr_tce_get_attr;
+    iommu_class->notify_flag_changed = spapr_tce_notify_flag_changed;
+    iommu_class->translate = spapr_tce_translate_iommu;
+    iommu_class->replay = spapr_tce_replay;
+}
+
+/*
+ * QOM type description of the sPAPR IOMMU memory region, a subtype of
+ * TYPE_IOMMU_MEMORY_REGION whose callbacks are set in
+ * spapr_iommu_memory_region_class_init().
+ */
+static const TypeInfo spapr_iommu_memory_region_info = {
+ .parent = TYPE_IOMMU_MEMORY_REGION,
+ .name = TYPE_SPAPR_IOMMU_MEMORY_REGION,
+ .class_init = spapr_iommu_memory_region_class_init,
+};
+
+/* Module initializer: register the QOM types defined in this file */
+static void register_types(void)
+{
+    const TypeInfo *const types[] = {
+        &spapr_tce_table_info,
+        &spapr_iommu_memory_region_info,
+    };
+    size_t i;
+
+    for (i = 0; i < sizeof(types) / sizeof(types[0]); i++) {
+        type_register_static(types[i]);
+    }
+}
+
+type_init(register_types);
diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c
new file mode 100644
index 000000000..a0d1e1298
--- /dev/null
+++ b/hw/ppc/spapr_irq.c
@@ -0,0 +1,599 @@
+/*
+ * QEMU PowerPC sPAPR IRQ interface
+ *
+ * Copyright (c) 2018, IBM Corporation.
+ *
+ * This code is licensed under the GPL version 2 or later. See the
+ * COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "hw/irq.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/spapr_xive.h"
+#include "hw/ppc/xics.h"
+#include "hw/ppc/xics_spapr.h"
+#include "hw/qdev-properties.h"
+#include "cpu-models.h"
+#include "sysemu/kvm.h"
+
+#include "trace.h"
+
+/*
+ * QOM description of the sPAPR interrupt controller interface. It is an
+ * interface type (parent TYPE_INTERFACE) whose class structure is
+ * SpaprInterruptControllerClass.
+ */
+static const TypeInfo spapr_intc_info = {
+ .name = TYPE_SPAPR_INTC,
+ .parent = TYPE_INTERFACE,
+ .class_size = sizeof(SpaprInterruptControllerClass),
+};
+
+/*
+ * Set up the bitmap based MSI allocator of the machine. Nothing is
+ * allocated for machine classes using the legacy IRQ allocation scheme.
+ */
+static void spapr_irq_msi_init(SpaprMachineState *spapr)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+
+    /* Legacy mode doesn't use this allocator */
+    if (!smc->legacy_irq_allocation) {
+        spapr->irq_map_nr = spapr_irq_nr_msis(spapr);
+        spapr->irq_map = bitmap_new(spapr->irq_map_nr);
+    }
+}
+
+/*
+ * Allocate a block of @num contiguous MSI interrupt numbers from the
+ * machine's IRQ bitmap.
+ *
+ * @spapr: machine owning the bitmap (irq_map / irq_map_nr)
+ * @num:   number of contiguous IRQs requested
+ * @align: when true, the block starts on a multiple of the smallest
+ *         power of two that is >= @num; when false no alignment is
+ *         applied
+ * @errp:  set when no suitable free block exists
+ *
+ * Returns the first IRQ number of the block (bitmap index plus
+ * SPAPR_IRQ_MSI), or -1 on failure.
+ *
+ * NOTE(review): the alignment is applied to the index inside the bitmap.
+ * It carries over to the returned IRQ number only if SPAPR_IRQ_MSI is
+ * itself a multiple of the alignment -- confirm against its definition.
+ */
+int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align,
+                        Error **errp)
+{
+    unsigned long align_mask = 0;
+    unsigned long first;
+
+    /*
+     * The 'align_mask' parameter of bitmap_find_next_zero_area()
+     * should be one less than a power of 2; 0 means no alignment.
+     *
+     * 'align' is a bool, so the mask cannot be derived by decrementing
+     * it: that yields 0 (no alignment) when alignment is requested and
+     * 1 when it is not. Build the mask from the block size instead.
+     */
+    if (align) {
+        uint64_t span = 1;
+
+        while (span < num) {
+            span <<= 1;
+        }
+        align_mask = span - 1;
+    }
+
+    first = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0,
+                                       num, align_mask);
+
+    /*
+     * Treat any result at or beyond the end of the map as a failure
+     * rather than testing for equality only, so that an out-of-range
+     * index can never reach bitmap_set().
+     */
+    if (first >= spapr->irq_map_nr) {
+        error_setg(errp, "can't find a free %u-IRQ block", num);
+        return -1;
+    }
+
+    bitmap_set(spapr->irq_map, first, num);
+
+    return first + SPAPR_IRQ_MSI;
+}
+
+/*
+ * Release @num MSI interrupt numbers starting at @irq by clearing the
+ * matching bits of the machine's IRQ bitmap.
+ */
+void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num)
+{
+    /* The bitmap is indexed relative to SPAPR_IRQ_MSI */
+    int first_bit = irq - SPAPR_IRQ_MSI;
+
+    bitmap_clear(spapr->irq_map, first_bit, num);
+}
+
+/*
+ * Try to set up the in-kernel (KVM) side of interrupt controller @intc by
+ * calling @fn.
+ *
+ * Nothing is attempted unless KVM is enabled and the kernel irqchip is
+ * allowed. When @fn fails, the failure is fatal (-1, @errp set) only if
+ * the kernel irqchip is required; otherwise a warning is reported and 0
+ * is returned.
+ */
+int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn,
+                       SpaprInterruptController *intc,
+                       uint32_t nr_servers,
+                       Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (!kvm_enabled() || !kvm_kernel_irqchip_allowed()) {
+        return 0;
+    }
+
+    if (fn(intc, nr_servers, &local_err) >= 0) {
+        return 0;
+    }
+
+    if (kvm_kernel_irqchip_required()) {
+        error_prepend(&local_err,
+                      "kernel_irqchip requested but unavailable: ");
+        error_propagate(errp, local_err);
+        return -1;
+    }
+
+    /*
+     * We failed to initialize the KVM device, fallback to
+     * emulated mode
+     */
+    error_prepend(&local_err,
+                  "kernel_irqchip allowed but unavailable: ");
+    error_append_hint(&local_err,
+                      "Falling back to kernel-irqchip=off\n");
+    warn_report_err(local_err);
+
+    return 0;
+}
+
+/*
+ * XICS IRQ backend.
+ */
+
+/* Only the XICS controller is created for this backend. */
+SpaprIrq spapr_irq_xics = {
+ .xics = true,
+ .xive = false,
+};
+
+/*
+ * XIVE IRQ backend.
+ */
+
+/* Only the XIVE controller is created for this backend. */
+SpaprIrq spapr_irq_xive = {
+ .xics = false,
+ .xive = true,
+};
+
+/*
+ * Dual XIVE and XICS IRQ backend.
+ *
+ * The objects of both interrupt modes, XIVE and XICS, are created but
+ * the machine starts in the legacy interrupt mode (XICS). The mode can
+ * be changed by the CAS negotiation process and, in that case, the new
+ * mode is activated after an extra machine reset.
+ */
+
+/*
+ * Define values in sync with the XIVE and XICS backend
+ */
+/* Both the XICS and the XIVE controllers are created for this backend. */
+SpaprIrq spapr_irq_dual = {
+ .xics = true,
+ .xive = true,
+};
+
+
+/*
+ * Validate the interrupt mode selected in spapr->irq against the CPU
+ * type / compat mode and against the KVM irqchip capabilities.
+ *
+ * May replace a 'dual' selection with XICS-only on machines that are not
+ * POWER9 compatible. Returns 0 when the configuration is usable, -1 with
+ * @errp set otherwise.
+ */
+static int spapr_irq_check(SpaprMachineState *spapr, Error **errp)
+{
+ ERRP_GUARD();
+ MachineState *machine = MACHINE(spapr);
+
+ /*
+ * Sanity checks on non-P9 machines. On these, XIVE is not
+ * advertised, see spapr_dt_ov5_platform_support()
+ */
+ if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00,
+ 0, spapr->max_compat_pvr)) {
+ /*
+ * If the 'dual' interrupt mode is selected, force XICS as CAS
+ * negotiation is useless.
+ */
+ if (spapr->irq == &spapr_irq_dual) {
+ spapr->irq = &spapr_irq_xics;
+ return 0;
+ }
+
+ /*
+ * Non-P9 machines using only XIVE is a bogus setup. We have two
+ * scenarios to take into account because of the compat mode:
+ *
+ * 1. POWER7/8 machines should fail to init later on when creating
+ * the XIVE interrupt presenters because a POWER9 exception
+ * model is required.
+ *
+ * 2. POWER9 machines using the POWER8 compat mode won't fail and
+ * will let the OS boot with a partial XIVE setup : DT
+ * properties but no hcalls.
+ *
+ * To cover both and not confuse the OS, add an early failure in
+ * QEMU.
+ */
+ if (!spapr->irq->xics) {
+ error_setg(errp, "XIVE-only machines require a POWER9 CPU");
+ return -1;
+ }
+ }
+
+ /*
+ * On a POWER9 host, some older KVM XICS devices cannot be destroyed and
+ * re-created. The same happens with KVM nested guests. Detect that early
+ * so that QEMU does not exit later when the guest reboots.
+ */
+ if (kvm_enabled() &&
+ spapr->irq == &spapr_irq_dual &&
+ kvm_kernel_irqchip_required() &&
+ xics_kvm_has_broken_disconnect()) {
+ error_setg(errp,
+ "KVM is incompatible with ic-mode=dual,kernel-irqchip=on");
+ error_append_hint(errp,
+ "This can happen with an old KVM or in a KVM nested guest.\n");
+ error_append_hint(errp,
+ "Try without kernel-irqchip or with kernel-irqchip=off.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * sPAPR IRQ frontend routines for devices
+ */
+/*
+ * Array initializer listing the machine's XICS and XIVE objects as
+ * interrupt controllers. Users of the resulting array skip NULL entries.
+ */
+#define ALL_INTCS(spapr_) \
+ { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), }
+
+/*
+ * Create the per-CPU interrupt presenter state of @cpu in every
+ * interrupt controller present on the machine.
+ *
+ * Stops at the first controller reporting a failure and returns its
+ * negative return code; returns 0 when all controllers succeeded.
+ */
+int spapr_irq_cpu_intc_create(SpaprMachineState *spapr,
+                              PowerPCCPU *cpu, Error **errp)
+{
+    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
+    int idx;
+
+    for (idx = 0; idx < ARRAY_SIZE(intcs); idx++) {
+        SpaprInterruptControllerClass *sicc;
+        int ret;
+
+        if (!intcs[idx]) {
+            /* This controller does not exist in the current mode */
+            continue;
+        }
+
+        sicc = SPAPR_INTC_GET_CLASS(intcs[idx]);
+        ret = sicc->cpu_intc_create(intcs[idx], cpu, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Reset the per-CPU state of @cpu in every interrupt controller present
+ * on the machine.
+ */
+void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu)
+{
+    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
+    int idx;
+
+    for (idx = 0; idx < ARRAY_SIZE(intcs); idx++) {
+        SpaprInterruptControllerClass *sicc;
+
+        if (!intcs[idx]) {
+            continue;
+        }
+
+        sicc = SPAPR_INTC_GET_CLASS(intcs[idx]);
+        sicc->cpu_intc_reset(intcs[idx], cpu);
+    }
+}
+
+/*
+ * Tear down the per-CPU state of @cpu in every interrupt controller
+ * present on the machine.
+ */
+void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu)
+{
+    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
+    int idx;
+
+    for (idx = 0; idx < ARRAY_SIZE(intcs); idx++) {
+        SpaprInterruptControllerClass *sicc;
+
+        if (!intcs[idx]) {
+            continue;
+        }
+
+        sicc = SPAPR_INTC_GET_CLASS(intcs[idx]);
+        sicc->cpu_intc_destroy(intcs[idx], cpu);
+    }
+}
+
+/*
+ * qemu_irq handler of the machine: forwards the new @level of @irq to
+ * the currently active interrupt controller. @opaque is the machine.
+ */
+static void spapr_set_irq(void *opaque, int irq, int level)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
+    SpaprInterruptController *intc = spapr->active_intc;
+    SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
+
+    sicc->set_irq(intc, irq, level);
+}
+
+/* Let the active interrupt controller print its state on monitor @mon. */
+void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon)
+{
+    SpaprInterruptController *intc = spapr->active_intc;
+    SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
+
+    sicc->print_info(intc, mon);
+}
+
+/*
+ * Let the active interrupt controller describe itself in the device
+ * tree @fdt, passing @nr_servers and @phandle through to it.
+ */
+void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers,
+                  void *fdt, uint32_t phandle)
+{
+    SpaprInterruptController *intc = spapr->active_intc;
+    SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc);
+
+    sicc->dt(intc, nr_servers, fdt, phandle);
+}
+
+/*
+ * Number of interrupt numbers available for MSIs.
+ *
+ * With the legacy allocation scheme this is the whole nr_xirqs range;
+ * otherwise it is the part of the [SPAPR_XIRQ_BASE, SPAPR_XIRQ_BASE +
+ * nr_xirqs) range that starts at SPAPR_IRQ_MSI.
+ */
+uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+
+    return smc->legacy_irq_allocation ?
+        smc->nr_xirqs :
+        SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI;
+}
+
+/*
+ * Create the interrupt controller objects of the machine according to
+ * the selected backend (spapr->irq), allocate the qemu_irq array and pick
+ * the initially active controller.
+ *
+ * Errors are reported through @errp, except for the realization of the
+ * XIVE device and the link property setters, which abort or exit
+ * (&error_abort / &error_fatal).
+ */
+void spapr_irq_init(SpaprMachineState *spapr, Error **errp)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+
+ if (kvm_enabled() && kvm_kernel_irqchip_split()) {
+ error_setg(errp, "kernel_irqchip split mode not supported on pseries");
+ return;
+ }
+
+ /* May downgrade spapr->irq, so this has to run before it is read below */
+ if (spapr_irq_check(spapr, errp) < 0) {
+ return;
+ }
+
+ /* Initialize the MSI IRQ allocator. */
+ spapr_irq_msi_init(spapr);
+
+ /* XICS: create the interrupt source object as child "ics" of the machine */
+ if (spapr->irq->xics) {
+ Object *obj;
+
+ obj = object_new(TYPE_ICS_SPAPR);
+
+ object_property_add_child(OBJECT(spapr), "ics", obj);
+ object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr),
+ &error_abort);
+ object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort);
+ if (!qdev_realize(DEVICE(obj), NULL, errp)) {
+ return;
+ }
+
+ spapr->ics = ICS_SPAPR(obj);
+ }
+
+ /* XIVE: create the device, then claim one IPI per possible server */
+ if (spapr->irq->xive) {
+ uint32_t nr_servers = spapr_max_server_number(spapr);
+ DeviceState *dev;
+ int i;
+
+ dev = qdev_new(TYPE_SPAPR_XIVE);
+ qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE);
+ /*
+ * 8 XIVE END structures per CPU. One for each available
+ * priority
+ */
+ qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3);
+ object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr),
+ &error_abort);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+ spapr->xive = SPAPR_XIVE(dev);
+
+ /* Enable the CPU IPIs */
+ for (i = 0; i < nr_servers; ++i) {
+ SpaprInterruptControllerClass *sicc
+ = SPAPR_INTC_GET_CLASS(spapr->xive);
+
+ if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i,
+ false, errp) < 0) {
+ return;
+ }
+ }
+
+ spapr_xive_hcall_init(spapr);
+ }
+
+ /* One qemu_irq per interrupt number, all routed through spapr_set_irq() */
+ spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr,
+ smc->nr_xirqs + SPAPR_XIRQ_BASE);
+
+ /*
+ * Mostly we don't actually need this until reset, except that not
+ * having this set up can cause VFIO devices to issue a
+ * false-positive warning during realize(), because they don't yet
+ * have an in-kernel irq chip.
+ */
+ spapr_irq_update_active_intc(spapr);
+}
+
+/*
+ * Claim interrupt number @irq in every interrupt controller present on
+ * the machine. @lsi is passed through to the controllers.
+ *
+ * @irq must lie in [SPAPR_XIRQ_BASE, SPAPR_XIRQ_BASE + nr_xirqs); this is
+ * asserted. Returns 0 on success, or the negative return code of the
+ * first controller that failed (with @errp set by that controller).
+ */
+int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp)
+{
+    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    int idx;
+
+    assert(irq >= SPAPR_XIRQ_BASE);
+    assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
+
+    for (idx = 0; idx < ARRAY_SIZE(intcs); idx++) {
+        SpaprInterruptControllerClass *sicc;
+        int ret;
+
+        if (!intcs[idx]) {
+            continue;
+        }
+
+        sicc = SPAPR_INTC_GET_CLASS(intcs[idx]);
+        ret = sicc->claim_irq(intcs[idx], irq, lsi, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Release the @num interrupt numbers starting at @irq in every interrupt
+ * controller present on the machine.
+ *
+ * The whole range must lie in [SPAPR_XIRQ_BASE, SPAPR_XIRQ_BASE +
+ * nr_xirqs); this is asserted.
+ */
+void spapr_irq_free(SpaprMachineState *spapr, int irq, int num)
+{
+    SpaprInterruptController *intcs[] = ALL_INTCS(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    int cur, idx;
+
+    assert(irq >= SPAPR_XIRQ_BASE);
+    assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE));
+
+    /* Interrupt numbers in the outer loop, controllers in the inner one */
+    for (cur = irq; cur < (irq + num); cur++) {
+        for (idx = 0; idx < ARRAY_SIZE(intcs); idx++) {
+            SpaprInterruptControllerClass *sicc;
+
+            if (!intcs[idx]) {
+                continue;
+            }
+
+            sicc = SPAPR_INTC_GET_CLASS(intcs[idx]);
+            sicc->free_irq(intcs[idx], cur);
+        }
+    }
+}
+
+/*
+ * Return the qemu_irq of external interrupt number @irq.
+ *
+ * @irq must lie in [SPAPR_XIRQ_BASE, SPAPR_XIRQ_BASE + nr_xirqs) and must
+ * be valid in each controller present on the machine; all of this is
+ * asserted.
+ */
+qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+
+    /*
+     * This interface is basically for VIO and PHB devices to find the
+     * right qemu_irq to manipulate, so we only allow access to the
+     * external irqs for now. Currently anything which needs to
+     * access the IPIs most naturally gets there via the guest side
+     * interfaces, we can change this if we need to in future.
+     */
+    assert(irq >= SPAPR_XIRQ_BASE);
+    assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE));
+
+    /* XICS side, when present */
+    if (spapr->ics != NULL) {
+        assert(ics_valid_irq(spapr->ics, irq));
+    }
+
+    /* XIVE side, when present */
+    if (spapr->xive != NULL) {
+        assert(irq < spapr->xive->nr_irqs);
+        assert(xive_eas_is_valid(&spapr->xive->eat[irq]));
+    }
+
+    return spapr->qirqs[irq];
+}
+
+/*
+ * Re-select the active interrupt controller, then run its post_load
+ * hook with @version_id and return the hook's result.
+ */
+int spapr_irq_post_load(SpaprMachineState *spapr, int version_id)
+{
+    SpaprInterruptController *intc;
+    SpaprInterruptControllerClass *sicc;
+
+    /* The active controller may change here, so look it up afterwards */
+    spapr_irq_update_active_intc(spapr);
+
+    intc = spapr->active_intc;
+    sicc = SPAPR_INTC_GET_CLASS(intc);
+
+    return sicc->post_load(intc, version_id);
+}
+
+/*
+ * Reset hook of the IRQ layer: re-selects the active interrupt
+ * controller. @errp is not used by this function.
+ */
+void spapr_irq_reset(SpaprMachineState *spapr, Error **errp)
+{
+ /* If an MSI bitmap exists, no MSI may still be allocated at this point */
+ assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr));
+
+ spapr_irq_update_active_intc(spapr);
+}
+
+/*
+ * Look up the phandle of the "interrupt-controller" node located
+ * directly under the root of @fdt.
+ *
+ * Returns the phandle, or -1 with @errp set when the node is missing or
+ * has no phandle.
+ */
+int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp)
+{
+    const char *nodename = "interrupt-controller";
+    int offset = fdt_subnode_offset(fdt, 0, nodename);
+    int phandle;
+
+    if (offset < 0) {
+        error_setg(errp, "Can't find node \"%s\": %s",
+                   nodename, fdt_strerror(offset));
+        return -1;
+    }
+
+    phandle = fdt_get_phandle(fdt, offset);
+    if (phandle == 0) {
+        error_setg(errp, "Can't get phandle of node \"%s\"", nodename);
+        return -1;
+    }
+
+    return phandle;
+}
+
+/*
+ * Make @new_intc (must not be NULL) the active interrupt controller.
+ *
+ * Nothing happens when it is already active. Otherwise the previous
+ * controller, if any, is deactivated, the new one is activated (a
+ * failure there is fatal) and irqchip change notifiers are fired.
+ */
+static void set_active_intc(SpaprMachineState *spapr,
+                            SpaprInterruptController *new_intc)
+{
+    uint32_t nr_servers = spapr_max_server_number(spapr);
+    SpaprInterruptController *old_intc = spapr->active_intc;
+    SpaprInterruptControllerClass *sicc;
+
+    assert(new_intc);
+
+    if (old_intc == new_intc) {
+        /* Nothing to do */
+        return;
+    }
+
+    /* The deactivate hook is optional */
+    if (old_intc) {
+        sicc = SPAPR_INTC_GET_CLASS(old_intc);
+        if (sicc->deactivate) {
+            sicc->deactivate(old_intc);
+        }
+    }
+
+    /* So is the activate hook */
+    sicc = SPAPR_INTC_GET_CLASS(new_intc);
+    if (sicc->activate) {
+        sicc->activate(new_intc, nr_servers, &error_fatal);
+    }
+
+    spapr->active_intc = new_intc;
+
+    /*
+     * We've changed the kernel irqchip, let VFIO devices know they
+     * need to readjust.
+     */
+    kvm_irqchip_change_notify();
+}
+
+/*
+ * Select the interrupt controller that should be active and switch to
+ * it: XIVE when there is no XICS object or when the negotiated option
+ * vector (ov5_cas) has OV5_XIVE_EXPLOIT set, XICS otherwise.
+ */
+void spapr_irq_update_active_intc(SpaprMachineState *spapr)
+{
+    bool use_xive;
+
+    if (!spapr->ics) {
+        /*
+         * XXX before we run CAS, ov5_cas is initialized empty, which
+         * indicates XICS, even if we have ic-mode=xive. TODO: clean
+         * up the CAS path so that we have a clearer way of handling
+         * this.
+         */
+        use_xive = true;
+    } else {
+        use_xive = spapr->ov5_cas &&
+                   spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT);
+    }
+
+    set_active_intc(spapr, use_xive ? SPAPR_INTC(spapr->xive)
+                                    : SPAPR_INTC(spapr->ics));
+}
+
+/*
+ * XICS legacy routines - to deprecate one day
+ */
+
+/*
+ * Search @ics for @num consecutive free interrupt sources, trying only
+ * start indexes that are multiples of @alignnum.
+ *
+ * Returns the index of the first source of the block, or -1 when no
+ * such block exists.
+ */
+static int ics_find_free_block(ICSState *ics, int num, int alignnum)
+{
+    int start, pos;
+
+    for (start = 0; start < ics->nr_irqs; start += alignnum) {
+        /* Not enough sources left for a block starting here or later */
+        if (num > (ics->nr_irqs - start)) {
+            return -1;
+        }
+
+        /* Advance over free sources until the block is complete */
+        pos = start;
+        while (pos < start + num && ics_irq_free(ics, pos)) {
+            ++pos;
+        }
+
+        if (pos == (start + num)) {
+            return start;
+        }
+    }
+
+    return -1;
+}
+
+/*
+ * Legacy allocator: find a block of @num free interrupt numbers in the
+ * XICS interrupt source, which must exist (asserted).
+ *
+ * With @align set, @num must be a power of two no larger than 32
+ * (asserted) and the block is aligned on @num. Returns the first IRQ
+ * number of the block, or -1 with @errp set when none is available.
+ */
+int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp)
+{
+    ICSState *ics = spapr->ics;
+    int alignnum = 1;
+    int first = -1;
+
+    assert(ics);
+
+    /*
+     * MSIMessage::data is used for storing VIRQ so
+     * it has to be aligned to num to support multiple
+     * MSI vectors. MSI-X is not affected by this.
+     * The hint is used for the first IRQ, the rest should
+     * be allocated continuously.
+     */
+    if (align) {
+        assert((num == 1) || (num == 2) || (num == 4) ||
+               (num == 8) || (num == 16) || (num == 32));
+        alignnum = num;
+    }
+
+    first = ics_find_free_block(ics, num, alignnum);
+    if (first < 0) {
+        error_setg(errp, "can't find a free %d-IRQ block", num);
+        return -1;
+    }
+
+    return first + ics->offset;
+}
+
+/* XICS-only backend; carries the same flag values as spapr_irq_xics. */
+SpaprIrq spapr_irq_xics_legacy = {
+ .xics = true,
+ .xive = false,
+};
+
+/* Register the sPAPR interrupt controller interface type. */
+static void spapr_irq_register_types(void)
+{
+ type_register_static(&spapr_intc_info);
+}
+
+/* Hand spapr_irq_register_types() to type_init(). */
+type_init(spapr_irq_register_types)
diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
new file mode 100644
index 000000000..e9ef7e764
--- /dev/null
+++ b/hw/ppc/spapr_numa.c
@@ -0,0 +1,697 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition NUMA associativity handling
+ *
+ * Copyright IBM Corp. 2020
+ *
+ * Authors:
+ * Daniel Henrique Barboza <danielhb413@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "hw/ppc/spapr_numa.h"
+#include "hw/pci-host/spapr.h"
+#include "hw/ppc/fdt.h"
+
+/*
+ * Associativity domain value (1, stored big-endian) given to the GPU
+ * associativity arrays when smc->pre_5_1_assoc_refpoints is set.
+ * Moved from hw/ppc/spapr_pci_nvlink2.c
+ */
+#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1))
+
+/*
+ * Number of distance reference points of the NUMA affinity in use:
+ * the FORM2 value when OV5_FORM2_AFFINITY is set in ov5_cas, the FORM1
+ * value otherwise.
+ */
+static int get_max_dist_ref_points(SpaprMachineState *spapr)
+{
+    bool form2 = spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY);
+
+    return form2 ? FORM2_DIST_REF_POINTS : FORM1_DIST_REF_POINTS;
+}
+
+/*
+ * Number of cells of an ibm,associativity array for the NUMA affinity
+ * in use: the FORM2 size when OV5_FORM2_AFFINITY is set in ov5_cas, the
+ * FORM1 size otherwise.
+ */
+static int get_numa_assoc_size(SpaprMachineState *spapr)
+{
+    bool form2 = spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY);
+
+    return form2 ? FORM2_NUMA_ASSOC_SIZE : FORM1_NUMA_ASSOC_SIZE;
+}
+
+/*
+ * Number of cells of the ibm,associativity array of a CPU for the NUMA
+ * affinity in use.
+ *
+ * CPUs carry one cell more than other resources: the vcpu_id, stored
+ * at the end of the array.
+ */
+static int get_vcpu_assoc_size(SpaprMachineState *spapr)
+{
+    int numa_assoc_size = get_numa_assoc_size(spapr);
+
+    return numa_assoc_size + 1;
+}
+
+/*
+ * Return the ibm,associativity array of NUMA node @node_id for the NUMA
+ * affinity in use (FORM2 when OV5_FORM2_AFFINITY is set in ov5_cas,
+ * FORM1 otherwise).
+ */
+static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id)
+{
+    bool form2 = spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY);
+
+    return form2 ? spapr->FORM2_assoc_array[node_id]
+                 : spapr->FORM1_assoc_array[node_id];
+}
+
+/*
+ * Return the distance between nodes @src and @dst as recorded in
+ * ms->numa_state->nodes, substituting a default value when none was
+ * recorded.
+ */
+static int get_numa_distance(MachineState *ms, int src, int dst)
+{
+    int distance = ms->numa_state->nodes[src].distance[dst];
+
+    /*
+     * In case QEMU adds a default NUMA single node when the user
+     * did not add any, or where the user did not supply distances,
+     * the distance will be absent (zero). Use the local/remote
+     * distance in this case.
+     */
+    if (distance == 0) {
+        distance = (src == dst) ? NUMA_DISTANCE_MIN : NUMA_DISTANCE_DEFAULT;
+    }
+
+    return distance;
+}
+
+/*
+ * Tell whether the NUMA distance matrix is symmetrical, i.e. whether
+ * the distance from A to B equals the distance from B to A for every
+ * pair of nodes.
+ */
+static bool spapr_numa_is_symmetrical(MachineState *ms)
+{
+    int nr_nodes = ms->numa_state->num_nodes;
+    int a, b;
+
+    /* Each unordered pair only needs to be looked at once */
+    for (a = 0; a < nr_nodes; a++) {
+        for (b = a; b < nr_nodes; b++) {
+            int forward = get_numa_distance(ms, a, b);
+            int backward = get_numa_distance(ms, b, a);
+
+            if (forward != backward) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+/*
+ * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node.
+ * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is
+ * called from vPHB reset handler so we initialize the counter here.
+ * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM
+ * must be equally distant from any other node.
+ * The final value of spapr->gpu_numa_id is going to be written to
+ * max-associativity-domains in spapr_build_fdt().
+ *
+ * Returns the number of configured NUMA nodes, but never less than 1.
+ */
+unsigned int spapr_numa_initial_nvgpu_numa_id(MachineState *machine)
+{
+ return MAX(1, machine->numa_state->num_nodes);
+}
+
+/*
+ * This function translates a user distance into the NUMA level the
+ * kernel associates with the values it understands: 10 (local
+ * distance), 20, 40, 80 and 160. Current heuristic is:
+ *  - local distance (10) returns numa_level = 0x4, meaning there is
+ *    no rounding for local distance
+ *  - distances between 11 and 30 inclusive -> rounded to 20,
+ *    numa_level = 0x3
+ *  - distances between 31 and 60 inclusive -> rounded to 40,
+ *    numa_level = 0x2
+ *  - distances between 61 and 120 inclusive -> rounded to 80,
+ *    numa_level = 0x1
+ *  - everything else (above 120, or below 10) returns numa_level = 0
+ *    to indicate that there is no match. This will be calculated as
+ *    distance = 160 by the kernel (as of v5.9)
+ *
+ * The lower bound of each range is inclusive: 11, 31 and 61 belong to
+ * levels 0x3, 0x2 and 0x1 respectively instead of falling through to
+ * the "no match" level.
+ */
+static uint8_t spapr_numa_get_numa_level(uint8_t distance)
+{
+    if (distance == 10) {
+        return 0x4;
+    } else if (distance >= 11 && distance <= 30) {
+        return 0x3;
+    } else if (distance >= 31 && distance <= 60) {
+        return 0x2;
+    } else if (distance >= 61 && distance <= 120) {
+        return 0x1;
+    }
+
+    return 0;
+}
+
+/*
+ * Derive the FORM1 associativity domains of every NUMA node from the
+ * node distances: each node first gets its own id in every domain, then
+ * pairs of nodes are made to share domains according to the NUMA level
+ * their distance maps to.
+ */
+static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
+{
+ MachineState *ms = MACHINE(spapr);
+ int nb_numa_nodes = ms->numa_state->num_nodes;
+ int src, dst, i, j;
+
+ /*
+ * Fill all associativity domains of non-zero NUMA nodes with
+ * node_id. This is required because the default value (0) is
+ * considered a match with associativity domains of node 0.
+ */
+ for (i = 1; i < nb_numa_nodes; i++) {
+ for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
+ spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i);
+ }
+ }
+
+ /* Only pairs with dst >= src are visited */
+ for (src = 0; src < nb_numa_nodes; src++) {
+ for (dst = src; dst < nb_numa_nodes; dst++) {
+ /*
+ * This is how the associativity domain between A and B
+ * is calculated:
+ *
+ * - get the distance D between them
+ * - get the correspondent NUMA level 'n_level' for D
+ * - all associativity arrays were initialized with their own
+ * numa_ids, and we're calculating the distance in node_id
+ * ascending order, starting from node id 0 (the first node
+ * retrieved by numa_state). This will have a cascade effect in
+ * the algorithm because the associativity domains that node 0
+ * defines will be carried over to other nodes, and node 1
+ * associativities will be carried over after taking node 0
+ * associativities into account, and so on. This happens because
+ * we'll assign assoc_src as the associativity domain of dst
+ * as well, for all NUMA levels beyond and including n_level.
+ *
+ * The PPC kernel expects the associativity domains of node 0 to
+ * be always 0, and this algorithm will grant that by default.
+ */
+ uint8_t distance = get_numa_distance(ms, src, dst);
+ uint8_t n_level = spapr_numa_get_numa_level(distance);
+ uint32_t assoc_src;
+
+ /*
+ * n_level = 0 means that the distance is greater than our last
+ * rounded value (120). In this case there is no NUMA level match
+ * between src and dst and we can skip the remaining of the loop.
+ *
+ * The Linux kernel will assume that the distance between src and
+ * dst, in this case of no match, is 10 (local distance) doubled
+ * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS
+ * levels (4), so this gives us 10*2*2*2*2 = 160.
+ *
+ * This logic can be seen in the Linux kernel source code, as of
+ * v5.9, in arch/powerpc/mm/numa.c, function __node_distance().
+ */
+ if (n_level == 0) {
+ continue;
+ }
+
+ /*
+ * We must assign all assoc_src to dst, starting from n_level
+ * and going up to 0x1.
+ */
+ for (i = n_level; i > 0; i--) {
+ assoc_src = spapr->FORM1_assoc_array[src][i];
+ spapr->FORM1_assoc_array[dst][i] = assoc_src;
+ }
+ }
+ }
+
+}
+
+/*
+ * Verify that the NUMA configuration can be expressed with FORM1
+ * affinity. QEMU exits with EXIT_FAILURE when a node has neither memory
+ * nor CPUs, or when the distance matrix is not symmetrical.
+ */
+static void spapr_numa_FORM1_affinity_check(MachineState *machine)
+{
+    int node;
+
+    /*
+     * Check we don't have a memory-less/cpu-less NUMA node
+     * Firmware relies on the existing memory/cpu topology to provide the
+     * NUMA topology to the kernel.
+     * And the linux kernel needs to know the NUMA topology at start
+     * to be able to hotplug CPUs later.
+     */
+    for (node = 0; node < machine->numa_state->num_nodes; ++node) {
+        CPUState *cs;
+        bool has_cpu = false;
+
+        /* Nodes with memory are fine */
+        if (machine->numa_state->nodes[node].node_mem != 0) {
+            continue;
+        }
+
+        /* Memory-less node: it needs at least one CPU */
+        CPU_FOREACH(cs) {
+            PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+            if (cpu->node_id == node) {
+                has_cpu = true;
+                break;
+            }
+        }
+
+        /* memory-less and cpu-less node */
+        if (!has_cpu) {
+            error_report(
+"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", node);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    if (!spapr_numa_is_symmetrical(machine)) {
+        error_report(
+"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA");
+        exit(EXIT_FAILURE);
+    }
+}
+
+/*
+ * Set NUMA machine state data based on FORM1 affinity semantics.
+ *
+ * Fills the size and numa_id cells of the FORM1 associativity arrays of
+ * the user NUMA nodes and of the NVGPU_MAX_NUM nodes reserved for
+ * NVLink GPUs, then derives the distance based domains unless the
+ * machine class asks for pre-5.2 associativity or there is at most one
+ * NUMA node.
+ */
+static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
+ MachineState *machine)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ int nb_numa_nodes = machine->numa_state->num_nodes;
+ int i, j, max_nodes_with_gpus;
+
+ /*
+ * For all associativity arrays: first position is the size,
+ * position FORM1_DIST_REF_POINTS is always the numa_id,
+ * represented by the index 'i'.
+ *
+ * This will break on sparse NUMA setups, when/if QEMU starts
+ * to support it, because there will be no more guarantee that
+ * 'i' will be a valid node_id set by the user.
+ */
+ for (i = 0; i < nb_numa_nodes; i++) {
+ spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+ spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+ }
+
+ /*
+ * Initialize NVLink GPU associativity arrays. We know that
+ * the first GPU will take the first available NUMA id, and
+ * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
+ * At this point we're not sure if there are GPUs or not, but
+ * let's initialize the associativity arrays and allow NVLink
+ * GPUs to be handled like regular NUMA nodes later on.
+ */
+ max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
+
+ for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
+ spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+
+ /* Domain cells: shared GPU id with pre-5.1 refpoints, own id otherwise */
+ for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
+ uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
+ SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
+ spapr->FORM1_assoc_array[i][j] = gpu_assoc;
+ }
+
+ spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+ }
+
+ /*
+ * Guests pseries-5.1 and older uses zeroed associativity domains,
+ * i.e. no domain definition based on NUMA distance input.
+ *
+ * Same thing with guests that have only one NUMA node.
+ */
+ if (smc->pre_5_2_numa_associativity ||
+ machine->numa_state->num_nodes <= 1) {
+ return;
+ }
+
+ spapr_numa_define_FORM1_domains(spapr);
+}
+
+/*
+ * Init NUMA FORM2 machine state data
+ */
+static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr)
+{
+    int node_id;
+
+    /*
+     * For all resources but CPUs, FORM2 associativity arrays will
+     * be a size 2 array with the following format:
+     *
+     * ibm,associativity = {1, numa_id}
+     *
+     * CPUs will write an additional 'vcpu_id' on top of the arrays
+     * being initialized here.
+     *
+     * Given that this initialization is also valid for GPU associativity
+     * arrays, handle everything in one single step by populating the
+     * arrays up to NUMA_NODES_MAX_NUM.
+     */
+    for (node_id = 0; node_id < NUMA_NODES_MAX_NUM; node_id++) {
+        uint32_t *assoc = spapr->FORM2_assoc_array[node_id];
+
+        assoc[0] = cpu_to_be32(1);
+        assoc[1] = cpu_to_be32(node_id);
+    }
+}
+
+/*
+ * Initialize the associativity arrays of both NUMA affinity forms; which
+ * one is used is decided later by testing OV5_FORM2_AFFINITY in ov5_cas.
+ */
+void spapr_numa_associativity_init(SpaprMachineState *spapr,
+ MachineState *machine)
+{
+ spapr_numa_FORM1_affinity_init(spapr, machine);
+ spapr_numa_FORM2_affinity_init(spapr);
+}
+
+/*
+ * Run the checks required by the NUMA affinity form in use. Only FORM1
+ * has any; they terminate QEMU when the configuration is unsupported.
+ */
+void spapr_numa_associativity_check(SpaprMachineState *spapr)
+{
+    /*
+     * FORM2 does not have any restrictions we need to handle
+     * at CAS time, for now.
+     */
+    if (!spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+        spapr_numa_FORM1_affinity_check(MACHINE(spapr));
+    }
+}
+
+/*
+ * Write the ibm,associativity property of NUMA node @nodeid into the
+ * device tree node at @offset of @fdt, using the affinity form in use.
+ */
+void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt,
+                                       int offset, int nodeid)
+{
+    const uint32_t *assoc = get_associativity(spapr, nodeid);
+    int assoc_bytes = get_numa_assoc_size(spapr) * sizeof(uint32_t);
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,associativity", assoc, assoc_bytes)));
+}
+
+/*
+ * Build the ibm,associativity array of @cpu for the affinity form in
+ * use. The array is allocated with g_new(); the caller owns it.
+ */
+static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr,
+                                           PowerPCCPU *cpu)
+{
+    const uint32_t *node_assoc = get_associativity(spapr, cpu->node_id);
+    int ref_points = get_max_dist_ref_points(spapr);
+    int nr_cells = get_vcpu_assoc_size(spapr);
+    uint32_t *vcpu_assoc = g_new(uint32_t, nr_cells);
+    int vcpu_id = spapr_get_vcpu_id(cpu);
+    int last = nr_cells - 1;
+
+    /*
+     * VCPUs have an extra 'cpu_id' value in ibm,associativity
+     * compared to other resources: the size stored at index 0 is one
+     * more than for the node, the node's associativity domains are
+     * copied unchanged and cpu_id goes last.
+     */
+    vcpu_assoc[0] = cpu_to_be32(ref_points + 1);
+    memcpy(&vcpu_assoc[1], &node_assoc[1], (last - 1) * sizeof(uint32_t));
+    vcpu_assoc[last] = cpu_to_be32(vcpu_id);
+
+    return vcpu_assoc;
+}
+
+/*
+ * Write the ibm,associativity property of @cpu into the device tree
+ * node at @offset of @fdt. Returns the result of fdt_setprop().
+ */
+int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt,
+                            int offset, PowerPCCPU *cpu)
+{
+    int nr_cells = get_vcpu_assoc_size(spapr);
+    /* Freed automatically when leaving the function */
+    g_autofree uint32_t *vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu);
+
+    /* Advertise NUMA via ibm,associativity */
+    return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc,
+                       nr_cells * sizeof(uint32_t));
+}
+
+
+/*
+ * Write the ibm,associativity-lookup-arrays property into the device
+ * tree node at @offset of @fdt.
+ *
+ * The property holds the number of nodes, the number of entries per
+ * node and then, for each node, its associativity array without the
+ * leading size cell. A machine without NUMA nodes is described as one
+ * node. Returns the result of fdt_setprop().
+ */
+int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt,
+                                         int offset)
+{
+    MachineState *machine = MACHINE(spapr);
+    int ref_points = get_max_dist_ref_points(spapr);
+    int nb_numa_nodes = machine->numa_state->num_nodes;
+    int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
+    uint32_t buf_len = (nr_nodes * ref_points + 2) * sizeof(uint32_t);
+    uint32_t *int_buf = g_malloc0(buf_len);
+    int used = 0;
+    int ret, node;
+
+    /* ibm,associativity-lookup-arrays */
+    int_buf[used++] = cpu_to_be32(nr_nodes);
+    /* Number of entries per associativity list */
+    int_buf[used++] = cpu_to_be32(ref_points);
+
+    for (node = 0; node < nr_nodes; node++) {
+        /*
+         * For the lookup-array we use the ibm,associativity array of the
+         * current NUMA affinity, without the first element (size).
+         */
+        const uint32_t *assoc = get_associativity(spapr, node);
+
+        memcpy(&int_buf[used], assoc + 1, sizeof(uint32_t) * ref_points);
+        used += ref_points;
+    }
+
+    ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf,
+                      used * sizeof(uint32_t));
+    g_free(int_buf);
+
+    return ret;
+}
+
+/*
+ * Write the FORM1 flavour of ibm,associativity-reference-points and
+ * ibm,max-associativity-domains into the device tree node @rtas of @fdt.
+ *
+ * Machine classes with pre-5.2 associativity, and machines with at most
+ * one NUMA node, get the legacy values instead.
+ */
+static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr,
+ void *fdt, int rtas)
+{
+ MachineState *ms = MACHINE(spapr);
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ /* NUMA ids handed out to GPUs since the counter was initialized */
+ uint32_t number_nvgpus_nodes = spapr->gpu_numa_id -
+ spapr_numa_initial_nvgpu_numa_id(ms);
+ uint32_t refpoints[] = {
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x3),
+ cpu_to_be32(0x2),
+ cpu_to_be32(0x1),
+ };
+ uint32_t nr_refpoints = ARRAY_SIZE(refpoints);
+ uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes;
+ uint32_t maxdomains[] = {
+ cpu_to_be32(4),
+ cpu_to_be32(maxdomain),
+ cpu_to_be32(maxdomain),
+ cpu_to_be32(maxdomain),
+ cpu_to_be32(maxdomain)
+ };
+
+ if (smc->pre_5_2_numa_associativity ||
+ ms->numa_state->num_nodes <= 1) {
+ uint32_t legacy_refpoints[] = {
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x4),
+ cpu_to_be32(0x2),
+ };
+ uint32_t legacy_maxdomain = spapr->gpu_numa_id > 1 ? 1 : 0;
+ uint32_t legacy_maxdomains[] = {
+ cpu_to_be32(4),
+ cpu_to_be32(legacy_maxdomain),
+ cpu_to_be32(legacy_maxdomain),
+ cpu_to_be32(legacy_maxdomain),
+ cpu_to_be32(spapr->gpu_numa_id),
+ };
+
+ /* The legacy arrays are copied over the default ones below */
+ G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints));
+ G_STATIC_ASSERT(sizeof(legacy_maxdomains) <= sizeof(maxdomains));
+
+ nr_refpoints = 3;
+
+ memcpy(refpoints, legacy_refpoints, sizeof(legacy_refpoints));
+ memcpy(maxdomains, legacy_maxdomains, sizeof(legacy_maxdomains));
+
+ /* pseries-5.0 and older reference-points array is {0x4, 0x4} */
+ if (smc->pre_5_1_assoc_refpoints) {
+ nr_refpoints = 2;
+ }
+ }
+
+ /* Only the first nr_refpoints cells of refpoints[] are written */
+ _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
+ refpoints, nr_refpoints * sizeof(refpoints[0])));
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
+ maxdomains, sizeof(maxdomains)));
+}
+
+/*
+ * Write the FORM2 lookup tables, ibm,numa-lookup-index-table and
+ * ibm,numa-distance-table, into the device tree node @rtas of @fdt.
+ */
+static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
+                                               void *fdt, int rtas)
+{
+    MachineState *ms = MACHINE(spapr);
+    int nb_numa_nodes = ms->numa_state->num_nodes;
+    int distance_table_entries = nb_numa_nodes * nb_numa_nodes;
+    g_autofree uint32_t *lookup_index_table = NULL;
+    g_autofree uint8_t *distance_table = NULL;
+    int distance_table_size;
+    uint8_t *cursor;
+    int src, dst, cell;
+
+    /*
+     * ibm,numa-lookup-index-table: array with length and a
+     * list of NUMA ids present in the guest.
+     */
+    lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1);
+    lookup_index_table[0] = cpu_to_be32(nb_numa_nodes);
+
+    for (cell = 1; cell <= nb_numa_nodes; cell++) {
+        lookup_index_table[cell] = cpu_to_be32(cell - 1);
+    }
+
+    _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table",
+                     lookup_index_table,
+                     (nb_numa_nodes + 1) * sizeof(uint32_t)));
+
+    /*
+     * ibm,numa-distance-table: contains all node distances. First
+     * element is the size of the table as uint32, followed up
+     * by all the uint8 distances from the first NUMA node, then all
+     * distances from the second NUMA node and so on.
+     *
+     * ibm,numa-lookup-index-table is used by guest to navigate this
+     * array because NUMA ids can be sparse (node 0 is the first,
+     * node 8 is the second ...).
+     */
+    distance_table_size = distance_table_entries * sizeof(uint8_t) +
+                          sizeof(uint32_t);
+    distance_table = g_new0(uint8_t, distance_table_size);
+    stl_be_p(distance_table, distance_table_entries);
+
+    /* Skip the uint32_t array length at the start */
+    cursor = distance_table + sizeof(uint32_t);
+
+    for (src = 0; src < nb_numa_nodes; src++) {
+        for (dst = 0; dst < nb_numa_nodes; dst++) {
+            *cursor++ = get_numa_distance(ms, src, dst);
+        }
+    }
+
+    _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table",
+                     distance_table, distance_table_size));
+}
+
+/*
+ * This helper could be compressed in a single function with
+ * FORM1 logic since we're setting the same DT values, with the
+ * difference being a call to spapr_numa_FORM2_write_rtas_tables()
+ * in the end. The separation was made to avoid clogging FORM1 code
+ * which already has to deal with compat modes from previous
+ * QEMU machine types.
+ */
+static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr,
+ void *fdt, int rtas)
+{
+ MachineState *ms = MACHINE(spapr);
+ uint32_t number_nvgpus_nodes = spapr->gpu_numa_id -
+ spapr_numa_initial_nvgpu_numa_id(ms);
+
+ /*
+ * In FORM2, ibm,associativity-reference-points will point to
+ * the element in the ibm,associativity array that contains the
+ * primary domain index (for FORM2, the first element).
+ *
+ * This value (in our case, the numa-id) is then used as an index
+ * to retrieve all other attributes of the node (distance,
+ * bandwidth, latency) via ibm,numa-lookup-index-table and other
+ * ibm,numa-*-table properties.
+ */
+ uint32_t refpoints[] = { cpu_to_be32(1) };
+
+ uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes;
+ uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) };
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points",
+ refpoints, sizeof(refpoints)));
+
+ _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
+ maxdomains, sizeof(maxdomains)));
+
+ spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas);
+}
+
+/*
+ * Helper that writes ibm,associativity-reference-points and
+ * max-associativity-domains in the RTAS pointed by @rtas
+ * in the DT @fdt.
+ */
+void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas)
+{
+ if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+ spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas);
+ return;
+ }
+
+ spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas);
+}
+
+ /*
+  * H_HOME_NODE_ASSOCIATIVITY hcall handler.
+  *
+  * args[0] holds the flags (only 0x1, "procno from H_REGISTER_VPA", is
+  * supported) and args[1] the processor number. On success the
+  * associativity domains of that vcpu are returned packed two per
+  * register in args[0..5]; unused slots are filled with -1.
+  *
+  * Returns H_FUNCTION for unsupported flags, H_P2 if the processor does
+  * not exist and H_SUCCESS otherwise.
+  */
+ static target_ulong h_home_node_associativity(PowerPCCPU *cpu,
+                                               SpaprMachineState *spapr,
+                                               target_ulong opcode,
+                                               target_ulong *args)
+ {
+     g_autofree uint32_t *vcpu_assoc = NULL;
+     target_ulong flags = args[0];
+     target_ulong procno = args[1];
+     int vcpu_assoc_size = get_vcpu_assoc_size(spapr);
+     PowerPCCPU *tcpu;
+     int reg, next;
+
+     /* only support procno from H_REGISTER_VPA */
+     if (flags != 0x1) {
+         return H_FUNCTION;
+     }
+
+     tcpu = spapr_find_cpu(procno);
+     if (!tcpu) {
+         return H_P2;
+     }
+
+     /*
+      * Sizes and indexes are kept flexible, but R4 up to R9 can carry
+      * at most 12 associativity domains for a vcpu. Assert and bail if
+      * the array does not fit.
+      */
+     g_assert((vcpu_assoc_size - 1) <= 12);
+
+     vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
+
+     /* Element 0 is the associativity size, so start reading at 1 */
+     next = 1;
+
+     for (reg = 0; reg < 6; reg++) {
+         /*
+          * vcpu_assoc[] already contains every associativity domain of
+          * tcpu, including tcpu->node_id and procno, so neither needs
+          * to be handled separately here.
+          *
+          * Two values are consumed per register: the first lands in the
+          * upper 32 bits and the second in the lower 32 bits. Once
+          * vcpu_assoc[] is exhausted the remaining halves stay at -1.
+          */
+         int32_t upper = -1;
+         int32_t lower = -1;
+
+         if (next < vcpu_assoc_size) {
+             upper = be32_to_cpu(vcpu_assoc[next++]);
+         }
+         if (next < vcpu_assoc_size) {
+             lower = be32_to_cpu(vcpu_assoc[next++]);
+         }
+
+         args[reg] = ((uint64_t)upper << 32) | ((uint64_t)lower & 0xffffffff);
+     }
+
+     return H_SUCCESS;
+ }
+
+ /*
+ * Register h_home_node_associativity() as the handler of the
+ * H_HOME_NODE_ASSOCIATIVITY hypercall. Hooked up through type_init()
+ * below.
+ */
+ static void spapr_numa_register_types(void)
+ {
+ /* Virtual Processor Home Node */
+ spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
+ h_home_node_associativity);
+ }
+
+ type_init(spapr_numa_register_types)
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
new file mode 100644
index 000000000..91de1052f
--- /dev/null
+++ b/hw/ppc/spapr_nvdimm.c
@@ -0,0 +1,528 @@
+/*
+ * QEMU PAPR Storage Class Memory Interfaces
+ *
+ * Copyright (c) 2019-2020, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr_drc.h"
+#include "hw/ppc/spapr_nvdimm.h"
+#include "hw/mem/nvdimm.h"
+#include "qemu/nvdimm-utils.h"
+#include "hw/ppc/fdt.h"
+#include "qemu/range.h"
+#include "hw/ppc/spapr_numa.h"
+
+ /* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */
+ /* SCM device is unable to persist memory contents */
+ #define PAPR_PMEM_UNARMED PPC_BIT(0)
+
+ /*
+ * The nvdimm size should be aligned to SCM block size.
+ * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
+ * in order to have SCM regions not to overlap with dimm memory regions.
+ * The SCM devices can have variable block sizes. For now, fixing the
+ * block size to the minimum value.
+ */
+ #define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE
+
+ /* Have an explicit check for alignment */
+ QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE);
+
+ /*
+  * Check that @nvdimm, whose usable size (excluding label) is @size, may
+  * be plugged into the machine behind @hotplug_dev.
+  *
+  * Returns true when every PAPR requirement is met; otherwise sets @errp
+  * and returns false.
+  */
+ bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm,
+                            uint64_t size, Error **errp)
+ {
+     const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev);
+     const MachineState *ms = MACHINE(hotplug_dev);
+     Object *nvdimm_obj = OBJECT(nvdimm);
+     g_autofree char *uuidstr = NULL;
+     int64_t label_size;
+     QemuUUID uuid;
+     int ret;
+
+     if (!mc->nvdimm_supported) {
+         error_setg(errp, "NVDIMM hotplug not supported for this machine");
+         return false;
+     }
+
+     if (!ms->nvdimms_state->is_enabled) {
+         error_setg(errp, "nvdimm device found but 'nvdimm=off' was set");
+         return false;
+     }
+
+     label_size = object_property_get_int(nvdimm_obj, NVDIMM_LABEL_SIZE_PROP,
+                                          &error_abort);
+     if (label_size == 0) {
+         error_setg(errp, "PAPR requires NVDIMM devices to have label-size set");
+         return false;
+     }
+
+     if ((size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) != 0) {
+         error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)"
+                    " to be a multiple of %" PRIu64 "MB",
+                    SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB);
+         return false;
+     }
+
+     /* The uuid property always holds a parsable string; only null is bad */
+     uuidstr = object_property_get_str(nvdimm_obj, NVDIMM_UUID_PROP,
+                                       &error_abort);
+     ret = qemu_uuid_parse(uuidstr, &uuid);
+     g_assert(!ret);
+
+     if (qemu_uuid_is_null(&uuid)) {
+         error_setg(errp, "NVDIMM device requires the uuid to be set");
+         return false;
+     }
+
+     return true;
+ }
+
+
+ /*
+  * Attach the NVDIMM device @dev to the PMEM DRC of @slot and, when the
+  * device is being hotplugged, raise the hotplug add request for it.
+  */
+ void spapr_add_nvdimm(DeviceState *dev, uint64_t slot)
+ {
+     bool is_hotplug = spapr_drc_hotplugged(dev);
+     SpaprDrc *drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
+
+     g_assert(drc);
+
+     /*
+      * The slot was handed out as free by pc_dimm_get_free_slot() at
+      * pre-plug time, so the matching DRC is assumed to be attachable.
+      */
+     spapr_drc_attach(drc, dev);
+
+     if (!is_hotplug) {
+         return;
+     }
+
+     spapr_hotplug_req_add_by_index(drc);
+ }
+
+ /*
+ * Create the "ibm,pmemory@<drc index>" device tree node describing
+ * @nvdimm as a child of @parent_offset in @fdt and fill in its
+ * properties. Any libfdt failure is handled by the _FDT() wrapper.
+ *
+ * Returns the offset of the newly created node.
+ */
+ static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt,
+ int parent_offset, NVDIMMDevice *nvdimm)
+ {
+ int child_offset;
+ char *buf;
+ SpaprDrc *drc;
+ uint32_t drc_idx;
+ uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP,
+ &error_abort);
+ uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP,
+ &error_abort);
+ uint64_t lsize = nvdimm->label_size;
+ uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
+ NULL);
+
+ /* The PMEM DRC for this slot must exist */
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot);
+ g_assert(drc);
+
+ drc_idx = spapr_drc_index(drc);
+
+ /* The node's unit address is the DRC index */
+ buf = g_strdup_printf("ibm,pmemory@%x", drc_idx);
+ child_offset = fdt_add_subnode(fdt, parent_offset, buf);
+ g_free(buf);
+
+ /* fdt_add_subnode() returns a negative error code on failure */
+ _FDT(child_offset);
+
+ _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx)));
+ _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory")));
+ _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory")));
+
+ spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node);
+
+ /* The device uuid is exported in its string form */
+ buf = qemu_uuid_unparse_strdup(&nvdimm->uuid);
+ _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf)));
+ g_free(buf);
+
+ _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx)));
+
+ /* Size is expressed as a count of fixed-size SCM blocks */
+ _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size",
+ SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
+ _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks",
+ size / SPAPR_MINIMUM_SCM_BLOCK_SIZE)));
+ _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize)));
+
+ _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application",
+ "operating-system")));
+ /* Empty (zero-length) property */
+ _FDT(fdt_setprop(fdt, child_offset, "ibm,cache-flush-required", NULL, 0));
+
+ return child_offset;
+ }
+
+ /*
+ * Build the device tree node for the NVDIMM attached to @drc directly
+ * under offset 0 of @fdt and store the new node's offset in
+ * @fdt_start_offset. @errp is not used here. Always returns 0.
+ */
+ int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+ void *fdt, int *fdt_start_offset, Error **errp)
+ {
+ NVDIMMDevice *nvdimm = NVDIMM(drc->dev);
+
+ *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm);
+
+ return 0;
+ }
+
+ /*
+  * Make sure the "ibm,persistent-memory" node exists under the root of
+  * @fdt (creating and initialising it if needed) and add one child node
+  * per NVDIMM device returned by nvdimm_get_device_list().
+  */
+ void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt)
+ {
+     int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory");
+     GSList *nvdimms = nvdimm_get_device_list();
+     GSList *iter = nvdimms;
+
+     if (offset < 0) {
+         offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory");
+         _FDT(offset);
+         _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1)));
+         _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0)));
+         _FDT((fdt_setprop_string(fdt, offset, "device_type",
+                                  "ibm,persistent-memory")));
+     }
+
+     /* Create DT entries for cold plugged NVDIMM devices */
+     while (iter) {
+         spapr_dt_nvdimm(spapr, fdt, offset, iter->data);
+         iter = iter->next;
+     }
+
+     g_slist_free(nvdimms);
+ }
+
+ /*
+  * H_SCM_READ_METADATA hcall handler.
+  *
+  * args[0]: DRC index, args[1]: offset into the label area,
+  * args[2]: number of bytes to read (1, 2, 4 or 8).
+  *
+  * On success the bytes, interpreted as a big-endian integer, are
+  * returned in args[0].
+  *
+  * Returns H_PARAMETER for a bad or unplugged PMEM DRC, H_P3 for an
+  * unsupported length, H_P2 if the access falls outside the label area,
+  * H_SUCCESS otherwise.
+  */
+ static target_ulong h_scm_read_metadata(PowerPCCPU *cpu,
+                                         SpaprMachineState *spapr,
+                                         target_ulong opcode,
+                                         target_ulong *args)
+ {
+     uint32_t drc_index = args[0];
+     uint64_t offset = args[1];
+     uint64_t len = args[2];
+     SpaprDrc *drc = spapr_drc_by_index(drc_index);
+     uint8_t buf[8] = { 0 };
+     uint64_t end, data;
+     NVDIMMDevice *nvdimm;
+
+     if (!drc || !drc->dev) {
+         return H_PARAMETER;
+     }
+     if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+         return H_PARAMETER;
+     }
+
+     if (!(len == 1 || len == 2 || len == 4 || len == 8)) {
+         return H_P3;
+     }
+
+     nvdimm = NVDIMM(drc->dev);
+
+     /* Reject wrap-around as well as reads past the end of the label */
+     end = offset + len;
+     if (end < offset || end > nvdimm->label_size) {
+         return H_P2;
+     }
+
+     NVDIMM_GET_CLASS(nvdimm)->read_label_data(nvdimm, buf, len, offset);
+
+     if (len == 1) {
+         data = ldub_p(buf);
+     } else if (len == 2) {
+         data = lduw_be_p(buf);
+     } else if (len == 4) {
+         data = ldl_be_p(buf);
+     } else {
+         data = ldq_be_p(buf);
+     }
+
+     args[0] = data;
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SCM_WRITE_METADATA hcall handler.
+  *
+  * args[0]: DRC index, args[1]: offset into the label area,
+  * args[2]: data to write, args[3]: number of bytes (1, 2, 4 or 8).
+  *
+  * The low @len bytes of the data are stored big-endian at the offset.
+  *
+  * Returns H_PARAMETER for a bad or unplugged PMEM DRC, H_P4 for an
+  * unsupported length, H_P2 if the access falls outside the label area
+  * or the data does not fit in @len bytes, H_SUCCESS otherwise.
+  */
+ static target_ulong h_scm_write_metadata(PowerPCCPU *cpu,
+                                          SpaprMachineState *spapr,
+                                          target_ulong opcode,
+                                          target_ulong *args)
+ {
+     uint32_t drc_index = args[0];
+     uint64_t offset = args[1];
+     uint64_t data = args[2];
+     uint64_t len = args[3];
+     SpaprDrc *drc = spapr_drc_by_index(drc_index);
+     uint8_t buf[8] = { 0 };
+     NVDIMMDevice *nvdimm;
+     uint64_t end;
+
+     if (!drc || !drc->dev) {
+         return H_PARAMETER;
+     }
+     if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+         return H_PARAMETER;
+     }
+
+     if (!(len == 1 || len == 2 || len == 4 || len == 8)) {
+         return H_P4;
+     }
+
+     nvdimm = NVDIMM(drc->dev);
+
+     /* Reject wrap-around as well as writes past the end of the label */
+     end = offset + len;
+     if (end < offset || end > nvdimm->label_size) {
+         return H_P2;
+     }
+
+     /* For stores narrower than 8 bytes the unused high bits must be 0 */
+     if (len < 8 && (data >> (len * 8)) != 0) {
+         return H_P2;
+     }
+
+     switch (len) {
+     case 1:
+         stb_p(buf, data);
+         break;
+     case 2:
+         stw_be_p(buf, data);
+         break;
+     case 4:
+         stl_be_p(buf, data);
+         break;
+     case 8:
+         stq_be_p(buf, data);
+         break;
+     default:
+         g_assert_not_reached();
+     }
+
+     NVDIMM_GET_CLASS(nvdimm)->write_label_data(nvdimm, buf, len, offset);
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SCM_BIND_MEM hcall handler.
+  *
+  * args[0]: DRC index, args[1]: index of the first SCM block,
+  * args[2]: number of blocks to bind, args[3]: target logical address
+  * (must be all ones), args[4]: continue token (must be zero).
+  *
+  * On success args[1] receives the address of the first requested block
+  * and args[2] the number of blocks bound.
+  */
+ static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                    target_ulong opcode, target_ulong *args)
+ {
+     uint32_t drc_index = args[0];
+     uint64_t starting_idx = args[1];
+     uint64_t no_of_scm_blocks_to_bind = args[2];
+     uint64_t target_logical_mem_addr = args[3];
+     uint64_t continue_token = args[4];
+     SpaprDrc *drc = spapr_drc_by_index(drc_index);
+     uint64_t size, total_no_of_scm_blocks, end_idx;
+     NVDIMMDevice *nvdimm;
+     hwaddr addr;
+
+     if (!drc || !drc->dev) {
+         return H_PARAMETER;
+     }
+     if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+         return H_PARAMETER;
+     }
+
+     /*
+      * The continue token must currently be zero: QEMU has already bound
+      * everything, so this hcall never returns H_BUSY.
+      */
+     if (continue_token != 0) {
+         return H_P5;
+     }
+
+     /* Currently qemu assigns the address. */
+     if (target_logical_mem_addr != UINT64_MAX) {
+         return H_OVERLAP;
+     }
+
+     nvdimm = NVDIMM(drc->dev);
+
+     size = object_property_get_uint(OBJECT(nvdimm),
+                                     PC_DIMM_SIZE_PROP, &error_abort);
+     total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+
+     if (starting_idx > total_no_of_scm_blocks) {
+         return H_P2;
+     }
+
+     /* Reject wrap-around and requests running past the last block */
+     end_idx = starting_idx + no_of_scm_blocks_to_bind;
+     if (end_idx < starting_idx || end_idx > total_no_of_scm_blocks) {
+         return H_P3;
+     }
+
+     addr = object_property_get_uint(OBJECT(nvdimm),
+                                     PC_DIMM_ADDR_PROP, &error_abort);
+     addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+
+     /* Already bound, Return target logical address in R5 */
+     args[1] = addr;
+     args[2] = no_of_scm_blocks_to_bind;
+
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SCM_UNBIND_MEM hcall handler.
+  *
+  * args[0]: DRC index, args[1]: starting SCM logical address (must be
+  * block aligned), args[2]: number of blocks to unbind (non-zero),
+  * args[3]: continue token (must be zero).
+  *
+  * Only validates the request; the actual unbind is left to unplug. On
+  * success args[1] receives the number of blocks.
+  *
+  * Returns H_PARAMETER for a bad or unplugged PMEM DRC, H_P4 for a
+  * non-zero continue token, H_P2 for an unaligned address, H_P3 if the
+  * block count is zero, overflows, or the resulting range is not fully
+  * inside the NVDIMM, H_SUCCESS otherwise.
+  */
+ static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                      target_ulong opcode, target_ulong *args)
+ {
+     uint32_t drc_index = args[0];
+     uint64_t starting_scm_logical_addr = args[1];
+     uint64_t no_of_scm_blocks_to_unbind = args[2];
+     uint64_t continue_token = args[3];
+     uint64_t size_to_unbind;
+     Range blockrange = range_empty;
+     Range nvdimmrange = range_empty;
+     SpaprDrc *drc = spapr_drc_by_index(drc_index);
+     NVDIMMDevice *nvdimm;
+     uint64_t size, addr;
+
+     if (!drc || !drc->dev ||
+         spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+         return H_PARAMETER;
+     }
+
+     /* continue_token should be zero as this hcall doesn't return H_BUSY. */
+     if (continue_token > 0) {
+         return H_P4;
+     }
+
+     /* Check if starting_scm_logical_addr is block aligned */
+     if (!QEMU_IS_ALIGNED(starting_scm_logical_addr,
+                          SPAPR_MINIMUM_SCM_BLOCK_SIZE)) {
+         return H_P2;
+     }
+
+     /* Dividing back detects a block count whose byte size overflowed */
+     size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+     if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind !=
+                                size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
+         return H_P3;
+     }
+
+     /*
+      * Both the start address and the size come from the guest. A range
+      * that wraps past the end of the 64-bit address space can never be
+      * inside the NVDIMM, and range_init_nofail() must not be handed an
+      * overflowing range, so reject it here (size_to_unbind is non-zero
+      * at this point).
+      */
+     if (starting_scm_logical_addr + size_to_unbind - 1 <
+         starting_scm_logical_addr) {
+         return H_P3;
+     }
+
+     nvdimm = NVDIMM(drc->dev);
+     size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
+                                    &error_abort);
+     addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP,
+                                    &error_abort);
+
+     range_init_nofail(&nvdimmrange, addr, size);
+     range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind);
+
+     if (!range_contains_range(&nvdimmrange, &blockrange)) {
+         return H_P3;
+     }
+
+     args[1] = no_of_scm_blocks_to_unbind;
+
+     /* let unplug take care of actual unbind */
+     return H_SUCCESS;
+ }
+
+ /* Values accepted for the target scope argument of H_SCM_UNBIND_ALL */
+ #define H_UNBIND_SCOPE_ALL 0x1
+ #define H_UNBIND_SCOPE_DRC 0x2
+
+ /*
+  * H_SCM_UNBIND_ALL hcall handler.
+  *
+  * args[0]: target scope, args[1]: DRC index (used for the DRC scope
+  * only), args[2]: continue token (must be zero).
+  *
+  * Only counts blocks; the actual unbind is left to unplug. On success
+  * args[1] receives the number of SCM blocks covered by the scope.
+  */
+ static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                      target_ulong opcode, target_ulong *args)
+ {
+     uint64_t target_scope = args[0];
+     uint32_t drc_index = args[1];
+     uint64_t continue_token = args[2];
+     uint64_t no_of_scm_blocks_unbound = 0;
+     NVDIMMDevice *nvdimm;
+     uint64_t size;
+
+     /* continue_token should be zero as this hcall doesn't return H_BUSY. */
+     if (continue_token != 0) {
+         return H_P4;
+     }
+
+     switch (target_scope) {
+     case H_UNBIND_SCOPE_DRC: {
+         SpaprDrc *drc = spapr_drc_by_index(drc_index);
+
+         if (!drc || !drc->dev ||
+             spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+             return H_P2;
+         }
+
+         nvdimm = NVDIMM(drc->dev);
+         size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
+                                        &error_abort);
+
+         no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+         break;
+     }
+     case H_UNBIND_SCOPE_ALL: {
+         GSList *nvdimms = nvdimm_get_device_list();
+         GSList *entry;
+
+         /* Sum up the blocks of every NVDIMM in the list */
+         for (entry = nvdimms; entry != NULL; entry = entry->next) {
+             nvdimm = entry->data;
+             size = object_property_get_int(OBJECT(nvdimm),
+                                            PC_DIMM_SIZE_PROP, &error_abort);
+
+             no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+         }
+         g_slist_free(nvdimms);
+         break;
+     }
+     default:
+         return H_PARAMETER;
+     }
+
+     args[1] = no_of_scm_blocks_unbound;
+
+     /* let unplug take care of actual unbind */
+     return H_SUCCESS;
+ }
+
+ /*
+  * H_SCM_HEALTH hcall handler.
+  *
+  * args[0]: DRC index. On success args[0] receives the health bitmap
+  * and args[1] the mask of bits that are valid in it.
+  *
+  * Returns H_PARAMETER for a bad or unplugged PMEM DRC, H_SUCCESS
+  * otherwise.
+  */
+ static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                  target_ulong opcode, target_ulong *args)
+ {
+     const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED;
+     uint32_t drc_index = args[0];
+     SpaprDrc *drc = spapr_drc_by_index(drc_index);
+     NVDIMMDevice *nvdimm;
+     bool unarmed;
+
+     /* The DRC must exist, have a device plugged in and be a PMEM DRC */
+     if (!drc || !drc->dev) {
+         return H_PARAMETER;
+     }
+     if (spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+         return H_PARAMETER;
+     }
+
+     nvdimm = NVDIMM(drc->dev);
+
+     /* An unarmed nvdimm is reported through the health bitmap */
+     unarmed = object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP,
+                                        NULL);
+
+     /* Update the out args with health bitmap/mask */
+     args[0] = unarmed ? PAPR_PMEM_UNARMED : 0;
+     args[1] = hbitmap_mask;
+
+     return H_SUCCESS;
+ }
+
+ /*
+ * Register the handlers of the H_SCM_* hypercalls implemented in this
+ * file. Hooked up through type_init() below.
+ */
+ static void spapr_scm_register_types(void)
+ {
+ /* qemu/scm specific hcalls */
+ spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata);
+ spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata);
+ spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem);
+ spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem);
+ spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all);
+ spapr_register_hypercall(H_SCM_HEALTH, h_scm_health);
+ }
+
+ type_init(spapr_scm_register_types)
diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c
new file mode 100644
index 000000000..b2567caa5
--- /dev/null
+++ b/hw/ppc/spapr_ovec.c
@@ -0,0 +1,241 @@
+/*
+ * QEMU SPAPR Architecture Option Vector Helper Functions
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * Authors:
+ * Bharata B Rao <bharata@linux.vnet.ibm.com>
+ * Michael Roth <mdroth@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/ppc/spapr_ovec.h"
+#include "migration/vmstate.h"
+#include "qemu/bitmap.h"
+#include "exec/address-spaces.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include <libfdt.h>
+
+ #define OV_MAXBYTES 256 /* not including length byte */
+ /* Number of bits every option vector bitmap is allocated with */
+ #define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE)
+
+ /* we *could* work with bitmaps directly, but handling the bitmap privately
+ * allows us to more safely make assumptions about the bitmap size and
+ * simplify the calling code somewhat
+ */
+ struct SpaprOptionVector {
+ unsigned long *bitmap;
+ int32_t bitmap_size; /* only used for migration */
+ };
+
+ /* Migration description: the bitmap, sized by bitmap_size */
+ const VMStateDescription vmstate_spapr_ovec = {
+ .name = "spapr_option_vector",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_BITMAP(bitmap, SpaprOptionVector, 1, bitmap_size),
+ VMSTATE_END_OF_LIST()
+ }
+ };
+
+ /*
+  * Allocate an option vector with an OV_MAXBITS wide bitmap.
+  * Release it with spapr_ovec_cleanup().
+  */
+ SpaprOptionVector *spapr_ovec_new(void)
+ {
+     SpaprOptionVector *vec = g_new0(SpaprOptionVector, 1);
+
+     vec->bitmap_size = OV_MAXBITS;
+     vec->bitmap = bitmap_new(OV_MAXBITS);
+
+     return vec;
+ }
+
+ /* Return a newly allocated copy of @ov_orig, which must not be NULL. */
+ SpaprOptionVector *spapr_ovec_clone(SpaprOptionVector *ov_orig)
+ {
+     SpaprOptionVector *copy;
+
+     g_assert(ov_orig);
+
+     copy = spapr_ovec_new();
+     bitmap_copy(copy->bitmap, ov_orig->bitmap, OV_MAXBITS);
+     return copy;
+ }
+
+ /*
+ * Store in @ov the bitwise AND of @ov1 and @ov2.
+ * None of the three vectors may be NULL.
+ */
+ void spapr_ovec_intersect(SpaprOptionVector *ov,
+ SpaprOptionVector *ov1,
+ SpaprOptionVector *ov2)
+ {
+ g_assert(ov);
+ g_assert(ov1);
+ g_assert(ov2);
+
+ bitmap_and(ov->bitmap, ov1->bitmap, ov2->bitmap, OV_MAXBITS);
+ }
+
+ /*
+  * Returns true if every bit set in @ov1 is also set in @ov2, i.e. the
+  * bits of @ov1 are a subset of the bits of @ov2.
+  */
+ bool spapr_ovec_subset(SpaprOptionVector *ov1, SpaprOptionVector *ov2)
+ {
+     g_autofree unsigned long *only_in_ov1 = bitmap_new(OV_MAXBITS);
+
+     g_assert(ov1);
+     g_assert(ov2);
+
+     /* Whatever survives "ov1 AND NOT ov2" is missing from ov2 */
+     bitmap_andnot(only_in_ov1, ov1->bitmap, ov2->bitmap, OV_MAXBITS);
+
+     return bitmap_empty(only_in_ov1, OV_MAXBITS);
+ }
+
+ /* Free @ov together with its bitmap. A NULL @ov is ignored. */
+ void spapr_ovec_cleanup(SpaprOptionVector *ov)
+ {
+     if (!ov) {
+         return;
+     }
+
+     g_free(ov->bitmap);
+     g_free(ov);
+ }
+
+ /*
+  * Set bit @bitnr of @ov. @ov must not be NULL and @bitnr must lie in
+  * [0, OV_MAXBITS).
+  */
+ void spapr_ovec_set(SpaprOptionVector *ov, long bitnr)
+ {
+     g_assert(ov);
+     /* bitnr is signed: a negative value would index before the bitmap */
+     g_assert(bitnr >= 0);
+     g_assert(bitnr < OV_MAXBITS);
+
+     set_bit(bitnr, ov->bitmap);
+ }
+
+ /*
+  * Clear bit @bitnr of @ov. @ov must not be NULL and @bitnr must lie in
+  * [0, OV_MAXBITS).
+  */
+ void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr)
+ {
+     g_assert(ov);
+     /* bitnr is signed: a negative value would index before the bitmap */
+     g_assert(bitnr >= 0);
+     g_assert(bitnr < OV_MAXBITS);
+
+     clear_bit(bitnr, ov->bitmap);
+ }
+
+ /*
+  * Return true if bit @bitnr of @ov is set. @ov must not be NULL and
+  * @bitnr must lie in [0, OV_MAXBITS).
+  */
+ bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr)
+ {
+     g_assert(ov);
+     /* bitnr is signed: a negative value would index before the bitmap */
+     g_assert(bitnr >= 0);
+     g_assert(bitnr < OV_MAXBITS);
+
+     return test_bit(bitnr, ov->bitmap) ? true : false;
+ }
+
+ /* Return true if no bit is set in @ov, which must not be NULL. */
+ bool spapr_ovec_empty(SpaprOptionVector *ov)
+ {
+ g_assert(ov);
+
+ return bitmap_empty(ov->bitmap, OV_MAXBITS);
+ }
+
+ /*
+  * Copy the bits of the guest byte @entry into @bitmap starting at bit
+  * @bitmap_offset. The most significant bit of @entry maps to
+  * @bitmap_offset, the least significant one to
+  * @bitmap_offset + BITS_PER_BYTE - 1. Only set bits are written.
+  */
+ static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap,
+                                  long bitmap_offset)
+ {
+     unsigned int mask = 1u << (BITS_PER_BYTE - 1);
+     long pos = bitmap_offset;
+
+     while (mask) {
+         if (entry & mask) {
+             bitmap_set(bitmap, pos, 1);
+         }
+         mask >>= 1;
+         pos++;
+     }
+ }
+
+ /*
+  * Build a guest byte from the BITS_PER_BYTE bits of @bitmap starting at
+  * @bitmap_offset. Bit @bitmap_offset becomes the most significant bit
+  * of the result.
+  */
+ static uint8_t guest_byte_from_bitmap(unsigned long *bitmap, long bitmap_offset)
+ {
+     unsigned int mask = 1u << (BITS_PER_BYTE - 1);
+     long pos = bitmap_offset;
+     uint8_t byte = 0;
+
+     for (; mask; mask >>= 1, pos++) {
+         if (test_bit(pos, bitmap)) {
+             byte |= mask;
+         }
+     }
+
+     return byte;
+ }
+
+ /*
+  * Locate option vector number @vector (1-based) inside the guest table
+  * at @table_addr.
+  *
+  * The table starts with one byte holding the number of vectors minus
+  * one. Each vector is a length byte (length minus one) followed by its
+  * data bytes.
+  *
+  * Returns the guest address of the vector's length byte, or 0 if the
+  * table has fewer than @vector vectors.
+  */
+ static target_ulong vector_addr(target_ulong table_addr, int vector)
+ {
+     uint16_t vector_count = ldub_phys(&address_space_memory, table_addr) + 1;
+     int to_skip;
+
+     if (vector > vector_count) {
+         return 0;
+     }
+
+     /* skip nr option vectors */
+     table_addr += 1;
+
+     for (to_skip = vector - 1; to_skip > 0; to_skip--) {
+         uint16_t vector_len = ldub_phys(&address_space_memory, table_addr) + 1;
+
+         /* bit-vector + length byte */
+         table_addr += vector_len + 1;
+     }
+
+     return table_addr;
+ }
+
+ /*
+  * Read option vector number @vector (1-based) from the guest table at
+  * @table_addr into a newly allocated SpaprOptionVector.
+  *
+  * Returns NULL if the table does not contain that vector.
+  */
+ SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector)
+ {
+     SpaprOptionVector *ov;
+     target_ulong addr;
+     uint16_t vector_len;
+     int byte_idx;
+
+     g_assert(table_addr);
+     g_assert(vector >= 1); /* vector numbering starts at 1 */
+
+     addr = vector_addr(table_addr, vector);
+     if (addr == 0) {
+         /* specified vector isn't present */
+         return NULL;
+     }
+
+     /* The length byte stores "length - 1"; the data follows it */
+     vector_len = ldub_phys(&address_space_memory, addr) + 1;
+     addr++;
+     g_assert(vector_len <= OV_MAXBYTES);
+
+     ov = spapr_ovec_new();
+
+     for (byte_idx = 0; byte_idx < vector_len; byte_idx++) {
+         uint8_t entry = ldub_phys(&address_space_memory, addr + byte_idx);
+
+         /* All-zero bytes contribute nothing to the bitmap */
+         if (!entry) {
+             continue;
+         }
+
+         trace_spapr_ovec_parse_vector(vector, byte_idx + 1, vector_len, entry);
+         guest_byte_to_bitmap(entry, ov->bitmap, byte_idx * BITS_PER_BYTE);
+     }
+
+     return ov;
+ }
+
+ /*
+  * Encode @ov as property @name of the node at @fdt_offset in @fdt.
+  *
+  * The property is a length byte (number of vector bytes minus one)
+  * followed by the vector bytes up to and including the last byte that
+  * has a bit set.
+  *
+  * Returns the result of fdt_setprop().
+  */
+ int spapr_dt_ovec(void *fdt, int fdt_offset,
+                   SpaprOptionVector *ov, const char *name)
+ {
+     uint8_t vec[OV_MAXBYTES + 1];
+     uint16_t vec_len;
+     unsigned long lastbit;
+     int byte_idx;
+
+     g_assert(ov);
+
+     lastbit = find_last_bit(ov->bitmap, OV_MAXBITS);
+
+     /*
+      * If no bits are set, still include 1 byte of the vector so that it
+      * can be encoded in the device tree while abiding by the same
+      * encoding/sizing expected in ibm,client-architecture-support.
+      */
+     if (lastbit == OV_MAXBITS) {
+         vec_len = 1;
+     } else {
+         vec_len = lastbit / BITS_PER_BYTE + 1;
+     }
+     g_assert(vec_len <= OV_MAXBYTES);
+
+     /*
+      * The guest expects the length encoded as vec_len - 1: the length
+      * byte itself is not counted and the first byte of the vector is
+      * assumed to be present.
+      */
+     vec[0] = vec_len - 1;
+
+     for (byte_idx = 0; byte_idx < vec_len; byte_idx++) {
+         uint8_t val = guest_byte_from_bitmap(ov->bitmap,
+                                              byte_idx * BITS_PER_BYTE);
+
+         vec[byte_idx + 1] = val;
+         if (val) {
+             trace_spapr_ovec_populate_dt(byte_idx + 1, vec_len, val);
+         }
+     }
+
+     return fdt_setprop(fdt, fdt_offset, name, vec, vec_len + 1);
+ }
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
new file mode 100644
index 000000000..5bfd4aa9e
--- /dev/null
+++ b/hw/ppc/spapr_pci.c
@@ -0,0 +1,2530 @@
+/*
+ * QEMU sPAPR PCI host originated from Uninorth PCI host
+ *
+ * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
+ * Copyright (C) 2011 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/irq.h"
+#include "hw/sysbus.h"
+#include "migration/vmstate.h"
+#include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/pci/pci_host.h"
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "exec/ram_addr.h"
+#include <libfdt.h>
+#include "trace.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "qapi/qmp/qerror.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci/pci_bridge.h"
+#include "hw/pci/pci_bus.h"
+#include "hw/pci/pci_ids.h"
+#include "hw/ppc/spapr_drc.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/kvm.h"
+#include "sysemu/hostmem.h"
+#include "sysemu/numa.h"
+#include "hw/ppc/spapr_numa.h"
+#include "qemu/log.h"
+
+ /*
+ * Function codes read from the "func" argument of ibm,change-msi.
+ * Copied from the kernel arch/powerpc/platforms/pseries/msi.c
+ */
+ #define RTAS_QUERY_FN 0
+ #define RTAS_CHANGE_FN 1
+ #define RTAS_RESET_FN 2
+ #define RTAS_CHANGE_MSI_FN 3
+ #define RTAS_CHANGE_MSIX_FN 4
+
+ /* Interrupt types to return on RTAS_CHANGE_* */
+ #define RTAS_TYPE_MSI 1
+ #define RTAS_TYPE_MSIX 2
+
+ /*
+  * Look up the PHB whose BUID equals @buid in the machine's PHB list.
+  * Returns NULL when there is no such PHB.
+  */
+ SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid)
+ {
+     SpaprPhbState *candidate;
+
+     QLIST_FOREACH(candidate, &spapr->phbs, list) {
+         if (candidate->buid == buid) {
+             return candidate;
+         }
+     }
+
+     return NULL;
+ }
+
+ /*
+  * Find the PCI device addressed by @config_addr behind the PHB with
+  * BUID @buid. Bits 23:16 of @config_addr select the bus and bits 15:8
+  * the devfn.
+  *
+  * Returns NULL when the PHB is unknown; otherwise returns whatever
+  * pci_find_device() yields.
+  */
+ PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid,
+                               uint32_t config_addr)
+ {
+     SpaprPhbState *sphb = spapr_pci_find_phb(spapr, buid);
+     PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
+     int bus_num, devfn;
+
+     if (phb == NULL) {
+         return NULL;
+     }
+
+     bus_num = (config_addr >> 16) & 0xFF;
+     devfn = (config_addr >> 8) & 0xFF;
+
+     return pci_find_device(phb->bus, bus_num, devfn);
+ }
+
+ /*
+  * Convert an RTAS config address argument into a config space offset.
+  * Bits 7:0 of @arg are the low part of the register number; bits 31:28
+  * supply bits 11:8, which handles the encoding of extended config
+  * space addresses.
+  */
+ static uint32_t rtas_pci_cfgaddr(uint32_t arg)
+ {
+     uint32_t ext_reg = (arg >> 20) & 0xf00;
+     uint32_t reg = arg & 0xff;
+
+     return ext_reg | reg;
+ }
+
+ /*
+  * Common tail of the read-pci-config RTAS calls: read @size bytes at
+  * RTAS config address @addr from the device behind PHB @buid.
+  *
+  * On success stores RTAS_OUT_SUCCESS in return slot 0 and the value in
+  * slot 1 of @rets; on any failure stores RTAS_OUT_HW_ERROR in slot 0.
+  */
+ static void finish_read_pci_config(SpaprMachineState *spapr, uint64_t buid,
+                                    uint32_t addr, uint32_t size,
+                                    target_ulong rets)
+ {
+     PCIDevice *pci_dev;
+     uint32_t cfg_off, cfg_limit, val;
+
+     /* access must be 1, 2 or 4 bytes */
+     switch (size) {
+     case 1:
+     case 2:
+     case 4:
+         break;
+     default:
+         goto hw_error;
+     }
+
+     pci_dev = spapr_pci_find_dev(spapr, buid, addr);
+     cfg_off = rtas_pci_cfgaddr(addr);
+
+     /* Access must be to a valid device ... */
+     if (pci_dev == NULL) {
+         goto hw_error;
+     }
+
+     /* ... naturally aligned and within bounds */
+     cfg_limit = pci_config_size(pci_dev);
+     if ((cfg_off % size) != 0 || cfg_off >= cfg_limit) {
+         goto hw_error;
+     }
+
+     val = pci_host_config_read_common(pci_dev, cfg_off, cfg_limit, size);
+
+     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+     rtas_st(rets, 1, val);
+     return;
+
+ hw_error:
+     rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ }
+
+ /*
+  * RTAS handler reading PCI config space with an explicit BUID.
+  * Arguments: config address (0), 64-bit BUID (1-2), size (3).
+  * Expects exactly 4 arguments and 2 return slots; anything else is
+  * answered with RTAS_OUT_HW_ERROR.
+  */
+ static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                      uint32_t token, uint32_t nargs,
+                                      target_ulong args,
+                                      uint32_t nret, target_ulong rets)
+ {
+     if (nargs == 4 && nret == 2) {
+         uint64_t buid = rtas_ldq(args, 1);
+         uint32_t size = rtas_ld(args, 3);
+         uint32_t addr = rtas_ld(args, 0);
+
+         finish_read_pci_config(spapr, buid, addr, size, rets);
+         return;
+     }
+
+     rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ }
+
+ /*
+  * RTAS handler reading PCI config space without a BUID (BUID 0 is
+  * used). Arguments: config address (0), size (1).
+  * Expects exactly 2 arguments and 2 return slots; anything else is
+  * answered with RTAS_OUT_HW_ERROR.
+  */
+ static void rtas_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args,
+                                  uint32_t nret, target_ulong rets)
+ {
+     if (nargs == 2 && nret == 2) {
+         uint32_t size = rtas_ld(args, 1);
+         uint32_t addr = rtas_ld(args, 0);
+
+         finish_read_pci_config(spapr, 0, addr, size, rets);
+         return;
+     }
+
+     rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ }
+
+ /*
+  * Common tail of the write-pci-config RTAS calls: write the @size byte
+  * value @val at RTAS config address @addr of the device behind PHB
+  * @buid.
+  *
+  * Stores RTAS_OUT_SUCCESS in return slot 0 of @rets on success and
+  * RTAS_OUT_HW_ERROR on any failure.
+  */
+ static void finish_write_pci_config(SpaprMachineState *spapr, uint64_t buid,
+                                     uint32_t addr, uint32_t size,
+                                     uint32_t val, target_ulong rets)
+ {
+     PCIDevice *pci_dev;
+     uint32_t cfg_off, cfg_limit;
+
+     /* access must be 1, 2 or 4 bytes */
+     switch (size) {
+     case 1:
+     case 2:
+     case 4:
+         break;
+     default:
+         goto hw_error;
+     }
+
+     pci_dev = spapr_pci_find_dev(spapr, buid, addr);
+     cfg_off = rtas_pci_cfgaddr(addr);
+
+     /* Access must be to a valid device ... */
+     if (pci_dev == NULL) {
+         goto hw_error;
+     }
+
+     /* ... naturally aligned and within bounds */
+     cfg_limit = pci_config_size(pci_dev);
+     if ((cfg_off % size) != 0 || cfg_off >= cfg_limit) {
+         goto hw_error;
+     }
+
+     pci_host_config_write_common(pci_dev, cfg_off, cfg_limit, val, size);
+
+     rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+     return;
+
+ hw_error:
+     rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ }
+
+ /*
+  * RTAS handler writing PCI config space with an explicit BUID.
+  * Arguments: config address (0), 64-bit BUID (1-2), size (3),
+  * value (4). Expects exactly 5 arguments and 1 return slot; anything
+  * else is answered with RTAS_OUT_HW_ERROR.
+  */
+ static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                       uint32_t token, uint32_t nargs,
+                                       target_ulong args,
+                                       uint32_t nret, target_ulong rets)
+ {
+     if (nargs == 5 && nret == 1) {
+         uint64_t buid = rtas_ldq(args, 1);
+         uint32_t val = rtas_ld(args, 4);
+         uint32_t size = rtas_ld(args, 3);
+         uint32_t addr = rtas_ld(args, 0);
+
+         finish_write_pci_config(spapr, buid, addr, size, val, rets);
+         return;
+     }
+
+     rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ }
+
+ /*
+  * RTAS handler writing PCI config space without a BUID (BUID 0 is
+  * used). Arguments: config address (0), size (1), value (2).
+  * Expects exactly 3 arguments and 1 return slot; anything else is
+  * answered with RTAS_OUT_HW_ERROR.
+  */
+ static void rtas_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+ {
+     if (nargs == 3 && nret == 1) {
+         uint32_t val = rtas_ld(args, 2);
+         uint32_t size = rtas_ld(args, 1);
+         uint32_t addr = rtas_ld(args, 0);
+
+         finish_write_pci_config(spapr, 0, addr, size, val, rets);
+         return;
+     }
+
+     rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ }
+
+/*
+ * Set MSI/MSIX message data.
+ * This is required for msi_notify()/msix_notify() which
+ * will write at the addresses via spapr_msi_write().
+ *
+ * If hwaddr == 0, all entries will have .data == first_irq i.e.
+ * table will be reset.
+ */
+static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
+ unsigned first_irq, unsigned req_num)
+{
+ unsigned i;
+ MSIMessage msg = { .address = addr, .data = first_irq };
+
+ if (!msix) {
+ msi_set_message(pdev, msg);
+ trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
+ return;
+ }
+
+ for (i = 0; i < req_num; ++i) {
+ msix_set_message(pdev, i, msg);
+ trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+ if (addr) {
+ ++msg.data;
+ }
+ }
+}
+
+/*
+ * RTAS handler that allocates, re-allocates or releases the MSI/MSI-X
+ * interrupts of one PCI function.
+ *
+ * Arguments read from @args: cell 0 config address, cells 1-2 BUID,
+ * cell 3 function selector, cell 4 requested vector count (0 releases
+ * everything), cell 5 sequence number.
+ *
+ * Values stored to @rets: cell 0 status, cell 1 number of vectors; on
+ * the enable path also cell 2 (sequence number + 1) and, when nret > 3,
+ * cell 3 (interrupt type actually used).
+ *
+ * NOTE(review): nargs/nret are not validated before the cells are
+ * accessed, unlike most other handlers in this file.
+ */
+static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args, uint32_t nret,
+ target_ulong rets)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ uint32_t config_addr = rtas_ld(args, 0);
+ uint64_t buid = rtas_ldq(args, 1);
+ unsigned int func = rtas_ld(args, 3);
+ unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
+ unsigned int seq_num = rtas_ld(args, 5);
+ unsigned int ret_intr_type;
+ unsigned int irq, max_irqs = 0;
+ SpaprPhbState *phb = NULL;
+ PCIDevice *pdev = NULL;
+ SpaprPciMsi *msi;
+ int *config_addr_key;
+ Error *err = NULL;
+ int i;
+
+ /* Find SpaprPhbState */
+ phb = spapr_pci_find_phb(spapr, buid);
+ if (phb) {
+ pdev = spapr_pci_find_dev(spapr, buid, config_addr);
+ }
+ if (!phb || !pdev) {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ /*
+ * Pick the interrupt type this call operates on.  The generic
+ * function prefers MSI over MSI-X when the device offers both; the
+ * type-specific functions fail if the device lacks that capability.
+ */
+ switch (func) {
+ case RTAS_CHANGE_FN:
+ if (msi_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSI;
+ } else if (msix_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSIX;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ case RTAS_CHANGE_MSI_FN:
+ if (msi_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSI;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ case RTAS_CHANGE_MSIX_FN:
+ if (msix_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSIX;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ default:
+ error_report("rtas_ibm_change_msi(%u) is not implemented", func);
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ /* The per-PHB MSI cache is keyed by the device's config address */
+ msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr);
+
+ /* Releasing MSIs */
+ if (!req_num) {
+ if (!msi) {
+ trace_spapr_pci_msi("Releasing wrong config", config_addr);
+ rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ return;
+ }
+
+ /* Reset the messages of both capabilities (addr 0, data 0) */
+ if (msi_present(pdev)) {
+ spapr_msi_setmsg(pdev, 0, false, 0, 0);
+ }
+ if (msix_present(pdev)) {
+ spapr_msi_setmsg(pdev, 0, true, 0, 0);
+ }
+ g_hash_table_remove(phb->msi, &config_addr);
+
+ trace_spapr_pci_msi("Released MSIs", config_addr);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+ rtas_st(rets, 1, 0);
+ return;
+ }
+
+ /* Enabling MSI */
+
+ /* Check if the device supports as many IRQs as requested */
+ if (ret_intr_type == RTAS_TYPE_MSI) {
+ max_irqs = msi_nr_vectors_allocated(pdev);
+ } else if (ret_intr_type == RTAS_TYPE_MSIX) {
+ max_irqs = pdev->msix_entries_nr;
+ }
+ if (!max_irqs) {
+ error_report("Requested interrupt type %d is not enabled for device %x",
+ ret_intr_type, config_addr);
+ rtas_st(rets, 0, -1); /* Hardware error */
+ return;
+ }
+ /* Correct the number if the guest asked for too many */
+ if (req_num > max_irqs) {
+ trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
+ req_num = max_irqs;
+ irq = 0; /* to avoid misleading trace */
+ /* Nothing is allocated here; only the supported count is returned */
+ goto out;
+ }
+
+ /* Allocate MSIs */
+ if (smc->legacy_irq_allocation) {
+ irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
+ &err);
+ } else {
+ irq = spapr_irq_msi_alloc(spapr, req_num,
+ ret_intr_type == RTAS_TYPE_MSI, &err);
+ }
+ if (err) {
+ error_reportf_err(err, "Can't allocate MSIs for device %x: ",
+ config_addr);
+ rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ return;
+ }
+
+ for (i = 0; i < req_num; i++) {
+ spapr_irq_claim(spapr, irq + i, false, &err);
+ if (err) {
+ /* Undo the i claims that already succeeded, then the allocation */
+ if (i) {
+ spapr_irq_free(spapr, irq, i);
+ }
+ if (!smc->legacy_irq_allocation) {
+ spapr_irq_msi_free(spapr, irq, req_num);
+ }
+ error_reportf_err(err, "Can't allocate MSIs for device %x: ",
+ config_addr);
+ rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ return;
+ }
+ }
+
+ /* Release previous MSIs */
+ if (msi) {
+ g_hash_table_remove(phb->msi, &config_addr);
+ }
+
+ /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
+ spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
+ irq, req_num);
+
+ /* Add MSI device to cache */
+ msi = g_new(SpaprPciMsi, 1);
+ msi->first_irq = irq;
+ msi->num = req_num;
+ /* Key and value are heap allocated and handed over to the hash table */
+ config_addr_key = g_new(int, 1);
+ *config_addr_key = config_addr;
+ g_hash_table_insert(phb->msi, config_addr_key, msi);
+
+out:
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+ rtas_st(rets, 1, req_num);
+ rtas_st(rets, 2, ++seq_num);
+ if (nret > 3) {
+ rtas_st(rets, 3, ret_intr_type);
+ }
+
+ trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq);
+}
+
+/*
+ * RTAS handler translating a device-relative MSI vector number into the
+ * interrupt source number recorded for it in the PHB's MSI cache.
+ *
+ * Reads the config address (cell 0), BUID (cells 1-2) and vector index
+ * (cell 3).  On success stores the status, the source number and a
+ * trigger type of 1 (edge) in rets[0..2].
+ */
+static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu,
+                                                   SpaprMachineState *spapr,
+                                                   uint32_t token,
+                                                   uint32_t nargs,
+                                                   target_ulong args,
+                                                   uint32_t nret,
+                                                   target_ulong rets)
+{
+    uint32_t config_addr = rtas_ld(args, 0);
+    uint64_t buid = rtas_ldq(args, 1);
+    unsigned int ioa_intr_num = rtas_ld(args, 3);
+    unsigned int intr_src_num;
+    SpaprPhbState *phb;
+    PCIDevice *pdev;
+    SpaprPciMsi *msi;
+    bool usable;
+
+    /* Both the PHB and the device behind it must exist */
+    phb = spapr_pci_find_phb(spapr, buid);
+    pdev = phb ? spapr_pci_find_dev(spapr, buid, config_addr) : NULL;
+    if (pdev == NULL) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    /* Find device descriptor and start IRQ */
+    msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr);
+    usable = msi && msi->first_irq && msi->num && ioa_intr_num < msi->num;
+    if (!usable) {
+        trace_spapr_pci_msi("Failed to return vector", config_addr);
+        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+        return;
+    }
+
+    intr_src_num = msi->first_irq + ioa_intr_num;
+    trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num,
+                                                           intr_src_num);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    rtas_st(rets, 1, intr_src_num);
+    rtas_st(rets, 2, 1); /* 0 == level; 1 == edge */
+}
+
+/*
+ * RTAS handler forwarding an EEH option change to the VFIO backend of
+ * the PHB named by the BUID in cells 1-2.
+ *
+ * Wrong argument counts, an unknown PHB or a PHB without EEH support
+ * are answered with RTAS_OUT_PARAM_ERROR; otherwise rets[0] receives
+ * the backend's result.
+ */
+static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
+                                    SpaprMachineState *spapr,
+                                    uint32_t token, uint32_t nargs,
+                                    target_ulong args, uint32_t nret,
+                                    target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint32_t addr, option;
+    uint64_t buid;
+
+    if (nargs != 4 || nret != 1) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = rtas_ldq(args, 1);
+    addr = rtas_ld(args, 0);
+    option = rtas_ld(args, 3);
+
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (sphb == NULL || !spapr_phb_eeh_available(sphb)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    rtas_st(rets, 0, spapr_phb_vfio_eeh_set_option(sphb, addr, option));
+}
+
+/*
+ * RTAS handler reporting either the PE address or the PE mode for a
+ * device, selected by the option in cell 3.
+ *
+ * The result goes to rets[1] and the status to rets[0].  Every failure
+ * (bad counts, unknown PHB, no EEH, unknown device or option) yields
+ * RTAS_OUT_PARAM_ERROR.
+ */
+static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
+                                           SpaprMachineState *spapr,
+                                           uint32_t token, uint32_t nargs,
+                                           target_ulong args, uint32_t nret,
+                                           target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint32_t option;
+    uint64_t buid;
+
+    if (nargs != 4 || nret != 2) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = rtas_ldq(args, 1);
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (sphb == NULL || !spapr_phb_eeh_available(sphb)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    /*
+     * We always have PE address of form "00BB0001". "BB"
+     * represents the bus number of PE's primary bus.
+     */
+    option = rtas_ld(args, 3);
+    if (option == RTAS_GET_PE_ADDR) {
+        uint32_t addr = rtas_ld(args, 0);
+        PCIDevice *pdev = spapr_pci_find_dev(spapr, buid, addr);
+
+        if (pdev == NULL) {
+            rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+            return;
+        }
+        rtas_st(rets, 1, (pci_bus_num(pci_get_bus(pdev)) << 16) + 1);
+    } else if (option == RTAS_GET_PE_MODE) {
+        rtas_st(rets, 1, RTAS_PE_MODE_SHARED);
+    } else {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * RTAS handler returning the EEH state of the PE behind a PHB.
+ *
+ * Accepts 3 arguments and either 4 or 5 return cells.  rets[0] holds the
+ * status from the VFIO backend; on success rets[1..3] (and rets[4] when
+ * five cells were requested) are filled in as well.
+ */
+static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
+                                            SpaprMachineState *spapr,
+                                            uint32_t token, uint32_t nargs,
+                                            target_ulong args, uint32_t nret,
+                                            target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint64_t buid;
+    int pe_state, status;
+
+    if (nargs != 3 || (nret != 4 && nret != 5)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = rtas_ldq(args, 1);
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (sphb == NULL || !spapr_phb_eeh_available(sphb)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    status = spapr_phb_vfio_eeh_get_state(sphb, &pe_state);
+    rtas_st(rets, 0, status);
+    if (status == RTAS_OUT_SUCCESS) {
+        rtas_st(rets, 1, pe_state);
+        rtas_st(rets, 2, RTAS_EEH_SUPPORT);
+        rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO);
+        if (nret >= 5) {
+            rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO);
+        }
+    }
+}
+
+/*
+ * RTAS handler passing a slot reset request (option in cell 3) to the
+ * VFIO EEH backend of the PHB named by the BUID in cells 1-2.
+ *
+ * Parameter problems, an unknown PHB or missing EEH support produce
+ * RTAS_OUT_PARAM_ERROR; otherwise rets[0] is the backend's result.
+ */
+static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
+                                    SpaprMachineState *spapr,
+                                    uint32_t token, uint32_t nargs,
+                                    target_ulong args, uint32_t nret,
+                                    target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint32_t option;
+    uint64_t buid;
+
+    if (nargs != 4 || nret != 1) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = rtas_ldq(args, 1);
+    option = rtas_ld(args, 3);
+
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (sphb == NULL || !spapr_phb_eeh_available(sphb)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    rtas_st(rets, 0, spapr_phb_vfio_eeh_reset(sphb, option));
+}
+
+/*
+ * RTAS handler asking the VFIO EEH backend to configure the PE of the
+ * PHB named by the BUID in cells 1-2.
+ *
+ * Expects 3 arguments and 1 return cell.  rets[0] is either
+ * RTAS_OUT_PARAM_ERROR or the backend's result.
+ */
+static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
+                                  SpaprMachineState *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args, uint32_t nret,
+                                  target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint64_t buid;
+
+    if (nargs != 3 || nret != 1) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = rtas_ldq(args, 1);
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (sphb == NULL || !spapr_phb_eeh_available(sphb)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    rtas_st(rets, 0, spapr_phb_vfio_eeh_configure(sphb));
+}
+
+/*
+ * RTAS handler for slot error detail retrieval.  To support it later:
+ * for now the arguments are only validated and, for a known log type
+ * (cell 7), RTAS_OUT_NO_ERRORS_FOUND is returned.
+ */
+static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
+                                       SpaprMachineState *spapr,
+                                       uint32_t token, uint32_t nargs,
+                                       target_ulong args, uint32_t nret,
+                                       target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint64_t buid;
+    int option;
+
+    if (nargs != 8 || nret != 1) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = rtas_ldq(args, 1);
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (sphb == NULL || !spapr_phb_eeh_available(sphb)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    option = rtas_ld(args, 7);
+    if (option != RTAS_SLOT_TEMP_ERR_LOG &&
+        option != RTAS_SLOT_PERM_ERR_LOG) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    /* We don't have error log yet */
+    rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND);
+}
+
+/*
+ * Drive the interrupt line of one LSI of the PHB passed as @opaque.
+ *
+ * @irq_num is the number returned by pci_swizzle_map_irq_fn; it indexes
+ * the PHB's lsi_table to find the interrupt whose qemu_irq is set to
+ * @level.
+ */
+static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
+{
+    SpaprMachineState *machine = SPAPR_MACHINE(qdev_get_machine());
+    SpaprPhbState *sphb = opaque;
+    qemu_irq line;
+
+    trace_spapr_pci_lsi_set(sphb->dtbusname, irq_num,
+                            sphb->lsi_table[irq_num].irq);
+
+    line = spapr_qirq(machine, sphb->lsi_table[irq_num].irq);
+    qemu_set_irq(line, level);
+}
+
+/*
+ * Report the INTx route of @pin on the PHB passed as @opaque: always
+ * enabled, targeting the interrupt stored in the PHB's lsi_table.
+ */
+static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin)
+{
+    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque);
+    PCIINTxRoute route = {
+        .mode = PCI_INTX_ENABLED,
+        .irq = sphb->lsi_table[pin].irq,
+    };
+
+    return route;
+}
+
+/*
+ * Read handler of the MSI window: reads are not meaningful, so the access
+ * is logged as a guest error and 0 is returned.
+ */
+static uint64_t spapr_msi_read(void *opaque, hwaddr addr, unsigned size)
+{
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__);
+ return 0;
+}
+
+/*
+ * Write handler of the MSI/MSIX memory region; one handler serves both
+ * interrupt types.  The vector number is encoded in the least bits of
+ * @data, and the matching interrupt is pulsed.
+ */
+static void spapr_msi_write(void *opaque, hwaddr addr,
+                            uint64_t data, unsigned size)
+{
+    SpaprMachineState *machine = opaque;
+    uint32_t vector = data;
+
+    trace_spapr_pci_msi_write(addr, data, vector);
+    qemu_irq_pulse(spapr_qirq(machine, vector));
+}
+
+/* Access callbacks of the MSI window memory region (little endian). */
+static const MemoryRegionOps spapr_msi_ops = {
+ /*
+ * .read result is undefined by PCI spec.
+ * define .read method to avoid assert failure in memory_region_init_io
+ */
+ .read = spapr_msi_read,
+ .write = spapr_msi_write,
+ .endianness = DEVICE_LITTLE_ENDIAN
+};
+
+/*
+ * PHB PCI device
+ */
+/*
+ * Return the DMA address space for a device: every bus and devfn under
+ * the PHB passed as @opaque shares that PHB's iommu_as.
+ */
+static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
+{
+    SpaprPhbState *sphb = opaque;
+    AddressSpace *dma_as = &sphb->iommu_as;
+
+    return dma_as;
+}
+
+/*
+ * Look up the host's "ibm,loc-code" for a VFIO passthrough device.
+ *
+ * Reads the device's "host" property, resolves it through
+ * /sys/bus/pci/devices/<host>/devspec and returns the contents of the
+ * matching /proc/device-tree<devspec>/ibm,loc-code file.
+ *
+ * Returns a newly allocated buffer that the caller must g_free(), or
+ * NULL if any step fails.
+ */
+static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev)
+{
+    /*
+     * Each path gets its own g_autofree variable: reusing a single one
+     * for both strings would overwrite the first pointer and leak it,
+     * because the automatic cleanup only frees the final value.
+     */
+    g_autofree char *host = NULL;
+    g_autofree char *devspec_path = NULL;
+    g_autofree char *devspec = NULL;
+    g_autofree char *loc_code_path = NULL;
+    char *buf = NULL;
+
+    /* Get the PCI VFIO host id */
+    host = object_property_get_str(OBJECT(pdev), "host", NULL);
+    if (!host) {
+        return NULL;
+    }
+
+    /* Construct the path of the file that will give us the DT location */
+    devspec_path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host);
+    if (!g_file_get_contents(devspec_path, &devspec, NULL, NULL)) {
+        return NULL;
+    }
+
+    /* Construct and read from host device tree the loc-code */
+    loc_code_path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code",
+                                    devspec);
+    if (!g_file_get_contents(loc_code_path, &buf, NULL, NULL)) {
+        return NULL;
+    }
+    return buf;
+}
+
+/*
+ * Build the "ibm,loc-code" string for @pdev.
+ *
+ * VFIO devices use the host's location code when it can be read.  For
+ * emulated devices, and for VFIO devices where that lookup fails, a
+ * code of the form <type>_<name>:<phb index>:<bus>:<slot>.<func> is
+ * made up.  The returned string is newly allocated.
+ */
+static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev)
+{
+    const char *devtype = "qemu";
+    uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
+
+    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+        char *host_code = spapr_phb_vfio_get_loc_code(sphb, pdev);
+
+        if (host_code != NULL) {
+            return host_code;
+        }
+        /* Host lookup failed: fall through and make one up */
+        devtype = "vfio";
+    }
+
+    return g_strdup_printf("%s_%s:%04x:%02x:%02x.%x",
+                           devtype, pdev->name, sphb->index, busnr,
+                           PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+}
+
+/*
+ * Macros to operate with address in OF binding to PCI.
+ *
+ * b_x(x, p, l) keeps the low @l bits of @x and moves them to bit
+ * position @p.  The arithmetic is done in uint32_t so that placing a
+ * field at bit 31 (b_n) never shifts into the sign bit of an int.
+ */
+#define b_x(x, p, l)    (((uint32_t)(x) & ((1U << (l)) - 1)) << (p))
+#define b_n(x)          b_x((x), 31, 1) /* 0 if relocatable */
+#define b_p(x)          b_x((x), 30, 1) /* 1 if prefetchable */
+#define b_t(x)          b_x((x), 29, 1) /* 1 if the address is aliased */
+#define b_ss(x)         b_x((x), 24, 2) /* the space code */
+#define b_bbbbbbbb(x)   b_x((x), 16, 8) /* bus number */
+#define b_ddddd(x)      b_x((x), 11, 5) /* device number */
+#define b_fff(x)        b_x((x), 8, 3)  /* function number */
+#define b_rrrrrrrr(x)   b_x((x), 0, 8)  /* register number */
+
+/* for 'reg' OF properties */
+#define RESOURCE_CELLS_SIZE 2
+#define RESOURCE_CELLS_ADDRESS 3
+
+/*
+ * One entry of a 'reg' property: three address cells followed by two
+ * size cells.  Packed because the array of entries is written into the
+ * device tree as raw bytes.
+ */
+typedef struct ResourceFields {
+ uint32_t phys_hi;
+ uint32_t phys_mid;
+ uint32_t phys_lo;
+ uint32_t size_hi;
+ uint32_t size_lo;
+} QEMU_PACKED ResourceFields;
+
+/*
+ * The 'reg' entries of one device plus the number of bytes in use.
+ * NOTE(review): 8 slots presumably cover the config-space entry plus one
+ * entry per PCI region - confirm against PCI_NUM_REGIONS.
+ */
+typedef struct ResourceProps {
+ ResourceFields reg[8];
+ uint32_t reg_len;
+} ResourceProps;
+
+/* fill in the 'reg' OF properties for
+ * a PCI device. 'reg' describes resource requirements for a
+ * device's IO/MEM regions.
+ *
+ * the property is an array of ('phys-addr', 'size') pairs describing
+ * the addressable regions of the PCI device, where 'phys-addr' is a
+ * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to
+ * (phys.hi, phys.mid, phys.lo), and 'size' is a
+ * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo).
+ *
+ * phys.hi = 0xYYXXXXZZ, where:
+ * 0xYY = npt000ss
+ * ||| |
+ * ||| +-- space code
+ * ||| |
+ * ||| + 00 if configuration space
+ * ||| + 01 if IO region,
+ * ||| + 10 if 32-bit MEM region
+ * ||| + 11 if 64-bit MEM region
+ * |||
+ * ||+------ for non-relocatable IO: 1 if aliased
+ * || for relocatable IO: 1 if below 64KB
+ * || for MEM: 1 if below 1MB
+ * |+------- 1 if region is prefetchable
+ * +-------- 1 if region is non-relocatable
+ * 0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function
+ * bits respectively
+ * 0xZZ = rrrrrrrr, the register number of the BAR corresponding
+ * to the region
+ *
+ * phys.mid and phys.lo correspond respectively to the hi/lo portions
+ * of the actual address of the region.
+ *
+ * note also that addresses defined in this property are, at least
+ * for PAPR guests, relative to the PHBs IO/MEM windows, and
+ * correspond directly to the addresses in the BARs.
+ *
+ * in accordance with PCI Bus Binding to Open Firmware,
+ * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7,
+ * Appendix C.
+ */
+static void populate_resource_props(PCIDevice *d, ResourceProps *rp)
+{
+    int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d))));
+    uint32_t dev_id = b_bbbbbbbb(bus_num) |
+                      b_ddddd(PCI_SLOT(d->devfn)) |
+                      b_fff(PCI_FUNC(d->devfn));
+    ResourceFields *entry;
+    int used = 0;
+    int bar;
+
+    /* The first entry always describes the config space region */
+    entry = &rp->reg[used++];
+    entry->phys_hi = cpu_to_be32(dev_id);
+    entry->phys_mid = 0;
+    entry->phys_lo = 0;
+    entry->size_hi = 0;
+    entry->size_lo = 0;
+
+    /* Then one entry per region that actually has a size */
+    for (bar = 0; bar < PCI_NUM_REGIONS; bar++) {
+        uint32_t space;
+
+        if (d->io_regions[bar].size == 0) {
+            continue;
+        }
+
+        /* Space code: 1 for IO, 3 for 64-bit MEM, 2 for 32-bit MEM */
+        if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) {
+            space = b_ss(1);
+        } else if (d->io_regions[bar].type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
+            space = b_ss(3);
+        } else {
+            space = b_ss(2);
+        }
+
+        entry = &rp->reg[used++];
+        entry->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, bar))) |
+                         cpu_to_be32(space);
+        entry->phys_mid = 0;
+        entry->phys_lo = 0;
+        entry->size_hi = cpu_to_be32(d->io_regions[bar].size >> 32);
+        entry->size_lo = cpu_to_be32(d->io_regions[bar].size);
+    }
+
+    rp->reg_len = used * sizeof(ResourceFields);
+}
+
+/*
+ * Lookup tables used to derive a device tree node name from a PCI class
+ * code: a class has a default name and optional subclasses, and a
+ * subclass has a name and optional programming interfaces.
+ */
+typedef struct PCIClass PCIClass;
+typedef struct PCISubClass PCISubClass;
+typedef struct PCIIFace PCIIFace;
+
+/* Name for one programming interface value of a subclass */
+struct PCIIFace {
+ int iface;
+ const char *name;
+};
+
+/* Name for one subclass; iface is NULL when interfaces are not named */
+struct PCISubClass {
+ int subclass;
+ const char *name;
+ const PCIIFace *iface;
+};
+
+/* Default name of a base class; subc is NULL when it has no subclass table */
+struct PCIClass {
+ const char *name;
+ const PCISubClass *subc;
+};
+
+/*
+ * Subclass and interface name tables, one per PCI base class.
+ * Every table ends with a terminator entry whose code is 0xFF and whose
+ * name is NULL; dt_name_from_class() stops scanning there.
+ */
+static const PCISubClass undef_subclass[] = {
+ { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mass_subclass[] = {
+ { PCI_CLASS_STORAGE_SCSI, "scsi", NULL },
+ { PCI_CLASS_STORAGE_IDE, "ide", NULL },
+ { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL },
+ { PCI_CLASS_STORAGE_IPI, "ipi", NULL },
+ { PCI_CLASS_STORAGE_RAID, "raid", NULL },
+ { PCI_CLASS_STORAGE_ATA, "ata", NULL },
+ { PCI_CLASS_STORAGE_SATA, "sata", NULL },
+ { PCI_CLASS_STORAGE_SAS, "sas", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass net_subclass[] = {
+ { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL },
+ { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL },
+ { PCI_CLASS_NETWORK_FDDI, "fddi", NULL },
+ { PCI_CLASS_NETWORK_ATM, "atm", NULL },
+ { PCI_CLASS_NETWORK_ISDN, "isdn", NULL },
+ { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL },
+ { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass displ_subclass[] = {
+ { PCI_CLASS_DISPLAY_VGA, "vga", NULL },
+ { PCI_CLASS_DISPLAY_XGA, "xga", NULL },
+ { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass media_subclass[] = {
+ { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL },
+ { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL },
+ { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass mem_subclass[] = {
+ { PCI_CLASS_MEMORY_RAM, "memory", NULL },
+ { PCI_CLASS_MEMORY_FLASH, "flash", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass bridg_subclass[] = {
+ { PCI_CLASS_BRIDGE_HOST, "host", NULL },
+ { PCI_CLASS_BRIDGE_ISA, "isa", NULL },
+ { PCI_CLASS_BRIDGE_EISA, "eisa", NULL },
+ { PCI_CLASS_BRIDGE_MC, "mca", NULL },
+ { PCI_CLASS_BRIDGE_PCI, "pci", NULL },
+ { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL },
+ { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL },
+ { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL },
+ { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL },
+ { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL },
+ { PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass comm_subclass[] = {
+ { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL },
+ { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL },
+ { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL },
+ { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL },
+ { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL },
+ { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL },
+ { 0xFF, NULL, NULL, },
+};
+
+/* Programming interfaces of the interrupt controller subclass */
+static const PCIIFace pic_iface[] = {
+ { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" },
+ { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" },
+ { 0xFF, NULL },
+};
+
+static const PCISubClass sys_subclass[] = {
+ { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface },
+ { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL },
+ { PCI_CLASS_SYSTEM_TIMER, "timer", NULL },
+ { PCI_CLASS_SYSTEM_RTC, "rtc", NULL },
+ { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL },
+ { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass inp_subclass[] = {
+ { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL },
+ { PCI_CLASS_INPUT_PEN, "pen", NULL },
+ { PCI_CLASS_INPUT_MOUSE, "mouse", NULL },
+ { PCI_CLASS_INPUT_SCANNER, "scanner", NULL },
+ { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass dock_subclass[] = {
+ { PCI_CLASS_DOCKING_GENERIC, "dock", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass cpu_subclass[] = {
+ { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL },
+ { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL },
+ { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL },
+ { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+/* Programming interfaces of the USB subclass */
+static const PCIIFace usb_iface[] = {
+ { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" },
+ { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", },
+ { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" },
+ { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" },
+ { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" },
+ { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" },
+ { 0xFF, NULL },
+};
+
+static const PCISubClass ser_subclass[] = {
+ { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL },
+ { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL },
+ { PCI_CLASS_SERIAL_SSA, "ssa", NULL },
+ { PCI_CLASS_SERIAL_USB, "usb", usb_iface },
+ { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL },
+ { PCI_CLASS_SERIAL_SMBUS, "smb", NULL },
+ { PCI_CLASS_SERIAL_IB, "infiniband", NULL },
+ { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL },
+ { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL },
+ { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass wrl_subclass[] = {
+ { PCI_CLASS_WIRELESS_IRDA, "irda", NULL },
+ { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL },
+ { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL },
+ { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL },
+ { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass sat_subclass[] = {
+ { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL },
+ { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL },
+ { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL },
+ { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass crypt_subclass[] = {
+ { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL },
+ { PCI_CLASS_CRYPT_ENTERTAINMENT,
+ "entertainment-encryption", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+static const PCISubClass spc_subclass[] = {
+ { PCI_CLASS_SP_DPIO, "dpio", NULL },
+ { PCI_CLASS_SP_PERF, "counter", NULL },
+ { PCI_CLASS_SP_SYNCH, "measurement", NULL },
+ { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL },
+ { 0xFF, NULL, NULL },
+};
+
+/*
+ * Top-level table, indexed directly by the PCI base class code.  Each
+ * entry gives the name used when no subclass matches and the subclass
+ * table to search (NULL when the class has none).
+ */
+static const PCIClass pci_classes[] = {
+ { "legacy-device", undef_subclass },
+ { "mass-storage", mass_subclass },
+ { "network", net_subclass },
+ { "display", displ_subclass, },
+ { "multimedia-device", media_subclass },
+ { "memory-controller", mem_subclass },
+ { "unknown-bridge", bridg_subclass },
+ { "communication-controller", comm_subclass},
+ { "system-peripheral", sys_subclass },
+ { "input-controller", inp_subclass },
+ { "docking-station", dock_subclass },
+ { "cpu", cpu_subclass },
+ { "serial-bus", ser_subclass },
+ { "wireless-controller", wrl_subclass },
+ { "intelligent-io", NULL },
+ { "satellite-device", sat_subclass },
+ { "encryption", crypt_subclass },
+ { "data-processing-controller", spc_subclass },
+};
+
+/*
+ * Map a PCI class code triple to a device tree node name.
+ *
+ * The most specific match wins: interface name, then subclass name, then
+ * the base class name.  Base classes beyond the end of pci_classes are
+ * named "pci".
+ */
+static const char *dt_name_from_class(uint8_t class, uint8_t subclass,
+                                      uint8_t iface)
+{
+    const PCISubClass *sub;
+    const PCIIFace *ifc;
+    const char *name;
+
+    if (class >= ARRAY_SIZE(pci_classes)) {
+        return "pci";
+    }
+
+    name = pci_classes[class].name;
+    sub = pci_classes[class].subc;
+    if (sub == NULL) {
+        return name;
+    }
+
+    for (; (sub->subclass & 0xff) != 0xff; sub++) {
+        if ((sub->subclass & 0xff) == subclass) {
+            name = sub->name;
+            break;
+        }
+    }
+
+    /* sub is now either the matching entry or the table terminator */
+    ifc = sub->iface;
+    if (ifc == NULL) {
+        return name;
+    }
+
+    for (; (ifc->iface & 0xff) != 0xff; ifc++) {
+        if ((ifc->iface & 0xff) == iface) {
+            name = ifc->name;
+            break;
+        }
+    }
+
+    return name;
+}
+
+/*
+ * DRC helper functions
+ */
+
+/*
+ * Compose a DRC id: PHB index from bit 16 up, chassis number in bits
+ * 8-15 and devfn in the low bits.
+ */
+static uint32_t drc_id_from_devfn(SpaprPhbState *phb,
+                                  uint8_t chassis, int32_t devfn)
+{
+    uint32_t id = phb->index << 16;
+
+    id |= chassis << 8;
+    id |= devfn;
+    return id;
+}
+
+/* Look up the PCI DRC for the given PHB, chassis and devfn (may be NULL). */
+static SpaprDrc *drc_from_devfn(SpaprPhbState *phb,
+                                uint8_t chassis, int32_t devfn)
+{
+    uint32_t id = drc_id_from_devfn(phb, chassis, devfn);
+
+    return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI, id);
+}
+
+/*
+ * Return the chassis number of @bus: 0 for a root bus, otherwise the
+ * "chassis_nr" property of the bridge the bus sits behind.
+ */
+static uint8_t chassis_from_bus(PCIBus *bus)
+{
+    PCIDevice *bridge;
+
+    if (pci_bus_is_root(bus)) {
+        return 0;
+    }
+
+    bridge = pci_bridge_get_device(bus);
+    return object_property_get_uint(OBJECT(bridge), "chassis_nr",
+                                    &error_abort);
+}
+
+/* Look up the DRC of @dev from its bus's chassis number and its devfn. */
+static SpaprDrc *drc_from_dev(SpaprPhbState *phb, PCIDevice *dev)
+{
+    return drc_from_devfn(phb, chassis_from_bus(pci_get_bus(dev)),
+                          dev->devfn);
+}
+
+/*
+ * Create one PCI DR connector per possible devfn on @bus.  The
+ * connectors are owned by the PHB for a root bus and by the bridge
+ * device otherwise.  Does nothing when dynamic reconfiguration is
+ * disabled on the PHB.
+ */
+static void add_drcs(SpaprPhbState *phb, PCIBus *bus)
+{
+    Object *owner;
+    uint8_t chassis;
+    int devfn;
+
+    if (!phb->dr_enabled) {
+        return;
+    }
+
+    chassis = chassis_from_bus(bus);
+    owner = pci_bus_is_root(bus) ? OBJECT(phb)
+                                 : OBJECT(pci_bridge_get_device(bus));
+
+    for (devfn = 0; devfn < PCI_SLOT_MAX * PCI_FUNC_MAX; devfn++) {
+        spapr_dr_connector_new(owner, TYPE_SPAPR_DRC_PCI,
+                               drc_id_from_devfn(phb, chassis, devfn));
+    }
+}
+
+/*
+ * Unparent every existing PCI DR connector of @bus, walking the devfn
+ * range from the highest value down to 0.  Does nothing when dynamic
+ * reconfiguration is disabled on the PHB.
+ */
+static void remove_drcs(SpaprPhbState *phb, PCIBus *bus)
+{
+    uint8_t chassis;
+    int devfn;
+
+    if (!phb->dr_enabled) {
+        return;
+    }
+
+    chassis = chassis_from_bus(bus);
+
+    for (devfn = PCI_SLOT_MAX * PCI_FUNC_MAX; devfn-- > 0; ) {
+        SpaprDrc *drc = drc_from_devfn(phb, chassis, devfn);
+
+        if (drc != NULL) {
+            object_unparent(OBJECT(drc));
+        }
+    }
+}
+
+/*
+ * State shared with spapr_dt_pci_device_cb() while walking the devices
+ * of one bus: the tree being built, the offset of the parent node, the
+ * owning PHB and the first error seen (0 while everything is fine).
+ */
+typedef struct PciWalkFdt {
+ void *fdt;
+ int offset;
+ SpaprPhbState *sphb;
+ int err;
+} PciWalkFdt;
+
+/* Forward declaration: the device and bus builders call each other */
+static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
+ void *fdt, int parent_offset);
+
+/*
+ * Per-device callback of the bus walk: add the node for @pdev under the
+ * parent recorded in the PciWalkFdt passed as @opaque.  The first
+ * negative result is latched in ->err and later devices are skipped.
+ */
+static void spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev,
+                                   void *opaque)
+{
+    PciWalkFdt *walk = opaque;
+
+    if (!walk->err) {
+        int rc = spapr_dt_pci_device(walk->sphb, pdev, walk->fdt,
+                                     walk->offset);
+
+        if (rc < 0) {
+            walk->err = rc;
+        }
+    }
+    /* else: something's already broken, don't keep going */
+}
+
+/*
+ * Augment the node at @offset with bus (bridge) specific information:
+ * the cell-size properties, a child node for every device on @bus, and
+ * the DRC properties of the bus owner.
+ *
+ * Returns @offset on success, or the error reported by the device walk
+ * or by spapr_dt_drc().
+ */
+static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus,
+                            void *fdt, int offset)
+{
+    PciWalkFdt walk;
+    Object *owner;
+    int rc;
+
+    walk.fdt = fdt;
+    walk.offset = offset;
+    walk.sphb = sphb;
+    walk.err = 0;
+
+    _FDT(fdt_setprop_cell(fdt, offset, "#address-cells",
+                          RESOURCE_CELLS_ADDRESS));
+    _FDT(fdt_setprop_cell(fdt, offset, "#size-cells",
+                          RESOURCE_CELLS_SIZE));
+
+    assert(bus);
+    pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &walk);
+    if (walk.err) {
+        return walk.err;
+    }
+
+    /* DRCs belong to the PHB on the root bus, to the bridge elsewhere */
+    owner = pci_bus_is_root(bus) ? OBJECT(sphb)
+                                 : OBJECT(pci_bridge_get_device(bus));
+
+    rc = spapr_dt_drc(fdt, offset, owner, SPAPR_DR_CONNECTOR_TYPE_PCI);
+    return rc ? rc : offset;
+}
+
+/*
+ * Build the firmware node name of @dev: a base name derived from the
+ * class code, followed by "@<slot>" and, for functions other than 0,
+ * ",<func>".  The returned string is newly allocated.
+ */
+char *spapr_pci_fw_dev_name(PCIDevice *dev)
+{
+    uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
+    uint8_t base_class = (ccode >> 16) & 0xff;
+    uint8_t sub_class = (ccode >> 8) & 0xff;
+    uint8_t prog_if = ccode & 0xff;
+    const gchar *basename = dt_name_from_class(base_class, sub_class, prog_if);
+    int slot = PCI_SLOT(dev->devfn);
+    int func = PCI_FUNC(dev->devfn);
+
+    if (func == 0) {
+        return g_strdup_printf("%s@%x", basename, slot);
+    }
+    return g_strdup_printf("%s@%x,%x", basename, slot, func);
+}
+
+/*
+ * create OF node for pci device and required OF DT properties
+ *
+ * The node is added under @parent_offset and filled from the device's
+ * config space.  For a bridge the secondary bus is described as well.
+ *
+ * Returns the offset of the new node for a non-bridge device, and the
+ * result of spapr_dt_pci_bus() (node offset or an error) for a bridge.
+ * Failing FDT calls wrapped in _FDT() do not return an error here.
+ */
+static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev,
+ void *fdt, int parent_offset)
+{
+ int offset;
+ g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev);
+ PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
+ ResourceProps rp;
+ SpaprDrc *drc = drc_from_dev(sphb, dev);
+ uint32_t vendor_id = pci_default_read_config(dev, PCI_VENDOR_ID, 2);
+ uint32_t device_id = pci_default_read_config(dev, PCI_DEVICE_ID, 2);
+ uint32_t revision_id = pci_default_read_config(dev, PCI_REVISION_ID, 1);
+ uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3);
+ uint32_t irq_pin = pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1);
+ uint32_t subsystem_id = pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2);
+ uint32_t subsystem_vendor_id =
+ pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2);
+ uint32_t cache_line_size =
+ pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1);
+ uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2);
+ gchar *loc_code;
+
+ _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename));
+
+ /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */
+ _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id));
+ _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id));
+ _FDT(fdt_setprop_cell(fdt, offset, "revision-id", revision_id));
+
+ _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode));
+ /* The next three properties are only emitted for non-zero values */
+ if (irq_pin) {
+ _FDT(fdt_setprop_cell(fdt, offset, "interrupts", irq_pin));
+ }
+
+ if (subsystem_id) {
+ _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", subsystem_id));
+ }
+
+ if (subsystem_vendor_id) {
+ _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id",
+ subsystem_vendor_id));
+ }
+
+ _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", cache_line_size));
+
+
+ /* the following fdt cells are masked off the pci status register */
+ _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed",
+ PCI_STATUS_DEVSEL_MASK & pci_status));
+
+ /* Capability flags are empty (zero-length) properties */
+ if (pci_status & PCI_STATUS_FAST_BACK) {
+ _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0));
+ }
+ if (pci_status & PCI_STATUS_66MHZ) {
+ _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0));
+ }
+ if (pci_status & PCI_STATUS_UDF) {
+ _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0));
+ }
+
+ /* loc_code is allocated by the helper and released once it is copied */
+ loc_code = spapr_phb_get_loc_code(sphb, dev);
+ _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", loc_code));
+ g_free(loc_code);
+
+ if (drc) {
+ _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index",
+ spapr_drc_index(drc)));
+ }
+
+ /* Advertise how many MSI / MSI-X vectors the device can request */
+ if (msi_present(dev)) {
+ uint32_t max_msi = msi_nr_vectors_allocated(dev);
+ if (max_msi) {
+ _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi));
+ }
+ }
+ if (msix_present(dev)) {
+ uint32_t max_msix = dev->msix_entries_nr;
+ if (max_msix) {
+ _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix));
+ }
+ }
+
+ /* Only the first rp.reg_len bytes of rp.reg are initialised and used */
+ populate_resource_props(dev, &rp);
+ _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len));
+
+ if (sphb->pcie_ecs && pci_is_express(dev)) {
+ _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1));
+ }
+
+ spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb);
+
+ if (!pc->is_bridge) {
+ /* Properties only for non-bridges */
+ uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1);
+ uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT, 1);
+ _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant));
+ _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency));
+ return offset;
+ } else {
+ /* Bridges: describe the secondary bus and everything below it */
+ PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+
+ return spapr_dt_pci_bus(sphb, sec_bus, fdt, offset);
+ }
+}
+
+/*
+ * Callback to be called during DRC release: run the unplug handler of
+ * @dev's hotplug controller (errors go to error_abort), then unparent
+ * the device object.
+ */
+void spapr_phb_remove_pci_device_cb(DeviceState *dev)
+{
+    HotplugHandler *handler;
+
+    handler = qdev_get_hotplug_handler(dev);
+    hotplug_handler_unplug(handler, dev, &error_abort);
+    object_unparent(OBJECT(dev));
+}
+
+/*
+ * Build the device tree node for the PCI device attached to @drc, using
+ * parent offset 0 in @fdt, and store the resulting node offset in
+ * @fdt_start_offset. @spapr and @errp are not used; always returns 0.
+ */
+int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr,
+                          void *fdt, int *fdt_start_offset, Error **errp)
+{
+    DeviceState *attached = drc->dev;
+    SpaprPhbState *sphb =
+        SPAPR_PCI_HOST_BRIDGE(qdev_get_hotplug_handler(attached));
+    PCIDevice *pci_dev = PCI_DEVICE(attached);
+    int node_off = spapr_dt_pci_device(sphb, pci_dev, fdt, 0);
+
+    *fdt_start_offset = node_off;
+    return 0;
+}
+
+/* Call add_drcs() on the secondary bus sitting behind @bridge. */
+static void spapr_pci_bridge_plug(SpaprPhbState *phb,
+                                  PCIBridge *bridge)
+{
+    add_drcs(phb, pci_bridge_get_sec_bus(bridge));
+}
+
+/*
+ * Child-iteration callback; @opaque is the bridge object being validated.
+ * Returns non-zero if the value of "chassis_nr" is already in use, i.e.
+ * @obj is a different PCI bridge with the same non-zero "chassis_nr".
+ */
+static int check_chassis_nr(Object *obj, void *opaque)
+{
+    int wanted = object_property_get_uint(opaque, "chassis_nr", &error_abort);
+    int current = object_property_get_uint(obj, "chassis_nr", NULL);
+
+    /* Ignore non-bridges, unsupported bridge types (no number) and self */
+    if (!object_dynamic_cast(obj, TYPE_PCI_BRIDGE) || !current ||
+        obj == opaque) {
+        return 0;
+    }
+
+    return current == wanted;
+}
+
+/*
+ * Check that @bridge has a non-zero "chassis_nr" that no other object
+ * under the QOM root reports (see check_chassis_nr()). Returns true when
+ * valid; otherwise sets @errp and returns false.
+ */
+static bool bridge_has_valid_chassis_nr(Object *bridge, Error **errp)
+{
+    int nr = object_property_get_uint(bridge, "chassis_nr", NULL);
+    int clash;
+
+    /*
+     * slotid_cap_init() already ensures that "chassis_nr" isn't null for
+     * standard PCI bridges, so a zero here really means the property is
+     * not present at all.
+     */
+    if (nr == 0) {
+        error_setg(errp, "PCI Bridge lacks a \"chassis_nr\" property");
+        error_append_hint(errp, "Try -device pci-bridge instead.\n");
+        return false;
+    }
+
+    /* "chassis_nr" values must be unique */
+    clash = object_child_foreach_recursive(object_get_root(),
+                                           check_chassis_nr, bridge);
+    if (clash) {
+        error_setg(errp, "Bridge chassis %d already in use", nr);
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Hotplug handler pre_plug hook for PCI devices below a sPAPR PHB.
+ *
+ * Sets @errp and returns when:
+ *  - the device is hotplugged while dynamic reconfiguration is disabled,
+ *  - the device is a bridge without a valid, unique "chassis_nr",
+ *  - a non-zero function is hotplugged into a slot whose function 0 is
+ *    already present,
+ *  - the DRC for the device already has a device attached.
+ *
+ * Every failure path returns right after error_setg(): calling it a
+ * second time on an @errp that already holds an error is not allowed.
+ */
+static void spapr_pci_pre_plug(HotplugHandler *plug_handler,
+                               DeviceState *plugged_dev, Error **errp)
+{
+    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+    SpaprDrc *drc = drc_from_dev(phb, pdev);
+    PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
+    uint32_t slotnr = PCI_SLOT(pdev->devfn);
+
+    if (!phb->dr_enabled) {
+        /* if this is a hotplug operation initiated by the user
+         * we need to let them know it's not enabled
+         */
+        if (plugged_dev->hotplugged) {
+            error_setg(errp, QERR_BUS_NO_HOTPLUG,
+                       object_get_typename(OBJECT(phb)));
+            return;
+        }
+    }
+
+    if (pc->is_bridge) {
+        if (!bridge_has_valid_chassis_nr(OBJECT(plugged_dev), errp)) {
+            return;
+        }
+    }
+
+    /* Following the QEMU convention used for PCIe multifunction
+     * hotplug, we do not allow functions to be hotplugged to a
+     * slot that already has function 0 present
+     */
+    if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
+        PCI_FUNC(pdev->devfn) != 0) {
+        error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
+                   " additional functions can no longer be exposed to guest.",
+                   slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
+        /* Stop here so the check below cannot set @errp a second time */
+        return;
+    }
+
+    if (drc && drc->dev) {
+        error_setg(errp, "PCI: slot %d already occupied by %s", slotnr,
+                   pci_get_function_0(PCI_DEVICE(drc->dev))->name);
+        return;
+    }
+}
+
+/*
+ * Hotplug handler plug hook for PCI devices below a sPAPR PHB.
+ *
+ * Does nothing when DR is disabled on the PHB. Otherwise creates DRCs for
+ * a bridge's secondary bus, attaches @plugged_dev to its DRC, and then
+ * either resets the DRC (device not hotplugged) or, for a hotplugged
+ * function 0, sends an add request for every function of the slot whose
+ * DRC reports the entity as present. @errp is not written here.
+ */
+static void spapr_pci_plug(HotplugHandler *plug_handler,
+ DeviceState *plugged_dev, Error **errp)
+{
+ SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+ PCIDevice *pdev = PCI_DEVICE(plugged_dev);
+ PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+ SpaprDrc *drc = drc_from_dev(phb, pdev);
+ uint32_t slotnr = PCI_SLOT(pdev->devfn);
+
+ /*
+ * If DR is disabled we don't need to do anything in the case of
+ * hotplug or coldplug callbacks.
+ */
+ if (!phb->dr_enabled) {
+ return;
+ }
+
+ g_assert(drc);
+
+ if (pc->is_bridge) {
+ spapr_pci_bridge_plug(phb, PCI_BRIDGE(plugged_dev));
+ }
+
+ /* spapr_pci_pre_plug() already checked the DRC is attachable */
+ spapr_drc_attach(drc, DEVICE(pdev));
+
+ /* If this is function 0, signal hotplug for all the device functions.
+ * Otherwise defer sending the hotplug event.
+ */
+ if (!spapr_drc_hotplugged(plugged_dev)) {
+ spapr_drc_reset(drc);
+ } else if (PCI_FUNC(pdev->devfn) == 0) {
+ int i;
+ uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
+
+ /* Walk all 8 possible functions of this slot */
+ for (i = 0; i < 8; i++) {
+ SpaprDrc *func_drc;
+ SpaprDrcClass *func_drck;
+ SpaprDREntitySense state;
+
+ func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
+ func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+ state = func_drck->dr_entity_sense(func_drc);
+
+ if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+ spapr_hotplug_req_add_by_index(func_drc);
+ }
+ }
+ }
+}
+
+/* Call remove_drcs() on the secondary bus sitting behind @bridge. */
+static void spapr_pci_bridge_unplug(SpaprPhbState *phb,
+                                    PCIBridge *bridge)
+{
+    remove_drcs(phb, pci_bridge_get_sec_bus(bridge));
+}
+
+/*
+ * Hotplug handler unplug hook: reset the PCI device, then either drop the
+ * DRCs of a bridge's secondary bus or unrealize a non-bridge device.
+ * @errp is not written here.
+ */
+static void spapr_pci_unplug(HotplugHandler *plug_handler,
+                             DeviceState *plugged_dev, Error **errp)
+{
+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+
+    /*
+     * Some guest versions do not wait for the (usually asynchronous)
+     * kernel-side device cleanup to finish before telling QEMU the device
+     * is safe to remove; they merely sleep for a 'safe' period. On a busy
+     * host that sleep may be too short, leaving e.g. IRQ lines asserted
+     * during final removal. Resetting the device just before finalizing
+     * it puts it back into the 'idle' state the cleanup code expects.
+     */
+    pci_device_reset(PCI_DEVICE(plugged_dev));
+
+    if (!pc->is_bridge) {
+        qdev_unrealize(plugged_dev);
+        return;
+    }
+
+    spapr_pci_bridge_unplug(phb, PCI_BRIDGE(plugged_dev));
+}
+
+/*
+ * Hotplug handler unplug_request hook for PCI devices below a sPAPR PHB.
+ *
+ * Sets @errp when DR is disabled on the PHB, when the device is a PCI
+ * bridge, when its "nvlink2-tgt" property reads non-zero, or when an
+ * unplug was already requested on its DRC. Otherwise requests unplug on
+ * the DRC; for function 0 it first requests unplug on every other present
+ * function of the slot and then sends a remove request for each present
+ * function.
+ */
+static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
+ DeviceState *plugged_dev, Error **errp)
+{
+ SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+ PCIDevice *pdev = PCI_DEVICE(plugged_dev);
+ SpaprDrc *drc = drc_from_dev(phb, pdev);
+
+ if (!phb->dr_enabled) {
+ error_setg(errp, QERR_BUS_NO_HOTPLUG,
+ object_get_typename(OBJECT(phb)));
+ return;
+ }
+
+ g_assert(drc);
+ g_assert(drc->dev == plugged_dev);
+
+ if (!spapr_drc_unplug_requested(drc)) {
+ PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+ uint32_t slotnr = PCI_SLOT(pdev->devfn);
+ SpaprDrc *func_drc;
+ SpaprDrcClass *func_drck;
+ SpaprDREntitySense state;
+ int i;
+ uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
+
+ if (pc->is_bridge) {
+ error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
+ return;
+ }
+ if (object_property_get_uint(OBJECT(pdev), "nvlink2-tgt", NULL)) {
+ error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
+ return;
+ }
+
+ /* ensure any other present functions are pending unplug */
+ if (PCI_FUNC(pdev->devfn) == 0) {
+ for (i = 1; i < 8; i++) {
+ func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
+ func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+ state = func_drck->dr_entity_sense(func_drc);
+ if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
+ && !spapr_drc_unplug_requested(func_drc)) {
+ /*
+ * Attempting to remove function 0 of a multifunction
+ * device will cascade into removing all child
+ * functions, even if their unplug wasn't requested
+ * beforehand.
+ */
+ spapr_drc_unplug_request(func_drc);
+ }
+ }
+ }
+
+ spapr_drc_unplug_request(drc);
+
+ /* if this isn't func 0, defer unplug event. otherwise signal removal
+ * for all present functions
+ */
+ if (PCI_FUNC(pdev->devfn) == 0) {
+ /* Functions are signalled from 7 down to 0 */
+ for (i = 7; i >= 0; i--) {
+ func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
+ func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+ state = func_drck->dr_entity_sense(func_drc);
+ if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+ spapr_hotplug_req_remove_by_index(func_drc);
+ }
+ }
+ }
+ } else {
+ error_setg(errp,
+ "PCI device unplug already in progress for device %s",
+ drc->dev->id);
+ }
+}
+
+/* Instance finalizer: release the device tree bus name string. */
+static void spapr_phb_finalizefn(Object *obj)
+{
+    SpaprPhbState *host = SPAPR_PCI_HOST_BRIDGE(obj);
+
+    /* Frees the string (no-op if already NULL) and clears the field */
+    g_clear_pointer(&host->dtbusname, g_free);
+}
+
+/*
+ * Unrealize hook of the sPAPR PHB. Also invoked by spapr_phb_realize() as
+ * its rollback path on failure.
+ *
+ * Frees NVGPU state and the MSI hash table, removes the TCE table and MSI
+ * window subregions from the IOMMU root, removes the DRCs of the root bus,
+ * frees the LSI IRQs, unlinks the PHB from the machine's list, destroys
+ * the IOMMU address space, unregisters the root bus and finally removes
+ * the IO/MMIO window aliases from system memory.
+ */
+static void spapr_phb_unrealize(DeviceState *dev)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ SysBusDevice *s = SYS_BUS_DEVICE(dev);
+ PCIHostState *phb = PCI_HOST_BRIDGE(s);
+ SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(phb);
+ SpaprTceTable *tcet;
+ int i;
+ const unsigned windows_supported = spapr_phb_windows_supported(sphb);
+
+ spapr_phb_nvgpu_free(sphb);
+
+ /* The table may not exist yet when called from a failed realize */
+ if (sphb->msi) {
+ g_hash_table_unref(sphb->msi);
+ sphb->msi = NULL;
+ }
+
+ /*
+ * Remove IO/MMIO subregions and aliases, rest should get cleaned
+ * via PHB's unrealize->object_finalize
+ */
+ for (i = windows_supported - 1; i >= 0; i--) {
+ tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]);
+ if (tcet) {
+ memory_region_del_subregion(&sphb->iommu_root,
+ spapr_tce_get_iommu(tcet));
+ }
+ }
+
+ remove_drcs(sphb, phb->bus);
+
+ /* Only free LSIs that were actually claimed (non-zero irq) */
+ for (i = PCI_NUM_PINS - 1; i >= 0; i--) {
+ if (sphb->lsi_table[i].irq) {
+ spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1);
+ sphb->lsi_table[i].irq = 0;
+ }
+ }
+
+ QLIST_REMOVE(sphb, list);
+
+ memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow);
+
+ /*
+ * An attached PCI device may have memory listeners, eg. VFIO PCI. We have
+ * unmapped all sections. Remove the listeners now, before destroying the
+ * address space.
+ */
+ address_space_remove_listeners(&sphb->iommu_as);
+ address_space_destroy(&sphb->iommu_as);
+
+ qbus_set_hotplug_handler(BUS(phb->bus), NULL);
+ pci_unregister_root_bus(phb->bus);
+
+ memory_region_del_subregion(get_system_memory(), &sphb->iowindow);
+ /* (hwaddr)-1 marks the 64-bit window PCI address as unset */
+ if (sphb->mem64_win_pciaddr != (hwaddr)-1) {
+ memory_region_del_subregion(get_system_memory(), &sphb->mem64window);
+ }
+ memory_region_del_subregion(get_system_memory(), &sphb->mem32window);
+}
+
+/*
+ * Value destructor for the PHB's MSI hash table: release the IRQ range
+ * described by the SpaprPciMsi in @opaque and free the structure itself.
+ */
+static void spapr_phb_destroy_msi(gpointer opaque)
+{
+    SpaprPciMsi *entry = opaque;
+    SpaprMachineState *machine = SPAPR_MACHINE(qdev_get_machine());
+    SpaprMachineClass *machine_class = SPAPR_MACHINE_GET_CLASS(machine);
+
+    /* The MSI allocator is only involved with non-legacy IRQ allocation */
+    if (!machine_class->legacy_irq_allocation) {
+        spapr_irq_msi_free(machine, entry->first_irq, entry->num);
+    }
+    spapr_irq_free(machine, entry->first_irq, entry->num);
+
+    g_free(entry);
+}
+
+/*
+ * Realize hook of the sPAPR PCI host bridge.
+ *
+ * Validates the machine type, memory window sizes, BUID uniqueness and
+ * NUMA node; creates the MMIO and IO memory regions and their aliases in
+ * system memory; registers the root PCI bus and makes the PHB its hotplug
+ * handler; sets up the IOMMU root region, its address space and the MSI
+ * window; claims one LSI per PCI pin; adds DRCs for the root bus; creates
+ * the TCE tables; and finally allocates the MSI hash table.
+ *
+ * On failure @errp is set. Failures after the bus has been registered go
+ * through spapr_phb_unrealize() for rollback, except the legacy IRQ
+ * allocation failure, which returns directly.
+ */
+static void spapr_phb_realize(DeviceState *dev, Error **errp)
+{
+ ERRP_GUARD();
+ /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user
+ * tries to add a sPAPR PHB to a non-pseries machine.
+ */
+ SpaprMachineState *spapr =
+ (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(),
+ TYPE_SPAPR_MACHINE);
+ SpaprMachineClass *smc = spapr ? SPAPR_MACHINE_GET_CLASS(spapr) : NULL;
+ SysBusDevice *s = SYS_BUS_DEVICE(dev);
+ SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
+ PCIHostState *phb = PCI_HOST_BRIDGE(s);
+ MachineState *ms = MACHINE(spapr);
+ char *namebuf;
+ int i;
+ PCIBus *bus;
+ uint64_t msi_window_size = 4096;
+ SpaprTceTable *tcet;
+ const unsigned windows_supported = spapr_phb_windows_supported(sphb);
+
+ if (!spapr) {
+ error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine");
+ return;
+ }
+
+ assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */
+
+ if (sphb->mem64_win_size != 0) {
+ if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
+ error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx
+ " (max 2 GiB)", sphb->mem_win_size);
+ return;
+ }
+
+ /* 64-bit window defaults to identity mapping */
+ sphb->mem64_win_pciaddr = sphb->mem64_win_addr;
+ } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) {
+ /*
+ * For compatibility with old configuration, if no 64-bit MMIO
+ * window is specified, but the ordinary (32-bit) memory
+ * window is specified as > 2GiB, we treat it as a 2GiB 32-bit
+ * window, with a 64-bit MMIO window following on immediately
+ * afterwards
+ */
+ sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE;
+ sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE;
+ sphb->mem64_win_pciaddr =
+ SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE;
+ sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE;
+ }
+
+ /* Reject a PHB whose BUID matches one already known to the machine */
+ if (spapr_pci_find_phb(spapr, sphb->buid)) {
+ SpaprPhbState *s;
+
+ error_setg(errp, "PCI host bridges must have unique indexes");
+ error_append_hint(errp, "The following indexes are already in use:");
+ QLIST_FOREACH(s, &spapr->phbs, list) {
+ error_append_hint(errp, " %d", s->index);
+ }
+ error_append_hint(errp, "\nTry another value for the index property\n");
+ return;
+ }
+
+ /* -1 means no NUMA node was set; anything else must name a present node */
+ if (sphb->numa_node != -1 &&
+ (sphb->numa_node >= MAX_NODES ||
+ !ms->numa_state->nodes[sphb->numa_node].present)) {
+ error_setg(errp, "Invalid NUMA node ID for PCI host bridge");
+ return;
+ }
+
+ sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid);
+
+ /* Initialize memory regions */
+ namebuf = g_strdup_printf("%s.mmio", sphb->dtbusname);
+ memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX);
+ g_free(namebuf);
+
+ namebuf = g_strdup_printf("%s.mmio32-alias", sphb->dtbusname);
+ memory_region_init_alias(&sphb->mem32window, OBJECT(sphb),
+ namebuf, &sphb->memspace,
+ SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size);
+ g_free(namebuf);
+ memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr,
+ &sphb->mem32window);
+
+ if (sphb->mem64_win_size != 0) {
+ namebuf = g_strdup_printf("%s.mmio64-alias", sphb->dtbusname);
+ memory_region_init_alias(&sphb->mem64window, OBJECT(sphb),
+ namebuf, &sphb->memspace,
+ sphb->mem64_win_pciaddr, sphb->mem64_win_size);
+ g_free(namebuf);
+
+ memory_region_add_subregion(get_system_memory(),
+ sphb->mem64_win_addr,
+ &sphb->mem64window);
+ }
+
+ /* Initialize IO regions */
+ namebuf = g_strdup_printf("%s.io", sphb->dtbusname);
+ memory_region_init(&sphb->iospace, OBJECT(sphb),
+ namebuf, SPAPR_PCI_IO_WIN_SIZE);
+ g_free(namebuf);
+
+ namebuf = g_strdup_printf("%s.io-alias", sphb->dtbusname);
+ memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf,
+ &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE);
+ g_free(namebuf);
+ memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
+ &sphb->iowindow);
+
+ bus = pci_register_root_bus(dev, NULL,
+ pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
+ &sphb->memspace, &sphb->iospace,
+ PCI_DEVFN(0, 0), PCI_NUM_PINS,
+ TYPE_PCI_BUS);
+
+ /*
+ * Despite resembling a vanilla PCI bus in most ways, the PAPR
+ * para-virtualized PCI bus *does* permit PCI-E extended config
+ * space access
+ */
+ if (sphb->pcie_ecs) {
+ bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+ }
+ phb->bus = bus;
+ qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb));
+
+ /*
+ * Initialize PHB address space.
+ * By default there will be at least one subregion for default
+ * 32bit DMA window.
+ * Later the guest might want to create another DMA window
+ * which will become another memory subregion.
+ */
+ namebuf = g_strdup_printf("%s.iommu-root", sphb->dtbusname);
+ memory_region_init(&sphb->iommu_root, OBJECT(sphb),
+ namebuf, UINT64_MAX);
+ g_free(namebuf);
+ address_space_init(&sphb->iommu_as, &sphb->iommu_root,
+ sphb->dtbusname);
+
+ /*
+ * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
+ * we need to allocate some memory to catch those writes coming
+ * from msi_notify()/msix_notify().
+ * As MSIMessage:addr is going to be the same and MSIMessage:data
+ * is going to be a VIRQ number, 4 bytes of the MSI MR will only
+ * be used.
+ *
+ * For KVM we want to ensure that this memory is a full page so that
+ * our memory slot is of page size granularity.
+ */
+ if (kvm_enabled()) {
+ msi_window_size = qemu_real_host_page_size;
+ }
+
+ memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), &spapr_msi_ops, spapr,
+ "msi", msi_window_size);
+ memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
+ &sphb->msiwindow);
+
+ pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
+
+ pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
+
+ QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
+
+ /* Initialize the LSI table */
+ for (i = 0; i < PCI_NUM_PINS; i++) {
+ /* Default IRQ number is derived from the PHB index and the pin */
+ int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
+
+ if (smc->legacy_irq_allocation) {
+ irq = spapr_irq_findone(spapr, errp);
+ if (irq < 0) {
+ error_prepend(errp, "can't allocate LSIs: ");
+ /*
+ * Older machines will never support PHB hotplug, ie, this is an
+ * init only path and QEMU will terminate. No need to rollback.
+ */
+ return;
+ }
+ }
+
+ if (spapr_irq_claim(spapr, irq, true, errp) < 0) {
+ error_prepend(errp, "can't allocate LSIs: ");
+ goto unrealize;
+ }
+
+ sphb->lsi_table[i].irq = irq;
+ }
+
+ /* allocate connectors for child PCI devices */
+ add_drcs(sphb, phb->bus);
+
+ /* DMA setup */
+ for (i = 0; i < windows_supported; ++i) {
+ tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]);
+ if (!tcet) {
+ error_setg(errp, "Creating window#%d failed for %s",
+ i, sphb->dtbusname);
+ goto unrealize;
+ }
+ memory_region_add_subregion(&sphb->iommu_root, 0,
+ spapr_tce_get_iommu(tcet));
+ }
+
+ /* Keys are freed with g_free(), values with spapr_phb_destroy_msi() */
+ sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free,
+ spapr_phb_destroy_msi);
+ return;
+
+unrealize:
+ /* Roll back everything set up so far */
+ spapr_phb_unrealize(dev);
+}
+
+/*
+ * object_child_foreach() callback: legacy-reset @child if it is a device.
+ * Always returns 0 so that iteration continues; @opaque is unused.
+ */
+static int spapr_phb_children_reset(Object *child, void *opaque)
+{
+    Object *as_device = object_dynamic_cast(child, TYPE_DEVICE);
+
+    if (as_device != NULL) {
+        device_legacy_reset((DeviceState *) as_device);
+    }
+
+    return 0;
+}
+
+/*
+ * Disable every DMA window of @sphb that has a TCE table with a non-zero
+ * nb_table, then enable window 0 again using the configured
+ * dma_win_addr / dma_win_size.
+ */
+void spapr_phb_dma_reset(SpaprPhbState *sphb)
+{
+    SpaprTceTable *table;
+    int win;
+
+    for (win = 0; win < SPAPR_PCI_DMA_MAX_WINDOWS; win++) {
+        table = spapr_tce_find_by_liobn(sphb->dma_liobn[win]);
+        if (!table || !table->nb_table) {
+            continue;
+        }
+        spapr_tce_table_disable(table);
+    }
+
+    /* Register default 32bit DMA window */
+    table = spapr_tce_find_by_liobn(sphb->dma_liobn[0]);
+    spapr_tce_table_enable(table, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr,
+                           sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT);
+}
+
+/*
+ * Device reset hook of the PHB: reset the DMA windows, rebuild the NVGPU
+ * state (a setup error is only reported), reset the child devices, reset
+ * VFIO when EEH is available, and drop all entries of the MSI table.
+ */
+static void spapr_phb_reset(DeviceState *qdev)
+{
+    Error *local_err = NULL;
+    SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
+
+    spapr_phb_dma_reset(sphb);
+    spapr_phb_nvgpu_free(sphb);
+    spapr_phb_nvgpu_setup(sphb, &local_err);
+    if (local_err != NULL) {
+        error_report_err(local_err);
+    }
+
+    /* Reset the IOMMU state */
+    object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
+
+    if (spapr_phb_eeh_available(sphb)) {
+        spapr_phb_vfio_reset(qdev);
+    }
+
+    g_hash_table_remove_all(sphb->msi);
+}
+
+/*
+ * qdev properties of the sPAPR PHB, mapping each property name to a field
+ * of SpaprPhbState together with its default value.
+ */
+static Property spapr_phb_properties[] = {
+ /* -1 is the "unset" marker; realize asserts a real index was assigned */
+ DEFINE_PROP_UINT32("index", SpaprPhbState, index, -1),
+ DEFINE_PROP_UINT64("mem_win_size", SpaprPhbState, mem_win_size,
+ SPAPR_PCI_MEM32_WIN_SIZE),
+ DEFINE_PROP_UINT64("mem64_win_size", SpaprPhbState, mem64_win_size,
+ SPAPR_PCI_MEM64_WIN_SIZE),
+ DEFINE_PROP_UINT64("io_win_size", SpaprPhbState, io_win_size,
+ SPAPR_PCI_IO_WIN_SIZE),
+ DEFINE_PROP_BOOL("dynamic-reconfiguration", SpaprPhbState, dr_enabled,
+ true),
+ /* Default DMA window is 0..1GB */
+ DEFINE_PROP_UINT64("dma_win_addr", SpaprPhbState, dma_win_addr, 0),
+ DEFINE_PROP_UINT64("dma_win_size", SpaprPhbState, dma_win_size, 0x40000000),
+ DEFINE_PROP_UINT64("dma64_win_addr", SpaprPhbState, dma64_win_addr,
+ 0x800000000000000ULL),
+ DEFINE_PROP_BOOL("ddw", SpaprPhbState, ddw_enabled, true),
+ /* Default mask has bits 12, 16, 21 and 24 set */
+ DEFINE_PROP_UINT64("pgsz", SpaprPhbState, page_size_mask,
+ (1ULL << 12) | (1ULL << 16)
+ | (1ULL << 21) | (1ULL << 24)),
+ /* -1 means no NUMA node assigned */
+ DEFINE_PROP_UINT32("numa_node", SpaprPhbState, numa_node, -1),
+ DEFINE_PROP_BOOL("pre-2.8-migration", SpaprPhbState,
+ pre_2_8_migration, false),
+ DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState,
+ pcie_ecs, true),
+ DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0),
+ DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0),
+ DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState,
+ pre_5_1_assoc, false),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Migration description of one SpaprPciLsi entry: only the irq field,
+ * declared with the _EQUAL variant of the field macro.
+ */
+static const VMStateDescription vmstate_spapr_pci_lsi = {
+ .name = "spapr_pci/lsi",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32_EQUAL(irq, SpaprPciLsi, NULL),
+
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Migration description of one SpaprPciMsiMig entry: the hash table key
+ * plus the first_irq and num fields of the value.
+ */
+static const VMStateDescription vmstate_spapr_pci_msi = {
+ .name = "spapr_pci/msi",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField []) {
+ VMSTATE_UINT32(key, SpaprPciMsiMig),
+ VMSTATE_UINT32(value.first_irq, SpaprPciMsiMig),
+ VMSTATE_UINT32(value.num, SpaprPciMsiMig),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * VMState pre_save hook. Fills the mig_* compatibility fields when
+ * pre_2_8_migration is set, then copies the MSI hash table into the
+ * freshly allocated msi_devs array (left NULL when the table is empty).
+ * Always returns 0.
+ */
+static int spapr_pci_pre_save(void *opaque)
+{
+    SpaprPhbState *sphb = opaque;
+    GHashTableIter it;
+    gpointer k, v;
+    int n = 0;
+
+    if (sphb->pre_2_8_migration) {
+        sphb->mig_liobn = sphb->dma_liobn[0];
+        sphb->mig_mem_win_addr = sphb->mem_win_addr;
+        sphb->mig_mem_win_size = sphb->mem_win_size;
+        sphb->mig_io_win_addr = sphb->io_win_addr;
+        sphb->mig_io_win_size = sphb->io_win_size;
+
+        /* Fold in a 64-bit window that directly follows the 32-bit one */
+        if (sphb->mem64_win_size != 0 &&
+            sphb->mem64_win_addr ==
+            (sphb->mem_win_addr + sphb->mem_win_size)) {
+            sphb->mig_mem_win_size += sphb->mem64_win_size;
+        }
+    }
+
+    /* Drop any previous snapshot before building a new one */
+    g_free(sphb->msi_devs);
+    sphb->msi_devs = NULL;
+    sphb->msi_devs_num = g_hash_table_size(sphb->msi);
+    if (sphb->msi_devs_num == 0) {
+        return 0;
+    }
+    sphb->msi_devs = g_new(SpaprPciMsiMig, sphb->msi_devs_num);
+
+    g_hash_table_iter_init(&it, sphb->msi);
+    while (g_hash_table_iter_next(&it, &k, &v)) {
+        SpaprPciMsiMig *slot = &sphb->msi_devs[n];
+
+        slot->key = *(uint32_t *) k;
+        slot->value = *(SpaprPciMsi *) v;
+        n++;
+    }
+
+    return 0;
+}
+
+/*
+ * VMState post_save hook: discard the msi_devs snapshot built by
+ * spapr_pci_pre_save(). Always returns 0.
+ */
+static int spapr_pci_post_save(void *opaque)
+{
+    SpaprPhbState *host = opaque;
+
+    g_free(host->msi_devs);
+    host->msi_devs_num = 0;
+    host->msi_devs = NULL;
+
+    return 0;
+}
+
+/*
+ * VMState post_load hook: insert a heap copy of every loaded msi_devs
+ * entry (key and value) into the MSI hash table, then free the array.
+ * @version_id is unused; always returns 0.
+ */
+static int spapr_pci_post_load(void *opaque, int version_id)
+{
+    SpaprPhbState *sphb = opaque;
+    int idx = 0;
+
+    while (idx < sphb->msi_devs_num) {
+        SpaprPciMsiMig *entry = &sphb->msi_devs[idx];
+        gpointer key_copy = g_memdup(&entry->key, sizeof(entry->key));
+        gpointer value_copy = g_memdup(&entry->value, sizeof(entry->value));
+
+        g_hash_table_insert(sphb->msi, key_copy, value_copy);
+        ++idx;
+    }
+
+    g_free(sphb->msi_devs);
+    sphb->msi_devs = NULL;
+    sphb->msi_devs_num = 0;
+
+    return 0;
+}
+
+/*
+ * VMState field test: report whether the PHB in @opaque has its
+ * pre_2_8_migration flag set. @version_id is unused.
+ */
+static bool pre_2_8_migration(void *opaque, int version_id)
+{
+    return ((SpaprPhbState *) opaque)->pre_2_8_migration;
+}
+
+/*
+ * Migration description of the sPAPR PHB. The mig_* fields are only part
+ * of the stream when the pre_2_8_migration() test returns true; the MSI
+ * table travels as the msi_devs array sized by msi_devs_num, which the
+ * pre_save/post_save/post_load hooks build, free and consume.
+ */
+static const VMStateDescription vmstate_spapr_pci = {
+ .name = "spapr_pci",
+ .version_id = 2,
+ .minimum_version_id = 2,
+ .pre_save = spapr_pci_pre_save,
+ .post_save = spapr_pci_post_save,
+ .post_load = spapr_pci_post_load,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL),
+ VMSTATE_UINT32_TEST(mig_liobn, SpaprPhbState, pre_2_8_migration),
+ VMSTATE_UINT64_TEST(mig_mem_win_addr, SpaprPhbState, pre_2_8_migration),
+ VMSTATE_UINT64_TEST(mig_mem_win_size, SpaprPhbState, pre_2_8_migration),
+ VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration),
+ VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration),
+ VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0,
+ vmstate_spapr_pci_lsi, SpaprPciLsi),
+ VMSTATE_INT32(msi_devs_num, SpaprPhbState),
+ VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0,
+ vmstate_spapr_pci_msi, SpaprPciMsiMig),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * PCIHostBridgeClass root_bus_path hook: the path is the PHB's device
+ * tree bus name. @rootbus is unused.
+ */
+static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge,
+                                           PCIBus *rootbus)
+{
+    return SPAPR_PCI_HOST_BRIDGE(host_bridge)->dtbusname;
+}
+
+/*
+ * Class initializer: install the host bridge, device and hotplug handler
+ * callbacks of the sPAPR PHB type. @data is unused.
+ */
+static void spapr_phb_class_init(ObjectClass *klass, void *data)
+{
+    PCIHostBridgeClass *host_class = PCI_HOST_BRIDGE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    HotplugHandlerClass *hotplug_class = HOTPLUG_HANDLER_CLASS(klass);
+
+    /* Host bridge */
+    host_class->root_bus_path = spapr_phb_root_bus_path;
+
+    /* Device life cycle, properties and migration */
+    dev_class->realize = spapr_phb_realize;
+    dev_class->unrealize = spapr_phb_unrealize;
+    dev_class->reset = spapr_phb_reset;
+    dev_class->vmsd = &vmstate_spapr_pci;
+    device_class_set_props(dev_class, spapr_phb_properties);
+    /* Supported by TYPE_SPAPR_MACHINE */
+    dev_class->user_creatable = true;
+    set_bit(DEVICE_CATEGORY_BRIDGE, dev_class->categories);
+
+    /* Hotplug of PCI devices below the PHB */
+    hotplug_class->pre_plug = spapr_pci_pre_plug;
+    hotplug_class->plug = spapr_pci_plug;
+    hotplug_class->unplug = spapr_pci_unplug;
+    hotplug_class->unplug_request = spapr_pci_unplug_request;
+}
+
+/*
+ * QOM type description of the sPAPR PHB: a PCI host bridge subtype that
+ * also implements the hotplug handler interface.
+ */
+static const TypeInfo spapr_phb_info = {
+ .name = TYPE_SPAPR_PCI_HOST_BRIDGE,
+ .parent = TYPE_PCI_HOST_BRIDGE,
+ .instance_size = sizeof(SpaprPhbState),
+ .instance_finalize = spapr_phb_finalizefn,
+ .class_init = spapr_phb_class_init,
+ .interfaces = (InterfaceInfo[]) {
+ { TYPE_HOTPLUG_HANDLER },
+ { }
+ }
+};
+
+/*
+ * Per-device callback for pci_for_each_device_under_bus(). @opaque points
+ * to the running bus number counter.
+ *
+ * Devices whose header type is not "bridge" are ignored. For a bridge the
+ * counter is incremented and written as secondary and (provisionally)
+ * subordinate bus number; after recursing into the secondary bus, the
+ * subordinate bus number is rewritten with the counter's final value.
+ * @bus is unused.
+ */
+static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev,
+ void *opaque)
+{
+ unsigned int *bus_no = opaque;
+ PCIBus *sec_bus = NULL;
+
+ if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
+ PCI_HEADER_TYPE_BRIDGE)) {
+ return;
+ }
+
+ (*bus_no)++;
+ pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1);
+ pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1);
+ pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
+
+ sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+ if (!sec_bus) {
+ return;
+ }
+
+ pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge,
+ bus_no);
+ /* The counter now covers every bus found below this bridge */
+ pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1);
+}
+
+/*
+ * Assign bus numbers to all bridges below @phb, starting the counter at 0
+ * on the root bus.
+ */
+static void spapr_phb_pci_enumerate(SpaprPhbState *phb)
+{
+    unsigned int next_bus = 0;
+    PCIBus *root = PCI_HOST_BRIDGE(phb)->bus;
+
+    pci_for_each_device_under_bus(root, spapr_phb_pci_enumerate_bridge,
+                                  &next_bus);
+}
+
+/*
+ * Create the device tree node of @phb as a subnode of offset 0 in @fdt.
+ *
+ * @intc_phandle is stored in every interrupt-map entry. If @node_offset is
+ * non-NULL it receives the offset of the new node.
+ *
+ * Returns 0 on success, -1 if no TCE table is found for the first DMA
+ * LIOBN, or the negative value returned by spapr_dt_pci_bus(). An error
+ * from spapr_phb_nvgpu_populate_dt() is only reported.
+ */
+int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb,
+ uint32_t intc_phandle, void *fdt, int *node_offset)
+{
+ int bus_off, i, j, ret;
+ uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) };
+ /* One "ranges" entry per window: IO, 32-bit MMIO, 64-bit MMIO */
+ struct {
+ uint32_t hi;
+ uint64_t child;
+ uint64_t parent;
+ uint64_t size;
+ } QEMU_PACKED ranges[] = {
+ {
+ cpu_to_be32(b_ss(1)), cpu_to_be64(0),
+ cpu_to_be64(phb->io_win_addr),
+ cpu_to_be64(memory_region_size(&phb->iospace)),
+ },
+ {
+ cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET),
+ cpu_to_be64(phb->mem_win_addr),
+ cpu_to_be64(phb->mem_win_size),
+ },
+ {
+ cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr),
+ cpu_to_be64(phb->mem64_win_addr),
+ cpu_to_be64(phb->mem64_win_size),
+ },
+ };
+ /* The third entry is only emitted when a 64-bit window is configured */
+ const unsigned sizeof_ranges =
+ (phb->mem64_win_size ? 3 : 2) * sizeof(ranges[0]);
+ uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
+ uint32_t interrupt_map_mask[] = {
+ cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
+ uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
+ uint32_t ddw_applicable[] = {
+ cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
+ cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
+ cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
+ };
+ uint32_t ddw_extensions[] = {
+ cpu_to_be32(1),
+ cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
+ };
+ SpaprTceTable *tcet;
+ SpaprDrc *drc;
+ Error *err = NULL;
+
+ /* Start populating the FDT */
+ _FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname));
+ if (node_offset) {
+ *node_offset = bus_off;
+ }
+
+ /* Write PHB properties */
+ _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
+ _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
+ _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
+ _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
+ _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
+ _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
+ _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
+ _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
+ _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi",
+ spapr_irq_nr_msis(spapr)));
+
+ /* Dynamic DMA window */
+ if (phb->ddw_enabled) {
+ _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
+ sizeof(ddw_applicable)));
+ _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
+ &ddw_extensions, sizeof(ddw_extensions)));
+ }
+
+ /* Advertise NUMA via ibm,associativity */
+ if (phb->numa_node != -1) {
+ spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node);
+ }
+
+ /* Build the interrupt-map, this must match what is done
+ * in pci_swizzle_map_irq_fn
+ */
+ _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
+ &interrupt_map_mask, sizeof(interrupt_map_mask)));
+ /* One entry per (slot, pin) pair; pins are written 1-based */
+ for (i = 0; i < PCI_SLOT_MAX; i++) {
+ for (j = 0; j < PCI_NUM_PINS; j++) {
+ uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
+ int lsi_num = pci_swizzle(i, j);
+
+ irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
+ irqmap[1] = 0;
+ irqmap[2] = 0;
+ irqmap[3] = cpu_to_be32(j+1);
+ irqmap[4] = cpu_to_be32(intc_phandle);
+ spapr_dt_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true);
+ }
+ }
+ /* Write interrupt map */
+ _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
+ sizeof(interrupt_map)));
+
+ tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
+ if (!tcet) {
+ return -1;
+ }
+ spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
+ tcet->liobn, tcet->bus_offset,
+ tcet->nb_table << tcet->page_shift);
+
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index);
+ if (drc) {
+ uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc));
+
+ _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index,
+ sizeof(drc_index)));
+ }
+
+ /* Walk the bridges and program the bus numbers */
+ spapr_phb_pci_enumerate(phb);
+ _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
+
+ /* Walk the bridge and subordinate buses */
+ ret = spapr_dt_pci_bus(phb, PCI_HOST_BRIDGE(phb)->bus, fdt, bus_off);
+ if (ret < 0) {
+ return ret;
+ }
+
+ spapr_phb_nvgpu_populate_dt(phb, fdt, bus_off, &err);
+ if (err) {
+ error_report_err(err);
+ }
+ spapr_phb_nvgpu_ram_populate_dt(phb, fdt);
+
+ return 0;
+}
+
+/*
+ * Register the PCI-related RTAS calls: config space access, MSI handling
+ * (only when msi_nonbroken is set) and the EEH calls.
+ */
+void spapr_pci_rtas_init(void)
+{
+ spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
+ rtas_read_pci_config);
+ spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config",
+ rtas_write_pci_config);
+ spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config",
+ rtas_ibm_read_pci_config);
+ spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config",
+ rtas_ibm_write_pci_config);
+ if (msi_nonbroken) {
+ spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
+ "ibm,query-interrupt-source-number",
+ rtas_ibm_query_interrupt_source_number);
+ spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi",
+ rtas_ibm_change_msi);
+ }
+
+ /* EEH calls */
+ spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION,
+ "ibm,set-eeh-option",
+ rtas_ibm_set_eeh_option);
+ spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2,
+ "ibm,get-config-addr-info2",
+ rtas_ibm_get_config_addr_info2);
+ spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2,
+ "ibm,read-slot-reset-state2",
+ rtas_ibm_read_slot_reset_state2);
+ spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET,
+ "ibm,set-slot-reset",
+ rtas_ibm_set_slot_reset);
+ spapr_rtas_register(RTAS_IBM_CONFIGURE_PE,
+ "ibm,configure-pe",
+ rtas_ibm_configure_pe);
+ spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL,
+ "ibm,slot-error-detail",
+ rtas_ibm_slot_error_detail);
+}
+
+/* Register the sPAPR PHB QOM type; hooked up through type_init() below. */
+static void spapr_pci_register_types(void)
+{
+ type_register_static(&spapr_phb_info);
+}
+
+type_init(spapr_pci_register_types)
+
+/*
+ * qbus_walk_children() device callback. @opaque points to a bool; if @dev
+ * is one of the known VGA device types, its "big-endian-framebuffer"
+ * property is set to that value. Always returns 0.
+ */
+static int spapr_switch_one_vga(DeviceState *dev, void *opaque)
+{
+    static const char *const vga_types[] = {
+        "VGA", "secondary-vga", "bochs-display", "virtio-vga",
+    };
+    bool be = *(bool *)opaque;
+    size_t k;
+
+    for (k = 0; k < sizeof(vga_types) / sizeof(vga_types[0]); k++) {
+        if (object_dynamic_cast(OBJECT(dev), vga_types[k])) {
+            object_property_set_bool(OBJECT(dev), "big-endian-framebuffer",
+                                     be, &error_abort);
+            /* First match wins, the property is set at most once */
+            break;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Apply @big_endian to the framebuffer endianness of every VGA device
+ * found below any PHB of @spapr.
+ */
+void spapr_pci_switch_vga(SpaprMachineState *spapr, bool big_endian)
+{
+    SpaprPhbState *host;
+
+    /*
+     * For backward compatibility with existing guests, we switch
+     * the endianness of the VGA controller when changing the guest
+     * interrupt mode
+     */
+    QLIST_FOREACH(host, &spapr->phbs, list) {
+        PCIBus *root = PCI_HOST_BRIDGE(host)->bus;
+
+        qbus_walk_children(&root->qbus, spapr_switch_one_vga, NULL, NULL,
+                           NULL, &big_endian);
+    }
+}
diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c
new file mode 100644
index 000000000..7fb0cf4d0
--- /dev/null
+++ b/hw/ppc/spapr_pci_nvlink2.c
@@ -0,0 +1,445 @@
+/*
+ * QEMU sPAPR PCI for NVLink2 pass through
+ *
+ * Copyright (c) 2019 Alexey Kardashevskiy, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "hw/pci/pci.h"
+#include "hw/pci-host/spapr.h"
+#include "hw/ppc/spapr_numa.h"
+#include "qemu/error-report.h"
+#include "hw/ppc/fdt.h"
+#include "hw/pci/pci_bridge.h"
+/*
+ * Phandle values written into the device tree by this file. Each macro ORs
+ * a fixed tag with the PHB index and a per-object number:
+ *   PHANDLE_PCIDEV: 0x12000000 | (phb->index << 16) | pdev->devfn
+ *   PHANDLE_GPURAM: 0x110000FF | (phb->index << 16) | (n << 8)
+ *   PHANDLE_NVLINK: 0x00130000 | (phb->index << 8) | (gn << 4) | nn
+ */
+#define PHANDLE_PCIDEV(phb, pdev) (0x12000000 | \
+ (((phb)->index) << 16) | ((pdev)->devfn))
+#define PHANDLE_GPURAM(phb, n) (0x110000FF | ((n) << 8) | \
+ (((phb)->index) << 16))
+#define PHANDLE_NVLINK(phb, gn, nn) (0x00130000 | (((phb)->index) << 8) | \
+ ((gn) << 4) | (nn))
+/*
+ * One "slot": the GPU and the NVLink bridge devices that report the same
+ * "nvlink2-tgt" property value.
+ */
+typedef struct SpaprPhbPciNvGpuSlot {
+ uint64_t tgt; /* "nvlink2-tgt" value that keys this slot */
+ uint64_t gpa; /* system-memory address assigned to the GPU RAM region */
+ unsigned numa_id; /* copied from spapr->gpu_numa_id when the GPU is found */
+ PCIDevice *gpdev; /* the GPU device; NULL if none was collected */
+ int linknum; /* number of used entries in links[] */
+ struct {
+ uint64_t atsd_gpa; /* system-memory address assigned to the ATSD region */
+ PCIDevice *npdev; /* the NVLink bridge device */
+ uint32_t link_speed; /* npdev's "nvlink2-link-speed" property */
+ } links[NVGPU_MAX_LINKS];
+} SpaprPhbPciNvGpuSlot;
+/*
+ * Per-PHB NVLink2 state, allocated by spapr_phb_nvgpu_setup() and released
+ * by spapr_phb_nvgpu_free().
+ */
+struct SpaprPhbPciNvGpuConfig {
+ uint64_t nv2_ram_current; /* next unassigned address for GPU RAM */
+ uint64_t nv2_atsd_current; /* next unassigned address for ATSD regions */
+ int num; /* number of non empty (i.e. tgt!=0) entries in slots[] */
+ SpaprPhbPciNvGpuSlot slots[NVGPU_MAX_NUM];
+ Error *err; /* error recorded while collecting devices, if any */
+};
+
+/*
+ * Look up the slot keyed by @tgt in @nvgpus, opening a new one if needed.
+ *
+ * Returns the existing slot when one of the first nvgpus->num entries
+ * already carries @tgt. Otherwise claims the next unused entry, stores @tgt
+ * in it and bumps nvgpus->num. Returns NULL when slots[] is already full.
+ */
+static SpaprPhbPciNvGpuSlot *
+spapr_nvgpu_get_slot(SpaprPhbPciNvGpuConfig *nvgpus, uint64_t tgt)
+{
+    SpaprPhbPciNvGpuSlot *slot = nvgpus->slots;
+    SpaprPhbPciNvGpuSlot *const used_end = nvgpus->slots + nvgpus->num;
+
+    /* Search for partially collected "slot" */
+    while (slot < used_end) {
+        if (slot->tgt == tgt) {
+            return slot;
+        }
+        slot++;
+    }
+
+    if (nvgpus->num == ARRAY_SIZE(nvgpus->slots)) {
+        return NULL;
+    }
+
+    /* slot now points at the first unused entry */
+    slot->tgt = tgt;
+    nvgpus->num++;
+
+    return slot;
+}
+
+/*
+ * Record @pdev as the GPU of the slot keyed by @tgt.
+ *
+ * The GPU RAM region @mr is assigned the next address from
+ * nvgpus->nv2_ram_current, which is then advanced by the region size, and
+ * the slot takes the machine's current gpu_numa_id, which is then
+ * incremented. Sets @errp if no slot is available; asserts that the slot
+ * does not already have a GPU.
+ */
+static void spapr_pci_collect_nvgpu(SpaprPhbPciNvGpuConfig *nvgpus,
+                                    PCIDevice *pdev, uint64_t tgt,
+                                    MemoryRegion *mr, Error **errp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(MACHINE(qdev_get_machine()));
+    SpaprPhbPciNvGpuSlot *slot = spapr_nvgpu_get_slot(nvgpus, tgt);
+
+    if (slot == NULL) {
+        error_setg(errp, "Found too many GPUs per vPHB");
+        return;
+    }
+
+    g_assert(slot->gpdev == NULL);
+    slot->gpdev = pdev;
+
+    slot->gpa = nvgpus->nv2_ram_current;
+    nvgpus->nv2_ram_current += memory_region_size(mr);
+
+    slot->numa_id = spapr->gpu_numa_id++;
+}
+
+/*
+ * Record @pdev as the next NVLink bridge of the slot keyed by @tgt.
+ *
+ * The ATSD region @mr is assigned the next address from
+ * nvgpus->nv2_atsd_current, which is then advanced by the region size; the
+ * link speed is read from the device's "nvlink2-link-speed" property.
+ * Sets @errp if no slot is available or the slot's links[] is full.
+ */
+static void spapr_pci_collect_nvnpu(SpaprPhbPciNvGpuConfig *nvgpus,
+                                    PCIDevice *pdev, uint64_t tgt,
+                                    MemoryRegion *mr, Error **errp)
+{
+    SpaprPhbPciNvGpuSlot *slot = spapr_nvgpu_get_slot(nvgpus, tgt);
+    int idx;
+
+    if (slot == NULL) {
+        error_setg(errp, "Found too many NVLink bridges per vPHB");
+        return;
+    }
+
+    if (slot->linknum == ARRAY_SIZE(slot->links)) {
+        error_setg(errp, "Found too many NVLink bridges per GPU");
+        return;
+    }
+    idx = slot->linknum++;
+
+    g_assert(slot->links[idx].npdev == NULL);
+    slot->links[idx].npdev = pdev;
+
+    slot->links[idx].atsd_gpa = nvgpus->nv2_atsd_current;
+    nvgpus->nv2_atsd_current += memory_region_size(mr);
+
+    slot->links[idx].link_speed =
+        object_property_get_uint(OBJECT(pdev), "nvlink2-link-speed", NULL);
+}
+/*
+ * pci_for_each_device_under_bus() callback that classifies one PCI device.
+ *
+ * A device whose "nvlink2-tgt" property is non-zero is recorded in the
+ * SpaprPhbPciNvGpuConfig passed through @opaque: as a GPU when it has an
+ * "nvlink2-mr[0]" link, otherwise as an NVLink bridge using its
+ * "nvlink2-atsd-mr[0]" link. Any error is propagated into nvgpus->err.
+ * If the device is a PCI bridge with a secondary bus, the devices under
+ * that bus are visited recursively with the same @opaque.
+ */
+static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev,
+ void *opaque)
+{
+ PCIBus *sec_bus;
+ Object *po = OBJECT(pdev);
+ uint64_t tgt = object_property_get_uint(po, "nvlink2-tgt", NULL);
+
+ if (tgt) {
+ Error *local_err = NULL;
+ SpaprPhbPciNvGpuConfig *nvgpus = opaque;
+ Object *mr_gpu = object_property_get_link(po, "nvlink2-mr[0]", NULL);
+ Object *mr_npu = object_property_get_link(po, "nvlink2-atsd-mr[0]",
+ NULL);
+
+ g_assert(mr_gpu || mr_npu); /* a tagged device must expose one of them */
+ if (mr_gpu) {
+ spapr_pci_collect_nvgpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_gpu),
+ &local_err);
+ } else {
+ spapr_pci_collect_nvnpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_npu),
+ &local_err);
+ }
+ error_propagate(&nvgpus->err, local_err);
+ }
+ if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) !=
+ PCI_HEADER_TYPE_BRIDGE)) {
+ return; /* not a bridge: nothing below it to visit */
+ }
+
+ sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev));
+ if (!sec_bus) {
+ return;
+ }
+
+ pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_collect_nvgpu, opaque);
+}
+/*
+ * Discover the NVLink2 GPUs and NVLink bridges under @sphb and map their
+ * memory regions into system memory.
+ *
+ * Does nothing unless both sphb->nv2_gpa_win_addr and
+ * sphb->nv2_atsd_win_addr are non-zero. On return sphb->nvgpus is non-NULL
+ * only if at least one GPU RAM region was mapped; it is freed and reset to
+ * NULL otherwise. An error found while collecting devices is returned
+ * through @errp.
+ */
+void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp)
+{
+ int i, j, valid_gpu_num;
+ PCIBus *bus;
+
+ /* Search for GPUs and NPUs */
+ if (!sphb->nv2_gpa_win_addr || !sphb->nv2_atsd_win_addr) {
+ return;
+ }
+
+ sphb->nvgpus = g_new0(SpaprPhbPciNvGpuConfig, 1);
+ sphb->nvgpus->nv2_ram_current = sphb->nv2_gpa_win_addr;
+ sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr;
+
+ bus = PCI_HOST_BRIDGE(sphb)->bus;
+ pci_for_each_device_under_bus(bus, spapr_phb_pci_collect_nvgpu,
+ sphb->nvgpus);
+
+ if (sphb->nvgpus->err) {
+ error_propagate(errp, sphb->nvgpus->err);
+ sphb->nvgpus->err = NULL; /* ownership moved to errp */
+ goto cleanup_exit;
+ }
+
+ /* Add found GPU RAM and ATSD MRs if found */
+ for (i = 0, valid_gpu_num = 0; i < sphb->nvgpus->num; ++i) {
+ Object *nvmrobj;
+ SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
+
+ if (!nvslot->gpdev) {
+ continue; /* slot holds NVLink bridges only */
+ }
+ nvmrobj = object_property_get_link(OBJECT(nvslot->gpdev),
+ "nvlink2-mr[0]", NULL);
+ /* ATSD is pointless without GPU RAM MR so skip those */
+ if (!nvmrobj) {
+ continue;
+ }
+
+ ++valid_gpu_num;
+ memory_region_add_subregion(get_system_memory(), nvslot->gpa,
+ MEMORY_REGION(nvmrobj));
+
+ for (j = 0; j < nvslot->linknum; ++j) {
+ Object *atsdmrobj;
+
+ atsdmrobj = object_property_get_link(OBJECT(nvslot->links[j].npdev),
+ "nvlink2-atsd-mr[0]", NULL);
+ if (!atsdmrobj) {
+ continue;
+ }
+ memory_region_add_subregion(get_system_memory(),
+ nvslot->links[j].atsd_gpa,
+ MEMORY_REGION(atsdmrobj));
+ }
+ }
+
+ if (valid_gpu_num) {
+ return; /* keep sphb->nvgpus for the later DT and free calls */
+ }
+ /* We did not find any interesting GPU */
+cleanup_exit:
+ g_free(sphb->nvgpus);
+ sphb->nvgpus = NULL;
+}
+
+/*
+ * Undo spapr_phb_nvgpu_setup(): remove the GPU RAM and ATSD regions of
+ * @sphb from system memory and free sphb->nvgpus.
+ *
+ * Does nothing if the PHB has no NVLink2 configuration. On return
+ * sphb->nvgpus is NULL.
+ */
+void spapr_phb_nvgpu_free(SpaprPhbState *sphb)
+{
+    int i, j;
+
+    if (!sphb->nvgpus) {
+        return;
+    }
+
+    for (i = 0; i < sphb->nvgpus->num; ++i) {
+        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
+        Object *nv_mrobj;
+
+        /*
+         * Apply the same filter as spapr_phb_nvgpu_setup(). A slot that
+         * only collected NVLink bridges has no GPU device to query, and
+         * nothing (neither GPU RAM nor ATSD) was mapped for a slot that
+         * setup skipped, so there is nothing to unmap for it either.
+         */
+        if (!nvslot->gpdev) {
+            continue;
+        }
+        nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
+                                            "nvlink2-mr[0]", NULL);
+        if (!nv_mrobj) {
+            continue;
+        }
+
+        memory_region_del_subregion(get_system_memory(),
+                                    MEMORY_REGION(nv_mrobj));
+
+        for (j = 0; j < nvslot->linknum; ++j) {
+            PCIDevice *npdev = nvslot->links[j].npdev;
+            Object *atsd_mrobj;
+
+            atsd_mrobj = object_property_get_link(OBJECT(npdev),
+                                                  "nvlink2-atsd-mr[0]", NULL);
+            if (atsd_mrobj) {
+                memory_region_del_subregion(get_system_memory(),
+                                            MEMORY_REGION(atsd_mrobj));
+            }
+        }
+    }
+    g_free(sphb->nvgpus);
+    sphb->nvgpus = NULL;
+}
+/*
+ * Write the "ibm,mmio-atsd" property of the PHB node at @bus_off in @fdt.
+ *
+ * The property holds the big-endian ATSD addresses of the NVLink bridges
+ * that belong to slots with a GPU, capped at ARRAY_SIZE(atsd) entries.
+ * Does nothing if the PHB has no NVLink2 configuration. Sets @errp when no
+ * ATSD address was found or when spapr_phb_eeh_available() is false.
+ */
+void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off,
+ Error **errp)
+{
+ int i, j, atsdnum = 0;
+ uint64_t atsd[8]; /* The existing limitation of known guests */
+
+ if (!sphb->nvgpus) {
+ return;
+ }
+
+ for (i = 0; (i < sphb->nvgpus->num) && (atsdnum < ARRAY_SIZE(atsd)); ++i) {
+ SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
+
+ if (!nvslot->gpdev) {
+ continue; /* bridges without a GPU are not advertised */
+ }
+ for (j = 0; j < nvslot->linknum; ++j) {
+ if (!nvslot->links[j].atsd_gpa) {
+ continue;
+ }
+
+ if (atsdnum == ARRAY_SIZE(atsd)) {
+ error_report("Only %"PRIuPTR" ATSD registers supported",
+ ARRAY_SIZE(atsd));
+ break; /* the outer loop condition stops the scan as well */
+ }
+ atsd[atsdnum] = cpu_to_be64(nvslot->links[j].atsd_gpa);
+ ++atsdnum;
+ }
+ }
+
+ if (!atsdnum) {
+ error_setg(errp, "No ATSD registers found");
+ return;
+ }
+
+ if (!spapr_phb_eeh_available(sphb)) {
+ /*
+ * ibm,mmio-atsd contains ATSD registers; these belong to an NPU PHB
+ * which we do not emulate as a separate device. Instead we put
+ * ibm,mmio-atsd to the vPHB with GPU and make sure that we do not
+ * put GPUs from different IOMMU groups to the same vPHB to ensure
+ * that the guest will use ATSDs from the corresponding NPU.
+ */
+ error_setg(errp, "ATSD requires separate vPHB per GPU IOMMU group");
+ return;
+ }
+
+ _FDT((fdt_setprop(fdt, bus_off, "ibm,mmio-atsd", atsd,
+ atsdnum * sizeof(atsd[0]))));
+}
+
+/*
+ * Add the NVLink2 nodes that live directly under the root of @fdt:
+ *  - an "npuphb<index>" node with one "link@N" child per collected NVLink
+ *    bridge, N counting up across all slots;
+ *  - one "memory@<gpa>" node per GPU, describing its RAM as
+ *    "ibm,coherent-device-memory" with a zero-sized "linux,usable-memory".
+ *
+ * Does nothing if the PHB has no NVLink2 configuration.
+ */
+void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt)
+{
+    int i, j, linkidx, npuoff;
+    char *npuname;
+
+    if (!sphb->nvgpus) {
+        return;
+    }
+
+    npuname = g_strdup_printf("npuphb%d", sphb->index);
+    npuoff = fdt_add_subnode(fdt, 0, npuname);
+    _FDT(npuoff);
+    _FDT(fdt_setprop_cell(fdt, npuoff, "#address-cells", 1));
+    _FDT(fdt_setprop_cell(fdt, npuoff, "#size-cells", 0));
+    /* Advertise NPU as POWER9 so the guest can enable NPU2 contexts */
+    _FDT((fdt_setprop_string(fdt, npuoff, "compatible", "ibm,power9-npu")));
+    g_free(npuname);
+
+    for (i = 0, linkidx = 0; i < sphb->nvgpus->num; ++i) {
+        for (j = 0; j < sphb->nvgpus->slots[i].linknum; ++j) {
+            char *linkname = g_strdup_printf("link@%d", linkidx);
+            int off = fdt_add_subnode(fdt, npuoff, linkname);
+
+            _FDT(off);
+            /* _FDT((fdt_setprop_cell(fdt, off, "reg", linkidx))); */
+            _FDT((fdt_setprop_string(fdt, off, "compatible",
+                                     "ibm,npu-link")));
+            _FDT((fdt_setprop_cell(fdt, off, "phandle",
+                                   PHANDLE_NVLINK(sphb, i, j))));
+            _FDT((fdt_setprop_cell(fdt, off, "ibm,npu-link-index", linkidx)));
+            g_free(linkname);
+            ++linkidx;
+        }
+    }
+
+    /* Add memory nodes for GPU RAM and mark them unusable */
+    for (i = 0; i < sphb->nvgpus->num; ++i) {
+        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
+        Object *nv_mrobj;
+        uint64_t size;
+        uint64_t mem_reg[2];
+        char *mem_name;
+        int off;
+
+        /*
+         * A slot that only collected NVLink bridges has no GPU and hence
+         * no RAM to describe. Skip it, as the other device tree helpers in
+         * this file do, instead of querying a NULL device.
+         */
+        if (!nvslot->gpdev) {
+            continue;
+        }
+
+        nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev),
+                                            "nvlink2-mr[0]",
+                                            &error_abort);
+        size = object_property_get_uint(nv_mrobj, "size", NULL);
+        mem_reg[0] = cpu_to_be64(nvslot->gpa);
+        mem_reg[1] = cpu_to_be64(size);
+        mem_name = g_strdup_printf("memory@%"PRIx64, nvslot->gpa);
+        off = fdt_add_subnode(fdt, 0, mem_name);
+
+        _FDT(off);
+        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+        _FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg))));
+
+        spapr_numa_write_associativity_dt(SPAPR_MACHINE(qdev_get_machine()),
+                                          fdt, off, nvslot->numa_id);
+
+        _FDT((fdt_setprop_string(fdt, off, "compatible",
+                                 "ibm,coherent-device-memory")));
+
+        /* Same base address, zero length: none of it is usable memory */
+        mem_reg[1] = cpu_to_be64(0);
+        _FDT((fdt_setprop(fdt, off, "linux,usable-memory", mem_reg,
+                          sizeof(mem_reg))));
+        _FDT((fdt_setprop_cell(fdt, off, "phandle",
+                               PHANDLE_GPURAM(sphb, i))));
+        g_free(mem_name);
+    }
+
+}
+
+/*
+ * Add the NVLink2 properties to the device tree node at @offset that
+ * describes PCI device @dev under @sphb.
+ *
+ * A GPU gets "ibm,npu" (the phandles of its NVLink bridges) and "phandle".
+ * An NVLink bridge of a slot that has a GPU gets "phandle", "ibm,gpu",
+ * "ibm,nvlink", "memory-region", "ibm,device-tgt-addr" and
+ * "ibm,nvlink-speed". Any other device, and any device when the PHB has no
+ * NVLink2 configuration, is left untouched.
+ */
+void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset,
+                                        SpaprPhbState *sphb)
+{
+    int i, j;
+
+    if (!sphb->nvgpus) {
+        return;
+    }
+
+    for (i = 0; i < sphb->nvgpus->num; ++i) {
+        SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i];
+
+        /* Skip "slot" without attached GPU */
+        if (!nvslot->gpdev) {
+            continue;
+        }
+        if (dev == nvslot->gpdev) {
+            /*
+             * Sized for the maximum rather than declared as a
+             * variable-length array: linknum is 0 for a GPU without
+             * bridges, and a zero-length VLA is undefined behaviour.
+             * linknum never exceeds NVGPU_MAX_LINKS, the size of links[].
+             */
+            uint32_t npus[NVGPU_MAX_LINKS];
+
+            for (j = 0; j < nvslot->linknum; ++j) {
+                PCIDevice *npdev = nvslot->links[j].npdev;
+
+                npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev));
+            }
+            _FDT(fdt_setprop(fdt, offset, "ibm,npu", npus,
+                             j * sizeof(npus[0])));
+            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
+                                   PHANDLE_PCIDEV(sphb, dev))));
+            continue;
+        }
+
+        for (j = 0; j < nvslot->linknum; ++j) {
+            if (dev != nvslot->links[j].npdev) {
+                continue;
+            }
+
+            _FDT((fdt_setprop_cell(fdt, offset, "phandle",
+                                   PHANDLE_PCIDEV(sphb, dev))));
+            _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu",
+                                  PHANDLE_PCIDEV(sphb, nvslot->gpdev)));
+            _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink",
+                                   PHANDLE_NVLINK(sphb, i, j))));
+            /*
+             * If we ever want to emulate GPU RAM at the same location as on
+             * the host - here is the encoding GPA->TGT:
+             *
+             * gta  = ((sphb->nv2_gpa >> 42) & 0x1) << 42;
+             * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43;
+             * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45;
+             * gta |= sphb->nv2_gpa & ((1UL << 43) - 1);
+             */
+            _FDT(fdt_setprop_cell(fdt, offset, "memory-region",
+                                  PHANDLE_GPURAM(sphb, i)));
+            _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr",
+                                 nvslot->tgt));
+            _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed",
+                                  nvslot->links[j].link_speed));
+        }
+    }
+}
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
new file mode 100644
index 000000000..2a76b4e0b
--- /dev/null
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -0,0 +1,217 @@
+/*
+ * QEMU sPAPR PCI host for VFIO
+ *
+ * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include <linux/vfio.h>
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "hw/pci/msix.h"
+#include "hw/vfio/vfio.h"
+#include "qemu/error-report.h"
+/*
+ * Return the result of vfio_eeh_as_ok() for the IOMMU address space of
+ * @sphb.
+ */
+bool spapr_phb_eeh_available(SpaprPhbState *sphb)
+{
+ return vfio_eeh_as_ok(&sphb->iommu_as);
+}
+/*
+ * Issue VFIO_EEH_PE_ENABLE on the IOMMU address space of @sphb. The result
+ * of the operation is ignored.
+ */
+static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb)
+{
+ vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
+}
+/*
+ * Reset-time hook for a PHB: re-enables EEH on the PHB that @qdev is cast
+ * to with SPAPR_PCI_HOST_BRIDGE().
+ */
+void spapr_phb_vfio_reset(DeviceState *qdev)
+{
+ /*
+ * The PE might be in frozen state. To reenable the EEH
+ * functionality on it will clean the frozen state, which
+ * ensures that the contained PCI devices will work properly
+ * after reboot.
+ */
+ spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
+}
+/*
+ * pci_for_each_device() callback: sets the bool that @opaque points to when
+ * @pdev is a "vfio-pci" device. The flag is never cleared here, so it stays
+ * set once any visited device matched.
+ */
+static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev,
+ void *opaque)
+{
+ bool *found = opaque;
+
+ if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
+ *found = true;
+ }
+}
+/*
+ * Translate an RTAS EEH @option into the matching VFIO EEH operation and
+ * run it on the IOMMU address space of @sphb.
+ *
+ * Returns RTAS_OUT_PARAM_ERROR for an unknown @option, or for
+ * RTAS_EEH_ENABLE when no "vfio-pci" device is found on the bus number
+ * taken from bits 16-23 of @addr; RTAS_OUT_HW_ERROR when the VFIO
+ * operation fails; RTAS_OUT_SUCCESS otherwise.
+ */
+int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb,
+ unsigned int addr, int option)
+{
+ uint32_t op;
+ int ret;
+
+ switch (option) {
+ case RTAS_EEH_DISABLE:
+ op = VFIO_EEH_PE_DISABLE;
+ break;
+ case RTAS_EEH_ENABLE: {
+ PCIHostState *phb;
+ bool found = false;
+
+ /*
+ * The EEH functionality is enabled per sphb level instead of
+ * per PCI device. We have already identified this specific sphb
+ * based on buid passed as argument to ibm,set-eeh-option rtas
+ * call. Now we just need to check the validity of the PCI
+ * pass-through devices (vfio-pci) under this sphb bus.
+ * We have already validated that all the devices under this sphb
+ * are from same iommu group (within same PE) before coming here.
+ *
+ * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh:
+ * Rework device EEH PE determination") kernel would call
+ * eeh-set-option for each device in the PE using the device's
+ * config_address as the argument rather than the PE address.
+ * Hence if we check validity of supplied config_addr whether
+ * it matches to this PHB will cause issues with older kernel
+ * versions v5.9 and older. If we return an error from
+ * eeh-set-option when the argument isn't a valid PE address
+ * then older kernels (v5.9 and older) will interpret that as
+ * EEH not being supported.
+ */
+ phb = PCI_HOST_BRIDGE(sphb);
+ pci_for_each_device(phb->bus, (addr >> 16) & 0xFF,
+ spapr_eeh_pci_find_device, &found);
+
+ if (!found) {
+ return RTAS_OUT_PARAM_ERROR;
+ }
+
+ op = VFIO_EEH_PE_ENABLE;
+ break;
+ }
+ case RTAS_EEH_THAW_IO:
+ op = VFIO_EEH_PE_UNFREEZE_IO;
+ break;
+ case RTAS_EEH_THAW_DMA:
+ op = VFIO_EEH_PE_UNFREEZE_DMA;
+ break;
+ default:
+ return RTAS_OUT_PARAM_ERROR;
+ }
+
+ ret = vfio_eeh_as_op(&sphb->iommu_as, op);
+ if (ret < 0) {
+ return RTAS_OUT_HW_ERROR;
+ }
+
+ return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Query the EEH PE state of @sphb with VFIO_EEH_PE_GET_STATE.
+ *
+ * On success stores the value returned by VFIO in *@state and returns
+ * RTAS_OUT_SUCCESS. On failure returns RTAS_OUT_PARAM_ERROR and leaves
+ * *@state untouched.
+ */
+int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state)
+{
+    int pe_state = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE);
+
+    if (pe_state >= 0) {
+        *state = pe_state;
+        return RTAS_OUT_SUCCESS;
+    }
+
+    return RTAS_OUT_PARAM_ERROR;
+}
+
+/*
+ * pci_for_each_device_under_bus() callback run before a PE reset.
+ *
+ * For a "vfio-pci" device: if MSI-X is enabled, clear the enable bit in the
+ * MSI-X flags word through the common config accessors, then reset the
+ * MSI-X state with msix_reset(). Other devices are left alone.
+ */
+static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus,
+                                              PCIDevice *pdev,
+                                              void *opaque)
+{
+    /* Only VFIO PCI devices are affected */
+    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci") == NULL) {
+        return;
+    }
+
+    /*
+     * The MSIx table will be cleaned out by reset. We need
+     * disable it so that it can be reenabled properly. Also,
+     * the cached MSIx table should be cleared as it's not
+     * reflecting the contents in hardware.
+     */
+    if (msix_enabled(pdev)) {
+        uint16_t msix_flags;
+
+        msix_flags = pci_host_config_read_common(
+            pdev, pdev->msix_cap + PCI_MSIX_FLAGS, pci_config_size(pdev), 2);
+        msix_flags &= ~PCI_MSIX_FLAGS_ENABLE;
+        pci_host_config_write_common(
+            pdev, pdev->msix_cap + PCI_MSIX_FLAGS, pci_config_size(pdev),
+            msix_flags, 2);
+    }
+
+    msix_reset(pdev);
+}
+/*
+ * pci_for_each_bus() callback: runs spapr_phb_vfio_eeh_clear_dev_msix() on
+ * the devices under @bus. @opaque is not used.
+ */
+static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque)
+{
+ pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix,
+ NULL);
+}
+/*
+ * Prepare @sphb for a PE reset by running
+ * spapr_phb_vfio_eeh_clear_bus_msix() over the buses reachable from the
+ * PHB's root bus.
+ */
+static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb)
+{
+ PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
+
+ pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
+}
+
+/*
+ * Perform the RTAS slot reset @option on the PE of @sphb.
+ *
+ * Hot and fundamental resets first clear the MSI-X state of the VFIO
+ * devices via spapr_phb_vfio_eeh_pre_reset(); a deactivate does not.
+ * Returns RTAS_OUT_PARAM_ERROR for an unknown @option, RTAS_OUT_HW_ERROR
+ * when the VFIO operation fails and RTAS_OUT_SUCCESS otherwise.
+ */
+int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option)
+{
+    uint32_t vfio_op;
+
+    switch (option) {
+    case RTAS_SLOT_RESET_DEACTIVATE:
+        vfio_op = VFIO_EEH_PE_RESET_DEACTIVATE;
+        break;
+    case RTAS_SLOT_RESET_HOT:
+        vfio_op = VFIO_EEH_PE_RESET_HOT;
+        break;
+    case RTAS_SLOT_RESET_FUNDAMENTAL:
+        vfio_op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
+        break;
+    default:
+        return RTAS_OUT_PARAM_ERROR;
+    }
+
+    /* Both real resets need the pre-reset step; deactivate does not */
+    if (option != RTAS_SLOT_RESET_DEACTIVATE) {
+        spapr_phb_vfio_eeh_pre_reset(sphb);
+    }
+
+    if (vfio_eeh_as_op(&sphb->iommu_as, vfio_op) < 0) {
+        return RTAS_OUT_HW_ERROR;
+    }
+
+    return RTAS_OUT_SUCCESS;
+}
+
+/*
+ * Run VFIO_EEH_PE_CONFIGURE on the IOMMU address space of @sphb.
+ * Returns RTAS_OUT_PARAM_ERROR if the operation fails, RTAS_OUT_SUCCESS
+ * otherwise.
+ */
+int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb)
+{
+    if (vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE) < 0) {
+        return RTAS_OUT_PARAM_ERROR;
+    }
+
+    return RTAS_OUT_SUCCESS;
+}
diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c
new file mode 100644
index 000000000..df5c4b968
--- /dev/null
+++ b/hw/ppc/spapr_rng.c
@@ -0,0 +1,162 @@
+/*
+ * QEMU sPAPR random number generator "device" for H_RANDOM hypercall
+ *
+ * Copyright 2015 Thomas Huth, Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/rng.h"
+#include "hw/ppc/spapr.h"
+#include "hw/qdev-properties.h"
+#include "kvm_ppc.h"
+#include "qom/object.h"
+/*
+ * Instance state of the spapr-rng device (type name TYPE_SPAPR_RNG).
+ */
+OBJECT_DECLARE_SIMPLE_TYPE(SpaprRngState, SPAPR_RNG)
+
+struct SpaprRngState {
+ /*< private >*/
+ DeviceState ds;
+ RngBackend *backend; /* set through the "rng" link property */
+ bool use_kvm; /* set through the "use-kvm" property */
+};
+/*
+ * Per-call state shared between h_random() and the random_recv() backend
+ * callback.
+ */
+struct HRandomData {
+ QemuSemaphore sem; /* posted by random_recv() on every callback */
+ union {
+ uint64_t v64; /* the value handed back to the guest */
+ uint8_t v8[8]; /* same storage, filled byte-wise by random_recv() */
+ } val;
+ int received; /* number of bytes of val.v8 filled so far */
+};
+typedef struct HRandomData HRandomData;
+
+/*
+ * Callback function for the RngBackend.
+ *
+ * @dest is the HRandomData of the pending request. When @src is non-NULL
+ * and @size is non-zero, the bytes are appended to the request's buffer
+ * (asserting that they fit); in every case the request's semaphore is
+ * posted.
+ */
+static void random_recv(void *dest, const void *src, size_t size)
+{
+    HRandomData *req = dest;
+
+    if (src != NULL && size != 0) {
+        assert(size + req->received <= sizeof(req->val.v8));
+        memcpy(req->val.v8 + req->received, src, size);
+        req->received += size;
+    }
+
+    qemu_sem_post(&req->sem);
+}
+
+/*
+ * Handler for the H_RANDOM hypercall.
+ *
+ * Collects 8 bytes from the RNG backend of the spapr-rng device and
+ * returns them in args[0]. Returns H_HARDWARE when no spapr-rng device
+ * with a backend can be resolved, H_SUCCESS otherwise.
+ */
+static target_ulong h_random(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ SpaprRngState *rngstate;
+ HRandomData hrdata;
+
+ rngstate = SPAPR_RNG(object_resolve_path_type("", TYPE_SPAPR_RNG, NULL));
+
+ if (!rngstate || !rngstate->backend) {
+ return H_HARDWARE;
+ }
+
+ qemu_sem_init(&hrdata.sem, 0);
+ hrdata.val.v64 = 0;
+ hrdata.received = 0;
+
+ while (hrdata.received < 8) { /* a callback may deliver fewer bytes */
+ rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received,
+ random_recv, &hrdata);
+ qemu_mutex_unlock_iothread(); /* released while waiting on the sem */
+ qemu_sem_wait(&hrdata.sem); /* posted by random_recv() */
+ qemu_mutex_lock_iothread();
+ }
+
+ qemu_sem_destroy(&hrdata.sem);
+ args[0] = hrdata.val.v64;
+
+ return H_SUCCESS;
+}
+/*
+ * Instance initialiser. If a TYPE_SPAPR_RNG object can already be resolved
+ * it only reports an error and returns; otherwise it sets the description
+ * of the "rng" property.
+ */
+static void spapr_rng_instance_init(Object *obj)
+{
+ if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL) != NULL) {
+ error_report("spapr-rng can not be instantiated twice!");
+ return;
+ }
+
+ object_property_set_description(obj, "rng",
+ "ID of the random number generator backend");
+}
+
+/*
+ * Realize the spapr-rng device.
+ *
+ * With "use-kvm" set, try kvmppc_enable_hwrng() first and stop if it
+ * succeeds. Otherwise, or if that fails, register h_random() as the
+ * H_RANDOM handler when a backend is configured. Sets @errp when neither
+ * path is possible.
+ */
+static void spapr_rng_realize(DeviceState *dev, Error **errp)
+{
+    SpaprRngState *rng = SPAPR_RNG(dev);
+
+    if (rng->use_kvm && kvmppc_enable_hwrng() == 0) {
+        return;
+    }
+
+    /*
+     * If user specified both, use-kvm and a backend, we fall back to
+     * the backend now. If not, provide an appropriate error message.
+     */
+    if (rng->backend) {
+        spapr_register_hypercall(H_RANDOM, h_random);
+        return;
+    }
+
+    if (rng->use_kvm) {
+        error_setg(errp, "Could not initialize in-kernel H_RANDOM call!");
+    } else {
+        error_setg(errp, "spapr-rng needs an RNG backend!");
+    }
+}
+/*
+ * Properties of spapr-rng: the bool "use-kvm" (default false) and the
+ * "rng" link to a TYPE_RNG_BACKEND object.
+ */
+static Property spapr_rng_properties[] = {
+ DEFINE_PROP_BOOL("use-kvm", SpaprRngState, use_kvm, false),
+ DEFINE_PROP_LINK("rng", SpaprRngState, backend, TYPE_RNG_BACKEND,
+ RngBackend *),
+ DEFINE_PROP_END_OF_LIST(),
+};
+/*
+ * Class initialiser: installs the realize hook and the property list,
+ * files the device under DEVICE_CATEGORY_MISC and marks it as not
+ * hotpluggable.
+ */
+static void spapr_rng_class_init(ObjectClass *oc, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(oc);
+
+ dc->realize = spapr_rng_realize;
+ set_bit(DEVICE_CATEGORY_MISC, dc->categories);
+ device_class_set_props(dc, spapr_rng_properties);
+ dc->hotpluggable = false;
+}
+/*
+ * Type description of spapr-rng: a TYPE_DEVICE subtype whose instances are
+ * SpaprRngState objects.
+ */
+static const TypeInfo spapr_rng_info = {
+ .name = TYPE_SPAPR_RNG,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(SpaprRngState),
+ .instance_init = spapr_rng_instance_init,
+ .class_init = spapr_rng_class_init,
+};
+/*
+ * Module-init hook: registers spapr_rng_info. Wired up by the type_init()
+ * line below.
+ */
+static void spapr_rng_register_type(void)
+{
+ type_register_static(&spapr_rng_info);
+}
+type_init(spapr_rng_register_type)
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
new file mode 100644
index 000000000..b476382ae
--- /dev/null
+++ b/hw/ppc/spapr_rtas.c
@@ -0,0 +1,636 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * Hypercall based emulated RTAS
+ *
+ * Copyright (c) 2010-2011 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/device_tree.h"
+#include "sysemu/cpus.h"
+#include "sysemu/hw_accel.h"
+#include "sysemu/runstate.h"
+#include "kvm_ppc.h"
+
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/spapr_rtas.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/ppc.h"
+
+#include <libfdt.h>
+#include "hw/ppc/spapr_drc.h"
+#include "qemu/cutils.h"
+#include "trace.h"
+#include "hw/ppc/fdt.h"
+#include "target/ppc/mmu-hash64.h"
+#include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
+#include "helper_regs.h"
+
+/*
+ * RTAS handler that writes one character to the VTY looked up with
+ * vty_lookup(spapr, 0).
+ *
+ * The character is the low byte of RTAS argument 0. Stores
+ * RTAS_OUT_HW_ERROR in return slot 0 when no VTY is found and
+ * RTAS_OUT_SUCCESS after the character has been written.
+ */
+static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+{
+    uint8_t ch = rtas_ld(args, 0);
+    SpaprVioDevice *vty = vty_lookup(spapr, 0);
+
+    if (vty == NULL) {
+        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+        return;
+    }
+
+    vty_putchars(vty, &ch, sizeof(ch));
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+/*
+ * RTAS handler that requests a guest-initiated shutdown and stops the
+ * current CPU. Expects nargs == 2 and nret == 1, otherwise stores
+ * RTAS_OUT_PARAM_ERROR; stores RTAS_OUT_SUCCESS after the request.
+ */
+static void rtas_power_off(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs, target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ if (nargs != 2 || nret != 1) {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
+ cpu_stop_current();
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+/*
+ * RTAS handler that requests a guest-initiated system reset. Expects
+ * nargs == 0 and nret == 1, otherwise stores RTAS_OUT_PARAM_ERROR; stores
+ * RTAS_OUT_SUCCESS after the request.
+ */
+static void rtas_system_reboot(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ if (nargs != 0 || nret != 1) {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * RTAS handler that reports whether the CPU named by argument 0 is halted.
+ *
+ * Expects nargs == 1 and nret == 2. On success stores RTAS_OUT_SUCCESS in
+ * return slot 0 and, in slot 1, 0 if the CPU is halted or 2 if it is not.
+ * Stores RTAS_OUT_PARAM_ERROR for a bad argument count or an unknown CPU
+ * id.
+ */
+static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_,
+                                         SpaprMachineState *spapr,
+                                         uint32_t token, uint32_t nargs,
+                                         target_ulong args,
+                                         uint32_t nret, target_ulong rets)
+{
+    target_ulong cpu_id;
+    PowerPCCPU *target;
+
+    if (nargs != 1 || nret != 2) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    cpu_id = rtas_ld(args, 0);
+    target = spapr_find_cpu(cpu_id);
+    if (target == NULL) {
+        /* Didn't find a matching cpu */
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    rtas_st(rets, 1, CPU(target)->halted ? 0 : 2);
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+/*
+ * RTAS handler that starts a halted CPU.
+ *
+ * Arguments: 0 = id of the CPU to start, 1 = start address, 2 = value
+ * passed on as r3. Expects nargs == 3 and nret == 1. Stores
+ * RTAS_OUT_PARAM_ERROR for a bad argument count or unknown CPU id,
+ * RTAS_OUT_HW_ERROR if the CPU is not halted, and RTAS_OUT_SUCCESS once
+ * the CPU has been set up and kicked.
+ */
+static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ target_ulong id, start, r3;
+ PowerPCCPU *newcpu;
+ CPUPPCState *env;
+ target_ulong lpcr;
+ target_ulong caller_lpcr;
+
+ if (nargs != 3 || nret != 1) {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ id = rtas_ld(args, 0);
+ start = rtas_ld(args, 1);
+ r3 = rtas_ld(args, 2);
+
+ newcpu = spapr_find_cpu(id);
+ if (!newcpu) {
+ /* Didn't find a matching cpu */
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ env = &newcpu->env;
+
+ if (!CPU(newcpu)->halted) {
+ rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ return;
+ }
+
+ cpu_synchronize_state(CPU(newcpu));
+
+ env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME);
+ hreg_compute_hflags(env);
+
+ caller_lpcr = callcpu->env.spr[SPR_LPCR];
+ lpcr = env->spr[SPR_LPCR];
+
+ /* Set ILE the same way */
+ lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE);
+
+ /* Set AIL the same way */
+ lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL);
+
+ if (env->mmu_model == POWERPC_MMU_3_00) {
+ /*
+ * New cpus are expected to start in the same radix/hash mode
+ * as the existing CPUs
+ */
+ if (ppc64_v3_radix(callcpu)) {
+ lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ } else {
+ lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR);
+ }
+ env->spr[SPR_PSSCR] &= ~PSSCR_EC;
+ }
+ ppc_store_lpcr(newcpu, lpcr);
+
+ /*
+ * Set the timebase offset of the new CPU to that of the invoking
+ * CPU. This helps hotplugged CPU to have the correct timebase
+ * offset.
+ */
+ newcpu->env.tb_env->tb_offset = callcpu->env.tb_env->tb_offset;
+
+ spapr_cpu_set_entry_state(newcpu, start, 0, r3, 0);
+
+ qemu_cpu_kick(CPU(newcpu));
+
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+/*
+ * RTAS handler that halts the calling CPU. It clears the class's lpcr_pm
+ * bits from LPCR, sets PSSCR_EC, marks the CPU halted, reports it offline
+ * through kvmppc_set_reg_ppc_online() and kicks it. Nothing is stored in
+ * the return buffer.
+ */
+static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ CPUState *cs = CPU(cpu);
+ CPUPPCState *env = &cpu->env;
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+
+ /* Disable Power-saving mode Exit Cause exceptions for the CPU.
+ * This could deliver an interrupt on a dying CPU and crash the
+ * guest.
+ * For the same reason, set PSSCR_EC.
+ */
+ ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm);
+ env->spr[SPR_PSSCR] |= PSSCR_EC;
+ cs->halted = 1;
+ kvmppc_set_reg_ppc_online(cpu, 0);
+ qemu_cpu_kick(cs);
+}
+/*
+ * RTAS handler that requests a system suspend.
+ *
+ * Expects nargs == 0 and nret == 1, otherwise stores
+ * RTAS_OUT_PARAM_ERROR. Stores H_MULTI_THREADS_ACTIVE if any CPU other
+ * than the caller is not halted or has MSR[EE] set; otherwise requests the
+ * suspend and stores RTAS_OUT_SUCCESS.
+ */
+static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ CPUState *cs;
+
+ if (nargs != 0 || nret != 1) {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ CPU_FOREACH(cs) {
+ PowerPCCPU *c = POWERPC_CPU(cs);
+ CPUPPCState *e = &c->env;
+ if (c == cpu) {
+ continue; /* the calling CPU is allowed to be running */
+ }
+
+ /* See h_join */
+ if (!cs->halted || (e->msr & (1ULL << MSR_EE))) {
+ rtas_st(rets, 0, H_MULTI_THREADS_ACTIVE);
+ return;
+ }
+ }
+
+ qemu_system_suspend_request();
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * Store a system parameter into the guest buffer at @addr of @len bytes.
+ *
+ * The buffer receives a big-endian 16-bit @vallen followed by the first
+ * MIN(@len - 2, @vallen) bytes of @val. Returns
+ * RTAS_OUT_SYSPARM_PARAM_ERROR, writing nothing, if @len is below 2;
+ * RTAS_OUT_SUCCESS otherwise.
+ */
+static inline int sysparm_st(target_ulong addr, target_ulong len,
+                             const void *val, uint16_t vallen)
+{
+    hwaddr dst = ppc64_phys_to_real(addr);
+
+    /* The buffer must at least hold the 2-byte length header */
+    if (len >= 2) {
+        stw_be_phys(&address_space_memory, dst, vallen);
+        cpu_physical_memory_write(dst + 2, val, MIN(len - 2, vallen));
+        return RTAS_OUT_SUCCESS;
+    }
+
+    return RTAS_OUT_SYSPARM_PARAM_ERROR;
+}
+/*
+ * RTAS handler that returns a system parameter to the guest.
+ *
+ * Arguments: 0 = parameter id, 1 = guest buffer address, 2 = buffer
+ * length. The value is written with sysparm_st(), whose status is stored
+ * in return slot 0. Supported parameters are the SPLPAR characteristics
+ * string, the diagnostics run mode and the UUID; any other id stores
+ * RTAS_OUT_NOT_SUPPORTED.
+ */
+static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu,
+ SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args,
+ uint32_t nret, target_ulong rets)
+{
+ PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+ MachineState *ms = MACHINE(spapr);
+ target_ulong parameter = rtas_ld(args, 0);
+ target_ulong buffer = rtas_ld(args, 1);
+ target_ulong length = rtas_ld(args, 2);
+ target_ulong ret;
+
+ switch (parameter) {
+ case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: {
+ char *param_val = g_strdup_printf("MaxEntCap=%d,"
+ "DesMem=%" PRIu64 ","
+ "DesProcs=%d,"
+ "MaxPlatProcs=%d",
+ ms->smp.max_cpus,
+ ms->ram_size / MiB,
+ ms->smp.cpus,
+ ms->smp.max_cpus);
+ if (pcc->n_host_threads > 0) {
+ char *hostthr_val, *old = param_val;
+
+ /*
+ * Add HostThrs property. This property is not present in PAPR but
+ * is expected by some guests to communicate the number of physical
+ * host threads per core on the system so that they can scale
+ * information which varies based on the thread configuration.
+ */
+ hostthr_val = g_strdup_printf(",HostThrs=%d", pcc->n_host_threads);
+ param_val = g_strconcat(param_val, hostthr_val, NULL);
+ g_free(hostthr_val);
+ g_free(old); /* the string that g_strconcat() copied from */
+ }
+ ret = sysparm_st(buffer, length, param_val, strlen(param_val) + 1);
+ g_free(param_val);
+ break;
+ }
+ case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: {
+ uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED;
+
+ ret = sysparm_st(buffer, length, &param_val, sizeof(param_val));
+ break;
+ }
+ case RTAS_SYSPARM_UUID:
+ ret = sysparm_st(buffer, length, (unsigned char *)&qemu_uuid,
+ (qemu_uuid_set ? 16 : 0)); /* zero-length value if no UUID is set */
+ break;
+ default:
+ ret = RTAS_OUT_NOT_SUPPORTED;
+ }
+
+ rtas_st(rets, 0, ret);
+}
+
+/*
+ * RTAS handler for setting a system parameter. No parameter can be
+ * changed: the ids that the get handler knows store
+ * RTAS_OUT_NOT_AUTHORIZED, every other id stores RTAS_OUT_NOT_SUPPORTED.
+ */
+static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
+                                          SpaprMachineState *spapr,
+                                          uint32_t token, uint32_t nargs,
+                                          target_ulong args,
+                                          uint32_t nret, target_ulong rets)
+{
+    target_ulong which = rtas_ld(args, 0);
+    target_ulong status;
+
+    switch (which) {
+    case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS:
+    case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE:
+    case RTAS_SYSPARM_UUID:
+        status = RTAS_OUT_NOT_AUTHORIZED;
+        break;
+    default:
+        status = RTAS_OUT_NOT_SUPPORTED;
+        break;
+    }
+
+    rtas_st(rets, 0, status);
+}
+
+/*
+ * RTAS "ibm,os-term": the guest reports that it is terminating.
+ *
+ * Up to sizeof(msg) - 1 bytes are read from the guest address in args[0],
+ * forcibly NUL-terminated, reported through error_report(), and the
+ * guest-panicked notification is raised.  rets[0] is always
+ * RTAS_OUT_SUCCESS.
+ */
+static void rtas_ibm_os_term(PowerPCCPU *cpu,
+                             SpaprMachineState *spapr,
+                             uint32_t token, uint32_t nargs,
+                             target_ulong args,
+                             uint32_t nret, target_ulong rets)
+{
+    char msg[512];
+    const size_t last = sizeof(msg) - 1;
+    target_ulong msgaddr = rtas_ld(args, 0);
+
+    /* Leave room for the terminator; the guest string may be unterminated */
+    cpu_physical_memory_read(msgaddr, msg, last);
+    msg[last] = 0;
+
+    error_report("OS terminated: %s", msg);
+    qemu_system_guest_panicked(NULL);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * RTAS "set-power-level".
+ *
+ * Expects exactly 2 arguments and 2 return slots, otherwise
+ * RTAS_OUT_PARAM_ERROR.  Only power domain -1 is accepted (anything else
+ * is RTAS_OUT_NOT_SUPPORTED); for it rets[1] reports level 100.
+ */
+static void rtas_set_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 uint32_t token, uint32_t nargs,
+                                 target_ulong args, uint32_t nret,
+                                 target_ulong rets)
+{
+    if (nargs == 2 && nret == 2) {
+        /*
+         * A single "live insert" power domain is used for all
+         * hotplugged/dlpar'd resources, so power is always live/full (100).
+         */
+        int32_t power_domain = rtas_ld(args, 0);
+
+        if (power_domain == -1) {
+            rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+            rtas_st(rets, 1, 100);
+        } else {
+            rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        }
+    } else {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+    }
+}
+
+/*
+ * RTAS "get-power-level".
+ *
+ * Expects exactly 1 argument and 2 return slots, otherwise
+ * RTAS_OUT_PARAM_ERROR.  Only power domain -1 is accepted (anything else
+ * is RTAS_OUT_NOT_SUPPORTED); for it rets[1] reports level 100.
+ */
+static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 uint32_t token, uint32_t nargs,
+                                 target_ulong args, uint32_t nret,
+                                 target_ulong rets)
+{
+    if (nargs == 1 && nret == 2) {
+        /*
+         * A single "live insert" power domain is used for all
+         * hotplugged/dlpar'd resources, so power is always live/full (100).
+         */
+        int32_t power_domain = rtas_ld(args, 0);
+
+        if (power_domain == -1) {
+            rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+            rtas_st(rets, 1, 100);
+        } else {
+            rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        }
+    } else {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+    }
+}
+
+/*
+ * RTAS "ibm,nmi-register": record the guest's system-reset (args[0]) and
+ * machine-check (args[1]) handler addresses.
+ *
+ * rets[0] is RTAS_OUT_NOT_SUPPORTED when the FWNMI capability is off, when
+ * spapr_get_rtas_addr() yields 0, or when kvmppc_set_fwnmi() fails under
+ * KVM; RTAS_OUT_PARAM_ERROR when either address is out of range; and
+ * RTAS_OUT_SUCCESS once both addresses have been stored.
+ */
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+                                  SpaprMachineState *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args,
+                                  uint32_t nret, target_ulong rets)
+{
+    target_ulong sreset_addr, mce_addr;
+    bool sreset_ok, mce_ok;
+
+    /* The RTAS base is only looked up when the capability is enabled */
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_OFF ||
+        !spapr_get_rtas_addr()) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    sreset_addr = rtas_ld(args, 0);
+    mce_addr = rtas_ld(args, 1);
+
+    /* PAPR requires these are in the first 32M of memory and within RMA */
+    sreset_ok = sreset_addr < 32 * MiB && sreset_addr < spapr->rma_size;
+    mce_ok = mce_addr < 32 * MiB && mce_addr < spapr->rma_size;
+    if (!sreset_ok || !mce_ok) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    if (kvm_enabled() && kvmppc_set_fwnmi(cpu) < 0) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    spapr->fwnmi_system_reset_addr = sreset_addr;
+    spapr->fwnmi_machine_check_addr = mce_addr;
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * RTAS "ibm,nmi-interlock": the guest signals that it has finished
+ * handling a machine check.
+ *
+ * rets[0] is RTAS_OUT_NOT_SUPPORTED when the FWNMI capability is off and
+ * RTAS_OUT_PARAM_ERROR when no machine-check handler was registered.
+ * Otherwise it is RTAS_OUT_SUCCESS; the interlock is only released (and
+ * the migration blocker dropped) when the caller is the vCPU holding it.
+ */
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+                                   SpaprMachineState *spapr,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+{
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_OFF) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    if (spapr->fwnmi_machine_check_addr == -1) {
+        /* NMI register not called */
+        qemu_log_mask(LOG_GUEST_ERROR,
+"FWNMI: ibm,nmi-interlock RTAS called with FWNMI not registered.\n");
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
+        /*
+         * The vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+         * so release fwnmi_machine_check_interlock and wake a waiter.
+         */
+        spapr->fwnmi_machine_check_interlock = -1;
+        qemu_cond_signal(&spapr->fwnmi_machine_check_interlock_cond);
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+        migrate_del_blocker(spapr->fwnmi_migration_blocker);
+    } else {
+        /*
+         * Only the vCPU that hit the NMI should call "ibm,nmi-interlock",
+         * so this ought to be PARAM_ERROR.  Linux, however, also calls it
+         * for system reset interrupts, which are not interlocked.  PowerVM
+         * silently returns success in that case, while a failure makes
+         * Linux print "FWNMI: nmi-interlock failed: -3" - no other
+         * apparent ill effects, but a regression for users enabling
+         * FWNMI.  Match PowerVM for now; this could change once most
+         * Linux clients are fixed.
+         */
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    }
+}
+
+static struct rtas_call { /* one slot per token, indexed by token - RTAS_TOKEN_BASE */
+ const char *name; /* device tree property name; NULL slots are skipped */
+ spapr_rtas_fn fn; /* handler run by spapr_rtas_call(); NULL if none */
+} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE];
+
+/*
+ * Dispatch an RTAS call to the handler registered for @token.
+ *
+ * Returns H_SUCCESS when a handler ran.  For a token with no handler,
+ * rets[0] is set to RTAS_OUT_PARAM_ERROR and H_PARAMETER is returned.
+ */
+target_ulong spapr_rtas_call(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                             uint32_t token, uint32_t nargs, target_ulong args,
+                             uint32_t nret, target_ulong rets)
+{
+    spapr_rtas_fn handler = NULL;
+
+    if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) {
+        handler = rtas_table[token - RTAS_TOKEN_BASE].fn;
+    }
+
+    if (handler) {
+        handler(cpu, spapr, token, nargs, args, nret, rets);
+        return H_SUCCESS;
+    }
+
+    /*
+     * HACK: some Linux early debug code calls RTAS display-character with
+     * a hard-coded token of 0xa (its value on some real machines) instead
+     * of looking the token up in the device tree.  Special-casing that
+     * value here keeps such code working.
+     */
+    if (token == 0xa) {
+        rtas_display_character(cpu, spapr, 0xa, nargs, args, nret, rets);
+        return H_SUCCESS;
+    }
+
+    hcall_dprintf("Unknown RTAS token 0x%x\n", token);
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+    return H_PARAMETER;
+}
+
+/*
+ * qtest entry point: run the RTAS handler registered under the name @cmd.
+ *
+ * The handler is invoked on first_cpu with the token that corresponds to
+ * its table slot.  Returns H_SUCCESS when a handler with that name was
+ * found and run, H_PARAMETER otherwise.
+ */
+uint64_t qtest_rtas_call(char *cmd, uint32_t nargs, uint64_t args,
+                         uint32_t nret, uint64_t rets)
+{
+    int token;
+
+    for (token = 0; token < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; token++) {
+        const struct rtas_call *call = &rtas_table[token];
+
+        /*
+         * Slots that were never registered have a NULL name and handler;
+         * skip them so that neither strcmp() nor the call below ever sees
+         * a NULL pointer.
+         */
+        if (!call->name || !call->fn) {
+            continue;
+        }
+
+        if (strcmp(cmd, call->name) == 0) {
+            SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+            PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
+
+            call->fn(cpu, spapr, token + RTAS_TOKEN_BASE,
+                     nargs, args, nret, rets);
+            return H_SUCCESS;
+        }
+    }
+    return H_PARAMETER;
+}
+
+/*
+ * Install @fn as the handler for RTAS @token and publish it as @name.
+ *
+ * @token must lie in [RTAS_TOKEN_BASE, RTAS_TOKEN_MAX).  A slot that
+ * already has a name may only be overwritten with a NULL @name.
+ */
+void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn)
+{
+    struct rtas_call *slot;
+
+    assert((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX));
+    slot = &rtas_table[token - RTAS_TOKEN_BASE];
+
+    /* Refuse to silently replace an existing named registration */
+    assert(!name || !slot->name);
+
+    slot->name = name;
+    slot->fn = fn;
+}
+
+/*
+ * Add one cell property per named RTAS call to device tree node @rtas of
+ * @fdt: the property name is the call name, its value the call's token.
+ */
+void spapr_dt_rtas_tokens(void *fdt, int rtas)
+{
+    int i = 0;
+
+    while (i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE) {
+        struct rtas_call *call = &rtas_table[i];
+
+        /* Slots without a name are not advertised to the guest */
+        if (call->name) {
+            _FDT(fdt_setprop_cell(fdt, rtas, call->name, i + RTAS_TOKEN_BASE));
+        }
+        i++;
+    }
+}
+
+/*
+ * Return the RTAS base address stored in the "linux,rtas-base" property
+ * of the machine's "/rtas" device tree node, or 0 when the node or the
+ * property is missing.
+ */
+hwaddr spapr_get_rtas_addr(void)
+{
+    void *fdt = SPAPR_MACHINE(qdev_get_machine())->fdt_blob;
+    const fdt32_t *base;
+    int node;
+
+    /* fetch rtas addr from fdt */
+    node = fdt_path_offset(fdt, "/rtas");
+    if (node < 0) {
+        return 0;
+    }
+
+    base = fdt_getprop(fdt, node, "linux,rtas-base", NULL);
+
+    /*
+     * We assume that the OS called RTAS instantiate-rtas, but some other
+     * OS might call RTAS instantiate-rtas-64 instead. This is fine as of
+     * now, as SLOF only supports the 32-bit variant.
+     */
+    return base ? (hwaddr)fdt32_to_cpu(*base) : 0;
+}
+
+/*
+ * Register the core RTAS calls implemented in this file.  Run through
+ * type_init().
+ */
+static void core_rtas_register_types(void)
+{
+    static const struct {
+        int token;
+        const char *name;
+        spapr_rtas_fn fn;
+    } core_calls[] = {
+        { RTAS_DISPLAY_CHARACTER, "display-character",
+          rtas_display_character },
+        { RTAS_POWER_OFF, "power-off", rtas_power_off },
+        { RTAS_SYSTEM_REBOOT, "system-reboot", rtas_system_reboot },
+        { RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state",
+          rtas_query_cpu_stopped_state },
+        { RTAS_START_CPU, "start-cpu", rtas_start_cpu },
+        { RTAS_STOP_SELF, "stop-self", rtas_stop_self },
+        { RTAS_IBM_SUSPEND_ME, "ibm,suspend-me", rtas_ibm_suspend_me },
+        { RTAS_IBM_GET_SYSTEM_PARAMETER, "ibm,get-system-parameter",
+          rtas_ibm_get_system_parameter },
+        { RTAS_IBM_SET_SYSTEM_PARAMETER, "ibm,set-system-parameter",
+          rtas_ibm_set_system_parameter },
+        { RTAS_IBM_OS_TERM, "ibm,os-term", rtas_ibm_os_term },
+        { RTAS_SET_POWER_LEVEL, "set-power-level", rtas_set_power_level },
+        { RTAS_GET_POWER_LEVEL, "get-power-level", rtas_get_power_level },
+        { RTAS_IBM_NMI_REGISTER, "ibm,nmi-register", rtas_ibm_nmi_register },
+        { RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+          rtas_ibm_nmi_interlock },
+    };
+    size_t n;
+
+    /* Same calls, same order as a hand-written list of registrations */
+    for (n = 0; n < sizeof(core_calls) / sizeof(core_calls[0]); n++) {
+        spapr_rtas_register(core_calls[n].token, core_calls[n].name,
+                            core_calls[n].fn);
+    }
+}
+
+type_init(core_rtas_register_types)
diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c
new file mode 100644
index 000000000..3e826e130
--- /dev/null
+++ b/hw/ppc/spapr_rtas_ddw.c
@@ -0,0 +1,291 @@
+/*
+ * QEMU sPAPR Dynamic DMA windows support
+ *
+ * Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "hw/ppc/spapr.h"
+#include "hw/pci-host/spapr.h"
+#include "trace.h"
+
+/*
+ * object_child_foreach() callback: bump the unsigned counter behind
+ * @opaque for every child that is a TCE table with a non-zero nb_table.
+ * Always returns 0.
+ */
+static int spapr_phb_get_active_win_num_cb(Object *child, void *opaque)
+{
+    unsigned *active = opaque;
+    SpaprTceTable *tcet =
+        (SpaprTceTable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+
+    if (tcet != NULL && tcet->nb_table != 0) {
+        *active += 1;
+    }
+    return 0;
+}
+
+/*
+ * Count the child TCE tables of @sphb that have a non-zero nb_table.
+ */
+static unsigned spapr_phb_get_active_win_num(SpaprPhbState *sphb)
+{
+    unsigned active = 0;
+
+    object_child_foreach(OBJECT(sphb), spapr_phb_get_active_win_num_cb,
+                         &active);
+    return active;
+}
+
+/*
+ * object_child_foreach() callback: on the first child TCE table whose
+ * nb_table is zero, store its LIOBN in the uint32_t behind @opaque and
+ * return 1; return 0 for every other child.
+ */
+static int spapr_phb_get_free_liobn_cb(Object *child, void *opaque)
+{
+    uint32_t *found = opaque;
+    SpaprTceTable *tcet =
+        (SpaprTceTable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE);
+
+    if (tcet == NULL || tcet->nb_table != 0) {
+        return 0;
+    }
+
+    *found = tcet->liobn;
+    return 1;
+}
+
+/*
+ * Return the LIOBN of a child TCE table of @sphb whose nb_table is zero,
+ * or 0 when there is none.
+ */
+static unsigned spapr_phb_get_free_liobn(SpaprPhbState *sphb)
+{
+    uint32_t free_liobn = 0;
+
+    object_child_foreach(OBJECT(sphb), spapr_phb_get_free_liobn_cb,
+                         &free_liobn);
+    return free_liobn;
+}
+
+/*
+ * Translate a page size mask (bit N set means 2^N byte pages) into the
+ * RTAS_DDW_PGSIZE_* flags.  Page sizes without a flag are dropped.
+ */
+static uint32_t spapr_page_mask_to_query_mask(uint64_t page_mask)
+{
+    const struct { int shift; uint32_t mask; } masks[] = {
+        { 12, RTAS_DDW_PGSIZE_4K },
+        { 16, RTAS_DDW_PGSIZE_64K },
+        { 24, RTAS_DDW_PGSIZE_16M },
+        { 25, RTAS_DDW_PGSIZE_32M },
+        { 26, RTAS_DDW_PGSIZE_64M },
+        { 27, RTAS_DDW_PGSIZE_128M },
+        { 28, RTAS_DDW_PGSIZE_256M },
+        { 34, RTAS_DDW_PGSIZE_16G },
+    };
+    uint32_t query_mask = 0;
+    int idx = 0;
+
+    while (idx < ARRAY_SIZE(masks)) {
+        if ((page_mask >> masks[idx].shift) & 1) {
+            query_mask |= masks[idx].mask;
+        }
+        ++idx;
+    }
+
+    return query_mask;
+}
+
+/*
+ * RTAS "ibm,query-pe-dma-window".
+ *
+ * Takes 3 arguments (config address, BUID high word, BUID low word) and
+ * 5 return slots.  On success reports the number of windows still
+ * available, the largest window size, the supported page size flags and
+ * a zero migration mask.  rets[0] is RTAS_OUT_PARAM_ERROR for a wrong
+ * argument count, an unknown PHB, or a PHB with DDW disabled.
+ */
+static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu,
+                                         SpaprMachineState *spapr,
+                                         uint32_t token, uint32_t nargs,
+                                         target_ulong args,
+                                         uint32_t nret, target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint64_t buid;
+    uint32_t avail, addr, pgmask;
+
+    if ((nargs != 3) || (nret != 5)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    addr = rtas_ld(args, 0);
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (!sphb || !sphb->ddw_enabled) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    /* Translate page mask to LoPAPR format */
+    pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask);
+
+    avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    rtas_st(rets, 1, avail);
+    rtas_st(rets, 2, 0x80000000); /* The largest window we can possibly have */
+    rtas_st(rets, 3, pgmask);
+    rtas_st(rets, 4, 0); /* DMA migration mask, not supported */
+
+    trace_spapr_iommu_ddw_query(buid, addr, avail, 0x80000000, pgmask);
+}
+
+/*
+ * RTAS "ibm,create-pe-dma-window".
+ *
+ * Takes 5 arguments (config address, BUID high word, BUID low word, page
+ * shift, window shift) and 4 return slots.  On success a currently
+ * unused TCE table of the PHB is enabled and its LIOBN and bus offset
+ * (high and low words) are returned in rets[1..3].
+ *
+ * rets[0] is RTAS_OUT_PARAM_ERROR for a wrong argument count, an unknown
+ * or DDW-disabled PHB, or an unusable page/window shift, and
+ * RTAS_OUT_HW_ERROR when no window can be set up.
+ */
+static void rtas_ibm_create_pe_dma_window(PowerPCCPU *cpu,
+                                          SpaprMachineState *spapr,
+                                          uint32_t token, uint32_t nargs,
+                                          target_ulong args,
+                                          uint32_t nret, target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    SpaprTceTable *tcet = NULL;
+    uint32_t addr, page_shift, window_shift, liobn;
+    uint64_t buid, win_addr;
+    int windows;
+
+    if ((nargs != 5) || (nret != 4)) {
+        goto param_error_exit;
+    }
+
+    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    addr = rtas_ld(args, 0);
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (!sphb || !sphb->ddw_enabled) {
+        goto param_error_exit;
+    }
+
+    page_shift = rtas_ld(args, 3);
+    window_shift = rtas_ld(args, 4);
+    liobn = spapr_phb_get_free_liobn(sphb);
+    windows = spapr_phb_get_active_win_num(sphb);
+
+    /*
+     * Both shifts come straight from the guest and are used as shift
+     * counts on 64-bit values below.  Shifting by 64 or more is undefined
+     * behaviour in C, so reject such values before any shift happens.
+     */
+    if (page_shift >= 64 || window_shift >= 64) {
+        goto param_error_exit;
+    }
+
+    if (!(sphb->page_size_mask & (1ULL << page_shift)) ||
+        (window_shift < page_shift)) {
+        goto param_error_exit;
+    }
+
+    /* ddw_enabled was already verified above, no need to test it again */
+    if (!liobn || windows == SPAPR_PCI_DMA_MAX_WINDOWS) {
+        goto hw_error_exit;
+    }
+
+    tcet = spapr_tce_find_by_liobn(liobn);
+    if (!tcet) {
+        goto hw_error_exit;
+    }
+
+    win_addr = (windows == 0) ? sphb->dma_win_addr : sphb->dma64_win_addr;
+    /*
+     * We have just created a window, we know for the fact that it is empty,
+     * use a hack to avoid iterating over the table as it is quite possible
+     * to have billions of TCEs, all empty.
+     * Note that we cannot delay this to the first H_PUT_TCE as this hcall is
+     * mostly likely to be handled in KVM so QEMU just does not know if it
+     * happened.
+     */
+    tcet->skipping_replay = true;
+    spapr_tce_table_enable(tcet, page_shift, win_addr,
+                           1ULL << (window_shift - page_shift));
+    tcet->skipping_replay = false;
+    if (!tcet->nb_table) {
+        goto hw_error_exit;
+    }
+
+    trace_spapr_iommu_ddw_create(buid, addr, 1ULL << page_shift,
+                                 1ULL << window_shift, tcet->bus_offset, liobn);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    rtas_st(rets, 1, liobn);
+    rtas_st(rets, 2, tcet->bus_offset >> 32);
+    rtas_st(rets, 3, tcet->bus_offset & ((uint32_t) -1));
+
+    return;
+
+hw_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+    return;
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
+/*
+ * RTAS "ibm,remove-pe-dma-window".
+ *
+ * Takes 1 argument (the LIOBN of the window) and 1 return slot.  The TCE
+ * table with that LIOBN is disabled.  rets[0] is RTAS_OUT_PARAM_ERROR for
+ * a wrong argument count, an unknown LIOBN, a TCE table that does not
+ * belong to a DDW-enabled PHB, or a window that is not active.
+ */
+static void rtas_ibm_remove_pe_dma_window(PowerPCCPU *cpu,
+                                          SpaprMachineState *spapr,
+                                          uint32_t token, uint32_t nargs,
+                                          target_ulong args,
+                                          uint32_t nret, target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    SpaprTceTable *tcet;
+    uint32_t liobn;
+
+    if ((nargs != 1) || (nret != 1)) {
+        goto param_error_exit;
+    }
+
+    liobn = rtas_ld(args, 0);
+    tcet = spapr_tce_find_by_liobn(liobn);
+    if (!tcet) {
+        goto param_error_exit;
+    }
+
+    /*
+     * The LIOBN is chosen by the guest and may name a TCE table whose
+     * parent is not a PCI host bridge.  Use a non-asserting cast so that
+     * such a request ends up as a parameter error; object_dynamic_cast()
+     * returns NULL on a type mismatch.
+     */
+    sphb = (SpaprPhbState *) object_dynamic_cast(OBJECT(tcet)->parent,
+                                                 TYPE_SPAPR_PCI_HOST_BRIDGE);
+    if (!sphb || !sphb->ddw_enabled || !tcet->nb_table) {
+        goto param_error_exit;
+    }
+
+    spapr_tce_table_disable(tcet);
+    trace_spapr_iommu_ddw_remove(liobn);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    return;
+
+param_error_exit:
+    rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+}
+
+/*
+ * RTAS "ibm,reset-pe-dma-window".
+ *
+ * Takes 3 arguments (config address, BUID high word, BUID low word) and
+ * 1 return slot, and calls spapr_phb_dma_reset() on the PHB.  rets[0] is
+ * RTAS_OUT_PARAM_ERROR for a wrong argument count, an unknown PHB, or a
+ * PHB with DDW disabled.
+ */
+static void rtas_ibm_reset_pe_dma_window(PowerPCCPU *cpu,
+                                         SpaprMachineState *spapr,
+                                         uint32_t token, uint32_t nargs,
+                                         target_ulong args,
+                                         uint32_t nret, target_ulong rets)
+{
+    SpaprPhbState *sphb;
+    uint64_t buid;
+    uint32_t addr;
+
+    if ((nargs != 3) || (nret != 1)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
+    addr = rtas_ld(args, 0);
+    sphb = spapr_pci_find_phb(spapr, buid);
+    if (!sphb || !sphb->ddw_enabled) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    spapr_phb_dma_reset(sphb);
+    trace_spapr_iommu_ddw_reset(buid, addr);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * Register the four dynamic DMA window RTAS calls.  Run through
+ * type_init().
+ */
+static void spapr_rtas_ddw_init(void)
+{
+    static const struct {
+        int token;
+        const char *name;
+        spapr_rtas_fn fn;
+    } ddw_calls[] = {
+        { RTAS_IBM_QUERY_PE_DMA_WINDOW, "ibm,query-pe-dma-window",
+          rtas_ibm_query_pe_dma_window },
+        { RTAS_IBM_CREATE_PE_DMA_WINDOW, "ibm,create-pe-dma-window",
+          rtas_ibm_create_pe_dma_window },
+        { RTAS_IBM_REMOVE_PE_DMA_WINDOW, "ibm,remove-pe-dma-window",
+          rtas_ibm_remove_pe_dma_window },
+        { RTAS_IBM_RESET_PE_DMA_WINDOW, "ibm,reset-pe-dma-window",
+          rtas_ibm_reset_pe_dma_window },
+    };
+    size_t n;
+
+    for (n = 0; n < sizeof(ddw_calls) / sizeof(ddw_calls[0]); n++) {
+        spapr_rtas_register(ddw_calls[n].token, ddw_calls[n].name,
+                            ddw_calls[n].fn);
+    }
+}
+
+type_init(spapr_rtas_ddw_init)
diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c
new file mode 100644
index 000000000..fba4dfca3
--- /dev/null
+++ b/hw/ppc/spapr_rtc.c
@@ -0,0 +1,190 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * RTAS Real Time Clock
+ *
+ * Copyright (c) 2010-2011 David Gibson, IBM Corporation.
+ * Copyright 2014 David Gibson, Red Hat.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "sysemu/sysemu.h"
+#include "hw/ppc/spapr.h"
+#include "migration/vmstate.h"
+#include "qapi/error.h"
+#include "qapi/qapi-events-misc-target.h"
+#include "qemu/cutils.h"
+#include "qemu/module.h"
+
+/*
+ * Read the guest-visible time of @rtc.
+ *
+ * Guest time is the rtc_clock reading plus the RTC's nanosecond offset.
+ * @tm, when not NULL, receives the broken-down UTC time.  @ns, when not
+ * NULL, receives the nanoseconds within the current second
+ * (0 .. NANOSECONDS_PER_SECOND - 1).  @rtc must not be NULL.
+ */
+void spapr_rtc_read(SpaprRtcState *rtc, struct tm *tm, uint32_t *ns)
+{
+    int64_t host_ns = qemu_clock_get_ns(rtc_clock);
+    int64_t guest_ns, sub_ns;
+    time_t guest_s;
+
+    assert(rtc);
+
+    guest_ns = host_ns + rtc->ns_offset;
+    guest_s = guest_ns / NANOSECONDS_PER_SECOND;
+    sub_ns = guest_ns % NANOSECONDS_PER_SECOND;
+    if (sub_ns < 0) {
+        /*
+         * C division truncates towards zero; for times before the epoch
+         * move to the previous second so the remainder is non-negative.
+         */
+        sub_ns += NANOSECONDS_PER_SECOND;
+        guest_s--;
+    }
+
+    if (tm) {
+        gmtime_r(&guest_s, tm);
+    }
+    if (ns) {
+        /*
+         * Only the sub-second part fits in (and belongs in) 32 bits;
+         * storing guest_ns itself would merely keep its low 32 bits.
+         */
+        *ns = sub_ns;
+    }
+}
+
+/*
+ * Set the RTC offset from @legacy_offset, which is expressed in seconds.
+ *
+ * Returns 0 on success, -ENODEV when @rtc is NULL.
+ */
+int spapr_rtc_import_offset(SpaprRtcState *rtc, int64_t legacy_offset)
+{
+    int rc = -ENODEV;
+
+    if (rtc) {
+        rtc->ns_offset = legacy_offset * NANOSECONDS_PER_SECOND;
+        rc = 0;
+    }
+
+    return rc;
+}
+
+/*
+ * RTAS "get-time-of-day".
+ *
+ * Takes no arguments and 8 return slots: status, year, month (1-based),
+ * day of month, hour, minute, second and the nanosecond value reported
+ * by spapr_rtc_read().  A wrong argument count yields
+ * RTAS_OUT_PARAM_ERROR in rets[0].
+ */
+static void rtas_get_time_of_day(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 uint32_t token, uint32_t nargs,
+                                 target_ulong args,
+                                 uint32_t nret, target_ulong rets)
+{
+    struct tm tm;
+    uint32_t ns;
+    int slot;
+
+    if ((nargs != 0) || (nret != 8)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    spapr_rtc_read(&spapr->rtc, &tm, &ns);
+
+    {
+        /* struct tm counts years from 1900 and months from 0 */
+        const uint32_t out[8] = {
+            RTAS_OUT_SUCCESS,
+            tm.tm_year + 1900,
+            tm.tm_mon + 1,
+            tm.tm_mday,
+            tm.tm_hour,
+            tm.tm_min,
+            tm.tm_sec,
+            ns,
+        };
+
+        for (slot = 0; slot < 8; slot++) {
+            rtas_st(rets, slot, out[slot]);
+        }
+    }
+}
+
+/*
+ * RTAS "set-time-of-day".
+ *
+ * Takes 7 arguments (year, month, day, hour, minute, second and a seventh
+ * value that is not used here) and 1 return slot.  The RTC offset is
+ * updated so that the guest clock reads the requested UTC time, and an
+ * RTC change event is emitted.
+ *
+ * rets[0] is RTAS_OUT_PARAM_ERROR for a wrong argument count, for a date
+ * that mktimegm() maps to -1, or for a date whose nanosecond count does
+ * not fit the 64-bit RTC offset.
+ */
+static void rtas_set_time_of_day(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 uint32_t token, uint32_t nargs,
+                                 target_ulong args,
+                                 uint32_t nret, target_ulong rets)
+{
+    SpaprRtcState *rtc = &spapr->rtc;
+    /* Zeroed so no indeterminate field is handed on with the struct */
+    struct tm tm = { 0 };
+    time_t new_s;
+    int64_t new_s64;
+    int64_t host_ns;
+
+    if ((nargs != 7) || (nret != 1)) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    tm.tm_year = rtas_ld(args, 0) - 1900;
+    tm.tm_mon = rtas_ld(args, 1) - 1;
+    tm.tm_mday = rtas_ld(args, 2);
+    tm.tm_hour = rtas_ld(args, 3);
+    tm.tm_min = rtas_ld(args, 4);
+    tm.tm_sec = rtas_ld(args, 5);
+
+    new_s = mktimegm(&tm);
+    if (new_s == -1) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    /*
+     * The date is chosen by the guest.  Refuse values whose conversion to
+     * nanoseconds would overflow int64_t (signed overflow is undefined).
+     */
+    new_s64 = new_s;
+    if (new_s64 > INT64_MAX / NANOSECONDS_PER_SECOND ||
+        new_s64 < INT64_MIN / NANOSECONDS_PER_SECOND) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    /* Generate a monitor event for the change */
+    qapi_event_send_rtc_change(qemu_timedate_diff(&tm));
+
+    host_ns = qemu_clock_get_ns(rtc_clock);
+
+    rtc->ns_offset = (new_s64 * NANOSECONDS_PER_SECOND) - host_ns;
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * Getter behind the RTC's "date" property: fill @current_tm with the
+ * current guest time.  @errp is not used.
+ */
+static void spapr_rtc_qom_date(Object *obj, struct tm *current_tm, Error **errp)
+{
+    SpaprRtcState *rtc = SPAPR_RTC(obj);
+
+    spapr_rtc_read(rtc, current_tm, NULL);
+}
+
+/*
+ * Realize the RTC device: derive the initial nanosecond offset from the
+ * time reported by qemu_get_timedate() and add the "date" property.
+ */
+static void spapr_rtc_realize(DeviceState *dev, Error **errp)
+{
+    SpaprRtcState *rtc = SPAPR_RTC(dev);
+    struct tm now;
+    time_t start_s;
+    int64_t clock_ns;
+
+    /* Initialize the RTAS RTC from host time */
+    qemu_get_timedate(&now, 0);
+    start_s = mktimegm(&now);
+    clock_ns = qemu_clock_get_ns(rtc_clock);
+
+    /* Offset such that rtc_clock + offset equals the start time */
+    rtc->ns_offset = start_s * NANOSECONDS_PER_SECOND - clock_ns;
+
+    object_property_add_tm(OBJECT(rtc), "date", spapr_rtc_qom_date);
+}
+
+static const VMStateDescription vmstate_spapr_rtc = { /* migration state of the sPAPR RTC */
+ .name = "spapr/rtc",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_INT64(ns_offset, SpaprRtcState), /* the only field listed: the nanosecond offset */
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Class initializer for the sPAPR RTC: set the realize hook and the
+ * migration description, and register the two time-of-day RTAS calls.
+ */
+static void spapr_rtc_class_init(ObjectClass *oc, void *data)
+{
+    DeviceClass *devc = DEVICE_CLASS(oc);
+
+    /* Reason: This is an internal device only for handling the hypercalls */
+    devc->user_creatable = false;
+    devc->vmsd = &vmstate_spapr_rtc;
+    devc->realize = spapr_rtc_realize;
+
+    spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day",
+                        rtas_get_time_of_day);
+    spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day",
+                        rtas_set_time_of_day);
+}
+
+static const TypeInfo spapr_rtc_info = { /* type description passed to type_register_static() */
+ .name = TYPE_SPAPR_RTC,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(SpaprRtcState),
+ .class_init = spapr_rtc_class_init,
+};
+
+static void spapr_rtc_register_types(void) /* registers spapr_rtc_info; hooked up via type_init() */
+{
+ type_register_static(&spapr_rtc_info);
+}
+type_init(spapr_rtc_register_types)
diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c
new file mode 100644
index 000000000..4ee03c83e
--- /dev/null
+++ b/hw/ppc/spapr_softmmu.c
@@ -0,0 +1,612 @@
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "cpu.h"
+#include "helper_regs.h"
+#include "hw/ppc/spapr.h"
+#include "mmu-hash64.h"
+#include "mmu-book3s-v3.h"
+
+/*
+ * Check that @ptex addresses a PTE group inside the hash page table:
+ * the group index must have no bits set outside the HPT mask.
+ */
+static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex)
+{
+    /*
+     * hash value/pteg group index is normalized by HPT mask
+     */
+    uint64_t pteg = (ptex & ~7ULL) / HPTES_PER_GROUP;
+
+    return (pteg & ~ppc_hash64_hpt_mask(cpu)) == 0;
+}
+
+/*
+ * H_ENTER hypercall: insert a new entry into the guest hash page table.
+ *
+ * args[0] holds the flags, args[1] the PTE index and args[2]/args[3] the
+ * first/second doubleword of the entry.  Without H_EXACT the first
+ * invalid slot of the addressed group is used; with H_EXACT only the
+ * exact slot named by the index is tried.  On success the index that was
+ * used is returned in args[0].
+ *
+ * Returns H_SUCCESS, H_PARAMETER (bad page size encoding, unacceptable
+ * WIMG bits, or index outside the table) or H_PTEG_FULL (no free slot).
+ */
+static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                            target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong ptex = args[1];
+    target_ulong pteh = args[2];
+    target_ulong ptel = args[3];
+    unsigned apshift;
+    target_ulong raddr;
+    target_ulong slot;
+    const ppc_hash_pte64_t *hptes;
+
+    apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel);
+    if (!apshift) {
+        /* Bad page size encoding */
+        return H_PARAMETER;
+    }
+
+    raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1);
+
+    if (is_ram_address(spapr, raddr)) {
+        /* Regular RAM - should have WIMG=0010 */
+        if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) {
+            return H_PARAMETER;
+        }
+    } else {
+        target_ulong wimg_flags;
+        /* Looks like an IO address */
+        /* FIXME: What WIMG combinations could be sensible for IO?
+         * For now we allow WIMG=010x, but are there others? */
+        /* FIXME: Should we check against registered IO addresses? */
+        wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M));
+
+        if (wimg_flags != HPTE64_R_I &&
+            wimg_flags != (HPTE64_R_I | HPTE64_R_M)) {
+            return H_PARAMETER;
+        }
+    }
+
+    pteh &= ~0x60ULL;
+
+    if (!valid_ptex(cpu, ptex)) {
+        return H_PARAMETER;
+    }
+
+    /* Split the index into the group base and the slot within the group */
+    slot = ptex & 7ULL;
+    ptex = ptex & ~7ULL;
+
+    if (likely((flags & H_EXACT) == 0)) {
+        hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP);
+        for (slot = 0; slot < 8; slot++) {
+            if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) {
+                break;
+            }
+        }
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP);
+        if (slot == 8) {
+            return H_PTEG_FULL;
+        }
+    } else {
+        hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1);
+        if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) {
+            ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
+            return H_PTEG_FULL;
+        }
+        /* Unmap with the same index that was used to map the entry */
+        ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1);
+    }
+
+    spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel);
+
+    args[0] = ptex + slot;
+    return H_SUCCESS;
+}
+
+typedef enum { /* outcome of remove_hpte(); h_bulk_remove() shifts the numeric value into its reply */
+ REMOVE_SUCCESS = 0, /* entry was removed */
+ REMOVE_NOT_FOUND = 1, /* entry invalid, or AVPN/ANDCOND condition not met */
+ REMOVE_PARM = 2, /* PTE index rejected by valid_ptex() */
+ REMOVE_HW = 3, /* mapped to H_HARDWARE by the callers */
+} RemoveResult;
+
+/*
+ * Invalidate the hash page table entry at index @ptex.
+ *
+ * The entry must be valid.  With H_AVPN in @flags its first doubleword,
+ * low 7 bits masked off, must equal @avpn; with H_ANDCOND the bitwise
+ * AND of the first doubleword and @avpn must be zero.  On success the
+ * old doublewords are returned through @vp and @rp, the entry is
+ * overwritten and a TLB flush is requested for it.
+ *
+ * Returns REMOVE_PARM for an index outside the table, REMOVE_NOT_FOUND
+ * when a condition above fails (@vp and @rp are then left untouched),
+ * REMOVE_SUCCESS otherwise.
+ */
+static RemoveResult remove_hpte(PowerPCCPU *cpu,
+                                target_ulong ptex,
+                                target_ulong avpn,
+                                target_ulong flags,
+                                target_ulong *vp, target_ulong *rp)
+{
+    const ppc_hash_pte64_t *hptes;
+    target_ulong pte0, pte1;
+
+    if (!valid_ptex(cpu, ptex)) {
+        return REMOVE_PARM;
+    }
+
+    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+    pte0 = ppc_hash64_hpte0(cpu, hptes, 0);
+    pte1 = ppc_hash64_hpte1(cpu, hptes, 0);
+    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
+
+    if ((pte0 & HPTE64_V_VALID) == 0) {
+        return REMOVE_NOT_FOUND;
+    }
+    if ((flags & H_AVPN) && (pte0 & ~0x7fULL) != avpn) {
+        return REMOVE_NOT_FOUND;
+    }
+    if ((flags & H_ANDCOND) && (pte0 & avpn) != 0) {
+        return REMOVE_NOT_FOUND;
+    }
+
+    *vp = pte0;
+    *rp = pte1;
+    spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0);
+    ppc_hash64_tlb_flush_hpte(cpu, ptex, pte0, pte1);
+    return REMOVE_SUCCESS;
+}
+
+/*
+ * H_REMOVE hypercall: remove a single hash page table entry.
+ *
+ * args[0] holds the flags, args[1] the PTE index and args[2] the AVPN.
+ * On success the removed entry's two doublewords are returned in
+ * args[0] and args[1].
+ *
+ * Returns H_SUCCESS, H_NOT_FOUND, H_PARAMETER or H_HARDWARE according to
+ * the result of remove_hpte().
+ */
+static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                             target_ulong opcode, target_ulong *args)
+{
+    RemoveResult ret = remove_hpte(cpu, args[1], args[2], args[0],
+                                   &args[0], &args[1]);
+
+    if (ret == REMOVE_SUCCESS) {
+        check_tlb_flush(&cpu->env, true);
+        return H_SUCCESS;
+    }
+    if (ret == REMOVE_NOT_FOUND) {
+        return H_NOT_FOUND;
+    }
+    if (ret == REMOVE_PARM) {
+        return H_PARAMETER;
+    }
+    if (ret == REMOVE_HW) {
+        return H_HARDWARE;
+    }
+
+    g_assert_not_reached();
+}
+
+#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL /* mask: top two bits of the first word of a pair */
+#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL /* type value: entry still to be processed */
+#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL /* type value: set by h_bulk_remove() once processed */
+#define H_BULK_REMOVE_END 0xc000000000000000ULL /* type value: stops the loop */
+#define H_BULK_REMOVE_CODE 0x3000000000000000ULL /* mask: per-entry result code */
+#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL
+#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL
+#define H_BULK_REMOVE_PARM 0x2000000000000000ULL
+#define H_BULK_REMOVE_HW 0x3000000000000000ULL
+#define H_BULK_REMOVE_RC 0x0c00000000000000ULL /* mask: two bits above the flags field */
+#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL /* mask: request flags */
+#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL
+#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL
+#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL
+#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL /* mask: PTE index */
+
+#define H_BULK_REMOVE_MAX_BATCH 4 /* number of argument pairs h_bulk_remove() walks */
+
+/*
+ * H_BULK_REMOVE hypercall: remove up to H_BULK_REMOVE_MAX_BATCH entries.
+ *
+ * @args holds pairs of words.  The first word of a pair carries the
+ * request type, the flags and the PTE index; the second is the AVPN
+ * operand.  Each processed pair's first word is rewritten in place as a
+ * response carrying the result code and, on success, the R/C bits of the
+ * removed entry.  Processing stops at the first END entry.
+ *
+ * Returns H_SUCCESS, H_PARAMETER (malformed request, both ANDCOND and
+ * AVPN set, or invalid index) or H_HARDWARE.
+ */
+static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                  target_ulong opcode, target_ulong *args)
+{
+    CPUPPCState *env = &cpu->env;
+    int i;
+    target_ulong rc = H_SUCCESS;
+
+    for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
+        target_ulong *tsh = &args[i*2];
+        target_ulong tsl = args[i*2 + 1];
+        target_ulong v, r, ret;
+
+        if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) {
+            break;
+        } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) {
+            return H_PARAMETER;
+        }
+
+        /* Keep only index and flags, then mark the entry as a response */
+        *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS;
+        *tsh |= H_BULK_REMOVE_RESPONSE;
+
+        if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) {
+            *tsh |= H_BULK_REMOVE_PARM;
+            return H_PARAMETER;
+        }
+
+        /* Shifting by 26 moves the flag bits down to where H_REMOVE has them */
+        ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl,
+                          (*tsh & H_BULK_REMOVE_FLAGS) >> 26,
+                          &v, &r);
+
+        *tsh |= ret << 60;
+
+        switch (ret) {
+        case REMOVE_SUCCESS:
+            /*
+             * Report the R and C bits in the H_BULK_REMOVE_RC field
+             * (bits 58-59).  NOTE(review): this assumes HPTE64_R_C and
+             * HPTE64_R_R are bits 7 and 8 of the second doubleword, which
+             * calls for a shift of 51; a shift of 43 would instead put
+             * them inside the PTEX field.  Confirm against mmu-hash64.h.
+             */
+            *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 51;
+            break;
+
+        case REMOVE_PARM:
+            rc = H_PARAMETER;
+            goto exit;
+
+        case REMOVE_HW:
+            rc = H_HARDWARE;
+            goto exit;
+        }
+    }
+ exit:
+    check_tlb_flush(env, true);
+
+    return rc;
+}
+
+/*
+ * H_PROTECT hypercall: change the protection bits of an existing entry.
+ *
+ * args[0] holds the flags (which also carry the new PP, N and key
+ * bits), args[1] the PTE index and args[2] the AVPN, which is compared
+ * only when H_AVPN is set.
+ *
+ * Returns H_PARAMETER for an index outside the table, H_NOT_FOUND when
+ * the entry is invalid or the AVPN does not match, H_SUCCESS otherwise.
+ */
+static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    CPUPPCState *env = &cpu->env;
+    target_ulong flags = args[0];
+    target_ulong ptex = args[1];
+    target_ulong avpn = args[2];
+    const ppc_hash_pte64_t *hptes;
+    target_ulong pte0, pte1;
+
+    if (!valid_ptex(cpu, ptex)) {
+        return H_PARAMETER;
+    }
+
+    hptes = ppc_hash64_map_hptes(cpu, ptex, 1);
+    pte0 = ppc_hash64_hpte0(cpu, hptes, 0);
+    pte1 = ppc_hash64_hpte1(cpu, hptes, 0);
+    ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1);
+
+    if ((pte0 & HPTE64_V_VALID) == 0) {
+        return H_NOT_FOUND;
+    }
+    if ((flags & H_AVPN) && (pte0 & ~0x7fULL) != avpn) {
+        return H_NOT_FOUND;
+    }
+
+    /* Drop the old protection/key bits, then merge in those from flags */
+    pte1 &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N |
+              HPTE64_R_KEY_HI | HPTE64_R_KEY_LO);
+    pte1 |= (flags << 55) & HPTE64_R_PP0;
+    pte1 |= (flags << 48) & HPTE64_R_KEY_HI;
+    pte1 |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO);
+
+    /* Invalidate the entry first, flush, and only then store the update */
+    spapr_store_hpte(cpu, ptex,
+                     (pte0 & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0);
+    ppc_hash64_tlb_flush_hpte(cpu, ptex, pte0, pte1);
+    /* Flush the tlb */
+    check_tlb_flush(env, true);
+    /* Don't need a memory barrier, due to qemu's global lock */
+    spapr_store_hpte(cpu, ptex, pte0 | HPTE64_V_HPTE_DIRTY, pte1);
+    return H_SUCCESS;
+}
+
+/*
+ * H_READ hypercall: read one hash page table entry, or four with
+ * H_READ_4 (the index is then rounded down to a multiple of four).
+ *
+ * args[0] holds the flags and args[1] the PTE index.  The doublewords of
+ * the entries are returned in args[0], args[1], ... in order.
+ *
+ * Returns H_PARAMETER for an index outside the table, else H_SUCCESS.
+ */
+static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                           target_ulong opcode, target_ulong *args)
+{
+    target_ulong flags = args[0];
+    target_ulong ptex = args[1];
+    const ppc_hash_pte64_t *hptes;
+    int count = 1;
+    int n;
+
+    if (!valid_ptex(cpu, ptex)) {
+        return H_PARAMETER;
+    }
+
+    if (flags & H_READ_4) {
+        /* Clear the two low order bits */
+        ptex &= ~(3ULL);
+        count = 4;
+    }
+
+    hptes = ppc_hash64_map_hptes(cpu, ptex, count);
+    for (n = 0; n < count; n++) {
+        args[2 * n] = ppc_hash64_hpte0(cpu, hptes, n);
+        args[2 * n + 1] = ppc_hash64_hpte1(cpu, hptes, n);
+    }
+    ppc_hash64_unmap_hptes(cpu, hptes, ptex, count);
+
+    return H_SUCCESS;
+}
+
+struct SpaprPendingHpt { /* state of one in-progress hash page table resize preparation */
+ /* These fields are read-only after initialization */
+ int shift; /* the new table is allocated with size 1 << shift bytes */
+ QemuThread thread; /* thread running hpt_prepare_thread() */
+
+ /* These fields are protected by the BQL */
+ bool complete; /* set by the preparation thread once it has finished */
+
+ /* These fields are private to the preparation thread if
+ * !complete, otherwise protected by the BQL */
+ int ret; /* H_SUCCESS or H_NO_MEM once the allocation was attempted */
+ void *hpt; /* the newly allocated table, NULL if allocation failed */
+};
+
+/*
+ * Release @pending together with the table it owns, if any.
+ */
+static void free_pending_hpt(SpaprPendingHpt *pending)
+{
+    void *table = pending->hpt;
+
+    if (table != NULL) {
+        qemu_vfree(table);
+    }
+
+    g_free(pending);
+}
+
+/*
+ * Thread body: allocate and zero a new hash page table of
+ * 1 << pending->shift bytes, record H_SUCCESS or H_NO_MEM, then, under
+ * the iothread lock, either mark the request complete or, if the
+ * machine no longer points at it, free it.  Always returns NULL.
+ */
+static void *hpt_prepare_thread(void *opaque)
+{
+    SpaprPendingHpt *pending = opaque;
+    size_t size = 1ULL << pending->shift;
+    void *table = qemu_try_memalign(size, size);
+
+    pending->hpt = table;
+    if (table) {
+        memset(table, 0, size);
+    }
+    pending->ret = table ? H_SUCCESS : H_NO_MEM;
+
+    qemu_mutex_lock_iothread();
+
+    if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt != pending) {
+        /* We've been cancelled, clean ourselves up */
+        free_pending_hpt(pending);
+    } else {
+        /* Ready to go */
+        pending->complete = true;
+    }
+
+    qemu_mutex_unlock_iothread();
+    return NULL;
+}
+
+/*
+ * Drop the machine's pending HPT resize, if any.
+ *
+ * Must be called with BQL held.
+ */
+static void cancel_hpt_prepare(SpaprMachineState *spapr)
+{
+    SpaprPendingHpt *pending = spapr->pending_hpt;
+
+    /* Let the thread know it's cancelled */
+    spapr->pending_hpt = NULL;
+
+    /*
+     * Nothing to free when no request existed.  An unfinished request is
+     * freed by its own thread, which will notice the cleared pointer.
+     */
+    if (pending && pending->complete) {
+        free_pending_hpt(pending);
+    }
+}
+
+/*
+ * Start, or poll, the preparation of a new hash page table of
+ * 1 << @shift bytes.
+ *
+ * A pending request for the same @shift is polled: its result is
+ * returned once complete, H_LONG_BUSY_ORDER_100_MSEC before that.  A
+ * pending request for a different @shift is cancelled.  A @shift of 0
+ * then just returns H_SUCCESS; otherwise a new preparation thread is
+ * started and H_LONG_BUSY_ORDER_100_MSEC is returned.
+ */
+target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu,
+                                         SpaprMachineState *spapr,
+                                         target_ulong shift)
+{
+    SpaprPendingHpt *pending = spapr->pending_hpt;
+
+    /* something already in progress, and it's suitable */
+    if (pending && pending->shift == shift) {
+        return pending->complete ? pending->ret : H_LONG_BUSY_ORDER_100_MSEC;
+    }
+
+    /* something in progress but not suitable, cancel and replace */
+    if (pending) {
+        cancel_hpt_prepare(spapr);
+    }
+
+    if (shift == 0) {
+        /* nothing to do */
+        return H_SUCCESS;
+    }
+
+    /* start new prepare */
+
+    pending = g_new0(SpaprPendingHpt, 1);
+    pending->shift = shift;
+    pending->ret = H_HARDWARE;
+
+    qemu_thread_create(&pending->thread, "sPAPR HPT prepare",
+                       hpt_prepare_thread, pending, QEMU_THREAD_DETACHED);
+
+    spapr->pending_hpt = pending;
+
+    /* In theory we could estimate the time more accurately based on
+     * the new size, but there's not much point */
+    return H_LONG_BUSY_ORDER_100_MSEC;
+}
+
+/*
+ * Load the first doubleword of the HPTE at (@pteg, @slot) from the
+ * table buffer @htab.
+ */
+static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot)
+{
+    uint8_t *entry = (uint8_t *)htab;
+
+    entry = entry + pteg * HASH_PTEG_SIZE_64;
+    entry = entry + slot * HASH_PTE_SIZE_64;
+
+    return ldq_p(entry);
+}
+
+/*
+ * Store both doublewords (@pte0, @pte1) of the HPTE at (@pteg, @slot)
+ * into the table buffer @htab.
+ */
+static void new_hpte_store(void *htab, uint64_t pteg, int slot,
+                           uint64_t pte0, uint64_t pte1)
+{
+    uint8_t *entry = (uint8_t *)htab;
+
+    entry = entry + pteg * HASH_PTEG_SIZE_64;
+    entry = entry + slot * HASH_PTE_SIZE_64;
+
+    stq_p(entry, pte0);
+    stq_p(entry + HPTE64_DW1, pte1);
+}
+
+/*
+ * Re-insert one HPTE of the current table into the new table.
+ *
+ * @hptes is the mapped PTEG number @pteg of the old table and @slot
+ * selects the entry inside it.  Entries that are not both valid and
+ * bolted are skipped.  The hash is reconstructed from the AVPN and the
+ * old PTEG index, then masked for the new table size.
+ *
+ * Returns H_SUCCESS when the entry was copied or skipped, H_PTEG_FULL
+ * when the target slot already holds a bolted entry, and H_HARDWARE
+ * for an unrecognised segment size.
+ */
+static int rehash_hpte(PowerPCCPU *cpu,
+ const ppc_hash_pte64_t *hptes,
+ void *old_hpt, uint64_t oldsize,
+ void *new_hpt, uint64_t newsize,
+ uint64_t pteg, int slot)
+{
+ /* size >> 7 is the number of PTEGs, so these mask a hash to a PTEG index */
+ uint64_t old_hash_mask = (oldsize >> 7) - 1;
+ uint64_t new_hash_mask = (newsize >> 7) - 1;
+ target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot);
+ target_ulong pte1;
+ uint64_t avpn;
+ unsigned base_pg_shift;
+ uint64_t hash, new_pteg, replace_pte0;
+
+ /* Only valid, bolted entries are carried over */
+ if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) {
+ return H_SUCCESS;
+ }
+
+ pte1 = ppc_hash64_hpte1(cpu, hptes, slot);
+
+ base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1);
+ assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */
+ avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23);
+
+ /* Entries in the secondary group are indexed by the inverted hash */
+ if (pte0 & HPTE64_V_SECONDARY) {
+ pteg = ~pteg;
+ }
+
+ if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) {
+ uint64_t offset, vsid;
+
+ /* We only have 28 - 23 bits of offset in avpn */
+ offset = (avpn & 0x1f) << 23;
+ vsid = avpn >> 5;
+ /* We can find more bits from the pteg value */
+ if (base_pg_shift < 23) {
+ offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift;
+ }
+
+ hash = vsid ^ (offset >> base_pg_shift);
+ } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) {
+ uint64_t offset, vsid;
+
+ /* We only have 40 - 23 bits of seg_off in avpn */
+ offset = (avpn & 0x1ffff) << 23;
+ vsid = avpn >> 17;
+ /* As above, recover the missing low bits from the pteg value */
+ if (base_pg_shift < 23) {
+ offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask)
+ << base_pg_shift;
+ }
+
+ hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift);
+ } else {
+ error_report("rehash_pte: Bad segment size in HPTE");
+ return H_HARDWARE;
+ }
+
+ /*
+ * NOTE(review): for secondary entries new_pteg is inverted after
+ * masking and is not masked again before it is used as a table
+ * index below -- confirm this cannot index outside new_hpt.
+ */
+ new_pteg = hash & new_hash_mask;
+ if (pte0 & HPTE64_V_SECONDARY) {
+ assert(~pteg == (hash & old_hash_mask));
+ new_pteg = ~new_pteg;
+ } else {
+ assert(pteg == (hash & old_hash_mask));
+ }
+ assert((oldsize != newsize) || (pteg == new_pteg));
+ replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot);
+ /*
+ * Strictly speaking, we don't need all these tests, since we only
+ * ever rehash bolted HPTEs. We might in future handle non-bolted
+ * HPTEs, though so make the logic correct for those cases as
+ * well.
+ */
+ if (replace_pte0 & HPTE64_V_VALID) {
+ assert(newsize < oldsize);
+ if (replace_pte0 & HPTE64_V_BOLTED) {
+ if (pte0 & HPTE64_V_BOLTED) {
+ /* Bolted collision, nothing we can do */
+ return H_PTEG_FULL;
+ } else {
+ /* Discard this hpte */
+ return H_SUCCESS;
+ }
+ }
+ }
+
+ new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1);
+ return H_SUCCESS;
+}
+
+/*
+ * Walk every PTEG of the current table (@oldsize bytes) and rehash
+ * each of its slots into @new_hpt (@newsize bytes).
+ *
+ * Returns H_SUCCESS, H_HARDWARE if a PTEG cannot be mapped, or the
+ * first non-success code reported by rehash_hpte().
+ */
+static int rehash_hpt(PowerPCCPU *cpu,
+                      void *old_hpt, uint64_t oldsize,
+                      void *new_hpt, uint64_t newsize)
+{
+    uint64_t group_count = oldsize >> 7;
+    uint64_t group;
+
+    for (group = 0; group < group_count; group++) {
+        hwaddr first_pte = group * HPTES_PER_GROUP;
+        const ppc_hash_pte64_t *mapped;
+        int status = H_SUCCESS;
+        int idx;
+
+        mapped = ppc_hash64_map_hptes(cpu, first_pte, HPTES_PER_GROUP);
+        if (!mapped) {
+            return H_HARDWARE;
+        }
+
+        for (idx = 0; idx < HPTES_PER_GROUP; idx++) {
+            status = rehash_hpte(cpu, mapped, old_hpt, oldsize,
+                                 new_hpt, newsize, group, idx);
+            if (status != H_SUCCESS) {
+                break;
+            }
+        }
+
+        /* The mapping is released on both the success and failure paths */
+        ppc_hash64_unmap_hptes(cpu, mapped, first_pte, HPTES_PER_GROUP);
+
+        if (status != H_SUCCESS) {
+            return status;
+        }
+    }
+
+    return H_SUCCESS;
+}
+
+/*
+ * Switch the machine over to a previously prepared hash page table.
+ *
+ * Returns H_PARAMETER for non-zero @flags, H_CLOSED when there is no
+ * preparation matching @shift, H_BUSY while the preparation is still
+ * running, and otherwise the result of rehashing.  The pending
+ * preparation is consumed whether or not the rehash succeeds.
+ *
+ * NOTE(review): a preparation that finished with H_NO_MEM has a NULL
+ * ->hpt, which is passed to rehash_hpt() unchanged -- confirm a guest
+ * cannot reach this path.
+ */
+target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu,
+                                       SpaprMachineState *spapr,
+                                       target_ulong flags,
+                                       target_ulong shift)
+{
+    SpaprPendingHpt *prep = spapr->pending_hpt;
+    size_t new_bytes;
+    int status;
+
+    if (flags) {
+        return H_PARAMETER;
+    }
+
+    if (prep == NULL || prep->shift != shift) {
+        /* No PREPARE was issued for this size */
+        return H_CLOSED;
+    }
+
+    if (!prep->complete) {
+        /* The preparation thread is still working */
+        return H_BUSY;
+    }
+
+    /* Shouldn't have got past PREPARE without an HPT */
+    g_assert(spapr->htab_shift);
+
+    new_bytes = 1ULL << prep->shift;
+    status = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr),
+                        prep->hpt, new_bytes);
+    if (status == H_SUCCESS) {
+        qemu_vfree(spapr->htab);
+        spapr->htab = prep->hpt;
+        spapr->htab_shift = prep->shift;
+
+        push_sregs_to_kvm_pr(spapr);
+
+        /* Ownership moved to the machine; keep the cleanup below off it */
+        prep->hpt = NULL;
+    }
+
+    spapr->pending_hpt = NULL;
+    free_pending_hpt(prep);
+
+    return status;
+}
+
+/* Register the hash page table hypercalls implemented in this file. */
+static void hypercall_register_types(void)
+{
+    /* hcall-pft: single-entry operations */
+    spapr_register_hypercall(H_ENTER, h_enter);
+    spapr_register_hypercall(H_REMOVE, h_remove);
+    spapr_register_hypercall(H_PROTECT, h_protect);
+    spapr_register_hypercall(H_READ, h_read);
+
+    /* hcall-bulk: batched removal */
+    spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove);
+}
+
+type_init(hypercall_register_types)
diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c
new file mode 100644
index 000000000..245408674
--- /dev/null
+++ b/hw/ppc/spapr_tpm_proxy.c
@@ -0,0 +1,177 @@
+/*
+ * SPAPR TPM Proxy/Hypercall
+ *
+ * Copyright IBM Corp. 2019
+ *
+ * Authors:
+ * Michael Roth <mdroth@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "sysemu/reset.h"
+#include "hw/ppc/spapr.h"
+#include "hw/qdev-properties.h"
+#include "trace.h"
+
+#define TPM_SPAPR_BUFSIZE 4096
+
+/* Operation codes accepted in args[0] of the TPM comm hypercall */
+enum {
+ TPM_COMM_OP_EXECUTE = 1,
+ TPM_COMM_OP_CLOSE_SESSION = 2,
+};
+
+/*
+ * Reset handler, also used to close a session: close the host TPM
+ * file descriptor if one is open and mark it as closed (-1).
+ */
+static void spapr_tpm_proxy_reset(void *opaque)
+{
+    SpaprTpmProxy *proxy = SPAPR_TPM_PROXY(opaque);
+
+    if (proxy->host_fd == -1) {
+        return;
+    }
+
+    close(proxy->host_fd);
+    proxy->host_fd = -1;
+}
+
+/*
+ * Forward one TPM command from guest memory to the host TPM device and
+ * copy the response back.
+ *
+ * args[1]/args[2] give the guest address and size of the command,
+ * args[3]/args[4] the guest address and size of the response buffer.
+ * The command may be at most TPM_SPAPR_BUFSIZE bytes and the response
+ * buffer must be at least TPM_SPAPR_BUFSIZE bytes.  The host device is
+ * opened lazily on first use.
+ *
+ * Returns H_SUCCESS with the response length stored in args[0], H_P3
+ * or H_P5 for a bad input/output size, or H_RESOURCE when the host
+ * device cannot be opened, written or read.
+ */
+static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args)
+{
+    uint64_t data_in = ppc64_phys_to_real(args[1]);
+    target_ulong data_in_size = args[2];
+    uint64_t data_out = ppc64_phys_to_real(args[3]);
+    target_ulong data_out_size = args[4];
+    uint8_t buf_in[TPM_SPAPR_BUFSIZE];
+    uint8_t buf_out[TPM_SPAPR_BUFSIZE];
+    size_t written = 0;
+    ssize_t ret;
+
+    trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size);
+
+    if (data_in_size > TPM_SPAPR_BUFSIZE) {
+        error_report("invalid TPM input buffer size: " TARGET_FMT_lu,
+                     data_in_size);
+        return H_P3;
+    }
+
+    if (data_out_size < TPM_SPAPR_BUFSIZE) {
+        error_report("invalid TPM output buffer size: " TARGET_FMT_lu,
+                     data_out_size);
+        return H_P5;
+    }
+
+    if (tpm_proxy->host_fd == -1) {
+        tpm_proxy->host_fd = open(tpm_proxy->host_path, O_RDWR);
+        if (tpm_proxy->host_fd == -1) {
+            error_report("failed to open TPM device %s: %d",
+                         tpm_proxy->host_path, errno);
+            return H_RESOURCE;
+        }
+    }
+
+    cpu_physical_memory_read(data_in, buf_in, data_in_size);
+
+    /*
+     * Retry on EINTR and continue after a short write.  The offset
+     * must advance so a retry sends the unsent tail rather than the
+     * start of the command again.
+     */
+    do {
+        ret = write(tpm_proxy->host_fd, buf_in + written,
+                    data_in_size - written);
+        if (ret > 0) {
+            written += ret;
+        }
+    } while ((ret >= 0 && written < data_in_size) ||
+             (ret == -1 && errno == EINTR));
+
+    if (ret == -1) {
+        error_report("failed to write to TPM device %s: %d",
+                     tpm_proxy->host_path, errno);
+        return H_RESOURCE;
+    }
+
+    /*
+     * The guest-supplied data_out_size is only known to be at least
+     * TPM_SPAPR_BUFSIZE, so bound the read by the local buffer, not by
+     * the guest value, to avoid overrunning buf_out.
+     */
+    do {
+        ret = read(tpm_proxy->host_fd, buf_out, sizeof(buf_out));
+    } while (ret == 0 || (ret == -1 && errno == EINTR));
+
+    if (ret == -1) {
+        error_report("failed to read from TPM device %s: %d",
+                     tpm_proxy->host_path, errno);
+        return H_RESOURCE;
+    }
+
+    cpu_physical_memory_write(data_out, buf_out, ret);
+    args[0] = ret;
+
+    return H_SUCCESS;
+}
+
+/*
+ * TPM comm hypercall entry point.  args[0] selects the operation:
+ * execute a command or close the session.  Returns H_FUNCTION when the
+ * machine has no TPM proxy and H_PARAMETER for an unknown operation.
+ */
+static target_ulong h_tpm_comm(PowerPCCPU *cpu,
+                               SpaprMachineState *spapr,
+                               target_ulong opcode,
+                               target_ulong *args)
+{
+    SpaprTpmProxy *proxy = spapr->tpm_proxy;
+    target_ulong request = args[0];
+
+    if (proxy == NULL) {
+        error_report("TPM proxy not available");
+        return H_FUNCTION;
+    }
+
+    trace_spapr_h_tpm_comm(proxy->host_path, request);
+
+    if (request == TPM_COMM_OP_EXECUTE) {
+        return tpm_execute(proxy, args);
+    }
+
+    if (request == TPM_COMM_OP_CLOSE_SESSION) {
+        spapr_tpm_proxy_reset(proxy);
+        return H_SUCCESS;
+    }
+
+    return H_PARAMETER;
+}
+
+/*
+ * Realize: the 'host-path' property is mandatory.  The host device is
+ * not opened here; the fd starts out closed and a reset handler is
+ * registered for the device.
+ */
+static void spapr_tpm_proxy_realize(DeviceState *d, Error **errp)
+{
+    SpaprTpmProxy *proxy = SPAPR_TPM_PROXY(d);
+
+    if (!proxy->host_path) {
+        error_setg(errp, "must specify 'host-path' option for device");
+        return;
+    }
+
+    proxy->host_fd = -1;
+    qemu_register_reset(spapr_tpm_proxy_reset, proxy);
+}
+
+/* Unrealize: drop the reset handler registered at realize time. */
+static void spapr_tpm_proxy_unrealize(DeviceState *d)
+{
+    SpaprTpmProxy *proxy = SPAPR_TPM_PROXY(d);
+
+    qemu_unregister_reset(spapr_tpm_proxy_reset, proxy);
+}
+
+/* User-settable properties: "host-path" is stored in SpaprTpmProxy.host_path */
+static Property spapr_tpm_proxy_properties[] = {
+ DEFINE_PROP_STRING("host-path", SpaprTpmProxy, host_path),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+/*
+ * Class initialiser: install the realize/unrealize hooks, mark the
+ * device user-creatable and attach its property list.
+ */
+static void spapr_tpm_proxy_class_init(ObjectClass *k, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(k);
+
+    dev_class->realize = spapr_tpm_proxy_realize;
+    dev_class->unrealize = spapr_tpm_proxy_unrealize;
+    dev_class->user_creatable = true;
+    device_class_set_props(dev_class, spapr_tpm_proxy_properties);
+}
+
+/* QOM type description of the TPM proxy device (a plain TYPE_DEVICE child) */
+static const TypeInfo spapr_tpm_proxy_info = {
+ .name = TYPE_SPAPR_TPM_PROXY,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(SpaprTpmProxy),
+ .class_init = spapr_tpm_proxy_class_init,
+};
+
+/* Register the TPM proxy QOM type and hook up its hypercall. */
+static void spapr_tpm_proxy_register_types(void)
+{
+    /* The device type itself */
+    type_register_static(&spapr_tpm_proxy_info);
+
+    /* The hypercall the guest uses to talk to it */
+    spapr_register_hypercall(SVM_H_TPM_COMM, h_tpm_comm);
+}
+
+type_init(spapr_tpm_proxy_register_types)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
new file mode 100644
index 000000000..b975ed29c
--- /dev/null
+++ b/hw/ppc/spapr_vio.c
@@ -0,0 +1,741 @@
+/*
+ * QEMU sPAPR VIO code
+ *
+ * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Based on the s390 virtio bus code:
+ * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "qapi/visitor.h"
+#include "qemu/log.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "hw/sysbus.h"
+#include "sysemu/kvm.h"
+#include "sysemu/device_tree.h"
+#include "kvm_ppc.h"
+#include "migration/vmstate.h"
+
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/fdt.h"
+#include "trace.h"
+
+#include <libfdt.h>
+
+#define SPAPR_VIO_REG_BASE 0x71000000
+
+/*
+ * Build the device-tree style name "<dt_name>@<reg in hex>" for a VIO
+ * device.  The string is newly allocated; the caller must g_free() it.
+ */
+static char *spapr_vio_get_dev_name(DeviceState *qdev)
+{
+    SpaprVioDevice *vdev = VIO_SPAPR_DEVICE(qdev);
+    SpaprVioDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(vdev);
+    char *name;
+
+    name = g_strdup_printf("%s@%x", vdc->dt_name, vdev->reg);
+    return name;
+}
+
+/*
+ * Bus class initialiser: both the qdev path and the firmware path of a
+ * child device are its device-tree style name.
+ */
+static void spapr_vio_bus_class_init(ObjectClass *klass, void *data)
+{
+    BusClass *bus_class = BUS_CLASS(klass);
+
+    bus_class->get_dev_path = spapr_vio_get_dev_name;
+    bus_class->get_fw_dev_path = spapr_vio_get_dev_name;
+}
+
+/* QOM type description of the VIO bus */
+static const TypeInfo spapr_vio_bus_info = {
+ .name = TYPE_SPAPR_VIO_BUS,
+ .parent = TYPE_BUS,
+ .class_init = spapr_vio_bus_class_init,
+ .instance_size = sizeof(SpaprVioBus),
+};
+
+/*
+ * Look up the device on @bus whose "reg" value equals @reg.
+ * Returns the first match, or NULL when there is none.
+ */
+SpaprVioDevice *spapr_vio_find_by_reg(SpaprVioBus *bus, uint32_t reg)
+{
+    BusChild *entry;
+
+    QTAILQ_FOREACH(entry, &bus->bus.children, sibling) {
+        SpaprVioDevice *candidate = (SpaprVioDevice *)entry->child;
+
+        if (candidate->reg == reg) {
+            return candidate;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Create the device tree node for @dev under /vdevice in @fdt and fill
+ * in its standard properties: "reg", optional "device_type" and
+ * "compatible", "interrupts" when the device has an IRQ, the DMA
+ * window, and finally anything the device class adds via ->devnode.
+ *
+ * Returns the offset of the new node, or a negative libfdt error.
+ */
+static int vio_make_devnode(SpaprVioDevice *dev,
+                            void *fdt)
+{
+    SpaprVioDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    const char *compat;
+    char *node_name;
+    int parent, node, rc;
+
+    parent = fdt_path_offset(fdt, "/vdevice");
+    if (parent < 0) {
+        return parent;
+    }
+
+    node_name = spapr_vio_get_dev_name(DEVICE(dev));
+    node = fdt_add_subnode(fdt, parent, node_name);
+    g_free(node_name);
+    if (node < 0) {
+        return node;
+    }
+
+    rc = fdt_setprop_cell(fdt, node, "reg", dev->reg);
+    if (rc < 0) {
+        return rc;
+    }
+
+    if (vdc->dt_type) {
+        rc = fdt_setprop_string(fdt, node, "device_type", vdc->dt_type);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    /* A per-device callback, when present, overrides the static string */
+    compat = vdc->get_dt_compatible ? vdc->get_dt_compatible(dev)
+                                    : vdc->dt_compatible;
+    if (compat) {
+        rc = fdt_setprop_string(fdt, node, "compatible", compat);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    if (dev->irq) {
+        uint32_t irq_cells[2];
+
+        spapr_dt_irq(irq_cells, dev->irq, false);
+        rc = fdt_setprop(fdt, node, "interrupts", irq_cells,
+                         sizeof(irq_cells));
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    rc = spapr_tcet_dma_dt(fdt, node, "ibm,my-dma-window", dev->tcet);
+    if (rc < 0) {
+        return rc;
+    }
+
+    if (vdc->devnode) {
+        rc = (vdc->devnode)(dev, fdt, node);
+        if (rc < 0) {
+            return rc;
+        }
+    }
+
+    return node;
+}
+
+/*
+ * CRQ handling
+ */
+/*
+ * H_REG_CRQ: register a command/response queue for a VIO unit.
+ *
+ * args[0] is the unit's reg value, args[1] the queue address and
+ * args[2] its length.  The length must lie in [0x1000, 0x10000000]
+ * and the address must be 4K aligned.  Returns H_PARAMETER for a bad
+ * unit or geometry, H_NOT_FOUND when the device has no CRQ support and
+ * H_RESOURCE when a queue is already registered.
+ */
+static target_ulong h_reg_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    target_ulong unit = args[0];
+    target_ulong q_addr = args[1];
+    target_ulong q_len = args[2];
+    SpaprVioDevice *vdev = spapr_vio_find_by_reg(spapr->vio_bus, unit);
+
+    if (vdev == NULL) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", unit);
+        return H_PARAMETER;
+    }
+
+    /* Queues larger than 256M are not handled for now */
+    if (q_len < 0x1000 || q_len > 0x10000000) {
+        hcall_dprintf("Queue size too small or too big (0x" TARGET_FMT_lx
+                      ")\n", q_len);
+        return H_PARAMETER;
+    }
+
+    /* The queue must start on a 4K boundary */
+    if (q_addr & 0xfff) {
+        hcall_dprintf("Queue not aligned (0x" TARGET_FMT_lx ")\n", q_addr);
+        return H_PARAMETER;
+    }
+
+    /* A device without a send hook has no CRQ support */
+    if (!vdev->crq.SendFunc) {
+        hcall_dprintf("Device does not support CRQ\n");
+        return H_NOT_FOUND;
+    }
+
+    /* Only one queue per device */
+    if (vdev->crq.qsize) {
+        hcall_dprintf("CRQ already registered\n");
+        return H_RESOURCE;
+    }
+
+    vdev->crq.qladdr = q_addr;
+    vdev->crq.qsize = q_len;
+    vdev->crq.qnext = 0;
+
+    trace_spapr_vio_h_reg_crq(unit, q_addr, q_len);
+    return H_SUCCESS;
+}
+
+/*
+ * Forget the CRQ registered for @dev by zeroing its address, size and
+ * next-slot offset.  Always returns H_SUCCESS.
+ */
+static target_ulong free_crq(SpaprVioDevice *dev)
+{
+    dev->crq.qnext = 0;
+    dev->crq.qsize = 0;
+    dev->crq.qladdr = 0;
+
+    trace_spapr_vio_free_crq(dev->reg);
+
+    return H_SUCCESS;
+}
+
+/*
+ * H_FREE_CRQ: drop the CRQ of the unit named by args[0].
+ * Returns H_PARAMETER when no such unit exists.
+ */
+static target_ulong h_free_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong unit = args[0];
+    SpaprVioDevice *vdev = spapr_vio_find_by_reg(spapr->vio_bus, unit);
+
+    if (vdev != NULL) {
+        return free_crq(vdev);
+    }
+
+    hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", unit);
+    return H_PARAMETER;
+}
+
+/*
+ * H_SEND_CRQ: hand a 16-byte CRQ message (args[1] high half, args[2]
+ * low half, converted to big-endian) to the device's send hook.
+ * Returns H_PARAMETER for an unknown unit and H_HARDWARE when the
+ * device has no send hook.
+ */
+static target_ulong h_send_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong unit = args[0];
+    target_ulong hi = args[1];
+    target_ulong lo = args[2];
+    SpaprVioDevice *vdev = spapr_vio_find_by_reg(spapr->vio_bus, unit);
+    uint64_t msg[2];
+
+    if (vdev == NULL) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", unit);
+        return H_PARAMETER;
+    }
+
+    msg[0] = cpu_to_be64(hi);
+    msg[1] = cpu_to_be64(lo);
+
+    if (!vdev->crq.SendFunc) {
+        return H_HARDWARE;
+    }
+
+    return vdev->crq.SendFunc(vdev, (uint8_t *)msg);
+}
+
+/*
+ * H_ENABLE_CRQ: nothing needs to be done beyond validating the unit
+ * named by args[0].  Returns H_PARAMETER when no such unit exists and
+ * H_SUCCESS otherwise.
+ */
+static target_ulong h_enable_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode, target_ulong *args)
+{
+    target_ulong reg = args[0];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+
+    if (!dev) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
+        return H_PARAMETER;
+    }
+
+    /* Use the named status code like the other CRQ hypercalls do */
+    return H_SUCCESS;
+}
+
+/*
+ * Post a 16-byte CRQ message into the guest's queue for @dev.
+ *
+ * The first byte of the next slot is read first; a non-zero value
+ * means the slot is still occupied.  The second half of the message is
+ * written before the first so that the first byte only becomes
+ * non-zero once the rest of the entry is in place.
+ *
+ * Returns negative error, 0 success, or positive: queue full
+ */
+int spapr_vio_send_crq(SpaprVioDevice *dev, uint8_t *crq)
+{
+    int rc;
+    uint8_t byte;
+
+    if (!dev->crq.qsize) {
+        /* Name the function correctly so the message can be grepped for */
+        error_report("spapr_vio_send_crq on uninitialized queue");
+        return -1;
+    }
+
+    /* Maybe do a fast path for KVM just writing to the pages */
+    rc = spapr_vio_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
+    if (rc) {
+        return rc;
+    }
+    if (byte != 0) {
+        return 1;
+    }
+
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
+                             &crq[8], 8);
+    if (rc) {
+        return rc;
+    }
+
+    kvmppc_eieio();
+
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
+    if (rc) {
+        return rc;
+    }
+
+    /* Advance to the next 16-byte slot, wrapping at the queue size */
+    dev->crq.qnext = (dev->crq.qnext + 16) % dev->crq.qsize;
+
+    if (dev->signal_state & 1) {
+        spapr_vio_irq_pulse(dev);
+    }
+
+    return 0;
+}
+
+/* "quiesce" handling */
+
+/*
+ * Quiesce a single VIO device: cold-reset its TCE table, if it has
+ * one, then drop its CRQ.
+ */
+static void spapr_vio_quiesce_one(SpaprVioDevice *dev)
+{
+    if (dev->tcet != NULL) {
+        device_cold_reset(DEVICE(dev->tcet));
+    }
+
+    free_crq(dev);
+}
+
+/*
+ * Switch TCE bypass for @dev on or off.  Exactly one of the bypass
+ * alias and the TCE IOMMU region is enabled afterwards.  Devices
+ * without a TCE table are left untouched.
+ */
+void spapr_vio_set_bypass(SpaprVioDevice *dev, bool bypass)
+{
+    if (dev->tcet == NULL) {
+        return;
+    }
+
+    memory_region_set_enabled(&dev->mrbypass, bypass);
+    memory_region_set_enabled(spapr_tce_get_iommu(dev->tcet), !bypass);
+
+    dev->tcet->bypass = bypass;
+}
+
+/*
+ * RTAS "ibm,set-tce-bypass": argument 0 is the unit's reg value and
+ * argument 1 is non-zero to enable bypass.  Reports
+ * RTAS_OUT_PARAM_ERROR for a wrong argument count, an unknown unit or
+ * a unit without a TCE table.
+ */
+static void rtas_set_tce_bypass(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                uint32_t token,
+                                uint32_t nargs, target_ulong args,
+                                uint32_t nret, target_ulong rets)
+{
+    SpaprVioBus *vio_bus = spapr->vio_bus;
+    SpaprVioDevice *vdev;
+    uint32_t unit_reg, want_bypass;
+
+    if (nargs != 2) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    unit_reg = rtas_ld(args, 0);
+    want_bypass = rtas_ld(args, 1);
+
+    vdev = spapr_vio_find_by_reg(vio_bus, unit_reg);
+    if (vdev == NULL || vdev->tcet == NULL) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    spapr_vio_set_bypass(vdev, !!want_bypass);
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * RTAS "quiesce": takes no arguments and quiesces every device on the
+ * VIO bus.  Reports RTAS_OUT_PARAM_ERROR if any argument is passed.
+ */
+static void rtas_quiesce(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                         uint32_t token,
+                         uint32_t nargs, target_ulong args,
+                         uint32_t nret, target_ulong rets)
+{
+    SpaprVioBus *vio_bus = spapr->vio_bus;
+    BusChild *entry;
+
+    if (nargs != 0) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    QTAILQ_FOREACH(entry, &vio_bus->bus.children, sibling) {
+        spapr_vio_quiesce_one((SpaprVioDevice *)entry->child);
+    }
+
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+/*
+ * Find a device on @dev's bus, other than @dev itself, that uses the
+ * same reg value.  The bus list is walked by hand because @dev may
+ * already be on it.  Returns the conflicting device or NULL.
+ */
+static SpaprVioDevice *reg_conflict(SpaprVioDevice *dev)
+{
+    SpaprVioBus *vio_bus = SPAPR_VIO_BUS(dev->qdev.parent_bus);
+    BusChild *entry;
+
+    QTAILQ_FOREACH(entry, &vio_bus->bus.children, sibling) {
+        SpaprVioDevice *sibling_dev = VIO_SPAPR_DEVICE(entry->child);
+
+        if (sibling_dev == dev) {
+            continue;
+        }
+        if (sibling_dev->reg == dev->reg) {
+            return sibling_dev;
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Device reset: quiesce the device (TCE table and CRQ), clear its
+ * signal state, turn TCE bypass off and finally run the class's own
+ * reset hook when it has one.
+ */
+static void spapr_vio_busdev_reset(DeviceState *qdev)
+{
+    SpaprVioDevice *vdev = VIO_SPAPR_DEVICE(qdev);
+    SpaprVioDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(vdev);
+
+    spapr_vio_quiesce_one(vdev);
+
+    vdev->signal_state = 0;
+
+    spapr_vio_set_bypass(vdev, false);
+
+    if (vdc->reset != NULL) {
+        vdc->reset(vdev);
+    }
+}
+
+/*
+ * The register property of a VIO device is defined in libvirt using
+ * 0x1000 as a base register number plus a 0x1000 increment. For the
+ * VIO tty device, the base number is changed to 0x30000000. QEMU uses
+ * a base register number of 0x71000000 and then a simple increment.
+ *
+ * The formula below tries to compute a unique index number from the
+ * register value that will be used to define the IRQ number of the
+ * VIO device.
+ *
+ * A maximum of 256 VIO devices is covered. Collisions are possible
+ * but they will be detected when the IRQ is claimed.
+ */
+/*
+ * Derive an IRQ number in the SPAPR_IRQ_VIO range from a device's reg
+ * value.  The low 8 bits of the result depend on which numbering
+ * scheme @reg appears to come from.
+ */
+static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg)
+{
+    if (reg >= SPAPR_VIO_REG_BASE) {
+        /*
+         * Allocated by QEMU: keep the low byte, range [0x00 - 0xff].
+         * The nvram device (reg=0x71000000) is always allocated by
+         * QEMU and therefore gets index 0x0.
+         */
+        return SPAPR_IRQ_VIO | (reg & 0xff);
+    }
+
+    if (reg >= 0x30000000) {
+        /*
+         * VIO tty allocated by libvirt: range [0xf0 - 0xff], which
+         * leaves room for 16 vtys.
+         */
+        return SPAPR_IRQ_VIO | (0xf0 | ((reg >> 12) & 0xf));
+    }
+
+    /*
+     * Any other device allocated by libvirt: expected to land in
+     * [0x00 - 0xef].  Conflicts are caught when the IRQ is claimed.
+     */
+    return SPAPR_IRQ_VIO | ((reg >> 12) & 0xff);
+}
+
+/*
+ * Common realize step for all VIO devices: settle the device's reg
+ * value (checking an explicit one for conflicts or allocating the next
+ * free one), give it a default id, pick and claim its IRQ, set up the
+ * DMA address space and TCE table when the class has an RTCE window,
+ * then run the class's own realize hook.
+ */
+static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
+{
+ SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+ SpaprVioDevice *dev = (SpaprVioDevice *)qdev;
+ SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+ char *id;
+
+ if (dev->reg != -1) {
+ /*
+ * Explicitly assigned address, just verify that no-one else
+ * is using it. We have to open code this rather than using
+ * spapr_vio_find_by_reg() because dev itself is already in
+ * the list.
+ */
+ SpaprVioDevice *other = reg_conflict(dev);
+
+ if (other) {
+ error_setg(errp, "%s and %s devices conflict at address %#x",
+ object_get_typename(OBJECT(qdev)),
+ object_get_typename(OBJECT(&other->qdev)),
+ dev->reg);
+ return;
+ }
+ } else {
+ /* Need to assign an address */
+ SpaprVioBus *bus = SPAPR_VIO_BUS(dev->qdev.parent_bus);
+
+ /* Skip over values already taken by explicitly addressed devices */
+ do {
+ dev->reg = bus->next_reg++;
+ } while (reg_conflict(dev));
+ }
+
+ /* Don't overwrite ids assigned on the command line */
+ if (!dev->qdev.id) {
+ id = spapr_vio_get_dev_name(DEVICE(dev));
+ dev->qdev.id = id;
+ }
+
+ dev->irq = spapr_vio_reg_to_irq(dev->reg);
+
+ /* With legacy allocation the reg-derived IRQ is replaced by a free one */
+ if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+ int irq = spapr_irq_findone(spapr, errp);
+
+ if (irq < 0) {
+ return;
+ }
+ dev->irq = irq;
+ }
+
+ if (spapr_irq_claim(spapr, dev->irq, false, errp) < 0) {
+ return;
+ }
+
+ if (pc->rtce_window_size) {
+ uint32_t liobn = SPAPR_VIO_LIOBN(dev->reg);
+
+ /*
+ * Root region with two overlapping children: the bypass alias of
+ * system memory at priority 1 and the TCE IOMMU region at
+ * priority 2.
+ */
+ memory_region_init(&dev->mrroot, OBJECT(dev), "iommu-spapr-root",
+ MACHINE(spapr)->ram_size);
+ memory_region_init_alias(&dev->mrbypass, OBJECT(dev),
+ "iommu-spapr-bypass", get_system_memory(),
+ 0, MACHINE(spapr)->ram_size);
+ memory_region_add_subregion_overlap(&dev->mrroot, 0, &dev->mrbypass, 1);
+ address_space_init(&dev->as, &dev->mrroot, qdev->id);
+
+ dev->tcet = spapr_tce_new_table(qdev, liobn);
+ spapr_tce_table_enable(dev->tcet, SPAPR_TCE_PAGE_SHIFT, 0,
+ pc->rtce_window_size >> SPAPR_TCE_PAGE_SHIFT);
+ dev->tcet->vdev = dev;
+ memory_region_add_subregion_overlap(&dev->mrroot, 0,
+ spapr_tce_get_iommu(dev->tcet), 2);
+ }
+
+ pc->realize(dev, errp);
+}
+
+/*
+ * H_VIO_SIGNAL: set the signal mode (args[1]) of the unit named by
+ * args[0].  Returns H_PARAMETER for an unknown unit or when the mode
+ * has bits outside the class's signal_mask.
+ */
+static target_ulong h_vio_signal(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode,
+                                 target_ulong *args)
+{
+    target_ulong unit = args[0];
+    target_ulong new_mode = args[1];
+    SpaprVioDevice *vdev = spapr_vio_find_by_reg(spapr->vio_bus, unit);
+    SpaprVioDeviceClass *vdc;
+
+    if (vdev == NULL) {
+        return H_PARAMETER;
+    }
+
+    vdc = VIO_SPAPR_DEVICE_GET_CLASS(vdev);
+    if ((new_mode & ~vdc->signal_mask) != 0) {
+        return H_PARAMETER;
+    }
+
+    vdev->signal_state = new_mode;
+    return H_SUCCESS;
+}
+
+/*
+ * Create the VIO bridge device and the "spapr-vio" bus on top of it,
+ * then register the VIO/CRQ hypercalls and RTAS calls.
+ * Returns the new bus.
+ */
+SpaprVioBus *spapr_vio_bus_init(void)
+{
+    DeviceState *bridge;
+    BusState *raw_bus;
+    SpaprVioBus *vio_bus;
+
+    /* The bridge is a sysbus device; failure to realize it is fatal */
+    bridge = qdev_new(TYPE_SPAPR_VIO_BRIDGE);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(bridge), &error_fatal);
+
+    /* The bus hangs off the bridge */
+    raw_bus = qbus_new(TYPE_SPAPR_VIO_BUS, bridge, "spapr-vio");
+    vio_bus = SPAPR_VIO_BUS(raw_bus);
+    vio_bus->next_reg = SPAPR_VIO_REG_BASE;
+
+    /* hcall-vio */
+    spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
+
+    /* hcall-crq */
+    spapr_register_hypercall(H_REG_CRQ, h_reg_crq);
+    spapr_register_hypercall(H_FREE_CRQ, h_free_crq);
+    spapr_register_hypercall(H_SEND_CRQ, h_send_crq);
+    spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
+
+    /* RTAS calls */
+    spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
+                        rtas_set_tce_bypass);
+    spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
+
+    return vio_bus;
+}
+
+/* Bridge class initialiser: the bridge's firmware name is "vdevice". */
+static void spapr_vio_bridge_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->fw_name = "vdevice";
+}
+
+/* QOM type description of the VIO bridge, a sysbus device */
+static const TypeInfo spapr_vio_bridge_info = {
+ .name = TYPE_SPAPR_VIO_BRIDGE,
+ .parent = TYPE_SYS_BUS_DEVICE,
+ .class_init = spapr_vio_bridge_class_init,
+};
+
+/*
+ * Migration state common to all VIO devices: the signal state and the
+ * CRQ address, size and next-slot offset.  reg and irq are listed as
+ * "EQUAL" fields -- presumably migration fails when they differ from
+ * the destination's values; confirm against the vmstate macros.
+ */
+const VMStateDescription vmstate_spapr_vio = {
+ .name = "spapr_vio",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ /* Sanity check */
+ VMSTATE_UINT32_EQUAL(reg, SpaprVioDevice, NULL),
+ VMSTATE_UINT32_EQUAL(irq, SpaprVioDevice, NULL),
+
+ /* General VIO device state */
+ VMSTATE_UINT64(signal_state, SpaprVioDevice),
+ VMSTATE_UINT64(crq.qladdr, SpaprVioDevice),
+ VMSTATE_UINT32(crq.qsize, SpaprVioDevice),
+ VMSTATE_UINT32(crq.qnext, SpaprVioDevice),
+
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+/*
+ * Class initialiser of the abstract VIO device: install the common
+ * realize and reset handlers and tie the device to the VIO bus type.
+ */
+static void vio_spapr_device_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    dev_class->realize = spapr_vio_busdev_realize;
+    dev_class->reset = spapr_vio_busdev_reset;
+    dev_class->bus_type = TYPE_SPAPR_VIO_BUS;
+}
+
+/* QOM type description of the abstract base type for VIO devices */
+static const TypeInfo spapr_vio_type_info = {
+ .name = TYPE_VIO_SPAPR_DEVICE,
+ .parent = TYPE_DEVICE,
+ .instance_size = sizeof(SpaprVioDevice),
+ .abstract = true,
+ .class_size = sizeof(SpaprVioDeviceClass),
+ .class_init = vio_spapr_device_class_init,
+};
+
+/* Register the three QOM types of the VIO layer: bus, bridge, device. */
+static void spapr_vio_register_types(void)
+{
+    /* The bus */
+    type_register_static(&spapr_vio_bus_info);
+    /* The bridge the bus is created on */
+    type_register_static(&spapr_vio_bridge_info);
+    /* The abstract device base type */
+    type_register_static(&spapr_vio_type_info);
+}
+
+type_init(spapr_vio_register_types)
+
+/*
+ * qsort() comparator for an array of DeviceState pointers that are
+ * really VIO devices: orders by ascending reg value.
+ */
+static int compare_reg(const void *p1, const void *p2)
+{
+    const SpaprVioDevice *lhs = (SpaprVioDevice *)*(DeviceState **)p1;
+    const SpaprVioDevice *rhs = (SpaprVioDevice *)*(DeviceState **)p2;
+
+    if (lhs->reg < rhs->reg) {
+        return -1;
+    }
+
+    /* Not smaller: either equal or greater */
+    return lhs->reg == rhs->reg ? 0 : 1;
+}
+
+/*
+ * Add the "/vdevice" node to @fdt and, below it, one node per device
+ * on @bus.  Exits the process if a device node cannot be created.
+ */
+void spapr_dt_vdevice(SpaprVioBus *bus, void *fdt)
+{
+    DeviceState **sorted;
+    BusChild *entry;
+    int count = 0;
+    int vdevice_node;
+    int idx;
+
+    _FDT(vdevice_node = fdt_add_subnode(fdt, 0, "vdevice"));
+
+    _FDT(fdt_setprop_string(fdt, vdevice_node, "device_type", "vdevice"));
+    _FDT(fdt_setprop_string(fdt, vdevice_node, "compatible", "IBM,vdevice"));
+    _FDT(fdt_setprop_cell(fdt, vdevice_node, "#address-cells", 1));
+    _FDT(fdt_setprop_cell(fdt, vdevice_node, "#size-cells", 0));
+    _FDT(fdt_setprop_cell(fdt, vdevice_node, "#interrupt-cells", 2));
+    _FDT(fdt_setprop(fdt, vdevice_node, "interrupt-controller", NULL, 0));
+
+    /* First pass: how many devices are on the bus? */
+    QTAILQ_FOREACH(entry, &bus->bus.children, sibling) {
+        count++;
+    }
+
+    /* Second pass: collect them so they can be sorted by reg */
+    sorted = g_new(DeviceState *, count);
+    idx = 0;
+    QTAILQ_FOREACH(entry, &bus->bus.children, sibling) {
+        sorted[idx++] = entry->child;
+    }
+
+    qsort(sorted, count, sizeof(*sorted), compare_reg);
+
+    /*
+     * Hack alert.  The nodes are handed to libfdt in reverse order
+     * because we happen to know that makes them come out in forward
+     * order in the tree.
+     */
+    for (idx = count - 1; idx >= 0; idx--) {
+        SpaprVioDevice *vdev = (SpaprVioDevice *)(sorted[idx]);
+        SpaprVioDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(vdev);
+
+        if (vio_make_devnode(vdev, fdt) < 0) {
+            error_report("Couldn't create device node /vdevice/%s@%"PRIx32,
+                         vdc->dt_name, vdev->reg);
+            exit(1);
+        }
+    }
+
+    g_free(sorted);
+}
+
+/*
+ * Return the device tree path ("/vdevice/<name>") of the default vty
+ * on @bus, or NULL when there is none.  The string is newly allocated;
+ * the caller must g_free() it.
+ */
+gchar *spapr_vio_stdout_path(SpaprVioBus *bus)
+{
+    SpaprVioDevice *vty = spapr_vty_get_default(bus);
+    char *node_name;
+    char *full_path;
+
+    if (vty == NULL) {
+        return NULL;
+    }
+
+    node_name = spapr_vio_get_dev_name(DEVICE(vty));
+    full_path = g_strdup_printf("/vdevice/%s", node_name);
+    g_free(node_name);
+
+    return full_path;
+}
diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c
new file mode 100644
index 000000000..40ce8fe00
--- /dev/null
+++ b/hw/ppc/spapr_vof.c
@@ -0,0 +1,167 @@
+/*
+ * SPAPR machine hooks to Virtual Open Firmware,
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr.h"
+#include "hw/ppc/spapr_vio.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/ppc/fdt.h"
+#include "hw/ppc/vof.h"
+#include "sysemu/sysemu.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+/*
+ * Hypercall entry for the VOF client interface: forward the call,
+ * whose argument block lives at the guest address in _args[0], to
+ * vof_client_call().  Any non-zero result is reported as H_PARAMETER.
+ */
+target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                target_ulong opcode, target_ulong *_args)
+{
+    int rc;
+
+    rc = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob,
+                         ppc64_phys_to_real(_args[0]));
+    if (rc == 0) {
+        return H_SUCCESS;
+    }
+
+    return H_PARAMETER;
+}
+
+/*
+ * Finish the device tree @fdt for a VOF boot: let VOF add its own
+ * nodes, re-apply client-provided "bootargs" and open the default
+ * serial console as /chosen/stdout.
+ */
+void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt)
+{
+    char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus);
+
+    vof_build_dt(fdt, spapr->vof);
+
+    if (spapr->vof->bootargs) {
+        int chosen;
+
+        _FDT(chosen = fdt_path_offset(fdt, "/chosen"));
+        /*
+         * If the client did not change "bootargs", spapr_dt_chosen() must have
+         * stored machine->kernel_cmdline in it before getting here.
+         */
+        _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs));
+    }
+
+    /*
+     * SLOF-less setup requires an open instance of stdout for early
+     * kernel printk. By now all phandles are settled so we can open
+     * the default serial console.
+     */
+    if (stdout_path) {
+        _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout",
+                                   stdout_path));
+    }
+
+    /* spapr_vio_stdout_path() returns an allocated string; don't leak it */
+    g_free(stdout_path);
+}
+
+/*
+ * Machine reset path for VOF: re-initialise VOF, claim memory for the
+ * stack, kernel and initramdisk, finish the device tree and set up the
+ * boot CPU to enter the firmware.  Failures to claim memory are
+ * reported through @errp.
+ */
+void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp)
+{
+    PowerPCCPU *boot_cpu = POWERPC_CPU(first_cpu);
+    Vof *vof = spapr->vof;
+    target_ulong stack_ptr;
+
+    vof_init(vof, spapr->rma_size, errp);
+
+    stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE);
+    if (stack_ptr == -1) {
+        error_setg(errp, "Memory allocation for stack failed");
+        return;
+    }
+    /* Point at the top of the stack, less room for a minimum stack frame */
+    stack_ptr += VOF_STACK_SIZE - 0x20;
+
+    if (spapr->kernel_size) {
+        if (vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) {
+            error_setg(errp, "Memory for kernel is in use");
+            return;
+        }
+    }
+
+    if (spapr->initrd_size) {
+        if (vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) {
+            error_setg(errp, "Memory for initramdisk is in use");
+            return;
+        }
+    }
+
+    spapr_vof_client_dt_finalize(spapr, fdt);
+
+    spapr_cpu_set_entry_state(boot_cpu, SPAPR_ENTRY_POINT,
+                              stack_ptr, spapr->initrd_base,
+                              spapr->initrd_size);
+    /* VOF is 32bit BE, so clear the 64-bit and little-endian MSR bits */
+    boot_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE));
+
+    /*
+     * Expected allocation map at this point:
+     *
+     * 0..c38 - the initial firmware
+     * 8000..10000 - stack
+     * 400000.. - kernel
+     * 3ea0000.. - initramdisk
+     *
+     * The FDT is not written to guest memory since nothing expects it;
+     * the OF client interface is used to read the device tree.
+     */
+}
+
+/*
+ * VOF "quiesce" hook: record the current total size of the FDT blob as
+ * both the current and the initial FDT size of the machine.
+ */
+void spapr_vof_quiesce(MachineState *ms)
+{
+    SpaprMachineState *machine = SPAPR_MACHINE(ms);
+
+    machine->fdt_size = fdt_totalsize(machine->fdt_blob);
+    machine->fdt_initial_size = machine->fdt_size;
+}
+
+/*
+ * VOF "setprop" hook.  Mirrors into QEMU state the properties it knows
+ * about: /chosen "bootargs", "linux,initrd-start" and
+ * "linux,initrd-end".  The initrd values must be 4 or 8 bytes wide;
+ * any other length is rejected with false.  Everything else, including
+ * the /rtas "linux,rtas-base" and "linux,rtas-entry" properties that
+ * must survive quiesce, is accepted.
+ */
+bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname,
+                       void *val, int vallen)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(ms);
+
+    if (!strcmp(path, "/rtas") &&
+        (!strcmp(propname, "linux,rtas-base") ||
+         !strcmp(propname, "linux,rtas-entry"))) {
+        /* Stored in the FDT as-is so they survive quiesce */
+        return true;
+    }
+
+    if (strcmp(path, "/chosen") != 0) {
+        return true;
+    }
+
+    if (!strcmp(propname, "bootargs")) {
+        Vof *vof = spapr->vof;
+
+        g_free(vof->bootargs);
+        vof->bootargs = g_strndup(val, vallen);
+        return true;
+    }
+
+    if (!strcmp(propname, "linux,initrd-start")) {
+        switch (vallen) {
+        case sizeof(uint32_t):
+            spapr->initrd_base = ldl_be_p(val);
+            return true;
+        case sizeof(uint64_t):
+            spapr->initrd_base = ldq_be_p(val);
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    if (!strcmp(propname, "linux,initrd-end")) {
+        /* The end address is turned into a size relative to initrd_base */
+        switch (vallen) {
+        case sizeof(uint32_t):
+            spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base;
+            return true;
+        case sizeof(uint64_t):
+            spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base;
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    return true;
+}
diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events
new file mode 100644
index 000000000..3bf43fa34
--- /dev/null
+++ b/hw/ppc/trace-events
@@ -0,0 +1,143 @@
+# See docs/devel/tracing.rst for syntax documentation.
+
+# spapr_pci.c
+spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=0x%x)"
+spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=0x%"PRIx64
+spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr 0x%x func %u, requested %u, first irq %u"
+spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u"
+spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@0x%"PRIx64"<=0x%"PRIx64" IRQ %u"
+spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u"
+spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at 0x%x asked %u, have only %u"
+
+# spapr_hcall.c
+spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes"
+spapr_cas_pvr(uint32_t cur_pvr, bool explicit_match, uint32_t new_pvr) "current=0x%x, explicit_match=%u, new=0x%x"
+spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
+spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64
+spapr_update_dt(unsigned cb) "New blob %u bytes"
+spapr_update_dt_failed_size(unsigned cbold, unsigned cbnew, unsigned magic) "Old blob %u bytes, new blob %u bytes, magic 0x%x"
+spapr_update_dt_failed_check(unsigned cbold, unsigned cbnew, unsigned magic) "Old blob %u bytes, new blob %u bytes, magic 0x%x"
+
+# spapr_tpm_proxy.c
+spapr_h_tpm_comm(const char *device_path, uint64_t operation) "tpm_device_path=%s operation=0x%"PRIx64
+spapr_tpm_execute(uint64_t data_in, uint64_t data_in_sz, uint64_t data_out, uint64_t data_out_sz) "data_in=0x%"PRIx64", data_in_sz=%"PRIu64", data_out=0x%"PRIx64", data_out_sz=%"PRIu64
+
+# spapr_iommu.c
+spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64
+spapr_iommu_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64
+spapr_iommu_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64
+spapr_iommu_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64
+spapr_iommu_pci_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64
+spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64
+spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64
+spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64
+spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=0x%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=0x%x"
+spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=0x%"PRIx64" table=%p fd=%d"
+spapr_iommu_pre_save(uint64_t liobn, uint32_t nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" bus_offset=0x%"PRIx64" ps=%"PRIu32
+spapr_iommu_post_load(uint64_t liobn, uint32_t pre_nb, uint32_t post_nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" => 0x%"PRIx32" bus_offset=0x%"PRIx64" ps=%"PRIu32
+
+# spapr_rtas_ddw.c
+spapr_iommu_ddw_query(uint64_t buid, uint32_t cfgaddr, unsigned wa, uint64_t win_size, uint32_t pgmask) "buid=0x%"PRIx64" addr=0x%"PRIx32", %u windows available, max window size=0x%"PRIx64", mask=0x%"PRIx32
+spapr_iommu_ddw_create(uint64_t buid, uint32_t cfgaddr, uint64_t pg_size, uint64_t req_size, uint64_t start, uint32_t liobn) "buid=0x%"PRIx64" addr=0x%"PRIx32", page size=0x%"PRIx64", requested=0x%"PRIx64", start addr=0x%"PRIx64", liobn=0x%"PRIx32
+spapr_iommu_ddw_remove(uint32_t liobn) "liobn=0x%"PRIx32
+spapr_iommu_ddw_reset(uint64_t buid, uint32_t cfgaddr) "buid=0x%"PRIx64" addr=0x%"PRIx32
+
+# spapr_drc.c
+spapr_drc_set_isolation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%"PRIx32
+spapr_drc_set_isolation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_set_dr_indicator(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x"
+spapr_drc_set_allocation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x"
+spapr_drc_set_allocation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_unplug_request(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_awaiting_quiesce(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_realize_child(uint32_t index, const char *childname) "drc: 0x%"PRIx32", child name: %s"
+spapr_drc_realize_complete(uint32_t index) "drc: 0x%"PRIx32
+spapr_drc_unrealize(uint32_t index) "drc: 0x%"PRIx32
+
+# spapr_ovec.c
+spapr_ovec_parse_vector(int vector, int byte, uint16_t vec_len, uint8_t entry) "read guest vector %2d, byte %3d / %3d: 0x%.2x"
+spapr_ovec_populate_dt(int byte, uint16_t vec_len, uint8_t entry) "encoding guest vector byte %3d / %3d: 0x%.2x"
+
+# spapr_drc.c
+spapr_rtas_get_sensor_state_not_supported(uint32_t index, uint32_t type) "sensor index: 0x%"PRIx32", type: %"PRIu32
+spapr_rtas_get_sensor_state_invalid(uint32_t index) "sensor index: 0x%"PRIx32
+spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx32
+
+# spapr_vio.c
+spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64
+spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed"
+
+# vof.c
+vof_error_str_truncated(const char *s, int len) "%s truncated to %d"
+vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d"
+vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d"
+vof_error_unknown_method(const char *method) "\"%s\""
+vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x"
+vof_error_unknown_path(const char *path) "\"%s\""
+vof_error_write(uint32_t ih) "ih=0x%x"
+vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x"
+vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x"
+vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x"
+vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x"
+vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const char *val) "ph=0x%x \"%s\" => len=%d [%s]"
+vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d"
+vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d"
+vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x"
+vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x"
+vof_package_to_path(uint32_t ph, const char *tmp, int ret) "ph=0x%x => %s len=%d"
+vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, int ret) "ih=0x%x ph=0x%x => %s len=%d"
+vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x"
+vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\""
+vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
+vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64
+
+# ppc.c
+ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
+ppc_tb_load(uint64_t tb) "tb 0x%016" PRIx64
+ppc_tb_store(uint64_t tb, uint64_t offset) "tb 0x%016" PRIx64 " offset 0x%08" PRIx64
+
+ppc_decr_load(uint64_t tb) "decr 0x%016" PRIx64
+ppc_decr_excp(const char *action) "%s decrementer"
+ppc_decr_store(uint32_t nr_bits, uint64_t decr, uint64_t value) "%d-bit 0x%016" PRIx64 " => 0x%016" PRIx64
+
+ppc4xx_fit(uint32_t ir, uint64_t tcr, uint64_t tsr) "ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64
+ppc4xx_pit_stop(void) ""
+ppc4xx_pit_start(uint64_t reload) "PIT 0x%016" PRIx64
+ppc4xx_pit(uint32_t ar, uint32_t ir, uint64_t tcr, uint64_t tsr, uint64_t reload) "ar %d ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 " PIT 0x%016" PRIx64
+ppc4xx_wdt(uint64_t tcr, uint64_t tsr) "TCR 0x%" PRIx64 " TSR 0x%" PRIx64
+ppc40x_store_pit(uint64_t value) "val 0x%" PRIx64
+ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32
+ppc40x_timers_init(uint32_t value) "frequency %" PRIu32
+
+ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d"
+ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32
+ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d"
+ppc_irq_reset(const char *name) "%s"
+ppc_irq_cpu(const char *action) "%s"
+
+# prep_systemio.c
+prep_systemio_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x"
+prep_systemio_write(uint32_t addr, uint32_t val) "write addr=0x%x val=0x%x"
+
+# rs6000_mc.c
+rs6000mc_id_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x"
+rs6000mc_presence_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x"
+rs6000mc_size_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x"
+rs6000mc_size_write(uint32_t addr, uint32_t val) "write addr=0x%x val=0x%x"
+rs6000mc_parity_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x"
+
+# ppc4xx_pci.c
+ppc4xx_pci_map_irq(int32_t devfn, int irq_num, int slot) "devfn 0x%x irq %d -> %d"
+ppc4xx_pci_set_irq(int irq_num) "PCI irq %d"
+
+# ppc440_pcix.c
+ppc440_pcix_map_irq(int32_t devfn, int irq_num, int slot) "devfn 0x%x irq %d -> %d"
+ppc440_pcix_set_irq(int irq_num) "PCI irq %d"
+ppc440_pcix_update_pim(int idx, uint64_t size, uint64_t la) "Added window %d of size=0x%" PRIx64 " to CPU=0x%" PRIx64
+ppc440_pcix_update_pom(int idx, uint32_t size, uint64_t la, uint64_t pcia) "Added window %d of size=0x%x from CPU=0x%" PRIx64 " to PCI=0x%" PRIx64
+ppc440_pcix_reg_read(uint64_t addr, uint32_t val) "addr 0x%" PRIx64 " = 0x%" PRIx32
+ppc440_pcix_reg_write(uint64_t addr, uint32_t val, uint32_t size) "addr 0x%" PRIx64 " = 0x%" PRIx32 " size 0x%" PRIx32
diff --git a/hw/ppc/trace.h b/hw/ppc/trace.h
new file mode 100644
index 000000000..87c4198e6
--- /dev/null
+++ b/hw/ppc/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_ppc.h"
diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c
new file mode 100644
index 000000000..9c575403b
--- /dev/null
+++ b/hw/ppc/virtex_ml507.c
@@ -0,0 +1,316 @@
+/*
+ * Model of Xilinx Virtex5 ML507 PPC-440 refdesign.
+ *
+ * Copyright (c) 2010 Edgar E. Iglesias.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/datadir.h"
+#include "qemu/units.h"
+#include "cpu.h"
+#include "hw/sysbus.h"
+#include "hw/char/serial.h"
+#include "hw/block/flash.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/reset.h"
+#include "hw/boards.h"
+#include "sysemu/device_tree.h"
+#include "hw/loader.h"
+#include "elf.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+
+#include "hw/intc/ppc-uic.h"
+#include "hw/ppc/ppc.h"
+#include "hw/ppc/ppc4xx.h"
+#include "hw/qdev-properties.h"
+#include "ppc405.h"
+
+#define EPAPR_MAGIC (0x45504150) /* "epapr magic" handed to the kernel in r6 */
+#define FLASH_SIZE (16 * MiB) /* size of the "virtex.flash" pflash device */
+
+#define INTC_BASEADDR 0x81800000 /* MMIO base of the xlnx.xps-intc */
+#define UART16550_BASEADDR 0x83e01003 /* base given to serial_mm_init() */
+#define TIMER_BASEADDR 0x83c00000 /* MMIO base of the xlnx.xps-timer */
+#define PFLASH_BASEADDR 0xfc000000 /* base of the pflash device */
+
+#define TIMER_IRQ 3 /* xps-intc input used by the timer */
+#define UART16550_IRQ 9 /* xps-intc input used by the UART */
+
+/*
+ * Boot parameters filled in by virtex_init() while loading the kernel and
+ * read back by main_cpu_reset() through env->load_info.
+ */
+static struct boot_info
+{
+ uint32_t bootstrap_pc; /* loaded into NIP on reset */
+ uint32_t cmdline; /* not used in this file */
+ uint32_t fdt; /* guest address of the device tree, passed in r3 */
+ uint32_t ima_size; /* set to the loaded kernel size, passed in r7 */
+ void *vfdt; /* not used in this file */
+} boot_info;
+
+/*
+ * Install the two reset-time BookE TLB entries which together span the
+ * 32-bit address space: entry 0 maps @va to @pa, entry 1 maps 0x80000000
+ * onto itself. Both entries have size 1U << 31, PID 0, attr 0 and a prot
+ * value of PAGE_VALID plus the read/write/exec bits shifted left by 4.
+ */
+static void mmubooke_create_initial_mapping(CPUPPCState *env,
+                                            target_ulong va,
+                                            hwaddr pa)
+{
+    ppcemb_tlb_t *low = &env->tlb.tlbe[0];
+    ppcemb_tlb_t *high = &env->tlb.tlbe[1];
+
+    /* Lower half, caller-supplied translation: up to 0x80000000 */
+    low->EPN = va & TARGET_PAGE_MASK;
+    low->RPN = pa & TARGET_PAGE_MASK;
+    low->size = 1U << 31;
+    low->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4);
+    low->attr = 0;
+    low->PID = 0;
+
+    /* Upper half, 1:1 translation: up to 0xffffffff */
+    high->EPN = 0x80000000 & TARGET_PAGE_MASK;
+    high->RPN = 0x80000000 & TARGET_PAGE_MASK;
+    high->size = 1U << 31;
+    high->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4);
+    high->attr = 0;
+    high->PID = 0;
+}
+
+/*
+ * Create the CPU of type @cpu_type, initialise its BookE timers with @sysclk
+ * and its DCR environment, then create a UIC interrupt controller linked to
+ * that CPU and route the UIC INT and CINT outputs to the CPU's INT and CINT
+ * inputs.
+ *
+ * Returns the newly created CPU.
+ */
+static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk)
+{
+    PowerPCCPU *cpu = POWERPC_CPU(cpu_create(cpu_type));
+    CPUPPCState *env = &cpu->env;
+    DeviceState *uic;
+    SysBusDevice *uic_bus;
+
+    ppc_booke_timers_init(cpu, sysclk, 0/* no flags */);
+    ppc_dcr_init(env, NULL, NULL);
+
+    /* interrupt controller */
+    uic = qdev_new(TYPE_PPC_UIC);
+    uic_bus = SYS_BUS_DEVICE(uic);
+    object_property_set_link(OBJECT(uic), "cpu", OBJECT(cpu), &error_fatal);
+    sysbus_realize_and_unref(uic_bus, &error_fatal);
+
+    /* Nothing is wired to the UIC inputs on this board, only its outputs */
+    sysbus_connect_irq(uic_bus, PPCUIC_OUTPUT_INT,
+                       ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]);
+    sysbus_connect_irq(uic_bus, PPCUIC_OUTPUT_CINT,
+                       ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]);
+
+    return cpu;
+}
+
+/*
+ * Reset handler registered for the board CPU (@opaque is the PowerPCCPU).
+ *
+ * Resets the CPU, then sets up the register state for booting a Linux kernel
+ * with a device tree, using the values recorded in the boot_info attached to
+ * env->load_info, and installs the initial TLB mapping.
+ */
+static void main_cpu_reset(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+    struct boot_info *info = env->load_info;
+
+    cpu_reset(CPU(cpu));
+
+    /*
+     * Linux Kernel Parameters (passing device tree):
+     *   r3: pointer to the fdt
+     *   r4: 0
+     *   r5: 0
+     *   r6: epapr magic
+     *   r7: size of IMA in bytes
+     *   r8: 0
+     *   r9: 0
+     */
+    env->nip = info->bootstrap_pc;
+    env->gpr[1] = (16 * MiB) - 8;
+    env->gpr[3] = info->fdt;
+    env->gpr[6] = tswap32(EPAPR_MAGIC);
+    env->gpr[7] = info->ima_size;
+
+    /* Create a mapping for the kernel. */
+    mmubooke_create_initial_mapping(env, 0, 0);
+}
+
+#define BINARY_DEVICE_TREE_FILE "virtex-ml507.dtb"
+
+/*
+ * Load a device tree blob, patch its /chosen node and copy it into guest
+ * physical memory at @addr.
+ *
+ * The blob comes from the machine's "dtb" option when one is given.
+ * Otherwise a local "ppc.dtb" is tried first and BINARY_DEVICE_TREE_FILE from
+ * the BIOS directory second. /chosen gets "linux,initrd-start",
+ * "linux,initrd-end" (@initrd_base + @initrd_size) and "bootargs"
+ * (@kernel_cmdline); a failure to set any of them is reported but not fatal.
+ * @ramsize is not used.
+ *
+ * Returns the size of the blob, or 0 if no blob could be loaded.
+ */
+static int xilinx_load_device_tree(hwaddr addr,
+                                   uint32_t ramsize,
+                                   hwaddr initrd_base,
+                                   hwaddr initrd_size,
+                                   const char *kernel_cmdline)
+{
+    const char *user_dtb = current_machine->dtb;
+    void *blob = NULL;
+    int blob_size;
+    int rc;
+
+    if (user_dtb) {
+        blob = load_device_tree(user_dtb, &blob_size);
+        if (!blob) {
+            error_report("Error while loading device tree file '%s'",
+                         user_dtb);
+        }
+    } else {
+        /* Try the local "ppc.dtb" override, then the bundled blob. */
+        blob = load_device_tree("ppc.dtb", &blob_size);
+        if (!blob) {
+            char *bundled = qemu_find_file(QEMU_FILE_TYPE_BIOS,
+                                           BINARY_DEVICE_TREE_FILE);
+
+            if (bundled) {
+                blob = load_device_tree(bundled, &blob_size);
+                g_free(bundled);
+            }
+        }
+    }
+
+    if (!blob) {
+        return 0;
+    }
+
+    rc = qemu_fdt_setprop_cell(blob, "/chosen", "linux,initrd-start",
+                               initrd_base);
+    if (rc < 0) {
+        error_report("couldn't set /chosen/linux,initrd-start");
+    }
+
+    rc = qemu_fdt_setprop_cell(blob, "/chosen", "linux,initrd-end",
+                               (initrd_base + initrd_size));
+    if (rc < 0) {
+        error_report("couldn't set /chosen/linux,initrd-end");
+    }
+
+    rc = qemu_fdt_setprop_string(blob, "/chosen", "bootargs", kernel_cmdline);
+    if (rc < 0) {
+        fprintf(stderr, "couldn't set /chosen/bootargs\n");
+    }
+
+    cpu_physical_memory_write(addr, blob, blob_size);
+    g_free(blob);
+
+    return blob_size;
+}
+
+/*
+ * Machine init callback for the Virtex ML507 board.
+ *
+ * Creates the CPU (with its UIC), maps RAM at address 0, registers the
+ * parallel flash, the Xilinx interrupt controller, the 16550 UART and the
+ * Xilinx timer, and, when a kernel was given, loads the kernel, the optional
+ * initrd and the device tree, recording the boot parameters in boot_info for
+ * main_cpu_reset().
+ */
+static void virtex_init(MachineState *machine)
+{
+ const char *kernel_filename = machine->kernel_filename;
+ const char *kernel_cmdline = machine->kernel_cmdline;
+ hwaddr initrd_base = 0;
+ int initrd_size = 0;
+ MemoryRegion *address_space_mem = get_system_memory();
+ DeviceState *dev;
+ PowerPCCPU *cpu;
+ CPUPPCState *env;
+ hwaddr ram_base = 0;
+ DriveInfo *dinfo;
+ qemu_irq irq[32], *cpu_irq;
+ int kernel_size;
+ int i;
+
+ /* init CPUs */
+ cpu = ppc440_init_xilinx(machine->cpu_type, 400000000);
+ env = &cpu->env;
+
+ /* The reset-time mapping is built from BookE TLB entries only. */
+ if (env->mmu_model != POWERPC_MMU_BOOKE) {
+ error_report("MMU model %i not supported by this machine",
+ env->mmu_model);
+ exit(1);
+ }
+
+ qemu_register_reset(main_cpu_reset, cpu);
+
+ memory_region_add_subregion(address_space_mem, ram_base, machine->ram);
+
+ /* Parallel flash, backed by the first pflash drive when there is one. */
+ dinfo = drive_get(IF_PFLASH, 0, 0);
+ pflash_cfi01_register(PFLASH_BASEADDR, "virtex.flash", FLASH_SIZE,
+ dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
+ 64 * KiB, 1, 0x89, 0x18, 0x0000, 0x0, 1);
+
+ /* cpu_irq[0] is the CPU's PPC40x_INPUT_INT input line. */
+ cpu_irq = (qemu_irq *) &env->irq_inputs[PPC40x_INPUT_INT];
+ dev = qdev_new("xlnx.xps-intc");
+ qdev_prop_set_uint32(dev, "kind-of-intr", 0);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, cpu_irq[0]);
+ /* Collect the 32 interrupt controller inputs for the devices below. */
+ for (i = 0; i < 32; i++) {
+ irq[i] = qdev_get_gpio_in(dev, i);
+ }
+
+ serial_mm_init(address_space_mem, UART16550_BASEADDR, 2, irq[UART16550_IRQ],
+ 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN);
+
+ /* 2 timers at irq TIMER_IRQ (3) @ 62 MHz. */
+ dev = qdev_new("xlnx.xps-timer");
+ qdev_prop_set_uint32(dev, "one-timer-only", 0);
+ qdev_prop_set_uint32(dev, "clock-frequency", 62 * 1000000);
+ sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+ sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, TIMER_BASEADDR);
+ sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ]);
+
+ if (kernel_filename) {
+ uint64_t entry, high;
+ hwaddr boot_offset;
+
+ /* Boots a kernel elf binary. */
+ kernel_size = load_elf(kernel_filename, NULL, NULL, NULL,
+ &entry, NULL, &high, NULL, 1, PPC_ELF_MACHINE,
+ 0, 0);
+ /* Only the low 24 bits of the ELF entry point are kept. */
+ boot_info.bootstrap_pc = entry & 0x00ffffff;
+
+ if (kernel_size < 0) {
+ boot_offset = 0x1200000;
+ /* If we failed loading ELF's try a raw image. */
+ kernel_size = load_image_targphys(kernel_filename,
+ boot_offset,
+ machine->ram_size);
+ /*
+ * NOTE(review): a failure of the raw load (kernel_size < 0) is not
+ * checked before kernel_size is used below - confirm intended.
+ */
+ boot_info.bootstrap_pc = boot_offset;
+ high = boot_info.bootstrap_pc + kernel_size + 8192;
+ }
+
+ boot_info.ima_size = kernel_size;
+
+ /* Load initrd. */
+ if (machine->initrd_filename) {
+ /* The initrd goes right after the kernel, 4-byte aligned. */
+ initrd_base = high = ROUND_UP(high, 4);
+ initrd_size = load_image_targphys(machine->initrd_filename,
+ high, machine->ram_size - high);
+
+ if (initrd_size < 0) {
+ error_report("couldn't load ram disk '%s'",
+ machine->initrd_filename);
+ exit(1);
+ }
+ high = ROUND_UP(high + initrd_size, 4);
+ }
+
+ /* Provide a device-tree. */
+ /* Placed 16 KiB above the loaded images, rounded down to 8 KiB. */
+ boot_info.fdt = high + (8192 * 2);
+ boot_info.fdt &= ~8191;
+
+ xilinx_load_device_tree(boot_info.fdt, machine->ram_size,
+ initrd_base, initrd_size,
+ kernel_cmdline);
+ }
+ /* main_cpu_reset() picks the boot parameters up from here. */
+ env->load_info = &boot_info;
+}
+
+/*
+ * Machine class setup for "virtex-ml507": description, init callback,
+ * default CPU type ("440-xilinx") and the id of the default RAM region.
+ */
+static void virtex_machine_init(MachineClass *mc)
+{
+    mc->init = virtex_init;
+    mc->desc = "Xilinx Virtex ML507 reference design";
+    mc->default_ram_id = "ram";
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("440-xilinx");
+}
+
+DEFINE_MACHINE("virtex-ml507", virtex_machine_init)
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
new file mode 100644
index 000000000..73adc44ec
--- /dev/null
+++ b/hw/ppc/vof.c
@@ -0,0 +1,1062 @@
+/*
+ * QEMU PowerPC Virtual Open Firmware.
+ *
+ * This implements client interface from OpenFirmware IEEE1275 on the QEMU
+ * side to leave only a very basic firmware in the VM.
+ *
+ * Copyright (c) 2021 IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/range.h"
+#include "qemu/units.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "hw/ppc/vof.h"
+#include "hw/ppc/fdt.h"
+#include "sysemu/runstate.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+#include <libfdt.h>
+
+/*
+ * OF 1275 "nextprop" description suggests it is 32 bytes max but
+ * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long.
+ */
+#define OF_PROPNAME_LEN_MAX 64
+
+#define VOF_MAX_PATH 256 /* size of the buffers holding node paths */
+#define VOF_MAX_SETPROPLEN 2048 /* largest value vof_setprop() accepts */
+#define VOF_MAX_METHODLEN 256
+#define VOF_MAX_FORTHCODE 256
+#define VOF_VTY_BUF_SIZE 256 /* size of the chunk buffer in vof_write() */
+
+/* One claimed memory range; it covers start .. start + size. */
+typedef struct {
+ uint64_t start;
+ uint64_t size;
+} OfClaimed;
+
+/* An open instance, stored in Vof::of_instances keyed by its ihandle. */
+typedef struct {
+ char *path; /* the path used to open the instance */
+ uint32_t phandle; /* phandle of the node the path resolved to */
+} OfInstance;
+
+/*
+ * Read @size bytes of guest memory at @pa into @buf and check that they
+ * contain a NUL-terminated string.
+ *
+ * Returns 0 on success. Returns -1 if the guest memory cannot be read or if
+ * no NUL is found within @size bytes; in the latter case @buf is forcibly
+ * terminated and the truncation is traced.
+ */
+static int readstr(hwaddr pa, char *buf, int size)
+{
+    if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) {
+        return -1;
+    }
+
+    if (strnlen(buf, size) != size) {
+        return 0;
+    }
+
+    /* No terminator: cut the string so that it can be traced safely */
+    buf[size - 1] = '\0';
+    trace_vof_error_str_truncated(buf, size);
+
+    return -1;
+}
+
+/*
+ * Check whether the requested service @s called with @nargs/@nret is the
+ * service @s1 which expects @nargscheck/@nretcheck.
+ *
+ * A zero @nargscheck or @nretcheck disables the respective count check.
+ * A name match with a wrong argument or return count is traced and treated
+ * as a mismatch.
+ */
+static bool cmpservice(const char *s, unsigned nargs, unsigned nret,
+                       const char *s1, unsigned nargscheck, unsigned nretcheck)
+{
+    bool nargs_bad, nret_bad;
+
+    if (strcmp(s, s1) != 0) {
+        return false;
+    }
+
+    nargs_bad = nargscheck != 0 && nargs != nargscheck;
+    nret_bad = nretcheck != 0 && nret != nretcheck;
+    if (!nargs_bad && !nret_bad) {
+        return true;
+    }
+
+    trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret);
+    return false;
+}
+
+/*
+ * Format the @len bytes of property value @prop into @tval (@tlen bytes) for
+ * tracing.
+ *
+ * If every byte but the last is in the 0x20..0x7f range and the last one is
+ * NUL, the value is copied as a string. Otherwise it is dumped as upper case
+ * hex, two digits per byte with a space between groups of four bytes, and
+ * cut with "..." when the buffer is about to run out.
+ *
+ * The callers in this file pass a zero-initialised @tval; the string branch
+ * depends on that for termination when the value does not fit.
+ */
+static void prop_format(char *tval, int tlen, const void *prop, int len)
+{
+ int i;
+ const unsigned char *c;
+ char *t;
+ const char bin[] = "...";
+
+ /* First pass: is this a printable, NUL-terminated string? */
+ for (i = 0, c = prop; i < len; ++i, ++c) {
+ if (*c == '\0' && i == len - 1) {
+ strncpy(tval, prop, tlen - 1);
+ return;
+ }
+ if (*c < 0x20 || *c >= 0x80) {
+ break;
+ }
+ }
+
+ /* Second pass: not a string, produce a hex dump instead. */
+ for (i = 0, c = prop, t = tval; i < len; ++i, ++c) {
+ /* Stop while the "..." marker still fits in the buffer. */
+ if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) {
+ strcpy(t, bin);
+ return;
+ }
+ /* Separate groups of four bytes, except right before the last byte. */
+ if (i && i % 4 == 0 && i != len - 1) {
+ strcat(t, " ");
+ ++t;
+ }
+ t += sprintf(t, "%02X", *c & 0xFF);
+ }
+}
+
+/*
+ * Store the full path of the node at @offset in @buf (@len bytes) and make
+ * sure the buffer is NUL-terminated.
+ *
+ * Returns the length of the path including the terminating NUL, or the
+ * negative error returned by fdt_get_path().
+ */
+static int get_path(const void *fdt, int offset, char *buf, int len)
+{
+    int rc = fdt_get_path(fdt, offset, buf, len - 1);
+
+    if (rc < 0) {
+        return rc;
+    }
+
+    buf[len - 1] = '\0';
+    return strlen(buf) + 1;
+}
+
+/*
+ * Resolve phandle @ph to a node and store the node's full path in @buf.
+ *
+ * Returns what get_path() returns for that node, or the negative error from
+ * fdt_node_offset_by_phandle() if the phandle is not found.
+ */
+static int phandle_to_path(const void *fdt, uint32_t ph, char *buf, int len)
+{
+    int node = fdt_node_offset_by_phandle(fdt, ph);
+
+    return node < 0 ? node : get_path(fdt, node, buf, len);
+}
+
+/*
+ * Look up @path in @fdt after converting the unit address of every path
+ * component (the part between '@' and the next '/') to lower case.
+ *
+ * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16
+ *
+ * "Conversion from numeric representation to text representation shall use
+ * the lower case forms of the hexadecimal digits in the range a..f,
+ * suppressing leading zeros".
+ *
+ * Returns what fdt_path_offset() returns for the converted path.
+ */
+static int path_offset(const void *fdt, const char *path)
+{
+    g_autofree char *lowered = g_strdup(path);
+    char *cur = strchr(lowered, '@');
+
+    while (cur && *cur) {
+        if (*cur != '/') {
+            *cur = tolower(*cur);
+            ++cur;
+            continue;
+        }
+        /* End of this component, skip to the next unit address if any */
+        cur = strchr(cur, '@');
+    }
+
+    return fdt_path_offset(fdt, lowered);
+}
+
+/*
+ * "finddevice": read a node path from guest memory at @nodeaddr and return
+ * the phandle of that node.
+ *
+ * Returns PROM_ERROR if the path cannot be read or does not resolve.
+ */
+static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr)
+{
+    char path[VOF_MAX_PATH];
+    uint32_t phandle = PROM_ERROR;
+    int node;
+
+    if (readstr(nodeaddr, path, sizeof(path))) {
+        return PROM_ERROR;
+    }
+
+    node = path_offset(fdt, path);
+    if (node >= 0) {
+        phandle = fdt_get_phandle(fdt, node);
+    }
+
+    trace_vof_finddevice(path, phandle);
+    return phandle;
+}
+
+/*
+ * Fetch property @propname of the node at @nodeoff, storing its length in
+ * @proplen.
+ *
+ * If the property does not exist and it is "name", the value is derived from
+ * the node name instead: the returned pointer is the node name and @proplen
+ * covers the part before '@' (if any) plus one byte for the terminator.
+ * Since the byte at that position may be '@' rather than NUL, @write0 (when
+ * not NULL) is set to tell the caller to write the zero itself; it is set to
+ * false when a real property was found.
+ *
+ * Returns the value, or NULL if there is none.
+ */
+static const void *getprop(const void *fdt, int nodeoff, const char *propname,
+                           int *proplen, bool *write0)
+{
+    const void *value = fdt_getprop(fdt, nodeoff, propname, proplen);
+    const char *nodename, *at;
+
+    if (value) {
+        if (write0) {
+            *write0 = false;
+        }
+        return value;
+    }
+
+    /* Only "name" has a fallback */
+    if (strcmp(propname, "name") != 0) {
+        return NULL;
+    }
+
+    nodename = fdt_get_name(fdt, nodeoff, proplen);
+    if (!nodename) {
+        *proplen = 0;
+        return NULL;
+    }
+
+    /* Drop the unit address, keep room for the terminator */
+    at = memchr(nodename, '@', *proplen);
+    if (at) {
+        *proplen = at - nodename;
+    }
+    *proplen += 1;
+
+    if (write0) {
+        *write0 = true;
+    }
+    return nodename;
+}
+
+/*
+ * "getprop": copy the value of the property named by the guest string at
+ * @pname, of the node with phandle @nodeph, to guest memory at @valaddr.
+ * At most @vallen bytes are copied.
+ *
+ * Returns the full length of the property, or PROM_ERROR if the node or the
+ * property does not exist or guest memory cannot be accessed.
+ */
+static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname,
+ uint32_t valaddr, uint32_t vallen)
+{
+ char propname[OF_PROPNAME_LEN_MAX + 1];
+ uint32_t ret = 0;
+ int proplen = 0;
+ const void *prop;
+ char trval[64] = "";
+ int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph);
+ bool write0;
+
+ if (nodeoff < 0) {
+ return PROM_ERROR;
+ }
+ if (readstr(pname, propname, sizeof(propname))) {
+ return PROM_ERROR;
+ }
+ /* write0 is set by getprop() whenever it returns a value. */
+ prop = getprop(fdt, nodeoff, propname, &proplen, &write0);
+ if (prop) {
+ const char zero = 0;
+ int cb = MIN(proplen, vallen);
+
+ if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK ||
+ /* if that was "name" with a unit address, overwrite '@' with '0' */
+ (write0 &&
+ cb == proplen &&
+ VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) {
+ ret = PROM_ERROR;
+ } else {
+ /*
+ * OF1275 says:
+ * "Size is either the actual size of the property, or -1 if name
+ * does not exist", hence returning proplen instead of cb.
+ */
+ ret = proplen;
+ /* Do not format a value if tracepoint is silent, for performance */
+ if (trace_event_get_state(TRACE_VOF_GETPROP) &&
+ qemu_loglevel_mask(LOG_TRACE)) {
+ prop_format(trval, sizeof(trval), prop, ret);
+ }
+ }
+ } else {
+ ret = PROM_ERROR;
+ }
+ trace_vof_getprop(nodeph, propname, ret, trval);
+
+ return ret;
+}
+
+/*
+ * "getproplen": return the length of the property named by the guest string
+ * at @pname, of the node with phandle @nodeph.
+ *
+ * Returns PROM_ERROR if the node or the property does not exist or the name
+ * cannot be read.
+ */
+static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname)
+{
+    char propname[OF_PROPNAME_LEN_MAX + 1];
+    int node = fdt_node_offset_by_phandle(fdt, nodeph);
+    int length = 0;
+    uint32_t result;
+
+    if (node < 0) {
+        return PROM_ERROR;
+    }
+    if (readstr(pname, propname, sizeof(propname))) {
+        return PROM_ERROR;
+    }
+
+    if (getprop(fdt, node, propname, &length, NULL)) {
+        result = length;
+    } else {
+        result = PROM_ERROR;
+    }
+    trace_vof_getproplen(nodeph, propname, result);
+
+    return result;
+}
+
+/*
+ * "setprop": set the property named by the guest string at @pname, of the
+ * node with phandle @nodeph, to the @vallen bytes at guest address @valaddr.
+ *
+ * The change is only applied if the machine implements TYPE_VOF_MACHINE_IF
+ * and its setprop() hook accepts it. @vof is not used here.
+ *
+ * Returns @vallen on success, PROM_ERROR otherwise. The call is traced in
+ * both cases.
+ */
+static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof,
+ uint32_t nodeph, uint32_t pname,
+ uint32_t valaddr, uint32_t vallen)
+{
+ char propname[OF_PROPNAME_LEN_MAX + 1];
+ uint32_t ret = PROM_ERROR;
+ int offset, rc;
+ char trval[64] = "";
+ char nodepath[VOF_MAX_PATH] = "";
+ Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+ VofMachineIfClass *vmc;
+ g_autofree char *val = NULL;
+
+ /* Bound the host allocation made for the value below. */
+ if (vallen > VOF_MAX_SETPROPLEN) {
+ goto trace_exit;
+ }
+ if (readstr(pname, propname, sizeof(propname))) {
+ goto trace_exit;
+ }
+ offset = fdt_node_offset_by_phandle(fdt, nodeph);
+ if (offset < 0) {
+ goto trace_exit;
+ }
+ /* The machine hook identifies the node by path, not by phandle. */
+ rc = get_path(fdt, offset, nodepath, sizeof(nodepath));
+ if (rc <= 0) {
+ goto trace_exit;
+ }
+
+ val = g_malloc0(vallen);
+ if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) {
+ goto trace_exit;
+ }
+
+ if (!vmo) {
+ goto trace_exit;
+ }
+
+ /* Let the machine veto the change or update its own state. */
+ vmc = VOF_MACHINE_GET_CLASS(vmo);
+ if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) {
+ goto trace_exit;
+ }
+
+ rc = fdt_setprop(fdt, offset, propname, val, vallen);
+ if (rc) {
+ goto trace_exit;
+ }
+
+ /* Do not format a value if the tracepoint is silent. */
+ if (trace_event_get_state(TRACE_VOF_SETPROP) &&
+ qemu_loglevel_mask(LOG_TRACE)) {
+ prop_format(trval, sizeof(trval), val, vallen);
+ }
+ ret = vallen;
+
+trace_exit:
+ trace_vof_setprop(nodeph, propname, trval, vallen, ret);
+
+ return ret;
+}
+
+/*
+ * "nextprop": write to guest memory at @nameaddr the name of the property
+ * which follows the one named by the guest string at @prevaddr, in the node
+ * with phandle @phandle. An empty previous name selects the first property.
+ *
+ * Returns 1 if a name was written, 0 if there is no such property, and
+ * PROM_ERROR if guest memory cannot be accessed.
+ */
+static uint32_t vof_nextprop(const void *fdt, uint32_t phandle,
+ uint32_t prevaddr, uint32_t nameaddr)
+{
+ int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle);
+ char prev[OF_PROPNAME_LEN_MAX + 1];
+ const char *tmp;
+
+ if (readstr(prevaddr, prev, sizeof(prev))) {
+ return PROM_ERROR;
+ }
+
+ fdt_for_each_property_offset(offset, fdt, nodeoff) {
+ if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
+ return 0;
+ }
+ if (prev[0] == '\0' || strcmp(prev, tmp) == 0) {
+ /* Found the previous property: step to the one after it. */
+ if (prev[0] != '\0') {
+ offset = fdt_next_property_offset(fdt, offset);
+ if (offset < 0) {
+ return 0;
+ }
+ }
+ if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) {
+ return 0;
+ }
+
+ /* The name is written together with its terminating NUL. */
+ if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) {
+ return PROM_ERROR;
+ }
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * "peer": return the phandle of the next sibling of the node with phandle
+ * @phandle, or the phandle of the root node if @phandle is 0.
+ *
+ * Returns 0 if there is no such node.
+ */
+static uint32_t vof_peer(const void *fdt, uint32_t phandle)
+{
+    int node;
+
+    if (phandle != 0) {
+        node = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle));
+    } else {
+        node = fdt_path_offset(fdt, "/");
+    }
+
+    return node >= 0 ? fdt_get_phandle(fdt, node) : 0;
+}
+
+/*
+ * "child": return the phandle of the first subnode of the node with phandle
+ * @phandle, or 0 if there is none.
+ */
+static uint32_t vof_child(const void *fdt, uint32_t phandle)
+{
+    int parent = fdt_node_offset_by_phandle(fdt, phandle);
+    int first = fdt_first_subnode(fdt, parent);
+
+    if (first < 0) {
+        return 0;
+    }
+
+    return fdt_get_phandle(fdt, first);
+}
+
+/*
+ * "parent": return the phandle of the parent of the node with phandle
+ * @phandle, or 0 if there is none.
+ */
+static uint32_t vof_parent(const void *fdt, uint32_t phandle)
+{
+    int node = fdt_node_offset_by_phandle(fdt, phandle);
+    int parent = fdt_parent_offset(fdt, node);
+
+    if (parent < 0) {
+        return 0;
+    }
+
+    return fdt_get_phandle(fdt, parent);
+}
+
+/*
+ * Create an instance for the node at @offset, opened via @path, and register
+ * it in vof->of_instances under a newly allocated ihandle.
+ *
+ * Returns the new ihandle, or PROM_ERROR once the ihandle counter has
+ * reached 0xFFFFFFFF. The node must have a phandle.
+ */
+static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path)
+{
+    OfInstance *inst;
+    uint32_t ihandle;
+
+    if (vof->of_instance_last == 0xFFFFFFFF) {
+        /* We do not recycle ihandles yet */
+        trace_vof_open(path, 0, PROM_ERROR);
+        return PROM_ERROR;
+    }
+
+    inst = g_new0(OfInstance, 1);
+    inst->phandle = fdt_get_phandle(fdt, offset);
+    g_assert(inst->phandle);
+    inst->path = g_strdup(path);
+
+    ihandle = ++vof->of_instance_last;
+    g_hash_table_insert(vof->of_instances, GINT_TO_POINTER(ihandle), inst);
+
+    trace_vof_open(path, inst->phandle, ihandle);
+
+    return ihandle;
+}
+
+/*
+ * Open an instance of the node at @path and store the resulting ihandle as
+ * the cell property @prop of the node @nodename.
+ *
+ * Returns 0 on success, PROM_ERROR if @path is unknown or the property
+ * cannot be set.
+ */
+uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename,
+                               const char *prop, const char *path)
+{
+    int holder = fdt_path_offset(fdt, nodename);
+    int target = fdt_path_offset(fdt, path);
+    uint32_t ihandle;
+
+    if (target < 0) {
+        trace_vof_error_unknown_path(path);
+        return PROM_ERROR;
+    }
+
+    ihandle = vof_do_open(fdt, vof, target, path);
+    if (fdt_setprop_cell(fdt, holder, prop, ihandle) < 0) {
+        return PROM_ERROR;
+    }
+
+    return 0;
+}
+
+/*
+ * "open": read a node path from guest memory at @pathaddr and open an
+ * instance of that node.
+ *
+ * Returns the new ihandle, or PROM_ERROR if the path cannot be read, is
+ * unknown, or no instance can be created.
+ */
+static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr)
+{
+    char nodepath[VOF_MAX_PATH];
+    int node;
+
+    if (readstr(pathaddr, nodepath, sizeof(nodepath))) {
+        return PROM_ERROR;
+    }
+
+    node = path_offset(fdt, nodepath);
+    if (node >= 0) {
+        return vof_do_open(fdt, vof, node, nodepath);
+    }
+
+    trace_vof_error_unknown_path(nodepath);
+    return PROM_ERROR;
+}
+
+/*
+ * "close": drop the instance registered under @ihandle; an unknown ihandle
+ * is only traced.
+ */
+static void vof_close(Vof *vof, uint32_t ihandle)
+{
+    if (g_hash_table_remove(vof->of_instances, GINT_TO_POINTER(ihandle))) {
+        return;
+    }
+
+    trace_vof_error_unknown_ihandle_close(ihandle);
+}
+
+/*
+ * "instance-to-package": return the phandle recorded for the instance
+ * registered under @ihandle, or PROM_ERROR if there is no such instance.
+ */
+static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle)
+{
+    OfInstance *inst = g_hash_table_lookup(vof->of_instances,
+                                           GINT_TO_POINTER(ihandle));
+    uint32_t phandle = inst ? inst->phandle : PROM_ERROR;
+
+    trace_vof_instance_to_package(ihandle, phandle);
+
+    return phandle;
+}
+
+/*
+ * "package-to-path" service: converts @phandle to a path and copies it to
+ * the client buffer at guest address @buf.
+ *
+ * At most @len bytes are stored so a short client buffer is never overrun;
+ * the value returned is still the length reported by phandle_to_path().
+ * Returns PROM_ERROR if the conversion or the guest memory write fails.
+ */
+static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle,
+                                    uint32_t buf, uint32_t len)
+{
+    int rc;
+    char tmp[VOF_MAX_PATH] = "";
+
+    rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp));
+    if (rc > 0) {
+        /* Honour the size of the client buffer */
+        uint32_t cb = MIN((uint32_t)rc, len);
+
+        if (cb && VOF_MEM_WRITE(buf, tmp, cb) != MEMTX_OK) {
+            rc = -1;
+        }
+    }
+
+    trace_vof_package_to_path(phandle, tmp, rc);
+
+    return rc > 0 ? (uint32_t)rc : PROM_ERROR;
+}
+
+/*
+ * "instance-to-path" service: looks up the package of @ihandle, converts it
+ * to a path and copies it to the client buffer at guest address @buf.
+ *
+ * At most @len bytes are stored so a short client buffer is never overrun;
+ * the value returned is still the length reported by phandle_to_path().
+ * Returns PROM_ERROR if the ihandle is unknown or the conversion or the
+ * guest memory write fails.
+ */
+static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle,
+                                     uint32_t buf, uint32_t len)
+{
+    int rc = -1;
+    uint32_t phandle = vof_instance_to_package(vof, ihandle);
+    char tmp[VOF_MAX_PATH] = "";
+
+    if (phandle != PROM_ERROR) {
+        rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp));
+        if (rc > 0) {
+            /* Honour the size of the client buffer */
+            uint32_t cb = MIN((uint32_t)rc, len);
+
+            if (cb && VOF_MEM_WRITE(buf, tmp, cb) != MEMTX_OK) {
+                rc = -1;
+            }
+        }
+    }
+    trace_vof_instance_to_path(ihandle, phandle, tmp, rc);
+
+    return rc > 0 ? (uint32_t)rc : PROM_ERROR;
+}
+
+/*
+ * "write" service: consumes @len bytes from guest address @buf in chunks
+ * that fit the local buffer and emits each chunk via the vof_write trace
+ * event (there is no real backend yet).
+ *
+ * Returns the number of bytes consumed, or PROM_ERROR if @ihandle is unknown
+ * or guest memory cannot be read.
+ */
+static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf,
+                          uint32_t len)
+{
+    char tmp[VOF_VTY_BUF_SIZE];
+    unsigned cb;
+    const uint32_t total = len;
+    OfInstance *inst = (OfInstance *)
+        g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle));
+
+    if (!inst) {
+        trace_vof_error_write(ihandle);
+        return PROM_ERROR;
+    }
+
+    /* Advance @buf with every chunk so each one reads fresh data */
+    for ( ; len > 0; len -= cb, buf += cb) {
+        /* Leave room for the terminating NUL added for tracing */
+        cb = MIN(len, sizeof(tmp) - 1);
+        if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) {
+            return PROM_ERROR;
+        }
+
+        /* FIXME: there is no backend(s) yet so just call a trace */
+        if (trace_event_get_state(TRACE_VOF_WRITE) &&
+            qemu_loglevel_mask(LOG_TRACE)) {
+            tmp[cb] = '\0';
+            trace_vof_write(ihandle, cb, tmp);
+        }
+    }
+
+    /* @len is zero by now; report what was actually consumed */
+    return total;
+}
+
+/*
+ * Traces every claimed range; does nothing unless the vof_claimed trace
+ * event and trace logging are both enabled.
+ */
+static void vof_claimed_dump(GArray *claimed)
+{
+    guint idx;
+
+    if (!trace_event_get_state(TRACE_VOF_CLAIMED) ||
+        !qemu_loglevel_mask(LOG_TRACE)) {
+        return;
+    }
+
+    for (idx = 0; idx < claimed->len; ++idx) {
+        OfClaimed range = g_array_index(claimed, OfClaimed, idx);
+
+        trace_vof_claimed(range.start, range.start + range.size, range.size);
+    }
+}
+
+/*
+ * Returns true if [@virt, @virt + @size) overlaps none of the ranges
+ * recorded in @claimed.
+ */
+static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size)
+{
+    guint idx = 0;
+
+    while (idx < claimed->len) {
+        OfClaimed range = g_array_index(claimed, OfClaimed, idx);
+
+        if (ranges_overlap(range.start, range.size, virt, size)) {
+            return false;
+        }
+        ++idx;
+    }
+
+    return true;
+}
+
+/* Appends the range [@virt, @virt + @size) to @claimed; no overlap check. */
+static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size)
+{
+    OfClaimed range;
+
+    range.size = size;
+    range.start = virt;
+    g_array_append_val(claimed, range);
+}
+
+/*
+ * Sort callback ordering OfClaimed entries by ascending start address.
+ *
+ * The result comes from comparisons rather than a subtraction: a difference
+ * of two start addresses narrowed to gint could flip sign or become zero.
+ */
+static gint of_claimed_compare_func(gconstpointer a, gconstpointer b)
+{
+    const OfClaimed *ca = a;
+    const OfClaimed *cb = b;
+
+    return (ca->start > cb->start) - (ca->start < cb->start);
+}
+
+/*
+ * Rebuilds the "available" property of /memory@0 from the claimed ranges.
+ *
+ * @claimed is sorted by start address; one (start, size) pair is emitted
+ * for the gap after each claimed range, the last gap ending at the value
+ * read from the size cells of the node's "reg" property. Cell widths follow
+ * #address-cells/#size-cells of the root node. @base is currently unused.
+ *
+ * Cells are loaded and stored with memcpy() because neither the FDT property
+ * nor the output cursor is guaranteed to be 8-byte aligned.
+ */
+static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base)
+{
+    int i, offset, proplen = 0, sc, ac;
+    target_ulong mem0_end;
+    const uint8_t *mem0_reg;
+    g_autofree uint8_t *avail = NULL;
+    uint8_t *availcur;
+
+    if (!fdt || !claimed) {
+        return;
+    }
+
+    offset = fdt_path_offset(fdt, "/");
+    _FDT(offset);
+    ac = fdt_address_cells(fdt, offset);
+    g_assert(ac == 1 || ac == 2);
+    sc = fdt_size_cells(fdt, offset);
+    g_assert(sc == 1 || sc == 2);
+
+    offset = fdt_path_offset(fdt, "/memory@0");
+    _FDT(offset);
+
+    mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen);
+    g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc));
+    if (sc == 2) {
+        uint64_t cells;
+
+        memcpy(&cells, mem0_reg + sizeof(uint32_t) * ac, sizeof(cells));
+        mem0_end = be64_to_cpu(cells);
+    } else {
+        uint32_t cell;
+
+        memcpy(&cell, mem0_reg + sizeof(uint32_t) * ac, sizeof(cell));
+        mem0_end = be32_to_cpu(cell);
+    }
+
+    g_array_sort(claimed, of_claimed_compare_func);
+    vof_claimed_dump(claimed);
+
+    /*
+     * VOF resides in the first page so we do not need to check if there is
+     * available memory before the first claimed block
+     */
+    g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0));
+
+    avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len);
+    for (i = 0, availcur = avail; i < claimed->len; ++i) {
+        OfClaimed c = g_array_index(claimed, OfClaimed, i);
+        uint64_t start, size;
+
+        /* The gap starts right after this claimed block ... */
+        start = c.start + c.size;
+        if (i < claimed->len - 1) {
+            OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1);
+
+            /* ... and runs up to the next claimed block ... */
+            size = cn.start - start;
+        } else {
+            /* ... or up to the end of memory for the last one */
+            size = mem0_end - start;
+        }
+
+        if (ac == 2) {
+            uint64_t cells = cpu_to_be64(start);
+
+            memcpy(availcur, &cells, sizeof(cells));
+        } else {
+            uint32_t cell = cpu_to_be32(start);
+
+            memcpy(availcur, &cell, sizeof(cell));
+        }
+        availcur += sizeof(uint32_t) * ac;
+        if (sc == 2) {
+            uint64_t cells = cpu_to_be64(size);
+
+            memcpy(availcur, &cells, sizeof(cells));
+        } else {
+            uint32_t cell = cpu_to_be32(size);
+
+            memcpy(availcur, &cell, sizeof(cell));
+        }
+        availcur += sizeof(uint32_t) * sc;
+
+        if (size) {
+            trace_vof_avail(c.start + c.size, c.start + c.size + size, size);
+        }
+    }
+    _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail)));
+}
+
+/*
+ * OF1275:
+ * "Allocates size bytes of memory. If align is zero, the allocated range
+ * begins at the virtual address virt. Otherwise, an aligned address is
+ * automatically chosen and the input argument virt is ignored".
+ *
+ * In other words, exactly one of @virt and @align is non-zero.
+ *
+ * Returns the start of the claimed range, or -1 if @size is zero, the
+ * requested range is already claimed, or no aligned range fits below
+ * vof->top_addr.
+ */
+uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size,
+                   uint64_t align)
+{
+    uint64_t ret = -1;
+
+    if (size == 0) {
+        /* Nothing to claim */
+    } else if (align == 0) {
+        if (vof_claim_avail(vof->claimed, virt, size)) {
+            ret = virt;
+        }
+    } else {
+        vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align);
+        while (1) {
+            /* The whole block, not just its start, must fit below the top */
+            if (vof->claimed_base >= vof->top_addr ||
+                size > vof->top_addr - vof->claimed_base) {
+                error_report("Out of RMA memory for the OF client");
+                break;
+            }
+            if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) {
+                ret = vof->claimed_base;
+                break;
+            }
+            /* Skip ahead and restore the alignment lost by adding @size */
+            vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base + size, align);
+        }
+    }
+
+    if (ret != -1) {
+        vof->claimed_base = MAX(vof->claimed_base, ret + size);
+        vof_claim_add(vof->claimed, ret, size);
+    }
+    trace_vof_claim(virt, size, align, ret);
+
+    return ret;
+}
+
+/*
+ * Removes the claimed range that matches @virt and @size exactly.
+ *
+ * Returns 0 if such a range was found and removed, PROM_ERROR otherwise.
+ */
+static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size)
+{
+    GArray *ranges = vof->claimed;
+    uint32_t status = PROM_ERROR;
+    guint idx;
+
+    for (idx = 0; idx < ranges->len; ++idx) {
+        const OfClaimed *range = &g_array_index(ranges, OfClaimed, idx);
+
+        if (range->start != virt || range->size != size) {
+            continue;
+        }
+
+        g_array_remove_index(ranges, idx);
+        status = 0;
+        break;
+    }
+
+    trace_vof_release(virt, size, status);
+
+    return status;
+}
+
+/*
+ * RTAS instantiation is not implemented here: this only reports through
+ * @errp that the firmware was expected to have done it.
+ */
+static void vof_instantiate_rtas(Error **errp)
+{
+    error_setg(errp, "The firmware should have instantiated RTAS");
+}
+
+/*
+ * "call-method" service: runs the method whose name is stored at guest
+ * address @methodaddr on the instance @ihandle.
+ *
+ * Handled here are "ibm,client-architecture-support" on "/" (forwarded to
+ * the machine's VofMachineIf implementation when there is one) and
+ * "instantiate-rtas" on "/rtas". @ret2 receives the second return cell.
+ * Returns PROM_ERROR unless a method handled the call.
+ */
+static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr,
+                                uint32_t ihandle, uint32_t param1,
+                                uint32_t param2, uint32_t param3,
+                                uint32_t param4, uint32_t *ret2)
+{
+    char method[VOF_MAX_METHODLEN] = "";
+    uint32_t ret = PROM_ERROR;
+    OfInstance *inst;
+
+    if (!ihandle) {
+        goto trace_exit;
+    }
+
+    inst = (OfInstance *)g_hash_table_lookup(vof->of_instances,
+                                             GINT_TO_POINTER(ihandle));
+    if (!inst || readstr(methodaddr, method, sizeof(method))) {
+        goto trace_exit;
+    }
+
+    if (!strcmp(inst->path, "/")) {
+        if (!strcmp(method, "ibm,client-architecture-support")) {
+            Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+
+            if (vmo) {
+                VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo);
+
+                g_assert(vmc->client_architecture_support);
+                ret = (uint32_t)vmc->client_architecture_support(ms, first_cpu,
+                                                                 param1);
+            }
+
+            *ret2 = 0;
+        }
+    } else if (!strcmp(inst->path, "/rtas")) {
+        if (!strcmp(method, "instantiate-rtas")) {
+            vof_instantiate_rtas(&error_fatal);
+            ret = 0;
+            *ret2 = param1; /* rtas-base */
+        }
+    } else {
+        trace_vof_error_unknown_method(method);
+    }
+
+trace_exit:
+    trace_vof_method(ihandle, method, param1, ret, *ret2);
+
+    return ret;
+}
+
+/*
+ * "interpret" service: not implemented. The command string at guest address
+ * @cmdaddr is only read for tracing and PROM_ERROR is always returned.
+ */
+static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1,
+                                   uint32_t param2, uint32_t *ret2)
+{
+    char cmd[VOF_MAX_FORTHCODE] = "";
+    const uint32_t ret = PROM_ERROR;
+
+    /* A failed read is ignored: @cmd then stays as readstr() left it */
+    readstr(cmdaddr, cmd, sizeof(cmd));
+    trace_vof_interpret(cmd, param1, param2, ret, *ret2);
+
+    return ret;
+}
+
+/*
+ * "quiesce" service: packs the FDT, calls the machine's optional quiesce
+ * hook and dumps the claimed ranges to the trace log.
+ */
+static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof)
+{
+    Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF);
+    VofMachineIfClass *vmc;
+    int rc;
+
+    /* After "quiesce", no change is expected to the FDT, pack FDT to ensure */
+    rc = fdt_pack(fdt);
+    assert(rc == 0);
+
+    vmc = vmo ? VOF_MACHINE_GET_CLASS(vmo) : NULL;
+    if (vmc && vmc->quiesce) {
+        vmc->quiesce(ms);
+    }
+
+    vof_claimed_dump(vof->claimed);
+}
+
+/*
+ * Dispatches the client interface call @service to its handler.
+ *
+ * @args holds the @nargs input cells in host byte order, @rets receives
+ * additional return cells from handlers which produce them. The value
+ * returned is the first return cell; unknown services are traced and
+ * yield -1.
+ */
+static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof,
+                                  const char *service,
+                                  uint32_t *args, unsigned nargs,
+                                  uint32_t *rets, unsigned nrets)
+{
+    uint32_t ret = 0;
+
+    /* @nrets includes the value which this function returns */
+#define cmpserv(s, a, r) \
+    cmpservice(service, nargs, nrets, (s), (a), (r))
+
+    if (cmpserv("finddevice", 1, 1)) {
+        ret = vof_finddevice(fdt, args[0]);
+    } else if (cmpserv("getprop", 4, 1)) {
+        ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]);
+    } else if (cmpserv("getproplen", 2, 1)) {
+        ret = vof_getproplen(fdt, args[0], args[1]);
+    } else if (cmpserv("setprop", 4, 1)) {
+        ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]);
+    } else if (cmpserv("nextprop", 3, 1)) {
+        ret = vof_nextprop(fdt, args[0], args[1], args[2]);
+    } else if (cmpserv("peer", 1, 1)) {
+        ret = vof_peer(fdt, args[0]);
+    } else if (cmpserv("child", 1, 1)) {
+        ret = vof_child(fdt, args[0]);
+    } else if (cmpserv("parent", 1, 1)) {
+        ret = vof_parent(fdt, args[0]);
+    } else if (cmpserv("open", 1, 1)) {
+        ret = vof_open(fdt, vof, args[0]);
+    } else if (cmpserv("close", 1, 0)) {
+        vof_close(vof, args[0]);
+    } else if (cmpserv("instance-to-package", 1, 1)) {
+        ret = vof_instance_to_package(vof, args[0]);
+    } else if (cmpserv("package-to-path", 3, 1)) {
+        ret = vof_package_to_path(fdt, args[0], args[1], args[2]);
+    } else if (cmpserv("instance-to-path", 3, 1)) {
+        ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]);
+    } else if (cmpserv("write", 3, 1)) {
+        ret = vof_write(vof, args[0], args[1], args[2]);
+    } else if (cmpserv("claim", 3, 1)) {
+        uint64_t ret64 = vof_claim(vof, args[0], args[1], args[2]);
+
+        if (ret64 < 0x100000000UL) {
+            vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+            ret = (uint32_t)ret64;
+        } else {
+            if (ret64 != -1) {
+                /*
+                 * The range does not fit a 32bit cell: undo the claim using
+                 * the address vof_claim() returned (@ret is still 0 here).
+                 */
+                vof_release(vof, ret64, args[1]);
+            }
+            ret = PROM_ERROR;
+        }
+    } else if (cmpserv("release", 2, 0)) {
+        ret = vof_release(vof, args[0], args[1]);
+        if (ret != PROM_ERROR) {
+            vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+        }
+    } else if (cmpserv("call-method", 0, 0)) {
+        ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3],
+                              args[4], args[5], rets);
+    } else if (cmpserv("interpret", 0, 0)) {
+        ret = vof_call_interpret(args[0], args[1], args[2], rets);
+    } else if (cmpserv("milliseconds", 0, 1)) {
+        ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+    } else if (cmpserv("quiesce", 0, 0)) {
+        vof_quiesce(ms, fdt, vof);
+    } else if (cmpserv("exit", 0, 0)) {
+        error_report("Stopped as the VM requested \"exit\"");
+        vm_stop(RUN_STATE_PAUSED);
+    } else {
+        trace_vof_error_unknown_service(service, nargs, nrets);
+        ret = -1;
+    }
+
+#undef cmpserv
+
+    return ret;
+}
+
+/* Client interface argument block as read from guest memory. Defined as Big Endian */
+struct prom_args {
+ uint32_t service; /* guest address of the service name string */
+ uint32_t nargs; /* number of input cells at the start of args[] */
+ uint32_t nret; /* number of return cells, stored in args[] right after the inputs */
+ uint32_t args[10]; /* input cells followed by return cells */
+} QEMU_PACKED;
+
+/*
+ * Entry point for a client interface call: reads struct prom_args from guest
+ * address @args_real, converts the cells to host byte order, dispatches the
+ * service and writes the return cells back after the input cells.
+ *
+ * Returns 0 once the call has been dispatched and the results stored, and
+ * -EINVAL if guest memory cannot be accessed, the cell counts do not fit
+ * args[] or the service name is not NUL terminated within 64 bytes.
+ */
+int vof_client_call(MachineState *ms, Vof *vof, void *fdt,
+                    target_ulong args_real)
+{
+    struct prom_args args_be;
+    /*
+     * Zeroed because some services do not check the argument count and read
+     * a fixed number of cells; they must not see stale stack contents.
+     */
+    uint32_t args[ARRAY_SIZE(args_be.args)] = { 0 };
+    uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret;
+    char service[64];
+    unsigned nargs, nret, i;
+
+    if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) {
+        return -EINVAL;
+    }
+    nargs = be32_to_cpu(args_be.nargs);
+    if (nargs >= ARRAY_SIZE(args_be.args)) {
+        return -EINVAL;
+    }
+
+    if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) !=
+        MEMTX_OK) {
+        return -EINVAL;
+    }
+    if (strnlen(service, sizeof(service)) == sizeof(service)) {
+        /* Too long service name */
+        return -EINVAL;
+    }
+
+    for (i = 0; i < nargs; ++i) {
+        args[i] = be32_to_cpu(args_be.args[i]);
+    }
+
+    nret = be32_to_cpu(args_be.nret);
+    if (nret > ARRAY_SIZE(args_be.args) - nargs) {
+        return -EINVAL;
+    }
+    ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret);
+    if (!nret) {
+        return 0;
+    }
+
+    /* @nrets includes the value which this function returns */
+    args_be.args[nargs] = cpu_to_be32(ret);
+    for (i = 1; i < nret; ++i) {
+        args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]);
+    }
+
+    /* Only the return cells are written back to the guest */
+    if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]),
+                      args_be.args + nargs, sizeof(args_be.args[0]) * nret) !=
+        MEMTX_OK) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Value destructor of the instance hash table: frees the instance together
+ * with its path string.
+ */
+static void vof_instance_free(gpointer data)
+{
+    OfInstance *instance = data;
+
+    g_free(instance->path);
+    g_free(instance);
+}
+
+/*
+ * (Re)initialises @vof: releases any previous state, creates an empty
+ * instance table and claimed list, caps the allocation limit at 4GiB and
+ * claims vof->fw_size bytes at address 0 for the firmware.
+ *
+ * Sets @errp if the firmware range cannot be claimed.
+ */
+void vof_init(Vof *vof, uint64_t top_addr, Error **errp)
+{
+    uint64_t fw_start;
+
+    vof_cleanup(vof);
+
+    vof->claimed = g_array_new(false, false, sizeof(OfClaimed));
+    vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal,
+                                              NULL, vof_instance_free);
+
+    /* Keep allocations in 32bit as CLI ABI can only return cells==32bit */
+    vof->top_addr = MIN(top_addr, 4 * GiB);
+
+    fw_start = vof_claim(vof, 0, vof->fw_size, 0);
+    if (fw_start == -1) {
+        error_setg(errp, "Memory for firmware is in use");
+    }
+}
+
+/*
+ * Drops the claimed list and the instance table, if present, and leaves
+ * both pointers NULL. Calling it again is harmless.
+ */
+void vof_cleanup(Vof *vof)
+{
+    GArray *claimed = vof->claimed;
+    GHashTable *instances = vof->of_instances;
+
+    vof->claimed = NULL;
+    vof->of_instances = NULL;
+
+    if (claimed) {
+        g_array_unref(claimed);
+    }
+    if (instances) {
+        g_hash_table_unref(instances);
+    }
+}
+
+/*
+ * Finalises @fdt for the client: every node lacking a "phandle" property
+ * gets a fresh one numbered above the current maximum, then the "available"
+ * memory property is rebuilt from the claimed ranges.
+ */
+void vof_build_dt(void *fdt, Vof *vof)
+{
+    uint32_t last_phandle = fdt_get_max_phandle(fdt);
+    int node = fdt_next_node(fdt, -1, NULL);
+    int proplen = 0;
+
+    /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */
+    while (node >= 0) {
+        if (!fdt_getprop(fdt, node, "phandle", &proplen)) {
+            ++last_phandle;
+            _FDT(fdt_setprop_cell(fdt, node, "phandle", last_phandle));
+        }
+        node = fdt_next_node(fdt, node, NULL);
+    }
+
+    vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base);
+}
+
+static const TypeInfo vof_machine_if_info = { /* type description registered for TYPE_VOF_MACHINE_IF */
+ .name = TYPE_VOF_MACHINE_IF,
+ .parent = TYPE_INTERFACE, /* derived from the interface base type */
+ .class_size = sizeof(VofMachineIfClass), /* class struct carrying the machine hooks */
+};
+
+static void vof_machine_if_register_types(void) /* registers the VOF machine interface type */
+{
+ type_register_static(&vof_machine_if_info);
+}
+type_init(vof_machine_if_register_types) /* hooks the registration function into type initialisation */