author    | Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> | 2023-10-10 11:40:56 +0000
committer | Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> | 2023-10-10 11:40:56 +0000
commit    | e02cda008591317b1625707ff8e115a4841aa889 (patch)
tree      | aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /hw/ppc
parent    | cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff)
Introduce Virtio-loopback epsilon release:
The epsilon release introduces a new compatibility layer which makes the
virtio-loopback design work with QEMU and the rust-vmm vhost-user backend
without requiring any changes.
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'hw/ppc')
66 files changed, 40088 insertions, 0 deletions
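
One detail worth calling out before the diff body: in the e500.c hunk below, booke206_page_size_to_tlb() converts a mapping size into a BookE 2.06 MAS1 TSIZE value (the TLB encodes page sizes as 1 KiB << TSIZE), and booke206_initial_map_tsize() then rounds the covering TSIZE up to an even value because e500v2 only supports even TSIZE encodings. A minimal standalone sketch of that computation, with QEMU's clz64() replaced by GCC's __builtin_clzll() and a 28 MiB device-tree end chosen purely for illustration:

#include <assert.h>
#include <stdint.h>

#define KiB 1024ULL
#define MiB (1024 * KiB)

/* Mirror of booke206_page_size_to_tlb() below: TSIZE is log2(size / 1 KiB). */
static uint64_t page_size_to_tlb(uint64_t size)
{
    return 63 - __builtin_clzll(size / KiB);
}

int main(void)
{
    assert(page_size_to_tlb(64 * MiB) == 16);  /* 64 MiB = 1 KiB << 16 */

    uint64_t dt_end = 28 * MiB;                /* hypothetical dt_base + dt_size */
    int ps = page_size_to_tlb(dt_end) + 1;     /* 15: rounded up to cover dt_end */
    if (ps & 1) {
        ps++;                                  /* e500v2: even TSIZE only -> 16 */
    }
    assert(ps == 16);                          /* initial map: 1 KiB << 16 = 64 MiB */
    return 0;
}
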
diff --git a/hw/ppc/Kconfig b/hw/ppc/Kconfig new file mode 100644 index 000000000..400511c6b --- /dev/null +++ b/hw/ppc/Kconfig @@ -0,0 +1,153 @@ +config PSERIES + bool + imply PCI_DEVICES + imply TEST_DEVICES + imply VIRTIO_VGA + imply NVDIMM + select DIMM + select PCI + select SPAPR_VSCSI + select VFIO if LINUX # needed by spapr_pci_vfio.c + select XICS + select XIVE + select MSI_NONBROKEN + select FDT_PPC + select CHRP_NVRAM + select VOF + +config SPAPR_RNG + bool + default y + depends on PSERIES + +config POWERNV + bool + imply PCI_DEVICES + imply TEST_DEVICES + select ISA_IPMI_BT + select IPMI_LOCAL + select ISA_BUS + select MC146818RTC + select XICS + select XIVE + select FDT_PPC + select PCI_POWERNV + +config PPC405 + bool + select M48T59 + select PFLASH_CFI02 + select PPC4XX + select SERIAL + +config PPC440 + bool + imply PCI_DEVICES + imply TEST_DEVICES + imply E1000_PCI + select PCI_EXPRESS + select PPC4XX + select SERIAL + select FDT_PPC + +config PPC4XX + bool + select BITBANG_I2C + select PCI + select PPC_UIC + +config SAM460EX + bool + select PPC405 + select PFLASH_CFI01 + select IDE_SII3112 + select M41T80 + select PPC440 + select SERIAL + select SM501 + select SMBUS_EEPROM + select USB_EHCI_SYSBUS + select USB_OHCI + select FDT_PPC + +config PEGASOS2 + bool + select MV64361 + select VT82C686 + select IDE_VIA + select SMBUS_EEPROM + select VOF +# This should come with VT82C686 + select ACPI_X86 + imply ATI_VGA + +config PREP + bool + imply PCI_DEVICES + imply TEST_DEVICES + select CS4231A + select RAVEN_PCI + select I82378 + select LSI_SCSI_PCI + select M48T59 + select PC87312 + select RS6000_MC + select FW_CFG_PPC + +config RS6000_MC + bool + +config MAC_OLDWORLD + bool + imply PCI_DEVICES + imply SUNGEM + imply TEST_DEVICES + select ADB + select GRACKLE_PCI + select HEATHROW_PIC + select MACIO + select FW_CFG_PPC + +config MAC_NEWWORLD + bool + imply PCI_DEVICES + imply SUNGEM + imply TEST_DEVICES + select ADB + select MACIO + select MACIO_GPIO + select MAC_PMU + select UNIN_PCI + select FW_CFG_PPC + +config E500 + bool + imply AT24C + imply VIRTIO_PCI + select ETSEC + select OPENPIC + select PLATFORM_BUS + select PPCE500_PCI + select SERIAL + select MPC_I2C + select FDT_PPC + select DS1338 + +config VIRTEX + bool + select PPC4XX + select PFLASH_CFI01 + select SERIAL + select XILINX + select XILINX_ETHLITE + select FDT_PPC + +# Only used by 64-bit targets +config FW_CFG_PPC + bool + +config FDT_PPC + bool + +config VOF + bool diff --git a/hw/ppc/e500-ccsr.h b/hw/ppc/e500-ccsr.h new file mode 100644 index 000000000..249c17be3 --- /dev/null +++ b/hw/ppc/e500-ccsr.h @@ -0,0 +1,18 @@ +#ifndef E500_CCSR_H +#define E500_CCSR_H + +#include "hw/sysbus.h" +#include "qom/object.h" + +struct PPCE500CCSRState { + /*< private >*/ + SysBusDevice parent; + /*< public >*/ + + MemoryRegion ccsr_space; +}; + +#define TYPE_CCSR "e500-ccsr" +OBJECT_DECLARE_SIMPLE_TYPE(PPCE500CCSRState, CCSR) + +#endif /* E500_CCSR_H */ diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c new file mode 100644 index 000000000..960e7efcd --- /dev/null +++ b/hw/ppc/e500.c @@ -0,0 +1,1174 @@ +/* + * QEMU PowerPC e500-based platforms + * + * Copyright (C) 2009 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, <yu.liu@freescale.com> + * + * This file is derived from hw/ppc440_bamboo.c, + * the copyright for that material belongs to the original owners. 
+ * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "e500.h" +#include "e500-ccsr.h" +#include "net/net.h" +#include "qemu/config-file.h" +#include "hw/char/serial.h" +#include "hw/pci/pci.h" +#include "sysemu/sysemu.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "kvm_ppc.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/openpic.h" +#include "hw/ppc/openpic_kvm.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/loader.h" +#include "elf.h" +#include "hw/sysbus.h" +#include "qemu/host-utils.h" +#include "qemu/option.h" +#include "hw/pci-host/ppce500.h" +#include "qemu/error-report.h" +#include "hw/platform-bus.h" +#include "hw/net/fsl_etsec/etsec.h" +#include "hw/i2c/i2c.h" +#include "hw/irq.h" + +#define EPAPR_MAGIC (0x45504150) +#define BINARY_DEVICE_TREE_FILE "mpc8544ds.dtb" +#define DTC_LOAD_PAD 0x1800000 +#define DTC_PAD_MASK 0xFFFFF +#define DTB_MAX_SIZE (8 * MiB) +#define INITRD_LOAD_PAD 0x2000000 +#define INITRD_PAD_MASK 0xFFFFFF + +#define RAM_SIZES_ALIGN (64 * MiB) + +/* TODO: parameterize */ +#define MPC8544_CCSRBAR_SIZE 0x00100000ULL +#define MPC8544_MPIC_REGS_OFFSET 0x40000ULL +#define MPC8544_MSI_REGS_OFFSET 0x41600ULL +#define MPC8544_SERIAL0_REGS_OFFSET 0x4500ULL +#define MPC8544_SERIAL1_REGS_OFFSET 0x4600ULL +#define MPC8544_PCI_REGS_OFFSET 0x8000ULL +#define MPC8544_PCI_REGS_SIZE 0x1000ULL +#define MPC8544_UTIL_OFFSET 0xe0000ULL +#define MPC8XXX_GPIO_OFFSET 0x000FF000ULL +#define MPC8544_I2C_REGS_OFFSET 0x3000ULL +#define MPC8XXX_GPIO_IRQ 47 +#define MPC8544_I2C_IRQ 43 +#define RTC_REGS_OFFSET 0x68 + +#define PLATFORM_CLK_FREQ_HZ (400 * 1000 * 1000) + +struct boot_info +{ + uint32_t dt_base; + uint32_t dt_size; + uint32_t entry; +}; + +static uint32_t *pci_map_create(void *fdt, uint32_t mpic, int first_slot, + int nr_slots, int *len) +{ + int i = 0; + int slot; + int pci_irq; + int host_irq; + int last_slot = first_slot + nr_slots; + uint32_t *pci_map; + + *len = nr_slots * 4 * 7 * sizeof(uint32_t); + pci_map = g_malloc(*len); + + for (slot = first_slot; slot < last_slot; slot++) { + for (pci_irq = 0; pci_irq < 4; pci_irq++) { + pci_map[i++] = cpu_to_be32(slot << 11); + pci_map[i++] = cpu_to_be32(0x0); + pci_map[i++] = cpu_to_be32(0x0); + pci_map[i++] = cpu_to_be32(pci_irq + 1); + pci_map[i++] = cpu_to_be32(mpic); + host_irq = ppce500_pci_map_irq_slot(slot, pci_irq); + pci_map[i++] = cpu_to_be32(host_irq + 1); + pci_map[i++] = cpu_to_be32(0x1); + } + } + + assert((i * sizeof(uint32_t)) == *len); + + return pci_map; +} + +static void dt_serial_create(void *fdt, unsigned long long offset, + const char *soc, const char *mpic, + const char *alias, int idx, bool defcon) +{ + char *ser; + + ser = g_strdup_printf("%s/serial@%llx", soc, offset); + qemu_fdt_add_subnode(fdt, ser); + qemu_fdt_setprop_string(fdt, ser, "device_type", "serial"); + qemu_fdt_setprop_string(fdt, ser, "compatible", "ns16550"); + qemu_fdt_setprop_cells(fdt, ser, "reg", offset, 0x100); + qemu_fdt_setprop_cell(fdt, ser, "cell-index", idx); + qemu_fdt_setprop_cell(fdt, ser, "clock-frequency", PLATFORM_CLK_FREQ_HZ); + qemu_fdt_setprop_cells(fdt, ser, "interrupts", 42, 2); + 
qemu_fdt_setprop_phandle(fdt, ser, "interrupt-parent", mpic); + qemu_fdt_setprop_string(fdt, "/aliases", alias, ser); + + if (defcon) { + /* + * "linux,stdout-path" and "stdout" properties are deprecated by linux + * kernel. New platforms should only use the "stdout-path" property. Set + * the new property and continue using older property to remain + * compatible with the existing firmware. + */ + qemu_fdt_setprop_string(fdt, "/chosen", "linux,stdout-path", ser); + qemu_fdt_setprop_string(fdt, "/chosen", "stdout-path", ser); + } + g_free(ser); +} + +static void create_dt_mpc8xxx_gpio(void *fdt, const char *soc, const char *mpic) +{ + hwaddr mmio0 = MPC8XXX_GPIO_OFFSET; + int irq0 = MPC8XXX_GPIO_IRQ; + gchar *node = g_strdup_printf("%s/gpio@%"PRIx64, soc, mmio0); + gchar *poweroff = g_strdup_printf("%s/power-off", soc); + int gpio_ph; + + qemu_fdt_add_subnode(fdt, node); + qemu_fdt_setprop_string(fdt, node, "compatible", "fsl,qoriq-gpio"); + qemu_fdt_setprop_cells(fdt, node, "reg", mmio0, 0x1000); + qemu_fdt_setprop_cells(fdt, node, "interrupts", irq0, 0x2); + qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic); + qemu_fdt_setprop_cells(fdt, node, "#gpio-cells", 2); + qemu_fdt_setprop(fdt, node, "gpio-controller", NULL, 0); + gpio_ph = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_setprop_cell(fdt, node, "phandle", gpio_ph); + qemu_fdt_setprop_cell(fdt, node, "linux,phandle", gpio_ph); + + /* Power Off Pin */ + qemu_fdt_add_subnode(fdt, poweroff); + qemu_fdt_setprop_string(fdt, poweroff, "compatible", "gpio-poweroff"); + qemu_fdt_setprop_cells(fdt, poweroff, "gpios", gpio_ph, 0, 0); + + g_free(node); + g_free(poweroff); +} + +static void dt_rtc_create(void *fdt, const char *i2c, const char *alias) +{ + int offset = RTC_REGS_OFFSET; + + gchar *rtc = g_strdup_printf("%s/rtc@%"PRIx32, i2c, offset); + qemu_fdt_add_subnode(fdt, rtc); + qemu_fdt_setprop_string(fdt, rtc, "compatible", "pericom,pt7c4338"); + qemu_fdt_setprop_cells(fdt, rtc, "reg", offset); + qemu_fdt_setprop_string(fdt, "/aliases", alias, rtc); + + g_free(rtc); +} + +static void dt_i2c_create(void *fdt, const char *soc, const char *mpic, + const char *alias) +{ + hwaddr mmio0 = MPC8544_I2C_REGS_OFFSET; + int irq0 = MPC8544_I2C_IRQ; + + gchar *i2c = g_strdup_printf("%s/i2c@%"PRIx64, soc, mmio0); + qemu_fdt_add_subnode(fdt, i2c); + qemu_fdt_setprop_string(fdt, i2c, "device_type", "i2c"); + qemu_fdt_setprop_string(fdt, i2c, "compatible", "fsl-i2c"); + qemu_fdt_setprop_cells(fdt, i2c, "reg", mmio0, 0x14); + qemu_fdt_setprop_cells(fdt, i2c, "cell-index", 0); + qemu_fdt_setprop_cells(fdt, i2c, "interrupts", irq0, 0x2); + qemu_fdt_setprop_phandle(fdt, i2c, "interrupt-parent", mpic); + qemu_fdt_setprop_string(fdt, "/aliases", alias, i2c); + + g_free(i2c); +} + + +typedef struct PlatformDevtreeData { + void *fdt; + const char *mpic; + int irq_start; + const char *node; + PlatformBusDevice *pbus; +} PlatformDevtreeData; + +static int create_devtree_etsec(SysBusDevice *sbdev, PlatformDevtreeData *data) +{ + eTSEC *etsec = ETSEC_COMMON(sbdev); + PlatformBusDevice *pbus = data->pbus; + hwaddr mmio0 = platform_bus_get_mmio_addr(pbus, sbdev, 0); + int irq0 = platform_bus_get_irqn(pbus, sbdev, 0); + int irq1 = platform_bus_get_irqn(pbus, sbdev, 1); + int irq2 = platform_bus_get_irqn(pbus, sbdev, 2); + gchar *node = g_strdup_printf("/platform/ethernet@%"PRIx64, mmio0); + gchar *group = g_strdup_printf("%s/queue-group", node); + void *fdt = data->fdt; + + assert((int64_t)mmio0 >= 0); + assert(irq0 >= 0); + assert(irq1 >= 0); + assert(irq2 >= 
0); + + qemu_fdt_add_subnode(fdt, node); + qemu_fdt_setprop(fdt, node, "ranges", NULL, 0); + qemu_fdt_setprop_string(fdt, node, "device_type", "network"); + qemu_fdt_setprop_string(fdt, node, "compatible", "fsl,etsec2"); + qemu_fdt_setprop_string(fdt, node, "model", "eTSEC"); + qemu_fdt_setprop(fdt, node, "local-mac-address", etsec->conf.macaddr.a, 6); + qemu_fdt_setprop_cells(fdt, node, "fixed-link", 0, 1, 1000, 0, 0); + qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1); + qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1); + + qemu_fdt_add_subnode(fdt, group); + qemu_fdt_setprop_cells(fdt, group, "reg", mmio0, 0x1000); + qemu_fdt_setprop_cells(fdt, group, "interrupts", + data->irq_start + irq0, 0x2, + data->irq_start + irq1, 0x2, + data->irq_start + irq2, 0x2); + + g_free(node); + g_free(group); + + return 0; +} + +static void sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque) +{ + PlatformDevtreeData *data = opaque; + bool matched = false; + + if (object_dynamic_cast(OBJECT(sbdev), TYPE_ETSEC_COMMON)) { + create_devtree_etsec(sbdev, data); + matched = true; + } + + if (!matched) { + error_report("Device %s is not supported by this machine yet.", + qdev_fw_name(DEVICE(sbdev))); + exit(1); + } +} + +static void platform_bus_create_devtree(PPCE500MachineState *pms, + void *fdt, const char *mpic) +{ + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + gchar *node = g_strdup_printf("/platform@%"PRIx64, pmc->platform_bus_base); + const char platcomp[] = "qemu,platform\0simple-bus"; + uint64_t addr = pmc->platform_bus_base; + uint64_t size = pmc->platform_bus_size; + int irq_start = pmc->platform_bus_first_irq; + + /* Create a /platform node that we can put all devices into */ + + qemu_fdt_add_subnode(fdt, node); + qemu_fdt_setprop(fdt, node, "compatible", platcomp, sizeof(platcomp)); + + /* Our platform bus region is less than 32bit big, so 1 cell is enough for + address and size */ + qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1); + qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1); + qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, size); + + qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic); + + /* Create dt nodes for dynamic devices */ + PlatformDevtreeData data = { + .fdt = fdt, + .mpic = mpic, + .irq_start = irq_start, + .node = node, + .pbus = pms->pbus_dev, + }; + + /* Loop through all dynamic sysbus devices and create nodes for them */ + foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &data); + + g_free(node); +} + +static int ppce500_load_device_tree(PPCE500MachineState *pms, + hwaddr addr, + hwaddr initrd_base, + hwaddr initrd_size, + hwaddr kernel_base, + hwaddr kernel_size, + bool dry_run) +{ + MachineState *machine = MACHINE(pms); + unsigned int smp_cpus = machine->smp.cpus; + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + CPUPPCState *env = first_cpu->env_ptr; + int ret = -1; + uint64_t mem_reg_property[] = { 0, cpu_to_be64(machine->ram_size) }; + int fdt_size; + void *fdt; + uint8_t hypercall[16]; + uint32_t clock_freq = PLATFORM_CLK_FREQ_HZ; + uint32_t tb_freq = PLATFORM_CLK_FREQ_HZ; + int i; + char compatible_sb[] = "fsl,mpc8544-immr\0simple-bus"; + char *soc; + char *mpic; + uint32_t mpic_ph; + uint32_t msi_ph; + char *gutil; + char *pci; + char *msi; + uint32_t *pci_map = NULL; + int len; + uint32_t pci_ranges[14] = + { + 0x2000000, 0x0, pmc->pci_mmio_bus_base, + pmc->pci_mmio_base >> 32, pmc->pci_mmio_base, + 0x0, 0x20000000, + + 0x1000000, 0x0, 0x0, + 
pmc->pci_pio_base >> 32, pmc->pci_pio_base, + 0x0, 0x10000, + }; + const char *dtb_file = machine->dtb; + const char *toplevel_compat = machine->dt_compatible; + + if (dtb_file) { + char *filename; + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, dtb_file); + if (!filename) { + goto out; + } + + fdt = load_device_tree(filename, &fdt_size); + g_free(filename); + if (!fdt) { + goto out; + } + goto done; + } + + fdt = create_device_tree(&fdt_size); + if (fdt == NULL) { + goto out; + } + + /* Manipulate device tree in memory. */ + qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 2); + qemu_fdt_setprop_cell(fdt, "/", "#size-cells", 2); + + qemu_fdt_add_subnode(fdt, "/memory"); + qemu_fdt_setprop_string(fdt, "/memory", "device_type", "memory"); + qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property, + sizeof(mem_reg_property)); + + qemu_fdt_add_subnode(fdt, "/chosen"); + if (initrd_size) { + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", + initrd_base); + if (ret < 0) { + fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n"); + } + + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + if (ret < 0) { + fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n"); + } + + } + + if (kernel_base != -1ULL) { + qemu_fdt_setprop_cells(fdt, "/chosen", "qemu,boot-kernel", + kernel_base >> 32, kernel_base, + kernel_size >> 32, kernel_size); + } + + ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + machine->kernel_cmdline); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/bootargs\n"); + + if (kvm_enabled()) { + /* Read out host's frequencies */ + clock_freq = kvmppc_get_clockfreq(); + tb_freq = kvmppc_get_tbfreq(); + + /* indicate KVM hypercall interface */ + qemu_fdt_add_subnode(fdt, "/hypervisor"); + qemu_fdt_setprop_string(fdt, "/hypervisor", "compatible", + "linux,kvm"); + kvmppc_get_hypercall(env, hypercall, sizeof(hypercall)); + qemu_fdt_setprop(fdt, "/hypervisor", "hcall-instructions", + hypercall, sizeof(hypercall)); + /* if KVM supports the idle hcall, set property indicating this */ + if (kvmppc_get_hasidle(env)) { + qemu_fdt_setprop(fdt, "/hypervisor", "has-idle", NULL, 0); + } + } + + /* Create CPU nodes */ + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0); + + /* We need to generate the cpu nodes in reverse order, so Linux can pick + the first node as boot node and be happy */ + for (i = smp_cpus - 1; i >= 0; i--) { + CPUState *cpu; + char *cpu_name; + uint64_t cpu_release_addr = pmc->spin_base + (i * 0x20); + + cpu = qemu_get_cpu(i); + if (cpu == NULL) { + continue; + } + env = cpu->env_ptr; + + cpu_name = g_strdup_printf("/cpus/PowerPC,8544@%x", i); + qemu_fdt_add_subnode(fdt, cpu_name); + qemu_fdt_setprop_cell(fdt, cpu_name, "clock-frequency", clock_freq); + qemu_fdt_setprop_cell(fdt, cpu_name, "timebase-frequency", tb_freq); + qemu_fdt_setprop_string(fdt, cpu_name, "device_type", "cpu"); + qemu_fdt_setprop_cell(fdt, cpu_name, "reg", i); + qemu_fdt_setprop_cell(fdt, cpu_name, "d-cache-line-size", + env->dcache_line_size); + qemu_fdt_setprop_cell(fdt, cpu_name, "i-cache-line-size", + env->icache_line_size); + qemu_fdt_setprop_cell(fdt, cpu_name, "d-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cpu_name, "i-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cpu_name, "bus-frequency", 0); + if (cpu->cpu_index) { + qemu_fdt_setprop_string(fdt, cpu_name, "status", "disabled"); + 
qemu_fdt_setprop_string(fdt, cpu_name, "enable-method", + "spin-table"); + qemu_fdt_setprop_u64(fdt, cpu_name, "cpu-release-addr", + cpu_release_addr); + } else { + qemu_fdt_setprop_string(fdt, cpu_name, "status", "okay"); + } + g_free(cpu_name); + } + + qemu_fdt_add_subnode(fdt, "/aliases"); + /* XXX These should go into their respective devices' code */ + soc = g_strdup_printf("/soc@%"PRIx64, pmc->ccsrbar_base); + qemu_fdt_add_subnode(fdt, soc); + qemu_fdt_setprop_string(fdt, soc, "device_type", "soc"); + qemu_fdt_setprop(fdt, soc, "compatible", compatible_sb, + sizeof(compatible_sb)); + qemu_fdt_setprop_cell(fdt, soc, "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, soc, "#size-cells", 1); + qemu_fdt_setprop_cells(fdt, soc, "ranges", 0x0, + pmc->ccsrbar_base >> 32, pmc->ccsrbar_base, + MPC8544_CCSRBAR_SIZE); + /* XXX should contain a reasonable value */ + qemu_fdt_setprop_cell(fdt, soc, "bus-frequency", 0); + + mpic = g_strdup_printf("%s/pic@%llx", soc, MPC8544_MPIC_REGS_OFFSET); + qemu_fdt_add_subnode(fdt, mpic); + qemu_fdt_setprop_string(fdt, mpic, "device_type", "open-pic"); + qemu_fdt_setprop_string(fdt, mpic, "compatible", "fsl,mpic"); + qemu_fdt_setprop_cells(fdt, mpic, "reg", MPC8544_MPIC_REGS_OFFSET, + 0x40000); + qemu_fdt_setprop_cell(fdt, mpic, "#address-cells", 0); + qemu_fdt_setprop_cell(fdt, mpic, "#interrupt-cells", 2); + mpic_ph = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_setprop_cell(fdt, mpic, "phandle", mpic_ph); + qemu_fdt_setprop_cell(fdt, mpic, "linux,phandle", mpic_ph); + qemu_fdt_setprop(fdt, mpic, "interrupt-controller", NULL, 0); + + /* + * We have to generate ser1 first, because Linux takes the first + * device it finds in the dt as serial output device. And we generate + * devices in reverse order to the dt. + */ + if (serial_hd(1)) { + dt_serial_create(fdt, MPC8544_SERIAL1_REGS_OFFSET, + soc, mpic, "serial1", 1, false); + } + + if (serial_hd(0)) { + dt_serial_create(fdt, MPC8544_SERIAL0_REGS_OFFSET, + soc, mpic, "serial0", 0, true); + } + + /* i2c */ + dt_i2c_create(fdt, soc, mpic, "i2c"); + + dt_rtc_create(fdt, "i2c", "rtc"); + + + gutil = g_strdup_printf("%s/global-utilities@%llx", soc, + MPC8544_UTIL_OFFSET); + qemu_fdt_add_subnode(fdt, gutil); + qemu_fdt_setprop_string(fdt, gutil, "compatible", "fsl,mpc8544-guts"); + qemu_fdt_setprop_cells(fdt, gutil, "reg", MPC8544_UTIL_OFFSET, 0x1000); + qemu_fdt_setprop(fdt, gutil, "fsl,has-rstcr", NULL, 0); + g_free(gutil); + + msi = g_strdup_printf("/%s/msi@%llx", soc, MPC8544_MSI_REGS_OFFSET); + qemu_fdt_add_subnode(fdt, msi); + qemu_fdt_setprop_string(fdt, msi, "compatible", "fsl,mpic-msi"); + qemu_fdt_setprop_cells(fdt, msi, "reg", MPC8544_MSI_REGS_OFFSET, 0x200); + msi_ph = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_setprop_cells(fdt, msi, "msi-available-ranges", 0x0, 0x100); + qemu_fdt_setprop_phandle(fdt, msi, "interrupt-parent", mpic); + qemu_fdt_setprop_cells(fdt, msi, "interrupts", + 0xe0, 0x0, + 0xe1, 0x0, + 0xe2, 0x0, + 0xe3, 0x0, + 0xe4, 0x0, + 0xe5, 0x0, + 0xe6, 0x0, + 0xe7, 0x0); + qemu_fdt_setprop_cell(fdt, msi, "phandle", msi_ph); + qemu_fdt_setprop_cell(fdt, msi, "linux,phandle", msi_ph); + g_free(msi); + + pci = g_strdup_printf("/pci@%llx", + pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET); + qemu_fdt_add_subnode(fdt, pci); + qemu_fdt_setprop_cell(fdt, pci, "cell-index", 0); + qemu_fdt_setprop_string(fdt, pci, "compatible", "fsl,mpc8540-pci"); + qemu_fdt_setprop_string(fdt, pci, "device_type", "pci"); + qemu_fdt_setprop_cells(fdt, pci, "interrupt-map-mask", 0xf800, 0x0, + 0x0, 0x7); + pci_map = 
pci_map_create(fdt, qemu_fdt_get_phandle(fdt, mpic), + pmc->pci_first_slot, pmc->pci_nr_slots, + &len); + qemu_fdt_setprop(fdt, pci, "interrupt-map", pci_map, len); + qemu_fdt_setprop_phandle(fdt, pci, "interrupt-parent", mpic); + qemu_fdt_setprop_cells(fdt, pci, "interrupts", 24, 2); + qemu_fdt_setprop_cells(fdt, pci, "bus-range", 0, 255); + for (i = 0; i < 14; i++) { + pci_ranges[i] = cpu_to_be32(pci_ranges[i]); + } + qemu_fdt_setprop_cell(fdt, pci, "fsl,msi", msi_ph); + qemu_fdt_setprop(fdt, pci, "ranges", pci_ranges, sizeof(pci_ranges)); + qemu_fdt_setprop_cells(fdt, pci, "reg", + (pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET) >> 32, + (pmc->ccsrbar_base + MPC8544_PCI_REGS_OFFSET), + 0, 0x1000); + qemu_fdt_setprop_cell(fdt, pci, "clock-frequency", 66666666); + qemu_fdt_setprop_cell(fdt, pci, "#interrupt-cells", 1); + qemu_fdt_setprop_cell(fdt, pci, "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, pci, "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/aliases", "pci0", pci); + g_free(pci); + + if (pmc->has_mpc8xxx_gpio) { + create_dt_mpc8xxx_gpio(fdt, soc, mpic); + } + g_free(soc); + + if (pms->pbus_dev) { + platform_bus_create_devtree(pms, fdt, mpic); + } + g_free(mpic); + + pmc->fixup_devtree(fdt); + + if (toplevel_compat) { + qemu_fdt_setprop(fdt, "/", "compatible", toplevel_compat, + strlen(toplevel_compat) + 1); + } + +done: + if (!dry_run) { + qemu_fdt_dumpdtb(fdt, fdt_size); + cpu_physical_memory_write(addr, fdt, fdt_size); + } + ret = fdt_size; + g_free(fdt); + +out: + g_free(pci_map); + + return ret; +} + +typedef struct DeviceTreeParams { + PPCE500MachineState *machine; + hwaddr addr; + hwaddr initrd_base; + hwaddr initrd_size; + hwaddr kernel_base; + hwaddr kernel_size; + Notifier notifier; +} DeviceTreeParams; + +static void ppce500_reset_device_tree(void *opaque) +{ + DeviceTreeParams *p = opaque; + ppce500_load_device_tree(p->machine, p->addr, p->initrd_base, + p->initrd_size, p->kernel_base, p->kernel_size, + false); +} + +static void ppce500_init_notify(Notifier *notifier, void *data) +{ + DeviceTreeParams *p = container_of(notifier, DeviceTreeParams, notifier); + ppce500_reset_device_tree(p); +} + +static int ppce500_prep_device_tree(PPCE500MachineState *machine, + hwaddr addr, + hwaddr initrd_base, + hwaddr initrd_size, + hwaddr kernel_base, + hwaddr kernel_size) +{ + DeviceTreeParams *p = g_new(DeviceTreeParams, 1); + p->machine = machine; + p->addr = addr; + p->initrd_base = initrd_base; + p->initrd_size = initrd_size; + p->kernel_base = kernel_base; + p->kernel_size = kernel_size; + + qemu_register_reset(ppce500_reset_device_tree, p); + p->notifier.notify = ppce500_init_notify; + qemu_add_machine_init_done_notifier(&p->notifier); + + /* Issue the device tree loader once, so that we get the size of the blob */ + return ppce500_load_device_tree(machine, addr, initrd_base, initrd_size, + kernel_base, kernel_size, true); +} + +/* Create -kernel TLB entries for BookE. 
*/ +hwaddr booke206_page_size_to_tlb(uint64_t size) +{ + return 63 - clz64(size / KiB); +} + +static int booke206_initial_map_tsize(CPUPPCState *env) +{ + struct boot_info *bi = env->load_info; + hwaddr dt_end; + int ps; + + /* Our initial TLB entry needs to cover everything from 0 to + the device tree top */ + dt_end = bi->dt_base + bi->dt_size; + ps = booke206_page_size_to_tlb(dt_end) + 1; + if (ps & 1) { + /* e500v2 can only do even TLB size bits */ + ps++; + } + return ps; +} + +static uint64_t mmubooke_initial_mapsize(CPUPPCState *env) +{ + int tsize; + + tsize = booke206_initial_map_tsize(env); + return (1ULL << 10 << tsize); +} + +static void mmubooke_create_initial_mapping(CPUPPCState *env) +{ + ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 0); + hwaddr size; + int ps; + + ps = booke206_initial_map_tsize(env); + size = (ps << MAS1_TSIZE_SHIFT); + tlb->mas1 = MAS1_VALID | size; + tlb->mas2 = 0; + tlb->mas7_3 = 0; + tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; + + env->tlb_dirty = true; +} + +static void ppce500_cpu_reset_sec(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + + cpu_reset(cs); + + cs->exception_index = EXCP_HLT; +} + +static void ppce500_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + struct boot_info *bi = env->load_info; + + cpu_reset(cs); + + /* Set initial guest state. */ + cs->halted = 0; + env->gpr[1] = (16 * MiB) - 8; + env->gpr[3] = bi->dt_base; + env->gpr[4] = 0; + env->gpr[5] = 0; + env->gpr[6] = EPAPR_MAGIC; + env->gpr[7] = mmubooke_initial_mapsize(env); + env->gpr[8] = 0; + env->gpr[9] = 0; + env->nip = bi->entry; + mmubooke_create_initial_mapping(env); +} + +static DeviceState *ppce500_init_mpic_qemu(PPCE500MachineState *pms, + IrqLines *irqs) +{ + DeviceState *dev; + SysBusDevice *s; + int i, j, k; + MachineState *machine = MACHINE(pms); + unsigned int smp_cpus = machine->smp.cpus; + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + + dev = qdev_new(TYPE_OPENPIC); + object_property_add_child(OBJECT(machine), "pic", OBJECT(dev)); + qdev_prop_set_uint32(dev, "model", pmc->mpic_version); + qdev_prop_set_uint32(dev, "nb_cpus", smp_cpus); + + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + + k = 0; + for (i = 0; i < smp_cpus; i++) { + for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { + sysbus_connect_irq(s, k++, irqs[i].irq[j]); + } + } + + return dev; +} + +static DeviceState *ppce500_init_mpic_kvm(const PPCE500MachineClass *pmc, + IrqLines *irqs, Error **errp) +{ + DeviceState *dev; + CPUState *cs; + + dev = qdev_new(TYPE_KVM_OPENPIC); + qdev_prop_set_uint32(dev, "model", pmc->mpic_version); + + if (!sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), errp)) { + object_unparent(OBJECT(dev)); + return NULL; + } + + CPU_FOREACH(cs) { + if (kvm_openpic_connect_vcpu(dev, cs)) { + fprintf(stderr, "%s: failed to connect vcpu to irqchip\n", + __func__); + abort(); + } + } + + return dev; +} + +static DeviceState *ppce500_init_mpic(PPCE500MachineState *pms, + MemoryRegion *ccsr, + IrqLines *irqs) +{ + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms); + DeviceState *dev = NULL; + SysBusDevice *s; + + if (kvm_enabled()) { + Error *err = NULL; + + if (kvm_kernel_irqchip_allowed()) { + dev = ppce500_init_mpic_kvm(pmc, irqs, &err); + } + if (kvm_kernel_irqchip_required() && !dev) { + error_reportf_err(err, + "kernel_irqchip requested but unavailable: "); + exit(1); + } + } + + if (!dev) { + dev = 
ppce500_init_mpic_qemu(pms, irqs); + } + + s = SYS_BUS_DEVICE(dev); + memory_region_add_subregion(ccsr, MPC8544_MPIC_REGS_OFFSET, + s->mmio[0].memory); + + return dev; +} + +static void ppce500_power_off(void *opaque, int line, int on) +{ + if (on) { + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + } +} + +void ppce500_init(MachineState *machine) +{ + MemoryRegion *address_space_mem = get_system_memory(); + PPCE500MachineState *pms = PPCE500_MACHINE(machine); + const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(machine); + PCIBus *pci_bus; + CPUPPCState *env = NULL; + uint64_t loadaddr; + hwaddr kernel_base = -1LL; + int kernel_size = 0; + hwaddr dt_base = 0; + hwaddr initrd_base = 0; + int initrd_size = 0; + hwaddr cur_base = 0; + char *filename; + const char *payload_name; + bool kernel_as_payload; + hwaddr bios_entry = 0; + target_long payload_size; + struct boot_info *boot_info; + int dt_size; + int i; + unsigned int smp_cpus = machine->smp.cpus; + /* irq num for pin INTA, INTB, INTC and INTD is 1, 2, 3 and + * 4 respectively */ + unsigned int pci_irq_nrs[PCI_NUM_PINS] = {1, 2, 3, 4}; + IrqLines *irqs; + DeviceState *dev, *mpicdev; + CPUPPCState *firstenv = NULL; + MemoryRegion *ccsr_addr_space; + SysBusDevice *s; + PPCE500CCSRState *ccsr; + I2CBus *i2c; + + irqs = g_new0(IrqLines, smp_cpus); + for (i = 0; i < smp_cpus; i++) { + PowerPCCPU *cpu; + CPUState *cs; + qemu_irq *input; + + cpu = POWERPC_CPU(object_new(machine->cpu_type)); + env = &cpu->env; + cs = CPU(cpu); + + if (env->mmu_model != POWERPC_MMU_BOOKE206) { + error_report("MMU model %i not supported by this machine", + env->mmu_model); + exit(1); + } + + /* + * Secondary CPU starts in halted state for now. Needs to change + * when implementing non-kernel boot. + */ + object_property_set_bool(OBJECT(cs), "start-powered-off", i != 0, + &error_fatal); + qdev_realize_and_unref(DEVICE(cs), NULL, &error_fatal); + + if (!firstenv) { + firstenv = env; + } + + input = (qemu_irq *)env->irq_inputs; + irqs[i].irq[OPENPIC_OUTPUT_INT] = input[PPCE500_INPUT_INT]; + irqs[i].irq[OPENPIC_OUTPUT_CINT] = input[PPCE500_INPUT_CINT]; + env->spr_cb[SPR_BOOKE_PIR].default_value = cs->cpu_index = i; + env->mpic_iack = pmc->ccsrbar_base + MPC8544_MPIC_REGS_OFFSET + 0xa0; + + ppc_booke_timers_init(cpu, PLATFORM_CLK_FREQ_HZ, PPC_TIMER_E500); + + /* Register reset handler */ + if (!i) { + /* Primary CPU */ + struct boot_info *boot_info; + boot_info = g_malloc0(sizeof(struct boot_info)); + qemu_register_reset(ppce500_cpu_reset, cpu); + env->load_info = boot_info; + } else { + /* Secondary CPUs */ + qemu_register_reset(ppce500_cpu_reset_sec, cpu); + } + } + + env = firstenv; + + if (!QEMU_IS_ALIGNED(machine->ram_size, RAM_SIZES_ALIGN)) { + error_report("RAM size must be multiple of %" PRIu64, RAM_SIZES_ALIGN); + exit(EXIT_FAILURE); + } + + /* Register Memory */ + memory_region_add_subregion(address_space_mem, 0, machine->ram); + + dev = qdev_new("e500-ccsr"); + object_property_add_child(qdev_get_machine(), "e500-ccsr", + OBJECT(dev)); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + ccsr = CCSR(dev); + ccsr_addr_space = &ccsr->ccsr_space; + memory_region_add_subregion(address_space_mem, pmc->ccsrbar_base, + ccsr_addr_space); + + mpicdev = ppce500_init_mpic(pms, ccsr_addr_space, irqs); + g_free(irqs); + + /* Serial */ + if (serial_hd(0)) { + serial_mm_init(ccsr_addr_space, MPC8544_SERIAL0_REGS_OFFSET, + 0, qdev_get_gpio_in(mpicdev, 42), 399193, + serial_hd(0), DEVICE_BIG_ENDIAN); + } + + if (serial_hd(1)) { + 
serial_mm_init(ccsr_addr_space, MPC8544_SERIAL1_REGS_OFFSET, + 0, qdev_get_gpio_in(mpicdev, 42), 399193, + serial_hd(1), DEVICE_BIG_ENDIAN); + } + /* I2C */ + dev = qdev_new("mpc-i2c"); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC8544_I2C_IRQ)); + memory_region_add_subregion(ccsr_addr_space, MPC8544_I2C_REGS_OFFSET, + sysbus_mmio_get_region(s, 0)); + i2c = (I2CBus *)qdev_get_child_bus(dev, "i2c"); + i2c_slave_create_simple(i2c, "ds1338", RTC_REGS_OFFSET); + + + /* General Utility device */ + dev = qdev_new("mpc8544-guts"); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + memory_region_add_subregion(ccsr_addr_space, MPC8544_UTIL_OFFSET, + sysbus_mmio_get_region(s, 0)); + + /* PCI */ + dev = qdev_new("e500-pcihost"); + object_property_add_child(qdev_get_machine(), "pci-host", OBJECT(dev)); + qdev_prop_set_uint32(dev, "first_slot", pmc->pci_first_slot); + qdev_prop_set_uint32(dev, "first_pin_irq", pci_irq_nrs[0]); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + for (i = 0; i < PCI_NUM_PINS; i++) { + sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, pci_irq_nrs[i])); + } + + memory_region_add_subregion(ccsr_addr_space, MPC8544_PCI_REGS_OFFSET, + sysbus_mmio_get_region(s, 0)); + + pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); + if (!pci_bus) + printf("couldn't create PCI controller!\n"); + + if (pci_bus) { + /* Register network interfaces. */ + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "virtio-net-pci", NULL); + } + } + + /* Register spinning region */ + sysbus_create_simple("e500-spin", pmc->spin_base, NULL); + + if (pmc->has_mpc8xxx_gpio) { + qemu_irq poweroff_irq; + + dev = qdev_new("mpc8xxx_gpio"); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_connect_irq(s, 0, qdev_get_gpio_in(mpicdev, MPC8XXX_GPIO_IRQ)); + memory_region_add_subregion(ccsr_addr_space, MPC8XXX_GPIO_OFFSET, + sysbus_mmio_get_region(s, 0)); + + /* Power Off GPIO at Pin 0 */ + poweroff_irq = qemu_allocate_irq(ppce500_power_off, NULL, 0); + qdev_connect_gpio_out(dev, 0, poweroff_irq); + } + + /* Platform Bus Device */ + if (pmc->has_platform_bus) { + dev = qdev_new(TYPE_PLATFORM_BUS_DEVICE); + dev->id = g_strdup(TYPE_PLATFORM_BUS_DEVICE); + qdev_prop_set_uint32(dev, "num_irqs", pmc->platform_bus_num_irqs); + qdev_prop_set_uint32(dev, "mmio_size", pmc->platform_bus_size); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + pms->pbus_dev = PLATFORM_BUS_DEVICE(dev); + + s = SYS_BUS_DEVICE(pms->pbus_dev); + for (i = 0; i < pmc->platform_bus_num_irqs; i++) { + int irqn = pmc->platform_bus_first_irq + i; + sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, irqn)); + } + + memory_region_add_subregion(address_space_mem, + pmc->platform_bus_base, + sysbus_mmio_get_region(s, 0)); + } + + /* + * Smart firmware defaults ahead! + * + * We follow the following table to select which payload we execute. + * + * -kernel | -bios | payload + * ---------+-------+--------- + * N | Y | u-boot + * N | N | u-boot + * Y | Y | u-boot + * Y | N | kernel + * + * This ensures backwards compatibility with how we used to expose + * -kernel to users but allows them to run through u-boot as well. 
+ */ + kernel_as_payload = false; + if (machine->firmware == NULL) { + if (machine->kernel_filename) { + payload_name = machine->kernel_filename; + kernel_as_payload = true; + } else { + payload_name = "u-boot.e500"; + } + } else { + payload_name = machine->firmware; + } + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, payload_name); + if (!filename) { + error_report("could not find firmware/kernel file '%s'", payload_name); + exit(1); + } + + payload_size = load_elf(filename, NULL, NULL, NULL, + &bios_entry, &loadaddr, NULL, NULL, + 1, PPC_ELF_MACHINE, 0, 0); + if (payload_size < 0) { + /* + * Hrm. No ELF image? Try a uImage, maybe someone is giving us an + * ePAPR compliant kernel + */ + loadaddr = LOAD_UIMAGE_LOADADDR_INVALID; + payload_size = load_uimage(filename, &bios_entry, &loadaddr, NULL, + NULL, NULL); + if (payload_size < 0) { + error_report("could not load firmware '%s'", filename); + exit(1); + } + } + + g_free(filename); + + if (kernel_as_payload) { + kernel_base = loadaddr; + kernel_size = payload_size; + } + + cur_base = loadaddr + payload_size; + if (cur_base < 32 * MiB) { + /* u-boot occupies memory up to 32MB, so load blobs above */ + cur_base = 32 * MiB; + } + + /* Load bare kernel only if no bios/u-boot has been provided */ + if (machine->kernel_filename && !kernel_as_payload) { + kernel_base = cur_base; + kernel_size = load_image_targphys(machine->kernel_filename, + cur_base, + machine->ram_size - cur_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + + cur_base += kernel_size; + } + + /* Load initrd. */ + if (machine->initrd_filename) { + initrd_base = (cur_base + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK; + initrd_size = load_image_targphys(machine->initrd_filename, initrd_base, + machine->ram_size - initrd_base); + + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + + cur_base = initrd_base + initrd_size; + } + + /* + * Reserve space for dtb behind the kernel image because Linux has a bug + * where it can only handle the dtb if it's within the first 64MB of where + * <kernel> starts. dtb cannot not reach initrd_base because INITRD_LOAD_PAD + * ensures enough space between kernel and initrd. 
+ */ + dt_base = (loadaddr + payload_size + DTC_LOAD_PAD) & ~DTC_PAD_MASK; + if (dt_base + DTB_MAX_SIZE > machine->ram_size) { + error_report("not enough memory for device tree"); + exit(1); + } + + dt_size = ppce500_prep_device_tree(pms, dt_base, + initrd_base, initrd_size, + kernel_base, kernel_size); + if (dt_size < 0) { + error_report("couldn't load device tree"); + exit(1); + } + assert(dt_size < DTB_MAX_SIZE); + + boot_info = env->load_info; + boot_info->entry = bios_entry; + boot_info->dt_base = dt_base; + boot_info->dt_size = dt_size; +} + +static void e500_ccsr_initfn(Object *obj) +{ + PPCE500CCSRState *ccsr = CCSR(obj); + memory_region_init(&ccsr->ccsr_space, obj, "e500-ccsr", + MPC8544_CCSRBAR_SIZE); +} + +static const TypeInfo e500_ccsr_info = { + .name = TYPE_CCSR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PPCE500CCSRState), + .instance_init = e500_ccsr_initfn, +}; + +static const TypeInfo ppce500_info = { + .name = TYPE_PPCE500_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(PPCE500MachineState), + .class_size = sizeof(PPCE500MachineClass), +}; + +static void e500_register_types(void) +{ + type_register_static(&e500_ccsr_info); + type_register_static(&ppce500_info); +} + +type_init(e500_register_types) diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h new file mode 100644 index 000000000..1e5853b03 --- /dev/null +++ b/hw/ppc/e500.h @@ -0,0 +1,49 @@ +#ifndef PPCE500_H +#define PPCE500_H + +#include "hw/boards.h" +#include "hw/platform-bus.h" +#include "qom/object.h" + +struct PPCE500MachineState { + /*< private >*/ + MachineState parent_obj; + + /* points to instance of TYPE_PLATFORM_BUS_DEVICE if + * board supports dynamic sysbus devices + */ + PlatformBusDevice *pbus_dev; +}; + +struct PPCE500MachineClass { + /*< private >*/ + MachineClass parent_class; + + /* required -- must at least add toplevel board compatible */ + void (*fixup_devtree)(void *fdt); + + int pci_first_slot; + int pci_nr_slots; + + int mpic_version; + bool has_mpc8xxx_gpio; + bool has_platform_bus; + hwaddr platform_bus_base; + hwaddr platform_bus_size; + int platform_bus_first_irq; + int platform_bus_num_irqs; + hwaddr ccsrbar_base; + hwaddr pci_pio_base; + hwaddr pci_mmio_base; + hwaddr pci_mmio_bus_base; + hwaddr spin_base; +}; + +void ppce500_init(MachineState *machine); + +hwaddr booke206_page_size_to_tlb(uint64_t size); + +#define TYPE_PPCE500_MACHINE "ppce500-base-machine" +OBJECT_DECLARE_TYPE(PPCE500MachineState, PPCE500MachineClass, PPCE500_MACHINE) + +#endif diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c new file mode 100644 index 000000000..fc911bbb7 --- /dev/null +++ b/hw/ppc/e500plat.c @@ -0,0 +1,122 @@ +/* + * Generic device-tree-driven paravirt PPC e500 platform + * + * Copyright 2012 Freescale Semiconductor, Inc. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "e500.h" +#include "hw/net/fsl_etsec/etsec.h" +#include "sysemu/device_tree.h" +#include "sysemu/kvm.h" +#include "hw/sysbus.h" +#include "hw/pci/pci.h" +#include "hw/ppc/openpic.h" +#include "kvm_ppc.h" + +static void e500plat_fixup_devtree(void *fdt) +{ + const char model[] = "QEMU ppce500"; + const char compatible[] = "fsl,qemu-e500"; + + qemu_fdt_setprop(fdt, "/", "model", model, sizeof(model)); + qemu_fdt_setprop(fdt, "/", "compatible", compatible, + sizeof(compatible)); +} + +static void e500plat_init(MachineState *machine) +{ + PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(machine); + /* Older KVM versions don't support EPR which breaks guests when we announce + MPIC variants that support EPR. Revert to an older one for those */ + if (kvm_enabled() && !kvmppc_has_cap_epr()) { + pmc->mpic_version = OPENPIC_MODEL_FSL_MPIC_20; + } + + ppce500_init(machine); +} + +static void e500plat_machine_device_plug_cb(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + PPCE500MachineState *pms = PPCE500_MACHINE(hotplug_dev); + + if (pms->pbus_dev) { + MachineClass *mc = MACHINE_GET_CLASS(pms); + + if (device_is_dynamic_sysbus(mc, dev)) { + platform_bus_link_device(pms->pbus_dev, SYS_BUS_DEVICE(dev)); + } + } +} + +static +HotplugHandler *e500plat_machine_get_hotpug_handler(MachineState *machine, + DeviceState *dev) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + + if (device_is_dynamic_sysbus(mc, dev)) { + return HOTPLUG_HANDLER(machine); + } + + return NULL; +} + +#define TYPE_E500PLAT_MACHINE MACHINE_TYPE_NAME("ppce500") + +static void e500plat_machine_class_init(ObjectClass *oc, void *data) +{ + PPCE500MachineClass *pmc = PPCE500_MACHINE_CLASS(oc); + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); + MachineClass *mc = MACHINE_CLASS(oc); + + assert(!mc->get_hotplug_handler); + mc->get_hotplug_handler = e500plat_machine_get_hotpug_handler; + hc->plug = e500plat_machine_device_plug_cb; + + pmc->pci_first_slot = 0x1; + pmc->pci_nr_slots = PCI_SLOT_MAX - 1; + pmc->fixup_devtree = e500plat_fixup_devtree; + pmc->mpic_version = OPENPIC_MODEL_FSL_MPIC_42; + pmc->has_mpc8xxx_gpio = true; + pmc->has_platform_bus = true; + pmc->platform_bus_base = 0xf00000000ULL; + pmc->platform_bus_size = 128 * MiB; + pmc->platform_bus_first_irq = 5; + pmc->platform_bus_num_irqs = 10; + pmc->ccsrbar_base = 0xFE0000000ULL; + pmc->pci_pio_base = 0xFE1000000ULL; + pmc->pci_mmio_base = 0xC00000000ULL; + pmc->pci_mmio_bus_base = 0xE0000000ULL; + pmc->spin_base = 0xFEF000000ULL; + + mc->desc = "generic paravirt e500 platform"; + mc->init = e500plat_init; + mc->max_cpus = 32; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30"); + mc->default_ram_id = "mpc8544ds.ram"; + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_ETSEC_COMMON); + } + +static const TypeInfo e500plat_info = { + .name = TYPE_E500PLAT_MACHINE, + .parent = TYPE_PPCE500_MACHINE, + .class_init = e500plat_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void e500plat_register_types(void) +{ + type_register_static(&e500plat_info); +} +type_init(e500plat_register_types) diff --git a/hw/ppc/fdt.c b/hw/ppc/fdt.c new file mode 100644 index 000000000..0828ad725 --- /dev/null +++ b/hw/ppc/fdt.c @@ -0,0 +1,49 @@ +/* + * QEMU PowerPC helper routines for the device tree. + * + * Copyright (C) 2016 IBM Corp. + * + * This code is licensed under the GPL version 2 or later. 
See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" +#include "target/ppc/mmu-hash64.h" + +#include "hw/ppc/fdt.h" + +#if defined(TARGET_PPC64) +size_t ppc_create_page_sizes_prop(PowerPCCPU *cpu, uint32_t *prop, + size_t maxsize) +{ + size_t maxcells = maxsize / sizeof(uint32_t); + int i, j, count; + uint32_t *p = prop; + + for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { + PPCHash64SegmentPageSizes *sps = &cpu->hash64_opts->sps[i]; + + if (!sps->page_shift) { + break; + } + for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) { + if (sps->enc[count].page_shift == 0) { + break; + } + } + if ((p - prop) >= (maxcells - 3 - count * 2)) { + break; + } + *(p++) = cpu_to_be32(sps->page_shift); + *(p++) = cpu_to_be32(sps->slb_enc); + *(p++) = cpu_to_be32(count); + for (j = 0; j < count; j++) { + *(p++) = cpu_to_be32(sps->enc[j].page_shift); + *(p++) = cpu_to_be32(sps->enc[j].pte_enc); + } + } + + return (p - prop) * sizeof(uint32_t); +} +#endif diff --git a/hw/ppc/fw_cfg.c b/hw/ppc/fw_cfg.c new file mode 100644 index 000000000..a88b5c4bd --- /dev/null +++ b/hw/ppc/fw_cfg.c @@ -0,0 +1,45 @@ +/* + * fw_cfg helpers (PPC specific) + * + * Copyright (c) 2019 Red Hat, Inc. + * + * Author: + * Philippe Mathieu-Daudé <philmd@redhat.com> + * + * SPDX-License-Identifier: GPL-2.0-or-later + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/ppc/ppc.h" +#include "hw/nvram/fw_cfg.h" + +const char *fw_cfg_arch_key_name(uint16_t key) +{ + static const struct { + uint16_t key; + const char *name; + } fw_cfg_arch_wellknown_keys[] = { + {FW_CFG_PPC_WIDTH, "width"}, + {FW_CFG_PPC_HEIGHT, "height"}, + {FW_CFG_PPC_DEPTH, "depth"}, + {FW_CFG_PPC_TBFREQ, "tbfreq"}, + {FW_CFG_PPC_CLOCKFREQ, "clockfreq"}, + {FW_CFG_PPC_IS_KVM, "is_kvm"}, + {FW_CFG_PPC_KVM_HC, "kvm_hc"}, + {FW_CFG_PPC_KVM_PID, "pid"}, + {FW_CFG_PPC_NVRAM_ADDR, "nvram_addr"}, + {FW_CFG_PPC_BUSFREQ, "busfreq"}, + {FW_CFG_PPC_NVRAM_FLAT, "nvram_flat"}, + {FW_CFG_PPC_VIACONFIG, "viaconfig"}, + }; + + for (size_t i = 0; i < ARRAY_SIZE(fw_cfg_arch_wellknown_keys); i++) { + if (fw_cfg_arch_wellknown_keys[i].key == key) { + return fw_cfg_arch_wellknown_keys[i].name; + } + } + return NULL; +} diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h new file mode 100644 index 000000000..22c840807 --- /dev/null +++ b/hw/ppc/mac.h @@ -0,0 +1,108 @@ +/* + * QEMU PowerMac emulation shared definitions and prototypes + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef PPC_MAC_H +#define PPC_MAC_H + +#include "qemu/units.h" +#include "exec/memory.h" +#include "hw/boards.h" +#include "hw/sysbus.h" +#include "hw/input/adb.h" +#include "hw/misc/mos6522.h" +#include "hw/pci/pci_host.h" +#include "hw/pci-host/uninorth.h" +#include "qom/object.h" + +/* SMP is not enabled, for now */ +#define MAX_CPUS 1 + +#define NVRAM_SIZE 0x2000 +#define PROM_FILENAME "openbios-ppc" + +#define KERNEL_LOAD_ADDR 0x01000000 +#define KERNEL_GAP 0x00100000 + +#define ESCC_CLOCK 3686400 + +/* Old World IRQs */ +#define OLDWORLD_CUDA_IRQ 0x12 +#define OLDWORLD_ESCCB_IRQ 0x10 +#define OLDWORLD_ESCCA_IRQ 0xf +#define OLDWORLD_IDE0_IRQ 0xd +#define OLDWORLD_IDE0_DMA_IRQ 0x2 +#define OLDWORLD_IDE1_IRQ 0xe +#define OLDWORLD_IDE1_DMA_IRQ 0x3 + +/* New World IRQs */ +#define NEWWORLD_CUDA_IRQ 0x19 +#define NEWWORLD_PMU_IRQ 0x19 +#define NEWWORLD_ESCCB_IRQ 0x24 +#define NEWWORLD_ESCCA_IRQ 0x25 +#define NEWWORLD_IDE0_IRQ 0xd +#define NEWWORLD_IDE0_DMA_IRQ 0x2 +#define NEWWORLD_IDE1_IRQ 0xe +#define NEWWORLD_IDE1_DMA_IRQ 0x3 +#define NEWWORLD_EXTING_GPIO1 0x2f +#define NEWWORLD_EXTING_GPIO9 0x37 + +/* Core99 machine */ +#define TYPE_CORE99_MACHINE MACHINE_TYPE_NAME("mac99") +typedef struct Core99MachineState Core99MachineState; +DECLARE_INSTANCE_CHECKER(Core99MachineState, CORE99_MACHINE, + TYPE_CORE99_MACHINE) + +#define CORE99_VIA_CONFIG_CUDA 0x0 +#define CORE99_VIA_CONFIG_PMU 0x1 +#define CORE99_VIA_CONFIG_PMU_ADB 0x2 + +struct Core99MachineState { + /*< private >*/ + MachineState parent; + + uint8_t via_config; +}; + +/* Grackle PCI */ +#define TYPE_GRACKLE_PCI_HOST_BRIDGE "grackle-pcihost" + +/* Mac NVRAM */ +#define TYPE_MACIO_NVRAM "macio-nvram" +OBJECT_DECLARE_SIMPLE_TYPE(MacIONVRAMState, MACIO_NVRAM) + +struct MacIONVRAMState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + uint32_t size; + uint32_t it_shift; + + MemoryRegion mem; + uint8_t *data; +}; + +void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len); +#endif /* PPC_MAC_H */ diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c new file mode 100644 index 000000000..7bb7ac399 --- /dev/null +++ b/hw/ppc/mac_newworld.c @@ -0,0 +1,663 @@ +/* + * QEMU PowerPC CHRP (currently NewWorld PowerMac) hardware System Emulator + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * PCI bus layout on a real G5 (U3 based): + * + * 0000:f0:0b.0 Host bridge [0600]: Apple Computer Inc. U3 AGP [106b:004b] + * 0000:f0:10.0 VGA compatible controller [0300]: ATI Technologies Inc RV350 AP [Radeon 9600] [1002:4150] + * 0001:00:00.0 Host bridge [0600]: Apple Computer Inc. CPC945 HT Bridge [106b:004a] + * 0001:00:01.0 PCI bridge [0604]: Advanced Micro Devices [AMD] AMD-8131 PCI-X Bridge [1022:7450] (rev 12) + * 0001:00:02.0 PCI bridge [0604]: Advanced Micro Devices [AMD] AMD-8131 PCI-X Bridge [1022:7450] (rev 12) + * 0001:00:03.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0045] + * 0001:00:04.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0046] + * 0001:00:05.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0047] + * 0001:00:06.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0048] + * 0001:00:07.0 PCI bridge [0604]: Apple Computer Inc. K2 HT-PCI Bridge [106b:0049] + * 0001:01:07.0 Class [ff00]: Apple Computer Inc. K2 KeyLargo Mac/IO [106b:0041] (rev 20) + * 0001:01:08.0 USB Controller [0c03]: Apple Computer Inc. K2 KeyLargo USB [106b:0040] + * 0001:01:09.0 USB Controller [0c03]: Apple Computer Inc. K2 KeyLargo USB [106b:0040] + * 0001:02:0b.0 USB Controller [0c03]: NEC Corporation USB [1033:0035] (rev 43) + * 0001:02:0b.1 USB Controller [0c03]: NEC Corporation USB [1033:0035] (rev 43) + * 0001:02:0b.2 USB Controller [0c03]: NEC Corporation USB 2.0 [1033:00e0] (rev 04) + * 0001:03:0d.0 Class [ff00]: Apple Computer Inc. K2 ATA/100 [106b:0043] + * 0001:03:0e.0 FireWire (IEEE 1394) [0c00]: Apple Computer Inc. K2 FireWire [106b:0042] + * 0001:04:0f.0 Ethernet controller [0200]: Apple Computer Inc. 
K2 GMAC (Sun GEM) [106b:004c] + * 0001:05:0c.0 IDE interface [0101]: Broadcom K2 SATA [1166:0240] + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qapi/error.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/mac.h" +#include "hw/input/adb.h" +#include "hw/ppc/mac_dbdma.h" +#include "hw/pci/pci.h" +#include "net/net.h" +#include "sysemu/sysemu.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/char/escc.h" +#include "hw/misc/macio/macio.h" +#include "hw/ppc/openpic.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "kvm_ppc.h" +#include "hw/usb.h" +#include "hw/sysbus.h" +#include "trace.h" + +#define MAX_IDE_BUS 2 +#define CFG_ADDR 0xf0000510 +#define TBFREQ (100UL * 1000UL * 1000UL) +#define CLOCKFREQ (900UL * 1000UL * 1000UL) +#define BUSFREQ (100UL * 1000UL * 1000UL) + +#define NDRV_VGA_FILENAME "qemu_vga.ndrv" + +#define PROM_BASE 0xfff00000 +#define PROM_SIZE (1 * MiB) + +static void fw_cfg_boot_set(void *opaque, const char *boot_device, + Error **errp) +{ + fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); +} + +static uint64_t translate_kernel_address(void *opaque, uint64_t addr) +{ + return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR; +} + +static void ppc_core99_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); + /* 970 CPUs want to get their initial IP as part of their boot protocol */ + cpu->env.nip = PROM_BASE + 0x100; +} + +/* PowerPC Mac99 hardware initialisation */ +static void ppc_core99_init(MachineState *machine) +{ + ram_addr_t ram_size = machine->ram_size; + const char *bios_name = machine->firmware ?: PROM_FILENAME; + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + const char *boot_device = machine->boot_order; + Core99MachineState *core99_machine = CORE99_MACHINE(machine); + PowerPCCPU *cpu = NULL; + CPUPPCState *env = NULL; + char *filename; + IrqLines *openpic_irqs; + int linux_boot, i, j, k; + MemoryRegion *bios = g_new(MemoryRegion, 1); + hwaddr kernel_base, initrd_base, cmdline_base = 0; + long kernel_size, initrd_size; + UNINHostState *uninorth_pci; + PCIBus *pci_bus; + PCIDevice *macio; + ESCCState *escc; + bool has_pmu, has_adb; + MACIOIDEState *macio_ide; + BusState *adb_bus; + MacIONVRAMState *nvr; + int bios_size; + int ppc_boot_device; + DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; + void *fw_cfg; + int machine_arch; + SysBusDevice *s; + DeviceState *dev, *pic_dev; + DeviceState *uninorth_internal_dev = NULL, *uninorth_agp_dev = NULL; + hwaddr nvram_addr = 0xFFF04000; + uint64_t tbfreq; + unsigned int smp_cpus = machine->smp.cpus; + + linux_boot = (kernel_filename != NULL); + + /* init CPUs */ + for (i = 0; i < smp_cpus; i++) { + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + + /* Set time-base frequency to 100 Mhz */ + cpu_ppc_tb_init(env, TBFREQ); + qemu_register_reset(ppc_core99_reset, cpu); + } + + /* allocate RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware ROM */ + memory_region_init_rom(bios, NULL, "ppc_core99.bios", PROM_SIZE, + &error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_BASE, bios); 
+ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + /* Load OpenBIOS (ELF) */ + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + + if (bios_size <= 0) { + /* or load binary ROM image */ + bios_size = load_image_targphys(filename, PROM_BASE, PROM_SIZE); + } + g_free(filename); + } else { + bios_size = -1; + } + if (bios_size < 0 || bios_size > PROM_SIZE) { + error_report("could not load PowerPC bios '%s'", bios_name); + exit(1); + } + + if (linux_boot) { + int bswap_needed; + +#ifdef BSWAP_NEEDED + bswap_needed = 1; +#else + bswap_needed = 0; +#endif + kernel_base = KERNEL_LOAD_ADDR; + + kernel_size = load_elf(kernel_filename, NULL, + translate_kernel_address, NULL, NULL, NULL, + NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + if (kernel_size < 0) + kernel_size = load_aout(kernel_filename, kernel_base, + ram_size - kernel_base, bswap_needed, + TARGET_PAGE_SIZE); + if (kernel_size < 0) + kernel_size = load_image_targphys(kernel_filename, + kernel_base, + ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + /* load initrd */ + if (initrd_filename) { + initrd_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP); + initrd_size = load_image_targphys(initrd_filename, initrd_base, + ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + cmdline_base = TARGET_PAGE_ALIGN(initrd_base + initrd_size); + } else { + initrd_base = 0; + initrd_size = 0; + cmdline_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP); + } + ppc_boot_device = 'm'; + } else { + kernel_base = 0; + kernel_size = 0; + initrd_base = 0; + initrd_size = 0; + ppc_boot_device = '\0'; + /* We consider that NewWorld PowerMac never have any floppy drive + * For now, OHW cannot boot from the network. + */ + for (i = 0; boot_device[i] != '\0'; i++) { + if (boot_device[i] >= 'c' && boot_device[i] <= 'f') { + ppc_boot_device = boot_device[i]; + break; + } + } + if (ppc_boot_device == '\0') { + error_report("No valid boot device for Mac99 machine"); + exit(1); + } + } + + /* UniN init */ + dev = qdev_new(TYPE_UNI_NORTH); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + memory_region_add_subregion(get_system_memory(), 0xf8000000, + sysbus_mmio_get_region(s, 0)); + + openpic_irqs = g_new0(IrqLines, smp_cpus); + for (i = 0; i < smp_cpus; i++) { + /* Mac99 IRQ connection between OpenPIC outputs pins + * and PowerPC input pins + */ + switch (PPC_INPUT(env)) { + case PPC_FLAGS_INPUT_6xx: + openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_MCP]; + /* Not connected ? */ + openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL; + /* Check this */ + openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] = + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_HRESET]; + break; +#if defined(TARGET_PPC64) + case PPC_FLAGS_INPUT_970: + openpic_irqs[i].irq[OPENPIC_OUTPUT_INT] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_CINT] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_INT]; + openpic_irqs[i].irq[OPENPIC_OUTPUT_MCK] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_MCP]; + /* Not connected ? 
*/ + openpic_irqs[i].irq[OPENPIC_OUTPUT_DEBUG] = NULL; + /* Check this */ + openpic_irqs[i].irq[OPENPIC_OUTPUT_RESET] = + ((qemu_irq *)env->irq_inputs)[PPC970_INPUT_HRESET]; + break; +#endif /* defined(TARGET_PPC64) */ + default: + error_report("Bus model not supported on mac99 machine"); + exit(1); + } + } + + if (PPC_INPUT(env) == PPC_FLAGS_INPUT_970) { + /* 970 gets a U3 bus */ + /* Uninorth AGP bus */ + dev = qdev_new(TYPE_U3_AGP_HOST_BRIDGE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + uninorth_pci = U3_AGP_HOST_BRIDGE(dev); + s = SYS_BUS_DEVICE(dev); + /* PCI hole */ + memory_region_add_subregion(get_system_memory(), 0x80000000ULL, + sysbus_mmio_get_region(s, 2)); + /* Register 8 MB of ISA IO space */ + memory_region_add_subregion(get_system_memory(), 0xf2000000, + sysbus_mmio_get_region(s, 3)); + sysbus_mmio_map(s, 0, 0xf0800000); + sysbus_mmio_map(s, 1, 0xf0c00000); + + machine_arch = ARCH_MAC99_U3; + } else { + /* Use values found on a real PowerMac */ + /* Uninorth AGP bus */ + uninorth_agp_dev = qdev_new(TYPE_UNI_NORTH_AGP_HOST_BRIDGE); + s = SYS_BUS_DEVICE(uninorth_agp_dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, 0xf0800000); + sysbus_mmio_map(s, 1, 0xf0c00000); + + /* Uninorth internal bus */ + uninorth_internal_dev = qdev_new( + TYPE_UNI_NORTH_INTERNAL_PCI_HOST_BRIDGE); + s = SYS_BUS_DEVICE(uninorth_internal_dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, 0xf4800000); + sysbus_mmio_map(s, 1, 0xf4c00000); + + /* Uninorth main bus */ + dev = qdev_new(TYPE_UNI_NORTH_PCI_HOST_BRIDGE); + qdev_prop_set_uint32(dev, "ofw-addr", 0xf2000000); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + uninorth_pci = UNI_NORTH_PCI_HOST_BRIDGE(dev); + s = SYS_BUS_DEVICE(dev); + /* PCI hole */ + memory_region_add_subregion(get_system_memory(), 0x80000000ULL, + sysbus_mmio_get_region(s, 2)); + /* Register 8 MB of ISA IO space */ + memory_region_add_subregion(get_system_memory(), 0xf2000000, + sysbus_mmio_get_region(s, 3)); + sysbus_mmio_map(s, 0, 0xf2800000); + sysbus_mmio_map(s, 1, 0xf2c00000); + + machine_arch = ARCH_MAC99; + } + + machine->usb |= defaults_enabled() && !machine->usb_disabled; + has_pmu = (core99_machine->via_config != CORE99_VIA_CONFIG_CUDA); + has_adb = (core99_machine->via_config == CORE99_VIA_CONFIG_CUDA || + core99_machine->via_config == CORE99_VIA_CONFIG_PMU_ADB); + + /* Timebase Frequency */ + if (kvm_enabled()) { + tbfreq = kvmppc_get_tbfreq(); + } else { + tbfreq = TBFREQ; + } + + /* init basic PC hardware */ + pci_bus = PCI_HOST_BRIDGE(uninorth_pci)->bus; + + /* MacIO */ + macio = pci_new(-1, TYPE_NEWWORLD_MACIO); + dev = DEVICE(macio); + qdev_prop_set_uint64(dev, "frequency", tbfreq); + qdev_prop_set_bit(dev, "has-pmu", has_pmu); + qdev_prop_set_bit(dev, "has-adb", has_adb); + + escc = ESCC(object_resolve_path_component(OBJECT(macio), "escc")); + qdev_prop_set_chr(DEVICE(escc), "chrA", serial_hd(0)); + qdev_prop_set_chr(DEVICE(escc), "chrB", serial_hd(1)); + + pci_realize_and_unref(macio, pci_bus, &error_fatal); + + pic_dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pic")); + for (i = 0; i < 4; i++) { + qdev_connect_gpio_out(DEVICE(uninorth_pci), i, + qdev_get_gpio_in(pic_dev, 0x1b + i)); + } + + /* TODO: additional PCI buses only wired up for 32-bit machines */ + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_970) { + /* Uninorth AGP bus */ + for (i = 0; i < 4; i++) { + qdev_connect_gpio_out(uninorth_agp_dev, i, + qdev_get_gpio_in(pic_dev, 0x1b + i)); + } + + /* Uninorth 
internal bus */ + for (i = 0; i < 4; i++) { + qdev_connect_gpio_out(uninorth_internal_dev, i, + qdev_get_gpio_in(pic_dev, 0x1b + i)); + } + } + + /* OpenPIC */ + s = SYS_BUS_DEVICE(pic_dev); + k = 0; + for (i = 0; i < smp_cpus; i++) { + for (j = 0; j < OPENPIC_OUTPUT_NB; j++) { + sysbus_connect_irq(s, k++, openpic_irqs[i].irq[j]); + } + } + g_free(openpic_irqs); + + /* We only emulate 2 out of 3 IDE controllers for now */ + ide_drive_get(hd, ARRAY_SIZE(hd)); + + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[0]")); + macio_ide_init_drives(macio_ide, hd); + + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[1]")); + macio_ide_init_drives(macio_ide, &hd[MAX_IDE_DEVS]); + + if (has_adb) { + if (has_pmu) { + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pmu")); + } else { + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda")); + } + + adb_bus = qdev_get_child_bus(dev, "adb.0"); + dev = qdev_new(TYPE_ADB_KEYBOARD); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + + dev = qdev_new(TYPE_ADB_MOUSE); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + } + + if (machine->usb) { + pci_create_simple(pci_bus, -1, "pci-ohci"); + + /* U3 needs to use USB for input because Linux doesn't support via-cuda + on PPC64 */ + if (!has_adb || machine_arch == ARCH_MAC99_U3) { + USBBus *usb_bus = usb_bus_find(-1); + + usb_create_simple(usb_bus, "usb-kbd"); + usb_create_simple(usb_bus, "usb-mouse"); + } + } + + pci_vga_init(pci_bus); + + if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) { + graphic_depth = 15; + } + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "sungem", NULL); + } + + /* The NewWorld NVRAM is not located in the MacIO device */ + if (kvm_enabled() && qemu_real_host_page_size > 4096) { + /* We can't combine read-write and read-only in a single page, so + move the NVRAM out of ROM again for KVM */ + nvram_addr = 0xFFE00000; + } + dev = qdev_new(TYPE_MACIO_NVRAM); + qdev_prop_set_uint32(dev, "size", 0x2000); + qdev_prop_set_uint32(dev, "it_shift", 1); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, nvram_addr); + nvr = MACIO_NVRAM(dev); + pmac_format_nvram_partition(nvr, 0x2000); + /* No PCI init: the BIOS will do it */ + + dev = qdev_new(TYPE_FW_CFG_MEM); + fw_cfg = FW_CFG(dev); + qdev_prop_set_uint32(dev, "data_width", 1); + qdev_prop_set_bit(dev, "dma_enabled", false); + object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG, + OBJECT(fw_cfg)); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, CFG_ADDR); + sysbus_mmio_map(s, 1, CFG_ADDR + 2); + + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, machine_arch); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + if (kernel_cmdline) { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, cmdline_base); + pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, kernel_cmdline); + } else { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, 0); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ppc_boot_device); + + fw_cfg_add_i16(fw_cfg, 
FW_CFG_PPC_WIDTH, graphic_width); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_VIACONFIG, core99_machine->via_config); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); + if (kvm_enabled()) { + uint8_t *hypercall; + + hypercall = g_malloc(16); + kvmppc_get_hypercall(env, hypercall, 16); + fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq); + /* Mac OS X requires a "known good" clock-frequency value; pass it one. */ + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr); + + /* MacOS NDRV VGA driver */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME); + if (filename) { + gchar *ndrv_file; + gsize ndrv_size; + + if (g_file_get_contents(filename, &ndrv_file, &ndrv_size, NULL)) { + fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size); + } + g_free(filename); + } + + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); +} + +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. + */ +static char *core99_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ + PCIDevice *pci; + MACIOIDEState *macio_ide; + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-newworld")) { + pci = PCI_DEVICE(dev); + return g_strdup_printf("mac-io@%x", PCI_SLOT(pci->devfn)); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-ide")) { + macio_ide = MACIO_IDE(dev); + return g_strdup_printf("ata-3@%x", macio_ide->addr); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-hd")) { + return g_strdup("disk"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-cd")) { + return g_strdup("cdrom"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "virtio-blk-device")) { + return g_strdup("disk"); + } + + return NULL; +} +static int core99_kvm_type(MachineState *machine, const char *arg) +{ + /* Always force PR KVM */ + return 2; +} + +static void core99_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); + + mc->desc = "Mac99 based PowerMAC"; + mc->init = ppc_core99_init; + mc->block_default_type = IF_IDE; + mc->max_cpus = MAX_CPUS; + mc->default_boot_order = "cd"; + mc->default_display = "std"; + mc->kvm_type = core99_kvm_type; +#ifdef TARGET_PPC64 + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("970fx_v3.1"); +#else + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9"); +#endif + mc->default_ram_id = "ppc_core99.ram"; + mc->ignore_boot_device_suffixes = true; + fwc->get_dev_path = core99_fw_dev_path; +} + +static char *core99_get_via_config(Object *obj, Error **errp) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + switch (cms->via_config) { + default: + case CORE99_VIA_CONFIG_CUDA: + return g_strdup("cuda"); + + case CORE99_VIA_CONFIG_PMU: + return g_strdup("pmu"); + + case CORE99_VIA_CONFIG_PMU_ADB: + return g_strdup("pmu-adb"); + } +} + +static void core99_set_via_config(Object *obj, const char *value, Error **errp) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + if (!strcmp(value, "cuda")) { + cms->via_config = CORE99_VIA_CONFIG_CUDA; + } else if (!strcmp(value, "pmu")) { + cms->via_config = CORE99_VIA_CONFIG_PMU; + } else if 
(!strcmp(value, "pmu-adb")) { + cms->via_config = CORE99_VIA_CONFIG_PMU_ADB; + } else { + error_setg(errp, "Invalid via value"); + error_append_hint(errp, "Valid values are cuda, pmu, pmu-adb.\n"); + } +} + +static void core99_instance_init(Object *obj) +{ + Core99MachineState *cms = CORE99_MACHINE(obj); + + /* Default via_config is CORE99_VIA_CONFIG_CUDA */ + cms->via_config = CORE99_VIA_CONFIG_CUDA; + object_property_add_str(obj, "via", core99_get_via_config, + core99_set_via_config); + object_property_set_description(obj, "via", + "Set VIA configuration. " + "Valid values are cuda, pmu and pmu-adb"); + + return; +} + +static const TypeInfo core99_machine_info = { + .name = MACHINE_TYPE_NAME("mac99"), + .parent = TYPE_MACHINE, + .class_init = core99_machine_class_init, + .instance_init = core99_instance_init, + .instance_size = sizeof(Core99MachineState), + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, +}; + +static void mac_machine_register_types(void) +{ + type_register_static(&core99_machine_info); +} + +type_init(mac_machine_register_types) diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c new file mode 100644 index 000000000..de2be960e --- /dev/null +++ b/hw/ppc/mac_oldworld.c @@ -0,0 +1,455 @@ + +/* + * QEMU OldWorld PowerMac (currently ~G3 Beige) hardware System Emulator + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "mac.h" +#include "hw/input/adb.h" +#include "sysemu/sysemu.h" +#include "net/net.h" +#include "hw/isa/isa.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/char/escc.h" +#include "hw/misc/macio/macio.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "kvm_ppc.h" + +#define MAX_IDE_BUS 2 +#define CFG_ADDR 0xf0000510 +#define TBFREQ 16600000UL +#define CLOCKFREQ 266000000UL +#define BUSFREQ 66000000UL + +#define NDRV_VGA_FILENAME "qemu_vga.ndrv" + +#define GRACKLE_BASE 0xfec00000 +#define PROM_BASE 0xffc00000 +#define PROM_SIZE (4 * MiB) + +static void fw_cfg_boot_set(void *opaque, const char *boot_device, + Error **errp) +{ + fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); +} + +static uint64_t translate_kernel_address(void *opaque, uint64_t addr) +{ + return (addr & 0x0fffffff) + KERNEL_LOAD_ADDR; +} + +static void ppc_heathrow_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); +} + +static void ppc_heathrow_init(MachineState *machine) +{ + ram_addr_t ram_size = machine->ram_size; + const char *bios_name = machine->firmware ?: PROM_FILENAME; + const char *boot_device = machine->boot_order; + PowerPCCPU *cpu = NULL; + CPUPPCState *env = NULL; + char *filename; + int i; + MemoryRegion *bios = g_new(MemoryRegion, 1); + uint32_t kernel_base, initrd_base, cmdline_base = 0; + int32_t kernel_size, initrd_size; + PCIBus *pci_bus; + PCIDevice *macio; + MACIOIDEState *macio_ide; + ESCCState *escc; + SysBusDevice *s; + DeviceState *dev, *pic_dev, *grackle_dev; + BusState *adb_bus; + uint64_t bios_addr; + int bios_size; + unsigned int smp_cpus = machine->smp.cpus; + uint16_t ppc_boot_device; + DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS]; + void *fw_cfg; + uint64_t tbfreq; + + /* init CPUs */ + for (i = 0; i < smp_cpus; i++) { + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + + /* Set time-base frequency to 16.6 Mhz */ + cpu_ppc_tb_init(env, TBFREQ); + qemu_register_reset(ppc_heathrow_reset, cpu); + } + + /* allocate RAM */ + if (ram_size > 2047 * MiB) { + error_report("Too much memory for this machine: %" PRId64 " MB, " + "maximum 2047 MB", ram_size / MiB); + exit(1); + } + + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware ROM */ + memory_region_init_rom(bios, NULL, "ppc_heathrow.bios", PROM_SIZE, + &error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_BASE, bios); + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + /* Load OpenBIOS (ELF) */ + bios_size = load_elf(filename, NULL, NULL, NULL, NULL, &bios_addr, + NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + /* Unfortunately, load_elf sign-extends reading elf32 */ + bios_addr = (uint32_t)bios_addr; + + if (bios_size <= 0) { + /* or if could not load ELF try loading a binary ROM image */ + bios_size = load_image_targphys(filename, PROM_BASE, PROM_SIZE); + bios_addr = PROM_BASE; + } + g_free(filename); + } else { + bios_size = -1; + } + if (bios_size < 0 || bios_addr - PROM_BASE + bios_size > PROM_SIZE) { + error_report("could not load PowerPC bios '%s'", bios_name); + exit(1); + } + + if (machine->kernel_filename) { + int 
bswap_needed;
+
+#ifdef BSWAP_NEEDED
+        bswap_needed = 1;
+#else
+        bswap_needed = 0;
+#endif
+        kernel_base = KERNEL_LOAD_ADDR;
+        kernel_size = load_elf(machine->kernel_filename, NULL,
+                               translate_kernel_address, NULL, NULL, NULL,
+                               NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0);
+        if (kernel_size < 0)
+            kernel_size = load_aout(machine->kernel_filename, kernel_base,
+                                    ram_size - kernel_base, bswap_needed,
+                                    TARGET_PAGE_SIZE);
+        if (kernel_size < 0)
+            kernel_size = load_image_targphys(machine->kernel_filename,
+                                              kernel_base,
+                                              ram_size - kernel_base);
+        if (kernel_size < 0) {
+            error_report("could not load kernel '%s'",
+                         machine->kernel_filename);
+            exit(1);
+        }
+        /* load initrd */
+        if (machine->initrd_filename) {
+            initrd_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size +
+                                            KERNEL_GAP);
+            initrd_size = load_image_targphys(machine->initrd_filename,
+                                              initrd_base,
+                                              ram_size - initrd_base);
+            if (initrd_size < 0) {
+                error_report("could not load initial ram disk '%s'",
+                             machine->initrd_filename);
+                exit(1);
+            }
+            cmdline_base = TARGET_PAGE_ALIGN(initrd_base + initrd_size);
+        } else {
+            initrd_base = 0;
+            initrd_size = 0;
+            cmdline_base = TARGET_PAGE_ALIGN(kernel_base + kernel_size + KERNEL_GAP);
+        }
+        ppc_boot_device = 'm';
+    } else {
+        kernel_base = 0;
+        kernel_size = 0;
+        initrd_base = 0;
+        initrd_size = 0;
+        ppc_boot_device = '\0';
+        for (i = 0; boot_device[i] != '\0'; i++) {
+            /* TOFIX: for now, the second IDE channel is not properly
+             * used by OHW. Mac floppy disks are not emulated.
+             * For now, OHW cannot boot from the network.
+             */
+#if 0
+            if (boot_device[i] >= 'a' && boot_device[i] <= 'f') {
+                ppc_boot_device = boot_device[i];
+                break;
+            }
+#else
+            if (boot_device[i] >= 'c' && boot_device[i] <= 'd') {
+                ppc_boot_device = boot_device[i];
+                break;
+            }
+#endif
+        }
+        if (ppc_boot_device == '\0') {
+            error_report("No valid boot device for G3 Beige machine");
+            exit(1);
+        }
+    }
+
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
+    /* Grackle PCI host bridge */
+    grackle_dev = qdev_new(TYPE_GRACKLE_PCI_HOST_BRIDGE);
+    qdev_prop_set_uint32(grackle_dev, "ofw-addr", 0x80000000);
+    s = SYS_BUS_DEVICE(grackle_dev);
+    sysbus_realize_and_unref(s, &error_fatal);
+
+    sysbus_mmio_map(s, 0, GRACKLE_BASE);
+    sysbus_mmio_map(s, 1, GRACKLE_BASE + 0x200000);
+    /* PCI hole */
+    memory_region_add_subregion(get_system_memory(), 0x80000000ULL,
+                                sysbus_mmio_get_region(s, 2));
+    /* Register 2 MB of ISA IO space */
+    memory_region_add_subregion(get_system_memory(), 0xfe000000,
+                                sysbus_mmio_get_region(s, 3));
+
+    pci_bus = PCI_HOST_BRIDGE(grackle_dev)->bus;
+
+    /* MacIO */
+    macio = pci_new(PCI_DEVFN(16, 0), TYPE_OLDWORLD_MACIO);
+    dev = DEVICE(macio);
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
+
+    escc = ESCC(object_resolve_path_component(OBJECT(macio), "escc"));
+    qdev_prop_set_chr(DEVICE(escc), "chrA", serial_hd(0));
+    qdev_prop_set_chr(DEVICE(escc), "chrB", serial_hd(1));
+
+    pci_realize_and_unref(macio, pci_bus, &error_fatal);
+
+    pic_dev = DEVICE(object_resolve_path_component(OBJECT(macio), "pic"));
+    for (i = 0; i < 4; i++) {
+        qdev_connect_gpio_out(grackle_dev, i,
+                              qdev_get_gpio_in(pic_dev, 0x15 + i));
+    }
+
+    /* Connect the heathrow PIC outputs to the 6xx bus */
+    for (i = 0; i < smp_cpus; i++) {
+        switch (PPC_INPUT(env)) {
+        case PPC_FLAGS_INPUT_6xx:
+            /* XXX: we register only 1 output pin for heathrow PIC */
+            qdev_connect_gpio_out(pic_dev, 0,
+                ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT]);
+            break;
+        default:
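+            /*
+             * Editor's note: only 6xx-style CPU input pins are wired to the
+             * Heathrow PIC above; the switch has no other case, so any
+             * other core configuration falls through to this error path
+             * at startup.
+             */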
error_report("Bus model not supported on OldWorld Mac machine"); + exit(1); + } + } + + pci_vga_init(pci_bus); + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "ne2k_pci", NULL); + } + + /* MacIO IDE */ + ide_drive_get(hd, ARRAY_SIZE(hd)); + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[0]")); + macio_ide_init_drives(macio_ide, hd); + + macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio), + "ide[1]")); + macio_ide_init_drives(macio_ide, &hd[MAX_IDE_DEVS]); + + /* MacIO CUDA/ADB */ + dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda")); + adb_bus = qdev_get_child_bus(dev, "adb.0"); + dev = qdev_new(TYPE_ADB_KEYBOARD); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + dev = qdev_new(TYPE_ADB_MOUSE); + qdev_realize_and_unref(dev, adb_bus, &error_fatal); + + if (machine_usb(machine)) { + pci_create_simple(pci_bus, -1, "pci-ohci"); + } + + if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8) + graphic_depth = 15; + + /* No PCI init: the BIOS will do it */ + + dev = qdev_new(TYPE_FW_CFG_MEM); + fw_cfg = FW_CFG(dev); + qdev_prop_set_uint32(dev, "data_width", 1); + qdev_prop_set_bit(dev, "dma_enabled", false); + object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG, + OBJECT(fw_cfg)); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, CFG_ADDR); + sysbus_mmio_map(s, 1, CFG_ADDR + 2); + + fw_cfg_add_i16(fw_cfg, FW_CFG_NB_CPUS, (uint16_t)smp_cpus); + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)ram_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_HEATHROW); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + if (machine->kernel_cmdline) { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, cmdline_base); + pstrcpy_targphys("cmdline", cmdline_base, TARGET_PAGE_SIZE, + machine->kernel_cmdline); + } else { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, 0); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, ppc_boot_device); + + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_WIDTH, graphic_width); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); + if (kvm_enabled()) { + uint8_t *hypercall; + + hypercall = g_malloc(16); + kvmppc_get_hypercall(env, hypercall, 16); + fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq); + /* Mac OS X requires a "known good" clock-frequency value; pass it one. */ + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ); + + /* MacOS NDRV VGA driver */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, NDRV_VGA_FILENAME); + if (filename) { + gchar *ndrv_file; + gsize ndrv_size; + + if (g_file_get_contents(filename, &ndrv_file, &ndrv_size, NULL)) { + fw_cfg_add_file(fw_cfg, "ndrv/qemu_vga.ndrv", ndrv_file, ndrv_size); + } + g_free(filename); + } + + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); +} + +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. 
+ */ +static char *heathrow_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ + PCIDevice *pci; + MACIOIDEState *macio_ide; + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-oldworld")) { + pci = PCI_DEVICE(dev); + return g_strdup_printf("mac-io@%x", PCI_SLOT(pci->devfn)); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "macio-ide")) { + macio_ide = MACIO_IDE(dev); + return g_strdup_printf("ata-3@%x", macio_ide->addr); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-hd")) { + return g_strdup("disk"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "ide-cd")) { + return g_strdup("cdrom"); + } + + if (!strcmp(object_get_typename(OBJECT(dev)), "virtio-blk-device")) { + return g_strdup("disk"); + } + + return NULL; +} + +static int heathrow_kvm_type(MachineState *machine, const char *arg) +{ + /* Always force PR KVM */ + return 2; +} + +static void heathrow_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc); + + mc->desc = "Heathrow based PowerMAC"; + mc->init = ppc_heathrow_init; + mc->block_default_type = IF_IDE; + mc->max_cpus = MAX_CPUS; +#ifndef TARGET_PPC64 + mc->is_default = true; +#endif + /* TOFIX "cad" when Mac floppy is implemented */ + mc->default_boot_order = "cd"; + mc->kvm_type = heathrow_kvm_type; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("750_v3.1"); + mc->default_display = "std"; + mc->ignore_boot_device_suffixes = true; + mc->default_ram_id = "ppc_heathrow.ram"; + fwc->get_dev_path = heathrow_fw_dev_path; +} + +static const TypeInfo ppc_heathrow_machine_info = { + .name = MACHINE_TYPE_NAME("g3beige"), + .parent = TYPE_MACHINE, + .class_init = heathrow_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { } + }, +}; + +static void ppc_heathrow_register_types(void) +{ + type_register_static(&ppc_heathrow_machine_info); +} + +type_init(ppc_heathrow_register_types); diff --git a/hw/ppc/meson.build b/hw/ppc/meson.build new file mode 100644 index 000000000..aa4c8e6a2 --- /dev/null +++ b/hw/ppc/meson.build @@ -0,0 +1,90 @@ +ppc_ss = ss.source_set() +ppc_ss.add(files( + 'ppc.c', + 'ppc_booke.c', +)) +ppc_ss.add(when: 'CONFIG_FDT_PPC', if_true: [files( + 'fdt.c', +), fdt]) +ppc_ss.add(when: 'CONFIG_FW_CFG_PPC', if_true: files('fw_cfg.c')) + +# IBM pSeries (sPAPR) +ppc_ss.add(when: 'CONFIG_PSERIES', if_true: files( + 'spapr.c', + 'spapr_caps.c', + 'spapr_vio.c', + 'spapr_events.c', + 'spapr_hcall.c', + 'spapr_iommu.c', + 'spapr_rtas.c', + 'spapr_pci.c', + 'spapr_rtc.c', + 'spapr_drc.c', + 'spapr_cpu_core.c', + 'spapr_ovec.c', + 'spapr_irq.c', + 'spapr_tpm_proxy.c', + 'spapr_nvdimm.c', + 'spapr_rtas_ddw.c', + 'spapr_numa.c', + 'pef.c', +)) +ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_TCG'], if_true: files( + 'spapr_softmmu.c', +)) +ppc_ss.add(when: 'CONFIG_SPAPR_RNG', if_true: files('spapr_rng.c')) +ppc_ss.add(when: ['CONFIG_PSERIES', 'CONFIG_LINUX'], if_true: files( + 'spapr_pci_vfio.c', + 'spapr_pci_nvlink2.c' +)) + +# IBM PowerNV +ppc_ss.add(when: 'CONFIG_POWERNV', if_true: files( + 'pnv.c', + 'pnv_xscom.c', + 'pnv_core.c', + 'pnv_lpc.c', + 'pnv_psi.c', + 'pnv_occ.c', + 'pnv_bmc.c', + 'pnv_homer.c', + 'pnv_pnor.c', +)) +# PowerPC 4xx boards +ppc_ss.add(when: 'CONFIG_PPC405', if_true: files( + 'ppc405_boards.c', + 'ppc405_uc.c')) +ppc_ss.add(when: 'CONFIG_PPC440', if_true: files( + 'ppc440_bamboo.c', + 'ppc440_pcix.c', 'ppc440_uc.c')) +ppc_ss.add(when: 'CONFIG_PPC4XX', if_true: files( + 'ppc4xx_pci.c', + 
'ppc4xx_devs.c')) +ppc_ss.add(when: 'CONFIG_SAM460EX', if_true: files('sam460ex.c')) +# PReP +ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep.c')) +ppc_ss.add(when: 'CONFIG_PREP', if_true: files('prep_systemio.c')) +ppc_ss.add(when: 'CONFIG_RS6000_MC', if_true: files('rs6000_mc.c')) +# OldWorld PowerMac +ppc_ss.add(when: 'CONFIG_MAC_OLDWORLD', if_true: files('mac_oldworld.c')) +# NewWorld PowerMac +ppc_ss.add(when: 'CONFIG_MAC_NEWWORLD', if_true: files('mac_newworld.c')) +# e500 +ppc_ss.add(when: 'CONFIG_E500', if_true: files( + 'e500.c', + 'mpc8544ds.c', + 'e500plat.c' +)) +ppc_ss.add(when: 'CONFIG_E500', if_true: files( + 'mpc8544_guts.c', + 'ppce500_spin.c' +)) +# PowerPC 440 Xilinx ML507 reference board. +ppc_ss.add(when: 'CONFIG_VIRTEX', if_true: files('virtex_ml507.c')) +# Pegasos2 +ppc_ss.add(when: 'CONFIG_PEGASOS2', if_true: files('pegasos2.c')) + +ppc_ss.add(when: 'CONFIG_VOF', if_true: files('vof.c')) +ppc_ss.add(when: ['CONFIG_VOF', 'CONFIG_PSERIES'], if_true: files('spapr_vof.c')) + +hw_arch += {'ppc': ppc_ss} diff --git a/hw/ppc/mpc8544_guts.c b/hw/ppc/mpc8544_guts.c new file mode 100644 index 000000000..e8d2d51c2 --- /dev/null +++ b/hw/ppc/mpc8544_guts.c @@ -0,0 +1,142 @@ +/* + * QEMU PowerPC MPC8544 global util pseudo-device + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Alexander Graf, <alex@csgraf.de> + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * ***************************************************************** + * + * The documentation for this device is noted in the MPC8544 documentation, + * file name "MPC8544ERM.pdf". You can easily find it on the web. 
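+ * (Editor's note: only a small slice of the GUTS block is modelled below:
+ * reads of the PVR/SVR registers are answered from the current CPU's
+ * SPRs, a write of the RSTCR reset bit requests a system reset, and any
+ * other register access is logged to stderr and otherwise ignored.)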
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "sysemu/runstate.h" +#include "cpu.h" +#include "hw/sysbus.h" +#include "qom/object.h" + +#define MPC8544_GUTS_MMIO_SIZE 0x1000 +#define MPC8544_GUTS_RSTCR_RESET 0x02 + +#define MPC8544_GUTS_ADDR_PORPLLSR 0x00 +#define MPC8544_GUTS_ADDR_PORBMSR 0x04 +#define MPC8544_GUTS_ADDR_PORIMPSCR 0x08 +#define MPC8544_GUTS_ADDR_PORDEVSR 0x0C +#define MPC8544_GUTS_ADDR_PORDBGMSR 0x10 +#define MPC8544_GUTS_ADDR_PORDEVSR2 0x14 +#define MPC8544_GUTS_ADDR_GPPORCR 0x20 +#define MPC8544_GUTS_ADDR_GPIOCR 0x30 +#define MPC8544_GUTS_ADDR_GPOUTDR 0x40 +#define MPC8544_GUTS_ADDR_GPINDR 0x50 +#define MPC8544_GUTS_ADDR_PMUXCR 0x60 +#define MPC8544_GUTS_ADDR_DEVDISR 0x70 +#define MPC8544_GUTS_ADDR_POWMGTCSR 0x80 +#define MPC8544_GUTS_ADDR_MCPSUMR 0x90 +#define MPC8544_GUTS_ADDR_RSTRSCR 0x94 +#define MPC8544_GUTS_ADDR_PVR 0xA0 +#define MPC8544_GUTS_ADDR_SVR 0xA4 +#define MPC8544_GUTS_ADDR_RSTCR 0xB0 +#define MPC8544_GUTS_ADDR_IOVSELSR 0xC0 +#define MPC8544_GUTS_ADDR_DDRCSR 0xB20 +#define MPC8544_GUTS_ADDR_DDRCDR 0xB24 +#define MPC8544_GUTS_ADDR_DDRCLKDR 0xB28 +#define MPC8544_GUTS_ADDR_CLKOCR 0xE00 +#define MPC8544_GUTS_ADDR_SRDS1CR1 0xF04 +#define MPC8544_GUTS_ADDR_SRDS2CR1 0xF10 +#define MPC8544_GUTS_ADDR_SRDS2CR3 0xF18 + +#define TYPE_MPC8544_GUTS "mpc8544-guts" +OBJECT_DECLARE_SIMPLE_TYPE(GutsState, MPC8544_GUTS) + +struct GutsState { + /*< private >*/ + SysBusDevice parent_obj; + /*< public >*/ + + MemoryRegion iomem; +}; + + +static uint64_t mpc8544_guts_read(void *opaque, hwaddr addr, + unsigned size) +{ + uint32_t value = 0; + PowerPCCPU *cpu = POWERPC_CPU(current_cpu); + CPUPPCState *env = &cpu->env; + + addr &= MPC8544_GUTS_MMIO_SIZE - 1; + switch (addr) { + case MPC8544_GUTS_ADDR_PVR: + value = env->spr[SPR_PVR]; + break; + case MPC8544_GUTS_ADDR_SVR: + value = env->spr[SPR_E500_SVR]; + break; + default: + fprintf(stderr, "guts: Unknown register read: %x\n", (int)addr); + break; + } + + return value; +} + +static void mpc8544_guts_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + addr &= MPC8544_GUTS_MMIO_SIZE - 1; + + switch (addr) { + case MPC8544_GUTS_ADDR_RSTCR: + if (value & MPC8544_GUTS_RSTCR_RESET) { + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + break; + default: + fprintf(stderr, "guts: Unknown register write: %x = %x\n", + (int)addr, (unsigned)value); + break; + } +} + +static const MemoryRegionOps mpc8544_guts_ops = { + .read = mpc8544_guts_read, + .write = mpc8544_guts_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void mpc8544_guts_initfn(Object *obj) +{ + SysBusDevice *d = SYS_BUS_DEVICE(obj); + GutsState *s = MPC8544_GUTS(obj); + + memory_region_init_io(&s->iomem, OBJECT(s), &mpc8544_guts_ops, s, + "mpc8544.guts", MPC8544_GUTS_MMIO_SIZE); + sysbus_init_mmio(d, &s->iomem); +} + +static const TypeInfo mpc8544_guts_info = { + .name = TYPE_MPC8544_GUTS, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(GutsState), + .instance_init = mpc8544_guts_initfn, +}; + +static void mpc8544_guts_register_types(void) +{ + type_register_static(&mpc8544_guts_info); +} + +type_init(mpc8544_guts_register_types) diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c new file mode 100644 index 000000000..81177505f --- /dev/null +++ b/hw/ppc/mpc8544ds.c @@ -0,0 +1,74 @@ +/* + * Support for the PPC e500-based mpc8544ds board + * + * Copyright 2012 Freescale Semiconductor, Inc. 
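+ * (Editor's note: the MPC8544DS wrapper below mostly parameterises the
+ * generic e500 machine: CCSR at 0xE0000000, PCI MMIO at 0xC0000000 and an
+ * FSL MPIC 2.0; the MMIO window starting at 0xC0000000 is presumably why
+ * RAM is capped at 3 GiB in mpc8544ds_init().)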
+ * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "e500.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/openpic.h" +#include "qemu/error-report.h" +#include "cpu.h" + +static void mpc8544ds_fixup_devtree(void *fdt) +{ + const char model[] = "MPC8544DS"; + const char compatible[] = "MPC8544DS\0MPC85xxDS"; + + qemu_fdt_setprop(fdt, "/", "model", model, sizeof(model)); + qemu_fdt_setprop(fdt, "/", "compatible", compatible, + sizeof(compatible)); +} + +static void mpc8544ds_init(MachineState *machine) +{ + if (machine->ram_size > 0xc0000000) { + error_report("The MPC8544DS board only supports up to 3GB of RAM"); + exit(1); + } + + ppce500_init(machine); +} + +static void e500plat_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PPCE500MachineClass *pmc = PPCE500_MACHINE_CLASS(oc); + + pmc->pci_first_slot = 0x11; + pmc->pci_nr_slots = 2; + pmc->fixup_devtree = mpc8544ds_fixup_devtree; + pmc->mpic_version = OPENPIC_MODEL_FSL_MPIC_20; + pmc->ccsrbar_base = 0xE0000000ULL; + pmc->pci_mmio_base = 0xC0000000ULL; + pmc->pci_mmio_bus_base = 0xC0000000ULL; + pmc->pci_pio_base = 0xE1000000ULL; + pmc->spin_base = 0xEF000000ULL; + + mc->desc = "mpc8544ds"; + mc->init = mpc8544ds_init; + mc->max_cpus = 15; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("e500v2_v30"); + mc->default_ram_id = "mpc8544ds.ram"; +} + +#define TYPE_MPC8544DS_MACHINE MACHINE_TYPE_NAME("mpc8544ds") + +static const TypeInfo mpc8544ds_info = { + .name = TYPE_MPC8544DS_MACHINE, + .parent = TYPE_PPCE500_MACHINE, + .class_init = e500plat_machine_class_init, +}; + +static void mpc8544ds_register_types(void) +{ + type_register_static(&mpc8544ds_info); +} + +type_init(mpc8544ds_register_types) diff --git a/hw/ppc/pef.c b/hw/ppc/pef.c new file mode 100644 index 000000000..cc44d5e33 --- /dev/null +++ b/hw/ppc/pef.c @@ -0,0 +1,142 @@ +/* + * PEF (Protected Execution Facility) for POWER support + * + * Copyright Red Hat. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" + +#include "qapi/error.h" +#include "qom/object_interfaces.h" +#include "sysemu/kvm.h" +#include "migration/blocker.h" +#include "exec/confidential-guest-support.h" +#include "hw/ppc/pef.h" + +#define TYPE_PEF_GUEST "pef-guest" +OBJECT_DECLARE_SIMPLE_TYPE(PefGuest, PEF_GUEST) + +typedef struct PefGuest PefGuest; +typedef struct PefGuestClass PefGuestClass; + +struct PefGuestClass { + ConfidentialGuestSupportClass parent_class; +}; + +/** + * PefGuest: + * + * The PefGuest object is used for creating and managing a PEF + * guest. 
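+ * (Editor's note: PEF depends on a POWER "ultravisor" being present;
+ * kvmppc_svm_init() below probes the KVM_CAP_PPC_SECURE_GUEST capability
+ * and fails cleanly when it is absent, and reset goes through the
+ * KVM_PPC_SVM_OFF ioctl to move the guest back out of secure mode.)
+ *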
+ * + * # $QEMU \ + * -object pef-guest,id=pef0 \ + * -machine ...,confidential-guest-support=pef0 + */ +struct PefGuest { + ConfidentialGuestSupport parent_obj; +}; + +static int kvmppc_svm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ +#ifdef CONFIG_KVM + static Error *pef_mig_blocker; + + if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_SECURE_GUEST)) { + error_setg(errp, + "KVM implementation does not support Secure VMs (is an ultravisor running?)"); + return -1; + } else { + int ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SECURE_GUEST, 0, 1); + + if (ret < 0) { + error_setg(errp, + "Error enabling PEF with KVM"); + return -1; + } + } + + /* add migration blocker */ + error_setg(&pef_mig_blocker, "PEF: Migration is not implemented"); + /* NB: This can fail if --only-migratable is used */ + migrate_add_blocker(pef_mig_blocker, &error_fatal); + + cgs->ready = true; + + return 0; +#else + g_assert_not_reached(); +#endif +} + +/* + * Don't set error if KVM_PPC_SVM_OFF ioctl is invoked on kernels + * that don't support this ioctl. + */ +static int kvmppc_svm_off(Error **errp) +{ +#ifdef CONFIG_KVM + int rc; + + rc = kvm_vm_ioctl(KVM_STATE(current_accel()), KVM_PPC_SVM_OFF); + if (rc && rc != -ENOTTY) { + error_setg_errno(errp, -rc, "KVM_PPC_SVM_OFF ioctl failed"); + return rc; + } + return 0; +#else + g_assert_not_reached(); +#endif +} + +int pef_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; + } + + if (!kvm_enabled()) { + error_setg(errp, "PEF requires KVM"); + return -1; + } + + return kvmppc_svm_init(cgs, errp); +} + +int pef_kvm_reset(ConfidentialGuestSupport *cgs, Error **errp) +{ + if (!object_dynamic_cast(OBJECT(cgs), TYPE_PEF_GUEST)) { + return 0; + } + + /* + * If we don't have KVM we should never have been able to + * initialize PEF, so we should never get this far + */ + assert(kvm_enabled()); + + return kvmppc_svm_off(errp); +} + +OBJECT_DEFINE_TYPE_WITH_INTERFACES(PefGuest, + pef_guest, + PEF_GUEST, + CONFIDENTIAL_GUEST_SUPPORT, + { TYPE_USER_CREATABLE }, + { NULL }) + +static void pef_guest_class_init(ObjectClass *oc, void *data) +{ +} + +static void pef_guest_init(Object *obj) +{ +} + +static void pef_guest_finalize(Object *obj) +{ +} diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c new file mode 100644 index 000000000..298e6b93e --- /dev/null +++ b/hw/ppc/pegasos2.c @@ -0,0 +1,952 @@ +/* + * QEMU PowerPC CHRP (Genesi/bPlan Pegasos II) hardware System Emulator + * + * Copyright (c) 2018-2021 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. 
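+ * (Editor's note: the board model pairs a Marvell MV64361 "Discovery II"
+ * system controller with a VIA VT8231 south bridge. When no -bios image
+ * is supplied, the built-in Virtual Open Firmware (VOF) is used instead,
+ * and pegasos2_machine_reset() programs the chipset registers that real
+ * board firmware would normally initialise.)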
+ * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "hw/hw.h" +#include "hw/ppc/ppc.h" +#include "hw/sysbus.h" +#include "hw/pci/pci_host.h" +#include "hw/irq.h" +#include "hw/pci-host/mv64361.h" +#include "hw/isa/vt82c686.h" +#include "hw/ide/pci.h" +#include "hw/i2c/smbus_eeprom.h" +#include "hw/qdev-properties.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/qtest.h" +#include "hw/boards.h" +#include "hw/loader.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "exec/address-spaces.h" +#include "qom/qom-qobject.h" +#include "qapi/qmp/qdict.h" +#include "trace.h" +#include "qemu/datadir.h" +#include "sysemu/device_tree.h" +#include "hw/ppc/vof.h" + +#include <libfdt.h> + +#define PROM_FILENAME "vof.bin" +#define PROM_ADDR 0xfff00000 +#define PROM_SIZE 0x80000 + +#define KVMPPC_HCALL_BASE 0xf000 +#define KVMPPC_H_RTAS (KVMPPC_HCALL_BASE + 0x0) +#define KVMPPC_H_VOF_CLIENT (KVMPPC_HCALL_BASE + 0x5) + +#define H_SUCCESS 0 +#define H_PRIVILEGE -3 /* Caller not privileged */ +#define H_PARAMETER -4 /* Parameter invalid, out-of-range or conflicting */ + +#define BUS_FREQ_HZ 133333333 + +#define PCI0_CFG_ADDR 0xcf8 +#define PCI0_MEM_BASE 0xc0000000 +#define PCI0_MEM_SIZE 0x20000000 +#define PCI0_IO_BASE 0xf8000000 +#define PCI0_IO_SIZE 0x10000 + +#define PCI1_CFG_ADDR 0xc78 +#define PCI1_MEM_BASE 0x80000000 +#define PCI1_MEM_SIZE 0x40000000 +#define PCI1_IO_BASE 0xfe000000 +#define PCI1_IO_SIZE 0x10000 + +#define TYPE_PEGASOS2_MACHINE MACHINE_TYPE_NAME("pegasos2") +OBJECT_DECLARE_TYPE(Pegasos2MachineState, MachineClass, PEGASOS2_MACHINE) + +struct Pegasos2MachineState { + MachineState parent_obj; + PowerPCCPU *cpu; + DeviceState *mv; + Vof *vof; + void *fdt_blob; + uint64_t kernel_addr; + uint64_t kernel_entry; + uint64_t kernel_size; +}; + +static void *build_fdt(MachineState *machine, int *fdt_size); + +static void pegasos2_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + Pegasos2MachineState *pm = PEGASOS2_MACHINE(current_machine); + + cpu_reset(CPU(cpu)); + cpu->env.spr[SPR_HID1] = 7ULL << 28; + if (pm->vof) { + cpu->env.gpr[1] = 2 * VOF_STACK_SIZE - 0x20; + cpu->env.nip = 0x100; + } +} + +static void pegasos2_init(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + CPUPPCState *env; + MemoryRegion *rom = g_new(MemoryRegion, 1); + PCIBus *pci_bus; + PCIDevice *dev; + I2CBus *i2c_bus; + const char *fwname = machine->firmware ?: PROM_FILENAME; + char *filename; + int sz; + uint8_t *spd_data; + + /* init CPU */ + pm->cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &pm->cpu->env; + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { + error_report("Incompatible CPU, only 6xx bus supported"); + exit(1); + } + + /* Set time-base frequency */ + cpu_ppc_tb_init(env, BUS_FREQ_HZ / 4); + qemu_register_reset(pegasos2_cpu_reset, pm->cpu); + + /* RAM */ + if (machine->ram_size > 2 * GiB) { + error_report("RAM size more than 2 GiB is not supported"); + exit(1); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* allocate and load firmware */ + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, fwname); + if (!filename) { + error_report("Could not find firmware '%s'", fwname); + exit(1); + } + if (!machine->firmware && !pm->vof) { + pm->vof = g_malloc0(sizeof(*pm->vof)); + } + memory_region_init_rom(rom, NULL, "pegasos2.rom", 
PROM_SIZE, &error_fatal); + memory_region_add_subregion(get_system_memory(), PROM_ADDR, rom); + sz = load_elf(filename, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + sz = load_image_targphys(filename, pm->vof ? 0 : PROM_ADDR, PROM_SIZE); + } + if (sz <= 0 || sz > PROM_SIZE) { + error_report("Could not load firmware '%s'", filename); + exit(1); + } + g_free(filename); + if (pm->vof) { + pm->vof->fw_size = sz; + } + + /* Marvell Discovery II system controller */ + pm->mv = DEVICE(sysbus_create_simple(TYPE_MV64361, -1, + ((qemu_irq *)env->irq_inputs)[PPC6xx_INPUT_INT])); + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + + /* VIA VT8231 South Bridge (multifunction PCI device) */ + /* VT8231 function 0: PCI-to-ISA Bridge */ + dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), true, + TYPE_VT8231_ISA); + qdev_connect_gpio_out(DEVICE(dev), 0, + qdev_get_gpio_in_named(pm->mv, "gpp", 31)); + + /* VT8231 function 1: IDE Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 1), "via-ide"); + pci_ide_create_devs(dev); + + /* VT8231 function 2-3: USB Ports */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 2), "vt82c686b-usb-uhci"); + pci_create_simple(pci_bus, PCI_DEVFN(12, 3), "vt82c686b-usb-uhci"); + + /* VT8231 function 4: Power Management Controller */ + dev = pci_create_simple(pci_bus, PCI_DEVFN(12, 4), TYPE_VT8231_PM); + i2c_bus = I2C_BUS(qdev_get_child_bus(DEVICE(dev), "i2c")); + spd_data = spd_data_generate(DDR, machine->ram_size); + smbus_eeprom_init_one(i2c_bus, 0x57, spd_data); + + /* VT8231 function 5-6: AC97 Audio & Modem */ + pci_create_simple(pci_bus, PCI_DEVFN(12, 5), TYPE_VIA_AC97); + pci_create_simple(pci_bus, PCI_DEVFN(12, 6), TYPE_VIA_MC97); + + /* other PC hardware */ + pci_vga_init(pci_bus); + + if (machine->kernel_filename) { + sz = load_elf(machine->kernel_filename, NULL, NULL, NULL, + &pm->kernel_entry, &pm->kernel_addr, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (sz <= 0) { + error_report("Could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + pm->kernel_size = sz; + if (!pm->vof) { + warn_report("Option -kernel may be ineffective with -bios."); + } + } else if (pm->vof && !qtest_enabled()) { + warn_report("Using Virtual OpenFirmware but no -kernel option."); + } + + if (!pm->vof && machine->kernel_cmdline && machine->kernel_cmdline[0]) { + warn_report("Option -append may be ineffective with -bios."); + } +} + +static uint32_t pegasos2_mv_reg_read(Pegasos2MachineState *pm, + uint32_t addr, uint32_t len) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + uint64_t val = 0xffffffffULL; + memory_region_dispatch_read(r, addr, &val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); + return val; +} + +static void pegasos2_mv_reg_write(Pegasos2MachineState *pm, uint32_t addr, + uint32_t len, uint32_t val) +{ + MemoryRegion *r = sysbus_mmio_get_region(SYS_BUS_DEVICE(pm->mv), 0); + memory_region_dispatch_write(r, addr, val, size_memop(len) | MO_LE, + MEMTXATTRS_UNSPECIFIED); +} + +static uint32_t pegasos2_pci_config_read(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len) +{ + hwaddr pcicfg = bus ? PCI1_CFG_ADDR : PCI0_CFG_ADDR; + uint64_t val = 0xffffffffULL; + + if (len <= 4) { + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + val = pegasos2_mv_reg_read(pm, pcicfg + 4, len); + } + return val; +} + +static void pegasos2_pci_config_write(Pegasos2MachineState *pm, int bus, + uint32_t addr, uint32_t len, uint32_t val) +{ + hwaddr pcicfg = bus ? 
PCI1_CFG_ADDR : PCI0_CFG_ADDR; + + pegasos2_mv_reg_write(pm, pcicfg, 4, addr | BIT(31)); + pegasos2_mv_reg_write(pm, pcicfg + 4, len, val); +} + +static void pegasos2_machine_reset(MachineState *machine) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + void *fdt; + uint64_t d[2]; + int sz; + + qemu_devices_reset(); + if (!pm->vof) { + return; /* Firmware should set up machine so nothing to do */ + } + + /* Otherwise, set up devices that board firmware would normally do */ + pegasos2_mv_reg_write(pm, 0, 4, 0x28020ff); + pegasos2_mv_reg_write(pm, 0x278, 4, 0xa31fc); + pegasos2_mv_reg_write(pm, 0xf300, 4, 0x11ff0400); + pegasos2_mv_reg_write(pm, 0xf10c, 4, 0x80000000); + pegasos2_mv_reg_write(pm, 0x1c, 4, 0x8000000); + pegasos2_pci_config_write(pm, 0, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + pegasos2_pci_config_write(pm, 1, PCI_COMMAND, 2, PCI_COMMAND_IO | + PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 0) << 8) | + 0x50, 1, 0x2); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_INTERRUPT_LINE, 2, 0x109); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_CLASS_PROG, 1, 0xf); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x40, 1, 0xb); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + 0x50, 4, 0x17171717); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 1) << 8) | + PCI_COMMAND, 2, 0x87); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 2) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 3) << 8) | + PCI_INTERRUPT_LINE, 2, 0x409); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + PCI_INTERRUPT_LINE, 2, 0x9); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x48, 4, 0xf00); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x40, 4, 0x558020); + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 4) << 8) | + 0x90, 4, 0xd00); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 5) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + pegasos2_pci_config_write(pm, 1, (PCI_DEVFN(12, 6) << 8) | + PCI_INTERRUPT_LINE, 2, 0x309); + + /* Device tree and VOF set up */ + vof_init(pm->vof, machine->ram_size, &error_fatal); + if (vof_claim(pm->vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE) == -1) { + error_report("Memory allocation for stack failed"); + exit(1); + } + if (pm->kernel_size && + vof_claim(pm->vof, pm->kernel_addr, pm->kernel_size, 0) == -1) { + error_report("Memory for kernel is in use"); + exit(1); + } + fdt = build_fdt(machine, &sz); + /* FIXME: VOF assumes entry is same as load address */ + d[0] = cpu_to_be64(pm->kernel_entry); + d[1] = cpu_to_be64(pm->kernel_size - (pm->kernel_entry - pm->kernel_addr)); + qemu_fdt_setprop(fdt, "/chosen", "qemu,boot-kernel", d, sizeof(d)); + + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + g_free(pm->fdt_blob); + pm->fdt_blob = fdt; + + vof_build_dt(fdt, pm->vof); + vof_client_open_store(fdt, pm->vof, "/chosen", "stdout", "/failsafe"); + pm->cpu->vhyp = PPC_VIRTUAL_HYPERVISOR(machine); +} + +enum pegasos2_rtas_tokens { + RTAS_RESTART_RTAS = 0, + RTAS_NVRAM_FETCH = 1, + RTAS_NVRAM_STORE = 2, + RTAS_GET_TIME_OF_DAY = 3, + RTAS_SET_TIME_OF_DAY = 4, + RTAS_EVENT_SCAN = 6, + RTAS_CHECK_EXCEPTION = 7, + RTAS_READ_PCI_CONFIG = 8, + RTAS_WRITE_PCI_CONFIG = 9, + RTAS_DISPLAY_CHARACTER = 10, + RTAS_SET_INDICATOR = 11, + RTAS_POWER_OFF = 17, + 
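+    /*
+     * Editor's note: of these tokens only get-time-of-day,
+     * read/write-pci-config, display-character and power-off get real
+     * handling in pegasos2_rtas() below; anything else is logged as
+     * unimplemented and reported back to the guest as success.
+     */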
RTAS_SUSPEND = 18, + RTAS_HIBERNATE = 19, + RTAS_SYSTEM_REBOOT = 20, +}; + +static target_ulong pegasos2_rtas(PowerPCCPU *cpu, Pegasos2MachineState *pm, + target_ulong args_real) +{ + AddressSpace *as = CPU(cpu)->as; + uint32_t token = ldl_be_phys(as, args_real); + uint32_t nargs = ldl_be_phys(as, args_real + 4); + uint32_t nrets = ldl_be_phys(as, args_real + 8); + uint32_t args = args_real + 12; + uint32_t rets = args_real + 12 + nargs * 4; + + if (nrets < 1) { + qemu_log_mask(LOG_GUEST_ERROR, "Too few return values in RTAS call\n"); + return H_PARAMETER; + } + switch (token) { + case RTAS_GET_TIME_OF_DAY: + { + QObject *qo = object_property_get_qobject(qdev_get_machine(), + "rtc-time", &error_fatal); + QDict *qd = qobject_to(QDict, qo); + + if (nargs != 0 || nrets != 8 || !qd) { + stl_be_phys(as, rets, -1); + qobject_unref(qo); + return H_PARAMETER; + } + + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, qdict_get_int(qd, "tm_year") + 1900); + stl_be_phys(as, rets + 8, qdict_get_int(qd, "tm_mon") + 1); + stl_be_phys(as, rets + 12, qdict_get_int(qd, "tm_mday")); + stl_be_phys(as, rets + 16, qdict_get_int(qd, "tm_hour")); + stl_be_phys(as, rets + 20, qdict_get_int(qd, "tm_min")); + stl_be_phys(as, rets + 24, qdict_get_int(qd, "tm_sec")); + stl_be_phys(as, rets + 28, 0); + qobject_unref(qo); + return H_SUCCESS; + } + case RTAS_READ_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 2 || nrets != 2) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = pegasos2_pci_config_read(pm, !(addr >> 24), + addr & 0x0fffffff, len); + stl_be_phys(as, rets, 0); + stl_be_phys(as, rets + 4, val); + return H_SUCCESS; + } + case RTAS_WRITE_PCI_CONFIG: + { + uint32_t addr, len, val; + + if (nargs != 3 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + addr = ldl_be_phys(as, args); + len = ldl_be_phys(as, args + 4); + val = ldl_be_phys(as, args + 8); + pegasos2_pci_config_write(pm, !(addr >> 24), + addr & 0x0fffffff, len, val); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + case RTAS_DISPLAY_CHARACTER: + if (nargs != 1 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_log_mask(LOG_UNIMP, "%c", ldl_be_phys(as, args)); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + case RTAS_POWER_OFF: + { + if (nargs != 2 || nrets != 1) { + stl_be_phys(as, rets, -1); + return H_PARAMETER; + } + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } + default: + qemu_log_mask(LOG_UNIMP, "Unknown RTAS token %u (args=%u, rets=%u)\n", + token, nargs, nrets); + stl_be_phys(as, rets, 0); + return H_SUCCESS; + } +} + +static void pegasos2_hypercall(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(vhyp); + CPUPPCState *env = &cpu->env; + + /* The TCG path should also be holding the BQL at this point */ + g_assert(qemu_mutex_iothread_locked()); + + if (msr_pr) { + qemu_log_mask(LOG_GUEST_ERROR, "Hypercall made with MSR[PR]=1\n"); + env->gpr[3] = H_PRIVILEGE; + } else if (env->gpr[3] == KVMPPC_H_RTAS) { + env->gpr[3] = pegasos2_rtas(cpu, pm, env->gpr[4]); + } else if (env->gpr[3] == KVMPPC_H_VOF_CLIENT) { + int ret = vof_client_call(MACHINE(pm), pm->vof, pm->fdt_blob, + env->gpr[4]); + env->gpr[3] = (ret ? 
H_PARAMETER : H_SUCCESS);
+    } else {
+        qemu_log_mask(LOG_GUEST_ERROR, "Unsupported hypercall " TARGET_FMT_lx
+                      "\n", env->gpr[3]);
+        env->gpr[3] = -1;
+    }
+}
+
+static void vhyp_nop(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu)
+{
+}
+
+static target_ulong vhyp_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp)
+{
+    return POWERPC_CPU(current_cpu)->env.spr[SPR_SDR1];
+}
+
+static bool pegasos2_setprop(MachineState *ms, const char *path,
+                             const char *propname, void *val, int vallen)
+{
+    return true;
+}
+
+static void pegasos2_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+    VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc);
+
+    mc->desc = "Genesi/bPlan Pegasos II";
+    mc->init = pegasos2_init;
+    mc->reset = pegasos2_machine_reset;
+    mc->block_default_type = IF_IDE;
+    mc->default_boot_order = "cd";
+    mc->default_display = "std";
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("7400_v2.9");
+    mc->default_ram_id = "pegasos2.ram";
+    mc->default_ram_size = 512 * MiB;
+
+    vhc->hypercall = pegasos2_hypercall;
+    vhc->cpu_exec_enter = vhyp_nop;
+    vhc->cpu_exec_exit = vhyp_nop;
+    vhc->encode_hpt_for_kvm_pr = vhyp_encode_hpt_for_kvm_pr;
+
+    vmc->setprop = pegasos2_setprop;
+}
+
+static const TypeInfo pegasos2_machine_info = {
+    .name = TYPE_PEGASOS2_MACHINE,
+    .parent = TYPE_MACHINE,
+    .class_init = pegasos2_machine_class_init,
+    .instance_size = sizeof(Pegasos2MachineState),
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_PPC_VIRTUAL_HYPERVISOR },
+        { TYPE_VOF_MACHINE_IF },
+        { }
+    },
+};
+
+static void pegasos2_machine_register_types(void)
+{
+    type_register_static(&pegasos2_machine_info);
+}
+
+type_init(pegasos2_machine_register_types)
+
+/* FDT creation for passing to firmware */
+
+typedef struct {
+    void *fdt;
+    const char *path;
+} FDTInfo;
+
+/* We do everything in reverse order so it comes out right in the tree */
+
+static void dt_ide(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "spi");
+}
+
+static void dt_usb(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 0);
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 1);
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "usb");
+}
+
+static void dt_isa(PCIBus *bus, PCIDevice *d, FDTInfo *fi)
+{
+    GString *name = g_string_sized_new(64);
+    uint32_t cells[3];
+
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#size-cells", 1);
+    qemu_fdt_setprop_cell(fi->fdt, fi->path, "#address-cells", 2);
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "device_type", "isa");
+    qemu_fdt_setprop_string(fi->fdt, fi->path, "name", "isa");
+
+    /* additional devices */
+    g_string_printf(name, "%s/lpt@i3bc", fi->path);
+    qemu_fdt_add_subnode(fi->fdt, name->str);
+    qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+    cells[0] = cpu_to_be32(7);
+    cells[1] = 0;
+    qemu_fdt_setprop(fi->fdt, name->str, "interrupts",
+                     cells, 2 * sizeof(cells[0]));
+    cells[0] = cpu_to_be32(1);
+    cells[1] = cpu_to_be32(0x3bc);
+    cells[2] = cpu_to_be32(8);
+    qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0]));
+    qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "lpt");
+    qemu_fdt_setprop_string(fi->fdt, name->str, "name", "lpt");
+
+    g_string_printf(name, "%s/fdc@i3f0", fi->path);
+    qemu_fdt_add_subnode(fi->fdt, name->str);
+    qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0);
+    cells[0] = cpu_to_be32(6);
+    cells[1] =
0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x3f0); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "fdc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "fdc"); + + g_string_printf(name, "%s/timer@i40", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x40); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "timer"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "timer"); + + g_string_printf(name, "%s/rtc@i70", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_string(fi->fdt, name->str, "compatible", "ds1385-rtc"); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(8); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x70); + cells[2] = cpu_to_be32(2); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "rtc"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "rtc"); + + g_string_printf(name, "%s/keyboard@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + cells[0] = cpu_to_be32(1); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "keyboard"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "keyboard"); + + g_string_printf(name, "%s/8042@i60", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#interrupt-cells", 2); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#size-cells", 0); + qemu_fdt_setprop_cell(fi->fdt, name->str, "#address-cells", 1); + qemu_fdt_setprop_string(fi->fdt, name->str, "interrupt-controller", ""); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x60); + cells[2] = cpu_to_be32(5); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", ""); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "8042"); + + g_string_printf(name, "%s/serial@i2f8", fi->path); + qemu_fdt_add_subnode(fi->fdt, name->str); + qemu_fdt_setprop_cell(fi->fdt, name->str, "clock-frequency", 0); + cells[0] = cpu_to_be32(3); + cells[1] = 0; + qemu_fdt_setprop(fi->fdt, name->str, "interrupts", + cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(1); + cells[1] = cpu_to_be32(0x2f8); + cells[2] = cpu_to_be32(8); + qemu_fdt_setprop(fi->fdt, name->str, "reg", cells, 3 * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, name->str, "device_type", "serial"); + qemu_fdt_setprop_string(fi->fdt, name->str, "name", "serial"); + + g_string_free(name, TRUE); +} + +static struct { + const char *id; + const char *name; + void (*dtf)(PCIBus *bus, PCIDevice *d, FDTInfo *fi); +} device_map[] = { + { 
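+    /* Editor's note: each key is the "pciVVVV,DDDD" name derived from the
+       device's PCI vendor/device IDs (0x11ab is the Marvell host bridge,
+       0x1106 the VIA VT8231 functions); a match supplies the firmware node
+       name and an optional FDT fixup callback, unmatched devices keep the
+       raw pciVVVV,DDDD name. */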
"pci11ab,6460", "host", NULL }, + { "pci1106,8231", "isa", dt_isa }, + { "pci1106,571", "ide", dt_ide }, + { "pci1106,3044", "firewire", NULL }, + { "pci1106,3038", "usb", dt_usb }, + { "pci1106,8235", "other", NULL }, + { "pci1106,3058", "sound", NULL }, + { NULL, NULL } +}; + +static void add_pci_device(PCIBus *bus, PCIDevice *d, void *opaque) +{ + FDTInfo *fi = opaque; + GString *node = g_string_new(NULL); + uint32_t cells[(PCI_NUM_REGIONS + 1) * 5]; + int i, j; + const char *name = NULL; + g_autofree const gchar *pn = g_strdup_printf("pci%x,%x", + pci_get_word(&d->config[PCI_VENDOR_ID]), + pci_get_word(&d->config[PCI_DEVICE_ID])); + + for (i = 0; device_map[i].id; i++) { + if (!strcmp(pn, device_map[i].id)) { + name = device_map[i].name; + break; + } + } + g_string_printf(node, "%s/%s@%x", fi->path, (name ?: pn), + PCI_SLOT(d->devfn)); + if (PCI_FUNC(d->devfn)) { + g_string_append_printf(node, ",%x", PCI_FUNC(d->devfn)); + } + + qemu_fdt_add_subnode(fi->fdt, node->str); + if (device_map[i].dtf) { + FDTInfo cfi = { fi->fdt, node->str }; + device_map[i].dtf(bus, d, &cfi); + } + cells[0] = cpu_to_be32(d->devfn << 8); + cells[1] = 0; + cells[2] = 0; + cells[3] = 0; + cells[4] = 0; + j = 5; + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + cells[j] = cpu_to_be32(d->devfn << 8 | (PCI_BASE_ADDRESS_0 + i * 4)); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + cells[j] |= cpu_to_be32(1 << 24); + } else { + cells[j] |= cpu_to_be32(2 << 24); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_PREFETCH) { + cells[j] |= cpu_to_be32(4 << 28); + } + } + cells[j + 1] = 0; + cells[j + 2] = 0; + cells[j + 3] = cpu_to_be32(d->io_regions[i].size >> 32); + cells[j + 4] = cpu_to_be32(d->io_regions[i].size); + j += 5; + } + qemu_fdt_setprop(fi->fdt, node->str, "reg", cells, j * sizeof(cells[0])); + qemu_fdt_setprop_string(fi->fdt, node->str, "name", name ?: pn); + if (pci_get_byte(&d->config[PCI_INTERRUPT_PIN])) { + qemu_fdt_setprop_cell(fi->fdt, node->str, "interrupts", + pci_get_byte(&d->config[PCI_INTERRUPT_PIN])); + } + /* Pegasos2 firmware has subsystem-id amd subsystem-vendor-id swapped */ + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-vendor-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "subsystem-id", + pci_get_word(&d->config[PCI_SUBSYSTEM_VENDOR_ID])); + cells[0] = pci_get_long(&d->config[PCI_CLASS_REVISION]); + qemu_fdt_setprop_cell(fi->fdt, node->str, "class-code", cells[0] >> 8); + qemu_fdt_setprop_cell(fi->fdt, node->str, "revision-id", cells[0] & 0xff); + qemu_fdt_setprop_cell(fi->fdt, node->str, "device-id", + pci_get_word(&d->config[PCI_DEVICE_ID])); + qemu_fdt_setprop_cell(fi->fdt, node->str, "vendor-id", + pci_get_word(&d->config[PCI_VENDOR_ID])); + + g_string_free(node, TRUE); +} + +static void *build_fdt(MachineState *machine, int *fdt_size) +{ + Pegasos2MachineState *pm = PEGASOS2_MACHINE(machine); + PowerPCCPU *cpu = pm->cpu; + PCIBus *pci_bus; + FDTInfo fi; + uint32_t cells[16]; + void *fdt = create_device_tree(fdt_size); + + fi.fdt = fdt; + + /* root node */ + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,description", + "Pegasos CHRP PowerPC System"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,board", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "CODEGEN,vendor", "bplan GmbH"); + qemu_fdt_setprop_string(fdt, "/", "revision", "2B"); + qemu_fdt_setprop_string(fdt, "/", "model", "Pegasos2"); + qemu_fdt_setprop_string(fdt, "/", "device_type", "chrp"); + 
qemu_fdt_setprop_cell(fdt, "/", "#address-cells", 1); + qemu_fdt_setprop_string(fdt, "/", "name", "bplan,Pegasos2"); + + /* pci@c0000000 */ + qemu_fdt_add_subnode(fdt, "/pci@c0000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@c0000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "pci-bridge-number", 1); + cells[0] = cpu_to_be32(PCI0_MEM_BASE); + cells[1] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "reg", cells, 2 * sizeof(cells[0])); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI0_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI0_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI0_MEM_BASE); + cells[9] = cpu_to_be32(PCI0_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI0_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@c0000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@c0000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@c0000000", "name", "pci"); + + fi.path = "/pci@c0000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 0); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + /* pci@80000000 */ + qemu_fdt_add_subnode(fdt, "/pci@80000000"); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, "/pci@80000000", "bus-range", + cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "pci-bridge-number", 0); + cells[0] = cpu_to_be32(PCI1_MEM_BASE); + cells[1] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "8259-interrupt-acknowledge", + 0xf1000cb4); + cells[0] = cpu_to_be32(0x01000000); + cells[1] = 0; + cells[2] = 0; + cells[3] = cpu_to_be32(PCI1_IO_BASE); + cells[4] = 0; + cells[5] = cpu_to_be32(PCI1_IO_SIZE); + cells[6] = cpu_to_be32(0x02000000); + cells[7] = 0; + cells[8] = cpu_to_be32(PCI1_MEM_BASE); + cells[9] = cpu_to_be32(PCI1_MEM_BASE); + cells[10] = 0; + cells[11] = cpu_to_be32(PCI1_MEM_SIZE); + qemu_fdt_setprop(fdt, "/pci@80000000", "ranges", + cells, 12 * sizeof(cells[0])); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#size-cells", 2); + qemu_fdt_setprop_cell(fdt, "/pci@80000000", "#address-cells", 3); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "device_type", "pci"); + qemu_fdt_setprop_string(fdt, "/pci@80000000", "name", "pci"); + + fi.path = "/pci@80000000"; + pci_bus = mv64361_get_pci_bus(pm->mv, 1); + pci_for_each_device_reverse(pci_bus, 0, add_pci_device, &fi); + + qemu_fdt_add_subnode(fdt, "/failsafe"); + qemu_fdt_setprop_string(fdt, "/failsafe", "device_type", "serial"); + qemu_fdt_setprop_string(fdt, "/failsafe", "name", "failsafe"); + + qemu_fdt_add_subnode(fdt, "/rtas"); + qemu_fdt_setprop_cell(fdt, "/rtas", "system-reboot", RTAS_SYSTEM_REBOOT); + qemu_fdt_setprop_cell(fdt, "/rtas", "hibernate", RTAS_HIBERNATE); + qemu_fdt_setprop_cell(fdt, "/rtas", "suspend", RTAS_SUSPEND); + qemu_fdt_setprop_cell(fdt, "/rtas", "power-off", RTAS_POWER_OFF); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-indicator", RTAS_SET_INDICATOR); + qemu_fdt_setprop_cell(fdt, "/rtas", "display-character", + RTAS_DISPLAY_CHARACTER); + qemu_fdt_setprop_cell(fdt, "/rtas", "write-pci-config", + RTAS_WRITE_PCI_CONFIG); + qemu_fdt_setprop_cell(fdt, "/rtas", "read-pci-config", + 
RTAS_READ_PCI_CONFIG); + /* Pegasos2 firmware misspells check-exception and guests use that */ + qemu_fdt_setprop_cell(fdt, "/rtas", "check-execption", + RTAS_CHECK_EXCEPTION); + qemu_fdt_setprop_cell(fdt, "/rtas", "event-scan", RTAS_EVENT_SCAN); + qemu_fdt_setprop_cell(fdt, "/rtas", "set-time-of-day", + RTAS_SET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "get-time-of-day", + RTAS_GET_TIME_OF_DAY); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-store", RTAS_NVRAM_STORE); + qemu_fdt_setprop_cell(fdt, "/rtas", "nvram-fetch", RTAS_NVRAM_FETCH); + qemu_fdt_setprop_cell(fdt, "/rtas", "restart-rtas", RTAS_RESTART_RTAS); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-error-log-max", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-event-scan-rate", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-display-device", 0); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-size", 20); + qemu_fdt_setprop_cell(fdt, "/rtas", "rtas-version", 1); + + /* cpus */ + qemu_fdt_add_subnode(fdt, "/cpus"); + qemu_fdt_setprop_cell(fdt, "/cpus", "#cpus", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#address-cells", 1); + qemu_fdt_setprop_cell(fdt, "/cpus", "#size-cells", 0); + qemu_fdt_setprop_string(fdt, "/cpus", "name", "cpus"); + + /* FIXME Get CPU name from CPU object */ + const char *cp = "/cpus/PowerPC,G4"; + qemu_fdt_add_subnode(fdt, cp); + qemu_fdt_setprop_cell(fdt, cp, "l2cr", 0); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-block-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "d-cache-line-size", + cpu->env.dcache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-size", 0x8000); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-block-size", + cpu->env.icache_line_size); + qemu_fdt_setprop_cell(fdt, cp, "i-cache-line-size", + cpu->env.icache_line_size); + if (cpu->env.id_tlbs) { + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "i-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "d-tlb-size", cpu->env.tlb_per_way); + qemu_fdt_setprop_string(fdt, cp, "tlb-split", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "tlb-sets", cpu->env.nb_ways); + qemu_fdt_setprop_cell(fdt, cp, "tlb-size", cpu->env.nb_tlb); + qemu_fdt_setprop_string(fdt, cp, "state", "running"); + if (cpu->env.insns_flags & PPC_ALTIVEC) { + qemu_fdt_setprop_string(fdt, cp, "altivec", ""); + qemu_fdt_setprop_string(fdt, cp, "data-streams", ""); + } + /* + * FIXME What flags do data-streams, external-control and + * performance-monitor depend on? 
+ */ + qemu_fdt_setprop_string(fdt, cp, "external-control", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FSQRT) { + qemu_fdt_setprop_string(fdt, cp, "general-purpose", ""); + } + qemu_fdt_setprop_string(fdt, cp, "performance-monitor", ""); + if (cpu->env.insns_flags & PPC_FLOAT_FRES) { + qemu_fdt_setprop_string(fdt, cp, "graphics", ""); + } + qemu_fdt_setprop_cell(fdt, cp, "reservation-granule-size", 4); + qemu_fdt_setprop_cell(fdt, cp, "timebase-frequency", + cpu->env.tb_env->tb_freq); + qemu_fdt_setprop_cell(fdt, cp, "bus-frequency", BUS_FREQ_HZ); + qemu_fdt_setprop_cell(fdt, cp, "clock-frequency", BUS_FREQ_HZ * 7.5); + qemu_fdt_setprop_cell(fdt, cp, "cpu-version", cpu->env.spr[SPR_PVR]); + cells[0] = 0; + cells[1] = 0; + qemu_fdt_setprop(fdt, cp, "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, cp, "device_type", "cpu"); + qemu_fdt_setprop_string(fdt, cp, "name", strrchr(cp, '/') + 1); + + /* memory */ + qemu_fdt_add_subnode(fdt, "/memory@0"); + cells[0] = 0; + cells[1] = cpu_to_be32(machine->ram_size); + qemu_fdt_setprop(fdt, "/memory@0", "reg", cells, 2 * sizeof(cells[0])); + qemu_fdt_setprop_string(fdt, "/memory@0", "device_type", "memory"); + qemu_fdt_setprop_string(fdt, "/memory@0", "name", "memory"); + + qemu_fdt_add_subnode(fdt, "/chosen"); + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + machine->kernel_cmdline ?: ""); + qemu_fdt_setprop_string(fdt, "/chosen", "name", "chosen"); + + qemu_fdt_add_subnode(fdt, "/openprom"); + qemu_fdt_setprop_string(fdt, "/openprom", "model", "Pegasos2,1.1"); + + return fdt; +} diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c new file mode 100644 index 000000000..71e45515f --- /dev/null +++ b/hw/ppc/pnv.c @@ -0,0 +1,2132 @@ +/* + * QEMU PowerPC PowerNV machine model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "sysemu/qtest.h" +#include "sysemu/sysemu.h" +#include "sysemu/numa.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "sysemu/cpus.h" +#include "sysemu/device_tree.h" +#include "sysemu/hw_accel.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" +#include "hw/loader.h" +#include "hw/nmi.h" +#include "qapi/visitor.h" +#include "monitor/monitor.h" +#include "hw/intc/intc.h" +#include "hw/ipmi/ipmi.h" +#include "target/ppc/mmu-hash64.h" +#include "hw/pci/msi.h" + +#include "hw/ppc/xics.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/pnv_pnor.h" + +#include "hw/isa/isa.h" +#include "hw/char/serial.h" +#include "hw/rtc/mc146818rtc.h" + +#include <libfdt.h> + +#define FDT_MAX_SIZE (1 * MiB) + +#define FW_FILE_NAME "skiboot.lid" +#define FW_LOAD_ADDR 0x0 +#define FW_MAX_SIZE (16 * MiB) + +#define KERNEL_LOAD_ADDR 0x20000000 +#define KERNEL_MAX_SIZE (128 * MiB) +#define INITRD_LOAD_ADDR 0x28000000 +#define INITRD_MAX_SIZE (128 * MiB) + +static const char *pnv_chip_core_typename(const PnvChip *o) +{ + const char *chip_type = object_class_get_name(object_get_class(OBJECT(o))); + int len = strlen(chip_type) - strlen(PNV_CHIP_TYPE_SUFFIX); + char *s = g_strdup_printf(PNV_CORE_TYPE_NAME("%.*s"), len, chip_type); + const char *core_type = object_class_get_name(object_class_by_name(s)); + g_free(s); + return core_type; +} + +/* + * On Power Systems E880 (POWER8), the max cpus (threads) should be : + * 4 * 4 sockets * 12 cores * 8 threads = 1536 + * Let's make it 2^11 + */ +#define MAX_CPUS 2048 + +/* + * Memory nodes are created by hostboot, one for each range of memory + * that has a different "affinity". In practice, it means one range + * per chip. + */ +static void pnv_dt_memory(void *fdt, int chip_id, hwaddr start, hwaddr size) +{ + char *mem_name; + uint64_t mem_reg_property[2]; + int off; + + mem_reg_property[0] = cpu_to_be64(start); + mem_reg_property[1] = cpu_to_be64(size); + + mem_name = g_strdup_printf("memory@%"HWADDR_PRIx, start); + off = fdt_add_subnode(fdt, 0, mem_name); + g_free(mem_name); + + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + _FDT((fdt_setprop_cell(fdt, off, "ibm,chip-id", chip_id))); +} + +static int get_cpus_node(void *fdt) +{ + int cpus_offset = fdt_path_offset(fdt, "/cpus"); + + if (cpus_offset < 0) { + cpus_offset = fdt_add_subnode(fdt, 0, "cpus"); + if (cpus_offset) { + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0))); + } + } + _FDT(cpus_offset); + return cpus_offset; +} + +/* + * The PowerNV cores (and threads) need to use real HW ids and not an + * incremental index like it has been done on other platforms. This HW + * id is stored in the CPU PIR, it is used to create cpu nodes in the + * device tree, used in XSCOM to address cores and in interrupt + * servers. 
+ */ +static void pnv_dt_core(PnvChip *chip, PnvCore *pc, void *fdt) +{ + PowerPCCPU *cpu = pc->threads[0]; + CPUState *cs = CPU(cpu); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int smt_threads = CPU_CORE(pc)->nr_threads; + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); + uint32_t servers_prop[smt_threads]; + int i; + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), + 0xffffffff, 0xffffffff}; + uint32_t tbfreq = PNV_TIMEBASE_FREQ; + uint32_t cpufreq = 1000000000; + uint32_t page_sizes_prop[64]; + size_t page_sizes_prop_size; + const uint8_t pa_features[] = { 24, 0, + 0xf6, 0x3f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; + int offset; + char *nodename; + int cpus_offset = get_cpus_node(fdt); + + nodename = g_strdup_printf("%s@%x", dc->fw_name, pc->pir); + offset = fdt_add_subnode(fdt, cpus_offset, nodename); + _FDT(offset); + g_free(nodename); + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", chip->chip_id))); + + _FDT((fdt_setprop_cell(fdt, offset, "reg", pc->pir))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,pir", pc->pir))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); + + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", + env->dcache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", + env->icache_line_size))); + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", + env->icache_line_size))); + + if (pcc->l1_dcache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", + pcc->l1_dcache_size))); + } else { + warn_report("Unknown L1 dcache size for cpu"); + } + if (pcc->l1_icache_size) { + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", + pcc->l1_icache_size))); + } else { + warn_report("Unknown L1 icache size for cpu"); + } + + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", + cpu->hash64_opts->slb_size))); + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); + + if (ppc_has_spr(cpu, SPR_PURR)) { + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); + } + + if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) { + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", + segs, sizeof(segs)))); + } + + /* + * Advertise VMX/VSX (vector extensions) if available + * 0 / no property == no vector extensions + * 1 == VMX / Altivec available + * 2 == VSX available + */ + if (env->insns_flags & PPC_ALTIVEC) { + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 
2 : 1; + + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); + } + + /* + * Advertise DFP (Decimal Floating Point) if available + * 0 / no property == no DFP + * 1 == DFP available + */ + if (env->insns_flags2 & PPC2_DFP) { + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); + } + + page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop, + sizeof(page_sizes_prop)); + if (page_sizes_prop_size) { + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", + page_sizes_prop, page_sizes_prop_size))); + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", + pa_features, sizeof(pa_features)))); + + /* Build interrupt servers properties */ + for (i = 0; i < smt_threads; i++) { + servers_prop[i] = cpu_to_be32(pc->pir + i); + } + _FDT((fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)))); +} + +static void pnv_dt_icp(PnvChip *chip, void *fdt, uint32_t pir, + uint32_t nr_threads) +{ + uint64_t addr = PNV_ICP_BASE(chip) | (pir << 12); + char *name; + const char compat[] = "IBM,power8-icp\0IBM,ppc-xicp"; + uint32_t irange[2], i, rsize; + uint64_t *reg; + int offset; + + irange[0] = cpu_to_be32(pir); + irange[1] = cpu_to_be32(nr_threads); + + rsize = sizeof(uint64_t) * 2 * nr_threads; + reg = g_malloc(rsize); + for (i = 0; i < nr_threads; i++) { + reg[i * 2] = cpu_to_be64(addr | ((pir + i) * 0x1000)); + reg[i * 2 + 1] = cpu_to_be64(0x1000); + } + + name = g_strdup_printf("interrupt-controller@%"PRIX64, addr); + offset = fdt_add_subnode(fdt, 0, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); + _FDT((fdt_setprop(fdt, offset, "reg", reg, rsize))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", + "PowerPC-External-Interrupt-Presentation"))); + _FDT((fdt_setprop(fdt, offset, "interrupt-controller", NULL, 0))); + _FDT((fdt_setprop(fdt, offset, "ibm,interrupt-server-ranges", + irange, sizeof(irange)))); + _FDT((fdt_setprop_cell(fdt, offset, "#interrupt-cells", 1))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0))); + g_free(reg); +} + +static void pnv_chip_power8_dt_populate(PnvChip *chip, void *fdt) +{ + static const char compat[] = "ibm,power8-xscom\0ibm,xscom"; + int i; + + pnv_dt_xscom(chip, fdt, 0, + cpu_to_be64(PNV_XSCOM_BASE(chip)), + cpu_to_be64(PNV_XSCOM_SIZE), + compat, sizeof(compat)); + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + + pnv_dt_core(chip, pnv_core, fdt); + + /* Interrupt Control Presenters (ICP). One per core. 
*/ + pnv_dt_icp(chip, fdt, pnv_core->pir, CPU_CORE(pnv_core)->nr_threads); + } + + if (chip->ram_size) { + pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size); + } +} + +static void pnv_chip_power9_dt_populate(PnvChip *chip, void *fdt) +{ + static const char compat[] = "ibm,power9-xscom\0ibm,xscom"; + int i; + + pnv_dt_xscom(chip, fdt, 0, + cpu_to_be64(PNV9_XSCOM_BASE(chip)), + cpu_to_be64(PNV9_XSCOM_SIZE), + compat, sizeof(compat)); + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + + pnv_dt_core(chip, pnv_core, fdt); + } + + if (chip->ram_size) { + pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size); + } + + pnv_dt_lpc(chip, fdt, 0, PNV9_LPCM_BASE(chip), PNV9_LPCM_SIZE); +} + +static void pnv_chip_power10_dt_populate(PnvChip *chip, void *fdt) +{ + static const char compat[] = "ibm,power10-xscom\0ibm,xscom"; + int i; + + pnv_dt_xscom(chip, fdt, 0, + cpu_to_be64(PNV10_XSCOM_BASE(chip)), + cpu_to_be64(PNV10_XSCOM_SIZE), + compat, sizeof(compat)); + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + + pnv_dt_core(chip, pnv_core, fdt); + } + + if (chip->ram_size) { + pnv_dt_memory(fdt, chip->chip_id, chip->ram_start, chip->ram_size); + } + + pnv_dt_lpc(chip, fdt, 0, PNV10_LPCM_BASE(chip), PNV10_LPCM_SIZE); +} + +static void pnv_dt_rtc(ISADevice *d, void *fdt, int lpc_off) +{ + uint32_t io_base = d->ioport_id; + uint32_t io_regs[] = { + cpu_to_be32(1), + cpu_to_be32(io_base), + cpu_to_be32(2) + }; + char *name; + int node; + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop_string(fdt, node, "compatible", "pnpPNP,b00"))); +} + +static void pnv_dt_serial(ISADevice *d, void *fdt, int lpc_off) +{ + const char compatible[] = "ns16550\0pnpPNP,501"; + uint32_t io_base = d->ioport_id; + uint32_t io_regs[] = { + cpu_to_be32(1), + cpu_to_be32(io_base), + cpu_to_be32(8) + }; + char *name; + int node; + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop(fdt, node, "compatible", compatible, + sizeof(compatible)))); + + _FDT((fdt_setprop_cell(fdt, node, "clock-frequency", 1843200))); + _FDT((fdt_setprop_cell(fdt, node, "current-speed", 115200))); + _FDT((fdt_setprop_cell(fdt, node, "interrupts", d->isairq[0]))); + _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent", + fdt_get_phandle(fdt, lpc_off)))); + + /* This is needed by Linux */ + _FDT((fdt_setprop_string(fdt, node, "device_type", "serial"))); +} + +static void pnv_dt_ipmi_bt(ISADevice *d, void *fdt, int lpc_off) +{ + const char compatible[] = "bt\0ipmi-bt"; + uint32_t io_base; + uint32_t io_regs[] = { + cpu_to_be32(1), + 0, /* 'io_base' retrieved from the 'ioport' property of 'isa-ipmi-bt' */ + cpu_to_be32(3) + }; + uint32_t irq; + char *name; + int node; + + io_base = object_property_get_int(OBJECT(d), "ioport", &error_fatal); + io_regs[1] = cpu_to_be32(io_base); + + irq = object_property_get_int(OBJECT(d), "irq", &error_fatal); + + name = g_strdup_printf("%s@i%x", qdev_fw_name(DEVICE(d)), io_base); + node = fdt_add_subnode(fdt, lpc_off, name); + _FDT(node); + g_free(name); + + _FDT((fdt_setprop(fdt, node, "reg", io_regs, sizeof(io_regs)))); + _FDT((fdt_setprop(fdt, node, "compatible", 
compatible, + sizeof(compatible)))); + + /* Mark it as reserved to avoid Linux trying to claim it */ + _FDT((fdt_setprop_string(fdt, node, "status", "reserved"))); + _FDT((fdt_setprop_cell(fdt, node, "interrupts", irq))); + _FDT((fdt_setprop_cell(fdt, node, "interrupt-parent", + fdt_get_phandle(fdt, lpc_off)))); +} + +typedef struct ForeachPopulateArgs { + void *fdt; + int offset; +} ForeachPopulateArgs; + +static int pnv_dt_isa_device(DeviceState *dev, void *opaque) +{ + ForeachPopulateArgs *args = opaque; + ISADevice *d = ISA_DEVICE(dev); + + if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) { + pnv_dt_rtc(d, args->fdt, args->offset); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_ISA_SERIAL)) { + pnv_dt_serial(d, args->fdt, args->offset); + } else if (object_dynamic_cast(OBJECT(dev), "isa-ipmi-bt")) { + pnv_dt_ipmi_bt(d, args->fdt, args->offset); + } else { + error_report("unknown isa device %s@i%x", qdev_fw_name(dev), + d->ioport_id); + } + + return 0; +} + +/* + * The default LPC bus of a multichip system is on chip 0. It's + * recognized by the firmware (skiboot) using a "primary" property. + */ +static void pnv_dt_isa(PnvMachineState *pnv, void *fdt) +{ + int isa_offset = fdt_path_offset(fdt, pnv->chips[0]->dt_isa_nodename); + ForeachPopulateArgs args = { + .fdt = fdt, + .offset = isa_offset, + }; + uint32_t phandle; + + _FDT((fdt_setprop(fdt, isa_offset, "primary", NULL, 0))); + + phandle = qemu_fdt_alloc_phandle(fdt); + assert(phandle > 0); + _FDT((fdt_setprop_cell(fdt, isa_offset, "phandle", phandle))); + + /* + * ISA devices are not necessarily parented to the ISA bus so we + * can not use object_child_foreach() + */ + qbus_walk_children(BUS(pnv->isa_bus), pnv_dt_isa_device, NULL, NULL, NULL, + &args); +} + +static void pnv_dt_power_mgt(PnvMachineState *pnv, void *fdt) +{ + int off; + + off = fdt_add_subnode(fdt, 0, "ibm,opal"); + off = fdt_add_subnode(fdt, off, "power-mgt"); + + _FDT(fdt_setprop_cell(fdt, off, "ibm,enabled-stop-levels", 0xc0000000)); +} + +static void *pnv_dt_create(MachineState *machine) +{ + PnvMachineClass *pmc = PNV_MACHINE_GET_CLASS(machine); + PnvMachineState *pnv = PNV_MACHINE(machine); + void *fdt; + char *buf; + int off; + int i; + + fdt = g_malloc0(FDT_MAX_SIZE); + _FDT((fdt_create_empty_tree(fdt, FDT_MAX_SIZE))); + + /* /qemu node */ + _FDT((fdt_add_subnode(fdt, 0, "qemu"))); + + /* Root node */ + _FDT((fdt_setprop_cell(fdt, 0, "#address-cells", 0x2))); + _FDT((fdt_setprop_cell(fdt, 0, "#size-cells", 0x2))); + _FDT((fdt_setprop_string(fdt, 0, "model", + "IBM PowerNV (emulated by qemu)"))); + _FDT((fdt_setprop(fdt, 0, "compatible", pmc->compat, pmc->compat_size))); + + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + _FDT((fdt_setprop_string(fdt, 0, "vm,uuid", buf))); + if (qemu_uuid_set) { + _FDT((fdt_property_string(fdt, "system-id", buf))); + } + g_free(buf); + + off = fdt_add_subnode(fdt, 0, "chosen"); + if (machine->kernel_cmdline) { + _FDT((fdt_setprop_string(fdt, off, "bootargs", + machine->kernel_cmdline))); + } + + if (pnv->initrd_size) { + uint32_t start_prop = cpu_to_be32(pnv->initrd_base); + uint32_t end_prop = cpu_to_be32(pnv->initrd_base + pnv->initrd_size); + + _FDT((fdt_setprop(fdt, off, "linux,initrd-start", + &start_prop, sizeof(start_prop)))); + _FDT((fdt_setprop(fdt, off, "linux,initrd-end", + &end_prop, sizeof(end_prop)))); + } + + /* Populate device tree for each chip */ + for (i = 0; i < pnv->num_chips; i++) { + PNV_CHIP_GET_CLASS(pnv->chips[i])->dt_populate(pnv->chips[i], fdt); + } + + /* Populate ISA devices on chip 0 
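+     * (pnv_dt_isa() walks the ISA bus with qbus_walk_children() and lets
+     * pnv_dt_isa_device() above emit the rtc, serial and ipmi-bt nodes)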
*/ + pnv_dt_isa(pnv, fdt); + + if (pnv->bmc) { + pnv_dt_bmc_sensors(pnv->bmc, fdt); + } + + /* Create an extra node for power management on machines that support it */ + if (pmc->dt_power_mgt) { + pmc->dt_power_mgt(pnv, fdt); + } + + return fdt; +} + +static void pnv_powerdown_notify(Notifier *n, void *opaque) +{ + PnvMachineState *pnv = container_of(n, PnvMachineState, powerdown_notifier); + + if (pnv->bmc) { + pnv_bmc_powerdown(pnv->bmc); + } +} + +static void pnv_reset(MachineState *machine) +{ + PnvMachineState *pnv = PNV_MACHINE(machine); + IPMIBmc *bmc; + void *fdt; + + qemu_devices_reset(); + + /* + * The machine should provide by default an internal BMC simulator. + * If not, try to use the BMC device that was provided on the command + * line. + */ + bmc = pnv_bmc_find(&error_fatal); + if (!pnv->bmc) { + if (!bmc) { + if (!qtest_enabled()) { + warn_report("machine has no BMC device. Use '-device " + "ipmi-bmc-sim,id=bmc0 -device isa-ipmi-bt,bmc=bmc0,irq=10' " + "to define one"); + } + } else { + pnv_bmc_set_pnor(bmc, pnv->pnor); + pnv->bmc = bmc; + } + } + + fdt = pnv_dt_create(machine); + + /* Pack resulting tree */ + _FDT((fdt_pack(fdt))); + + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt)); + + g_free(fdt); +} + +static ISABus *pnv_chip_power8_isa_create(PnvChip *chip, Error **errp) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + return pnv_lpc_isa_create(&chip8->lpc, true, errp); +} + +static ISABus *pnv_chip_power8nvl_isa_create(PnvChip *chip, Error **errp) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + return pnv_lpc_isa_create(&chip8->lpc, false, errp); +} + +static ISABus *pnv_chip_power9_isa_create(PnvChip *chip, Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + return pnv_lpc_isa_create(&chip9->lpc, false, errp); +} + +static ISABus *pnv_chip_power10_isa_create(PnvChip *chip, Error **errp) +{ + Pnv10Chip *chip10 = PNV10_CHIP(chip); + return pnv_lpc_isa_create(&chip10->lpc, false, errp); +} + +static ISABus *pnv_isa_create(PnvChip *chip, Error **errp) +{ + return PNV_CHIP_GET_CLASS(chip)->isa_create(chip, errp); +} + +static void pnv_chip_power8_pic_print_info(PnvChip *chip, Monitor *mon) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + int i; + + ics_pic_print_info(&chip8->psi.ics, mon); + for (i = 0; i < chip->num_phbs; i++) { + pnv_phb3_msi_pic_print_info(&chip8->phbs[i].msis, mon); + ics_pic_print_info(&chip8->phbs[i].lsis, mon); + } +} + +static void pnv_chip_power9_pic_print_info(PnvChip *chip, Monitor *mon) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + int i, j; + + pnv_xive_pic_print_info(&chip9->xive, mon); + pnv_psi_pic_print_info(&chip9->psi, mon); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + for (j = 0; j < pec->num_stacks; j++) { + pnv_phb4_pic_print_info(&pec->stacks[j].phb, mon); + } + } +} + +static uint64_t pnv_chip_power8_xscom_core_base(PnvChip *chip, + uint32_t core_id) +{ + return PNV_XSCOM_EX_BASE(core_id); +} + +static uint64_t pnv_chip_power9_xscom_core_base(PnvChip *chip, + uint32_t core_id) +{ + return PNV9_XSCOM_EC_BASE(core_id); +} + +static uint64_t pnv_chip_power10_xscom_core_base(PnvChip *chip, + uint32_t core_id) +{ + return PNV10_XSCOM_EC_BASE(core_id); +} + +static bool pnv_match_cpu(const char *default_type, const char *cpu_type) +{ + PowerPCCPUClass *ppc_default = + POWERPC_CPU_CLASS(object_class_by_name(default_type)); + PowerPCCPUClass *ppc = + POWERPC_CPU_CLASS(object_class_by_name(cpu_type)); + + return ppc_default->pvr_match(ppc_default, 
ppc->pvr); +} + +static void pnv_ipmi_bt_init(ISABus *bus, IPMIBmc *bmc, uint32_t irq) +{ + ISADevice *dev = isa_new("isa-ipmi-bt"); + + object_property_set_link(OBJECT(dev), "bmc", OBJECT(bmc), &error_fatal); + object_property_set_int(OBJECT(dev), "irq", irq, &error_fatal); + isa_realize_and_unref(dev, bus, &error_fatal); +} + +static void pnv_chip_power10_pic_print_info(PnvChip *chip, Monitor *mon) +{ + Pnv10Chip *chip10 = PNV10_CHIP(chip); + + pnv_psi_pic_print_info(&chip10->psi, mon); +} + +/* Always give the first 1GB to chip 0 else we won't boot */ +static uint64_t pnv_chip_get_ram_size(PnvMachineState *pnv, int chip_id) +{ + MachineState *machine = MACHINE(pnv); + uint64_t ram_per_chip; + + assert(machine->ram_size >= 1 * GiB); + + ram_per_chip = machine->ram_size / pnv->num_chips; + if (ram_per_chip >= 1 * GiB) { + return QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); + } + + assert(pnv->num_chips > 1); + + ram_per_chip = (machine->ram_size - 1 * GiB) / (pnv->num_chips - 1); + return chip_id == 0 ? 1 * GiB : QEMU_ALIGN_DOWN(ram_per_chip, 1 * MiB); +} + +static void pnv_init(MachineState *machine) +{ + const char *bios_name = machine->firmware ?: FW_FILE_NAME; + PnvMachineState *pnv = PNV_MACHINE(machine); + MachineClass *mc = MACHINE_GET_CLASS(machine); + char *fw_filename; + long fw_size; + uint64_t chip_ram_start = 0; + int i; + char *chip_typename; + DriveInfo *pnor = drive_get(IF_MTD, 0, 0); + DeviceState *dev; + + /* allocate RAM */ + if (machine->ram_size < mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be bigger than %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } + memory_region_add_subregion(get_system_memory(), 0, machine->ram); + + /* + * Create our simple PNOR device + */ + dev = qdev_new(TYPE_PNV_PNOR); + if (pnor) { + qdev_prop_set_drive(dev, "drive", blk_by_legacy_dinfo(pnor)); + } + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + pnv->pnor = PNV_PNOR(dev); + + /* load skiboot firmware */ + fw_filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!fw_filename) { + error_report("Could not find OPAL firmware '%s'", bios_name); + exit(1); + } + + fw_size = load_image_targphys(fw_filename, pnv->fw_load_addr, FW_MAX_SIZE); + if (fw_size < 0) { + error_report("Could not load OPAL firmware '%s'", fw_filename); + exit(1); + } + g_free(fw_filename); + + /* load kernel */ + if (machine->kernel_filename) { + long kernel_size; + + kernel_size = load_image_targphys(machine->kernel_filename, + KERNEL_LOAD_ADDR, KERNEL_MAX_SIZE); + if (kernel_size < 0) { + error_report("Could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + } + + /* load initrd */ + if (machine->initrd_filename) { + pnv->initrd_base = INITRD_LOAD_ADDR; + pnv->initrd_size = load_image_targphys(machine->initrd_filename, + pnv->initrd_base, INITRD_MAX_SIZE); + if (pnv->initrd_size < 0) { + error_report("Could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + } + + /* MSIs are supported on this platform */ + msi_nonbroken = true; + + /* + * Check compatibility of the specified CPU with the machine + * default. 
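+     * For example (editor's illustration): "-machine powernv9 -cpu
+     * power8_v2.0" is rejected here, since the POWER9 class's
+     * pvr_match() does not accept a POWER8 PVR.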
+ */ + if (!pnv_match_cpu(mc->default_cpu_type, machine->cpu_type)) { + error_report("invalid CPU model '%s' for %s machine", + machine->cpu_type, mc->name); + exit(1); + } + + /* Create the processor chips */ + i = strlen(machine->cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX); + chip_typename = g_strdup_printf(PNV_CHIP_TYPE_NAME("%.*s"), + i, machine->cpu_type); + if (!object_class_by_name(chip_typename)) { + error_report("invalid chip model '%.*s' for %s machine", + i, machine->cpu_type, mc->name); + exit(1); + } + + pnv->num_chips = + machine->smp.max_cpus / (machine->smp.cores * machine->smp.threads); + /* + * TODO: should we decide on how many chips we can create based + * on #cores and Venice vs. Murano vs. Naples chip type etc. + */ + if (!is_power_of_2(pnv->num_chips) || pnv->num_chips > 16) { + error_report("invalid number of chips: '%d'", pnv->num_chips); + error_printf( + "Try '-smp sockets=N'. Valid values are: 1, 2, 4, 8 and 16.\n"); + exit(1); + } + + pnv->chips = g_new0(PnvChip *, pnv->num_chips); + for (i = 0; i < pnv->num_chips; i++) { + char chip_name[32]; + Object *chip = OBJECT(qdev_new(chip_typename)); + uint64_t chip_ram_size = pnv_chip_get_ram_size(pnv, i); + + pnv->chips[i] = PNV_CHIP(chip); + + /* Distribute RAM among the chips */ + object_property_set_int(chip, "ram-start", chip_ram_start, + &error_fatal); + object_property_set_int(chip, "ram-size", chip_ram_size, + &error_fatal); + chip_ram_start += chip_ram_size; + + snprintf(chip_name, sizeof(chip_name), "chip[%d]", i); + object_property_add_child(OBJECT(pnv), chip_name, chip); + object_property_set_int(chip, "chip-id", i, &error_fatal); + object_property_set_int(chip, "nr-cores", machine->smp.cores, + &error_fatal); + object_property_set_int(chip, "nr-threads", machine->smp.threads, + &error_fatal); + /* + * The POWER8 machine uses the XICS interrupt interface. + * Propagate the XICS fabric to the chip and its controllers. + */ + if (object_dynamic_cast(OBJECT(pnv), TYPE_XICS_FABRIC)) { + object_property_set_link(chip, "xics", OBJECT(pnv), &error_abort); + } + if (object_dynamic_cast(OBJECT(pnv), TYPE_XIVE_FABRIC)) { + object_property_set_link(chip, "xive-fabric", OBJECT(pnv), + &error_abort); + } + sysbus_realize_and_unref(SYS_BUS_DEVICE(chip), &error_fatal); + } + g_free(chip_typename); + + /* Instantiate ISA bus on chip 0 */ + pnv->isa_bus = pnv_isa_create(pnv->chips[0], &error_fatal); + + /* Create serial port */ + serial_hds_isa_init(pnv->isa_bus, 0, MAX_ISA_SERIAL_PORTS); + + /* Create an RTC ISA device too */ + mc146818_rtc_init(pnv->isa_bus, 2000, NULL); + + /* + * Create the machine BMC simulator and the IPMI BT device for + * communication with the BMC + */ + if (defaults_enabled()) { + pnv->bmc = pnv_bmc_create(pnv->pnor); + pnv_ipmi_bt_init(pnv->isa_bus, pnv->bmc, 10); + } + + /* + * The PNOR is mapped on the LPC FW address space by the BMC. + * Since we cannot reach the remote BMC machine with LPC memops, + * always map it for now. 
+ */ + memory_region_add_subregion(pnv->chips[0]->fw_mr, PNOR_SPI_OFFSET, + &pnv->pnor->mmio); + + /* + * OpenPOWER systems use an IPMI SEL Event message to notify the + * host to power down + */ + pnv->powerdown_notifier.notify = pnv_powerdown_notify; + qemu_register_powerdown_notifier(&pnv->powerdown_notifier); +} + +/* + * 0:21 Reserved - Read as zeros + * 22:24 Chip ID + * 25:28 Core number + * 29:31 Thread ID + */ +static uint32_t pnv_chip_core_pir_p8(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 7) | (core_id << 3); +} + +static void pnv_chip_power8_intc_create(PnvChip *chip, PowerPCCPU *cpu, + Error **errp) +{ + Pnv8Chip *chip8 = PNV8_CHIP(chip); + Error *local_err = NULL; + Object *obj; + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + obj = icp_create(OBJECT(cpu), TYPE_PNV_ICP, chip8->xics, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_cpu->intc = obj; +} + + +static void pnv_chip_power8_intc_reset(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + icp_reset(ICP(pnv_cpu->intc)); +} + +static void pnv_chip_power8_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + icp_destroy(ICP(pnv_cpu->intc)); + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power8_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, + Monitor *mon) +{ + icp_pic_print_info(ICP(pnv_cpu_state(cpu)->intc), mon); +} + +/* + * 0:48 Reserved - Read as zeroes + * 49:52 Node ID + * 53:55 Chip ID + * 56 Reserved - Read as zero + * 57:61 Core number + * 62:63 Thread ID + * + * We only care about the lower bits. uint32_t is fine for the moment. + */ +static uint32_t pnv_chip_core_pir_p9(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 8) | (core_id << 2); +} + +static uint32_t pnv_chip_core_pir_p10(PnvChip *chip, uint32_t core_id) +{ + return (chip->chip_id << 8) | (core_id << 2); +} + +static void pnv_chip_power9_intc_create(PnvChip *chip, PowerPCCPU *cpu, + Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + Error *local_err = NULL; + Object *obj; + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + /* + * The core creates its interrupt presenter but the XIVE interrupt + * controller object is initialized afterwards. Hopefully, it's + * only used at runtime. 
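+     * (chip9->xive itself is only realized further down, in
+     * pnv_chip_power9_realize(), after the cores and their presenters
+     * have been created)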
+ */ + obj = xive_tctx_create(OBJECT(cpu), XIVE_PRESENTER(&chip9->xive), + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_cpu->intc = obj; +} + +static void pnv_chip_power9_intc_reset(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + xive_tctx_reset(XIVE_TCTX(pnv_cpu->intc)); +} + +static void pnv_chip_power9_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + xive_tctx_destroy(XIVE_TCTX(pnv_cpu->intc)); + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power9_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, + Monitor *mon) +{ + xive_tctx_pic_print_info(XIVE_TCTX(pnv_cpu_state(cpu)->intc), mon); +} + +static void pnv_chip_power10_intc_create(PnvChip *chip, PowerPCCPU *cpu, + Error **errp) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + /* Will be defined when the interrupt controller is */ + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power10_intc_reset(PnvChip *chip, PowerPCCPU *cpu) +{ + ; +} + +static void pnv_chip_power10_intc_destroy(PnvChip *chip, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + + pnv_cpu->intc = NULL; +} + +static void pnv_chip_power10_intc_print_info(PnvChip *chip, PowerPCCPU *cpu, + Monitor *mon) +{ +} + +/* + * Allowed core identifiers on a POWER8 Processor Chip : + * + * <EX0 reserved> + * EX1 - Venice only + * EX2 - Venice only + * EX3 - Venice only + * EX4 + * EX5 + * EX6 + * <EX7,8 reserved> <reserved> + * EX9 - Venice only + * EX10 - Venice only + * EX11 - Venice only + * EX12 + * EX13 + * EX14 + * <EX15 reserved> + */ +#define POWER8E_CORE_MASK (0x7070ull) +#define POWER8_CORE_MASK (0x7e7eull) + +/* + * POWER9 has 24 cores, ids starting at 0x0 + */ +#define POWER9_CORE_MASK (0xffffffffffffffull) + + +#define POWER10_CORE_MASK (0xffffffffffffffull) + +static void pnv_chip_power8_instance_init(Object *obj) +{ + PnvChip *chip = PNV_CHIP(obj); + Pnv8Chip *chip8 = PNV8_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; + + object_property_add_link(obj, "xics", TYPE_XICS_FABRIC, + (Object **)&chip8->xics, + object_property_allow_set_link, + OBJ_PROP_LINK_STRONG); + + object_initialize_child(obj, "psi", &chip8->psi, TYPE_PNV8_PSI); + + object_initialize_child(obj, "lpc", &chip8->lpc, TYPE_PNV8_LPC); + + object_initialize_child(obj, "occ", &chip8->occ, TYPE_PNV8_OCC); + + object_initialize_child(obj, "homer", &chip8->homer, TYPE_PNV8_HOMER); + + for (i = 0; i < pcc->num_phbs; i++) { + object_initialize_child(obj, "phb[*]", &chip8->phbs[i], TYPE_PNV_PHB3); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; +} + +static void pnv_chip_icp_realize(Pnv8Chip *chip8, Error **errp) + { + PnvChip *chip = PNV_CHIP(chip8); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + int i, j; + char *name; + + name = g_strdup_printf("icp-%x", chip->chip_id); + memory_region_init(&chip8->icp_mmio, OBJECT(chip), name, PNV_ICP_SIZE); + sysbus_init_mmio(SYS_BUS_DEVICE(chip), &chip8->icp_mmio); + g_free(name); + + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 1, PNV_ICP_BASE(chip)); + + /* Map the ICP registers for each thread */ + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pnv_core = chip->cores[i]; + int core_hwid = CPU_CORE(pnv_core)->core_id; + + for (j = 0; j < CPU_CORE(pnv_core)->nr_threads; j++) { + uint32_t pir = pcc->core_pir(chip, core_hwid) + j; + PnvICPState *icp = PNV_ICP(xics_icp_get(chip8->xics, pir)); + + memory_region_add_subregion(&chip8->icp_mmio, pir << 12, + &icp->mmio); + 
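+            /*
+             * Worked example (editor's illustration): with the P8 PIR
+             * layout (chip_id << 7 | core_id << 3 | thread), chip 0,
+             * core 1, thread 0 has PIR 0x8, so its ICP page lands at
+             * PNV_ICP_BASE(chip) + (0x8 << 12): one 4 KiB page per
+             * thread, matching the 0x1000 stride that pnv_dt_icp()
+             * advertises in the "reg" property.
+             */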
} + } +} + +static void pnv_chip_power8_realize(DeviceState *dev, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); + PnvChip *chip = PNV_CHIP(dev); + Pnv8Chip *chip8 = PNV8_CHIP(dev); + Pnv8Psi *psi8 = &chip8->psi; + Error *local_err = NULL; + int i; + + assert(chip8->xics); + + /* XSCOM bridge is first */ + pnv_xscom_realize(chip, PNV_XSCOM_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV_XSCOM_BASE(chip)); + + pcc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Processor Service Interface (PSI) Host Bridge */ + object_property_set_int(OBJECT(&chip8->psi), "bar", PNV_PSIHB_BASE(chip), + &error_fatal); + object_property_set_link(OBJECT(&chip8->psi), ICS_PROP_XICS, + OBJECT(chip8->xics), &error_abort); + if (!qdev_realize(DEVICE(&chip8->psi), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV_XSCOM_PSIHB_BASE, + &PNV_PSI(psi8)->xscom_regs); + + /* Create LPC controller */ + object_property_set_link(OBJECT(&chip8->lpc), "psi", OBJECT(&chip8->psi), + &error_abort); + qdev_realize(DEVICE(&chip8->lpc), NULL, &error_fatal); + pnv_xscom_add_subregion(chip, PNV_XSCOM_LPC_BASE, &chip8->lpc.xscom_regs); + + chip->fw_mr = &chip8->lpc.isa_fw; + chip->dt_isa_nodename = g_strdup_printf("/xscom@%" PRIx64 "/isa@%x", + (uint64_t) PNV_XSCOM_BASE(chip), + PNV_XSCOM_LPC_BASE); + + /* + * Interrupt Management Area. This is the memory region holding + * all the Interrupt Control Presenter (ICP) registers + */ + pnv_chip_icp_realize(chip8, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Create the simplified OCC model */ + object_property_set_link(OBJECT(&chip8->occ), "psi", OBJECT(&chip8->psi), + &error_abort); + if (!qdev_realize(DEVICE(&chip8->occ), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV_XSCOM_OCC_BASE, &chip8->occ.xscom_regs); + + /* OCC SRAM model */ + memory_region_add_subregion(get_system_memory(), PNV_OCC_SENSOR_BASE(chip), + &chip8->occ.sram_regs); + + /* HOMER */ + object_property_set_link(OBJECT(&chip8->homer), "chip", OBJECT(chip), + &error_abort); + if (!qdev_realize(DEVICE(&chip8->homer), NULL, errp)) { + return; + } + /* Homer Xscom region */ + pnv_xscom_add_subregion(chip, PNV_XSCOM_PBA_BASE, &chip8->homer.pba_regs); + + /* Homer mmio region */ + memory_region_add_subregion(get_system_memory(), PNV_HOMER_BASE(chip), + &chip8->homer.regs); + + /* PHB3 controllers */ + for (i = 0; i < chip->num_phbs; i++) { + PnvPHB3 *phb = &chip8->phbs[i]; + PnvPBCQState *pbcq = &phb->pbcq; + + object_property_set_int(OBJECT(phb), "index", i, &error_fatal); + object_property_set_int(OBJECT(phb), "chip-id", chip->chip_id, + &error_fatal); + if (!sysbus_realize(SYS_BUS_DEVICE(phb), errp)) { + return; + } + + /* Populate the XSCOM address space. 
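+         * (one 0x400-sized slot per PHB for the PBCQ nest and pci rings,
+         * and a 0x40-sized slot per PHB for the spci ring, as the offsets
+         * below show)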
*/ + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_NEST_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_nest_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_PCI_BASE + 0x400 * phb->phb_id, + &pbcq->xscom_pci_regs); + pnv_xscom_add_subregion(chip, + PNV_XSCOM_PBCQ_SPCI_BASE + 0x040 * phb->phb_id, + &pbcq->xscom_spci_regs); + } +} + +static uint32_t pnv_chip_power8_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + addr &= (PNV_XSCOM_SIZE - 1); + return ((addr >> 4) & ~0xfull) | ((addr >> 3) & 0xf); +} + +static void pnv_chip_power8e_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x221ef04980000000ull; /* P8 Murano DD2.1 */ + k->cores_mask = POWER8E_CORE_MASK; + k->num_phbs = 3; + k->core_pir = pnv_chip_core_pir_p8; + k->intc_create = pnv_chip_power8_intc_create; + k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; + k->intc_print_info = pnv_chip_power8_intc_print_info; + k->isa_create = pnv_chip_power8_isa_create; + k->dt_populate = pnv_chip_power8_dt_populate; + k->pic_print_info = pnv_chip_power8_pic_print_info; + k->xscom_core_base = pnv_chip_power8_xscom_core_base; + k->xscom_pcba = pnv_chip_power8_xscom_pcba; + dc->desc = "PowerNV Chip POWER8E"; + + device_class_set_parent_realize(dc, pnv_chip_power8_realize, + &k->parent_realize); +} + +static void pnv_chip_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x220ea04980000000ull; /* P8 Venice DD2.0 */ + k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; + k->core_pir = pnv_chip_core_pir_p8; + k->intc_create = pnv_chip_power8_intc_create; + k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; + k->intc_print_info = pnv_chip_power8_intc_print_info; + k->isa_create = pnv_chip_power8_isa_create; + k->dt_populate = pnv_chip_power8_dt_populate; + k->pic_print_info = pnv_chip_power8_pic_print_info; + k->xscom_core_base = pnv_chip_power8_xscom_core_base; + k->xscom_pcba = pnv_chip_power8_xscom_pcba; + dc->desc = "PowerNV Chip POWER8"; + + device_class_set_parent_realize(dc, pnv_chip_power8_realize, + &k->parent_realize); +} + +static void pnv_chip_power8nvl_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x120d304980000000ull; /* P8 Naples DD1.0 */ + k->cores_mask = POWER8_CORE_MASK; + k->num_phbs = 3; + k->core_pir = pnv_chip_core_pir_p8; + k->intc_create = pnv_chip_power8_intc_create; + k->intc_reset = pnv_chip_power8_intc_reset; + k->intc_destroy = pnv_chip_power8_intc_destroy; + k->intc_print_info = pnv_chip_power8_intc_print_info; + k->isa_create = pnv_chip_power8nvl_isa_create; + k->dt_populate = pnv_chip_power8_dt_populate; + k->pic_print_info = pnv_chip_power8_pic_print_info; + k->xscom_core_base = pnv_chip_power8_xscom_core_base; + k->xscom_pcba = pnv_chip_power8_xscom_pcba; + dc->desc = "PowerNV Chip POWER8NVL"; + + device_class_set_parent_realize(dc, pnv_chip_power8_realize, + &k->parent_realize); +} + +static void pnv_chip_power9_instance_init(Object *obj) +{ + PnvChip *chip = PNV_CHIP(obj); + Pnv9Chip *chip9 = PNV9_CHIP(obj); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(obj); + int i; + + object_initialize_child(obj, "xive", &chip9->xive, TYPE_PNV_XIVE); + object_property_add_alias(obj, "xive-fabric", OBJECT(&chip9->xive), + 
"xive-fabric"); + + object_initialize_child(obj, "psi", &chip9->psi, TYPE_PNV9_PSI); + + object_initialize_child(obj, "lpc", &chip9->lpc, TYPE_PNV9_LPC); + + object_initialize_child(obj, "occ", &chip9->occ, TYPE_PNV9_OCC); + + object_initialize_child(obj, "homer", &chip9->homer, TYPE_PNV9_HOMER); + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + object_initialize_child(obj, "pec[*]", &chip9->pecs[i], + TYPE_PNV_PHB4_PEC); + } + + /* + * Number of PHBs is the chip default + */ + chip->num_phbs = pcc->num_phbs; +} + +static void pnv_chip_quad_realize(Pnv9Chip *chip9, Error **errp) +{ + PnvChip *chip = PNV_CHIP(chip9); + int i; + + chip9->nr_quads = DIV_ROUND_UP(chip->nr_cores, 4); + chip9->quads = g_new0(PnvQuad, chip9->nr_quads); + + for (i = 0; i < chip9->nr_quads; i++) { + char eq_name[32]; + PnvQuad *eq = &chip9->quads[i]; + PnvCore *pnv_core = chip->cores[i * 4]; + int core_id = CPU_CORE(pnv_core)->core_id; + + snprintf(eq_name, sizeof(eq_name), "eq[%d]", core_id); + object_initialize_child_with_props(OBJECT(chip), eq_name, eq, + sizeof(*eq), TYPE_PNV_QUAD, + &error_fatal, NULL); + + object_property_set_int(OBJECT(eq), "quad-id", core_id, &error_fatal); + qdev_realize(DEVICE(eq), NULL, &error_fatal); + + pnv_xscom_add_subregion(chip, PNV9_XSCOM_EQ_BASE(eq->quad_id), + &eq->xscom_regs); + } +} + +static void pnv_chip_power9_phb_realize(PnvChip *chip, Error **errp) +{ + Pnv9Chip *chip9 = PNV9_CHIP(chip); + int i, j; + int phb_id = 0; + + for (i = 0; i < PNV9_CHIP_MAX_PEC; i++) { + PnvPhb4PecState *pec = &chip9->pecs[i]; + PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec); + uint32_t pec_nest_base; + uint32_t pec_pci_base; + + object_property_set_int(OBJECT(pec), "index", i, &error_fatal); + /* + * PEC0 -> 1 stack + * PEC1 -> 2 stacks + * PEC2 -> 3 stacks + */ + object_property_set_int(OBJECT(pec), "num-stacks", i + 1, + &error_fatal); + object_property_set_int(OBJECT(pec), "chip-id", chip->chip_id, + &error_fatal); + object_property_set_link(OBJECT(pec), "system-memory", + OBJECT(get_system_memory()), &error_abort); + if (!qdev_realize(DEVICE(pec), NULL, errp)) { + return; + } + + pec_nest_base = pecc->xscom_nest_base(pec); + pec_pci_base = pecc->xscom_pci_base(pec); + + pnv_xscom_add_subregion(chip, pec_nest_base, &pec->nest_regs_mr); + pnv_xscom_add_subregion(chip, pec_pci_base, &pec->pci_regs_mr); + + for (j = 0; j < pec->num_stacks && phb_id < chip->num_phbs; + j++, phb_id++) { + PnvPhb4PecStack *stack = &pec->stacks[j]; + Object *obj = OBJECT(&stack->phb); + + object_property_set_int(obj, "index", phb_id, &error_fatal); + object_property_set_int(obj, "chip-id", chip->chip_id, + &error_fatal); + object_property_set_int(obj, "version", PNV_PHB4_VERSION, + &error_fatal); + object_property_set_int(obj, "device-id", PNV_PHB4_DEVICE_ID, + &error_fatal); + object_property_set_link(obj, "stack", OBJECT(stack), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(obj), errp)) { + return; + } + + /* Populate the XSCOM address space. 
*/ + pnv_xscom_add_subregion(chip, + pec_nest_base + 0x40 * (stack->stack_no + 1), + &stack->nest_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + 0x40 * (stack->stack_no + 1), + &stack->pci_regs_mr); + pnv_xscom_add_subregion(chip, + pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 + + 0x40 * stack->stack_no, + &stack->phb_regs_mr); + } + } +} + +static void pnv_chip_power9_realize(DeviceState *dev, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); + Pnv9Chip *chip9 = PNV9_CHIP(dev); + PnvChip *chip = PNV_CHIP(dev); + Pnv9Psi *psi9 = &chip9->psi; + Error *local_err = NULL; + + /* XSCOM bridge is first */ + pnv_xscom_realize(chip, PNV9_XSCOM_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV9_XSCOM_BASE(chip)); + + pcc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + pnv_chip_quad_realize(chip9, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* XIVE interrupt controller (POWER9) */ + object_property_set_int(OBJECT(&chip9->xive), "ic-bar", + PNV9_XIVE_IC_BASE(chip), &error_fatal); + object_property_set_int(OBJECT(&chip9->xive), "vc-bar", + PNV9_XIVE_VC_BASE(chip), &error_fatal); + object_property_set_int(OBJECT(&chip9->xive), "pc-bar", + PNV9_XIVE_PC_BASE(chip), &error_fatal); + object_property_set_int(OBJECT(&chip9->xive), "tm-bar", + PNV9_XIVE_TM_BASE(chip), &error_fatal); + object_property_set_link(OBJECT(&chip9->xive), "chip", OBJECT(chip), + &error_abort); + if (!sysbus_realize(SYS_BUS_DEVICE(&chip9->xive), errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV9_XSCOM_XIVE_BASE, + &chip9->xive.xscom_regs); + + /* Processor Service Interface (PSI) Host Bridge */ + object_property_set_int(OBJECT(&chip9->psi), "bar", PNV9_PSIHB_BASE(chip), + &error_fatal); + if (!qdev_realize(DEVICE(&chip9->psi), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV9_XSCOM_PSIHB_BASE, + &PNV_PSI(psi9)->xscom_regs); + + /* LPC */ + object_property_set_link(OBJECT(&chip9->lpc), "psi", OBJECT(&chip9->psi), + &error_abort); + if (!qdev_realize(DEVICE(&chip9->lpc), NULL, errp)) { + return; + } + memory_region_add_subregion(get_system_memory(), PNV9_LPCM_BASE(chip), + &chip9->lpc.xscom_regs); + + chip->fw_mr = &chip9->lpc.isa_fw; + chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0", + (uint64_t) PNV9_LPCM_BASE(chip)); + + /* Create the simplified OCC model */ + object_property_set_link(OBJECT(&chip9->occ), "psi", OBJECT(&chip9->psi), + &error_abort); + if (!qdev_realize(DEVICE(&chip9->occ), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV9_XSCOM_OCC_BASE, &chip9->occ.xscom_regs); + + /* OCC SRAM model */ + memory_region_add_subregion(get_system_memory(), PNV9_OCC_SENSOR_BASE(chip), + &chip9->occ.sram_regs); + + /* HOMER */ + object_property_set_link(OBJECT(&chip9->homer), "chip", OBJECT(chip), + &error_abort); + if (!qdev_realize(DEVICE(&chip9->homer), NULL, errp)) { + return; + } + /* Homer Xscom region */ + pnv_xscom_add_subregion(chip, PNV9_XSCOM_PBA_BASE, &chip9->homer.pba_regs); + + /* Homer mmio region */ + memory_region_add_subregion(get_system_memory(), PNV9_HOMER_BASE(chip), + &chip9->homer.regs); + + /* PHBs */ + pnv_chip_power9_phb_realize(chip, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } +} + +static uint32_t pnv_chip_power9_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + addr &= (PNV9_XSCOM_SIZE - 1); + return addr >> 3; 
+} + +static void pnv_chip_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x220d104900008000ull; /* P9 Nimbus DD2.0 */ + k->cores_mask = POWER9_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p9; + k->intc_create = pnv_chip_power9_intc_create; + k->intc_reset = pnv_chip_power9_intc_reset; + k->intc_destroy = pnv_chip_power9_intc_destroy; + k->intc_print_info = pnv_chip_power9_intc_print_info; + k->isa_create = pnv_chip_power9_isa_create; + k->dt_populate = pnv_chip_power9_dt_populate; + k->pic_print_info = pnv_chip_power9_pic_print_info; + k->xscom_core_base = pnv_chip_power9_xscom_core_base; + k->xscom_pcba = pnv_chip_power9_xscom_pcba; + dc->desc = "PowerNV Chip POWER9"; + k->num_phbs = 6; + + device_class_set_parent_realize(dc, pnv_chip_power9_realize, + &k->parent_realize); +} + +static void pnv_chip_power10_instance_init(Object *obj) +{ + Pnv10Chip *chip10 = PNV10_CHIP(obj); + + object_initialize_child(obj, "psi", &chip10->psi, TYPE_PNV10_PSI); + object_initialize_child(obj, "lpc", &chip10->lpc, TYPE_PNV10_LPC); +} + +static void pnv_chip_power10_realize(DeviceState *dev, Error **errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(dev); + PnvChip *chip = PNV_CHIP(dev); + Pnv10Chip *chip10 = PNV10_CHIP(dev); + Error *local_err = NULL; + + /* XSCOM bridge is first */ + pnv_xscom_realize(chip, PNV10_XSCOM_SIZE, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + sysbus_mmio_map(SYS_BUS_DEVICE(chip), 0, PNV10_XSCOM_BASE(chip)); + + pcc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* Processor Service Interface (PSI) Host Bridge */ + object_property_set_int(OBJECT(&chip10->psi), "bar", + PNV10_PSIHB_BASE(chip), &error_fatal); + if (!qdev_realize(DEVICE(&chip10->psi), NULL, errp)) { + return; + } + pnv_xscom_add_subregion(chip, PNV10_XSCOM_PSIHB_BASE, + &PNV_PSI(&chip10->psi)->xscom_regs); + + /* LPC */ + object_property_set_link(OBJECT(&chip10->lpc), "psi", + OBJECT(&chip10->psi), &error_abort); + if (!qdev_realize(DEVICE(&chip10->lpc), NULL, errp)) { + return; + } + memory_region_add_subregion(get_system_memory(), PNV10_LPCM_BASE(chip), + &chip10->lpc.xscom_regs); + + chip->fw_mr = &chip10->lpc.isa_fw; + chip->dt_isa_nodename = g_strdup_printf("/lpcm-opb@%" PRIx64 "/lpc@0", + (uint64_t) PNV10_LPCM_BASE(chip)); +} + +static uint32_t pnv_chip_power10_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + addr &= (PNV10_XSCOM_SIZE - 1); + return addr >> 3; +} + +static void pnv_chip_power10_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvChipClass *k = PNV_CHIP_CLASS(klass); + + k->chip_cfam_id = 0x120da04900008000ull; /* P10 DD1.0 (with NX) */ + k->cores_mask = POWER10_CORE_MASK; + k->core_pir = pnv_chip_core_pir_p10; + k->intc_create = pnv_chip_power10_intc_create; + k->intc_reset = pnv_chip_power10_intc_reset; + k->intc_destroy = pnv_chip_power10_intc_destroy; + k->intc_print_info = pnv_chip_power10_intc_print_info; + k->isa_create = pnv_chip_power10_isa_create; + k->dt_populate = pnv_chip_power10_dt_populate; + k->pic_print_info = pnv_chip_power10_pic_print_info; + k->xscom_core_base = pnv_chip_power10_xscom_core_base; + k->xscom_pcba = pnv_chip_power10_xscom_pcba; + dc->desc = "PowerNV Chip POWER10"; + + device_class_set_parent_realize(dc, pnv_chip_power10_realize, + &k->parent_realize); +} + +static void pnv_chip_core_sanitize(PnvChip *chip, Error 
**errp) +{ + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + int cores_max; + + /* + * No custom mask for this chip, let's use the default one from + * the chip class + */ + if (!chip->cores_mask) { + chip->cores_mask = pcc->cores_mask; + } + + /* filter out alien core ids; some are reserved */ + if ((chip->cores_mask & pcc->cores_mask) != chip->cores_mask) { + error_setg(errp, "invalid core mask for chip 0x%"PRIx64, + chip->cores_mask); + return; + } + chip->cores_mask &= pcc->cores_mask; + + /* now that we have a sane layout, let's check the number of cores */ + cores_max = ctpop64(chip->cores_mask); + if (chip->nr_cores > cores_max) { + error_setg(errp, "too many cores for chip, limit is %d", + cores_max); + return; + } +} + +static void pnv_chip_core_realize(PnvChip *chip, Error **errp) +{ + Error *error = NULL; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(chip); + const char *typename = pnv_chip_core_typename(chip); + int i, core_hwid; + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + + if (!object_class_by_name(typename)) { + error_setg(errp, "Unable to find PowerNV CPU Core '%s'", typename); + return; + } + + /* Cores */ + pnv_chip_core_sanitize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } + + chip->cores = g_new0(PnvCore *, chip->nr_cores); + + for (i = 0, core_hwid = 0; (core_hwid < sizeof(chip->cores_mask) * 8) + && (i < chip->nr_cores); core_hwid++) { + char core_name[32]; + PnvCore *pnv_core; + uint64_t xscom_core_base; + + if (!(chip->cores_mask & (1ull << core_hwid))) { + continue; + } + + pnv_core = PNV_CORE(object_new(typename)); + + snprintf(core_name, sizeof(core_name), "core[%d]", core_hwid); + object_property_add_child(OBJECT(chip), core_name, OBJECT(pnv_core)); + chip->cores[i] = pnv_core; + object_property_set_int(OBJECT(pnv_core), "nr-threads", + chip->nr_threads, &error_fatal); + object_property_set_int(OBJECT(pnv_core), CPU_CORE_PROP_CORE_ID, + core_hwid, &error_fatal); + object_property_set_int(OBJECT(pnv_core), "pir", + pcc->core_pir(chip, core_hwid), &error_fatal); + object_property_set_int(OBJECT(pnv_core), "hrmor", pnv->fw_load_addr, + &error_fatal); + object_property_set_link(OBJECT(pnv_core), "chip", OBJECT(chip), + &error_abort); + qdev_realize(DEVICE(pnv_core), NULL, &error_fatal); + + /* Each core has an XSCOM MMIO region */ + xscom_core_base = pcc->xscom_core_base(chip, core_hwid); + + pnv_xscom_add_subregion(chip, xscom_core_base, + &pnv_core->xscom_regs); + i++; + } +} + +static void pnv_chip_realize(DeviceState *dev, Error **errp) +{ + PnvChip *chip = PNV_CHIP(dev); + Error *error = NULL; + + /* Cores */ + pnv_chip_core_realize(chip, &error); + if (error) { + error_propagate(errp, error); + return; + } +} + +static Property pnv_chip_properties[] = { + DEFINE_PROP_UINT32("chip-id", PnvChip, chip_id, 0), + DEFINE_PROP_UINT64("ram-start", PnvChip, ram_start, 0), + DEFINE_PROP_UINT64("ram-size", PnvChip, ram_size, 0), + DEFINE_PROP_UINT32("nr-cores", PnvChip, nr_cores, 1), + DEFINE_PROP_UINT64("cores-mask", PnvChip, cores_mask, 0x0), + DEFINE_PROP_UINT32("nr-threads", PnvChip, nr_threads, 1), + DEFINE_PROP_UINT32("num-phbs", PnvChip, num_phbs, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_chip_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_CPU, dc->categories); + dc->realize = pnv_chip_realize; + device_class_set_props(dc, pnv_chip_properties); + dc->desc = "PowerNV Chip"; +} + +PowerPCCPU *pnv_chip_find_cpu(PnvChip *chip,
uint32_t pir) +{ + int i, j; + + for (i = 0; i < chip->nr_cores; i++) { + PnvCore *pc = chip->cores[i]; + CPUCore *cc = CPU_CORE(pc); + + for (j = 0; j < cc->nr_threads; j++) { + if (ppc_cpu_pir(pc->threads[j]) == pir) { + return pc->threads[j]; + } + } + } + return NULL; +} + +static ICSState *pnv_ics_get(XICSFabric *xi, int irq) +{ + PnvMachineState *pnv = PNV_MACHINE(xi); + int i, j; + + for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; + Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); + + if (ics_valid_irq(&chip8->psi.ics, irq)) { + return &chip8->psi.ics; + } + for (j = 0; j < chip->num_phbs; j++) { + if (ics_valid_irq(&chip8->phbs[j].lsis, irq)) { + return &chip8->phbs[j].lsis; + } + if (ics_valid_irq(ICS(&chip8->phbs[j].msis), irq)) { + return ICS(&chip8->phbs[j].msis); + } + } + } + return NULL; +} + +static void pnv_ics_resend(XICSFabric *xi) +{ + PnvMachineState *pnv = PNV_MACHINE(xi); + int i, j; + + for (i = 0; i < pnv->num_chips; i++) { + PnvChip *chip = pnv->chips[i]; + Pnv8Chip *chip8 = PNV8_CHIP(pnv->chips[i]); + + ics_resend(&chip8->psi.ics); + for (j = 0; j < chip->num_phbs; j++) { + ics_resend(&chip8->phbs[j].lsis); + ics_resend(ICS(&chip8->phbs[j].msis)); + } + } +} + +static ICPState *pnv_icp_get(XICSFabric *xi, int pir) +{ + PowerPCCPU *cpu = ppc_get_vcpu_by_pir(pir); + + return cpu ? ICP(pnv_cpu_state(cpu)->intc) : NULL; +} + +static void pnv_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + int i; + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + /* XXX: loop on each chip/core/thread instead of CPU_FOREACH() */ + PNV_CHIP_GET_CLASS(pnv->chips[0])->intc_print_info(pnv->chips[0], cpu, + mon); + } + + for (i = 0; i < pnv->num_chips; i++) { + PNV_CHIP_GET_CLASS(pnv->chips[i])->pic_print_info(pnv->chips[i], mon); + } +} + +static int pnv_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv, + XiveTCTXMatch *match) +{ + PnvMachineState *pnv = PNV_MACHINE(xfb); + int total_count = 0; + int i; + + for (i = 0; i < pnv->num_chips; i++) { + Pnv9Chip *chip9 = PNV9_CHIP(pnv->chips[i]); + XivePresenter *xptr = XIVE_PRESENTER(&chip9->xive); + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); + int count; + + count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore, + priority, logic_serv, match); + + if (count < 0) { + return count; + } + + total_count += count; + } + + return total_count; +} + +static void pnv_machine_power8_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + XICSFabricClass *xic = XICS_FABRIC_CLASS(oc); + PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc); + static const char compat[] = "qemu,powernv8\0qemu,powernv\0ibm,powernv"; + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER8"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + + xic->icp_get = pnv_icp_get; + xic->ics_get = pnv_ics_get; + xic->ics_resend = pnv_ics_resend; + + pmc->compat = compat; + pmc->compat_size = sizeof(compat); +} + +static void pnv_machine_power9_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc); + PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc); + static const char compat[] = "qemu,powernv9\0ibm,powernv"; + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER9"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0"); + xfc->match_nvt = pnv_match_nvt; + + 
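/* Resolve the generic "powernv" machine name to this POWER9 machine */ + 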
mc->alias = "powernv"; + + pmc->compat = compat; + pmc->compat_size = sizeof(compat); + pmc->dt_power_mgt = pnv_dt_power_mgt; +} + +static void pnv_machine_power10_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + PnvMachineClass *pmc = PNV_MACHINE_CLASS(oc); + static const char compat[] = "qemu,powernv10\0ibm,powernv"; + + mc->desc = "IBM PowerNV (Non-Virtualized) POWER10"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power10_v2.0"); + + pmc->compat = compat; + pmc->compat_size = sizeof(compat); + pmc->dt_power_mgt = pnv_dt_power_mgt; +} + +static bool pnv_machine_get_hb(Object *obj, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + return !!pnv->fw_load_addr; +} + +static void pnv_machine_set_hb(Object *obj, bool value, Error **errp) +{ + PnvMachineState *pnv = PNV_MACHINE(obj); + + if (value) { + pnv->fw_load_addr = 0x8000000; + } +} + +static void pnv_cpu_do_nmi_on_cpu(CPUState *cs, run_on_cpu_data arg) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + cpu_synchronize_state(cs); + ppc_cpu_do_system_reset(cs); + if (env->spr[SPR_SRR1] & SRR1_WAKESTATE) { + /* + * Power-save wakeups, as indicated by non-zero SRR1[46:47] put the + * wakeup reason in SRR1[42:45], system reset is indicated with 0b0100 + * (PPC_BIT(43)). + */ + if (!(env->spr[SPR_SRR1] & SRR1_WAKERESET)) { + warn_report("ppc_cpu_do_system_reset does not set system reset wakeup reason"); + env->spr[SPR_SRR1] |= SRR1_WAKERESET; + } + } else { + /* + * For non-powersave system resets, SRR1[42:45] are defined to be + * implementation-dependent. The POWER9 User Manual specifies that + * an external (SCOM driven, which may come from a BMC nmi command or + * another CPU requesting a NMI IPI) system reset exception should be + * 0b0010 (PPC_BIT(44)). 
+ */ + env->spr[SPR_SRR1] |= SRR1_WAKESCOM; + } +} + +static void pnv_nmi(NMIState *n, int cpu_index, Error **errp) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + async_run_on_cpu(cs, pnv_cpu_do_nmi_on_cpu, RUN_ON_CPU_NULL); + } +} + +static void pnv_machine_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc); + NMIClass *nc = NMI_CLASS(oc); + + mc->desc = "IBM PowerNV (Non-Virtualized)"; + mc->init = pnv_init; + mc->reset = pnv_reset; + mc->max_cpus = MAX_CPUS; + /* Pnv provides an AHCI device for storage */ + mc->block_default_type = IF_IDE; + mc->no_parallel = 1; + mc->default_boot_order = NULL; + /* + * RAM defaults to less than 2048 MiB for 32-bit hosts, and large + * enough to fit the maximum initrd size at its load address + */ + mc->default_ram_size = 1 * GiB; + mc->default_ram_id = "pnv.ram"; + ispc->print_info = pnv_pic_print_info; + nc->nmi_monitor_handler = pnv_nmi; + + object_class_property_add_bool(oc, "hb-mode", + pnv_machine_get_hb, pnv_machine_set_hb); + object_class_property_set_description(oc, "hb-mode", + "Use a hostboot-like boot loader"); +} + +#define DEFINE_PNV8_CHIP_TYPE(type, class_initfn) \ + { \ + .name = type, \ + .class_init = class_initfn, \ + .parent = TYPE_PNV8_CHIP, \ + } + +#define DEFINE_PNV9_CHIP_TYPE(type, class_initfn) \ + { \ + .name = type, \ + .class_init = class_initfn, \ + .parent = TYPE_PNV9_CHIP, \ + } + +#define DEFINE_PNV10_CHIP_TYPE(type, class_initfn) \ + { \ + .name = type, \ + .class_init = class_initfn, \ + .parent = TYPE_PNV10_CHIP, \ + } + +static const TypeInfo types[] = { + { + .name = MACHINE_TYPE_NAME("powernv10"), + .parent = TYPE_PNV_MACHINE, + .class_init = pnv_machine_power10_class_init, + }, + { + .name = MACHINE_TYPE_NAME("powernv9"), + .parent = TYPE_PNV_MACHINE, + .class_init = pnv_machine_power9_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XIVE_FABRIC }, + { }, + }, + }, + { + .name = MACHINE_TYPE_NAME("powernv8"), + .parent = TYPE_PNV_MACHINE, + .class_init = pnv_machine_power8_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XICS_FABRIC }, + { }, + }, + }, + { + .name = TYPE_PNV_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(PnvMachineState), + .class_init = pnv_machine_class_init, + .class_size = sizeof(PnvMachineClass), + .interfaces = (InterfaceInfo[]) { + { TYPE_INTERRUPT_STATS_PROVIDER }, + { TYPE_NMI }, + { }, + }, + }, + { + .name = TYPE_PNV_CHIP, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = pnv_chip_class_init, + .instance_size = sizeof(PnvChip), + .class_size = sizeof(PnvChipClass), + .abstract = true, + }, + + /* + * P10 chip and variants + */ + { + .name = TYPE_PNV10_CHIP, + .parent = TYPE_PNV_CHIP, + .instance_init = pnv_chip_power10_instance_init, + .instance_size = sizeof(Pnv10Chip), + }, + DEFINE_PNV10_CHIP_TYPE(TYPE_PNV_CHIP_POWER10, pnv_chip_power10_class_init), + + /* + * P9 chip and variants + */ + { + .name = TYPE_PNV9_CHIP, + .parent = TYPE_PNV_CHIP, + .instance_init = pnv_chip_power9_instance_init, + .instance_size = sizeof(Pnv9Chip), + }, + DEFINE_PNV9_CHIP_TYPE(TYPE_PNV_CHIP_POWER9, pnv_chip_power9_class_init), + + /* + * P8 chip and variants + */ + { + .name = TYPE_PNV8_CHIP, + .parent = TYPE_PNV_CHIP, + .instance_init = pnv_chip_power8_instance_init, + .instance_size = sizeof(Pnv8Chip), + }, + DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8, pnv_chip_power8_class_init), + DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8E, pnv_chip_power8e_class_init), + 
DEFINE_PNV8_CHIP_TYPE(TYPE_PNV_CHIP_POWER8NVL, + pnv_chip_power8nvl_class_init), +}; + +DEFINE_TYPES(types) diff --git a/hw/ppc/pnv_bmc.c b/hw/ppc/pnv_bmc.c new file mode 100644 index 000000000..75a22ce50 --- /dev/null +++ b/hw/ppc/pnv_bmc.c @@ -0,0 +1,313 @@ +/* + * QEMU PowerNV, BMC related functions + * + * Copyright (c) 2016-2017, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "target/ppc/cpu.h" +#include "qemu/log.h" +#include "hw/ipmi/ipmi.h" +#include "hw/ppc/fdt.h" + +#include "hw/ppc/pnv.h" + +#include <libfdt.h> + +/* TODO: include definition in ipmi.h */ +#define IPMI_SDR_FULL_TYPE 1 + +/* + * OEM SEL Event data packet sent by BMC in response to a Read Event + * Message Buffer command + */ +typedef struct OemSel { + /* SEL header */ + uint8_t id[2]; + uint8_t type; + uint8_t timestamp[4]; + uint8_t manuf_id[3]; + + /* OEM SEL data (6 bytes) follows */ + uint8_t netfun; + uint8_t cmd; + uint8_t data[4]; +} OemSel; + +#define SOFT_OFF 0x00 +#define SOFT_REBOOT 0x01 + +static bool pnv_bmc_is_simulator(IPMIBmc *bmc) +{ + return object_dynamic_cast(OBJECT(bmc), TYPE_IPMI_BMC_SIMULATOR); +} + +static void pnv_gen_oem_sel(IPMIBmc *bmc, uint8_t reboot) +{ + /* IPMI SEL events are 16 bytes long */ + OemSel sel = { + .id = { 0x55, 0x55 }, + .type = 0xC0, /* OEM */ + .manuf_id = { 0x0, 0x0, 0x0 }, + .timestamp = { 0x0, 0x0, 0x0, 0x0 }, + .netfun = 0x3A, /* IBM */ + .cmd = 0x04, /* AMI OEM SEL Power Notification */ + .data = { reboot, 0xFF, 0xFF, 0xFF }, + }; + + ipmi_bmc_gen_event(bmc, (uint8_t *) &sel, 0 /* do not log the event */); +} + +void pnv_bmc_powerdown(IPMIBmc *bmc) +{ + pnv_gen_oem_sel(bmc, SOFT_OFF); +} + +void pnv_dt_bmc_sensors(IPMIBmc *bmc, void *fdt) +{ + int offset; + int i; + const struct ipmi_sdr_compact *sdr; + uint16_t nextrec; + + if (!pnv_bmc_is_simulator(bmc)) { + return; + } + + offset = fdt_add_subnode(fdt, 0, "bmc"); + _FDT(offset); + + _FDT((fdt_setprop_string(fdt, offset, "name", "bmc"))); + offset = fdt_add_subnode(fdt, offset, "sensors"); + _FDT(offset); + + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); + + for (i = 0; !ipmi_bmc_sdr_find(bmc, i, &sdr, &nextrec); i++) { + int off; + char *name; + + if (sdr->header.rec_type != IPMI_SDR_COMPACT_TYPE && + sdr->header.rec_type != IPMI_SDR_FULL_TYPE) { + continue; + } + + name = g_strdup_printf("sensor@%x", sdr->sensor_owner_number); + off = fdt_add_subnode(fdt, offset, name); + _FDT(off); + g_free(name); + + _FDT((fdt_setprop_cell(fdt, off, "reg", sdr->sensor_owner_number))); + _FDT((fdt_setprop_string(fdt, off, "name", "sensor"))); + _FDT((fdt_setprop_string(fdt, off, "compatible", "ibm,ipmi-sensor"))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-reading-type", + sdr->reading_type))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-entity-id", + sdr->entity_id))); + _FDT((fdt_setprop_cell(fdt,
off, "ipmi-entity-instance", + sdr->entity_instance))); + _FDT((fdt_setprop_cell(fdt, off, "ipmi-sensor-type", + sdr->sensor_type))); + } +} + +/* + * HIOMAP protocol handler + */ +#define HIOMAP_C_RESET 1 +#define HIOMAP_C_GET_INFO 2 +#define HIOMAP_C_GET_FLASH_INFO 3 +#define HIOMAP_C_CREATE_READ_WINDOW 4 +#define HIOMAP_C_CLOSE_WINDOW 5 +#define HIOMAP_C_CREATE_WRITE_WINDOW 6 +#define HIOMAP_C_MARK_DIRTY 7 +#define HIOMAP_C_FLUSH 8 +#define HIOMAP_C_ACK 9 +#define HIOMAP_C_ERASE 10 +#define HIOMAP_C_DEVICE_NAME 11 +#define HIOMAP_C_LOCK 12 + +#define BLOCK_SHIFT 12 /* 4K */ + +static uint16_t bytes_to_blocks(uint32_t bytes) +{ + return bytes >> BLOCK_SHIFT; +} + +static uint32_t blocks_to_bytes(uint16_t blocks) +{ + return blocks << BLOCK_SHIFT; +} + +static int hiomap_erase(PnvPnor *pnor, uint32_t offset, uint32_t size) +{ + MemTxResult result; + int i; + + for (i = 0; i < size / 4; i++) { + result = memory_region_dispatch_write(&pnor->mmio, offset + i * 4, + 0xFFFFFFFF, MO_32, + MEMTXATTRS_UNSPECIFIED); + if (result != MEMTX_OK) { + return -1; + } + } + return 0; +} + +static void hiomap_cmd(IPMIBmcSim *ibs, uint8_t *cmd, unsigned int cmd_len, + RspBuffer *rsp) +{ + PnvPnor *pnor = PNV_PNOR(object_property_get_link(OBJECT(ibs), "pnor", + &error_abort)); + uint32_t pnor_size = pnor->size; + uint32_t pnor_addr = PNOR_SPI_OFFSET; + bool readonly = false; + + rsp_buffer_push(rsp, cmd[2]); + rsp_buffer_push(rsp, cmd[3]); + + switch (cmd[2]) { + case HIOMAP_C_MARK_DIRTY: + case HIOMAP_C_FLUSH: + case HIOMAP_C_ACK: + break; + + case HIOMAP_C_ERASE: + if (hiomap_erase(pnor, blocks_to_bytes(cmd[5] << 8 | cmd[4]), + blocks_to_bytes(cmd[7] << 8 | cmd[6]))) { + rsp_buffer_set_error(rsp, IPMI_CC_UNSPECIFIED); + } + break; + + case HIOMAP_C_GET_INFO: + rsp_buffer_push(rsp, 2); /* Version 2 */ + rsp_buffer_push(rsp, BLOCK_SHIFT); /* block size */ + rsp_buffer_push(rsp, 0); /* Timeout */ + rsp_buffer_push(rsp, 0); /* Timeout */ + break; + + case HIOMAP_C_GET_FLASH_INFO: + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) & 0xFF); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) >> 8); + rsp_buffer_push(rsp, 0x01); /* erase size */ + rsp_buffer_push(rsp, 0x00); /* erase size */ + break; + + case HIOMAP_C_CREATE_READ_WINDOW: + readonly = true; + /* Fall through */ + + case HIOMAP_C_CREATE_WRITE_WINDOW: + memory_region_set_readonly(&pnor->mmio, readonly); + memory_region_set_enabled(&pnor->mmio, true); + + rsp_buffer_push(rsp, bytes_to_blocks(pnor_addr) & 0xFF); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_addr) >> 8); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) & 0xFF); + rsp_buffer_push(rsp, bytes_to_blocks(pnor_size) >> 8); + rsp_buffer_push(rsp, 0x00); /* offset */ + rsp_buffer_push(rsp, 0x00); /* offset */ + break; + + case HIOMAP_C_CLOSE_WINDOW: + memory_region_set_enabled(&pnor->mmio, false); + break; + + case HIOMAP_C_DEVICE_NAME: + case HIOMAP_C_RESET: + case HIOMAP_C_LOCK: + default: + qemu_log_mask(LOG_GUEST_ERROR, "HIOMAP: unknown command %02X\n", cmd[2]); + break; + } +} + +#define HIOMAP 0x5a + +static const IPMICmdHandler hiomap_cmds[] = { + [HIOMAP] = { hiomap_cmd, 3 }, +}; + +static const IPMINetfn hiomap_netfn = { + .cmd_nums = ARRAY_SIZE(hiomap_cmds), + .cmd_handlers = hiomap_cmds +}; + + +void pnv_bmc_set_pnor(IPMIBmc *bmc, PnvPnor *pnor) +{ + if (!pnv_bmc_is_simulator(bmc)) { + return; + } + + object_ref(OBJECT(pnor)); + object_property_add_const_link(OBJECT(bmc), "pnor", OBJECT(pnor)); + + /* Install the HIOMAP protocol handlers to access the PNOR */ + 
ipmi_sim_register_netfn(IPMI_BMC_SIMULATOR(bmc), IPMI_NETFN_OEM, + &hiomap_netfn); +} + +/* + * Instantiate the machine BMC. PowerNV uses the QEMU internal + * simulator but it could also be external. + */ +IPMIBmc *pnv_bmc_create(PnvPnor *pnor) +{ + Object *obj; + + obj = object_new(TYPE_IPMI_BMC_SIMULATOR); + qdev_realize(DEVICE(obj), NULL, &error_fatal); + pnv_bmc_set_pnor(IPMI_BMC(obj), pnor); + + return IPMI_BMC(obj); +} + +typedef struct ForeachArgs { + const char *name; + Object *obj; +} ForeachArgs; + +static int bmc_find(Object *child, void *opaque) +{ + ForeachArgs *args = opaque; + + if (object_dynamic_cast(child, args->name)) { + if (args->obj) { + return 1; + } + args->obj = child; + } + return 0; +} + +IPMIBmc *pnv_bmc_find(Error **errp) +{ + ForeachArgs args = { TYPE_IPMI_BMC, NULL }; + int ret; + + ret = object_child_foreach_recursive(object_get_root(), bmc_find, &args); + if (ret) { + error_setg(errp, "machine should have only one BMC device. " + "Use '-nodefaults'"); + return NULL; + } + + return args.obj ? IPMI_BMC(args.obj) : NULL; +} diff --git a/hw/ppc/pnv_core.c b/hw/ppc/pnv_core.c new file mode 100644 index 000000000..19e8eb885 --- /dev/null +++ b/hw/ppc/pnv_core.c @@ -0,0 +1,441 @@ +/* + * QEMU PowerPC PowerNV CPU Core model + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "sysemu/reset.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "target/ppc/cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_core.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/xics.h" +#include "hw/qdev-properties.h" +#include "helper_regs.h" + +static const char *pnv_core_cpu_typename(PnvCore *pc) +{ + const char *core_type = object_class_get_name(object_get_class(OBJECT(pc))); + int len = strlen(core_type) - strlen(PNV_CORE_TYPE_SUFFIX); + char *s = g_strdup_printf(POWERPC_CPU_TYPE_NAME("%.*s"), len, core_type); + const char *cpu_type = object_class_get_name(object_class_by_name(s)); + g_free(s); + return cpu_type; +} + +static void pnv_core_cpu_reset(PnvCore *pc, PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); + + cpu_reset(cs); + + /* + * The skiboot firmware elects a primary thread to initialize the + * system, and it can be any thread.
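+ * + * Whichever thread is elected, each one is reset below to the same + * entry state: r3 holds the device tree address and NIP points at + * the 0x10 entry offset.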
+ */ + env->gpr[3] = PNV_FDT_ADDR; + env->nip = 0x10; + env->msr |= MSR_HVB; /* Hypervisor mode */ + env->spr[SPR_HRMOR] = pc->hrmor; + hreg_compute_hflags(env); + + pcc->intc_reset(pc->chip, cpu); +} + +/* + * These values are read by the PowerNV HW monitors under Linux + */ +#define PNV_XSCOM_EX_DTS_RESULT0 0x50000 +#define PNV_XSCOM_EX_DTS_RESULT1 0x50001 + +static uint64_t pnv_core_power8_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = 0; + + /* The result should be 38 C */ + switch (offset) { + case PNV_XSCOM_EX_DTS_RESULT0: + val = 0x26f024f023f0000ull; + break; + case PNV_XSCOM_EX_DTS_RESULT1: + val = 0x24f000000000000ull; + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n", + addr); + } + + return val; +} + +static void pnv_core_power8_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx "\n", + addr); +} + +static const MemoryRegionOps pnv_core_power8_xscom_ops = { + .read = pnv_core_power8_xscom_read, + .write = pnv_core_power8_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + + +/* + * POWER9 core controls + */ +#define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP 0xf010d +#define PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR 0xf010a + +static uint64_t pnv_core_power9_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = 0; + + /* The result should be 38 C */ + switch (offset) { + case PNV_XSCOM_EX_DTS_RESULT0: + val = 0x26f024f023f0000ull; + break; + case PNV_XSCOM_EX_DTS_RESULT1: + val = 0x24f000000000000ull; + break; + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP: + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR: + val = 0x0; + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: reading reg=0x%" HWADDR_PRIx "\n", + addr); + } + + return val; +} + +static void pnv_core_power9_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + uint32_t offset = addr >> 3; + + switch (offset) { + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_HYP: + case PNV9_XSCOM_EC_PPM_SPECIAL_WKUP_OTR: + break; + default: + qemu_log_mask(LOG_UNIMP, "Warning: writing to reg=0x%" HWADDR_PRIx "\n", + addr); + } +} + +static const MemoryRegionOps pnv_core_power9_xscom_ops = { + .read = pnv_core_power9_xscom_read, + .write = pnv_core_power9_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_core_cpu_realize(PnvCore *pc, PowerPCCPU *cpu, Error **errp) +{ + CPUPPCState *env = &cpu->env; + int core_pir; + int thread_index = 0; /* TODO: TCG supports only one thread */ + ppc_spr_t *pir = &env->spr_cb[SPR_PIR]; + Error *local_err = NULL; + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return; + } + + pcc->intc_create(pc->chip, cpu, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + core_pir = object_property_get_uint(OBJECT(pc), "pir", &error_abort); + + /* + * The PIR of a thread is the core PIR + the thread index. We will + * need to find a way to get the thread index when TCG supports + * more than 1. We could use the object name ? 
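+ * + * With a single thread, the PIR set below is simply the core PIR.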
+ */ + pir->default_value = core_pir + thread_index; + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, PNV_TIMEBASE_FREQ); +} + +static void pnv_core_reset(void *dev) +{ + CPUCore *cc = CPU_CORE(dev); + PnvCore *pc = PNV_CORE(dev); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + pnv_core_cpu_reset(pc, pc->threads[i]); + } +} + +static void pnv_core_realize(DeviceState *dev, Error **errp) +{ + PnvCore *pc = PNV_CORE(OBJECT(dev)); + PnvCoreClass *pcc = PNV_CORE_GET_CLASS(pc); + CPUCore *cc = CPU_CORE(OBJECT(dev)); + const char *typename = pnv_core_cpu_typename(pc); + Error *local_err = NULL; + void *obj; + int i, j; + char name[32]; + + assert(pc->chip); + + pc->threads = g_new(PowerPCCPU *, cc->nr_threads); + for (i = 0; i < cc->nr_threads; i++) { + PowerPCCPU *cpu; + + obj = object_new(typename); + cpu = POWERPC_CPU(obj); + + pc->threads[i] = POWERPC_CPU(obj); + + snprintf(name, sizeof(name), "thread[%d]", i); + object_property_add_child(OBJECT(pc), name, obj); + + cpu->machine_data = g_new0(PnvCPUState, 1); + + object_unref(obj); + } + + for (j = 0; j < cc->nr_threads; j++) { + pnv_core_cpu_realize(pc, pc->threads[j], &local_err); + if (local_err) { + goto err; + } + } + + snprintf(name, sizeof(name), "xscom-core.%d", cc->core_id); + /* TODO: check PNV_XSCOM_EX_SIZE for p10 */ + pnv_xscom_region_init(&pc->xscom_regs, OBJECT(dev), pcc->xscom_ops, + pc, name, PNV_XSCOM_EX_SIZE); + + qemu_register_reset(pnv_core_reset, pc); + return; + +err: + while (--i >= 0) { + obj = OBJECT(pc->threads[i]); + object_unparent(obj); + } + g_free(pc->threads); + error_propagate(errp, local_err); +} + +static void pnv_core_cpu_unrealize(PnvCore *pc, PowerPCCPU *cpu) +{ + PnvCPUState *pnv_cpu = pnv_cpu_state(cpu); + PnvChipClass *pcc = PNV_CHIP_GET_CLASS(pc->chip); + + pcc->intc_destroy(pc->chip, cpu); + cpu_remove_sync(CPU(cpu)); + cpu->machine_data = NULL; + g_free(pnv_cpu); + object_unparent(OBJECT(cpu)); +} + +static void pnv_core_unrealize(DeviceState *dev) +{ + PnvCore *pc = PNV_CORE(dev); + CPUCore *cc = CPU_CORE(dev); + int i; + + qemu_unregister_reset(pnv_core_reset, pc); + + for (i = 0; i < cc->nr_threads; i++) { + pnv_core_cpu_unrealize(pc, pc->threads[i]); + } + g_free(pc->threads); +} + +static Property pnv_core_properties[] = { + DEFINE_PROP_UINT32("pir", PnvCore, pir, 0), + DEFINE_PROP_UINT64("hrmor", PnvCore, hrmor, 0), + DEFINE_PROP_LINK("chip", PnvCore, chip, TYPE_PNV_CHIP, PnvChip *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_core_power8_class_init(ObjectClass *oc, void *data) +{ + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + pcc->xscom_ops = &pnv_core_power8_xscom_ops; +} + +static void pnv_core_power9_class_init(ObjectClass *oc, void *data) +{ + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + pcc->xscom_ops = &pnv_core_power9_xscom_ops; +} + +static void pnv_core_power10_class_init(ObjectClass *oc, void *data) +{ + PnvCoreClass *pcc = PNV_CORE_CLASS(oc); + + /* TODO: Use the P9 XSCOMs for now on P10 */ + pcc->xscom_ops = &pnv_core_power9_xscom_ops; +} + +static void pnv_core_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = pnv_core_realize; + dc->unrealize = pnv_core_unrealize; + device_class_set_props(dc, pnv_core_properties); + dc->user_creatable = false; +} + +#define DEFINE_PNV_CORE_TYPE(family, cpu_model) \ + { \ + .parent = TYPE_PNV_CORE, \ + .name = PNV_CORE_TYPE_NAME(cpu_model), \ + .class_init = pnv_core_##family##_class_init, \ + } + +static const TypeInfo pnv_core_infos[] = { + { + .name = 
TYPE_PNV_CORE, + .parent = TYPE_CPU_CORE, + .instance_size = sizeof(PnvCore), + .class_size = sizeof(PnvCoreClass), + .class_init = pnv_core_class_init, + .abstract = true, + }, + DEFINE_PNV_CORE_TYPE(power8, "power8e_v2.1"), + DEFINE_PNV_CORE_TYPE(power8, "power8_v2.0"), + DEFINE_PNV_CORE_TYPE(power8, "power8nvl_v1.0"), + DEFINE_PNV_CORE_TYPE(power9, "power9_v2.0"), + DEFINE_PNV_CORE_TYPE(power10, "power10_v2.0"), +}; + +DEFINE_TYPES(pnv_core_infos) + +/* + * POWER9 Quads + */ + +#define P9X_EX_NCU_SPEC_BAR 0x11010 + +static uint64_t pnv_quad_xscom_read(void *opaque, hwaddr addr, + unsigned int width) +{ + uint32_t offset = addr >> 3; + uint64_t val = -1; + + switch (offset) { + case P9X_EX_NCU_SPEC_BAR: + case P9X_EX_NCU_SPEC_BAR + 0x400: /* Second EX */ + val = 0; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: reading @0x%08x\n", __func__, + offset); + } + + return val; +} + +static void pnv_quad_xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int width) +{ + uint32_t offset = addr >> 3; + + switch (offset) { + case P9X_EX_NCU_SPEC_BAR: + case P9X_EX_NCU_SPEC_BAR + 0x400: /* Second EX */ + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: writing @0x%08x\n", __func__, + offset); + } +} + +static const MemoryRegionOps pnv_quad_xscom_ops = { + .read = pnv_quad_xscom_read, + .write = pnv_quad_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_quad_realize(DeviceState *dev, Error **errp) +{ + PnvQuad *eq = PNV_QUAD(dev); + char name[32]; + + snprintf(name, sizeof(name), "xscom-quad.%d", eq->quad_id); + pnv_xscom_region_init(&eq->xscom_regs, OBJECT(dev), &pnv_quad_xscom_ops, + eq, name, PNV9_XSCOM_EQ_SIZE); +} + +static Property pnv_quad_properties[] = { + DEFINE_PROP_UINT32("quad-id", PnvQuad, quad_id, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_quad_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = pnv_quad_realize; + device_class_set_props(dc, pnv_quad_properties); + dc->user_creatable = false; +} + +static const TypeInfo pnv_quad_info = { + .name = TYPE_PNV_QUAD, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvQuad), + .class_init = pnv_quad_class_init, +}; + +static void pnv_core_register_types(void) +{ + type_register_static(&pnv_quad_info); +} + +type_init(pnv_core_register_types) diff --git a/hw/ppc/pnv_homer.c b/hw/ppc/pnv_homer.c new file mode 100644 index 000000000..9a262629b --- /dev/null +++ b/hw/ppc/pnv_homer.c @@ -0,0 +1,382 @@ +/* + * QEMU PowerPC PowerNV Emulation of a few HOMER related registers + * + * Copyright (c) 2019, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qapi/error.h" +#include "exec/hwaddr.h" +#include "exec/memory.h" +#include "sysemu/cpus.h" +#include "hw/qdev-core.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_homer.h" +#include "hw/ppc/pnv_xscom.h" + + +static bool core_max_array(PnvHomer *homer, hwaddr addr) +{ + int i; + PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer); + + for (i = 0; i <= homer->chip->nr_cores; i++) { + if (addr == (hmrc->core_max_base + i)) { + return true; + } + } + return false; +} + +/* P8 Pstate table */ + +#define PNV8_OCC_PSTATE_VERSION 0x1f8001 +#define PNV8_OCC_PSTATE_MIN 0x1f8003 +#define PNV8_OCC_PSTATE_VALID 0x1f8000 +#define PNV8_OCC_PSTATE_THROTTLE 0x1f8002 +#define PNV8_OCC_PSTATE_NOM 0x1f8004 +#define PNV8_OCC_PSTATE_TURBO 0x1f8005 +#define PNV8_OCC_PSTATE_ULTRA_TURBO 0x1f8006 +#define PNV8_OCC_PSTATE_DATA 0x1f8008 +#define PNV8_OCC_PSTATE_ID_ZERO 0x1f8010 +#define PNV8_OCC_PSTATE_ID_ONE 0x1f8018 +#define PNV8_OCC_PSTATE_ID_TWO 0x1f8020 +#define PNV8_OCC_VDD_VOLTAGE_IDENTIFIER 0x1f8012 +#define PNV8_OCC_VCS_VOLTAGE_IDENTIFIER 0x1f8013 +#define PNV8_OCC_PSTATE_ZERO_FREQUENCY 0x1f8014 +#define PNV8_OCC_PSTATE_ONE_FREQUENCY 0x1f801c +#define PNV8_OCC_PSTATE_TWO_FREQUENCY 0x1f8024 +#define PNV8_CORE_MAX_BASE 0x1f8810 + + +static uint64_t pnv_power8_homer_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + + switch (addr) { + case PNV8_OCC_PSTATE_VERSION: + case PNV8_OCC_PSTATE_MIN: + case PNV8_OCC_PSTATE_ID_ZERO: + return 0; + case PNV8_OCC_PSTATE_VALID: + case PNV8_OCC_PSTATE_THROTTLE: + case PNV8_OCC_PSTATE_NOM: + case PNV8_OCC_PSTATE_TURBO: + case PNV8_OCC_PSTATE_ID_ONE: + case PNV8_OCC_VDD_VOLTAGE_IDENTIFIER: + case PNV8_OCC_VCS_VOLTAGE_IDENTIFIER: + return 1; + case PNV8_OCC_PSTATE_ULTRA_TURBO: + case PNV8_OCC_PSTATE_ID_TWO: + return 2; + case PNV8_OCC_PSTATE_DATA: + return 0x1000000000000000; + /* P8 frequency for 0, 1, and 2 pstates */ + case PNV8_OCC_PSTATE_ZERO_FREQUENCY: + case PNV8_OCC_PSTATE_ONE_FREQUENCY: + case PNV8_OCC_PSTATE_TWO_FREQUENCY: + return 3000; + } + /* pstate table core max array */ + if (core_max_array(homer, addr)) { + return 1; + } + return 0; +} + +static void pnv_power8_homer_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + /* callback function defined to homer write */ + return; +} + +static const MemoryRegionOps pnv_power8_homer_ops = { + .read = pnv_power8_homer_read, + .write = pnv_power8_homer_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +/* P8 PBA BARs */ +#define PBA_BAR0 0x00 +#define PBA_BAR1 0x01 +#define PBA_BAR2 0x02 +#define PBA_BAR3 0x03 +#define PBA_BARMASK0 0x04 +#define PBA_BARMASK1 0x05 +#define PBA_BARMASK2 0x06 +#define PBA_BARMASK3 0x07 + +static uint64_t pnv_homer_power8_pba_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + PnvChip *chip = homer->chip; + uint32_t reg = addr >> 3; + uint64_t val = 0; + + switch (reg) { + case PBA_BAR0: + val = PNV_HOMER_BASE(chip); + break; + case PBA_BARMASK0: /* P8 homer region mask */ + val = (PNV_HOMER_SIZE - 1) & 0x300000; + break; + case PBA_BAR3: /* P8 occ common area */ + val = PNV_OCC_COMMON_AREA_BASE; + break; + case PBA_BARMASK3: /* P8 occ common area mask */ + val = (PNV_OCC_COMMON_AREA_SIZE - 1) & 0x700000; + break; + default: + qemu_log_mask(LOG_UNIMP, "PBA: read to unimplemented 
register: Ox%" + HWADDR_PRIx "\n", addr >> 3); + } + return val; +} + +static void pnv_homer_power8_pba_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "PBA: write to unimplemented register: Ox%" + HWADDR_PRIx "\n", addr >> 3); +} + +static const MemoryRegionOps pnv_homer_power8_pba_ops = { + .read = pnv_homer_power8_pba_read, + .write = pnv_homer_power8_pba_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_homer_power8_class_init(ObjectClass *klass, void *data) +{ + PnvHomerClass *homer = PNV_HOMER_CLASS(klass); + + homer->pba_size = PNV_XSCOM_PBA_SIZE; + homer->pba_ops = &pnv_homer_power8_pba_ops; + homer->homer_size = PNV_HOMER_SIZE; + homer->homer_ops = &pnv_power8_homer_ops; + homer->core_max_base = PNV8_CORE_MAX_BASE; +} + +static const TypeInfo pnv_homer_power8_type_info = { + .name = TYPE_PNV8_HOMER, + .parent = TYPE_PNV_HOMER, + .instance_size = sizeof(PnvHomer), + .class_init = pnv_homer_power8_class_init, +}; + +/* P9 Pstate table */ + +#define PNV9_OCC_PSTATE_ID_ZERO 0xe2018 +#define PNV9_OCC_PSTATE_ID_ONE 0xe2020 +#define PNV9_OCC_PSTATE_ID_TWO 0xe2028 +#define PNV9_OCC_PSTATE_DATA 0xe2000 +#define PNV9_OCC_PSTATE_DATA_AREA 0xe2008 +#define PNV9_OCC_PSTATE_MIN 0xe2003 +#define PNV9_OCC_PSTATE_NOM 0xe2004 +#define PNV9_OCC_PSTATE_TURBO 0xe2005 +#define PNV9_OCC_PSTATE_ULTRA_TURBO 0xe2818 +#define PNV9_OCC_MAX_PSTATE_ULTRA_TURBO 0xe2006 +#define PNV9_OCC_PSTATE_MAJOR_VERSION 0xe2001 +#define PNV9_OCC_OPAL_RUNTIME_DATA 0xe2b85 +#define PNV9_CHIP_HOMER_IMAGE_POINTER 0x200008 +#define PNV9_CHIP_HOMER_BASE 0x0 +#define PNV9_OCC_PSTATE_ZERO_FREQUENCY 0xe201c +#define PNV9_OCC_PSTATE_ONE_FREQUENCY 0xe2024 +#define PNV9_OCC_PSTATE_TWO_FREQUENCY 0xe202c +#define PNV9_OCC_ROLE_MASTER_OR_SLAVE 0xe2002 +#define PNV9_CORE_MAX_BASE 0xe2819 + + +static uint64_t pnv_power9_homer_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + + switch (addr) { + case PNV9_OCC_MAX_PSTATE_ULTRA_TURBO: + case PNV9_OCC_PSTATE_ID_ZERO: + return 0; + case PNV9_OCC_PSTATE_DATA: + case PNV9_OCC_ROLE_MASTER_OR_SLAVE: + case PNV9_OCC_PSTATE_NOM: + case PNV9_OCC_PSTATE_TURBO: + case PNV9_OCC_PSTATE_ID_ONE: + case PNV9_OCC_PSTATE_ULTRA_TURBO: + case PNV9_OCC_OPAL_RUNTIME_DATA: + return 1; + case PNV9_OCC_PSTATE_MIN: + case PNV9_OCC_PSTATE_ID_TWO: + return 2; + + /* 3000 khz frequency for 0, 1, and 2 pstates */ + case PNV9_OCC_PSTATE_ZERO_FREQUENCY: + case PNV9_OCC_PSTATE_ONE_FREQUENCY: + case PNV9_OCC_PSTATE_TWO_FREQUENCY: + return 3000; + case PNV9_OCC_PSTATE_MAJOR_VERSION: + return 0x90; + case PNV9_CHIP_HOMER_BASE: + case PNV9_OCC_PSTATE_DATA_AREA: + case PNV9_CHIP_HOMER_IMAGE_POINTER: + return 0x1000000000000000; + } + /* pstate table core max array */ + if (core_max_array(homer, addr)) { + return 1; + } + return 0; +} + +static void pnv_power9_homer_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + /* callback function defined to homer write */ + return; +} + +static const MemoryRegionOps pnv_power9_homer_ops = { + .read = pnv_power9_homer_read, + .write = pnv_power9_homer_write, + .valid.min_access_size = 1, + .valid.max_access_size = 8, + .impl.min_access_size = 1, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_homer_power9_pba_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvHomer *homer = PNV_HOMER(opaque); + PnvChip *chip 
= homer->chip; + uint32_t reg = addr >> 3; + uint64_t val = 0; + + switch (reg) { + case PBA_BAR0: + val = PNV9_HOMER_BASE(chip); + break; + case PBA_BARMASK0: /* P9 homer region mask */ + val = (PNV9_HOMER_SIZE - 1) & 0x300000; + break; + case PBA_BAR2: /* P9 occ common area */ + val = PNV9_OCC_COMMON_AREA_BASE; + break; + case PBA_BARMASK2: /* P9 occ common area size */ + val = (PNV9_OCC_COMMON_AREA_SIZE - 1) & 0x700000; + break; + default: + qemu_log_mask(LOG_UNIMP, "PBA: read from unimplemented register: 0x%" + HWADDR_PRIx "\n", addr >> 3); + } + return val; +} + +static void pnv_homer_power9_pba_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + qemu_log_mask(LOG_UNIMP, "PBA: write to unimplemented register: 0x%" + HWADDR_PRIx "\n", addr >> 3); +} + +static const MemoryRegionOps pnv_homer_power9_pba_ops = { + .read = pnv_homer_power9_pba_read, + .write = pnv_homer_power9_pba_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_homer_power9_class_init(ObjectClass *klass, void *data) +{ + PnvHomerClass *homer = PNV_HOMER_CLASS(klass); + + homer->pba_size = PNV9_XSCOM_PBA_SIZE; + homer->pba_ops = &pnv_homer_power9_pba_ops; + homer->homer_size = PNV9_HOMER_SIZE; + homer->homer_ops = &pnv_power9_homer_ops; + homer->core_max_base = PNV9_CORE_MAX_BASE; +} + +static const TypeInfo pnv_homer_power9_type_info = { + .name = TYPE_PNV9_HOMER, + .parent = TYPE_PNV_HOMER, + .instance_size = sizeof(PnvHomer), + .class_init = pnv_homer_power9_class_init, +}; + +static void pnv_homer_realize(DeviceState *dev, Error **errp) +{ + PnvHomer *homer = PNV_HOMER(dev); + PnvHomerClass *hmrc = PNV_HOMER_GET_CLASS(homer); + + assert(homer->chip); + + pnv_xscom_region_init(&homer->pba_regs, OBJECT(dev), hmrc->pba_ops, + homer, "xscom-pba", hmrc->pba_size); + + /* homer region */ + memory_region_init_io(&homer->regs, OBJECT(dev), + hmrc->homer_ops, homer, "homer-main-memory", + hmrc->homer_size); +} + +static Property pnv_homer_properties[] = { + DEFINE_PROP_LINK("chip", PnvHomer, chip, TYPE_PNV_CHIP, PnvChip *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_homer_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_homer_realize; + dc->desc = "PowerNV HOMER Memory"; + device_class_set_props(dc, pnv_homer_properties); + dc->user_creatable = false; +} + +static const TypeInfo pnv_homer_type_info = { + .name = TYPE_PNV_HOMER, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvHomer), + .class_init = pnv_homer_class_init, + .class_size = sizeof(PnvHomerClass), + .abstract = true, +}; + +static void pnv_homer_register_types(void) +{ + type_register_static(&pnv_homer_type_info); + type_register_static(&pnv_homer_power8_type_info); + type_register_static(&pnv_homer_power9_type_info); +} + +type_init(pnv_homer_register_types); diff --git a/hw/ppc/pnv_lpc.c b/hw/ppc/pnv_lpc.c new file mode 100644 index 000000000..bcbca3db9 --- /dev/null +++ b/hw/ppc/pnv_lpc.c @@ -0,0 +1,853 @@ +/* + * QEMU PowerPC PowerNV LPC controller + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/irq.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_lpc.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/fdt.h" + +#include <libfdt.h> + +enum { + ECCB_CTL = 0, + ECCB_RESET = 1, + ECCB_STAT = 2, + ECCB_DATA = 3, +}; + +/* OPB Master LS registers */ +#define OPB_MASTER_LS_ROUTE0 0x8 +#define OPB_MASTER_LS_ROUTE1 0xC +#define OPB_MASTER_LS_IRQ_STAT 0x50 +#define OPB_MASTER_IRQ_LPC 0x00000800 +#define OPB_MASTER_LS_IRQ_MASK 0x54 +#define OPB_MASTER_LS_IRQ_POL 0x58 +#define OPB_MASTER_LS_IRQ_INPUT 0x5c + +/* LPC HC registers */ +#define LPC_HC_FW_SEG_IDSEL 0x24 +#define LPC_HC_FW_RD_ACC_SIZE 0x28 +#define LPC_HC_FW_RD_1B 0x00000000 +#define LPC_HC_FW_RD_2B 0x01000000 +#define LPC_HC_FW_RD_4B 0x02000000 +#define LPC_HC_FW_RD_16B 0x04000000 +#define LPC_HC_FW_RD_128B 0x07000000 +#define LPC_HC_IRQSER_CTRL 0x30 +#define LPC_HC_IRQSER_EN 0x80000000 +#define LPC_HC_IRQSER_QMODE 0x40000000 +#define LPC_HC_IRQSER_START_MASK 0x03000000 +#define LPC_HC_IRQSER_START_4CLK 0x00000000 +#define LPC_HC_IRQSER_START_6CLK 0x01000000 +#define LPC_HC_IRQSER_START_8CLK 0x02000000 +#define LPC_HC_IRQMASK 0x34 /* same bit defs as LPC_HC_IRQSTAT */ +#define LPC_HC_IRQSTAT 0x38 +#define LPC_HC_IRQ_SERIRQ0 0x80000000 /* all bits down to ... 
*/ +#define LPC_HC_IRQ_SERIRQ16 0x00008000 /* IRQ16=IOCHK#, IRQ2=SMI# */ +#define LPC_HC_IRQ_SERIRQ_ALL 0xffff8000 +#define LPC_HC_IRQ_LRESET 0x00000400 +#define LPC_HC_IRQ_SYNC_ABNORM_ERR 0x00000080 +#define LPC_HC_IRQ_SYNC_NORESP_ERR 0x00000040 +#define LPC_HC_IRQ_SYNC_NORM_ERR 0x00000020 +#define LPC_HC_IRQ_SYNC_TIMEOUT_ERR 0x00000010 +#define LPC_HC_IRQ_SYNC_TARG_TAR_ERR 0x00000008 +#define LPC_HC_IRQ_SYNC_BM_TAR_ERR 0x00000004 +#define LPC_HC_IRQ_SYNC_BM0_REQ 0x00000002 +#define LPC_HC_IRQ_SYNC_BM1_REQ 0x00000001 +#define LPC_HC_ERROR_ADDRESS 0x40 + +#define LPC_OPB_SIZE 0x100000000ull + +#define ISA_IO_SIZE 0x00010000 +#define ISA_MEM_SIZE 0x10000000 +#define ISA_FW_SIZE 0x10000000 +#define LPC_IO_OPB_ADDR 0xd0010000 +#define LPC_IO_OPB_SIZE 0x00010000 +#define LPC_MEM_OPB_ADDR 0xe0000000 +#define LPC_MEM_OPB_SIZE 0x10000000 +#define LPC_FW_OPB_ADDR 0xf0000000 +#define LPC_FW_OPB_SIZE 0x10000000 + +#define LPC_OPB_REGS_OPB_ADDR 0xc0010000 +#define LPC_OPB_REGS_OPB_SIZE 0x00000060 +#define LPC_OPB_REGS_OPBA_ADDR 0xc0011000 +#define LPC_OPB_REGS_OPBA_SIZE 0x00000008 +#define LPC_HC_REGS_OPB_ADDR 0xc0012000 +#define LPC_HC_REGS_OPB_SIZE 0x00000100 + +static int pnv_lpc_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset) +{ + const char compat[] = "ibm,power8-lpc\0ibm,lpc"; + char *name; + int offset; + uint32_t lpc_pcba = PNV_XSCOM_LPC_BASE; + uint32_t reg[] = { + cpu_to_be32(lpc_pcba), + cpu_to_be32(PNV_XSCOM_LPC_SIZE) + }; + + name = g_strdup_printf("isa@%x", lpc_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, offset, "compatible", compat, sizeof(compat)))); + return 0; +} + +/* POWER9 only */ +int pnv_dt_lpc(PnvChip *chip, void *fdt, int root_offset, uint64_t lpcm_addr, + uint64_t lpcm_size) +{ + const char compat[] = "ibm,power9-lpcm-opb\0simple-bus"; + const char lpc_compat[] = "ibm,power9-lpc\0ibm,lpc"; + char *name; + int offset, lpcm_offset; + uint32_t opb_ranges[8] = { 0, + cpu_to_be32(lpcm_addr >> 32), + cpu_to_be32((uint32_t)lpcm_addr), + cpu_to_be32(lpcm_size / 2), + cpu_to_be32(lpcm_size / 2), + cpu_to_be32(lpcm_addr >> 32), + cpu_to_be32(lpcm_size / 2), + cpu_to_be32(lpcm_size / 2), + }; + uint32_t opb_reg[4] = { cpu_to_be32(lpcm_addr >> 32), + cpu_to_be32((uint32_t)lpcm_addr), + cpu_to_be32(lpcm_size >> 32), + cpu_to_be32((uint32_t)lpcm_size), + }; + uint32_t lpc_ranges[12] = { 0, 0, + cpu_to_be32(LPC_MEM_OPB_ADDR), + cpu_to_be32(LPC_MEM_OPB_SIZE), + cpu_to_be32(1), 0, + cpu_to_be32(LPC_IO_OPB_ADDR), + cpu_to_be32(LPC_IO_OPB_SIZE), + cpu_to_be32(3), 0, + cpu_to_be32(LPC_FW_OPB_ADDR), + cpu_to_be32(LPC_FW_OPB_SIZE), + }; + uint32_t reg[2]; + + /* + * OPB bus + */ + name = g_strdup_printf("lpcm-opb@%"PRIx64, lpcm_addr); + lpcm_offset = fdt_add_subnode(fdt, root_offset, name); + _FDT(lpcm_offset); + g_free(name); + + _FDT((fdt_setprop(fdt, lpcm_offset, "reg", opb_reg, sizeof(opb_reg)))); + _FDT((fdt_setprop_cell(fdt, lpcm_offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, lpcm_offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, lpcm_offset, "compatible", compat, sizeof(compat)))); + _FDT((fdt_setprop_cell(fdt, lpcm_offset, "ibm,chip-id", chip->chip_id))); + _FDT((fdt_setprop(fdt, lpcm_offset, "ranges", opb_ranges, + sizeof(opb_ranges)))); + + /* + * OPB Master registers + */ + name = 
g_strdup_printf("opb-master@%x", LPC_OPB_REGS_OPB_ADDR); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + + reg[0] = cpu_to_be32(LPC_OPB_REGS_OPB_ADDR); + reg[1] = cpu_to_be32(LPC_OPB_REGS_OPB_SIZE); + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_string(fdt, offset, "compatible", + "ibm,power9-lpcm-opb-master"))); + + /* + * OPB arbitrer registers + */ + name = g_strdup_printf("opb-arbitrer@%x", LPC_OPB_REGS_OPBA_ADDR); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + + reg[0] = cpu_to_be32(LPC_OPB_REGS_OPBA_ADDR); + reg[1] = cpu_to_be32(LPC_OPB_REGS_OPBA_SIZE); + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_string(fdt, offset, "compatible", + "ibm,power9-lpcm-opb-arbiter"))); + + /* + * LPC Host Controller registers + */ + name = g_strdup_printf("lpc-controller@%x", LPC_HC_REGS_OPB_ADDR); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + + reg[0] = cpu_to_be32(LPC_HC_REGS_OPB_ADDR); + reg[1] = cpu_to_be32(LPC_HC_REGS_OPB_SIZE); + _FDT((fdt_setprop(fdt, offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop_string(fdt, offset, "compatible", + "ibm,power9-lpc-controller"))); + + name = g_strdup_printf("lpc@0"); + offset = fdt_add_subnode(fdt, lpcm_offset, name); + _FDT(offset); + g_free(name); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 2))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, offset, "compatible", lpc_compat, + sizeof(lpc_compat)))); + _FDT((fdt_setprop(fdt, offset, "ranges", lpc_ranges, + sizeof(lpc_ranges)))); + + return 0; +} + +/* + * These read/write handlers of the OPB address space should be common + * with the P9 LPC Controller which uses direct MMIOs. + * + * TODO: rework to use address_space_stq() and address_space_ldq() + * instead. + */ +static bool opb_read(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + /* XXX Handle access size limits and FW read caching here */ + return !address_space_read(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz); +} + +static bool opb_write(PnvLpcController *lpc, uint32_t addr, uint8_t *data, + int sz) +{ + /* XXX Handle access size limits here */ + return !address_space_write(&lpc->opb_as, addr, MEMTXATTRS_UNSPECIFIED, + data, sz); +} + +#define ECCB_CTL_READ PPC_BIT(15) +#define ECCB_CTL_SZ_LSH (63 - 7) +#define ECCB_CTL_SZ_MASK PPC_BITMASK(4, 7) +#define ECCB_CTL_ADDR_MASK PPC_BITMASK(32, 63) + +#define ECCB_STAT_OP_DONE PPC_BIT(52) +#define ECCB_STAT_OP_ERR PPC_BIT(52) +#define ECCB_STAT_RD_DATA_LSH (63 - 37) +#define ECCB_STAT_RD_DATA_MASK (0xffffffff << ECCB_STAT_RD_DATA_LSH) + +static void pnv_lpc_do_eccb(PnvLpcController *lpc, uint64_t cmd) +{ + /* XXX Check for magic bits at the top, addr size etc... 
*/ + unsigned int sz = (cmd & ECCB_CTL_SZ_MASK) >> ECCB_CTL_SZ_LSH; + uint32_t opb_addr = cmd & ECCB_CTL_ADDR_MASK; + uint8_t data[8]; + bool success; + + if (sz > sizeof(data)) { + qemu_log_mask(LOG_GUEST_ERROR, + "ECCB: invalid operation at @0x%08x size %d\n", opb_addr, sz); + return; + } + + if (cmd & ECCB_CTL_READ) { + success = opb_read(lpc, opb_addr, data, sz); + if (success) { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (((uint64_t)data[0]) << 24 | + ((uint64_t)data[1]) << 16 | + ((uint64_t)data[2]) << 8 | + ((uint64_t)data[3])) << ECCB_STAT_RD_DATA_LSH; + } else { + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE | + (0xffffffffull << ECCB_STAT_RD_DATA_LSH); + } + } else { + data[0] = lpc->eccb_data_reg >> 24; + data[1] = lpc->eccb_data_reg >> 16; + data[2] = lpc->eccb_data_reg >> 8; + data[3] = lpc->eccb_data_reg; + + success = opb_write(lpc, opb_addr, data, sz); + lpc->eccb_stat_reg = ECCB_STAT_OP_DONE; + } + /* XXX Which error bit (if any) to signal OPB error ? */ +} + +static uint64_t pnv_lpc_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset & 3) { + case ECCB_CTL: + case ECCB_RESET: + val = 0; + break; + case ECCB_STAT: + val = lpc->eccb_stat_reg; + lpc->eccb_stat_reg = 0; + break; + case ECCB_DATA: + val = ((uint64_t)lpc->eccb_data_reg) << 32; + break; + } + return val; +} + +static void pnv_lpc_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t offset = addr >> 3; + + switch (offset & 3) { + case ECCB_CTL: + pnv_lpc_do_eccb(lpc, val); + break; + case ECCB_RESET: + /* XXXX */ + break; + case ECCB_STAT: + break; + case ECCB_DATA: + lpc->eccb_data_reg = val >> 32; + break; + } +} + +static const MemoryRegionOps pnv_lpc_xscom_ops = { + .read = pnv_lpc_xscom_read, + .write = pnv_lpc_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static uint64_t pnv_lpc_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint64_t val = 0; + uint32_t opb_addr = addr & ECCB_CTL_ADDR_MASK; + MemTxResult result; + + switch (size) { + case 4: + val = address_space_ldl(&lpc->opb_as, opb_addr, MEMTXATTRS_UNSPECIFIED, + &result); + break; + case 1: + val = address_space_ldub(&lpc->opb_as, opb_addr, MEMTXATTRS_UNSPECIFIED, + &result); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "OPB read failed at @0x%" + HWADDR_PRIx " invalid size %d\n", addr, size); + return 0; + } + + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "OPB read failed at @0x%" + HWADDR_PRIx "\n", addr); + } + + return val; +} + +static void pnv_lpc_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + uint32_t opb_addr = addr & ECCB_CTL_ADDR_MASK; + MemTxResult result; + + switch (size) { + case 4: + address_space_stl(&lpc->opb_as, opb_addr, val, MEMTXATTRS_UNSPECIFIED, + &result); + break; + case 1: + address_space_stb(&lpc->opb_as, opb_addr, val, MEMTXATTRS_UNSPECIFIED, + &result); + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "OPB write failed at @0x%" + HWADDR_PRIx " invalid size %d\n", addr, size); + return; + } + + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "OPB write failed at @0x%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps pnv_lpc_mmio_ops = { + 
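/* Only 1- and 4-byte accesses are implemented by the handlers above */ + 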
.read = pnv_lpc_mmio_read, + .write = pnv_lpc_mmio_write, + .impl = { + .min_access_size = 1, + .max_access_size = 4, + }, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void pnv_lpc_eval_irqs(PnvLpcController *lpc) +{ + bool lpc_to_opb_irq = false; + PnvLpcClass *plc = PNV_LPC_GET_CLASS(lpc); + + /* Update LPC controller to OPB line */ + if (lpc->lpc_hc_irqser_ctrl & LPC_HC_IRQSER_EN) { + uint32_t irqs; + + irqs = lpc->lpc_hc_irqstat & lpc->lpc_hc_irqmask; + lpc_to_opb_irq = (irqs != 0); + } + + /* We don't honor the polarity register, it's pointless and unused + * anyway + */ + if (lpc_to_opb_irq) { + lpc->opb_irq_input |= OPB_MASTER_IRQ_LPC; + } else { + lpc->opb_irq_input &= ~OPB_MASTER_IRQ_LPC; + } + + /* Update OPB internal latch */ + lpc->opb_irq_stat |= lpc->opb_irq_input & lpc->opb_irq_mask; + + /* Reflect the interrupt */ + pnv_psi_irq_set(lpc->psi, plc->psi_irq, lpc->opb_irq_stat != 0); +} + +static uint64_t lpc_hc_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + val = lpc->lpc_hc_fw_seg_idsel; + break; + case LPC_HC_FW_RD_ACC_SIZE: + val = lpc->lpc_hc_fw_rd_acc_size; + break; + case LPC_HC_IRQSER_CTRL: + val = lpc->lpc_hc_irqser_ctrl; + break; + case LPC_HC_IRQMASK: + val = lpc->lpc_hc_irqmask; + break; + case LPC_HC_IRQSTAT: + val = lpc->lpc_hc_irqstat; + break; + case LPC_HC_ERROR_ADDRESS: + val = lpc->lpc_hc_error_addr; + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%" + HWADDR_PRIx "\n", addr); + } + return val; +} + +static void lpc_hc_write(void *opaque, hwaddr addr, uint64_t val, + unsigned size) +{ + PnvLpcController *lpc = opaque; + + /* XXX Filter out reserved bits */ + + switch (addr) { + case LPC_HC_FW_SEG_IDSEL: + /* XXX Actually figure out how that works as this impacts + * memory regions/aliases + */ + lpc->lpc_hc_fw_seg_idsel = val; + break; + case LPC_HC_FW_RD_ACC_SIZE: + lpc->lpc_hc_fw_rd_acc_size = val; + break; + case LPC_HC_IRQSER_CTRL: + lpc->lpc_hc_irqser_ctrl = val; + pnv_lpc_eval_irqs(lpc); + break; + case LPC_HC_IRQMASK: + lpc->lpc_hc_irqmask = val; + pnv_lpc_eval_irqs(lpc); + break; + case LPC_HC_IRQSTAT: + lpc->lpc_hc_irqstat &= ~val; + pnv_lpc_eval_irqs(lpc); + break; + case LPC_HC_ERROR_ADDRESS: + break; + default: + qemu_log_mask(LOG_UNIMP, "LPC HC Unimplemented register: 0x%" + HWADDR_PRIx "\n", addr); + } +} + +static const MemoryRegionOps lpc_hc_ops = { + .read = lpc_hc_read, + .write = lpc_hc_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static uint64_t opb_master_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvLpcController *lpc = opaque; + uint64_t val = 0xfffffffffffffffful; + + switch (addr) { + case OPB_MASTER_LS_ROUTE0: /* TODO */ + val = lpc->opb_irq_route0; + break; + case OPB_MASTER_LS_ROUTE1: /* TODO */ + val = lpc->opb_irq_route1; + break; + case OPB_MASTER_LS_IRQ_STAT: + val = lpc->opb_irq_stat; + break; + case OPB_MASTER_LS_IRQ_MASK: + val = lpc->opb_irq_mask; + break; + case OPB_MASTER_LS_IRQ_POL: + val = lpc->opb_irq_pol; + break; + case OPB_MASTER_LS_IRQ_INPUT: + val = lpc->opb_irq_input; + break; + default: + qemu_log_mask(LOG_UNIMP, "OPBM: read on unimplemented register: 0x%" + HWADDR_PRIx "\n", addr); + } + + return val; +} + +static void opb_master_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + 
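/* + * OPB_MASTER_LS_IRQ_STAT is write-one-to-clear below; mask and + * polarity updates re-evaluate the output via pnv_lpc_eval_irqs(). + */ + 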
PnvLpcController *lpc = opaque; + + switch (addr) { + case OPB_MASTER_LS_ROUTE0: /* TODO */ + lpc->opb_irq_route0 = val; + break; + case OPB_MASTER_LS_ROUTE1: /* TODO */ + lpc->opb_irq_route1 = val; + break; + case OPB_MASTER_LS_IRQ_STAT: + lpc->opb_irq_stat &= ~val; + pnv_lpc_eval_irqs(lpc); + break; + case OPB_MASTER_LS_IRQ_MASK: + lpc->opb_irq_mask = val; + pnv_lpc_eval_irqs(lpc); + break; + case OPB_MASTER_LS_IRQ_POL: + lpc->opb_irq_pol = val; + pnv_lpc_eval_irqs(lpc); + break; + case OPB_MASTER_LS_IRQ_INPUT: + /* Read only */ + break; + default: + qemu_log_mask(LOG_UNIMP, "OPBM: write on unimplemented register: 0x%" + HWADDR_PRIx " val=0x%08"PRIx64"\n", addr, val); + } +} + +static const MemoryRegionOps opb_master_ops = { + .read = opb_master_read, + .write = opb_master_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void pnv_lpc_power8_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev); + Error *local_err = NULL; + + plc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* P8 uses a XSCOM region for LPC registers */ + pnv_xscom_region_init(&lpc->xscom_regs, OBJECT(lpc), + &pnv_lpc_xscom_ops, lpc, "xscom-lpc", + PNV_XSCOM_LPC_SIZE); +} + +static void pnv_lpc_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + PnvLpcClass *plc = PNV_LPC_CLASS(klass); + + dc->desc = "PowerNV LPC Controller POWER8"; + + xdc->dt_xscom = pnv_lpc_dt_xscom; + + plc->psi_irq = PSIHB_IRQ_LPC_I2C; + + device_class_set_parent_realize(dc, pnv_lpc_power8_realize, + &plc->parent_realize); +} + +static const TypeInfo pnv_lpc_power8_info = { + .name = TYPE_PNV8_LPC, + .parent = TYPE_PNV_LPC, + .class_init = pnv_lpc_power8_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_lpc_power9_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + PnvLpcClass *plc = PNV_LPC_GET_CLASS(dev); + Error *local_err = NULL; + + plc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + /* P9 uses a MMIO region */ + memory_region_init_io(&lpc->xscom_regs, OBJECT(lpc), &pnv_lpc_mmio_ops, + lpc, "lpcm", PNV9_LPCM_SIZE); +} + +static void pnv_lpc_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvLpcClass *plc = PNV_LPC_CLASS(klass); + + dc->desc = "PowerNV LPC Controller POWER9"; + + plc->psi_irq = PSIHB9_IRQ_LPCHC; + + device_class_set_parent_realize(dc, pnv_lpc_power9_realize, + &plc->parent_realize); +} + +static const TypeInfo pnv_lpc_power9_info = { + .name = TYPE_PNV9_LPC, + .parent = TYPE_PNV_LPC, + .class_init = pnv_lpc_power9_class_init, +}; + +static void pnv_lpc_power10_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "PowerNV LPC Controller POWER10"; +} + +static const TypeInfo pnv_lpc_power10_info = { + .name = TYPE_PNV10_LPC, + .parent = TYPE_PNV9_LPC, + .class_init = pnv_lpc_power10_class_init, +}; + +static void pnv_lpc_realize(DeviceState *dev, Error **errp) +{ + PnvLpcController *lpc = PNV_LPC(dev); + + assert(lpc->psi); + + /* Reg inits */ + lpc->lpc_hc_fw_rd_acc_size = LPC_HC_FW_RD_4B; + + /* Create 
address space and backing MR for the OPB bus */ + memory_region_init(&lpc->opb_mr, OBJECT(dev), "lpc-opb", 0x100000000ull); + address_space_init(&lpc->opb_as, &lpc->opb_mr, "lpc-opb"); + + /* Create ISA IO and Mem space regions which are the root of + * the ISA bus (ie, ISA address spaces). We don't create a + * separate one for FW which we alias to memory. + */ + memory_region_init(&lpc->isa_io, OBJECT(dev), "isa-io", ISA_IO_SIZE); + memory_region_init(&lpc->isa_mem, OBJECT(dev), "isa-mem", ISA_MEM_SIZE); + memory_region_init(&lpc->isa_fw, OBJECT(dev), "isa-fw", ISA_FW_SIZE); + + /* Create windows from the OPB space to the ISA space */ + memory_region_init_alias(&lpc->opb_isa_io, OBJECT(dev), "lpc-isa-io", + &lpc->isa_io, 0, LPC_IO_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_IO_OPB_ADDR, + &lpc->opb_isa_io); + memory_region_init_alias(&lpc->opb_isa_mem, OBJECT(dev), "lpc-isa-mem", + &lpc->isa_mem, 0, LPC_MEM_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_MEM_OPB_ADDR, + &lpc->opb_isa_mem); + memory_region_init_alias(&lpc->opb_isa_fw, OBJECT(dev), "lpc-isa-fw", + &lpc->isa_fw, 0, LPC_FW_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_FW_OPB_ADDR, + &lpc->opb_isa_fw); + + /* Create MMIO regions for LPC HC and OPB registers */ + memory_region_init_io(&lpc->opb_master_regs, OBJECT(dev), &opb_master_ops, + lpc, "lpc-opb-master", LPC_OPB_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_OPB_REGS_OPB_ADDR, + &lpc->opb_master_regs); + memory_region_init_io(&lpc->lpc_hc_regs, OBJECT(dev), &lpc_hc_ops, lpc, + "lpc-hc", LPC_HC_REGS_OPB_SIZE); + memory_region_add_subregion(&lpc->opb_mr, LPC_HC_REGS_OPB_ADDR, + &lpc->lpc_hc_regs); +} + +static Property pnv_lpc_properties[] = { + DEFINE_PROP_LINK("psi", PnvLpcController, psi, TYPE_PNV_PSI, PnvPsi *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_lpc_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_lpc_realize; + dc->desc = "PowerNV LPC Controller"; + device_class_set_props(dc, pnv_lpc_properties); + dc->user_creatable = false; +} + +static const TypeInfo pnv_lpc_info = { + .name = TYPE_PNV_LPC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvLpcController), + .class_init = pnv_lpc_class_init, + .class_size = sizeof(PnvLpcClass), + .abstract = true, +}; + +static void pnv_lpc_register_types(void) +{ + type_register_static(&pnv_lpc_info); + type_register_static(&pnv_lpc_power8_info); + type_register_static(&pnv_lpc_power9_info); + type_register_static(&pnv_lpc_power10_info); +} + +type_init(pnv_lpc_register_types) + +/* If we don't use the built-in LPC interrupt deserializer, we need + * to provide a set of qirqs for the ISA bus or things will go bad. + * + * Most machines using pre-Naples chips (without said deserializer) + * have a CPLD that will collect the SerIRQ and shoot them as a + * single level interrupt to the P8 chip. So let's setup a hook + * for doing just that. 
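 *
 * Editor's illustration (not part of the patch), assuming the CPLD
 * handler below: with ISA IRQ 4 and IRQ 10 both asserted,
 * cpld_irqstate becomes (1u << 4) | (1u << 10) = 0x410 and the single
 * PSIHB_IRQ_EXTERNAL line is raised; it only drops once every source
 * deasserts and cpld_irqstate returns to 0.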
+ */ +static void pnv_lpc_isa_irq_handler_cpld(void *opaque, int n, int level) +{ + PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine()); + uint32_t old_state = pnv->cpld_irqstate; + PnvLpcController *lpc = PNV_LPC(opaque); + + if (level) { + pnv->cpld_irqstate |= 1u << n; + } else { + pnv->cpld_irqstate &= ~(1u << n); + } + + if (pnv->cpld_irqstate != old_state) { + pnv_psi_irq_set(lpc->psi, PSIHB_IRQ_EXTERNAL, pnv->cpld_irqstate != 0); + } +} + +static void pnv_lpc_isa_irq_handler(void *opaque, int n, int level) +{ + PnvLpcController *lpc = PNV_LPC(opaque); + + /* The Naples HW latches the 1 levels, clearing is done by SW */ + if (level) { + lpc->lpc_hc_irqstat |= LPC_HC_IRQ_SERIRQ0 >> n; + pnv_lpc_eval_irqs(lpc); + } +} + +ISABus *pnv_lpc_isa_create(PnvLpcController *lpc, bool use_cpld, Error **errp) +{ + Error *local_err = NULL; + ISABus *isa_bus; + qemu_irq *irqs; + qemu_irq_handler handler; + + /* let isa_bus_new() create its own bridge on SysBus otherwise + * devices specified on the command line won't find the bus and + * will fail to create. + */ + isa_bus = isa_bus_new(NULL, &lpc->isa_mem, &lpc->isa_io, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return NULL; + } + + /* Not all variants have a working serial irq decoder. If not, + * handling of LPC interrupts becomes a platform issue (some + * platforms have a CPLD to do it). + */ + if (use_cpld) { + handler = pnv_lpc_isa_irq_handler_cpld; + } else { + handler = pnv_lpc_isa_irq_handler; + } + + irqs = qemu_allocate_irqs(handler, lpc, ISA_NUM_IRQS); + + isa_bus_irqs(isa_bus, irqs); + + return isa_bus; +} diff --git a/hw/ppc/pnv_occ.c b/hw/ppc/pnv_occ.c new file mode 100644 index 000000000..5a716c256 --- /dev/null +++ b/hw/ppc/pnv_occ.c @@ -0,0 +1,302 @@ +/* + * QEMU PowerPC PowerNV Emulation of a few OCC related registers + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "target/ppc/cpu.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/ppc/pnv_occ.h" + +#define OCB_OCI_OCCMISC 0x4020 +#define OCB_OCI_OCCMISC_AND 0x4021 +#define OCB_OCI_OCCMISC_OR 0x4022 + +/* OCC sensors */ +#define OCC_SENSOR_DATA_BLOCK_OFFSET 0x580000 +#define OCC_SENSOR_DATA_VALID 0x580001 +#define OCC_SENSOR_DATA_VERSION 0x580002 +#define OCC_SENSOR_DATA_READING_VERSION 0x580004 +#define OCC_SENSOR_DATA_NR_SENSORS 0x580008 +#define OCC_SENSOR_DATA_NAMES_OFFSET 0x580010 +#define OCC_SENSOR_DATA_READING_PING_OFFSET 0x580014 +#define OCC_SENSOR_DATA_READING_PONG_OFFSET 0x58000c +#define OCC_SENSOR_DATA_NAME_LENGTH 0x58000d +#define OCC_SENSOR_NAME_STRUCTURE_TYPE 0x580023 +#define OCC_SENSOR_LOC_CORE 0x580022 +#define OCC_SENSOR_LOC_GPU 0x580020 +#define OCC_SENSOR_TYPE_POWER 0x580003 +#define OCC_SENSOR_NAME 0x580005 +#define HWMON_SENSORS_MASK 0x58001e +#define SLW_IMAGE_BASE 0x0 + +static void pnv_occ_set_misc(PnvOCC *occ, uint64_t val) +{ + bool irq_state; + PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ); + + val &= 0xffff000000000000ull; + + occ->occmisc = val; + irq_state = !!(val >> 63); + pnv_psi_irq_set(occ->psi, poc->psi_irq, irq_state); +} + +static uint64_t pnv_occ_power8_xscom_read(void *opaque, hwaddr addr, + unsigned size) +{ + PnvOCC *occ = PNV_OCC(opaque); + uint32_t offset = addr >> 3; + uint64_t val = 0; + + switch (offset) { + case OCB_OCI_OCCMISC: + val = occ->occmisc; + break; + default: + qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr >> 3); + } + return val; +} + +static void pnv_occ_power8_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvOCC *occ = PNV_OCC(opaque); + uint32_t offset = addr >> 3; + + switch (offset) { + case OCB_OCI_OCCMISC_AND: + pnv_occ_set_misc(occ, occ->occmisc & val); + break; + case OCB_OCI_OCCMISC_OR: + pnv_occ_set_misc(occ, occ->occmisc | val); + break; + case OCB_OCI_OCCMISC: + pnv_occ_set_misc(occ, val); + break; + default: + qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: Ox%" + HWADDR_PRIx "\n", addr >> 3); + } +} + +static uint64_t pnv_occ_common_area_read(void *opaque, hwaddr addr, + unsigned width) +{ + switch (addr) { + /* + * occ-sensor sanity check that asserts the sensor + * header block + */ + case OCC_SENSOR_DATA_BLOCK_OFFSET: + case OCC_SENSOR_DATA_VALID: + case OCC_SENSOR_DATA_VERSION: + case OCC_SENSOR_DATA_READING_VERSION: + case OCC_SENSOR_DATA_NR_SENSORS: + case OCC_SENSOR_DATA_NAMES_OFFSET: + case OCC_SENSOR_DATA_READING_PING_OFFSET: + case OCC_SENSOR_DATA_READING_PONG_OFFSET: + case OCC_SENSOR_NAME_STRUCTURE_TYPE: + return 1; + case OCC_SENSOR_DATA_NAME_LENGTH: + return 0x30; + case OCC_SENSOR_LOC_CORE: + return 0x0040; + case OCC_SENSOR_TYPE_POWER: + return 0x0080; + case OCC_SENSOR_NAME: + return 0x1000; + case HWMON_SENSORS_MASK: + case OCC_SENSOR_LOC_GPU: + return 0x8e00; + case SLW_IMAGE_BASE: + return 0x1000000000000000; + } + return 0; +} + +static void pnv_occ_common_area_write(void *opaque, hwaddr addr, + uint64_t val, unsigned width) +{ + /* callback function defined to occ common area write */ + return; +} + +static const MemoryRegionOps pnv_occ_power8_xscom_ops = { + .read = pnv_occ_power8_xscom_read, + .write = pnv_occ_power8_xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = 
+
+const MemoryRegionOps pnv_occ_sram_ops = {
+    .read = pnv_occ_common_area_read,
+    .write = pnv_occ_common_area_write,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_occ_power8_class_init(ObjectClass *klass, void *data)
+{
+    PnvOCCClass *poc = PNV_OCC_CLASS(klass);
+
+    poc->xscom_size = PNV_XSCOM_OCC_SIZE;
+    poc->xscom_ops = &pnv_occ_power8_xscom_ops;
+    poc->psi_irq = PSIHB_IRQ_OCC;
+}
+
+static const TypeInfo pnv_occ_power8_type_info = {
+    .name = TYPE_PNV8_OCC,
+    .parent = TYPE_PNV_OCC,
+    .instance_size = sizeof(PnvOCC),
+    .class_init = pnv_occ_power8_class_init,
+};
+
+#define P9_OCB_OCI_OCCMISC 0x6080
+#define P9_OCB_OCI_OCCMISC_CLEAR 0x6081
+#define P9_OCB_OCI_OCCMISC_OR 0x6082
+
+
+static uint64_t pnv_occ_power9_xscom_read(void *opaque, hwaddr addr,
+                                          unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+    uint64_t val = 0;
+
+    switch (offset) {
+    case P9_OCB_OCI_OCCMISC:
+        val = occ->occmisc;
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+    return val;
+}
+
+static void pnv_occ_power9_xscom_write(void *opaque, hwaddr addr,
+                                       uint64_t val, unsigned size)
+{
+    PnvOCC *occ = PNV_OCC(opaque);
+    uint32_t offset = addr >> 3;
+
+    switch (offset) {
+    case P9_OCB_OCI_OCCMISC_CLEAR:
+        pnv_occ_set_misc(occ, 0);
+        break;
+    case P9_OCB_OCI_OCCMISC_OR:
+        pnv_occ_set_misc(occ, occ->occmisc | val);
+        break;
+    case P9_OCB_OCI_OCCMISC:
+        pnv_occ_set_misc(occ, val);
+        break;
+    default:
+        qemu_log_mask(LOG_UNIMP, "OCC Unimplemented register: 0x%"
+                      HWADDR_PRIx "\n", addr >> 3);
+    }
+}
+
+static const MemoryRegionOps pnv_occ_power9_xscom_ops = {
+    .read = pnv_occ_power9_xscom_read,
+    .write = pnv_occ_power9_xscom_write,
+    .valid.min_access_size = 8,
+    .valid.max_access_size = 8,
+    .impl.min_access_size = 8,
+    .impl.max_access_size = 8,
+    .endianness = DEVICE_BIG_ENDIAN,
+};
+
+static void pnv_occ_power9_class_init(ObjectClass *klass, void *data)
+{
+    PnvOCCClass *poc = PNV_OCC_CLASS(klass);
+
+    poc->xscom_size = PNV9_XSCOM_OCC_SIZE;
+    poc->xscom_ops = &pnv_occ_power9_xscom_ops;
+    poc->psi_irq = PSIHB9_IRQ_OCC;
+}
+
+static const TypeInfo pnv_occ_power9_type_info = {
+    .name = TYPE_PNV9_OCC,
+    .parent = TYPE_PNV_OCC,
+    .instance_size = sizeof(PnvOCC),
+    .class_init = pnv_occ_power9_class_init,
+};
+
+static void pnv_occ_realize(DeviceState *dev, Error **errp)
+{
+    PnvOCC *occ = PNV_OCC(dev);
+    PnvOCCClass *poc = PNV_OCC_GET_CLASS(occ);
+
+    assert(occ->psi);
+
+    occ->occmisc = 0;
+
+    /* XScom region for OCC registers */
+    pnv_xscom_region_init(&occ->xscom_regs, OBJECT(dev), poc->xscom_ops,
+                          occ, "xscom-occ", poc->xscom_size);
+
+    /* OCC common area mmio region for OCC SRAM registers */
+    memory_region_init_io(&occ->sram_regs, OBJECT(dev), &pnv_occ_sram_ops,
+                          occ, "occ-common-area",
+                          PNV_OCC_SENSOR_DATA_BLOCK_SIZE);
+}
+
+static Property pnv_occ_properties[] = {
+    DEFINE_PROP_LINK("psi", PnvOCC, psi, TYPE_PNV_PSI, PnvPsi *),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void pnv_occ_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = pnv_occ_realize;
+    dc->desc = "PowerNV OCC Controller";
+    device_class_set_props(dc, pnv_occ_properties);
+    dc->user_creatable = false;
+}
+
+static const TypeInfo pnv_occ_type_info = {
+    .name = TYPE_PNV_OCC,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(PnvOCC),
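/*
 * (Editor's sketch, not part of the patch: TYPE_PNV_OCC is abstract,
 * so a chip model instantiates one of the concrete subclasses and
 * wires the mandatory "psi" link before realizing it, roughly:
 *
 *     object_initialize_child(OBJECT(chip), "occ", &chip->occ,
 *                             TYPE_PNV9_OCC);
 *     object_property_set_link(OBJECT(&chip->occ), "psi",
 *                              OBJECT(&chip->psi), &error_abort);
 *     qdev_realize(DEVICE(&chip->occ), NULL, &error_fatal);
 *
 * The field names on the chip side are illustrative.)
 */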
.class_init = pnv_occ_class_init, + .class_size = sizeof(PnvOCCClass), + .abstract = true, +}; + +static void pnv_occ_register_types(void) +{ + type_register_static(&pnv_occ_type_info); + type_register_static(&pnv_occ_power8_type_info); + type_register_static(&pnv_occ_power9_type_info); +} + +type_init(pnv_occ_register_types); diff --git a/hw/ppc/pnv_pnor.c b/hw/ppc/pnv_pnor.c new file mode 100644 index 000000000..83ecccca2 --- /dev/null +++ b/hw/ppc/pnv_pnor.c @@ -0,0 +1,141 @@ +/* + * QEMU PowerNV PNOR simple model + * + * Copyright (c) 2015-2019, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/units.h" +#include "sysemu/block-backend.h" +#include "sysemu/blockdev.h" +#include "hw/loader.h" +#include "hw/ppc/pnv_pnor.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" + +static uint64_t pnv_pnor_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPnor *s = PNV_PNOR(opaque); + uint64_t ret = 0; + int i; + + for (i = 0; i < size; i++) { + ret |= (uint64_t) s->storage[addr + i] << (8 * (size - i - 1)); + } + + return ret; +} + +static void pnv_pnor_update(PnvPnor *s, int offset, int size) +{ + int offset_end; + int ret; + + if (!s->blk || !blk_is_writable(s->blk)) { + return; + } + + offset_end = offset + size; + offset = QEMU_ALIGN_DOWN(offset, BDRV_SECTOR_SIZE); + offset_end = QEMU_ALIGN_UP(offset_end, BDRV_SECTOR_SIZE); + + ret = blk_pwrite(s->blk, offset, s->storage + offset, + offset_end - offset, 0); + if (ret < 0) { + error_report("Could not update PNOR offset=0x%" PRIx32" : %s", offset, + strerror(-ret)); + } +} + +static void pnv_pnor_write(void *opaque, hwaddr addr, uint64_t data, + unsigned size) +{ + PnvPnor *s = PNV_PNOR(opaque); + int i; + + for (i = 0; i < size; i++) { + s->storage[addr + i] = (data >> (8 * (size - i - 1))) & 0xFF; + } + pnv_pnor_update(s, addr, size); +} + +/* + * TODO: Check endianness: skiboot is BIG, Aspeed AHB is LITTLE, flash + * is BIG. + */ +static const MemoryRegionOps pnv_pnor_ops = { + .read = pnv_pnor_read, + .write = pnv_pnor_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 1, + .max_access_size = 4, + }, +}; + +static void pnv_pnor_realize(DeviceState *dev, Error **errp) +{ + PnvPnor *s = PNV_PNOR(dev); + int ret; + + if (s->blk) { + uint64_t perm = BLK_PERM_CONSISTENT_READ | + (blk_supports_write_perm(s->blk) ? 
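/*
 * (Editor's aside: pnv_pnor_update() above rounds the dirty range out
 * to BDRV_SECTOR_SIZE, so e.g. a 4-byte write at offset 0x1234 flushes
 * the whole 0x1200-0x1400 span of s->storage back to the block
 * backend.)
 */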
BLK_PERM_WRITE : 0); + ret = blk_set_perm(s->blk, perm, BLK_PERM_ALL, errp); + if (ret < 0) { + return; + } + + s->size = blk_getlength(s->blk); + if (s->size <= 0) { + error_setg(errp, "failed to get flash size"); + return; + } + + s->storage = blk_blockalign(s->blk, s->size); + + if (blk_pread(s->blk, 0, s->storage, s->size) != s->size) { + error_setg(errp, "failed to read the initial flash content"); + return; + } + } else { + s->storage = blk_blockalign(NULL, s->size); + memset(s->storage, 0xFF, s->size); + } + + memory_region_init_io(&s->mmio, OBJECT(s), &pnv_pnor_ops, s, + TYPE_PNV_PNOR, s->size); +} + +static Property pnv_pnor_properties[] = { + DEFINE_PROP_INT64("size", PnvPnor, size, 128 * MiB), + DEFINE_PROP_DRIVE("drive", PnvPnor, blk), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_pnor_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = pnv_pnor_realize; + device_class_set_props(dc, pnv_pnor_properties); +} + +static const TypeInfo pnv_pnor_info = { + .name = TYPE_PNV_PNOR, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(PnvPnor), + .class_init = pnv_pnor_class_init, +}; + +static void pnv_pnor_register_types(void) +{ + type_register_static(&pnv_pnor_info); +} + +type_init(pnv_pnor_register_types) diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c new file mode 100644 index 000000000..cd9a2c595 --- /dev/null +++ b/hw/ppc/pnv_psi.c @@ -0,0 +1,967 @@ +/* + * QEMU PowerPC PowerNV Processor Service Interface (PSI) model + * + * Copyright (c) 2015-2017, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "target/ppc/cpu.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "sysemu/reset.h" +#include "qapi/error.h" +#include "monitor/monitor.h" + + +#include "hw/ppc/fdt.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" +#include "hw/qdev-properties.h" +#include "hw/ppc/pnv_psi.h" + +#include <libfdt.h> + +#define PSIHB_XSCOM_FIR_RW 0x00 +#define PSIHB_XSCOM_FIR_AND 0x01 +#define PSIHB_XSCOM_FIR_OR 0x02 +#define PSIHB_XSCOM_FIRMASK_RW 0x03 +#define PSIHB_XSCOM_FIRMASK_AND 0x04 +#define PSIHB_XSCOM_FIRMASK_OR 0x05 +#define PSIHB_XSCOM_FIRACT0 0x06 +#define PSIHB_XSCOM_FIRACT1 0x07 + +/* Host Bridge Base Address Register */ +#define PSIHB_XSCOM_BAR 0x0a +#define PSIHB_BAR_EN 0x0000000000000001ull + +/* FSP Base Address Register */ +#define PSIHB_XSCOM_FSPBAR 0x0b + +/* PSI Host Bridge Control/Status Register */ +#define PSIHB_XSCOM_CR 0x0e +#define PSIHB_CR_FSP_CMD_ENABLE 0x8000000000000000ull +#define PSIHB_CR_FSP_MMIO_ENABLE 0x4000000000000000ull +#define PSIHB_CR_FSP_IRQ_ENABLE 0x1000000000000000ull +#define PSIHB_CR_FSP_ERR_RSP_ENABLE 0x0800000000000000ull +#define PSIHB_CR_PSI_LINK_ENABLE 0x0400000000000000ull +#define PSIHB_CR_FSP_RESET 0x0200000000000000ull +#define PSIHB_CR_PSIHB_RESET 0x0100000000000000ull +#define PSIHB_CR_PSI_IRQ 0x0000800000000000ull +#define PSIHB_CR_FSP_IRQ 0x0000400000000000ull +#define PSIHB_CR_FSP_LINK_ACTIVE 0x0000200000000000ull +#define PSIHB_CR_IRQ_CMD_EXPECT 0x0000010000000000ull + /* and more ... */ + +/* PSIHB Status / Error Mask Register */ +#define PSIHB_XSCOM_SEMR 0x0f + +/* XIVR, to signal interrupts to the CEC firmware. more XIVR below. */ +#define PSIHB_XSCOM_XIVR_FSP 0x10 +#define PSIHB_XIVR_SERVER_SH 40 +#define PSIHB_XIVR_SERVER_MSK (0xffffull << PSIHB_XIVR_SERVER_SH) +#define PSIHB_XIVR_PRIO_SH 32 +#define PSIHB_XIVR_PRIO_MSK (0xffull << PSIHB_XIVR_PRIO_SH) +#define PSIHB_XIVR_SRC_SH 29 +#define PSIHB_XIVR_SRC_MSK (0x7ull << PSIHB_XIVR_SRC_SH) +#define PSIHB_XIVR_PENDING 0x01000000ull + +/* PSI Host Bridge Set Control/ Status Register */ +#define PSIHB_XSCOM_SCR 0x12 + +/* PSI Host Bridge Clear Control/ Status Register */ +#define PSIHB_XSCOM_CCR 0x13 + +/* DMA Upper Address Register */ +#define PSIHB_XSCOM_DMA_UPADD 0x14 + +/* Interrupt Status */ +#define PSIHB_XSCOM_IRQ_STAT 0x15 +#define PSIHB_IRQ_STAT_OCC 0x0000001000000000ull +#define PSIHB_IRQ_STAT_FSI 0x0000000800000000ull +#define PSIHB_IRQ_STAT_LPCI2C 0x0000000400000000ull +#define PSIHB_IRQ_STAT_LOCERR 0x0000000200000000ull +#define PSIHB_IRQ_STAT_EXT 0x0000000100000000ull + +/* remaining XIVR */ +#define PSIHB_XSCOM_XIVR_OCC 0x16 +#define PSIHB_XSCOM_XIVR_FSI 0x17 +#define PSIHB_XSCOM_XIVR_LPCI2C 0x18 +#define PSIHB_XSCOM_XIVR_LOCERR 0x19 +#define PSIHB_XSCOM_XIVR_EXT 0x1a + +/* Interrupt Requester Source Compare Register */ +#define PSIHB_XSCOM_IRSN 0x1b +#define PSIHB_IRSN_COMP_SH 45 +#define PSIHB_IRSN_COMP_MSK (0x7ffffull << PSIHB_IRSN_COMP_SH) +#define PSIHB_IRSN_IRQ_MUX 0x0000000800000000ull +#define PSIHB_IRSN_IRQ_RESET 0x0000000400000000ull +#define PSIHB_IRSN_DOWNSTREAM_EN 0x0000000200000000ull +#define PSIHB_IRSN_UPSTREAM_EN 0x0000000100000000ull +#define PSIHB_IRSN_COMPMASK_SH 13 +#define PSIHB_IRSN_COMPMASK_MSK (0x7ffffull << PSIHB_IRSN_COMPMASK_SH) + +#define PSIHB_BAR_MASK 0x0003fffffff00000ull +#define PSIHB_FSPBAR_MASK 0x0003ffff00000000ull + +#define PSIHB9_BAR_MASK 0x00fffffffff00000ull +#define PSIHB9_FSPBAR_MASK 0x00ffffff00000000ull + +#define PSIHB_REG(addr) (((addr) >> 3) + 
PSIHB_XSCOM_BAR) + +static void pnv_psi_set_bar(PnvPsi *psi, uint64_t bar) +{ + PnvPsiClass *ppc = PNV_PSI_GET_CLASS(psi); + MemoryRegion *sysmem = get_system_memory(); + uint64_t old = psi->regs[PSIHB_XSCOM_BAR]; + + psi->regs[PSIHB_XSCOM_BAR] = bar & (ppc->bar_mask | PSIHB_BAR_EN); + + /* Update MR, always remove it first */ + if (old & PSIHB_BAR_EN) { + memory_region_del_subregion(sysmem, &psi->regs_mr); + } + + /* Then add it back if needed */ + if (bar & PSIHB_BAR_EN) { + uint64_t addr = bar & ppc->bar_mask; + memory_region_add_subregion(sysmem, addr, &psi->regs_mr); + } +} + +static void pnv_psi_update_fsp_mr(PnvPsi *psi) +{ + /* TODO: Update FSP MR if/when we support FSP BAR */ +} + +static void pnv_psi_set_cr(PnvPsi *psi, uint64_t cr) +{ + uint64_t old = psi->regs[PSIHB_XSCOM_CR]; + + psi->regs[PSIHB_XSCOM_CR] = cr; + + /* Check some bit changes */ + if ((old ^ psi->regs[PSIHB_XSCOM_CR]) & PSIHB_CR_FSP_MMIO_ENABLE) { + pnv_psi_update_fsp_mr(psi); + } +} + +static void pnv_psi_set_irsn(PnvPsi *psi, uint64_t val) +{ + ICSState *ics = &PNV8_PSI(psi)->ics; + + /* In this model we ignore the up/down enable bits for now + * as SW doesn't use them (other than setting them at boot). + * We ignore IRQ_MUX, its meaning isn't clear and we don't use + * it and finally we ignore reset (XXX fix that ?) + */ + psi->regs[PSIHB_XSCOM_IRSN] = val & (PSIHB_IRSN_COMP_MSK | + PSIHB_IRSN_IRQ_MUX | + PSIHB_IRSN_IRQ_RESET | + PSIHB_IRSN_DOWNSTREAM_EN | + PSIHB_IRSN_UPSTREAM_EN); + + /* We ignore the compare mask as well, our ICS emulation is too + * simplistic to make any use if it, and we extract the offset + * from the compare value + */ + ics->offset = (val & PSIHB_IRSN_COMP_MSK) >> PSIHB_IRSN_COMP_SH; +} + +/* + * FSP and PSI interrupts are muxed under the same number. + */ +static const uint32_t xivr_regs[] = { + [PSIHB_IRQ_PSI] = PSIHB_XSCOM_XIVR_FSP, + [PSIHB_IRQ_FSP] = PSIHB_XSCOM_XIVR_FSP, + [PSIHB_IRQ_OCC] = PSIHB_XSCOM_XIVR_OCC, + [PSIHB_IRQ_FSI] = PSIHB_XSCOM_XIVR_FSI, + [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_XIVR_LPCI2C, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_XIVR_LOCERR, + [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_XIVR_EXT, +}; + +static const uint32_t stat_regs[] = { + [PSIHB_IRQ_PSI] = PSIHB_XSCOM_CR, + [PSIHB_IRQ_FSP] = PSIHB_XSCOM_CR, + [PSIHB_IRQ_OCC] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_FSI] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_LPC_I2C] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_XSCOM_IRQ_STAT, + [PSIHB_IRQ_EXTERNAL] = PSIHB_XSCOM_IRQ_STAT, +}; + +static const uint64_t stat_bits[] = { + [PSIHB_IRQ_PSI] = PSIHB_CR_PSI_IRQ, + [PSIHB_IRQ_FSP] = PSIHB_CR_FSP_IRQ, + [PSIHB_IRQ_OCC] = PSIHB_IRQ_STAT_OCC, + [PSIHB_IRQ_FSI] = PSIHB_IRQ_STAT_FSI, + [PSIHB_IRQ_LPC_I2C] = PSIHB_IRQ_STAT_LPCI2C, + [PSIHB_IRQ_LOCAL_ERR] = PSIHB_IRQ_STAT_LOCERR, + [PSIHB_IRQ_EXTERNAL] = PSIHB_IRQ_STAT_EXT, +}; + +void pnv_psi_irq_set(PnvPsi *psi, int irq, bool state) +{ + PNV_PSI_GET_CLASS(psi)->irq_set(psi, irq, state); +} + +static void pnv_psi_power8_irq_set(PnvPsi *psi, int irq, bool state) +{ + uint32_t xivr_reg; + uint32_t stat_reg; + uint32_t src; + bool masked; + + if (irq > PSIHB_IRQ_EXTERNAL) { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq); + return; + } + + xivr_reg = xivr_regs[irq]; + stat_reg = stat_regs[irq]; + + src = (psi->regs[xivr_reg] & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH; + if (state) { + psi->regs[stat_reg] |= stat_bits[irq]; + /* TODO: optimization, check mask here. 
That means + * re-evaluating when unmasking + */ + qemu_irq_raise(psi->qirqs[src]); + } else { + psi->regs[stat_reg] &= ~stat_bits[irq]; + + /* FSP and PSI are muxed so don't lower if either is still set */ + if (stat_reg != PSIHB_XSCOM_CR || + !(psi->regs[stat_reg] & (PSIHB_CR_PSI_IRQ | PSIHB_CR_FSP_IRQ))) { + qemu_irq_lower(psi->qirqs[src]); + } else { + state = true; + } + } + + /* Note about the emulation of the pending bit: This isn't + * entirely correct. The pending bit should be cleared when the + * EOI has been received. However, we don't have callbacks on EOI + * (especially not under KVM) so no way to emulate that properly, + * so instead we just set that bit as the logical "output" of the + * XIVR (ie pending & !masked) + * + * CLG: We could define a new ICS object with a custom eoi() + * handler to clear the pending bit. But I am not sure this would + * be useful for the software anyhow. + */ + masked = (psi->regs[xivr_reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK; + if (state && !masked) { + psi->regs[xivr_reg] |= PSIHB_XIVR_PENDING; + } else { + psi->regs[xivr_reg] &= ~PSIHB_XIVR_PENDING; + } +} + +static void pnv_psi_set_xivr(PnvPsi *psi, uint32_t reg, uint64_t val) +{ + ICSState *ics = &PNV8_PSI(psi)->ics; + uint16_t server; + uint8_t prio; + uint8_t src; + + psi->regs[reg] = (psi->regs[reg] & PSIHB_XIVR_PENDING) | + (val & (PSIHB_XIVR_SERVER_MSK | + PSIHB_XIVR_PRIO_MSK | + PSIHB_XIVR_SRC_MSK)); + val = psi->regs[reg]; + server = (val & PSIHB_XIVR_SERVER_MSK) >> PSIHB_XIVR_SERVER_SH; + prio = (val & PSIHB_XIVR_PRIO_MSK) >> PSIHB_XIVR_PRIO_SH; + src = (val & PSIHB_XIVR_SRC_MSK) >> PSIHB_XIVR_SRC_SH; + + if (src >= PSI_NUM_INTERRUPTS) { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", src); + return; + } + + /* Remove pending bit if the IRQ is masked */ + if ((psi->regs[reg] & PSIHB_XIVR_PRIO_MSK) == PSIHB_XIVR_PRIO_MSK) { + psi->regs[reg] &= ~PSIHB_XIVR_PENDING; + } + + /* The low order 2 bits are the link pointer (Type II interrupts). + * Shift back to get a valid IRQ server. + */ + server >>= 2; + + /* Now because of source remapping, weird things can happen + * if you change the source number dynamically, our simple ICS + * doesn't deal with remapping. So we just poke a different + * ICS entry based on what source number was written. This will + * do for now but a more accurate implementation would instead + * use a fixed server/prio and a remapper of the generated irq. 
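 *
 * (Editor's illustration: if software rewrote XIVR_OCC changing SRC
 * from 1 to 2, the ics_write_xive() call below would update ICS entry
 * 2 and leave entry 1 with its stale server/priority; a remapper
 * would instead redirect source 1's triggers. Since firmware programs
 * the sources once at boot, the shortcut holds up in practice.)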
+ */ + ics_write_xive(ics, src, server, prio, prio); +} + +static uint64_t pnv_psi_reg_read(PnvPsi *psi, uint32_t offset, bool mmio) +{ + uint64_t val = 0xffffffffffffffffull; + + switch (offset) { + case PSIHB_XSCOM_FIR_RW: + case PSIHB_XSCOM_FIRACT0: + case PSIHB_XSCOM_FIRACT1: + case PSIHB_XSCOM_BAR: + case PSIHB_XSCOM_FSPBAR: + case PSIHB_XSCOM_CR: + case PSIHB_XSCOM_XIVR_FSP: + case PSIHB_XSCOM_XIVR_OCC: + case PSIHB_XSCOM_XIVR_FSI: + case PSIHB_XSCOM_XIVR_LPCI2C: + case PSIHB_XSCOM_XIVR_LOCERR: + case PSIHB_XSCOM_XIVR_EXT: + case PSIHB_XSCOM_IRQ_STAT: + case PSIHB_XSCOM_SEMR: + case PSIHB_XSCOM_DMA_UPADD: + case PSIHB_XSCOM_IRSN: + val = psi->regs[offset]; + break; + default: + qemu_log_mask(LOG_UNIMP, "PSI: read at 0x%" PRIx32 "\n", offset); + } + return val; +} + +static void pnv_psi_reg_write(PnvPsi *psi, uint32_t offset, uint64_t val, + bool mmio) +{ + switch (offset) { + case PSIHB_XSCOM_FIR_RW: + case PSIHB_XSCOM_FIRACT0: + case PSIHB_XSCOM_FIRACT1: + case PSIHB_XSCOM_SEMR: + case PSIHB_XSCOM_DMA_UPADD: + psi->regs[offset] = val; + break; + case PSIHB_XSCOM_FIR_OR: + psi->regs[PSIHB_XSCOM_FIR_RW] |= val; + break; + case PSIHB_XSCOM_FIR_AND: + psi->regs[PSIHB_XSCOM_FIR_RW] &= val; + break; + case PSIHB_XSCOM_BAR: + /* Only XSCOM can write this one */ + if (!mmio) { + pnv_psi_set_bar(psi, val); + } else { + qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of BAR\n"); + } + break; + case PSIHB_XSCOM_FSPBAR: + psi->regs[PSIHB_XSCOM_FSPBAR] = val & PSIHB_FSPBAR_MASK; + pnv_psi_update_fsp_mr(psi); + break; + case PSIHB_XSCOM_CR: + pnv_psi_set_cr(psi, val); + break; + case PSIHB_XSCOM_SCR: + pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] | val); + break; + case PSIHB_XSCOM_CCR: + pnv_psi_set_cr(psi, psi->regs[PSIHB_XSCOM_CR] & ~val); + break; + case PSIHB_XSCOM_XIVR_FSP: + case PSIHB_XSCOM_XIVR_OCC: + case PSIHB_XSCOM_XIVR_FSI: + case PSIHB_XSCOM_XIVR_LPCI2C: + case PSIHB_XSCOM_XIVR_LOCERR: + case PSIHB_XSCOM_XIVR_EXT: + pnv_psi_set_xivr(psi, offset, val); + break; + case PSIHB_XSCOM_IRQ_STAT: + /* Read only */ + qemu_log_mask(LOG_GUEST_ERROR, "PSI: invalid write of IRQ_STAT\n"); + break; + case PSIHB_XSCOM_IRSN: + pnv_psi_set_irsn(psi, val); + break; + default: + qemu_log_mask(LOG_UNIMP, "PSI: write at 0x%" PRIx32 "\n", offset); + } +} + +/* + * The values of the registers when accessed through the MMIO region + * follow the relation : xscom = (mmio + 0x50) >> 3 + */ +static uint64_t pnv_psi_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + return pnv_psi_reg_read(opaque, PSIHB_REG(addr), true); +} + +static void pnv_psi_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + pnv_psi_reg_write(opaque, PSIHB_REG(addr), val, true); +} + +static const MemoryRegionOps psi_mmio_ops = { + .read = pnv_psi_mmio_read, + .write = pnv_psi_mmio_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size = 8, + }, +}; + +static uint64_t pnv_psi_xscom_read(void *opaque, hwaddr addr, unsigned size) +{ + return pnv_psi_reg_read(opaque, addr >> 3, false); +} + +static void pnv_psi_xscom_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + pnv_psi_reg_write(opaque, addr >> 3, val, false); +} + +static const MemoryRegionOps pnv_psi_xscom_ops = { + .read = pnv_psi_xscom_read, + .write = pnv_psi_xscom_write, + .endianness = DEVICE_BIG_ENDIAN, + .valid = { + .min_access_size = 8, + .max_access_size = 8, + }, + .impl = { + .min_access_size = 8, + .max_access_size 
= 8, + } +}; + +static void pnv_psi_reset(DeviceState *dev) +{ + PnvPsi *psi = PNV_PSI(dev); + + memset(psi->regs, 0x0, sizeof(psi->regs)); + + psi->regs[PSIHB_XSCOM_BAR] = psi->bar | PSIHB_BAR_EN; +} + +static void pnv_psi_reset_handler(void *dev) +{ + device_cold_reset(DEVICE(dev)); +} + +static void pnv_psi_realize(DeviceState *dev, Error **errp) +{ + PnvPsi *psi = PNV_PSI(dev); + + /* Default BAR for MMIO region */ + pnv_psi_set_bar(psi, psi->bar | PSIHB_BAR_EN); + + qemu_register_reset(pnv_psi_reset_handler, dev); +} + +static void pnv_psi_power8_instance_init(Object *obj) +{ + Pnv8Psi *psi8 = PNV8_PSI(obj); + + object_initialize_child(obj, "ics-psi", &psi8->ics, TYPE_ICS); + object_property_add_alias(obj, ICS_PROP_XICS, OBJECT(&psi8->ics), + ICS_PROP_XICS); +} + +static const uint8_t irq_to_xivr[] = { + PSIHB_XSCOM_XIVR_FSP, + PSIHB_XSCOM_XIVR_OCC, + PSIHB_XSCOM_XIVR_FSI, + PSIHB_XSCOM_XIVR_LPCI2C, + PSIHB_XSCOM_XIVR_LOCERR, + PSIHB_XSCOM_XIVR_EXT, +}; + +static void pnv_psi_power8_realize(DeviceState *dev, Error **errp) +{ + PnvPsi *psi = PNV_PSI(dev); + ICSState *ics = &PNV8_PSI(psi)->ics; + unsigned int i; + + /* Create PSI interrupt control source */ + if (!object_property_set_int(OBJECT(ics), "nr-irqs", PSI_NUM_INTERRUPTS, + errp)) { + return; + } + if (!qdev_realize(DEVICE(ics), NULL, errp)) { + return; + } + + for (i = 0; i < ics->nr_irqs; i++) { + ics_set_irq_type(ics, i, true); + } + + psi->qirqs = qemu_allocate_irqs(ics_set_irq, ics, ics->nr_irqs); + + /* XSCOM region for PSI registers */ + pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_xscom_ops, + psi, "xscom-psi", PNV_XSCOM_PSIHB_SIZE); + + /* Initialize MMIO region */ + memory_region_init_io(&psi->regs_mr, OBJECT(dev), &psi_mmio_ops, psi, + "psihb", PNV_PSIHB_SIZE); + + /* Default sources in XIVR */ + for (i = 0; i < PSI_NUM_INTERRUPTS; i++) { + uint8_t xivr = irq_to_xivr[i]; + psi->regs[xivr] = PSIHB_XIVR_PRIO_MSK | + ((uint64_t) i << PSIHB_XIVR_SRC_SH); + } + + pnv_psi_realize(dev, errp); +} + +static int pnv_psi_dt_xscom(PnvXScomInterface *dev, void *fdt, int xscom_offset) +{ + PnvPsiClass *ppc = PNV_PSI_GET_CLASS(dev); + char *name; + int offset; + uint32_t reg[] = { + cpu_to_be32(ppc->xscom_pcba), + cpu_to_be32(ppc->xscom_size) + }; + + name = g_strdup_printf("psihb@%x", ppc->xscom_pcba); + offset = fdt_add_subnode(fdt, xscom_offset, name); + _FDT(offset); + g_free(name); + + _FDT(fdt_setprop(fdt, offset, "reg", reg, sizeof(reg))); + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", 2)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", 1)); + _FDT(fdt_setprop(fdt, offset, "compatible", ppc->compat, + ppc->compat_size)); + return 0; +} + +static Property pnv_psi_properties[] = { + DEFINE_PROP_UINT64("bar", PnvPsi, bar, 0), + DEFINE_PROP_UINT64("fsp-bar", PnvPsi, fsp_bar, 0), + DEFINE_PROP_END_OF_LIST(), +}; + +static void pnv_psi_power8_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvPsiClass *ppc = PNV_PSI_CLASS(klass); + static const char compat[] = "ibm,power8-psihb-x\0ibm,psihb-x"; + + dc->desc = "PowerNV PSI Controller POWER8"; + dc->realize = pnv_psi_power8_realize; + + ppc->xscom_pcba = PNV_XSCOM_PSIHB_BASE; + ppc->xscom_size = PNV_XSCOM_PSIHB_SIZE; + ppc->bar_mask = PSIHB_BAR_MASK; + ppc->irq_set = pnv_psi_power8_irq_set; + ppc->compat = compat; + ppc->compat_size = sizeof(compat); +} + +static const TypeInfo pnv_psi_power8_info = { + .name = TYPE_PNV8_PSI, + .parent = TYPE_PNV_PSI, + .instance_size = sizeof(Pnv8Psi), + .instance_init = 
pnv_psi_power8_instance_init, + .class_init = pnv_psi_power8_class_init, +}; + + +/* Common registers */ + +#define PSIHB9_CR 0x20 +#define PSIHB9_SEMR 0x28 + +/* P9 registers */ + +#define PSIHB9_INTERRUPT_CONTROL 0x58 +#define PSIHB9_IRQ_METHOD PPC_BIT(0) +#define PSIHB9_IRQ_RESET PPC_BIT(1) +#define PSIHB9_ESB_CI_BASE 0x60 +#define PSIHB9_ESB_CI_64K PPC_BIT(1) +#define PSIHB9_ESB_CI_ADDR_MASK PPC_BITMASK(8, 47) +#define PSIHB9_ESB_CI_VALID PPC_BIT(63) +#define PSIHB9_ESB_NOTIF_ADDR 0x68 +#define PSIHB9_ESB_NOTIF_ADDR_MASK PPC_BITMASK(8, 60) +#define PSIHB9_ESB_NOTIF_VALID PPC_BIT(63) +#define PSIHB9_IVT_OFFSET 0x70 +#define PSIHB9_IVT_OFF_SHIFT 32 + +#define PSIHB9_IRQ_LEVEL 0x78 /* assertion */ +#define PSIHB9_IRQ_LEVEL_PSI PPC_BIT(0) +#define PSIHB9_IRQ_LEVEL_OCC PPC_BIT(1) +#define PSIHB9_IRQ_LEVEL_FSI PPC_BIT(2) +#define PSIHB9_IRQ_LEVEL_LPCHC PPC_BIT(3) +#define PSIHB9_IRQ_LEVEL_LOCAL_ERR PPC_BIT(4) +#define PSIHB9_IRQ_LEVEL_GLOBAL_ERR PPC_BIT(5) +#define PSIHB9_IRQ_LEVEL_TPM PPC_BIT(6) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ1 PPC_BIT(7) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ2 PPC_BIT(8) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ3 PPC_BIT(9) +#define PSIHB9_IRQ_LEVEL_LPC_SIRQ4 PPC_BIT(10) +#define PSIHB9_IRQ_LEVEL_SBE_I2C PPC_BIT(11) +#define PSIHB9_IRQ_LEVEL_DIO PPC_BIT(12) +#define PSIHB9_IRQ_LEVEL_PSU PPC_BIT(13) +#define PSIHB9_IRQ_LEVEL_I2C_C PPC_BIT(14) +#define PSIHB9_IRQ_LEVEL_I2C_D PPC_BIT(15) +#define PSIHB9_IRQ_LEVEL_I2C_E PPC_BIT(16) +#define PSIHB9_IRQ_LEVEL_SBE PPC_BIT(19) + +#define PSIHB9_IRQ_STAT 0x80 /* P bit */ +#define PSIHB9_IRQ_STAT_PSI PPC_BIT(0) +#define PSIHB9_IRQ_STAT_OCC PPC_BIT(1) +#define PSIHB9_IRQ_STAT_FSI PPC_BIT(2) +#define PSIHB9_IRQ_STAT_LPCHC PPC_BIT(3) +#define PSIHB9_IRQ_STAT_LOCAL_ERR PPC_BIT(4) +#define PSIHB9_IRQ_STAT_GLOBAL_ERR PPC_BIT(5) +#define PSIHB9_IRQ_STAT_TPM PPC_BIT(6) +#define PSIHB9_IRQ_STAT_LPC_SIRQ1 PPC_BIT(7) +#define PSIHB9_IRQ_STAT_LPC_SIRQ2 PPC_BIT(8) +#define PSIHB9_IRQ_STAT_LPC_SIRQ3 PPC_BIT(9) +#define PSIHB9_IRQ_STAT_LPC_SIRQ4 PPC_BIT(10) +#define PSIHB9_IRQ_STAT_SBE_I2C PPC_BIT(11) +#define PSIHB9_IRQ_STAT_DIO PPC_BIT(12) +#define PSIHB9_IRQ_STAT_PSU PPC_BIT(13) + +static void pnv_psi_notify(XiveNotifier *xf, uint32_t srcno) +{ + PnvPsi *psi = PNV_PSI(xf); + uint64_t notif_port = psi->regs[PSIHB_REG(PSIHB9_ESB_NOTIF_ADDR)]; + bool valid = notif_port & PSIHB9_ESB_NOTIF_VALID; + uint64_t notify_addr = notif_port & ~PSIHB9_ESB_NOTIF_VALID; + + uint32_t offset = + (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT); + uint64_t data = XIVE_TRIGGER_PQ | offset | srcno; + MemTxResult result; + + if (!valid) { + return; + } + + address_space_stq_be(&address_space_memory, notify_addr, data, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: trigger failed @%" + HWADDR_PRIx "\n", __func__, notif_port); + return; + } +} + +static uint64_t pnv_psi_p9_mmio_read(void *opaque, hwaddr addr, unsigned size) +{ + PnvPsi *psi = PNV_PSI(opaque); + uint32_t reg = PSIHB_REG(addr); + uint64_t val = -1; + + switch (addr) { + case PSIHB9_CR: + case PSIHB9_SEMR: + /* FSP stuff */ + case PSIHB9_INTERRUPT_CONTROL: + case PSIHB9_ESB_CI_BASE: + case PSIHB9_ESB_NOTIF_ADDR: + case PSIHB9_IVT_OFFSET: + val = psi->regs[reg]; + break; + default: + qemu_log_mask(LOG_GUEST_ERROR, "PSI: read at 0x%" PRIx64 "\n", addr); + } + + return val; +} + +static void pnv_psi_p9_mmio_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + PnvPsi *psi = PNV_PSI(opaque); + Pnv9Psi *psi9 = PNV9_PSI(psi); + 
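/*
 * Editor's sketch (not part of the patch): pnv_psi_notify() above is
 * the XIVE trigger path. For an IVT offset O and source S it stores
 * the 64-bit datum XIVE_TRIGGER_PQ | O | S to the notify port, e.g.
 * with O = 0x10 and S = 3 (LPCHC):
 *
 *     uint64_t data = XIVE_TRIGGER_PQ | 0x10 | 3;    // 0x13 + PQ flag
 *     address_space_stq_be(&address_space_memory, notify_addr, data,
 *                          MEMTXATTRS_UNSPECIFIED, &result);
 *
 * Note also that PSIHB_REG(addr) = (addr >> 3) + PSIHB_XSCOM_BAR is
 * exactly the xscom = (mmio + 0x50) >> 3 relation quoted earlier,
 * since PSIHB_XSCOM_BAR is 0x0a and 0x0a * 8 = 0x50.
 */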
uint32_t reg = PSIHB_REG(addr);
+    MemoryRegion *sysmem = get_system_memory();
+
+    switch (addr) {
+    case PSIHB9_CR:
+    case PSIHB9_SEMR:
+        /* FSP stuff */
+        break;
+    case PSIHB9_INTERRUPT_CONTROL:
+        if (val & PSIHB9_IRQ_RESET) {
+            device_cold_reset(DEVICE(&psi9->source));
+        }
+        psi->regs[reg] = val;
+        break;
+
+    case PSIHB9_ESB_CI_BASE:
+        if (!(val & PSIHB9_ESB_CI_VALID)) {
+            if (psi->regs[reg] & PSIHB9_ESB_CI_VALID) {
+                memory_region_del_subregion(sysmem, &psi9->source.esb_mmio);
+            }
+        } else {
+            if (!(psi->regs[reg] & PSIHB9_ESB_CI_VALID)) {
+                memory_region_add_subregion(sysmem,
+                                            val & ~PSIHB9_ESB_CI_VALID,
+                                            &psi9->source.esb_mmio);
+            }
+        }
+        psi->regs[reg] = val;
+        break;
+
+    case PSIHB9_ESB_NOTIF_ADDR:
+        psi->regs[reg] = val;
+        break;
+    case PSIHB9_IVT_OFFSET:
+        psi->regs[reg] = val;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: write at 0x%" PRIx64 "\n", addr);
+    }
+}
+
+static const MemoryRegionOps pnv_psi_p9_mmio_ops = {
+    .read = pnv_psi_p9_mmio_read,
+    .write = pnv_psi_p9_mmio_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+};
+
+static uint64_t pnv_psi_p9_xscom_read(void *opaque, hwaddr addr, unsigned size)
+{
+    /* No reads are expected */
+    qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom read at 0x%" PRIx64 "\n", addr);
+    return -1;
+}
+
+static void pnv_psi_p9_xscom_write(void *opaque, hwaddr addr,
+                                   uint64_t val, unsigned size)
+{
+    PnvPsi *psi = PNV_PSI(opaque);
+
+    /* XSCOM is only used to set the PSIHB MMIO region */
+    switch (addr >> 3) {
+    case PSIHB_XSCOM_BAR:
+        pnv_psi_set_bar(psi, val);
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: xscom write at 0x%" PRIx64 "\n",
+                      addr);
+    }
+}
+
+static const MemoryRegionOps pnv_psi_p9_xscom_ops = {
+    .read = pnv_psi_p9_xscom_read,
+    .write = pnv_psi_p9_xscom_write,
+    .endianness = DEVICE_BIG_ENDIAN,
+    .valid = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    },
+    .impl = {
+        .min_access_size = 8,
+        .max_access_size = 8,
+    }
+};
+
+static void pnv_psi_power9_irq_set(PnvPsi *psi, int irq, bool state)
+{
+    uint64_t irq_method = psi->regs[PSIHB_REG(PSIHB9_INTERRUPT_CONTROL)];
+
+    if (irq > PSIHB9_NUM_IRQS) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: Unsupported irq %d\n", irq);
+        return;
+    }
+
+    if (irq_method & PSIHB9_IRQ_METHOD) {
+        qemu_log_mask(LOG_GUEST_ERROR, "PSI: LSI IRQ method not supported\n");
+        return;
+    }
+
+    /* Update LSI levels */
+    if (state) {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] |= PPC_BIT(irq);
+    } else {
+        psi->regs[PSIHB_REG(PSIHB9_IRQ_LEVEL)] &= ~PPC_BIT(irq);
+    }
+
+    qemu_set_irq(psi->qirqs[irq], state);
+}
+
+static void pnv_psi_power9_reset(DeviceState *dev)
+{
+    Pnv9Psi *psi = PNV9_PSI(dev);
+
+    pnv_psi_reset(dev);
+
+    if (memory_region_is_mapped(&psi->source.esb_mmio)) {
+        memory_region_del_subregion(get_system_memory(), &psi->source.esb_mmio);
+    }
+}
+
+static void pnv_psi_power9_instance_init(Object *obj)
+{
+    Pnv9Psi *psi = PNV9_PSI(obj);
+
+    object_initialize_child(obj, "source", &psi->source, TYPE_XIVE_SOURCE);
+}
+
+static void pnv_psi_power9_realize(DeviceState *dev, Error **errp)
+{
+    PnvPsi *psi = PNV_PSI(dev);
+    XiveSource *xsrc = &PNV9_PSI(psi)->source;
+    int i;
+
+    /* This is the only device with 4k ESB pages */
+    object_property_set_int(OBJECT(xsrc), "shift", XIVE_ESB_4K, &error_fatal);
+    object_property_set_int(OBJECT(xsrc), "nr-irqs", PSIHB9_NUM_IRQS,
+                            &error_fatal);
+    object_property_set_link(OBJECT(xsrc), "xive", OBJECT(psi), &error_abort);
+
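/*
 * Editor's sketch (not part of the patch): the ESB pages set up above
 * only become guest-visible once firmware writes a valid CI base,
 * which the PSIHB9_ESB_CI_BASE case of pnv_psi_p9_mmio_write() maps
 * into system memory as source.esb_mmio. With a hypothetical
 * psihb_write() MMIO accessor:
 *
 *     psihb_write(PSIHB9_ESB_CI_BASE, esb_base | PSIHB9_ESB_CI_VALID);
 *     psihb_write(PSIHB9_ESB_CI_BASE, 0);   // clearing VALID unmaps
 */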
if (!qdev_realize(DEVICE(xsrc), NULL, errp)) { + return; + } + + for (i = 0; i < xsrc->nr_irqs; i++) { + xive_source_irq_set_lsi(xsrc, i); + } + + psi->qirqs = qemu_allocate_irqs(xive_source_set_irq, xsrc, xsrc->nr_irqs); + + /* XSCOM region for PSI registers */ + pnv_xscom_region_init(&psi->xscom_regs, OBJECT(dev), &pnv_psi_p9_xscom_ops, + psi, "xscom-psi", PNV9_XSCOM_PSIHB_SIZE); + + /* MMIO region for PSI registers */ + memory_region_init_io(&psi->regs_mr, OBJECT(dev), &pnv_psi_p9_mmio_ops, psi, + "psihb", PNV9_PSIHB_SIZE); + + pnv_psi_realize(dev, errp); +} + +static void pnv_psi_power9_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvPsiClass *ppc = PNV_PSI_CLASS(klass); + XiveNotifierClass *xfc = XIVE_NOTIFIER_CLASS(klass); + static const char compat[] = "ibm,power9-psihb-x\0ibm,psihb-x"; + + dc->desc = "PowerNV PSI Controller POWER9"; + dc->realize = pnv_psi_power9_realize; + dc->reset = pnv_psi_power9_reset; + + ppc->xscom_pcba = PNV9_XSCOM_PSIHB_BASE; + ppc->xscom_size = PNV9_XSCOM_PSIHB_SIZE; + ppc->bar_mask = PSIHB9_BAR_MASK; + ppc->irq_set = pnv_psi_power9_irq_set; + ppc->compat = compat; + ppc->compat_size = sizeof(compat); + + xfc->notify = pnv_psi_notify; +} + +static const TypeInfo pnv_psi_power9_info = { + .name = TYPE_PNV9_PSI, + .parent = TYPE_PNV_PSI, + .instance_size = sizeof(Pnv9Psi), + .instance_init = pnv_psi_power9_instance_init, + .class_init = pnv_psi_power9_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_XIVE_NOTIFIER }, + { }, + }, +}; + +static void pnv_psi_power10_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvPsiClass *ppc = PNV_PSI_CLASS(klass); + static const char compat[] = "ibm,power10-psihb-x\0ibm,psihb-x"; + + dc->desc = "PowerNV PSI Controller POWER10"; + + ppc->xscom_pcba = PNV10_XSCOM_PSIHB_BASE; + ppc->xscom_size = PNV10_XSCOM_PSIHB_SIZE; + ppc->compat = compat; + ppc->compat_size = sizeof(compat); +} + +static const TypeInfo pnv_psi_power10_info = { + .name = TYPE_PNV10_PSI, + .parent = TYPE_PNV9_PSI, + .class_init = pnv_psi_power10_class_init, +}; + +static void pnv_psi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PnvXScomInterfaceClass *xdc = PNV_XSCOM_INTERFACE_CLASS(klass); + + xdc->dt_xscom = pnv_psi_dt_xscom; + + dc->desc = "PowerNV PSI Controller"; + device_class_set_props(dc, pnv_psi_properties); + dc->reset = pnv_psi_reset; + dc->user_creatable = false; +} + +static const TypeInfo pnv_psi_info = { + .name = TYPE_PNV_PSI, + .parent = TYPE_DEVICE, + .instance_size = sizeof(PnvPsi), + .class_init = pnv_psi_class_init, + .class_size = sizeof(PnvPsiClass), + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_PNV_XSCOM_INTERFACE }, + { } + } +}; + +static void pnv_psi_register_types(void) +{ + type_register_static(&pnv_psi_info); + type_register_static(&pnv_psi_power8_info); + type_register_static(&pnv_psi_power9_info); + type_register_static(&pnv_psi_power10_info); +} + +type_init(pnv_psi_register_types); + +void pnv_psi_pic_print_info(Pnv9Psi *psi9, Monitor *mon) +{ + PnvPsi *psi = PNV_PSI(psi9); + + uint32_t offset = + (psi->regs[PSIHB_REG(PSIHB9_IVT_OFFSET)] >> PSIHB9_IVT_OFF_SHIFT); + + monitor_printf(mon, "PSIHB Source %08x .. 
%08x\n", + offset, offset + psi9->source.nr_irqs - 1); + xive_source_pic_print_info(&psi9->source, offset, mon); +} diff --git a/hw/ppc/pnv_xscom.c b/hw/ppc/pnv_xscom.c new file mode 100644 index 000000000..9ce018dbc --- /dev/null +++ b/hw/ppc/pnv_xscom.c @@ -0,0 +1,324 @@ +/* + * QEMU PowerPC PowerNV XSCOM bus + * + * Copyright (c) 2016, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "sysemu/hw_accel.h" +#include "target/ppc/cpu.h" +#include "hw/sysbus.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/pnv.h" +#include "hw/ppc/pnv_xscom.h" + +#include <libfdt.h> + +/* PRD registers */ +#define PRD_P8_IPOLL_REG_MASK 0x01020013 +#define PRD_P8_IPOLL_REG_STATUS 0x01020014 +#define PRD_P9_IPOLL_REG_MASK 0x000F0033 +#define PRD_P9_IPOLL_REG_STATUS 0x000F0034 + +static void xscom_complete(CPUState *cs, uint64_t hmer_bits) +{ + /* + * TODO: When the read/write comes from the monitor, NULL is + * passed for the cpu, and no CPU completion is generated. + */ + if (cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + /* + * TODO: Need a CPU helper to set HMER, also handle generation + * of HMIs + */ + cpu_synchronize_state(cs); + env->spr[SPR_HMER] |= hmer_bits; + } +} + +static uint32_t pnv_xscom_pcba(PnvChip *chip, uint64_t addr) +{ + return PNV_CHIP_GET_CLASS(chip)->xscom_pcba(chip, addr); +} + +static uint64_t xscom_read_default(PnvChip *chip, uint32_t pcba) +{ + switch (pcba) { + case 0xf000f: + return PNV_CHIP_GET_CLASS(chip)->chip_cfam_id; + case 0x18002: /* ECID2 */ + return 0; + + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c03: /* PIBAM FIR MASK */ + + /* PRD registers */ + case PRD_P8_IPOLL_REG_MASK: + case PRD_P8_IPOLL_REG_STATUS: + case PRD_P9_IPOLL_REG_MASK: + case PRD_P9_IPOLL_REG_STATUS: + + /* P9 xscom reset */ + case 0x0090018: /* Receive status reg */ + case 0x0090012: /* log register */ + case 0x0090013: /* error register */ + + /* P8 xscom reset */ + case 0x2020007: /* ADU stuff, log register */ + case 0x2020009: /* ADU stuff, error register */ + case 0x202000f: /* ADU stuff, receive status register*/ + return 0; + case 0x2013f01: /* PBA stuff */ + case 0x2013f05: /* PBA stuff */ + return 0; + case 0x2013028: /* CAPP stuff */ + case 0x201302a: /* CAPP stuff */ + case 0x2013801: /* CAPP stuff */ + case 0x2013802: /* CAPP stuff */ + + /* P9 CAPP regs */ + case 0x2010841: + case 0x2010842: + case 0x201082a: + case 0x2010828: + case 0x4010841: + case 0x4010842: + case 0x401082a: + case 0x4010828: + return 0; + default: + return -1; + } +} + +static bool xscom_write_default(PnvChip *chip, uint32_t pcba, uint64_t val) +{ + /* We ignore writes to these */ + switch (pcba) { + case 0xf000f: /* chip id is RO */ + case 0x1010c00: /* PIBAM FIR */ + case 0x1010c01: /* PIBAM FIR */ + case 0x1010c02: /* PIBAM FIR */ + case 0x1010c03: /* 
PIBAM FIR MASK */ + case 0x1010c04: /* PIBAM FIR MASK */ + case 0x1010c05: /* PIBAM FIR MASK */ + /* P9 xscom reset */ + case 0x0090018: /* Receive status reg */ + case 0x0090012: /* log register */ + case 0x0090013: /* error register */ + + /* P8 xscom reset */ + case 0x2020007: /* ADU stuff, log register */ + case 0x2020009: /* ADU stuff, error register */ + case 0x202000f: /* ADU stuff, receive status register*/ + + case 0x2013028: /* CAPP stuff */ + case 0x201302a: /* CAPP stuff */ + case 0x2013801: /* CAPP stuff */ + case 0x2013802: /* CAPP stuff */ + + /* P9 CAPP regs */ + case 0x2010841: + case 0x2010842: + case 0x201082a: + case 0x2010828: + case 0x4010841: + case 0x4010842: + case 0x401082a: + case 0x4010828: + + /* P8 PRD registers */ + case PRD_P8_IPOLL_REG_MASK: + case PRD_P8_IPOLL_REG_STATUS: + case PRD_P9_IPOLL_REG_MASK: + case PRD_P9_IPOLL_REG_STATUS: + return true; + default: + return false; + } +} + +static uint64_t xscom_read(void *opaque, hwaddr addr, unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + uint64_t val = 0; + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + val = xscom_read_default(chip, pcba); + if (val != -1) { + goto complete; + } + + val = address_space_ldq(&chip->xscom_as, (uint64_t) pcba << 3, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM read failed at @0x%" + HWADDR_PRIx " pcba=0x%08x\n", addr, pcba); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return 0; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); + return val; +} + +static void xscom_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + PnvChip *chip = opaque; + uint32_t pcba = pnv_xscom_pcba(chip, addr); + MemTxResult result; + + /* Handle some SCOMs here before dispatch */ + if (xscom_write_default(chip, pcba, val)) { + goto complete; + } + + address_space_stq(&chip->xscom_as, (uint64_t) pcba << 3, val, + MEMTXATTRS_UNSPECIFIED, &result); + if (result != MEMTX_OK) { + qemu_log_mask(LOG_GUEST_ERROR, "XSCOM write failed at @0x%" + HWADDR_PRIx " pcba=0x%08x data=0x%" PRIx64 "\n", + addr, pcba, val); + xscom_complete(current_cpu, HMER_XSCOM_FAIL | HMER_XSCOM_DONE); + return; + } + +complete: + xscom_complete(current_cpu, HMER_XSCOM_DONE); +} + +const MemoryRegionOps pnv_xscom_ops = { + .read = xscom_read, + .write = xscom_write, + .valid.min_access_size = 8, + .valid.max_access_size = 8, + .impl.min_access_size = 8, + .impl.max_access_size = 8, + .endianness = DEVICE_BIG_ENDIAN, +}; + +void pnv_xscom_realize(PnvChip *chip, uint64_t size, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(chip); + char *name; + + name = g_strdup_printf("xscom-%x", chip->chip_id); + memory_region_init_io(&chip->xscom_mmio, OBJECT(chip), &pnv_xscom_ops, + chip, name, size); + sysbus_init_mmio(sbd, &chip->xscom_mmio); + + memory_region_init(&chip->xscom, OBJECT(chip), name, size); + address_space_init(&chip->xscom_as, &chip->xscom, name); + g_free(name); +} + +static const TypeInfo pnv_xscom_interface_info = { + .name = TYPE_PNV_XSCOM_INTERFACE, + .parent = TYPE_INTERFACE, + .class_size = sizeof(PnvXScomInterfaceClass), +}; + +static void pnv_xscom_register_types(void) +{ + type_register_static(&pnv_xscom_interface_info); +} + +type_init(pnv_xscom_register_types) + +typedef struct ForeachPopulateArgs { + void *fdt; + int xscom_offset; +} ForeachPopulateArgs; + +static int xscom_dt_child(Object *child, void *opaque) +{ + if 
(object_dynamic_cast(child, TYPE_PNV_XSCOM_INTERFACE)) { + ForeachPopulateArgs *args = opaque; + PnvXScomInterface *xd = PNV_XSCOM_INTERFACE(child); + PnvXScomInterfaceClass *xc = PNV_XSCOM_INTERFACE_GET_CLASS(xd); + + /* + * Only "realized" devices should be configured in the DT + */ + if (xc->dt_xscom && DEVICE(child)->realized) { + _FDT((xc->dt_xscom(xd, args->fdt, args->xscom_offset))); + } + } + return 0; +} + +int pnv_dt_xscom(PnvChip *chip, void *fdt, int root_offset, + uint64_t xscom_base, uint64_t xscom_size, + const char *compat, int compat_size) +{ + uint64_t reg[] = { xscom_base, xscom_size }; + int xscom_offset; + ForeachPopulateArgs args; + char *name; + + name = g_strdup_printf("xscom@%" PRIx64, be64_to_cpu(reg[0])); + xscom_offset = fdt_add_subnode(fdt, root_offset, name); + _FDT(xscom_offset); + g_free(name); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,chip-id", chip->chip_id))); + /* + * On P10, the xscom bus id has been deprecated and the chip id is + * calculated from the "Primary topology table index". See skiboot. + */ + _FDT((fdt_setprop_cell(fdt, xscom_offset, "ibm,primary-topology-index", + chip->chip_id))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#address-cells", 1))); + _FDT((fdt_setprop_cell(fdt, xscom_offset, "#size-cells", 1))); + _FDT((fdt_setprop(fdt, xscom_offset, "reg", reg, sizeof(reg)))); + _FDT((fdt_setprop(fdt, xscom_offset, "compatible", compat, compat_size))); + _FDT((fdt_setprop(fdt, xscom_offset, "scom-controller", NULL, 0))); + + args.fdt = fdt; + args.xscom_offset = xscom_offset; + + /* + * Loop on the whole object hierarchy to catch all + * PnvXScomInterface objects which can lie a bit deeper than the + * first layer. + */ + object_child_foreach_recursive(OBJECT(chip), xscom_dt_child, &args); + return 0; +} + +void pnv_xscom_add_subregion(PnvChip *chip, hwaddr offset, MemoryRegion *mr) +{ + memory_region_add_subregion(&chip->xscom, offset << 3, mr); +} + +void pnv_xscom_region_init(MemoryRegion *mr, + Object *owner, + const MemoryRegionOps *ops, + void *opaque, + const char *name, + uint64_t size) +{ + memory_region_init_io(mr, owner, ops, opaque, name, size << 3); +} diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c new file mode 100644 index 000000000..e8127599c --- /dev/null +++ b/hw/ppc/ppc.c @@ -0,0 +1,1465 @@ +/* + * QEMU generic PowerPC hardware System Emulator + * + * Copyright (c) 2003-2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc_e500.h" +#include "qemu/timer.h" +#include "sysemu/cpus.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/error-report.h" +#include "sysemu/kvm.h" +#include "sysemu/runstate.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" +#include "trace.h" + +static void cpu_ppc_tb_stop (CPUPPCState *env); +static void cpu_ppc_tb_start (CPUPPCState *env); + +void ppc_set_irq(PowerPCCPU *cpu, int n_IRQ, int level) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + unsigned int old_pending; + bool locked = false; + + /* We may already have the BQL if coming from the reset path */ + if (!qemu_mutex_iothread_locked()) { + locked = true; + qemu_mutex_lock_iothread(); + } + + old_pending = env->pending_interrupts; + + if (level) { + env->pending_interrupts |= 1 << n_IRQ; + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } else { + env->pending_interrupts &= ~(1 << n_IRQ); + if (env->pending_interrupts == 0) { + cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD); + } + } + + if (old_pending != env->pending_interrupts) { + kvmppc_set_interrupt(cpu, n_IRQ, level); + } + + + trace_ppc_irq_set_exit(env, n_IRQ, level, env->pending_interrupts, + CPU(cpu)->interrupt_request); + + if (locked) { + qemu_mutex_unlock_iothread(); + } +} + +/* PowerPC 6xx / 7xx internal IRQ controller */ +static void ppc6xx_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + CPUState *cs = CPU(cpu); + + switch (pin) { + case PPC6xx_INPUT_TBEN: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("time base", level); + if (level) { + cpu_ppc_tb_start(env); + } else { + cpu_ppc_tb_stop(env); + } + break; + case PPC6xx_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPC6xx_INPUT_SMI: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("SMI IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_SMI, level); + break; + case PPC6xx_INPUT_MCP: + /* Negative edge sensitive */ + /* XXX: TODO: actual reaction may depends on HID0 status + * 603/604/740/750: check HID0[EMCP] + */ + if (cur_level == 1 && level == 0) { + trace_ppc_irq_set_state("machine check", 1); + ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); + } + break; + case PPC6xx_INPUT_CKSTP_IN: + /* Level sensitive - active low */ + /* XXX: TODO: relay the signal to CKSTP_OUT pin */ + /* XXX: Note that the only way to restart the CPU is to reset it */ + if (level) { + trace_ppc_irq_cpu("stop"); + cs->halted = 1; + } + break; + case PPC6xx_INPUT_HRESET: + /* Level sensitive - active low */ + if (level) { + trace_ppc_irq_reset("CPU"); + cpu_interrupt(cs, CPU_INTERRUPT_RESET); + } + break; + case PPC6xx_INPUT_SRESET: + trace_ppc_irq_set_state("RESET IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppc6xx_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc6xx_set_irq, cpu, + PPC6xx_INPUT_NB); +} + +#if defined(TARGET_PPC64) +/* PowerPC 970 internal 
IRQ controller */ +static void ppc970_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + CPUState *cs = CPU(cpu); + + switch (pin) { + case PPC970_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPC970_INPUT_THINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("SMI IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_THERM, level); + break; + case PPC970_INPUT_MCP: + /* Negative edge sensitive */ + /* XXX: TODO: actual reaction may depends on HID0 status + * 603/604/740/750: check HID0[EMCP] + */ + if (cur_level == 1 && level == 0) { + trace_ppc_irq_set_state("machine check", 1); + ppc_set_irq(cpu, PPC_INTERRUPT_MCK, 1); + } + break; + case PPC970_INPUT_CKSTP: + /* Level sensitive - active low */ + /* XXX: TODO: relay the signal to CKSTP_OUT pin */ + if (level) { + trace_ppc_irq_cpu("stop"); + cs->halted = 1; + } else { + trace_ppc_irq_cpu("restart"); + cs->halted = 0; + qemu_cpu_kick(cs); + } + break; + case PPC970_INPUT_HRESET: + /* Level sensitive - active low */ + if (level) { + cpu_interrupt(cs, CPU_INTERRUPT_RESET); + } + break; + case PPC970_INPUT_SRESET: + trace_ppc_irq_set_state("RESET IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_RESET, level); + break; + case PPC970_INPUT_TBEN: + trace_ppc_irq_set_state("TBEN IRQ", level); + /* XXX: TODO */ + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppc970_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc970_set_irq, cpu, + PPC970_INPUT_NB); +} + +/* POWER7 internal IRQ controller */ +static void power7_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + + trace_ppc_irq_set(&cpu->env, pin, level); + + switch (pin) { + case POWER7_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + default: + g_assert_not_reached(); + } +} + +void ppcPOWER7_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&power7_set_irq, cpu, + POWER7_INPUT_NB); +} + +/* POWER9 internal IRQ controller */ +static void power9_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + + trace_ppc_irq_set(&cpu->env, pin, level); + + switch (pin) { + case POWER9_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case POWER9_INPUT_HINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("HV external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_HVIRT, level); + break; + default: + g_assert_not_reached(); + return; + } +} + +void ppcPOWER9_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&power9_set_irq, cpu, + POWER9_INPUT_NB); +} +#endif /* defined(TARGET_PPC64) */ + +void ppc40x_core_reset(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + target_ulong dbsr; + + qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC core\n"); + 
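/*
 * Editor's sketch, not part of the original patch: every per-pin
 * handler in this file filters out spurious events with the same
 * irq_input_state test. A standalone form of that check (the
 * "example_" name is hypothetical; assumes <stdint.h>/<stdbool.h>),
 * disabled with #if 0 so it has no effect on the build:
 */
#if 0
static bool example_pin_transition(uint32_t irq_input_state, int pin, int level)
{
    int cur_level = (irq_input_state >> pin) & 1;
    /* act only on real 1 -> 0 and 0 -> non-zero transitions */
    return (cur_level == 1 && level == 0) || (cur_level == 0 && level != 0);
}
#endif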
cpu_interrupt(CPU(cpu), CPU_INTERRUPT_RESET); + dbsr = env->spr[SPR_40x_DBSR]; + dbsr &= ~0x00000300; + dbsr |= 0x00000100; + env->spr[SPR_40x_DBSR] = dbsr; +} + +void ppc40x_chip_reset(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + target_ulong dbsr; + + qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC chip\n"); + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_RESET); + /* XXX: TODO reset all internal peripherals */ + dbsr = env->spr[SPR_40x_DBSR]; + dbsr &= ~0x00000300; + dbsr |= 0x00000200; + env->spr[SPR_40x_DBSR] = dbsr; +} + +void ppc40x_system_reset(PowerPCCPU *cpu) +{ + qemu_log_mask(CPU_LOG_RESET, "Reset PowerPC system\n"); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); +} + +void store_40x_dbcr0(CPUPPCState *env, uint32_t val) +{ + PowerPCCPU *cpu = env_archcpu(env); + + qemu_mutex_lock_iothread(); + + switch ((val >> 28) & 0x3) { + case 0x0: + /* No action */ + break; + case 0x1: + /* Core reset */ + ppc40x_core_reset(cpu); + break; + case 0x2: + /* Chip reset */ + ppc40x_chip_reset(cpu); + break; + case 0x3: + /* System reset */ + ppc40x_system_reset(cpu); + break; + } + + qemu_mutex_unlock_iothread(); +} + +/* PowerPC 40x internal IRQ controller */ +static void ppc40x_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + CPUState *cs = CPU(cpu); + + switch (pin) { + case PPC40x_INPUT_RESET_SYS: + if (level) { + trace_ppc_irq_reset("system"); + ppc40x_system_reset(cpu); + } + break; + case PPC40x_INPUT_RESET_CHIP: + if (level) { + trace_ppc_irq_reset("chip"); + ppc40x_chip_reset(cpu); + } + break; + case PPC40x_INPUT_RESET_CORE: + /* XXX: TODO: update DBSR[MRR] */ + if (level) { + trace_ppc_irq_reset("core"); + ppc40x_core_reset(cpu); + } + break; + case PPC40x_INPUT_CINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("critical IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); + break; + case PPC40x_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("external IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPC40x_INPUT_HALT: + /* Level sensitive - active low */ + if (level) { + trace_ppc_irq_cpu("stop"); + cs->halted = 1; + } else { + trace_ppc_irq_cpu("restart"); + cs->halted = 0; + qemu_cpu_kick(cs); + } + break; + case PPC40x_INPUT_DEBUG: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("debug pin", level); + ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppc40x_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppc40x_set_irq, + cpu, PPC40x_INPUT_NB); +} + +/* PowerPC E500 internal IRQ controller */ +static void ppce500_set_irq(void *opaque, int pin, int level) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + int cur_level; + + trace_ppc_irq_set(env, pin, level); + + cur_level = (env->irq_input_state >> pin) & 1; + /* Don't generate spurious events */ + if ((cur_level == 1 && level == 0) || (cur_level == 0 && level != 0)) { + switch (pin) { + case PPCE500_INPUT_MCK: + if (level) { + trace_ppc_irq_reset("system"); + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + } + 
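/*
 * Editor's sketch, not part of the original patch: store_40x_dbcr0()
 * above dispatches on the two reset-request bits of DBCR0, i.e.
 * (val >> 28) & 0x3. A standalone decoder (hypothetical "example_"
 * names), disabled with #if 0:
 */
#if 0
enum example_40x_reset {
    EX_RST_NONE   = 0x0,
    EX_RST_CORE   = 0x1,
    EX_RST_CHIP   = 0x2,
    EX_RST_SYSTEM = 0x3,
};

static enum example_40x_reset example_dbcr0_reset(uint32_t dbcr0)
{
    return (enum example_40x_reset)((dbcr0 >> 28) & 0x3);
}
#endif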
break; + case PPCE500_INPUT_RESET_CORE: + if (level) { + trace_ppc_irq_reset("core"); + ppc_set_irq(cpu, PPC_INTERRUPT_MCK, level); + } + break; + case PPCE500_INPUT_CINT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("critical IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_CEXT, level); + break; + case PPCE500_INPUT_INT: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("core IRQ", level); + ppc_set_irq(cpu, PPC_INTERRUPT_EXT, level); + break; + case PPCE500_INPUT_DEBUG: + /* Level sensitive - active high */ + trace_ppc_irq_set_state("debug pin", level); + ppc_set_irq(cpu, PPC_INTERRUPT_DEBUG, level); + break; + default: + g_assert_not_reached(); + } + if (level) + env->irq_input_state |= 1 << pin; + else + env->irq_input_state &= ~(1 << pin); + } +} + +void ppce500_irq_init(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_inputs = (void **)qemu_allocate_irqs(&ppce500_set_irq, + cpu, PPCE500_INPUT_NB); +} + +/* Enable or Disable the E500 EPR capability */ +void ppce500_set_mpic_proxy(bool enabled) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + cpu->env.mpic_proxy = enabled; + if (kvm_enabled()) { + kvmppc_set_mpic_proxy(cpu, enabled); + } + } +} + +/*****************************************************************************/ +/* PowerPC time base and decrementer emulation */ + +uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_offset) +{ + /* TB time in tb periods */ + return muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND) + tb_offset; +} + +uint64_t cpu_ppc_load_tbl (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + if (kvm_enabled()) { + return env->spr[SPR_TBL]; + } + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + trace_ppc_tb_load(tb); + + return tb; +} + +static inline uint32_t _cpu_ppc_load_tbu(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + trace_ppc_tb_load(tb); + + return tb >> 32; +} + +uint32_t cpu_ppc_load_tbu (CPUPPCState *env) +{ + if (kvm_enabled()) { + return env->spr[SPR_TBU]; + } + + return _cpu_ppc_load_tbu(env); +} + +static inline void cpu_ppc_store_tb(ppc_tb_t *tb_env, uint64_t vmclk, + int64_t *tb_offsetp, uint64_t value) +{ + *tb_offsetp = value - + muldiv64(vmclk, tb_env->tb_freq, NANOSECONDS_PER_SECOND); + + trace_ppc_tb_store(value, *tb_offsetp); +} + +void cpu_ppc_store_tbl (CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + tb &= 0xFFFFFFFF00000000ULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->tb_offset, tb | (uint64_t)value); +} + +static inline void _cpu_ppc_store_tbu(CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->tb_offset); + tb &= 0x00000000FFFFFFFFULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->tb_offset, ((uint64_t)value << 32) | tb); +} + +void cpu_ppc_store_tbu (CPUPPCState *env, uint32_t value) +{ + _cpu_ppc_store_tbu(env, value); +} + +uint64_t cpu_ppc_load_atbl (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + trace_ppc_tb_load(tb); + + 
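/*
 * Editor's sketch, not part of the original patch: all the TB/ATB
 * load and store helpers around here reduce to the same two
 * conversions around a per-clock offset, using muldiv64() from
 * "qemu/host-utils.h" (a * b / c with a 128-bit intermediate).
 * Hypothetical "example_" names, disabled with #if 0:
 */
#if 0
static uint64_t example_load_ticks(uint64_t ns, uint32_t freq, int64_t off)
{
    return muldiv64(ns, freq, NANOSECONDS_PER_SECOND) + off;
}

static int64_t example_offset_for(uint64_t want, uint64_t ns, uint32_t freq)
{
    /* choose the offset so that example_load_ticks() returns "want" */
    return want - muldiv64(ns, freq, NANOSECONDS_PER_SECOND);
}
#endif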
return tb; +} + +uint32_t cpu_ppc_load_atbu (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + trace_ppc_tb_load(tb); + + return tb >> 32; +} + +void cpu_ppc_store_atbl (CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + tb &= 0xFFFFFFFF00000000ULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->atb_offset, tb | (uint64_t)value); +} + +void cpu_ppc_store_atbu (CPUPPCState *env, uint32_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), tb_env->atb_offset); + tb &= 0x00000000FFFFFFFFULL; + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->atb_offset, ((uint64_t)value << 32) | tb); +} + +uint64_t cpu_ppc_load_vtb(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + + return cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->vtb_offset); +} + +void cpu_ppc_store_vtb(CPUPPCState *env, uint64_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->vtb_offset, value); +} + +void cpu_ppc_store_tbu40(CPUPPCState *env, uint64_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb; + + tb = cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->tb_offset); + tb &= 0xFFFFFFUL; + tb |= (value & ~0xFFFFFFUL); + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->tb_offset, tb); +} + +static void cpu_ppc_tb_stop (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb, atb, vmclk; + + /* If the time base is already frozen, do nothing */ + if (tb_env->tb_freq != 0) { + vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + /* Get the time base */ + tb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->tb_offset); + /* Get the alternate time base */ + atb = cpu_ppc_get_tb(tb_env, vmclk, tb_env->atb_offset); + /* Store the time base value (ie compute the current offset) */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, tb); + /* Store the alternate time base value (compute the current offset) */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset, atb); + /* Set the time base frequency to zero */ + tb_env->tb_freq = 0; + /* Now, the time bases are frozen to tb_offset / atb_offset value */ + } +} + +static void cpu_ppc_tb_start (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t tb, atb, vmclk; + + /* If the time base is not frozen, do nothing */ + if (tb_env->tb_freq == 0) { + vmclk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + /* Get the time base from tb_offset */ + tb = tb_env->tb_offset; + /* Get the alternate time base from atb_offset */ + atb = tb_env->atb_offset; + /* Restore the tb frequency from the decrementer frequency */ + tb_env->tb_freq = tb_env->decr_freq; + /* Store the time base value */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->tb_offset, tb); + /* Store the alternate time base value */ + cpu_ppc_store_tb(tb_env, vmclk, &tb_env->atb_offset, atb); + } +} + +bool ppc_decr_clear_on_delivery(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + int flags = PPC_DECR_UNDERFLOW_TRIGGERED | PPC_DECR_UNDERFLOW_LEVEL; + return ((tb_env->flags & flags) == PPC_DECR_UNDERFLOW_TRIGGERED); +} + +static inline int64_t _cpu_ppc_load_decr(CPUPPCState *env, uint64_t next) +{ + 
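/*
 * (Editor's note, not part of the original patch: on Book E parts the
 * decrementer stops at zero, hence the PPC_TIMER_BOOKE branch below
 * clamps to 0, while classic parts keep counting down into negative
 * values after the underflow.)
 */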
ppc_tb_t *tb_env = env->tb_env; + int64_t decr, diff; + + diff = next - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + if (diff >= 0) { + decr = muldiv64(diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); + } else if (tb_env->flags & PPC_TIMER_BOOKE) { + decr = 0; + } else { + decr = -muldiv64(-diff, tb_env->decr_freq, NANOSECONDS_PER_SECOND); + } + trace_ppc_decr_load(decr); + + return decr; +} + +target_ulong cpu_ppc_load_decr(CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t decr; + + if (kvm_enabled()) { + return env->spr[SPR_DECR]; + } + + decr = _cpu_ppc_load_decr(env, tb_env->decr_next); + + /* + * If the large decrementer is enabled then the decrementer is sign extended + * to 64 bits, otherwise it is a 32 bit value. + */ + if (env->spr[SPR_LPCR] & LPCR_LD) { + return decr; + } + return (uint32_t) decr; +} + +target_ulong cpu_ppc_load_hdecr(CPUPPCState *env) +{ + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + ppc_tb_t *tb_env = env->tb_env; + uint64_t hdecr; + + hdecr = _cpu_ppc_load_decr(env, tb_env->hdecr_next); + + /* + * If we have a large decrementer (POWER9 or later) then hdecr is sign + * extended to 64 bits, otherwise it is 32 bits. + */ + if (pcc->lrg_decr_bits > 32) { + return hdecr; + } + return (uint32_t) hdecr; +} + +uint64_t cpu_ppc_load_purr (CPUPPCState *env) +{ + ppc_tb_t *tb_env = env->tb_env; + + return cpu_ppc_get_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + tb_env->purr_offset); +} + +/* When the decrementer expires, + * all we need to do is generate or queue a CPU exception + */ +static inline void cpu_ppc_decr_excp(PowerPCCPU *cpu) +{ + /* Raise it */ + trace_ppc_decr_excp("raise"); + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 1); +} + +static inline void cpu_ppc_decr_lower(PowerPCCPU *cpu) +{ + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, 0); +} + +static inline void cpu_ppc_hdecr_excp(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* Raise it */ + trace_ppc_decr_excp("raise HV"); + + /* The architecture specifies that we don't deliver HDEC + * interrupts in a PM state. Not only do they not cause a + * wakeup, they also get effectively discarded. + */ + if (!env->resume_as_sreset) { + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 1); + } +} + +static inline void cpu_ppc_hdecr_lower(PowerPCCPU *cpu) +{ + ppc_set_irq(cpu, PPC_INTERRUPT_HDECR, 0); +} + +static void __cpu_ppc_store_decr(PowerPCCPU *cpu, uint64_t *nextp, + QEMUTimer *timer, + void (*raise_excp)(void *), + void (*lower_excp)(PowerPCCPU *), + target_ulong decr, target_ulong value, + int nr_bits) +{ + CPUPPCState *env = &cpu->env; + ppc_tb_t *tb_env = env->tb_env; + uint64_t now, next; + int64_t signed_value; + int64_t signed_decr; + + /* Truncate value to decr_width and sign extend for simplicity */ + signed_value = sextract64(value, 0, nr_bits); + signed_decr = sextract64(decr, 0, nr_bits); + + trace_ppc_decr_store(nr_bits, decr, value); + + if (kvm_enabled()) { + /* KVM handles decrementer exceptions, we don't need our own timer */ + return; + } + + /* + * Going from 2 -> 1, 1 -> 0 or 0 -> -1 is the event to generate a DEC + * interrupt. + * + * If we get a really small DEC value, we can assume that by the time we + * handled it we should inject an interrupt already. + * + * On MSB level based DEC implementations the MSB always means the interrupt + * is pending, so raise it on those. + * + * On MSB edge based DEC implementations the MSB going from 0 -> 1 triggers + * an edge interrupt, so raise it here too.
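 *
 * (Editor's worked example, not part of the original patch: with a
 * 32-bit DECR, storing 0x80000000 makes signed_value negative; on an
 * edge-triggered implementation where the previous signed_decr was
 * still non-negative, this takes the "raise now" path below instead
 * of arming the timer roughly four seconds out, assuming a 512 MHz
 * decrementer frequency.)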
+ */ + if ((value < 3) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL) && signed_value < 0) || + ((tb_env->flags & PPC_DECR_UNDERFLOW_TRIGGERED) && signed_value < 0 + && signed_decr >= 0)) { + (*raise_excp)(cpu); + return; + } + + /* On MSB level based systems a 0 for the MSB stops interrupt delivery */ + if (signed_value >= 0 && (tb_env->flags & PPC_DECR_UNDERFLOW_LEVEL)) { + (*lower_excp)(cpu); + } + + /* Calculate the next timer event */ + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + next = now + muldiv64(value, NANOSECONDS_PER_SECOND, tb_env->decr_freq); + *nextp = next; + + /* Adjust timer */ + timer_mod(timer, next); +} + +static inline void _cpu_ppc_store_decr(PowerPCCPU *cpu, target_ulong decr, + target_ulong value, int nr_bits) +{ + ppc_tb_t *tb_env = cpu->env.tb_env; + + __cpu_ppc_store_decr(cpu, &tb_env->decr_next, tb_env->decr_timer, + tb_env->decr_timer->cb, &cpu_ppc_decr_lower, decr, + value, nr_bits); +} + +void cpu_ppc_store_decr(CPUPPCState *env, target_ulong value) +{ + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + int nr_bits = 32; + + if (env->spr[SPR_LPCR] & LPCR_LD) { + nr_bits = pcc->lrg_decr_bits; + } + + _cpu_ppc_store_decr(cpu, cpu_ppc_load_decr(env), value, nr_bits); +} + +static void cpu_ppc_decr_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_ppc_decr_excp(cpu); +} + +static inline void _cpu_ppc_store_hdecr(PowerPCCPU *cpu, target_ulong hdecr, + target_ulong value, int nr_bits) +{ + ppc_tb_t *tb_env = cpu->env.tb_env; + + if (tb_env->hdecr_timer != NULL) { + __cpu_ppc_store_decr(cpu, &tb_env->hdecr_next, tb_env->hdecr_timer, + tb_env->hdecr_timer->cb, &cpu_ppc_hdecr_lower, + hdecr, value, nr_bits); + } +} + +void cpu_ppc_store_hdecr(CPUPPCState *env, target_ulong value) +{ + PowerPCCPU *cpu = env_archcpu(env); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + _cpu_ppc_store_hdecr(cpu, cpu_ppc_load_hdecr(env), value, + pcc->lrg_decr_bits); +} + +static void cpu_ppc_hdecr_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_ppc_hdecr_excp(cpu); +} + +void cpu_ppc_store_purr(CPUPPCState *env, uint64_t value) +{ + ppc_tb_t *tb_env = env->tb_env; + + cpu_ppc_store_tb(tb_env, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), + &tb_env->purr_offset, value); +} + +static void cpu_ppc_set_tb_clk (void *opaque, uint32_t freq) +{ + CPUPPCState *env = opaque; + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + /* There is a bug in Linux 2.4 kernels: + * if a decrementer exception is pending when it enables msr_ee at startup, + * it's not ready to handle it... 
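 *
 * (Editor's note, not part of the original patch: this is why the
 * calls just below arm DECR and HDECR with 0xFFFFFFFF, the longest
 * possible countdown, instead of leaving a near-expired value in
 * place when the frequency is set.)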
+ */ + _cpu_ppc_store_decr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); + _cpu_ppc_store_hdecr(cpu, 0xFFFFFFFF, 0xFFFFFFFF, 32); + cpu_ppc_store_purr(env, 0x0000000000000000ULL); +} + +static void timebase_save(PPCTimebase *tb) +{ + uint64_t ticks = cpu_get_host_ticks(); + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + if (!first_ppc_cpu->env.tb_env) { + error_report("No timebase object"); + return; + } + + /* not used anymore, we keep it for compatibility */ + tb->time_of_the_day_ns = qemu_clock_get_ns(QEMU_CLOCK_HOST); + /* + * tb_offset is only expected to be changed by QEMU so + * there is no need to update it from KVM here + */ + tb->guest_timebase = ticks + first_ppc_cpu->env.tb_env->tb_offset; + + tb->runstate_paused = + runstate_check(RUN_STATE_PAUSED) || runstate_check(RUN_STATE_SAVE_VM); +} + +static void timebase_load(PPCTimebase *tb) +{ + CPUState *cpu; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + int64_t tb_off_adj, tb_off; + unsigned long freq; + + if (!first_ppc_cpu->env.tb_env) { + error_report("No timebase object"); + return; + } + + freq = first_ppc_cpu->env.tb_env->tb_freq; + + tb_off_adj = tb->guest_timebase - cpu_get_host_ticks(); + + tb_off = first_ppc_cpu->env.tb_env->tb_offset; + trace_ppc_tb_adjust(tb_off, tb_off_adj, tb_off_adj - tb_off, + (tb_off_adj - tb_off) / freq); + + /* Set new offset to all CPUs */ + CPU_FOREACH(cpu) { + PowerPCCPU *pcpu = POWERPC_CPU(cpu); + pcpu->env.tb_env->tb_offset = tb_off_adj; + kvmppc_set_reg_tb_offset(pcpu, pcpu->env.tb_env->tb_offset); + } +} + +void cpu_ppc_clock_vm_state_change(void *opaque, bool running, + RunState state) +{ + PPCTimebase *tb = opaque; + + if (running) { + timebase_load(tb); + } else { + timebase_save(tb); + } +} + +/* + * When migrating a running guest, read the clock just + * before migration, so that the guest clock counts + * during the events between: + * + * * vm_stop() + * * + * * pre_save() + * + * This reduces clock difference on migration from 5s + * to 0.1s (when max_downtime == 5s), because sending the + * final pages of memory (which happens between vm_stop() + * and pre_save()) takes max_downtime. 
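 *
 * (Editor's note, not part of the original patch: timebase_save()
 * above normally runs from cpu_ppc_clock_vm_state_change() when the
 * VM stops; timebase_pre_save() below refreshes the snapshot only
 * when the guest was still running, so a paused guest or savevm
 * keeps the value captured at pause time.)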
+ */ +static int timebase_pre_save(void *opaque) +{ + PPCTimebase *tb = opaque; + + /* guest_timebase won't be overridden in case of paused guest or savevm */ + if (!tb->runstate_paused) { + timebase_save(tb); + } + + return 0; +} + +const VMStateDescription vmstate_ppc_timebase = { + .name = "timebase", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .pre_save = timebase_pre_save, + .fields = (VMStateField []) { + VMSTATE_UINT64(guest_timebase, PPCTimebase), + VMSTATE_INT64(time_of_the_day_ns, PPCTimebase), + VMSTATE_END_OF_LIST() + }, +}; + +/* Set up (once) timebase frequency (in Hz) */ +clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env; + + tb_env = g_malloc0(sizeof(ppc_tb_t)); + env->tb_env = tb_env; + tb_env->flags = PPC_DECR_UNDERFLOW_TRIGGERED; + if (is_book3s_arch2x(env)) { + /* All Book3S 64bit CPUs implement level based DEC logic */ + tb_env->flags |= PPC_DECR_UNDERFLOW_LEVEL; + } + /* Create new timer */ + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_decr_cb, cpu); + if (env->has_hv_mode) { + tb_env->hdecr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_ppc_hdecr_cb, + cpu); + } else { + tb_env->hdecr_timer = NULL; + } + cpu_ppc_set_tb_clk(env, freq); + + return &cpu_ppc_set_tb_clk; +} + +/* Specific helpers for POWER & PowerPC 601 RTC */ +void cpu_ppc601_store_rtcu (CPUPPCState *env, uint32_t value) +{ + _cpu_ppc_store_tbu(env, value); +} + +uint32_t cpu_ppc601_load_rtcu (CPUPPCState *env) +{ + return _cpu_ppc_load_tbu(env); +} + +void cpu_ppc601_store_rtcl (CPUPPCState *env, uint32_t value) +{ + cpu_ppc_store_tbl(env, value & 0x3FFFFF80); +} + +uint32_t cpu_ppc601_load_rtcl (CPUPPCState *env) +{ + return cpu_ppc_load_tbl(env) & 0x3FFFFF80; +} + +/*****************************************************************************/ +/* PowerPC 40x timers */ + +/* PIT, FIT & WDT */ +typedef struct ppc40x_timer_t ppc40x_timer_t; +struct ppc40x_timer_t { + uint64_t pit_reload; /* PIT auto-reload value */ + uint64_t fit_next; /* Tick for next FIT interrupt */ + QEMUTimer *fit_timer; + uint64_t wdt_next; /* Tick for next WDT interrupt */ + QEMUTimer *wdt_timer; + + /* 405 have the PIT, 440 have a DECR. 
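 *
 * (Editor's note, not part of the original patch: because of this
 * difference the exception raised on PIT expiry is not hard-coded;
 * it is kept in decr_excp below and supplied by the board through
 * ppc_40x_timers_init(), so cpu_4xx_pit_cb() raises the right one.)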
*/ + unsigned int decr_excp; +}; + +/* Fixed interval timer */ +static void cpu_4xx_fit_cb (void *opaque) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + uint64_t now, next; + + env = opaque; + cpu = env_archcpu(env); + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + switch ((env->spr[SPR_40x_TCR] >> 24) & 0x3) { + case 0: + next = 1 << 9; + break; + case 1: + next = 1 << 13; + break; + case 2: + next = 1 << 17; + break; + case 3: + next = 1 << 21; + break; + default: + /* Cannot occur, but makes gcc happy */ + return; + } + next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->tb_freq); + if (next == now) + next++; + timer_mod(ppc40x_timer->fit_timer, next); + env->spr[SPR_40x_TSR] |= 1 << 26; + if ((env->spr[SPR_40x_TCR] >> 23) & 0x1) { + ppc_set_irq(cpu, PPC_INTERRUPT_FIT, 1); + } + trace_ppc4xx_fit((int)((env->spr[SPR_40x_TCR] >> 23) & 0x1), + env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); +} + +/* Programmable interval timer */ +static void start_stop_pit (CPUPPCState *env, ppc_tb_t *tb_env, int is_excp) +{ + ppc40x_timer_t *ppc40x_timer; + uint64_t now, next; + + ppc40x_timer = tb_env->opaque; + if (ppc40x_timer->pit_reload <= 1 || + !((env->spr[SPR_40x_TCR] >> 26) & 0x1) || + (is_excp && !((env->spr[SPR_40x_TCR] >> 22) & 0x1))) { + /* Stop PIT */ + trace_ppc4xx_pit_stop(); + timer_del(tb_env->decr_timer); + } else { + trace_ppc4xx_pit_start(ppc40x_timer->pit_reload); + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + next = now + muldiv64(ppc40x_timer->pit_reload, + NANOSECONDS_PER_SECOND, tb_env->decr_freq); + if (is_excp) + next += tb_env->decr_next - now; + if (next == now) + next++; + timer_mod(tb_env->decr_timer, next); + tb_env->decr_next = next; + } +} + +static void cpu_4xx_pit_cb (void *opaque) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + + env = opaque; + cpu = env_archcpu(env); + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + env->spr[SPR_40x_TSR] |= 1 << 27; + if ((env->spr[SPR_40x_TCR] >> 26) & 0x1) { + ppc_set_irq(cpu, ppc40x_timer->decr_excp, 1); + } + start_stop_pit(env, tb_env, 1); + trace_ppc4xx_pit((int)((env->spr[SPR_40x_TCR] >> 22) & 0x1), + (int)((env->spr[SPR_40x_TCR] >> 26) & 0x1), + env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR], + ppc40x_timer->pit_reload); +} + +/* Watchdog timer */ +static void cpu_4xx_wdt_cb (void *opaque) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + uint64_t now, next; + + env = opaque; + cpu = env_archcpu(env); + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + switch ((env->spr[SPR_40x_TCR] >> 30) & 0x3) { + case 0: + next = 1 << 17; + break; + case 1: + next = 1 << 21; + break; + case 2: + next = 1 << 25; + break; + case 3: + next = 1 << 29; + break; + default: + /* Cannot occur, but makes gcc happy */ + return; + } + next = now + muldiv64(next, NANOSECONDS_PER_SECOND, tb_env->decr_freq); + if (next == now) + next++; + trace_ppc4xx_wdt(env->spr[SPR_40x_TCR], env->spr[SPR_40x_TSR]); + switch ((env->spr[SPR_40x_TSR] >> 30) & 0x3) { + case 0x0: + case 0x1: + timer_mod(ppc40x_timer->wdt_timer, next); + ppc40x_timer->wdt_next = next; + env->spr[SPR_40x_TSR] |= 1U << 31; + break; + case 0x2: + timer_mod(ppc40x_timer->wdt_timer, next); + ppc40x_timer->wdt_next = next; + env->spr[SPR_40x_TSR] |= 1 << 30; + if ((env->spr[SPR_40x_TCR] >> 27) & 0x1) { + ppc_set_irq(cpu, 
PPC_INTERRUPT_WDT, 1); + } + break; + case 0x3: + env->spr[SPR_40x_TSR] &= ~0x30000000; + env->spr[SPR_40x_TSR] |= env->spr[SPR_40x_TCR] & 0x30000000; + switch ((env->spr[SPR_40x_TCR] >> 28) & 0x3) { + case 0x0: + /* No reset */ + break; + case 0x1: /* Core reset */ + ppc40x_core_reset(cpu); + break; + case 0x2: /* Chip reset */ + ppc40x_chip_reset(cpu); + break; + case 0x3: /* System reset */ + ppc40x_system_reset(cpu); + break; + } + } +} + +void store_40x_pit (CPUPPCState *env, target_ulong val) +{ + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + + tb_env = env->tb_env; + ppc40x_timer = tb_env->opaque; + trace_ppc40x_store_pit(val); + ppc40x_timer->pit_reload = val; + start_stop_pit(env, tb_env, 0); +} + +target_ulong load_40x_pit (CPUPPCState *env) +{ + return cpu_ppc_load_decr(env); +} + +static void ppc_40x_set_tb_clk (void *opaque, uint32_t freq) +{ + CPUPPCState *env = opaque; + ppc_tb_t *tb_env = env->tb_env; + + trace_ppc40x_set_tb_clk(freq); + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + /* XXX: we should also update all timers */ +} + +clk_setup_cb ppc_40x_timers_init (CPUPPCState *env, uint32_t freq, + unsigned int decr_excp) +{ + ppc_tb_t *tb_env; + ppc40x_timer_t *ppc40x_timer; + + tb_env = g_malloc0(sizeof(ppc_tb_t)); + env->tb_env = tb_env; + tb_env->flags = PPC_DECR_UNDERFLOW_TRIGGERED; + ppc40x_timer = g_malloc0(sizeof(ppc40x_timer_t)); + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + tb_env->opaque = ppc40x_timer; + trace_ppc40x_timers_init(freq); + if (ppc40x_timer != NULL) { + /* We use decr timer for PIT */ + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_pit_cb, env); + ppc40x_timer->fit_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_fit_cb, env); + ppc40x_timer->wdt_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &cpu_4xx_wdt_cb, env); + ppc40x_timer->decr_excp = decr_excp; + } + + return &ppc_40x_set_tb_clk; +} + +/*****************************************************************************/ +/* Embedded PowerPC Device Control Registers */ +typedef struct ppc_dcrn_t ppc_dcrn_t; +struct ppc_dcrn_t { + dcr_read_cb dcr_read; + dcr_write_cb dcr_write; + void *opaque; +}; + +/* XXX: on 460, DCR addresses are 32 bits wide, + * using DCRIPR to get the 22 upper bits of the DCR address + */ +#define DCRN_NB 1024 +struct ppc_dcr_t { + ppc_dcrn_t dcrn[DCRN_NB]; + int (*read_error)(int dcrn); + int (*write_error)(int dcrn); +}; + +int ppc_dcr_read (ppc_dcr_t *dcr_env, int dcrn, uint32_t *valp) +{ + ppc_dcrn_t *dcr; + + if (dcrn < 0 || dcrn >= DCRN_NB) + goto error; + dcr = &dcr_env->dcrn[dcrn]; + if (dcr->dcr_read == NULL) + goto error; + *valp = (*dcr->dcr_read)(dcr->opaque, dcrn); + + return 0; + + error: + if (dcr_env->read_error != NULL) + return (*dcr_env->read_error)(dcrn); + + return -1; +} + +int ppc_dcr_write (ppc_dcr_t *dcr_env, int dcrn, uint32_t val) +{ + ppc_dcrn_t *dcr; + + if (dcrn < 0 || dcrn >= DCRN_NB) + goto error; + dcr = &dcr_env->dcrn[dcrn]; + if (dcr->dcr_write == NULL) + goto error; + (*dcr->dcr_write)(dcr->opaque, dcrn, val); + + return 0; + + error: + if (dcr_env->write_error != NULL) + return (*dcr_env->write_error)(dcrn); + + return -1; +} + +int ppc_dcr_register (CPUPPCState *env, int dcrn, void *opaque, + dcr_read_cb dcr_read, dcr_write_cb dcr_write) +{ + ppc_dcr_t *dcr_env; + ppc_dcrn_t *dcr; + + dcr_env = env->dcr_env; + if (dcr_env == NULL) + return -1; + if (dcrn < 0 || dcrn >= DCRN_NB) + return -1; + dcr = &dcr_env->dcrn[dcrn]; + if (dcr->opaque != NULL || + dcr->dcr_read != NULL || + dcr->dcr_write != 
NULL) + return -1; + dcr->opaque = opaque; + dcr->dcr_read = dcr_read; + dcr->dcr_write = dcr_write; + + return 0; +} + +int ppc_dcr_init (CPUPPCState *env, int (*read_error)(int dcrn), + int (*write_error)(int dcrn)) +{ + ppc_dcr_t *dcr_env; + + dcr_env = g_malloc0(sizeof(ppc_dcr_t)); + dcr_env->read_error = read_error; + dcr_env->write_error = write_error; + env->dcr_env = dcr_env; + + return 0; +} + +/*****************************************************************************/ + +int ppc_cpu_pir(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + return env->spr_cb[SPR_PIR].default_value; +} + +PowerPCCPU *ppc_get_vcpu_by_pir(int pir) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + if (ppc_cpu_pir(cpu) == pir) { + return cpu; + } + } + + return NULL; +} + +void ppc_irq_reset(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + env->irq_input_state = 0; + kvmppc_set_interrupt(cpu, PPC_INTERRUPT_EXT, 0); +} diff --git a/hw/ppc/ppc405.h b/hw/ppc/ppc405.h new file mode 100644 index 000000000..c58f73988 --- /dev/null +++ b/hw/ppc/ppc405.h @@ -0,0 +1,72 @@ +/* + * QEMU PowerPC 405 shared definitions + * + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#ifndef PPC405_H +#define PPC405_H + +#include "hw/ppc/ppc4xx.h" + +/* Bootinfo as set-up by u-boot */ +typedef struct ppc4xx_bd_info_t ppc4xx_bd_info_t; +struct ppc4xx_bd_info_t { + uint32_t bi_memstart; + uint32_t bi_memsize; + uint32_t bi_flashstart; + uint32_t bi_flashsize; + uint32_t bi_flashoffset; /* 0x10 */ + uint32_t bi_sramstart; + uint32_t bi_sramsize; + uint32_t bi_bootflags; + uint32_t bi_ipaddr; /* 0x20 */ + uint8_t bi_enetaddr[6]; + uint16_t bi_ethspeed; + uint32_t bi_intfreq; + uint32_t bi_busfreq; /* 0x30 */ + uint32_t bi_baudrate; + uint8_t bi_s_version[4]; + uint8_t bi_r_version[32]; + uint32_t bi_procfreq; + uint32_t bi_plb_busfreq; + uint32_t bi_pci_busfreq; + uint8_t bi_pci_enetaddr[6]; + uint32_t bi_pci_enetaddr2[6]; + uint32_t bi_opbfreq; + uint32_t bi_iic_fast[2]; +}; + +/* PowerPC 405 core */ +ram_addr_t ppc405_set_bootinfo (CPUPPCState *env, ppc4xx_bd_info_t *bd, + uint32_t flags); + +void ppc4xx_plb_init(CPUPPCState *env); +void ppc405_ebc_init(CPUPPCState *env); + +CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem, + MemoryRegion ram_memories[2], + hwaddr ram_bases[2], + hwaddr ram_sizes[2], + uint32_t sysclk, DeviceState **uicdev, + int do_init); + +#endif /* PPC405_H */ diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c new file mode 100644 index 000000000..972a7a4a3 --- /dev/null +++ b/hw/ppc/ppc405_boards.c @@ -0,0 +1,564 @@ +/* + * QEMU PowerPC 405 evaluation boards emulation + * + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "ppc405.h" +#include "hw/rtc/m48t59.h" +#include "hw/block/flash.h" +#include "sysemu/qtest.h" +#include "sysemu/reset.h" +#include "sysemu/block-backend.h" +#include "hw/boards.h" +#include "qemu/error-report.h" +#include "hw/loader.h" +#include "qemu/cutils.h" + +#define BIOS_FILENAME "ppc405_rom.bin" +#define BIOS_SIZE (2 * MiB) + +#define KERNEL_LOAD_ADDR 0x00000000 +#define INITRD_LOAD_ADDR 0x01800000 + +#define USE_FLASH_BIOS + +/*****************************************************************************/ +/* PPC405EP reference board (IBM) */ +/* Standalone board with: + * - PowerPC 405EP CPU + * - SDRAM (0x00000000) + * - Flash (0xFFF80000) + * - SRAM (0xFFF00000) + * - NVRAM (0xF0000000) + * - FPGA (0xF0300000) + */ +typedef struct ref405ep_fpga_t ref405ep_fpga_t; +struct ref405ep_fpga_t { + uint8_t reg0; + uint8_t reg1; +}; + +static uint64_t ref405ep_fpga_readb(void *opaque, hwaddr addr, unsigned size) +{ + ref405ep_fpga_t *fpga; + uint32_t ret; + + fpga = opaque; + switch (addr) { + case 0x0: + ret = fpga->reg0; + break; + case 0x1: + ret = fpga->reg1; + break; + default: + ret = 0; + break; + } + + return ret; +} + +static void ref405ep_fpga_writeb(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + ref405ep_fpga_t *fpga; + + fpga = opaque; + switch (addr) { + case 0x0: + /* Read only */ + break; + case 0x1: + fpga->reg1 = value; + break; + default: + break; + } +} + +static const MemoryRegionOps ref405ep_fpga_ops = { + .read = ref405ep_fpga_readb, + .write = ref405ep_fpga_writeb, + .impl.min_access_size = 1, + .impl.max_access_size = 1, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void ref405ep_fpga_reset (void *opaque) +{ + ref405ep_fpga_t *fpga; + + fpga = opaque; + fpga->reg0 = 0x00; + fpga->reg1 = 0x0F; +} + +static void ref405ep_fpga_init(MemoryRegion *sysmem, uint32_t base) +{ + ref405ep_fpga_t *fpga; + MemoryRegion *fpga_memory = g_new(MemoryRegion, 1); + + fpga = g_malloc0(sizeof(ref405ep_fpga_t)); + memory_region_init_io(fpga_memory, NULL, &ref405ep_fpga_ops, fpga, + "fpga", 0x00000100); + memory_region_add_subregion(sysmem, base, fpga_memory); + qemu_register_reset(&ref405ep_fpga_reset, fpga); +} + +static void ref405ep_init(MachineState *machine) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + const char *bios_name = machine->firmware ?: BIOS_FILENAME; + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + char *filename; + ppc4xx_bd_info_t bd; + CPUPPCState *env; + DeviceState *dev; + SysBusDevice *s; + MemoryRegion *bios; + MemoryRegion *sram = g_new(MemoryRegion, 1); + ram_addr_t bdloc; + MemoryRegion *ram_memories = g_new(MemoryRegion, 2); + hwaddr ram_bases[2], ram_sizes[2]; + target_ulong sram_size; + long bios_size; + //int phy_addr = 0; + //static int phy_addr = 1; + target_ulong kernel_base, initrd_base; + long kernel_size, initrd_size; + int linux_boot; + int len; + DriveInfo *dinfo; + MemoryRegion *sysmem = get_system_memory(); + DeviceState *uicdev; + + if (machine->ram_size != mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + 
g_free(sz); + exit(EXIT_FAILURE); + } + + /* XXX: fix this */ + memory_region_init_alias(&ram_memories[0], NULL, "ef405ep.ram.alias", + machine->ram, 0, machine->ram_size); + ram_bases[0] = 0; + ram_sizes[0] = machine->ram_size; + memory_region_init(&ram_memories[1], NULL, "ef405ep.ram1", 0); + ram_bases[1] = 0x00000000; + ram_sizes[1] = 0x00000000; + env = ppc405ep_init(sysmem, ram_memories, ram_bases, ram_sizes, + 33333333, &uicdev, kernel_filename == NULL ? 0 : 1); + /* allocate SRAM */ + sram_size = 512 * KiB; + memory_region_init_ram(sram, NULL, "ef405ep.sram", sram_size, + &error_fatal); + memory_region_add_subregion(sysmem, 0xFFF00000, sram); + /* allocate and load BIOS */ +#ifdef USE_FLASH_BIOS + dinfo = drive_get(IF_PFLASH, 0, 0); + if (dinfo) { + bios_size = 8 * MiB; + pflash_cfi02_register((uint32_t)(-bios_size), + "ef405ep.bios", bios_size, + blk_by_legacy_dinfo(dinfo), + 64 * KiB, 1, + 2, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA, + 1); + } else +#endif + { + bios = g_new(MemoryRegion, 1); + memory_region_init_rom(bios, NULL, "ef405ep.bios", BIOS_SIZE, + &error_fatal); + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = load_image_size(filename, + memory_region_get_ram_ptr(bios), + BIOS_SIZE); + g_free(filename); + if (bios_size < 0) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } + bios_size = (bios_size + 0xfff) & ~0xfff; + memory_region_add_subregion(sysmem, (uint32_t)(-bios_size), bios); + } else if (!qtest_enabled() || kernel_filename != NULL) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } else { + /* Avoid an uninitialized variable warning */ + bios_size = -1; + } + } + /* Register FPGA */ + ref405ep_fpga_init(sysmem, 0xF0300000); + /* Register NVRAM */ + dev = qdev_new("sysbus-m48t08"); + qdev_prop_set_int32(dev, "base-year", 1968); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, 0xF0000000); + /* Load kernel */ + linux_boot = (kernel_filename != NULL); + if (linux_boot) { + memset(&bd, 0, sizeof(bd)); + bd.bi_memstart = 0x00000000; + bd.bi_memsize = machine->ram_size; + bd.bi_flashstart = -bios_size; + bd.bi_flashsize = -bios_size; + bd.bi_flashoffset = 0; + bd.bi_sramstart = 0xFFF00000; + bd.bi_sramsize = sram_size; + bd.bi_bootflags = 0; + bd.bi_intfreq = 133333333; + bd.bi_busfreq = 33333333; + bd.bi_baudrate = 115200; + bd.bi_s_version[0] = 'Q'; + bd.bi_s_version[1] = 'M'; + bd.bi_s_version[2] = 'U'; + bd.bi_s_version[3] = '\0'; + bd.bi_r_version[0] = 'Q'; + bd.bi_r_version[1] = 'E'; + bd.bi_r_version[2] = 'M'; + bd.bi_r_version[3] = 'U'; + bd.bi_r_version[4] = '\0'; + bd.bi_procfreq = 133333333; + bd.bi_plb_busfreq = 33333333; + bd.bi_pci_busfreq = 33333333; + bd.bi_opbfreq = 33333333; + bdloc = ppc405_set_bootinfo(env, &bd, 0x00000001); + env->gpr[3] = bdloc; + kernel_base = KERNEL_LOAD_ADDR; + /* now we can load the kernel */ + kernel_size = load_image_targphys(kernel_filename, kernel_base, + machine->ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + printf("Load kernel size %ld at " TARGET_FMT_lx, + kernel_size, kernel_base); + /* load initrd */ + if (initrd_filename) { + initrd_base = INITRD_LOAD_ADDR; + initrd_size = load_image_targphys(initrd_filename, initrd_base, + machine->ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + } else { 
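/*
 * Editor's sketch, not part of the original patch: the command-line
 * placement a little further down reserves space under the bd_info
 * block by rounding the string length up to a 256-byte multiple,
 * bdloc -= ((len + 255) & ~255). Standalone form of that arithmetic
 * (hypothetical "example_" name), disabled with #if 0:
 */
#if 0
static unsigned long example_round_up_256(unsigned long len)
{
    return (len + 255UL) & ~255UL;   /* 1 -> 256, 256 -> 256, 257 -> 512 */
}
#endif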
+ initrd_base = 0; + initrd_size = 0; + } + env->gpr[4] = initrd_base; + env->gpr[5] = initrd_size; + if (kernel_cmdline != NULL) { + len = strlen(kernel_cmdline); + bdloc -= ((len + 255) & ~255); + cpu_physical_memory_write(bdloc, kernel_cmdline, len + 1); + env->gpr[6] = bdloc; + env->gpr[7] = bdloc + len; + } else { + env->gpr[6] = 0; + env->gpr[7] = 0; + } + env->nip = KERNEL_LOAD_ADDR; + } else { + kernel_base = 0; + kernel_size = 0; + initrd_base = 0; + initrd_size = 0; + bdloc = 0; + } +} + +static void ref405ep_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "ref405ep"; + mc->init = ref405ep_init; + mc->default_ram_size = 0x08000000; + mc->default_ram_id = "ef405ep.ram"; +} + +static const TypeInfo ref405ep_type = { + .name = MACHINE_TYPE_NAME("ref405ep"), + .parent = TYPE_MACHINE, + .class_init = ref405ep_class_init, +}; + +/*****************************************************************************/ +/* AMCC Taihu evaluation board */ +/* - PowerPC 405EP processor + * - SDRAM 128 MB at 0x00000000 + * - Boot flash 2 MB at 0xFFE00000 + * - Application flash 32 MB at 0xFC000000 + * - 2 serial ports + * - 2 ethernet PHY + * - 1 USB 1.1 device 0x50000000 + * - 1 LCD display 0x50100000 + * - 1 CPLD 0x50100000 + * - 1 I2C EEPROM + * - 1 I2C thermal sensor + * - a set of LEDs + * - bit-bang SPI port using GPIOs + * - 1 EBC interface connector 0 0x50200000 + * - 1 cardbus controller + expansion slot. + * - 1 PCI expansion slot. + */ +typedef struct taihu_cpld_t taihu_cpld_t; +struct taihu_cpld_t { + uint8_t reg0; + uint8_t reg1; +}; + +static uint64_t taihu_cpld_read(void *opaque, hwaddr addr, unsigned size) +{ + taihu_cpld_t *cpld; + uint32_t ret; + + cpld = opaque; + switch (addr) { + case 0x0: + ret = cpld->reg0; + break; + case 0x1: + ret = cpld->reg1; + break; + default: + ret = 0; + break; + } + + return ret; +} + +static void taihu_cpld_write(void *opaque, hwaddr addr, + uint64_t value, unsigned size) +{ + taihu_cpld_t *cpld; + + cpld = opaque; + switch (addr) { + case 0x0: + /* Read only */ + break; + case 0x1: + cpld->reg1 = value; + break; + default: + break; + } +} + +static const MemoryRegionOps taihu_cpld_ops = { + .read = taihu_cpld_read, + .write = taihu_cpld_write, + .impl = { + .min_access_size = 1, + .max_access_size = 1, + }, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void taihu_cpld_reset (void *opaque) +{ + taihu_cpld_t *cpld; + + cpld = opaque; + cpld->reg0 = 0x01; + cpld->reg1 = 0x80; +} + +static void taihu_cpld_init(MemoryRegion *sysmem, uint32_t base) +{ + taihu_cpld_t *cpld; + MemoryRegion *cpld_memory = g_new(MemoryRegion, 1); + + cpld = g_malloc0(sizeof(taihu_cpld_t)); + memory_region_init_io(cpld_memory, NULL, &taihu_cpld_ops, cpld, "cpld", 0x100); + memory_region_add_subregion(sysmem, base, cpld_memory); + qemu_register_reset(&taihu_cpld_reset, cpld); +} + +static void taihu_405ep_init(MachineState *machine) +{ + MachineClass *mc = MACHINE_GET_CLASS(machine); + const char *bios_name = machine->firmware ?: BIOS_FILENAME; + const char *kernel_filename = machine->kernel_filename; + const char *initrd_filename = machine->initrd_filename; + char *filename; + MemoryRegion *sysmem = get_system_memory(); + MemoryRegion *bios; + MemoryRegion *ram_memories = g_new(MemoryRegion, 2); + hwaddr ram_bases[2], ram_sizes[2]; + long bios_size; + target_ulong kernel_base, initrd_base; + long kernel_size, initrd_size; + int linux_boot; + int fl_idx; + DriveInfo *dinfo; + DeviceState *uicdev; + + if (machine->ram_size 
!= mc->default_ram_size) { + char *sz = size_to_str(mc->default_ram_size); + error_report("Invalid RAM size, should be %s", sz); + g_free(sz); + exit(EXIT_FAILURE); + } + + ram_bases[0] = 0; + ram_sizes[0] = 0x04000000; + memory_region_init_alias(&ram_memories[0], NULL, + "taihu_405ep.ram-0", machine->ram, ram_bases[0], + ram_sizes[0]); + ram_bases[1] = 0x04000000; + ram_sizes[1] = 0x04000000; + memory_region_init_alias(&ram_memories[1], NULL, + "taihu_405ep.ram-1", machine->ram, ram_bases[1], + ram_sizes[1]); + ppc405ep_init(sysmem, ram_memories, ram_bases, ram_sizes, + 33333333, &uicdev, kernel_filename == NULL ? 0 : 1); + /* allocate and load BIOS */ + fl_idx = 0; +#if defined(USE_FLASH_BIOS) + dinfo = drive_get(IF_PFLASH, 0, fl_idx); + if (dinfo) { + bios_size = 2 * MiB; + pflash_cfi02_register(0xFFE00000, + "taihu_405ep.bios", bios_size, + blk_by_legacy_dinfo(dinfo), + 64 * KiB, 1, + 4, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA, + 1); + fl_idx++; + } else +#endif + { + bios = g_new(MemoryRegion, 1); + memory_region_init_rom(bios, NULL, "taihu_405ep.bios", BIOS_SIZE, + &error_fatal); + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (filename) { + bios_size = load_image_size(filename, + memory_region_get_ram_ptr(bios), + BIOS_SIZE); + g_free(filename); + if (bios_size < 0) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } + bios_size = (bios_size + 0xfff) & ~0xfff; + memory_region_add_subregion(sysmem, (uint32_t)(-bios_size), bios); + } else if (!qtest_enabled()) { + error_report("Could not load PowerPC BIOS '%s'", bios_name); + exit(1); + } + } + /* Register Linux flash */ + dinfo = drive_get(IF_PFLASH, 0, fl_idx); + if (dinfo) { + bios_size = 32 * MiB; + pflash_cfi02_register(0xfc000000, "taihu_405ep.flash", bios_size, + blk_by_legacy_dinfo(dinfo), + 64 * KiB, 1, + 4, 0x0001, 0x22DA, 0x0000, 0x0000, 0x555, 0x2AA, + 1); + fl_idx++; + } + /* Register CPLD & LCD display */ + taihu_cpld_init(sysmem, 0x50100000); + /* Load kernel */ + linux_boot = (kernel_filename != NULL); + if (linux_boot) { + kernel_base = KERNEL_LOAD_ADDR; + /* now we can load the kernel */ + kernel_size = load_image_targphys(kernel_filename, kernel_base, + machine->ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + /* load initrd */ + if (initrd_filename) { + initrd_base = INITRD_LOAD_ADDR; + initrd_size = load_image_targphys(initrd_filename, initrd_base, + machine->ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + } else { + initrd_base = 0; + initrd_size = 0; + } + } else { + kernel_base = 0; + kernel_size = 0; + initrd_base = 0; + initrd_size = 0; + } +} + +static void taihu_class_init(ObjectClass *oc, void *data) +{ + MachineClass *mc = MACHINE_CLASS(oc); + + mc->desc = "taihu"; + mc->init = taihu_405ep_init; + mc->default_ram_size = 0x08000000; + mc->default_ram_id = "taihu_405ep.ram"; +} + +static const TypeInfo taihu_type = { + .name = MACHINE_TYPE_NAME("taihu"), + .parent = TYPE_MACHINE, + .class_init = taihu_class_init, +}; + +static void ppc405_machine_init(void) +{ + type_register_static(&ref405ep_type); + type_register_static(&taihu_type); +} + +type_init(ppc405_machine_init) diff --git a/hw/ppc/ppc405_uc.c b/hw/ppc/ppc405_uc.c new file mode 100644 index 000000000..e632c408b --- /dev/null +++ b/hw/ppc/ppc405_uc.c @@ -0,0 +1,1547 @@ +/* + * QEMU PowerPC 405 embedded processors emulation + *
+ * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qapi/error.h" +#include "cpu.h" +#include "hw/ppc/ppc.h" +#include "hw/i2c/ppc4xx_i2c.h" +#include "hw/irq.h" +#include "ppc405.h" +#include "hw/char/serial.h" +#include "qemu/timer.h" +#include "sysemu/reset.h" +#include "sysemu/sysemu.h" +#include "exec/address-spaces.h" +#include "hw/intc/ppc-uic.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" + +//#define DEBUG_OPBA +//#define DEBUG_SDRAM +//#define DEBUG_GPIO +//#define DEBUG_SERIAL +//#define DEBUG_OCM +//#define DEBUG_GPT +//#define DEBUG_CLOCKS +//#define DEBUG_CLOCKS_LL + +ram_addr_t ppc405_set_bootinfo (CPUPPCState *env, ppc4xx_bd_info_t *bd, + uint32_t flags) +{ + CPUState *cs = env_cpu(env); + ram_addr_t bdloc; + int i, n; + + /* We put the bd structure at the top of memory */ + if (bd->bi_memsize >= 0x01000000UL) + bdloc = 0x01000000UL - sizeof(struct ppc4xx_bd_info_t); + else + bdloc = bd->bi_memsize - sizeof(struct ppc4xx_bd_info_t); + stl_be_phys(cs->as, bdloc + 0x00, bd->bi_memstart); + stl_be_phys(cs->as, bdloc + 0x04, bd->bi_memsize); + stl_be_phys(cs->as, bdloc + 0x08, bd->bi_flashstart); + stl_be_phys(cs->as, bdloc + 0x0C, bd->bi_flashsize); + stl_be_phys(cs->as, bdloc + 0x10, bd->bi_flashoffset); + stl_be_phys(cs->as, bdloc + 0x14, bd->bi_sramstart); + stl_be_phys(cs->as, bdloc + 0x18, bd->bi_sramsize); + stl_be_phys(cs->as, bdloc + 0x1C, bd->bi_bootflags); + stl_be_phys(cs->as, bdloc + 0x20, bd->bi_ipaddr); + for (i = 0; i < 6; i++) { + stb_phys(cs->as, bdloc + 0x24 + i, bd->bi_enetaddr[i]); + } + stw_be_phys(cs->as, bdloc + 0x2A, bd->bi_ethspeed); + stl_be_phys(cs->as, bdloc + 0x2C, bd->bi_intfreq); + stl_be_phys(cs->as, bdloc + 0x30, bd->bi_busfreq); + stl_be_phys(cs->as, bdloc + 0x34, bd->bi_baudrate); + for (i = 0; i < 4; i++) { + stb_phys(cs->as, bdloc + 0x38 + i, bd->bi_s_version[i]); + } + for (i = 0; i < 32; i++) { + stb_phys(cs->as, bdloc + 0x3C + i, bd->bi_r_version[i]); + } + stl_be_phys(cs->as, bdloc + 0x5C, bd->bi_plb_busfreq); + stl_be_phys(cs->as, bdloc + 0x60, bd->bi_pci_busfreq); + for (i = 0; i < 6; i++) { + stb_phys(cs->as, bdloc + 0x64 + i, bd->bi_pci_enetaddr[i]); + } + n = 0x6A; + if (flags & 0x00000001) { + for (i = 0; i < 6; i++) + stb_phys(cs->as, bdloc + n++, bd->bi_pci_enetaddr2[i]); + } + stl_be_phys(cs->as, bdloc + n, bd->bi_opbfreq); + n += 4; + for (i = 0; i < 2; i++) { + stl_be_phys(cs->as, bdloc + 
n, bd->bi_iic_fast[i]); + n += 4; + } + + return bdloc; +} + +/*****************************************************************************/ +/* Shared peripherals */ + +/*****************************************************************************/ +/* Peripheral local bus arbitrer */ +enum { + PLB3A0_ACR = 0x077, + PLB4A0_ACR = 0x081, + PLB0_BESR = 0x084, + PLB0_BEAR = 0x086, + PLB0_ACR = 0x087, + PLB4A1_ACR = 0x089, +}; + +typedef struct ppc4xx_plb_t ppc4xx_plb_t; +struct ppc4xx_plb_t { + uint32_t acr; + uint32_t bear; + uint32_t besr; +}; + +static uint32_t dcr_read_plb (void *opaque, int dcrn) +{ + ppc4xx_plb_t *plb; + uint32_t ret; + + plb = opaque; + switch (dcrn) { + case PLB0_ACR: + ret = plb->acr; + break; + case PLB0_BEAR: + ret = plb->bear; + break; + case PLB0_BESR: + ret = plb->besr; + break; + default: + /* Avoid gcc warning */ + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_plb (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_plb_t *plb; + + plb = opaque; + switch (dcrn) { + case PLB0_ACR: + /* We don't care about the actual parameters written as + * we don't manage any priorities on the bus + */ + plb->acr = val & 0xF8000000; + break; + case PLB0_BEAR: + /* Read only */ + break; + case PLB0_BESR: + /* Write-clear */ + plb->besr &= ~val; + break; + } +} + +static void ppc4xx_plb_reset (void *opaque) +{ + ppc4xx_plb_t *plb; + + plb = opaque; + plb->acr = 0x00000000; + plb->bear = 0x00000000; + plb->besr = 0x00000000; +} + +void ppc4xx_plb_init(CPUPPCState *env) +{ + ppc4xx_plb_t *plb; + + plb = g_malloc0(sizeof(ppc4xx_plb_t)); + ppc_dcr_register(env, PLB3A0_ACR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB4A0_ACR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB0_ACR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB0_BEAR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB0_BESR, plb, &dcr_read_plb, &dcr_write_plb); + ppc_dcr_register(env, PLB4A1_ACR, plb, &dcr_read_plb, &dcr_write_plb); + qemu_register_reset(ppc4xx_plb_reset, plb); +} + +/*****************************************************************************/ +/* PLB to OPB bridge */ +enum { + POB0_BESR0 = 0x0A0, + POB0_BESR1 = 0x0A2, + POB0_BEAR = 0x0A4, +}; + +typedef struct ppc4xx_pob_t ppc4xx_pob_t; +struct ppc4xx_pob_t { + uint32_t bear; + uint32_t besr0; + uint32_t besr1; +}; + +static uint32_t dcr_read_pob (void *opaque, int dcrn) +{ + ppc4xx_pob_t *pob; + uint32_t ret; + + pob = opaque; + switch (dcrn) { + case POB0_BEAR: + ret = pob->bear; + break; + case POB0_BESR0: + ret = pob->besr0; + break; + case POB0_BESR1: + ret = pob->besr1; + break; + default: + /* Avoid gcc warning */ + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_pob (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_pob_t *pob; + + pob = opaque; + switch (dcrn) { + case POB0_BEAR: + /* Read only */ + break; + case POB0_BESR0: + /* Write-clear */ + pob->besr0 &= ~val; + break; + case POB0_BESR1: + /* Write-clear */ + pob->besr1 &= ~val; + break; + } +} + +static void ppc4xx_pob_reset (void *opaque) +{ + ppc4xx_pob_t *pob; + + pob = opaque; + /* No error */ + pob->bear = 0x00000000; + pob->besr0 = 0x0000000; + pob->besr1 = 0x0000000; +} + +static void ppc4xx_pob_init(CPUPPCState *env) +{ + ppc4xx_pob_t *pob; + + pob = g_malloc0(sizeof(ppc4xx_pob_t)); + ppc_dcr_register(env, POB0_BEAR, pob, &dcr_read_pob, &dcr_write_pob); + ppc_dcr_register(env, POB0_BESR0, pob, &dcr_read_pob, &dcr_write_pob); + ppc_dcr_register(env, POB0_BESR1, pob, 
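+    /*
+     * The BESR registers here and in the PLB arbiter above are
+     * write-one-to-clear (reg &= ~val). Sketch of the guest-visible
+     * acknowledge sequence, calling the handlers directly:
+     *
+     *     uint32_t besr = dcr_read_plb(plb, PLB0_BESR); // fetch error bits
+     *     dcr_write_plb(plb, PLB0_BESR, besr);          // write back: cleared
+     */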
&dcr_read_pob, &dcr_write_pob); + qemu_register_reset(ppc4xx_pob_reset, pob); +} + +/*****************************************************************************/ +/* OPB arbitrer */ +typedef struct ppc4xx_opba_t ppc4xx_opba_t; +struct ppc4xx_opba_t { + MemoryRegion io; + uint8_t cr; + uint8_t pr; +}; + +static uint64_t opba_readb(void *opaque, hwaddr addr, unsigned size) +{ + ppc4xx_opba_t *opba; + uint32_t ret; + +#ifdef DEBUG_OPBA + printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr); +#endif + opba = opaque; + switch (addr) { + case 0x00: + ret = opba->cr; + break; + case 0x01: + ret = opba->pr; + break; + default: + ret = 0x00; + break; + } + + return ret; +} + +static void opba_writeb(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + ppc4xx_opba_t *opba; + +#ifdef DEBUG_OPBA + printf("%s: addr " TARGET_FMT_plx " val %08" PRIx32 "\n", __func__, addr, + value); +#endif + opba = opaque; + switch (addr) { + case 0x00: + opba->cr = value & 0xF8; + break; + case 0x01: + opba->pr = value & 0xFF; + break; + default: + break; + } +} +static const MemoryRegionOps opba_ops = { + .read = opba_readb, + .write = opba_writeb, + .impl.min_access_size = 1, + .impl.max_access_size = 1, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void ppc4xx_opba_reset (void *opaque) +{ + ppc4xx_opba_t *opba; + + opba = opaque; + opba->cr = 0x00; /* No dynamic priorities - park disabled */ + opba->pr = 0x11; +} + +static void ppc4xx_opba_init(hwaddr base) +{ + ppc4xx_opba_t *opba; + + opba = g_malloc0(sizeof(ppc4xx_opba_t)); +#ifdef DEBUG_OPBA + printf("%s: offset " TARGET_FMT_plx "\n", __func__, base); +#endif + memory_region_init_io(&opba->io, NULL, &opba_ops, opba, "opba", 0x002); + memory_region_add_subregion(get_system_memory(), base, &opba->io); + qemu_register_reset(ppc4xx_opba_reset, opba); +} + +/*****************************************************************************/ +/* Code decompression controller */ +/* XXX: TODO */ + +/*****************************************************************************/ +/* Peripheral controller */ +typedef struct ppc4xx_ebc_t ppc4xx_ebc_t; +struct ppc4xx_ebc_t { + uint32_t addr; + uint32_t bcr[8]; + uint32_t bap[8]; + uint32_t bear; + uint32_t besr0; + uint32_t besr1; + uint32_t cfg; +}; + +enum { + EBC0_CFGADDR = 0x012, + EBC0_CFGDATA = 0x013, +}; + +static uint32_t dcr_read_ebc (void *opaque, int dcrn) +{ + ppc4xx_ebc_t *ebc; + uint32_t ret; + + ebc = opaque; + switch (dcrn) { + case EBC0_CFGADDR: + ret = ebc->addr; + break; + case EBC0_CFGDATA: + switch (ebc->addr) { + case 0x00: /* B0CR */ + ret = ebc->bcr[0]; + break; + case 0x01: /* B1CR */ + ret = ebc->bcr[1]; + break; + case 0x02: /* B2CR */ + ret = ebc->bcr[2]; + break; + case 0x03: /* B3CR */ + ret = ebc->bcr[3]; + break; + case 0x04: /* B4CR */ + ret = ebc->bcr[4]; + break; + case 0x05: /* B5CR */ + ret = ebc->bcr[5]; + break; + case 0x06: /* B6CR */ + ret = ebc->bcr[6]; + break; + case 0x07: /* B7CR */ + ret = ebc->bcr[7]; + break; + case 0x10: /* B0AP */ + ret = ebc->bap[0]; + break; + case 0x11: /* B1AP */ + ret = ebc->bap[1]; + break; + case 0x12: /* B2AP */ + ret = ebc->bap[2]; + break; + case 0x13: /* B3AP */ + ret = ebc->bap[3]; + break; + case 0x14: /* B4AP */ + ret = ebc->bap[4]; + break; + case 0x15: /* B5AP */ + ret = ebc->bap[5]; + break; + case 0x16: /* B6AP */ + ret = ebc->bap[6]; + break; + case 0x17: /* B7AP */ + ret = ebc->bap[7]; + break; + case 0x20: /* BEAR */ + ret = ebc->bear; + break; + case 0x21: /* 
BESR0 */ + ret = ebc->besr0; + break; + case 0x22: /* BESR1 */ + ret = ebc->besr1; + break; + case 0x23: /* CFG */ + ret = ebc->cfg; + break; + default: + ret = 0x00000000; + break; + } + break; + default: + ret = 0x00000000; + break; + } + + return ret; +} + +static void dcr_write_ebc (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_ebc_t *ebc; + + ebc = opaque; + switch (dcrn) { + case EBC0_CFGADDR: + ebc->addr = val; + break; + case EBC0_CFGDATA: + switch (ebc->addr) { + case 0x00: /* B0CR */ + break; + case 0x01: /* B1CR */ + break; + case 0x02: /* B2CR */ + break; + case 0x03: /* B3CR */ + break; + case 0x04: /* B4CR */ + break; + case 0x05: /* B5CR */ + break; + case 0x06: /* B6CR */ + break; + case 0x07: /* B7CR */ + break; + case 0x10: /* B0AP */ + break; + case 0x11: /* B1AP */ + break; + case 0x12: /* B2AP */ + break; + case 0x13: /* B3AP */ + break; + case 0x14: /* B4AP */ + break; + case 0x15: /* B5AP */ + break; + case 0x16: /* B6AP */ + break; + case 0x17: /* B7AP */ + break; + case 0x20: /* BEAR */ + break; + case 0x21: /* BESR0 */ + break; + case 0x22: /* BESR1 */ + break; + case 0x23: /* CFG */ + break; + default: + break; + } + break; + default: + break; + } +} + +static void ebc_reset (void *opaque) +{ + ppc4xx_ebc_t *ebc; + int i; + + ebc = opaque; + ebc->addr = 0x00000000; + ebc->bap[0] = 0x7F8FFE80; + ebc->bcr[0] = 0xFFE28000; + for (i = 0; i < 8; i++) { + ebc->bap[i] = 0x00000000; + ebc->bcr[i] = 0x00000000; + } + ebc->besr0 = 0x00000000; + ebc->besr1 = 0x00000000; + ebc->cfg = 0x80400000; +} + +void ppc405_ebc_init(CPUPPCState *env) +{ + ppc4xx_ebc_t *ebc; + + ebc = g_malloc0(sizeof(ppc4xx_ebc_t)); + qemu_register_reset(&ebc_reset, ebc); + ppc_dcr_register(env, EBC0_CFGADDR, + ebc, &dcr_read_ebc, &dcr_write_ebc); + ppc_dcr_register(env, EBC0_CFGDATA, + ebc, &dcr_read_ebc, &dcr_write_ebc); +} + +/*****************************************************************************/ +/* DMA controller */ +enum { + DMA0_CR0 = 0x100, + DMA0_CT0 = 0x101, + DMA0_DA0 = 0x102, + DMA0_SA0 = 0x103, + DMA0_SG0 = 0x104, + DMA0_CR1 = 0x108, + DMA0_CT1 = 0x109, + DMA0_DA1 = 0x10A, + DMA0_SA1 = 0x10B, + DMA0_SG1 = 0x10C, + DMA0_CR2 = 0x110, + DMA0_CT2 = 0x111, + DMA0_DA2 = 0x112, + DMA0_SA2 = 0x113, + DMA0_SG2 = 0x114, + DMA0_CR3 = 0x118, + DMA0_CT3 = 0x119, + DMA0_DA3 = 0x11A, + DMA0_SA3 = 0x11B, + DMA0_SG3 = 0x11C, + DMA0_SR = 0x120, + DMA0_SGC = 0x123, + DMA0_SLP = 0x125, + DMA0_POL = 0x126, +}; + +typedef struct ppc405_dma_t ppc405_dma_t; +struct ppc405_dma_t { + qemu_irq irqs[4]; + uint32_t cr[4]; + uint32_t ct[4]; + uint32_t da[4]; + uint32_t sa[4]; + uint32_t sg[4]; + uint32_t sr; + uint32_t sgc; + uint32_t slp; + uint32_t pol; +}; + +static uint32_t dcr_read_dma (void *opaque, int dcrn) +{ + return 0; +} + +static void dcr_write_dma (void *opaque, int dcrn, uint32_t val) +{ +} + +static void ppc405_dma_reset (void *opaque) +{ + ppc405_dma_t *dma; + int i; + + dma = opaque; + for (i = 0; i < 4; i++) { + dma->cr[i] = 0x00000000; + dma->ct[i] = 0x00000000; + dma->da[i] = 0x00000000; + dma->sa[i] = 0x00000000; + dma->sg[i] = 0x00000000; + } + dma->sr = 0x00000000; + dma->sgc = 0x00000000; + dma->slp = 0x7C000000; + dma->pol = 0x00000000; +} + +static void ppc405_dma_init(CPUPPCState *env, qemu_irq irqs[4]) +{ + ppc405_dma_t *dma; + + dma = g_malloc0(sizeof(ppc405_dma_t)); + memcpy(dma->irqs, irqs, 4 * sizeof(qemu_irq)); + qemu_register_reset(&ppc405_dma_reset, dma); + ppc_dcr_register(env, DMA0_CR0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT0, + dma, 
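+    /*
+     * The EBC above is addressed indirectly: EBC0_CFGADDR selects an
+     * internal register (B0CR..B7CR, B0AP..B7AP, BEAR, BESR0/1, CFG)
+     * and EBC0_CFGDATA carries the data, so reading B0CR takes two DCR
+     * accesses. Sketch, calling the handlers directly:
+     *
+     *     dcr_write_ebc(ebc, EBC0_CFGADDR, 0x00);           // select B0CR
+     *     uint32_t b0cr = dcr_read_ebc(ebc, EBC0_CFGDATA);  // == ebc->bcr[0]
+     */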
&dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG0, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CR1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG1, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CR2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG2, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CR3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_CT3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_DA3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SA3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SG3, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SR, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SGC, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_SLP, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, DMA0_POL, + dma, &dcr_read_dma, &dcr_write_dma); +} + +/*****************************************************************************/ +/* GPIO */ +typedef struct ppc405_gpio_t ppc405_gpio_t; +struct ppc405_gpio_t { + MemoryRegion io; + uint32_t or; + uint32_t tcr; + uint32_t osrh; + uint32_t osrl; + uint32_t tsrh; + uint32_t tsrl; + uint32_t odr; + uint32_t ir; + uint32_t rr1; + uint32_t isr1h; + uint32_t isr1l; +}; + +static uint64_t ppc405_gpio_read(void *opaque, hwaddr addr, unsigned size) +{ +#ifdef DEBUG_GPIO + printf("%s: addr " TARGET_FMT_plx " size %d\n", __func__, addr, size); +#endif + + return 0; +} + +static void ppc405_gpio_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ +#ifdef DEBUG_GPIO + printf("%s: addr " TARGET_FMT_plx " size %d val %08" PRIx32 "\n", + __func__, addr, size, value); +#endif +} + +static const MemoryRegionOps ppc405_gpio_ops = { + .read = ppc405_gpio_read, + .write = ppc405_gpio_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void ppc405_gpio_reset (void *opaque) +{ +} + +static void ppc405_gpio_init(hwaddr base) +{ + ppc405_gpio_t *gpio; + + gpio = g_malloc0(sizeof(ppc405_gpio_t)); +#ifdef DEBUG_GPIO + printf("%s: offset " TARGET_FMT_plx "\n", __func__, base); +#endif + memory_region_init_io(&gpio->io, NULL, &ppc405_gpio_ops, gpio, "pgio", 0x038); + memory_region_add_subregion(get_system_memory(), base, &gpio->io); + qemu_register_reset(&ppc405_gpio_reset, gpio); +} + +/*****************************************************************************/ +/* On Chip Memory */ +enum { + OCM0_ISARC = 0x018, + OCM0_ISACNTL = 0x019, + OCM0_DSARC = 0x01A, + OCM0_DSACNTL = 0x01B, +}; + +typedef struct ppc405_ocm_t ppc405_ocm_t; +struct ppc405_ocm_t { + MemoryRegion ram; + MemoryRegion isarc_ram; + MemoryRegion dsarc_ram; + uint32_t isarc; + uint32_t isacntl; + uint32_t dsarc; + uint32_t dsacntl; +}; + +static void ocm_update_mappings (ppc405_ocm_t *ocm, + uint32_t isarc, 
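+    /*
+     * The twenty per-channel DMA registrations in ppc405_dma_init()
+     * above are mechanical: each of the four channels owns five
+     * consecutive DCRs (CRx, CTx, DAx, SAx, SGx) based at
+     * 0x100/0x108/0x110/0x118. An equivalent loop form (a sketch, not
+     * what this patch does) would be:
+     *
+     *     static const int chan[4] = { DMA0_CR0, DMA0_CR1,
+     *                                  DMA0_CR2, DMA0_CR3 };
+     *     for (i = 0; i < 4; i++) {
+     *         for (j = 0; j < 5; j++) {
+     *             ppc_dcr_register(env, chan[i] + j, dma,
+     *                              &dcr_read_dma, &dcr_write_dma);
+     *         }
+     *     }
+     */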
uint32_t isacntl, + uint32_t dsarc, uint32_t dsacntl) +{ +#ifdef DEBUG_OCM + printf("OCM update ISA %08" PRIx32 " %08" PRIx32 " (%08" PRIx32 + " %08" PRIx32 ") DSA %08" PRIx32 " %08" PRIx32 + " (%08" PRIx32 " %08" PRIx32 ")\n", + isarc, isacntl, dsarc, dsacntl, + ocm->isarc, ocm->isacntl, ocm->dsarc, ocm->dsacntl); +#endif + if (ocm->isarc != isarc || + (ocm->isacntl & 0x80000000) != (isacntl & 0x80000000)) { + if (ocm->isacntl & 0x80000000) { + /* Unmap previously assigned memory region */ + printf("OCM unmap ISA %08" PRIx32 "\n", ocm->isarc); + memory_region_del_subregion(get_system_memory(), &ocm->isarc_ram); + } + if (isacntl & 0x80000000) { + /* Map new instruction memory region */ +#ifdef DEBUG_OCM + printf("OCM map ISA %08" PRIx32 "\n", isarc); +#endif + memory_region_add_subregion(get_system_memory(), isarc, + &ocm->isarc_ram); + } + } + if (ocm->dsarc != dsarc || + (ocm->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) { + if (ocm->dsacntl & 0x80000000) { + /* Beware not to unmap the region we just mapped */ + if (!(isacntl & 0x80000000) || ocm->dsarc != isarc) { + /* Unmap previously assigned memory region */ +#ifdef DEBUG_OCM + printf("OCM unmap DSA %08" PRIx32 "\n", ocm->dsarc); +#endif + memory_region_del_subregion(get_system_memory(), + &ocm->dsarc_ram); + } + } + if (dsacntl & 0x80000000) { + /* Beware not to remap the region we just mapped */ + if (!(isacntl & 0x80000000) || dsarc != isarc) { + /* Map new data memory region */ +#ifdef DEBUG_OCM + printf("OCM map DSA %08" PRIx32 "\n", dsarc); +#endif + memory_region_add_subregion(get_system_memory(), dsarc, + &ocm->dsarc_ram); + } + } + } +} + +static uint32_t dcr_read_ocm (void *opaque, int dcrn) +{ + ppc405_ocm_t *ocm; + uint32_t ret; + + ocm = opaque; + switch (dcrn) { + case OCM0_ISARC: + ret = ocm->isarc; + break; + case OCM0_ISACNTL: + ret = ocm->isacntl; + break; + case OCM0_DSARC: + ret = ocm->dsarc; + break; + case OCM0_DSACNTL: + ret = ocm->dsacntl; + break; + default: + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_ocm (void *opaque, int dcrn, uint32_t val) +{ + ppc405_ocm_t *ocm; + uint32_t isarc, dsarc, isacntl, dsacntl; + + ocm = opaque; + isarc = ocm->isarc; + dsarc = ocm->dsarc; + isacntl = ocm->isacntl; + dsacntl = ocm->dsacntl; + switch (dcrn) { + case OCM0_ISARC: + isarc = val & 0xFC000000; + break; + case OCM0_ISACNTL: + isacntl = val & 0xC0000000; + break; + case OCM0_DSARC: + isarc = val & 0xFC000000; + break; + case OCM0_DSACNTL: + isacntl = val & 0xC0000000; + break; + } + ocm_update_mappings(ocm, isarc, isacntl, dsarc, dsacntl); + ocm->isarc = isarc; + ocm->dsarc = dsarc; + ocm->isacntl = isacntl; + ocm->dsacntl = dsacntl; +} + +static void ocm_reset (void *opaque) +{ + ppc405_ocm_t *ocm; + uint32_t isarc, dsarc, isacntl, dsacntl; + + ocm = opaque; + isarc = 0x00000000; + isacntl = 0x00000000; + dsarc = 0x00000000; + dsacntl = 0x00000000; + ocm_update_mappings(ocm, isarc, isacntl, dsarc, dsacntl); + ocm->isarc = isarc; + ocm->dsarc = dsarc; + ocm->isacntl = isacntl; + ocm->dsacntl = dsacntl; +} + +static void ppc405_ocm_init(CPUPPCState *env) +{ + ppc405_ocm_t *ocm; + + ocm = g_malloc0(sizeof(ppc405_ocm_t)); + /* XXX: Size is 4096 or 0x04000000 */ + memory_region_init_ram(&ocm->isarc_ram, NULL, "ppc405.ocm", 4 * KiB, + &error_fatal); + memory_region_init_alias(&ocm->dsarc_ram, NULL, "ppc405.dsarc", + &ocm->isarc_ram, 0, 4 * KiB); + qemu_register_reset(&ocm_reset, ocm); + ppc_dcr_register(env, OCM0_ISARC, + ocm, &dcr_read_ocm, &dcr_write_ocm); + ppc_dcr_register(env, OCM0_ISACNTL, 
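+    /*
+     * Note on dcr_write_ocm() above: the OCM0_DSARC and OCM0_DSACNTL
+     * cases assign to isarc/isacntl, so dsarc and dsacntl can never
+     * change after reset; the intended form is presumably:
+     *
+     *     case OCM0_DSARC:
+     *         dsarc = val & 0xFC000000;
+     *         break;
+     *     case OCM0_DSACNTL:
+     *         dsacntl = val & 0xC0000000;
+     *         break;
+     */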
+ ocm, &dcr_read_ocm, &dcr_write_ocm); + ppc_dcr_register(env, OCM0_DSARC, + ocm, &dcr_read_ocm, &dcr_write_ocm); + ppc_dcr_register(env, OCM0_DSACNTL, + ocm, &dcr_read_ocm, &dcr_write_ocm); +} + +/*****************************************************************************/ +/* General purpose timers */ +typedef struct ppc4xx_gpt_t ppc4xx_gpt_t; +struct ppc4xx_gpt_t { + MemoryRegion iomem; + int64_t tb_offset; + uint32_t tb_freq; + QEMUTimer *timer; + qemu_irq irqs[5]; + uint32_t oe; + uint32_t ol; + uint32_t im; + uint32_t is; + uint32_t ie; + uint32_t comp[5]; + uint32_t mask[5]; +}; + +static int ppc4xx_gpt_compare (ppc4xx_gpt_t *gpt, int n) +{ + /* XXX: TODO */ + return 0; +} + +static void ppc4xx_gpt_set_output (ppc4xx_gpt_t *gpt, int n, int level) +{ + /* XXX: TODO */ +} + +static void ppc4xx_gpt_set_outputs (ppc4xx_gpt_t *gpt) +{ + uint32_t mask; + int i; + + mask = 0x80000000; + for (i = 0; i < 5; i++) { + if (gpt->oe & mask) { + /* Output is enabled */ + if (ppc4xx_gpt_compare(gpt, i)) { + /* Comparison is OK */ + ppc4xx_gpt_set_output(gpt, i, gpt->ol & mask); + } else { + /* Comparison is KO */ + ppc4xx_gpt_set_output(gpt, i, gpt->ol & mask ? 0 : 1); + } + } + mask = mask >> 1; + } +} + +static void ppc4xx_gpt_set_irqs (ppc4xx_gpt_t *gpt) +{ + uint32_t mask; + int i; + + mask = 0x00008000; + for (i = 0; i < 5; i++) { + if (gpt->is & gpt->im & mask) + qemu_irq_raise(gpt->irqs[i]); + else + qemu_irq_lower(gpt->irqs[i]); + mask = mask >> 1; + } +} + +static void ppc4xx_gpt_compute_timer (ppc4xx_gpt_t *gpt) +{ + /* XXX: TODO */ +} + +static uint64_t ppc4xx_gpt_read(void *opaque, hwaddr addr, unsigned size) +{ + ppc4xx_gpt_t *gpt; + uint32_t ret; + int idx; + +#ifdef DEBUG_GPT + printf("%s: addr " TARGET_FMT_plx "\n", __func__, addr); +#endif + gpt = opaque; + switch (addr) { + case 0x00: + /* Time base counter */ + ret = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + gpt->tb_offset, + gpt->tb_freq, NANOSECONDS_PER_SECOND); + break; + case 0x10: + /* Output enable */ + ret = gpt->oe; + break; + case 0x14: + /* Output level */ + ret = gpt->ol; + break; + case 0x18: + /* Interrupt mask */ + ret = gpt->im; + break; + case 0x1C: + case 0x20: + /* Interrupt status */ + ret = gpt->is; + break; + case 0x24: + /* Interrupt enable */ + ret = gpt->ie; + break; + case 0x80 ... 0x90: + /* Compare timer */ + idx = (addr - 0x80) >> 2; + ret = gpt->comp[idx]; + break; + case 0xC0 ... 
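+    /*
+     * Register-window decode used by ppc4xx_gpt_read() and _write():
+     * the five compare registers sit at 0x80, 0x84, ..., 0x90 and the
+     * five masks at 0xC0..0xD0, so idx = (addr - base) >> 2 is the
+     * timer index 0..4; e.g. addr 0x88 gives (0x88 - 0x80) >> 2 == 2,
+     * i.e. comp[2].
+     */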
0xD0: + /* Compare mask */ + idx = (addr - 0xC0) >> 2; + ret = gpt->mask[idx]; + break; + default: + ret = -1; + break; + } + + return ret; +} + +static void ppc4xx_gpt_write(void *opaque, hwaddr addr, uint64_t value, + unsigned size) +{ + ppc4xx_gpt_t *gpt; + int idx; + +#ifdef DEBUG_I2C + printf("%s: addr " TARGET_FMT_plx " val %08" PRIx32 "\n", __func__, addr, + value); +#endif + gpt = opaque; + switch (addr) { + case 0x00: + /* Time base counter */ + gpt->tb_offset = muldiv64(value, NANOSECONDS_PER_SECOND, gpt->tb_freq) + - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + ppc4xx_gpt_compute_timer(gpt); + break; + case 0x10: + /* Output enable */ + gpt->oe = value & 0xF8000000; + ppc4xx_gpt_set_outputs(gpt); + break; + case 0x14: + /* Output level */ + gpt->ol = value & 0xF8000000; + ppc4xx_gpt_set_outputs(gpt); + break; + case 0x18: + /* Interrupt mask */ + gpt->im = value & 0x0000F800; + break; + case 0x1C: + /* Interrupt status set */ + gpt->is |= value & 0x0000F800; + ppc4xx_gpt_set_irqs(gpt); + break; + case 0x20: + /* Interrupt status clear */ + gpt->is &= ~(value & 0x0000F800); + ppc4xx_gpt_set_irqs(gpt); + break; + case 0x24: + /* Interrupt enable */ + gpt->ie = value & 0x0000F800; + ppc4xx_gpt_set_irqs(gpt); + break; + case 0x80 ... 0x90: + /* Compare timer */ + idx = (addr - 0x80) >> 2; + gpt->comp[idx] = value & 0xF8000000; + ppc4xx_gpt_compute_timer(gpt); + break; + case 0xC0 ... 0xD0: + /* Compare mask */ + idx = (addr - 0xC0) >> 2; + gpt->mask[idx] = value & 0xF8000000; + ppc4xx_gpt_compute_timer(gpt); + break; + } +} + +static const MemoryRegionOps gpt_ops = { + .read = ppc4xx_gpt_read, + .write = ppc4xx_gpt_write, + .valid.min_access_size = 4, + .valid.max_access_size = 4, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void ppc4xx_gpt_cb (void *opaque) +{ + ppc4xx_gpt_t *gpt; + + gpt = opaque; + ppc4xx_gpt_set_irqs(gpt); + ppc4xx_gpt_set_outputs(gpt); + ppc4xx_gpt_compute_timer(gpt); +} + +static void ppc4xx_gpt_reset (void *opaque) +{ + ppc4xx_gpt_t *gpt; + int i; + + gpt = opaque; + timer_del(gpt->timer); + gpt->oe = 0x00000000; + gpt->ol = 0x00000000; + gpt->im = 0x00000000; + gpt->is = 0x00000000; + gpt->ie = 0x00000000; + for (i = 0; i < 5; i++) { + gpt->comp[i] = 0x00000000; + gpt->mask[i] = 0x00000000; + } +} + +static void ppc4xx_gpt_init(hwaddr base, qemu_irq irqs[5]) +{ + ppc4xx_gpt_t *gpt; + int i; + + gpt = g_malloc0(sizeof(ppc4xx_gpt_t)); + for (i = 0; i < 5; i++) { + gpt->irqs[i] = irqs[i]; + } + gpt->timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &ppc4xx_gpt_cb, gpt); +#ifdef DEBUG_GPT + printf("%s: offset " TARGET_FMT_plx "\n", __func__, base); +#endif + memory_region_init_io(&gpt->iomem, NULL, &gpt_ops, gpt, "gpt", 0x0d4); + memory_region_add_subregion(get_system_memory(), base, &gpt->iomem); + qemu_register_reset(ppc4xx_gpt_reset, gpt); +} + +/*****************************************************************************/ +/* PowerPC 405EP */ +/* CPU control */ +enum { + PPC405EP_CPC0_PLLMR0 = 0x0F0, + PPC405EP_CPC0_BOOT = 0x0F1, + PPC405EP_CPC0_EPCTL = 0x0F3, + PPC405EP_CPC0_PLLMR1 = 0x0F4, + PPC405EP_CPC0_UCR = 0x0F5, + PPC405EP_CPC0_SRR = 0x0F6, + PPC405EP_CPC0_JTAGID = 0x0F7, + PPC405EP_CPC0_PCI = 0x0F9, +#if 0 + PPC405EP_CPC0_ER = xxx, + PPC405EP_CPC0_FR = xxx, + PPC405EP_CPC0_SR = xxx, +#endif +}; + +enum { + PPC405EP_CPU_CLK = 0, + PPC405EP_PLB_CLK = 1, + PPC405EP_OPB_CLK = 2, + PPC405EP_EBC_CLK = 3, + PPC405EP_MAL_CLK = 4, + PPC405EP_PCI_CLK = 5, + PPC405EP_UART0_CLK = 6, + PPC405EP_UART1_CLK = 7, + PPC405EP_CLK_NB = 8, +}; + +typedef struct 
ppc405ep_cpc_t ppc405ep_cpc_t; +struct ppc405ep_cpc_t { + uint32_t sysclk; + clk_setup_t clk_setup[PPC405EP_CLK_NB]; + uint32_t boot; + uint32_t epctl; + uint32_t pllmr[2]; + uint32_t ucr; + uint32_t srr; + uint32_t jtagid; + uint32_t pci; + /* Clock and power management */ + uint32_t er; + uint32_t fr; + uint32_t sr; +}; + +static void ppc405ep_compute_clocks (ppc405ep_cpc_t *cpc) +{ + uint32_t CPU_clk, PLB_clk, OPB_clk, EBC_clk, MAL_clk, PCI_clk; + uint32_t UART0_clk, UART1_clk; + uint64_t VCO_out, PLL_out; + int M, D; + + VCO_out = 0; + if ((cpc->pllmr[1] & 0x80000000) && !(cpc->pllmr[1] & 0x40000000)) { + M = (((cpc->pllmr[1] >> 20) - 1) & 0xF) + 1; /* FBMUL */ +#ifdef DEBUG_CLOCKS_LL + printf("FBMUL %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 20) & 0xF, M); +#endif + D = 8 - ((cpc->pllmr[1] >> 16) & 0x7); /* FWDA */ +#ifdef DEBUG_CLOCKS_LL + printf("FWDA %01" PRIx32 " %d\n", (cpc->pllmr[1] >> 16) & 0x7, D); +#endif + VCO_out = (uint64_t)cpc->sysclk * M * D; + if (VCO_out < 500000000UL || VCO_out > 1000000000UL) { + /* Error - unlock the PLL */ + printf("VCO out of range %" PRIu64 "\n", VCO_out); +#if 0 + cpc->pllmr[1] &= ~0x80000000; + goto pll_bypass; +#endif + } + PLL_out = VCO_out / D; + /* Pretend the PLL is locked */ + cpc->boot |= 0x00000001; + } else { +#if 0 + pll_bypass: +#endif + PLL_out = cpc->sysclk; + if (cpc->pllmr[1] & 0x40000000) { + /* Pretend the PLL is not locked */ + cpc->boot &= ~0x00000001; + } + } + /* Now, compute all other clocks */ + D = ((cpc->pllmr[0] >> 20) & 0x3) + 1; /* CCDV */ +#ifdef DEBUG_CLOCKS_LL + printf("CCDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 20) & 0x3, D); +#endif + CPU_clk = PLL_out / D; + D = ((cpc->pllmr[0] >> 16) & 0x3) + 1; /* CBDV */ +#ifdef DEBUG_CLOCKS_LL + printf("CBDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 16) & 0x3, D); +#endif + PLB_clk = CPU_clk / D; + D = ((cpc->pllmr[0] >> 12) & 0x3) + 1; /* OPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("OPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 12) & 0x3, D); +#endif + OPB_clk = PLB_clk / D; + D = ((cpc->pllmr[0] >> 8) & 0x3) + 2; /* EPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("EPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 8) & 0x3, D); +#endif + EBC_clk = PLB_clk / D; + D = ((cpc->pllmr[0] >> 4) & 0x3) + 1; /* MPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("MPDV %01" PRIx32 " %d\n", (cpc->pllmr[0] >> 4) & 0x3, D); +#endif + MAL_clk = PLB_clk / D; + D = (cpc->pllmr[0] & 0x3) + 1; /* PPDV */ +#ifdef DEBUG_CLOCKS_LL + printf("PPDV %01" PRIx32 " %d\n", cpc->pllmr[0] & 0x3, D); +#endif + PCI_clk = PLB_clk / D; + D = ((cpc->ucr - 1) & 0x7F) + 1; /* U0DIV */ +#ifdef DEBUG_CLOCKS_LL + printf("U0DIV %01" PRIx32 " %d\n", cpc->ucr & 0x7F, D); +#endif + UART0_clk = PLL_out / D; + D = (((cpc->ucr >> 8) - 1) & 0x7F) + 1; /* U1DIV */ +#ifdef DEBUG_CLOCKS_LL + printf("U1DIV %01" PRIx32 " %d\n", (cpc->ucr >> 8) & 0x7F, D); +#endif + UART1_clk = PLL_out / D; +#ifdef DEBUG_CLOCKS + printf("Setup PPC405EP clocks - sysclk %" PRIu32 " VCO %" PRIu64 + " PLL out %" PRIu64 " Hz\n", cpc->sysclk, VCO_out, PLL_out); + printf("CPU %" PRIu32 " PLB %" PRIu32 " OPB %" PRIu32 " EBC %" PRIu32 + " MAL %" PRIu32 " PCI %" PRIu32 " UART0 %" PRIu32 + " UART1 %" PRIu32 "\n", + CPU_clk, PLB_clk, OPB_clk, EBC_clk, MAL_clk, PCI_clk, + UART0_clk, UART1_clk); +#endif + /* Setup CPU clocks */ + clk_setup(&cpc->clk_setup[PPC405EP_CPU_CLK], CPU_clk); + /* Setup PLB clock */ + clk_setup(&cpc->clk_setup[PPC405EP_PLB_CLK], PLB_clk); + /* Setup OPB clock */ + clk_setup(&cpc->clk_setup[PPC405EP_OPB_CLK], OPB_clk); + /* Setup external clock */ + 
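+    /*
+     * Worked example of the derivation above, with illustrative numbers
+     * only: sysclk = 33333333 Hz, FBMUL M = 8 and FWDA D = 2 give
+     * VCO_out = 33333333 * 8 * 2 = 533 MHz (inside the 500 MHz..1 GHz
+     * window) and PLL_out = VCO_out / 2 = 267 MHz, i.e. sysclk * M.
+     * CCDV = 1 then yields a 267 MHz CPU clock and CBDV = 2 a 133 MHz
+     * PLB clock, and so on down the divider chain.
+     */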
clk_setup(&cpc->clk_setup[PPC405EP_EBC_CLK], EBC_clk); + /* Setup MAL clock */ + clk_setup(&cpc->clk_setup[PPC405EP_MAL_CLK], MAL_clk); + /* Setup PCI clock */ + clk_setup(&cpc->clk_setup[PPC405EP_PCI_CLK], PCI_clk); + /* Setup UART0 clock */ + clk_setup(&cpc->clk_setup[PPC405EP_UART0_CLK], UART0_clk); + /* Setup UART1 clock */ + clk_setup(&cpc->clk_setup[PPC405EP_UART1_CLK], UART1_clk); +} + +static uint32_t dcr_read_epcpc (void *opaque, int dcrn) +{ + ppc405ep_cpc_t *cpc; + uint32_t ret; + + cpc = opaque; + switch (dcrn) { + case PPC405EP_CPC0_BOOT: + ret = cpc->boot; + break; + case PPC405EP_CPC0_EPCTL: + ret = cpc->epctl; + break; + case PPC405EP_CPC0_PLLMR0: + ret = cpc->pllmr[0]; + break; + case PPC405EP_CPC0_PLLMR1: + ret = cpc->pllmr[1]; + break; + case PPC405EP_CPC0_UCR: + ret = cpc->ucr; + break; + case PPC405EP_CPC0_SRR: + ret = cpc->srr; + break; + case PPC405EP_CPC0_JTAGID: + ret = cpc->jtagid; + break; + case PPC405EP_CPC0_PCI: + ret = cpc->pci; + break; + default: + /* Avoid gcc warning */ + ret = 0; + break; + } + + return ret; +} + +static void dcr_write_epcpc (void *opaque, int dcrn, uint32_t val) +{ + ppc405ep_cpc_t *cpc; + + cpc = opaque; + switch (dcrn) { + case PPC405EP_CPC0_BOOT: + /* Read-only register */ + break; + case PPC405EP_CPC0_EPCTL: + /* Don't care for now */ + cpc->epctl = val & 0xC00000F3; + break; + case PPC405EP_CPC0_PLLMR0: + cpc->pllmr[0] = val & 0x00633333; + ppc405ep_compute_clocks(cpc); + break; + case PPC405EP_CPC0_PLLMR1: + cpc->pllmr[1] = val & 0xC0F73FFF; + ppc405ep_compute_clocks(cpc); + break; + case PPC405EP_CPC0_UCR: + /* UART control - don't care for now */ + cpc->ucr = val & 0x003F7F7F; + break; + case PPC405EP_CPC0_SRR: + cpc->srr = val; + break; + case PPC405EP_CPC0_JTAGID: + /* Read-only */ + break; + case PPC405EP_CPC0_PCI: + cpc->pci = val; + break; + } +} + +static void ppc405ep_cpc_reset (void *opaque) +{ + ppc405ep_cpc_t *cpc = opaque; + + cpc->boot = 0x00000010; /* Boot from PCI - IIC EEPROM disabled */ + cpc->epctl = 0x00000000; + cpc->pllmr[0] = 0x00011010; + cpc->pllmr[1] = 0x40000000; + cpc->ucr = 0x00000000; + cpc->srr = 0x00040000; + cpc->pci = 0x00000000; + cpc->er = 0x00000000; + cpc->fr = 0x00000000; + cpc->sr = 0x00000000; + ppc405ep_compute_clocks(cpc); +} + +/* XXX: sysclk should be between 25 and 100 MHz */ +static void ppc405ep_cpc_init (CPUPPCState *env, clk_setup_t clk_setup[8], + uint32_t sysclk) +{ + ppc405ep_cpc_t *cpc; + + cpc = g_malloc0(sizeof(ppc405ep_cpc_t)); + memcpy(cpc->clk_setup, clk_setup, + PPC405EP_CLK_NB * sizeof(clk_setup_t)); + cpc->jtagid = 0x20267049; + cpc->sysclk = sysclk; + qemu_register_reset(&ppc405ep_cpc_reset, cpc); + ppc_dcr_register(env, PPC405EP_CPC0_BOOT, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_EPCTL, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_PLLMR0, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_PLLMR1, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_UCR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_SRR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_JTAGID, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_PCI, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); +#if 0 + ppc_dcr_register(env, PPC405EP_CPC0_ER, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + ppc_dcr_register(env, PPC405EP_CPC0_FR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); + 
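+    /*
+     * Guest-visible contract of the model above: firmware typically
+     * polls bit 0 of CPC0_BOOT for PLL lock before trusting the derived
+     * clocks, and ppc405ep_compute_clocks() simply forces that bit when
+     * the PLL is enabled and in range. Sketch, using the handler:
+     *
+     *     if (dcr_read_epcpc(cpc, PPC405EP_CPC0_BOOT) & 1) {
+     *         // PLL reports locked; the computed clock tree applies
+     *     }
+     */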
ppc_dcr_register(env, PPC405EP_CPC0_SR, cpc, + &dcr_read_epcpc, &dcr_write_epcpc); +#endif +} + +CPUPPCState *ppc405ep_init(MemoryRegion *address_space_mem, + MemoryRegion ram_memories[2], + hwaddr ram_bases[2], + hwaddr ram_sizes[2], + uint32_t sysclk, DeviceState **uicdevp, + int do_init) +{ + clk_setup_t clk_setup[PPC405EP_CLK_NB], tlb_clk_setup; + qemu_irq dma_irqs[4], gpt_irqs[5], mal_irqs[4]; + PowerPCCPU *cpu; + CPUPPCState *env; + DeviceState *uicdev; + SysBusDevice *uicsbd; + + memset(clk_setup, 0, sizeof(clk_setup)); + /* init CPUs */ + cpu = ppc4xx_init(POWERPC_CPU_TYPE_NAME("405ep"), + &clk_setup[PPC405EP_CPU_CLK], + &tlb_clk_setup, sysclk); + env = &cpu->env; + clk_setup[PPC405EP_CPU_CLK].cb = tlb_clk_setup.cb; + clk_setup[PPC405EP_CPU_CLK].opaque = tlb_clk_setup.opaque; + /* Internal devices init */ + /* Memory mapped devices registers */ + /* PLB arbitrer */ + ppc4xx_plb_init(env); + /* PLB to OPB bridge */ + ppc4xx_pob_init(env); + /* OBP arbitrer */ + ppc4xx_opba_init(0xef600600); + /* Initialize timers */ + ppc_booke_timers_init(cpu, sysclk, 0); + /* Universal interrupt controller */ + uicdev = qdev_new(TYPE_PPC_UIC); + uicsbd = SYS_BUS_DEVICE(uicdev); + + object_property_set_link(OBJECT(uicdev), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(uicsbd, &error_fatal); + + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + + *uicdevp = uicdev; + + /* SDRAM controller */ + /* XXX 405EP has no ECC interrupt */ + ppc4xx_sdram_init(env, qdev_get_gpio_in(uicdev, 17), 2, ram_memories, + ram_bases, ram_sizes, do_init); + /* External bus controller */ + ppc405_ebc_init(env); + /* DMA controller */ + dma_irqs[0] = qdev_get_gpio_in(uicdev, 5); + dma_irqs[1] = qdev_get_gpio_in(uicdev, 6); + dma_irqs[2] = qdev_get_gpio_in(uicdev, 7); + dma_irqs[3] = qdev_get_gpio_in(uicdev, 8); + ppc405_dma_init(env, dma_irqs); + /* IIC controller */ + sysbus_create_simple(TYPE_PPC4xx_I2C, 0xef600500, + qdev_get_gpio_in(uicdev, 2)); + /* GPIO */ + ppc405_gpio_init(0xef600700); + /* Serial ports */ + if (serial_hd(0) != NULL) { + serial_mm_init(address_space_mem, 0xef600300, 0, + qdev_get_gpio_in(uicdev, 0), + PPC_SERIAL_MM_BAUDBASE, serial_hd(0), + DEVICE_BIG_ENDIAN); + } + if (serial_hd(1) != NULL) { + serial_mm_init(address_space_mem, 0xef600400, 0, + qdev_get_gpio_in(uicdev, 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(1), + DEVICE_BIG_ENDIAN); + } + /* OCM */ + ppc405_ocm_init(env); + /* GPT */ + gpt_irqs[0] = qdev_get_gpio_in(uicdev, 19); + gpt_irqs[1] = qdev_get_gpio_in(uicdev, 20); + gpt_irqs[2] = qdev_get_gpio_in(uicdev, 21); + gpt_irqs[3] = qdev_get_gpio_in(uicdev, 22); + gpt_irqs[4] = qdev_get_gpio_in(uicdev, 23); + ppc4xx_gpt_init(0xef600000, gpt_irqs); + /* PCI */ + /* Uses UIC IRQs 3, 16, 18 */ + /* MAL */ + mal_irqs[0] = qdev_get_gpio_in(uicdev, 11); + mal_irqs[1] = qdev_get_gpio_in(uicdev, 12); + mal_irqs[2] = qdev_get_gpio_in(uicdev, 13); + mal_irqs[3] = qdev_get_gpio_in(uicdev, 14); + ppc4xx_mal_init(env, 4, 2, mal_irqs); + /* Ethernet */ + /* Uses UIC IRQs 9, 15, 17 */ + /* CPU control */ + ppc405ep_cpc_init(env, clk_setup, sysclk); + + return env; +} diff --git a/hw/ppc/ppc440.h b/hw/ppc/ppc440.h new file mode 100644 index 000000000..7cef93612 --- /dev/null +++ b/hw/ppc/ppc440.h @@ -0,0 +1,27 @@ +/* + * QEMU PowerPC 440 shared definitions + * + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2018 BALATON Zoltan + * 
+ * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#ifndef PPC440_H +#define PPC440_H + +#include "hw/ppc/ppc.h" + +void ppc4xx_l2sram_init(CPUPPCState *env); +void ppc4xx_cpr_init(CPUPPCState *env); +void ppc4xx_sdr_init(CPUPPCState *env); +void ppc440_sdram_init(CPUPPCState *env, int nbanks, + MemoryRegion *ram_memories, + hwaddr *ram_bases, hwaddr *ram_sizes, + int do_init); +void ppc4xx_ahb_init(CPUPPCState *env); +void ppc4xx_dma_init(CPUPPCState *env, int dcr_base); +void ppc460ex_pcie_init(CPUPPCState *env); + +#endif /* PPC440_H */ diff --git a/hw/ppc/ppc440_bamboo.c b/hw/ppc/ppc440_bamboo.c new file mode 100644 index 000000000..7fb620b9a --- /dev/null +++ b/hw/ppc/ppc440_bamboo.c @@ -0,0 +1,307 @@ +/* + * QEMU PowerPC 440 Bamboo board emulation + * + * Copyright 2007 IBM Corporation. + * Authors: + * Jerone Young <jyoung5@us.ibm.com> + * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com> + * Hollis Blanchard <hollisb@us.ibm.com> + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/error-report.h" +#include "net/net.h" +#include "hw/pci/pci.h" +#include "hw/boards.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "sysemu/device_tree.h" +#include "hw/loader.h" +#include "elf.h" +#include "hw/char/serial.h" +#include "hw/ppc/ppc.h" +#include "ppc405.h" +#include "sysemu/sysemu.h" +#include "sysemu/reset.h" +#include "hw/sysbus.h" +#include "hw/intc/ppc-uic.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" + +#define BINARY_DEVICE_TREE_FILE "bamboo.dtb" + +/* from u-boot */ +#define KERNEL_ADDR 0x1000000 +#define FDT_ADDR 0x1800000 +#define RAMDISK_ADDR 0x1900000 + +#define PPC440EP_PCI_CONFIG 0xeec00000 +#define PPC440EP_PCI_INTACK 0xeed00000 +#define PPC440EP_PCI_SPECIAL 0xeed00000 +#define PPC440EP_PCI_REGS 0xef400000 +#define PPC440EP_PCI_IO 0xe8000000 +#define PPC440EP_PCI_IOLEN 0x00010000 + +#define PPC440EP_SDRAM_NR_BANKS 4 + +static const ram_addr_t ppc440ep_sdram_bank_sizes[] = { + 256 * MiB, 128 * MiB, 64 * MiB, 32 * MiB, 16 * MiB, 8 * MiB, 0 +}; + +static hwaddr entry; + +static int bamboo_load_device_tree(hwaddr addr, + uint32_t ramsize, + hwaddr initrd_base, + hwaddr initrd_size, + const char *kernel_cmdline) +{ + int ret = -1; + uint32_t mem_reg_property[] = { 0, 0, cpu_to_be32(ramsize) }; + char *filename; + int fdt_size; + void *fdt; + uint32_t tb_freq = 400000000; + uint32_t clock_freq = 400000000; + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE); + if (!filename) { + return -1; + } + fdt = load_device_tree(filename, &fdt_size); + g_free(filename); + if (fdt == NULL) { + return -1; + } + + /* Manipulate device tree in memory. 
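+ * The calls below patch the prebuilt bamboo.dtb in host memory before
+ * it is copied into guest RAM: qemu_fdt_setprop() stores a raw
+ * property, qemu_fdt_setprop_cell() a single 32-bit cell and
+ * qemu_fdt_setprop_string() a NUL-terminated string. A further, purely
+ * hypothetical property would follow the same pattern:
+ *
+ *     qemu_fdt_setprop_cell(fdt, "/chosen", "qemu,example-cell", 1);
+ *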
*/ + + ret = qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property, + sizeof(mem_reg_property)); + if (ret < 0) + fprintf(stderr, "couldn't set /memory/reg\n"); + + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", + initrd_base); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/linux,initrd-start\n"); + + ret = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/linux,initrd-end\n"); + + ret = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", + kernel_cmdline); + if (ret < 0) + fprintf(stderr, "couldn't set /chosen/bootargs\n"); + + /* Copy data from the host device tree into the guest. Since the guest can + * directly access the timebase without host involvement, we must expose + * the correct frequencies. */ + if (kvm_enabled()) { + tb_freq = kvmppc_get_tbfreq(); + clock_freq = kvmppc_get_clockfreq(); + } + + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency", + clock_freq); + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency", + tb_freq); + + rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr); + g_free(fdt); + return 0; +} + +/* Create reset TLB entries for BookE, spanning the 32bit addr space. */ +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0x80000000 */ + tlb->EPN = va & TARGET_PAGE_MASK; + tlb->RPN = pa & TARGET_PAGE_MASK; + tlb->PID = 0; + + tlb = &env->tlb.tlbe[1]; + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0xffffffff */ + tlb->EPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->RPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->PID = 0; +} + +static void main_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + cpu_reset(CPU(cpu)); + env->gpr[1] = (16 * MiB) - 8; + env->gpr[3] = FDT_ADDR; + env->nip = entry; + + /* Create a mapping for the kernel. 
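+ * mmubooke_create_initial_mapping() (defined above, called just below)
+ * installs two TLB entries, each spanning 1U << 31 bytes with
+ * EPN == RPN, so together they identity-map the entire 32-bit address
+ * space (2 x 2 GiB = 4 GiB) until the kernel installs its own
+ * translations.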
*/ + mmubooke_create_initial_mapping(env, 0, 0); +} + +static void bamboo_init(MachineState *machine) +{ + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + const char *initrd_filename = machine->initrd_filename; + unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 }; + MemoryRegion *address_space_mem = get_system_memory(); + MemoryRegion *isa = g_new(MemoryRegion, 1); + MemoryRegion *ram_memories = g_new(MemoryRegion, PPC440EP_SDRAM_NR_BANKS); + hwaddr ram_bases[PPC440EP_SDRAM_NR_BANKS]; + hwaddr ram_sizes[PPC440EP_SDRAM_NR_BANKS]; + PCIBus *pcibus; + PowerPCCPU *cpu; + CPUPPCState *env; + target_long initrd_size = 0; + DeviceState *dev; + DeviceState *uicdev; + SysBusDevice *uicsbd; + int success; + int i; + + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + + if (env->mmu_model != POWERPC_MMU_BOOKE) { + error_report("MMU model %i not supported by this machine", + env->mmu_model); + exit(1); + } + + qemu_register_reset(main_cpu_reset, cpu); + ppc_booke_timers_init(cpu, 400000000, 0); + ppc_dcr_init(env, NULL, NULL); + + /* interrupt controller */ + uicdev = qdev_new(TYPE_PPC_UIC); + uicsbd = SYS_BUS_DEVICE(uicdev); + + object_property_set_link(OBJECT(uicdev), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(uicsbd, &error_fatal); + + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + + /* SDRAM controller */ + memset(ram_bases, 0, sizeof(ram_bases)); + memset(ram_sizes, 0, sizeof(ram_sizes)); + ppc4xx_sdram_banks(machine->ram, PPC440EP_SDRAM_NR_BANKS, ram_memories, + ram_bases, ram_sizes, ppc440ep_sdram_bank_sizes); + /* XXX 440EP's ECC interrupts are on UIC1, but we've only created UIC0. */ + ppc4xx_sdram_init(env, + qdev_get_gpio_in(uicdev, 14), + PPC440EP_SDRAM_NR_BANKS, ram_memories, + ram_bases, ram_sizes, 1); + + /* PCI */ + dev = sysbus_create_varargs(TYPE_PPC4xx_PCI_HOST_BRIDGE, + PPC440EP_PCI_CONFIG, + qdev_get_gpio_in(uicdev, pci_irq_nrs[0]), + qdev_get_gpio_in(uicdev, pci_irq_nrs[1]), + qdev_get_gpio_in(uicdev, pci_irq_nrs[2]), + qdev_get_gpio_in(uicdev, pci_irq_nrs[3]), + NULL); + pcibus = (PCIBus *)qdev_get_child_bus(dev, "pci.0"); + if (!pcibus) { + error_report("couldn't create PCI controller"); + exit(1); + } + + memory_region_init_alias(isa, NULL, "isa_mmio", + get_system_io(), 0, PPC440EP_PCI_IOLEN); + memory_region_add_subregion(get_system_memory(), PPC440EP_PCI_IO, isa); + + if (serial_hd(0) != NULL) { + serial_mm_init(address_space_mem, 0xef600300, 0, + qdev_get_gpio_in(uicdev, 0), + PPC_SERIAL_MM_BAUDBASE, serial_hd(0), + DEVICE_BIG_ENDIAN); + } + if (serial_hd(1) != NULL) { + serial_mm_init(address_space_mem, 0xef600400, 0, + qdev_get_gpio_in(uicdev, 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(1), + DEVICE_BIG_ENDIAN); + } + + if (pcibus) { + /* Register network interfaces. */ + for (i = 0; i < nb_nics; i++) { + /* There are no PCI NICs on the Bamboo board, but there are + * PCI slots, so we can pick whatever default model we want. */ + pci_nic_init_nofail(&nd_table[i], pcibus, "e1000", NULL); + } + } + + /* Load kernel. 
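+ * load_uimage() is tried first; if the image is not a U-Boot uImage
+ * the code falls back to load_elf() and takes the entry point from
+ * whichever loader succeeded (a raw-binary fallback is still an XXX
+ * below). Failure of both is fatal.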
*/ + if (kernel_filename) { + hwaddr loadaddr = LOAD_UIMAGE_LOADADDR_INVALID; + success = load_uimage(kernel_filename, &entry, &loadaddr, NULL, + NULL, NULL); + if (success < 0) { + uint64_t elf_entry; + success = load_elf(kernel_filename, NULL, NULL, NULL, &elf_entry, + NULL, NULL, NULL, 1, PPC_ELF_MACHINE, 0, 0); + entry = elf_entry; + } + /* XXX try again as binary */ + if (success < 0) { + error_report("could not load kernel '%s'", kernel_filename); + exit(1); + } + } + + /* Load initrd. */ + if (initrd_filename) { + initrd_size = load_image_targphys(initrd_filename, RAMDISK_ADDR, + machine->ram_size - RAMDISK_ADDR); + + if (initrd_size < 0) { + error_report("could not load ram disk '%s' at %x", + initrd_filename, RAMDISK_ADDR); + exit(1); + } + } + + /* If we're loading a kernel directly, we must load the device tree too. */ + if (kernel_filename) { + if (bamboo_load_device_tree(FDT_ADDR, machine->ram_size, RAMDISK_ADDR, + initrd_size, kernel_cmdline) < 0) { + error_report("couldn't load device tree"); + exit(1); + } + } +} + +static void bamboo_machine_init(MachineClass *mc) +{ + mc->desc = "bamboo"; + mc->init = bamboo_init; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("440epb"); + mc->default_ram_id = "ppc4xx.sdram"; +} + +DEFINE_MACHINE("bamboo", bamboo_machine_init) diff --git a/hw/ppc/ppc440_pcix.c b/hw/ppc/ppc440_pcix.c new file mode 100644 index 000000000..788d25514 --- /dev/null +++ b/hw/ppc/ppc440_pcix.c @@ -0,0 +1,538 @@ +/* + * Emulation of the ibm,plb-pcix PCI controller + * This is found in some 440 SoCs e.g. the 460EX. + * + * Copyright (c) 2016-2018 BALATON Zoltan + * + * Derived from ppc4xx_pci.c and pci-host/ppce500.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "trace.h" +#include "qom/object.h" + +struct PLBOutMap { + uint64_t la; + uint64_t pcia; + uint32_t sa; + MemoryRegion mr; +}; + +struct PLBInMap { + uint64_t sa; + uint64_t la; + MemoryRegion mr; +}; + +#define TYPE_PPC440_PCIX_HOST_BRIDGE "ppc440-pcix-host" +OBJECT_DECLARE_SIMPLE_TYPE(PPC440PCIXState, PPC440_PCIX_HOST_BRIDGE) + +#define PPC440_PCIX_NR_POMS 3 +#define PPC440_PCIX_NR_PIMS 3 + +struct PPC440PCIXState { + PCIHostState parent_obj; + + PCIDevice *dev; + struct PLBOutMap pom[PPC440_PCIX_NR_POMS]; + struct PLBInMap pim[PPC440_PCIX_NR_PIMS]; + uint32_t sts; + qemu_irq irq; + AddressSpace bm_as; + MemoryRegion bm; + + MemoryRegion container; + MemoryRegion iomem; + MemoryRegion busmem; +}; + +#define PPC440_REG_BASE 0x80000 +#define PPC440_REG_SIZE 0xff + +#define PCIC0_CFGADDR 0x0 +#define PCIC0_CFGDATA 0x4 + +#define PCIX0_POM0LAL 0x68 +#define PCIX0_POM0LAH 0x6c +#define PCIX0_POM0SA 0x70 +#define PCIX0_POM0PCIAL 0x74 +#define PCIX0_POM0PCIAH 0x78 +#define PCIX0_POM1LAL 0x7c +#define PCIX0_POM1LAH 0x80 +#define PCIX0_POM1SA 0x84 +#define PCIX0_POM1PCIAL 0x88 +#define PCIX0_POM1PCIAH 0x8c +#define PCIX0_POM2SA 0x90 + +#define PCIX0_PIM0SAL 0x98 +#define PCIX0_PIM0LAL 0x9c +#define PCIX0_PIM0LAH 0xa0 +#define PCIX0_PIM1SA 0xa4 +#define PCIX0_PIM1LAL 0xa8 +#define PCIX0_PIM1LAH 0xac +#define PCIX0_PIM2SAL 0xb0 +#define PCIX0_PIM2LAL 0xb4 +#define PCIX0_PIM2LAH 0xb8 +#define PCIX0_PIM0SAH 0xf8 +#define PCIX0_PIM2SAH 0xfc + +#define PCIX0_STS 0xe0 + +#define PCI_ALL_SIZE (PPC440_REG_BASE + PPC440_REG_SIZE) + +static void ppc440_pcix_clear_region(MemoryRegion *parent, + MemoryRegion *mem) +{ + if (memory_region_is_mapped(mem)) { + memory_region_del_subregion(parent, mem); + object_unparent(OBJECT(mem)); + } +} + +/* DMA mapping */ +static void ppc440_pcix_update_pim(PPC440PCIXState *s, int idx) +{ + MemoryRegion *mem = &s->pim[idx].mr; + char *name; + uint64_t size; + + /* Before we modify anything, unmap and destroy the region */ + ppc440_pcix_clear_region(&s->bm, mem); + + if (!(s->pim[idx].sa & 1)) { + /* Not enabled, nothing to do */ + return; + } + + name = g_strdup_printf("PCI Inbound Window %d", idx); + size = ~(s->pim[idx].sa & ~7ULL) + 1; + memory_region_init_alias(mem, OBJECT(s), name, get_system_memory(), + s->pim[idx].la, size); + memory_region_add_subregion_overlap(&s->bm, 0, mem, -1); + g_free(name); + + trace_ppc440_pcix_update_pim(idx, size, s->pim[idx].la); +} + +/* BAR mapping */ +static void ppc440_pcix_update_pom(PPC440PCIXState *s, int idx) +{ + MemoryRegion *mem = &s->pom[idx].mr; + MemoryRegion *address_space_mem = get_system_memory(); + char *name; + uint32_t size; + + /* Before we modify anything, unmap and destroy the region */ + ppc440_pcix_clear_region(address_space_mem, mem); + + if (!(s->pom[idx].sa & 1)) { + /* Not enabled, nothing to do */ + return; + } + + name = g_strdup_printf("PCI Outbound Window %d", idx); + size = ~(s->pom[idx].sa & 0xfffffffe) + 1; + if (!size) { + size = 0xffffffff; + } + memory_region_init_alias(mem, OBJECT(s), name, &s->busmem, + s->pom[idx].pcia, size); + memory_region_add_subregion(address_space_mem, s->pom[idx].la, mem); + g_free(name); + + trace_ppc440_pcix_update_pom(idx, size, s->pom[idx].la, s->pom[idx].pcia); +} + +static void ppc440_pcix_reg_write4(void *opaque, hwaddr 
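+/*
+ * Window sizing above: ppc440_pcix_update_pim() decodes the inbound
+ * size as ~(sa & ~7ULL) + 1, so a guest programming PIM0SAH:PIM0SAL =
+ * 0xffffffff:00000001 (enable bit set) gets ~0xffffffff00000000 + 1 =
+ * 4 GiB of system memory aliased at pim[].la. The outbound path is the
+ * same arithmetic in 32 bits, with size forced to 0xffffffff when the
+ * computation wraps to zero.
+ */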
addr, + uint64_t val, unsigned size) +{ + struct PPC440PCIXState *s = opaque; + + trace_ppc440_pcix_reg_write(addr, val, size); + switch (addr) { + case PCI_VENDOR_ID ... PCI_MAX_LAT: + stl_le_p(s->dev->config + addr, val); + break; + + case PCIX0_POM0LAL: + s->pom[0].la &= 0xffffffff00000000ULL; + s->pom[0].la |= val; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0LAH: + s->pom[0].la &= 0xffffffffULL; + s->pom[0].la |= val << 32; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0SA: + s->pom[0].sa = val; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0PCIAL: + s->pom[0].pcia &= 0xffffffff00000000ULL; + s->pom[0].pcia |= val; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM0PCIAH: + s->pom[0].pcia &= 0xffffffffULL; + s->pom[0].pcia |= val << 32; + ppc440_pcix_update_pom(s, 0); + break; + case PCIX0_POM1LAL: + s->pom[1].la &= 0xffffffff00000000ULL; + s->pom[1].la |= val; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1LAH: + s->pom[1].la &= 0xffffffffULL; + s->pom[1].la |= val << 32; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1SA: + s->pom[1].sa = val; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1PCIAL: + s->pom[1].pcia &= 0xffffffff00000000ULL; + s->pom[1].pcia |= val; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM1PCIAH: + s->pom[1].pcia &= 0xffffffffULL; + s->pom[1].pcia |= val << 32; + ppc440_pcix_update_pom(s, 1); + break; + case PCIX0_POM2SA: + s->pom[2].sa = val; + break; + + case PCIX0_PIM0SAL: + s->pim[0].sa &= 0xffffffff00000000ULL; + s->pim[0].sa |= val; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM0LAL: + s->pim[0].la &= 0xffffffff00000000ULL; + s->pim[0].la |= val; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM0LAH: + s->pim[0].la &= 0xffffffffULL; + s->pim[0].la |= val << 32; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM1SA: + s->pim[1].sa = val; + ppc440_pcix_update_pim(s, 1); + break; + case PCIX0_PIM1LAL: + s->pim[1].la &= 0xffffffff00000000ULL; + s->pim[1].la |= val; + ppc440_pcix_update_pim(s, 1); + break; + case PCIX0_PIM1LAH: + s->pim[1].la &= 0xffffffffULL; + s->pim[1].la |= val << 32; + ppc440_pcix_update_pim(s, 1); + break; + case PCIX0_PIM2SAL: + s->pim[2].sa &= 0xffffffff00000000ULL; + s->pim[2].sa |= val; + ppc440_pcix_update_pim(s, 2); + break; + case PCIX0_PIM2LAL: + s->pim[2].la &= 0xffffffff00000000ULL; + s->pim[2].la |= val; + ppc440_pcix_update_pim(s, 2); + break; + case PCIX0_PIM2LAH: + s->pim[2].la &= 0xffffffffULL; + s->pim[2].la |= val << 32; + ppc440_pcix_update_pim(s, 2); + break; + + case PCIX0_STS: + s->sts = val; + break; + + case PCIX0_PIM0SAH: + s->pim[0].sa &= 0xffffffffULL; + s->pim[0].sa |= val << 32; + ppc440_pcix_update_pim(s, 0); + break; + case PCIX0_PIM2SAH: + s->pim[2].sa &= 0xffffffffULL; + s->pim[2].sa |= val << 32; + ppc440_pcix_update_pim(s, 2); + break; + + default: + qemu_log_mask(LOG_UNIMP, + "%s: unhandled PCI internal register 0x%"HWADDR_PRIx"\n", + __func__, addr); + break; + } +} + +static uint64_t ppc440_pcix_reg_read4(void *opaque, hwaddr addr, + unsigned size) +{ + struct PPC440PCIXState *s = opaque; + uint32_t val; + + switch (addr) { + case PCI_VENDOR_ID ... 
PCI_MAX_LAT: + val = ldl_le_p(s->dev->config + addr); + break; + + case PCIX0_POM0LAL: + val = s->pom[0].la; + break; + case PCIX0_POM0LAH: + val = s->pom[0].la >> 32; + break; + case PCIX0_POM0SA: + val = s->pom[0].sa; + break; + case PCIX0_POM0PCIAL: + val = s->pom[0].pcia; + break; + case PCIX0_POM0PCIAH: + val = s->pom[0].pcia >> 32; + break; + case PCIX0_POM1LAL: + val = s->pom[1].la; + break; + case PCIX0_POM1LAH: + val = s->pom[1].la >> 32; + break; + case PCIX0_POM1SA: + val = s->pom[1].sa; + break; + case PCIX0_POM1PCIAL: + val = s->pom[1].pcia; + break; + case PCIX0_POM1PCIAH: + val = s->pom[1].pcia >> 32; + break; + case PCIX0_POM2SA: + val = s->pom[2].sa; + break; + + case PCIX0_PIM0SAL: + val = s->pim[0].sa; + break; + case PCIX0_PIM0LAL: + val = s->pim[0].la; + break; + case PCIX0_PIM0LAH: + val = s->pim[0].la >> 32; + break; + case PCIX0_PIM1SA: + val = s->pim[1].sa; + break; + case PCIX0_PIM1LAL: + val = s->pim[1].la; + break; + case PCIX0_PIM1LAH: + val = s->pim[1].la >> 32; + break; + case PCIX0_PIM2SAL: + val = s->pim[2].sa; + break; + case PCIX0_PIM2LAL: + val = s->pim[2].la; + break; + case PCIX0_PIM2LAH: + val = s->pim[2].la >> 32; + break; + + case PCIX0_STS: + val = s->sts; + break; + + case PCIX0_PIM0SAH: + val = s->pim[0].sa >> 32; + break; + case PCIX0_PIM2SAH: + val = s->pim[2].sa >> 32; + break; + + default: + qemu_log_mask(LOG_UNIMP, + "%s: invalid PCI internal register 0x%" HWADDR_PRIx "\n", + __func__, addr); + val = 0; + } + + trace_ppc440_pcix_reg_read(addr, val); + return val; +} + +static const MemoryRegionOps pci_reg_ops = { + .read = ppc440_pcix_reg_read4, + .write = ppc440_pcix_reg_write4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void ppc440_pcix_reset(DeviceState *dev) +{ + struct PPC440PCIXState *s = PPC440_PCIX_HOST_BRIDGE(dev); + int i; + + for (i = 0; i < PPC440_PCIX_NR_POMS; i++) { + ppc440_pcix_clear_region(get_system_memory(), &s->pom[i].mr); + } + for (i = 0; i < PPC440_PCIX_NR_PIMS; i++) { + ppc440_pcix_clear_region(&s->bm, &s->pim[i].mr); + } + memset(s->pom, 0, sizeof(s->pom)); + memset(s->pim, 0, sizeof(s->pim)); + for (i = 0; i < PPC440_PCIX_NR_PIMS; i++) { + s->pim[i].sa = 0xffffffff00000000ULL; + } + s->sts = 0; +} + +/* + * All four IRQ[ABCD] pins from all slots are tied to a single board + * IRQ, so our mapping function here maps everything to IRQ 0. + * The code in pci_change_irq_level() tracks the number of times + * the mapped IRQ is asserted and deasserted, so if multiple devices + * assert an IRQ at the same time the behaviour is correct. + * + * This may need further refactoring for boards that use multiple IRQ lines. 
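+ * For comparison, a board that routed the four pins separately would
+ * use the conventional swizzle here instead, something like
+ * (irq_num + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS.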
+ */ +static int ppc440_pcix_map_irq(PCIDevice *pci_dev, int irq_num) +{ + trace_ppc440_pcix_map_irq(pci_dev->devfn, irq_num, 0); + return 0; +} + +static void ppc440_pcix_set_irq(void *opaque, int irq_num, int level) +{ + qemu_irq *pci_irq = opaque; + + trace_ppc440_pcix_set_irq(irq_num); + if (irq_num < 0) { + error_report("%s: PCI irq %d", __func__, irq_num); + return; + } + qemu_set_irq(*pci_irq, level); +} + +static AddressSpace *ppc440_pcix_set_iommu(PCIBus *b, void *opaque, int devfn) +{ + PPC440PCIXState *s = opaque; + + return &s->bm_as; +} + +/* + * Some guests on sam460ex write all kinds of garbage here such as + * missing enable bit and low bits set and still expect this to work + * (apparently it does on real hardware because these boot there) so + * we have to override these ops here and fix it up + */ +static void pci_host_config_write(void *opaque, hwaddr addr, + uint64_t val, unsigned len) +{ + PCIHostState *s = opaque; + + if (addr != 0 || len != 4) { + return; + } + s->config_reg = (val & 0xfffffffcULL) | (1UL << 31); +} + +static uint64_t pci_host_config_read(void *opaque, hwaddr addr, + unsigned len) +{ + PCIHostState *s = opaque; + uint32_t val = s->config_reg; + + return val; +} + +const MemoryRegionOps ppc440_pcix_host_conf_ops = { + .read = pci_host_config_read, + .write = pci_host_config_write, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void ppc440_pcix_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + PPC440PCIXState *s; + PCIHostState *h; + + h = PCI_HOST_BRIDGE(dev); + s = PPC440_PCIX_HOST_BRIDGE(dev); + + sysbus_init_irq(sbd, &s->irq); + memory_region_init(&s->busmem, OBJECT(dev), "pci bus memory", UINT64_MAX); + h->bus = pci_register_root_bus(dev, NULL, ppc440_pcix_set_irq, + ppc440_pcix_map_irq, &s->irq, &s->busmem, + get_system_io(), PCI_DEVFN(0, 0), 1, TYPE_PCI_BUS); + + s->dev = pci_create_simple(h->bus, PCI_DEVFN(0, 0), "ppc4xx-host-bridge"); + + memory_region_init(&s->bm, OBJECT(s), "bm-ppc440-pcix", UINT64_MAX); + memory_region_add_subregion(&s->bm, 0x0, &s->busmem); + address_space_init(&s->bm_as, &s->bm, "pci-bm"); + pci_setup_iommu(h->bus, ppc440_pcix_set_iommu, s); + + memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE); + memory_region_init_io(&h->conf_mem, OBJECT(s), &ppc440_pcix_host_conf_ops, + h, "pci-conf-idx", 4); + memory_region_init_io(&h->data_mem, OBJECT(s), &pci_host_data_le_ops, + h, "pci-conf-data", 4); + memory_region_init_io(&s->iomem, OBJECT(s), &pci_reg_ops, s, + "pci.reg", PPC440_REG_SIZE); + memory_region_add_subregion(&s->container, PCIC0_CFGADDR, &h->conf_mem); + memory_region_add_subregion(&s->container, PCIC0_CFGDATA, &h->data_mem); + memory_region_add_subregion(&s->container, PPC440_REG_BASE, &s->iomem); + sysbus_init_mmio(sbd, &s->container); +} + +static void ppc440_pcix_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = ppc440_pcix_realize; + dc->reset = ppc440_pcix_reset; +} + +static const TypeInfo ppc440_pcix_info = { + .name = TYPE_PPC440_PCIX_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(PPC440PCIXState), + .class_init = ppc440_pcix_class_init, +}; + +static void ppc440_pcix_register_types(void) +{ + type_register_static(&ppc440_pcix_info); +} + +type_init(ppc440_pcix_register_types) diff --git a/hw/ppc/ppc440_uc.c b/hw/ppc/ppc440_uc.c new file mode 100644 index 000000000..993e3ba95 --- /dev/null +++ b/hw/ppc/ppc440_uc.c @@ -0,0 +1,1377 @@ +/* + * QEMU PowerPC 440 
embedded processors emulation + * + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2019 BALATON Zoltan + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "hw/irq.h" +#include "exec/memory.h" +#include "hw/ppc/ppc.h" +#include "hw/qdev-properties.h" +#include "hw/pci/pci.h" +#include "sysemu/block-backend.h" +#include "sysemu/reset.h" +#include "ppc440.h" +#include "qom/object.h" + +/*****************************************************************************/ +/* L2 Cache as SRAM */ +/* FIXME:fix names */ +enum { + DCR_L2CACHE_BASE = 0x30, + DCR_L2CACHE_CFG = DCR_L2CACHE_BASE, + DCR_L2CACHE_CMD, + DCR_L2CACHE_ADDR, + DCR_L2CACHE_DATA, + DCR_L2CACHE_STAT, + DCR_L2CACHE_CVER, + DCR_L2CACHE_SNP0, + DCR_L2CACHE_SNP1, + DCR_L2CACHE_END = DCR_L2CACHE_SNP1, +}; + +/* base is 460ex-specific, cf. U-Boot, ppc4xx-isram.h */ +enum { + DCR_ISRAM0_BASE = 0x20, + DCR_ISRAM0_SB0CR = DCR_ISRAM0_BASE, + DCR_ISRAM0_SB1CR, + DCR_ISRAM0_SB2CR, + DCR_ISRAM0_SB3CR, + DCR_ISRAM0_BEAR, + DCR_ISRAM0_BESR0, + DCR_ISRAM0_BESR1, + DCR_ISRAM0_PMEG, + DCR_ISRAM0_CID, + DCR_ISRAM0_REVID, + DCR_ISRAM0_DPC, + DCR_ISRAM0_END = DCR_ISRAM0_DPC +}; + +enum { + DCR_ISRAM1_BASE = 0xb0, + DCR_ISRAM1_SB0CR = DCR_ISRAM1_BASE, + /* single bank */ + DCR_ISRAM1_BEAR = DCR_ISRAM1_BASE + 0x04, + DCR_ISRAM1_BESR0, + DCR_ISRAM1_BESR1, + DCR_ISRAM1_PMEG, + DCR_ISRAM1_CID, + DCR_ISRAM1_REVID, + DCR_ISRAM1_DPC, + DCR_ISRAM1_END = DCR_ISRAM1_DPC +}; + +typedef struct ppc4xx_l2sram_t { + MemoryRegion bank[4]; + uint32_t l2cache[8]; + uint32_t isram0[11]; +} ppc4xx_l2sram_t; + +#ifdef MAP_L2SRAM +static void l2sram_update_mappings(ppc4xx_l2sram_t *l2sram, + uint32_t isarc, uint32_t isacntl, + uint32_t dsarc, uint32_t dsacntl) +{ + if (l2sram->isarc != isarc || + (l2sram->isacntl & 0x80000000) != (isacntl & 0x80000000)) { + if (l2sram->isacntl & 0x80000000) { + /* Unmap previously assigned memory region */ + memory_region_del_subregion(get_system_memory(), + &l2sram->isarc_ram); + } + if (isacntl & 0x80000000) { + /* Map new instruction memory region */ + memory_region_add_subregion(get_system_memory(), isarc, + &l2sram->isarc_ram); + } + } + if (l2sram->dsarc != dsarc || + (l2sram->dsacntl & 0x80000000) != (dsacntl & 0x80000000)) { + if (l2sram->dsacntl & 0x80000000) { + /* Beware not to unmap the region we just mapped */ + if (!(isacntl & 0x80000000) || l2sram->dsarc != isarc) { + /* Unmap previously assigned memory region */ + memory_region_del_subregion(get_system_memory(), + &l2sram->dsarc_ram); + } + } + if (dsacntl & 0x80000000) { + /* Beware not to remap the region we just mapped */ + if (!(isacntl & 0x80000000) || dsarc != isarc) { + /* Map new data memory region */ + memory_region_add_subregion(get_system_memory(), dsarc, + &l2sram->dsarc_ram); + } + } + } +} +#endif + +static uint32_t dcr_read_l2sram(void *opaque, int dcrn) +{ + ppc4xx_l2sram_t *l2sram = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case DCR_L2CACHE_CFG: + case DCR_L2CACHE_CMD: + case DCR_L2CACHE_ADDR: + case DCR_L2CACHE_DATA: + case DCR_L2CACHE_STAT: + case DCR_L2CACHE_CVER: + case DCR_L2CACHE_SNP0: + case DCR_L2CACHE_SNP1: + ret = l2sram->l2cache[dcrn - DCR_L2CACHE_BASE]; + break; + + case DCR_ISRAM0_SB0CR: + case DCR_ISRAM0_SB1CR: + case DCR_ISRAM0_SB2CR: + case DCR_ISRAM0_SB3CR: + case DCR_ISRAM0_BEAR: + case DCR_ISRAM0_BESR0: + case 
DCR_ISRAM0_BESR1: + case DCR_ISRAM0_PMEG: + case DCR_ISRAM0_CID: + case DCR_ISRAM0_REVID: + case DCR_ISRAM0_DPC: + ret = l2sram->isram0[dcrn - DCR_ISRAM0_BASE]; + break; + + default: + break; + } + + return ret; +} + +static void dcr_write_l2sram(void *opaque, int dcrn, uint32_t val) +{ + /*ppc4xx_l2sram_t *l2sram = opaque;*/ + /* FIXME: Actually handle L2 cache mapping */ + + switch (dcrn) { + case DCR_L2CACHE_CFG: + case DCR_L2CACHE_CMD: + case DCR_L2CACHE_ADDR: + case DCR_L2CACHE_DATA: + case DCR_L2CACHE_STAT: + case DCR_L2CACHE_CVER: + case DCR_L2CACHE_SNP0: + case DCR_L2CACHE_SNP1: + /*l2sram->l2cache[dcrn - DCR_L2CACHE_BASE] = val;*/ + break; + + case DCR_ISRAM0_SB0CR: + case DCR_ISRAM0_SB1CR: + case DCR_ISRAM0_SB2CR: + case DCR_ISRAM0_SB3CR: + case DCR_ISRAM0_BEAR: + case DCR_ISRAM0_BESR0: + case DCR_ISRAM0_BESR1: + case DCR_ISRAM0_PMEG: + case DCR_ISRAM0_CID: + case DCR_ISRAM0_REVID: + case DCR_ISRAM0_DPC: + /*l2sram->isram0[dcrn - DCR_L2CACHE_BASE] = val;*/ + break; + + case DCR_ISRAM1_SB0CR: + case DCR_ISRAM1_BEAR: + case DCR_ISRAM1_BESR0: + case DCR_ISRAM1_BESR1: + case DCR_ISRAM1_PMEG: + case DCR_ISRAM1_CID: + case DCR_ISRAM1_REVID: + case DCR_ISRAM1_DPC: + /*l2sram->isram1[dcrn - DCR_L2CACHE_BASE] = val;*/ + break; + } + /*l2sram_update_mappings(l2sram, isarc, isacntl, dsarc, dsacntl);*/ +} + +static void l2sram_reset(void *opaque) +{ + ppc4xx_l2sram_t *l2sram = opaque; + + memset(l2sram->l2cache, 0, sizeof(l2sram->l2cache)); + l2sram->l2cache[DCR_L2CACHE_STAT - DCR_L2CACHE_BASE] = 0x80000000; + memset(l2sram->isram0, 0, sizeof(l2sram->isram0)); + /*l2sram_update_mappings(l2sram, isarc, isacntl, dsarc, dsacntl);*/ +} + +void ppc4xx_l2sram_init(CPUPPCState *env) +{ + ppc4xx_l2sram_t *l2sram; + + l2sram = g_malloc0(sizeof(*l2sram)); + /* XXX: Size is 4*64kB for 460ex, cf. 
U-Boot, ppc4xx-isram.h */ + memory_region_init_ram(&l2sram->bank[0], NULL, "ppc4xx.l2sram_bank0", + 64 * KiB, &error_abort); + memory_region_init_ram(&l2sram->bank[1], NULL, "ppc4xx.l2sram_bank1", + 64 * KiB, &error_abort); + memory_region_init_ram(&l2sram->bank[2], NULL, "ppc4xx.l2sram_bank2", + 64 * KiB, &error_abort); + memory_region_init_ram(&l2sram->bank[3], NULL, "ppc4xx.l2sram_bank3", + 64 * KiB, &error_abort); + qemu_register_reset(&l2sram_reset, l2sram); + ppc_dcr_register(env, DCR_L2CACHE_CFG, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_CMD, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_ADDR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_DATA, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_STAT, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_CVER, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_SNP0, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_L2CACHE_SNP1, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + + ppc_dcr_register(env, DCR_ISRAM0_SB0CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_SB1CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_SB2CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_SB3CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_PMEG, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM0_DPC, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + + ppc_dcr_register(env, DCR_ISRAM1_SB0CR, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM1_PMEG, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); + ppc_dcr_register(env, DCR_ISRAM1_DPC, + l2sram, &dcr_read_l2sram, &dcr_write_l2sram); +} + +/*****************************************************************************/ +/* Clocking Power on Reset */ +enum { + CPR0_CFGADDR = 0xC, + CPR0_CFGDATA = 0xD, + + CPR0_PLLD = 0x060, + CPR0_PLBED = 0x080, + CPR0_OPBD = 0x0C0, + CPR0_PERD = 0x0E0, + CPR0_AHBD = 0x100, +}; + +typedef struct ppc4xx_cpr_t { + uint32_t addr; +} ppc4xx_cpr_t; + +static uint32_t dcr_read_cpr(void *opaque, int dcrn) +{ + ppc4xx_cpr_t *cpr = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case CPR0_CFGADDR: + ret = cpr->addr; + break; + case CPR0_CFGDATA: + switch (cpr->addr) { + case CPR0_PLLD: + ret = (0xb5 << 24) | (1 << 16) | (9 << 8); + break; + case CPR0_PLBED: + ret = (5 << 24); + break; + case CPR0_OPBD: + ret = (2 << 24); + break; + case CPR0_PERD: + case CPR0_AHBD: + ret = (1 << 24); + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +static void dcr_write_cpr(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_cpr_t *cpr = opaque; + + switch (dcrn) { + case CPR0_CFGADDR: + cpr->addr = val; + break; + case CPR0_CFGDATA: + break; + default: + break; + } +} + +static void ppc4xx_cpr_reset(void *opaque) +{ + ppc4xx_cpr_t *cpr = opaque; + + cpr->addr = 0; +} + +void ppc4xx_cpr_init(CPUPPCState *env) +{ + ppc4xx_cpr_t *cpr; + + cpr = g_malloc0(sizeof(*cpr)); + ppc_dcr_register(env, CPR0_CFGADDR, cpr, &dcr_read_cpr, &dcr_write_cpr); + ppc_dcr_register(env, CPR0_CFGDATA, cpr, &dcr_read_cpr, &dcr_write_cpr); + qemu_register_reset(ppc4xx_cpr_reset, cpr); +} + 
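+/*
+ * Note on the access pattern modelled above: the CPR (like the SDR and
+ * SDRAM controllers below) is reached through an address/data pair of
+ * DCRs. A guest first writes the target register offset to CPR0_CFGADDR
+ * and then accesses CPR0_CFGDATA, e.g. to read the PLB divider:
+ *
+ *     mtdcr(CPR0_CFGADDR, CPR0_PLBED);
+ *     val = mfdcr(CPR0_CFGDATA);    /* dcr_read_cpr() returns 5 << 24 */
+ *
+ * (Illustrative sketch only: mtdcr/mfdcr denote the PowerPC DCR access
+ * instructions, not helpers defined in this file.)
+ */
+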
+/*****************************************************************************/ +/* System DCRs */ +typedef struct ppc4xx_sdr_t ppc4xx_sdr_t; +struct ppc4xx_sdr_t { + uint32_t addr; +}; + +enum { + SDR0_CFGADDR = 0x00e, + SDR0_CFGDATA, + SDR0_STRP0 = 0x020, + SDR0_STRP1, + SDR0_102 = 0x66, + SDR0_103, + SDR0_128 = 0x80, + SDR0_ECID3 = 0x083, + SDR0_DDR0 = 0x0e1, + SDR0_USB0 = 0x320, +}; + +enum { + PESDR0_LOOP = 0x303, + PESDR0_RCSSET, + PESDR0_RCSSTS, + PESDR0_RSTSTA = 0x310, + PESDR1_LOOP = 0x343, + PESDR1_RCSSET, + PESDR1_RCSSTS, + PESDR1_RSTSTA = 0x365, +}; + +#define SDR0_DDR0_DDRM_ENCODE(n) ((((unsigned long)(n)) & 0x03) << 29) +#define SDR0_DDR0_DDRM_DDR1 0x20000000 +#define SDR0_DDR0_DDRM_DDR2 0x40000000 + +static uint32_t dcr_read_sdr(void *opaque, int dcrn) +{ + ppc4xx_sdr_t *sdr = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case SDR0_CFGADDR: + ret = sdr->addr; + break; + case SDR0_CFGDATA: + switch (sdr->addr) { + case SDR0_STRP0: + ret = (0xb5 << 8) | (1 << 4) | 9; + break; + case SDR0_STRP1: + ret = (5 << 29) | (2 << 26) | (1 << 24); + break; + case SDR0_ECID3: + ret = 1 << 20; /* No Security/Kasumi support */ + break; + case SDR0_DDR0: + ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1; + break; + case PESDR0_RCSSET: + case PESDR1_RCSSET: + ret = (1 << 24) | (1 << 16); + break; + case PESDR0_RCSSTS: + case PESDR1_RCSSTS: + ret = (1 << 16) | (1 << 12); + break; + case PESDR0_RSTSTA: + case PESDR1_RSTSTA: + ret = 1; + break; + case PESDR0_LOOP: + case PESDR1_LOOP: + ret = 1 << 12; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +static void dcr_write_sdr(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_sdr_t *sdr = opaque; + + switch (dcrn) { + case SDR0_CFGADDR: + sdr->addr = val; + break; + case SDR0_CFGDATA: + switch (sdr->addr) { + case 0x00: /* B0CR */ + break; + default: + break; + } + break; + default: + break; + } +} + +static void sdr_reset(void *opaque) +{ + ppc4xx_sdr_t *sdr = opaque; + + sdr->addr = 0; +} + +void ppc4xx_sdr_init(CPUPPCState *env) +{ + ppc4xx_sdr_t *sdr; + + sdr = g_malloc0(sizeof(*sdr)); + qemu_register_reset(&sdr_reset, sdr); + ppc_dcr_register(env, SDR0_CFGADDR, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_CFGDATA, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_102, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_103, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_128, + sdr, &dcr_read_sdr, &dcr_write_sdr); + ppc_dcr_register(env, SDR0_USB0, + sdr, &dcr_read_sdr, &dcr_write_sdr); +} + +/*****************************************************************************/ +/* SDRAM controller */ +typedef struct ppc440_sdram_t { + uint32_t addr; + int nbanks; + MemoryRegion containers[4]; /* used for clipping */ + MemoryRegion *ram_memories; + hwaddr ram_bases[4]; + hwaddr ram_sizes[4]; + uint32_t bcr[4]; +} ppc440_sdram_t; + +enum { + SDRAM0_CFGADDR = 0x10, + SDRAM0_CFGDATA, + SDRAM_R0BAS = 0x40, + SDRAM_R1BAS, + SDRAM_R2BAS, + SDRAM_R3BAS, + SDRAM_CONF1HB = 0x45, + SDRAM_PLBADDULL = 0x4a, + SDRAM_CONF1LL = 0x4b, + SDRAM_CONFPATHB = 0x4f, + SDRAM_PLBADDUHB = 0x50, +}; + +static uint32_t sdram_bcr(hwaddr ram_base, hwaddr ram_size) +{ + uint32_t bcr; + + switch (ram_size) { + case (8 * MiB): + bcr = 0xffc0; + break; + case (16 * MiB): + bcr = 0xff80; + break; + case (32 * MiB): + bcr = 0xff00; + break; + case (64 * MiB): + bcr = 0xfe00; + break; + case (128 * MiB): + bcr = 0xfc00; + break; + case (256 * MiB): + bcr = 
0xf800; + break; + case (512 * MiB): + bcr = 0xf000; + break; + case (1 * GiB): + bcr = 0xe000; + break; + case (2 * GiB): + bcr = 0xc000; + break; + case (4 * GiB): + bcr = 0x8000; + break; + default: + error_report("invalid RAM size " TARGET_FMT_plx, ram_size); + return 0; + } + bcr |= ram_base >> 2 & 0xffe00000; + bcr |= 1; + + return bcr; +} + +static inline hwaddr sdram_base(uint32_t bcr) +{ + return (bcr & 0xffe00000) << 2; +} + +static uint64_t sdram_size(uint32_t bcr) +{ + uint64_t size; + int sh; + + sh = 1024 - ((bcr >> 6) & 0x3ff); + size = 8 * MiB * sh; + + return size; +} + +static void sdram_set_bcr(ppc440_sdram_t *sdram, int i, + uint32_t bcr, int enabled) +{ + if (sdram->bcr[i] & 1) { + /* First unmap RAM if enabled */ + memory_region_del_subregion(get_system_memory(), + &sdram->containers[i]); + memory_region_del_subregion(&sdram->containers[i], + &sdram->ram_memories[i]); + object_unparent(OBJECT(&sdram->containers[i])); + } + sdram->bcr[i] = bcr & 0xffe0ffc1; + if (enabled && (bcr & 1)) { + memory_region_init(&sdram->containers[i], NULL, "sdram-containers", + sdram_size(bcr)); + memory_region_add_subregion(&sdram->containers[i], 0, + &sdram->ram_memories[i]); + memory_region_add_subregion(get_system_memory(), + sdram_base(bcr), + &sdram->containers[i]); + } +} + +static void sdram_map_bcr(ppc440_sdram_t *sdram) +{ + int i; + + for (i = 0; i < sdram->nbanks; i++) { + if (sdram->ram_sizes[i] != 0) { + sdram_set_bcr(sdram, i, sdram_bcr(sdram->ram_bases[i], + sdram->ram_sizes[i]), 1); + } else { + sdram_set_bcr(sdram, i, 0, 0); + } + } +} + +static uint32_t dcr_read_sdram(void *opaque, int dcrn) +{ + ppc440_sdram_t *sdram = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case SDRAM_R0BAS: + case SDRAM_R1BAS: + case SDRAM_R2BAS: + case SDRAM_R3BAS: + if (sdram->ram_sizes[dcrn - SDRAM_R0BAS]) { + ret = sdram_bcr(sdram->ram_bases[dcrn - SDRAM_R0BAS], + sdram->ram_sizes[dcrn - SDRAM_R0BAS]); + } + break; + case SDRAM_CONF1HB: + case SDRAM_CONF1LL: + case SDRAM_CONFPATHB: + case SDRAM_PLBADDULL: + case SDRAM_PLBADDUHB: + break; + case SDRAM0_CFGADDR: + ret = sdram->addr; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x14: /* SDRAM_MCSTAT (405EX) */ + case 0x1F: + ret = 0x80000000; + break; + case 0x21: /* SDRAM_MCOPT2 */ + ret = 0x08000000; + break; + case 0x40: /* SDRAM_MB0CF */ + ret = 0x00008001; + break; + case 0x7A: /* SDRAM_DLCR */ + ret = 0x02000000; + break; + case 0xE1: /* SDR0_DDR0 */ + ret = SDR0_DDR0_DDRM_ENCODE(1) | SDR0_DDR0_DDRM_DDR1; + break; + default: + break; + } + break; + default: + break; + } + + return ret; +} + +static void dcr_write_sdram(void *opaque, int dcrn, uint32_t val) +{ + ppc440_sdram_t *sdram = opaque; + + switch (dcrn) { + case SDRAM_R0BAS: + case SDRAM_R1BAS: + case SDRAM_R2BAS: + case SDRAM_R3BAS: + case SDRAM_CONF1HB: + case SDRAM_CONF1LL: + case SDRAM_CONFPATHB: + case SDRAM_PLBADDULL: + case SDRAM_PLBADDUHB: + break; + case SDRAM0_CFGADDR: + sdram->addr = val; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x00: /* B0CR */ + break; + default: + break; + } + break; + default: + break; + } +} + +static void sdram_reset(void *opaque) +{ + ppc440_sdram_t *sdram = opaque; + + sdram->addr = 0; +} + +void ppc440_sdram_init(CPUPPCState *env, int nbanks, + MemoryRegion *ram_memories, + hwaddr *ram_bases, hwaddr *ram_sizes, + int do_init) +{ + ppc440_sdram_t *sdram; + + sdram = g_malloc0(sizeof(*sdram)); + sdram->nbanks = nbanks; + sdram->ram_memories = ram_memories; + memcpy(sdram->ram_bases, ram_bases, nbanks * 
sizeof(hwaddr)); + memcpy(sdram->ram_sizes, ram_sizes, nbanks * sizeof(hwaddr)); + qemu_register_reset(&sdram_reset, sdram); + ppc_dcr_register(env, SDRAM0_CFGADDR, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM0_CFGDATA, + sdram, &dcr_read_sdram, &dcr_write_sdram); + if (do_init) { + sdram_map_bcr(sdram); + } + + ppc_dcr_register(env, SDRAM_R0BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_R1BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_R2BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_R3BAS, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_CONF1HB, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_PLBADDULL, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_CONF1LL, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_CONFPATHB, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM_PLBADDUHB, + sdram, &dcr_read_sdram, &dcr_write_sdram); +} + +/*****************************************************************************/ +/* PLB to AHB bridge */ +enum { + AHB_TOP = 0xA4, + AHB_BOT = 0xA5, +}; + +typedef struct ppc4xx_ahb_t { + uint32_t top; + uint32_t bot; +} ppc4xx_ahb_t; + +static uint32_t dcr_read_ahb(void *opaque, int dcrn) +{ + ppc4xx_ahb_t *ahb = opaque; + uint32_t ret = 0; + + switch (dcrn) { + case AHB_TOP: + ret = ahb->top; + break; + case AHB_BOT: + ret = ahb->bot; + break; + default: + break; + } + + return ret; +} + +static void dcr_write_ahb(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_ahb_t *ahb = opaque; + + switch (dcrn) { + case AHB_TOP: + ahb->top = val; + break; + case AHB_BOT: + ahb->bot = val; + break; + } +} + +static void ppc4xx_ahb_reset(void *opaque) +{ + ppc4xx_ahb_t *ahb = opaque; + + /* No error */ + ahb->top = 0; + ahb->bot = 0; +} + +void ppc4xx_ahb_init(CPUPPCState *env) +{ + ppc4xx_ahb_t *ahb; + + ahb = g_malloc0(sizeof(*ahb)); + ppc_dcr_register(env, AHB_TOP, ahb, &dcr_read_ahb, &dcr_write_ahb); + ppc_dcr_register(env, AHB_BOT, ahb, &dcr_read_ahb, &dcr_write_ahb); + qemu_register_reset(ppc4xx_ahb_reset, ahb); +} + +/*****************************************************************************/ +/* DMA controller */ + +#define DMA0_CR_CE (1 << 31) +#define DMA0_CR_PW (1 << 26 | 1 << 25) +#define DMA0_CR_DAI (1 << 24) +#define DMA0_CR_SAI (1 << 23) +#define DMA0_CR_DEC (1 << 2) + +enum { + DMA0_CR = 0x00, + DMA0_CT, + DMA0_SAH, + DMA0_SAL, + DMA0_DAH, + DMA0_DAL, + DMA0_SGH, + DMA0_SGL, + + DMA0_SR = 0x20, + DMA0_SGC = 0x23, + DMA0_SLP = 0x25, + DMA0_POL = 0x26, +}; + +typedef struct { + uint32_t cr; + uint32_t ct; + uint64_t sa; + uint64_t da; + uint64_t sg; +} PPC4xxDmaChnl; + +typedef struct { + int base; + PPC4xxDmaChnl ch[4]; + uint32_t sr; +} PPC4xxDmaState; + +static uint32_t dcr_read_dma(void *opaque, int dcrn) +{ + PPC4xxDmaState *dma = opaque; + uint32_t val = 0; + int addr = dcrn - dma->base; + int chnl = addr / 8; + + switch (addr) { + case 0x00 ... 
0x1f: + switch (addr % 8) { + case DMA0_CR: + val = dma->ch[chnl].cr; + break; + case DMA0_CT: + val = dma->ch[chnl].ct; + break; + case DMA0_SAH: + val = dma->ch[chnl].sa >> 32; + break; + case DMA0_SAL: + val = dma->ch[chnl].sa; + break; + case DMA0_DAH: + val = dma->ch[chnl].da >> 32; + break; + case DMA0_DAL: + val = dma->ch[chnl].da; + break; + case DMA0_SGH: + val = dma->ch[chnl].sg >> 32; + break; + case DMA0_SGL: + val = dma->ch[chnl].sg; + break; + } + break; + case DMA0_SR: + val = dma->sr; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n", + __func__, dcrn, chnl, addr); + } + + return val; +} + +static void dcr_write_dma(void *opaque, int dcrn, uint32_t val) +{ + PPC4xxDmaState *dma = opaque; + int addr = dcrn - dma->base; + int chnl = addr / 8; + + switch (addr) { + case 0x00 ... 0x1f: + switch (addr % 8) { + case DMA0_CR: + dma->ch[chnl].cr = val; + if (val & DMA0_CR_CE) { + int count = dma->ch[chnl].ct & 0xffff; + + if (count) { + int width, i, sidx, didx; + uint8_t *rptr, *wptr; + hwaddr rlen, wlen; + + sidx = didx = 0; + width = 1 << ((val & DMA0_CR_PW) >> 25); + rptr = cpu_physical_memory_map(dma->ch[chnl].sa, &rlen, + false); + wptr = cpu_physical_memory_map(dma->ch[chnl].da, &wlen, + true); + if (rptr && wptr) { + if (!(val & DMA0_CR_DEC) && + val & DMA0_CR_SAI && val & DMA0_CR_DAI) { + /* optimise common case */ + memmove(wptr, rptr, count * width); + sidx = didx = count * width; + } else { + /* do it the slow way */ + for (sidx = didx = i = 0; i < count; i++) { + uint64_t v = ldn_le_p(rptr + sidx, width); + stn_le_p(wptr + didx, width, v); + if (val & DMA0_CR_SAI) { + sidx += width; + } + if (val & DMA0_CR_DAI) { + didx += width; + } + } + } + } + if (wptr) { + cpu_physical_memory_unmap(wptr, wlen, 1, didx); + } + if (rptr) { + cpu_physical_memory_unmap(rptr, rlen, 0, sidx); + } + } + } + break; + case DMA0_CT: + dma->ch[chnl].ct = val; + break; + case DMA0_SAH: + dma->ch[chnl].sa &= 0xffffffffULL; + dma->ch[chnl].sa |= (uint64_t)val << 32; + break; + case DMA0_SAL: + dma->ch[chnl].sa &= 0xffffffff00000000ULL; + dma->ch[chnl].sa |= val; + break; + case DMA0_DAH: + dma->ch[chnl].da &= 0xffffffffULL; + dma->ch[chnl].da |= (uint64_t)val << 32; + break; + case DMA0_DAL: + dma->ch[chnl].da &= 0xffffffff00000000ULL; + dma->ch[chnl].da |= val; + break; + case DMA0_SGH: + dma->ch[chnl].sg &= 0xffffffffULL; + dma->ch[chnl].sg |= (uint64_t)val << 32; + break; + case DMA0_SGL: + dma->ch[chnl].sg &= 0xffffffff00000000ULL; + dma->ch[chnl].sg |= val; + break; + } + break; + case DMA0_SR: + dma->sr &= ~val; + break; + default: + qemu_log_mask(LOG_UNIMP, "%s: unimplemented register %x (%d, %x)\n", + __func__, dcrn, chnl, addr); + } +} + +static void ppc4xx_dma_reset(void *opaque) +{ + PPC4xxDmaState *dma = opaque; + int dma_base = dma->base; + + memset(dma, 0, sizeof(*dma)); + dma->base = dma_base; +} + +void ppc4xx_dma_init(CPUPPCState *env, int dcr_base) +{ + PPC4xxDmaState *dma; + int i; + + dma = g_malloc0(sizeof(*dma)); + dma->base = dcr_base; + qemu_register_reset(&ppc4xx_dma_reset, dma); + for (i = 0; i < 4; i++) { + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_CR, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_CT, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SAH, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SAL, + dma, &dcr_read_dma, &dcr_write_dma); + ppc_dcr_register(env, dcr_base + i * 8 + DMA0_DAH, + dma, 
&dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_DAL,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SGH,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+        ppc_dcr_register(env, dcr_base + i * 8 + DMA0_SGL,
+                         dma, &dcr_read_dma, &dcr_write_dma);
+    }
+    ppc_dcr_register(env, dcr_base + DMA0_SR,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_SGC,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_SLP,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+    ppc_dcr_register(env, dcr_base + DMA0_POL,
+                     dma, &dcr_read_dma, &dcr_write_dma);
+}
+
+/*****************************************************************************/
+/* PCI Express controller */
+/* FIXME: This is not complete and does not work, only implemented partially
+ * to allow firmware and guests to find an empty bus. Cards should use PCI.
+ */
+#include "hw/pci/pcie_host.h"
+
+#define TYPE_PPC460EX_PCIE_HOST "ppc460ex-pcie-host"
+OBJECT_DECLARE_SIMPLE_TYPE(PPC460EXPCIEState, PPC460EX_PCIE_HOST)
+
+struct PPC460EXPCIEState {
+    PCIExpressHost host;
+
+    MemoryRegion iomem;
+    qemu_irq irq[4];
+    int32_t dcrn_base;
+
+    uint64_t cfg_base;
+    uint32_t cfg_mask;
+    uint64_t msg_base;
+    uint32_t msg_mask;
+    uint64_t omr1_base;
+    uint64_t omr1_mask;
+    uint64_t omr2_base;
+    uint64_t omr2_mask;
+    uint64_t omr3_base;
+    uint64_t omr3_mask;
+    uint64_t reg_base;
+    uint32_t reg_mask;
+    uint32_t special;
+    uint32_t cfg;
+};
+
+#define DCRN_PCIE0_BASE 0x100
+#define DCRN_PCIE1_BASE 0x120
+
+enum {
+    PEGPL_CFGBAH = 0x0,
+    PEGPL_CFGBAL,
+    PEGPL_CFGMSK,
+    PEGPL_MSGBAH,
+    PEGPL_MSGBAL,
+    PEGPL_MSGMSK,
+    PEGPL_OMR1BAH,
+    PEGPL_OMR1BAL,
+    PEGPL_OMR1MSKH,
+    PEGPL_OMR1MSKL,
+    PEGPL_OMR2BAH,
+    PEGPL_OMR2BAL,
+    PEGPL_OMR2MSKH,
+    PEGPL_OMR2MSKL,
+    PEGPL_OMR3BAH,
+    PEGPL_OMR3BAL,
+    PEGPL_OMR3MSKH,
+    PEGPL_OMR3MSKL,
+    PEGPL_REGBAH,
+    PEGPL_REGBAL,
+    PEGPL_REGMSK,
+    PEGPL_SPECIAL,
+    PEGPL_CFG,
+};
+
+static uint32_t dcr_read_pcie(void *opaque, int dcrn)
+{
+    PPC460EXPCIEState *state = opaque;
+    uint32_t ret = 0;
+
+    switch (dcrn - state->dcrn_base) {
+    case PEGPL_CFGBAH:
+        ret = state->cfg_base >> 32;
+        break;
+    case PEGPL_CFGBAL:
+        ret = state->cfg_base;
+        break;
+    case PEGPL_CFGMSK:
+        ret = state->cfg_mask;
+        break;
+    case PEGPL_MSGBAH:
+        ret = state->msg_base >> 32;
+        break;
+    case PEGPL_MSGBAL:
+        ret = state->msg_base;
+        break;
+    case PEGPL_MSGMSK:
+        ret = state->msg_mask;
+        break;
+    case PEGPL_OMR1BAH:
+        ret = state->omr1_base >> 32;
+        break;
+    case PEGPL_OMR1BAL:
+        ret = state->omr1_base;
+        break;
+    case PEGPL_OMR1MSKH:
+        ret = state->omr1_mask >> 32;
+        break;
+    case PEGPL_OMR1MSKL:
+        ret = state->omr1_mask;
+        break;
+    case PEGPL_OMR2BAH:
+        ret = state->omr2_base >> 32;
+        break;
+    case PEGPL_OMR2BAL:
+        ret = state->omr2_base;
+        break;
+    case PEGPL_OMR2MSKH:
+        ret = state->omr2_mask >> 32;
+        break;
+    case PEGPL_OMR2MSKL:
+        ret = state->omr2_mask;
+        break;
+    case PEGPL_OMR3BAH:
+        ret = state->omr3_base >> 32;
+        break;
+    case PEGPL_OMR3BAL:
+        ret = state->omr3_base;
+        break;
+    case PEGPL_OMR3MSKH:
+        ret = state->omr3_mask >> 32;
+        break;
+    case PEGPL_OMR3MSKL:
+        ret = state->omr3_mask;
+        break;
+    case PEGPL_REGBAH:
+        ret = state->reg_base >> 32;
+        break;
+    case PEGPL_REGBAL:
+        ret = state->reg_base;
+        break;
+    case PEGPL_REGMSK:
+        ret = state->reg_mask;
+        break;
+    case PEGPL_SPECIAL:
+        ret = state->special;
+        break;
+    case PEGPL_CFG:
+        ret = state->cfg;
+        break;
+    }
+
+    return ret;
+}
+
+static void dcr_write_pcie(void *opaque, int dcrn, uint32_t val)
+{
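+    /*
+     * The BAH/BAL cases below latch the two 32-bit halves into 64-bit
+     * base registers; the only case with a side effect beyond storing
+     * the value is PEGPL_CFGMSK, which resizes the MMCFG window through
+     * pcie_host_mmcfg_update().
+     */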
PPC460EXPCIEState *s = opaque; + uint64_t size; + + switch (dcrn - s->dcrn_base) { + case PEGPL_CFGBAH: + s->cfg_base = ((uint64_t)val << 32) | (s->cfg_base & 0xffffffff); + break; + case PEGPL_CFGBAL: + s->cfg_base = (s->cfg_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_CFGMSK: + s->cfg_mask = val; + size = ~(val & 0xfffffffe) + 1; + pcie_host_mmcfg_update(PCIE_HOST_BRIDGE(s), val & 1, s->cfg_base, size); + break; + case PEGPL_MSGBAH: + s->msg_base = ((uint64_t)val << 32) | (s->msg_base & 0xffffffff); + break; + case PEGPL_MSGBAL: + s->msg_base = (s->msg_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_MSGMSK: + s->msg_mask = val; + break; + case PEGPL_OMR1BAH: + s->omr1_base = ((uint64_t)val << 32) | (s->omr1_base & 0xffffffff); + break; + case PEGPL_OMR1BAL: + s->omr1_base = (s->omr1_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR1MSKH: + s->omr1_mask = ((uint64_t)val << 32) | (s->omr1_mask & 0xffffffff); + break; + case PEGPL_OMR1MSKL: + s->omr1_mask = (s->omr1_mask & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR2BAH: + s->omr2_base = ((uint64_t)val << 32) | (s->omr2_base & 0xffffffff); + break; + case PEGPL_OMR2BAL: + s->omr2_base = (s->omr2_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR2MSKH: + s->omr2_mask = ((uint64_t)val << 32) | (s->omr2_mask & 0xffffffff); + break; + case PEGPL_OMR2MSKL: + s->omr2_mask = (s->omr2_mask & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR3BAH: + s->omr3_base = ((uint64_t)val << 32) | (s->omr3_base & 0xffffffff); + break; + case PEGPL_OMR3BAL: + s->omr3_base = (s->omr3_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_OMR3MSKH: + s->omr3_mask = ((uint64_t)val << 32) | (s->omr3_mask & 0xffffffff); + break; + case PEGPL_OMR3MSKL: + s->omr3_mask = (s->omr3_mask & 0xffffffff00000000ULL) | val; + break; + case PEGPL_REGBAH: + s->reg_base = ((uint64_t)val << 32) | (s->reg_base & 0xffffffff); + break; + case PEGPL_REGBAL: + s->reg_base = (s->reg_base & 0xffffffff00000000ULL) | val; + break; + case PEGPL_REGMSK: + s->reg_mask = val; + /* FIXME: how is size encoded? */ + size = (val == 0x7001 ? 
4096 : ~(val & 0xfffffffe) + 1); + break; + case PEGPL_SPECIAL: + s->special = val; + break; + case PEGPL_CFG: + s->cfg = val; + break; + } +} + +static void ppc460ex_set_irq(void *opaque, int irq_num, int level) +{ + PPC460EXPCIEState *s = opaque; + qemu_set_irq(s->irq[irq_num], level); +} + +static void ppc460ex_pcie_realize(DeviceState *dev, Error **errp) +{ + PPC460EXPCIEState *s = PPC460EX_PCIE_HOST(dev); + PCIHostState *pci = PCI_HOST_BRIDGE(dev); + int i, id; + char buf[16]; + + switch (s->dcrn_base) { + case DCRN_PCIE0_BASE: + id = 0; + break; + case DCRN_PCIE1_BASE: + id = 1; + break; + default: + error_setg(errp, "invalid PCIe DCRN base"); + return; + } + snprintf(buf, sizeof(buf), "pcie%d-io", id); + memory_region_init(&s->iomem, OBJECT(s), buf, UINT64_MAX); + for (i = 0; i < 4; i++) { + sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq[i]); + } + snprintf(buf, sizeof(buf), "pcie.%d", id); + pci->bus = pci_register_root_bus(DEVICE(s), buf, ppc460ex_set_irq, + pci_swizzle_map_irq_fn, s, &s->iomem, + get_system_io(), 0, 4, TYPE_PCIE_BUS); +} + +static Property ppc460ex_pcie_props[] = { + DEFINE_PROP_INT32("dcrn-base", PPC460EXPCIEState, dcrn_base, -1), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ppc460ex_pcie_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + dc->realize = ppc460ex_pcie_realize; + device_class_set_props(dc, ppc460ex_pcie_props); + dc->hotpluggable = false; +} + +static const TypeInfo ppc460ex_pcie_host_info = { + .name = TYPE_PPC460EX_PCIE_HOST, + .parent = TYPE_PCIE_HOST_BRIDGE, + .instance_size = sizeof(PPC460EXPCIEState), + .class_init = ppc460ex_pcie_class_init, +}; + +static void ppc460ex_pcie_register(void) +{ + type_register_static(&ppc460ex_pcie_host_info); +} + +type_init(ppc460ex_pcie_register) + +static void ppc460ex_pcie_register_dcrs(PPC460EXPCIEState *s, CPUPPCState *env) +{ + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGBAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFGMSK, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGBAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_MSGMSK, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1BAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR1MSKL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2BAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR2MSKL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3BAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_OMR3MSKH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + 
PEGPL_OMR3MSKL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAH, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_REGBAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_REGMSK, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_SPECIAL, s, + &dcr_read_pcie, &dcr_write_pcie); + ppc_dcr_register(env, s->dcrn_base + PEGPL_CFG, s, + &dcr_read_pcie, &dcr_write_pcie); +} + +void ppc460ex_pcie_init(CPUPPCState *env) +{ + DeviceState *dev; + + dev = qdev_new(TYPE_PPC460EX_PCIE_HOST); + qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE0_BASE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), env); + + dev = qdev_new(TYPE_PPC460EX_PCIE_HOST); + qdev_prop_set_int32(dev, "dcrn-base", DCRN_PCIE1_BASE); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + ppc460ex_pcie_register_dcrs(PPC460EX_PCIE_HOST(dev), env); +} diff --git a/hw/ppc/ppc4xx_devs.c b/hw/ppc/ppc4xx_devs.c new file mode 100644 index 000000000..980c48944 --- /dev/null +++ b/hw/ppc/ppc4xx_devs.c @@ -0,0 +1,715 @@ +/* + * QEMU PowerPC 4xx embedded processors shared devices emulation + * + * Copyright (c) 2007 Jocelyn Mayer + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "sysemu/reset.h" +#include "cpu.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "hw/intc/ppc-uic.h" +#include "hw/qdev-properties.h" +#include "qemu/log.h" +#include "exec/address-spaces.h" +#include "qemu/error-report.h" +#include "qapi/error.h" + +/*#define DEBUG_UIC*/ + +#ifdef DEBUG_UIC +# define LOG_UIC(...) qemu_log_mask(CPU_LOG_INT, ## __VA_ARGS__) +#else +# define LOG_UIC(...) 
do { } while (0) +#endif + +static void ppc4xx_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); +} + +/*****************************************************************************/ +/* Generic PowerPC 4xx processor instantiation */ +PowerPCCPU *ppc4xx_init(const char *cpu_type, + clk_setup_t *cpu_clk, clk_setup_t *tb_clk, + uint32_t sysclk) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + + /* init CPUs */ + cpu = POWERPC_CPU(cpu_create(cpu_type)); + env = &cpu->env; + + cpu_clk->cb = NULL; /* We don't care about CPU clock frequency changes */ + cpu_clk->opaque = env; + /* Set time-base frequency to sysclk */ + tb_clk->cb = ppc_40x_timers_init(env, sysclk, PPC_INTERRUPT_PIT); + tb_clk->opaque = env; + ppc_dcr_init(env, NULL, NULL); + /* Register qemu callbacks */ + qemu_register_reset(ppc4xx_reset, cpu); + + return cpu; +} + +/*****************************************************************************/ +/* SDRAM controller */ +typedef struct ppc4xx_sdram_t ppc4xx_sdram_t; +struct ppc4xx_sdram_t { + uint32_t addr; + int nbanks; + MemoryRegion containers[4]; /* used for clipping */ + MemoryRegion *ram_memories; + hwaddr ram_bases[4]; + hwaddr ram_sizes[4]; + uint32_t besr0; + uint32_t besr1; + uint32_t bear; + uint32_t cfg; + uint32_t status; + uint32_t rtr; + uint32_t pmit; + uint32_t bcr[4]; + uint32_t tr; + uint32_t ecccfg; + uint32_t eccesr; + qemu_irq irq; +}; + +enum { + SDRAM0_CFGADDR = 0x010, + SDRAM0_CFGDATA = 0x011, +}; + +/* XXX: TOFIX: some patches have made this code become inconsistent: + * there are type inconsistencies, mixing hwaddr, target_ulong + * and uint32_t + */ +static uint32_t sdram_bcr (hwaddr ram_base, + hwaddr ram_size) +{ + uint32_t bcr; + + switch (ram_size) { + case 4 * MiB: + bcr = 0x00000000; + break; + case 8 * MiB: + bcr = 0x00020000; + break; + case 16 * MiB: + bcr = 0x00040000; + break; + case 32 * MiB: + bcr = 0x00060000; + break; + case 64 * MiB: + bcr = 0x00080000; + break; + case 128 * MiB: + bcr = 0x000A0000; + break; + case 256 * MiB: + bcr = 0x000C0000; + break; + default: + printf("%s: invalid RAM size " TARGET_FMT_plx "\n", __func__, + ram_size); + return 0x00000000; + } + bcr |= ram_base & 0xFF800000; + bcr |= 1; + + return bcr; +} + +static inline hwaddr sdram_base(uint32_t bcr) +{ + return bcr & 0xFF800000; +} + +static target_ulong sdram_size (uint32_t bcr) +{ + target_ulong size; + int sh; + + sh = (bcr >> 17) & 0x7; + if (sh == 7) + size = -1; + else + size = (4 * MiB) << sh; + + return size; +} + +static void sdram_set_bcr(ppc4xx_sdram_t *sdram, int i, + uint32_t bcr, int enabled) +{ + if (sdram->bcr[i] & 0x00000001) { + /* Unmap RAM */ +#ifdef DEBUG_SDRAM + printf("%s: unmap RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n", + __func__, sdram_base(sdram->bcr[i]), sdram_size(sdram->bcr[i])); +#endif + memory_region_del_subregion(get_system_memory(), + &sdram->containers[i]); + memory_region_del_subregion(&sdram->containers[i], + &sdram->ram_memories[i]); + object_unparent(OBJECT(&sdram->containers[i])); + } + sdram->bcr[i] = bcr & 0xFFDEE001; + if (enabled && (bcr & 0x00000001)) { +#ifdef DEBUG_SDRAM + printf("%s: Map RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n", + __func__, sdram_base(bcr), sdram_size(bcr)); +#endif + memory_region_init(&sdram->containers[i], NULL, "sdram-containers", + sdram_size(bcr)); + memory_region_add_subregion(&sdram->containers[i], 0, + &sdram->ram_memories[i]); + memory_region_add_subregion(get_system_memory(), + sdram_base(bcr), + &sdram->containers[i]); + } +} + +static void 
sdram_map_bcr (ppc4xx_sdram_t *sdram) +{ + int i; + + for (i = 0; i < sdram->nbanks; i++) { + if (sdram->ram_sizes[i] != 0) { + sdram_set_bcr(sdram, i, sdram_bcr(sdram->ram_bases[i], + sdram->ram_sizes[i]), 1); + } else { + sdram_set_bcr(sdram, i, 0x00000000, 0); + } + } +} + +static void sdram_unmap_bcr (ppc4xx_sdram_t *sdram) +{ + int i; + + for (i = 0; i < sdram->nbanks; i++) { +#ifdef DEBUG_SDRAM + printf("%s: Unmap RAM area " TARGET_FMT_plx " " TARGET_FMT_lx "\n", + __func__, sdram_base(sdram->bcr[i]), sdram_size(sdram->bcr[i])); +#endif + memory_region_del_subregion(get_system_memory(), + &sdram->ram_memories[i]); + } +} + +static uint32_t dcr_read_sdram (void *opaque, int dcrn) +{ + ppc4xx_sdram_t *sdram; + uint32_t ret; + + sdram = opaque; + switch (dcrn) { + case SDRAM0_CFGADDR: + ret = sdram->addr; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x00: /* SDRAM_BESR0 */ + ret = sdram->besr0; + break; + case 0x08: /* SDRAM_BESR1 */ + ret = sdram->besr1; + break; + case 0x10: /* SDRAM_BEAR */ + ret = sdram->bear; + break; + case 0x20: /* SDRAM_CFG */ + ret = sdram->cfg; + break; + case 0x24: /* SDRAM_STATUS */ + ret = sdram->status; + break; + case 0x30: /* SDRAM_RTR */ + ret = sdram->rtr; + break; + case 0x34: /* SDRAM_PMIT */ + ret = sdram->pmit; + break; + case 0x40: /* SDRAM_B0CR */ + ret = sdram->bcr[0]; + break; + case 0x44: /* SDRAM_B1CR */ + ret = sdram->bcr[1]; + break; + case 0x48: /* SDRAM_B2CR */ + ret = sdram->bcr[2]; + break; + case 0x4C: /* SDRAM_B3CR */ + ret = sdram->bcr[3]; + break; + case 0x80: /* SDRAM_TR */ + ret = -1; /* ? */ + break; + case 0x94: /* SDRAM_ECCCFG */ + ret = sdram->ecccfg; + break; + case 0x98: /* SDRAM_ECCESR */ + ret = sdram->eccesr; + break; + default: /* Error */ + ret = -1; + break; + } + break; + default: + /* Avoid gcc warning */ + ret = 0x00000000; + break; + } + + return ret; +} + +static void dcr_write_sdram (void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_sdram_t *sdram; + + sdram = opaque; + switch (dcrn) { + case SDRAM0_CFGADDR: + sdram->addr = val; + break; + case SDRAM0_CFGDATA: + switch (sdram->addr) { + case 0x00: /* SDRAM_BESR0 */ + sdram->besr0 &= ~val; + break; + case 0x08: /* SDRAM_BESR1 */ + sdram->besr1 &= ~val; + break; + case 0x10: /* SDRAM_BEAR */ + sdram->bear = val; + break; + case 0x20: /* SDRAM_CFG */ + val &= 0xFFE00000; + if (!(sdram->cfg & 0x80000000) && (val & 0x80000000)) { +#ifdef DEBUG_SDRAM + printf("%s: enable SDRAM controller\n", __func__); +#endif + /* validate all RAM mappings */ + sdram_map_bcr(sdram); + sdram->status &= ~0x80000000; + } else if ((sdram->cfg & 0x80000000) && !(val & 0x80000000)) { +#ifdef DEBUG_SDRAM + printf("%s: disable SDRAM controller\n", __func__); +#endif + /* invalidate all RAM mappings */ + sdram_unmap_bcr(sdram); + sdram->status |= 0x80000000; + } + if (!(sdram->cfg & 0x40000000) && (val & 0x40000000)) + sdram->status |= 0x40000000; + else if ((sdram->cfg & 0x40000000) && !(val & 0x40000000)) + sdram->status &= ~0x40000000; + sdram->cfg = val; + break; + case 0x24: /* SDRAM_STATUS */ + /* Read-only register */ + break; + case 0x30: /* SDRAM_RTR */ + sdram->rtr = val & 0x3FF80000; + break; + case 0x34: /* SDRAM_PMIT */ + sdram->pmit = (val & 0xF8000000) | 0x07C00000; + break; + case 0x40: /* SDRAM_B0CR */ + sdram_set_bcr(sdram, 0, val, sdram->cfg & 0x80000000); + break; + case 0x44: /* SDRAM_B1CR */ + sdram_set_bcr(sdram, 1, val, sdram->cfg & 0x80000000); + break; + case 0x48: /* SDRAM_B2CR */ + sdram_set_bcr(sdram, 2, val, sdram->cfg & 0x80000000); + break; + 
case 0x4C: /* SDRAM_B3CR */ + sdram_set_bcr(sdram, 3, val, sdram->cfg & 0x80000000); + break; + case 0x80: /* SDRAM_TR */ + sdram->tr = val & 0x018FC01F; + break; + case 0x94: /* SDRAM_ECCCFG */ + sdram->ecccfg = val & 0x00F00000; + break; + case 0x98: /* SDRAM_ECCESR */ + val &= 0xFFF0F000; + if (sdram->eccesr == 0 && val != 0) + qemu_irq_raise(sdram->irq); + else if (sdram->eccesr != 0 && val == 0) + qemu_irq_lower(sdram->irq); + sdram->eccesr = val; + break; + default: /* Error */ + break; + } + break; + } +} + +static void sdram_reset (void *opaque) +{ + ppc4xx_sdram_t *sdram; + + sdram = opaque; + sdram->addr = 0x00000000; + sdram->bear = 0x00000000; + sdram->besr0 = 0x00000000; /* No error */ + sdram->besr1 = 0x00000000; /* No error */ + sdram->cfg = 0x00000000; + sdram->ecccfg = 0x00000000; /* No ECC */ + sdram->eccesr = 0x00000000; /* No error */ + sdram->pmit = 0x07C00000; + sdram->rtr = 0x05F00000; + sdram->tr = 0x00854009; + /* We pre-initialize RAM banks */ + sdram->status = 0x00000000; + sdram->cfg = 0x00800000; +} + +void ppc4xx_sdram_init (CPUPPCState *env, qemu_irq irq, int nbanks, + MemoryRegion *ram_memories, + hwaddr *ram_bases, + hwaddr *ram_sizes, + int do_init) +{ + ppc4xx_sdram_t *sdram; + + sdram = g_malloc0(sizeof(ppc4xx_sdram_t)); + sdram->irq = irq; + sdram->nbanks = nbanks; + sdram->ram_memories = ram_memories; + memset(sdram->ram_bases, 0, 4 * sizeof(hwaddr)); + memcpy(sdram->ram_bases, ram_bases, + nbanks * sizeof(hwaddr)); + memset(sdram->ram_sizes, 0, 4 * sizeof(hwaddr)); + memcpy(sdram->ram_sizes, ram_sizes, + nbanks * sizeof(hwaddr)); + qemu_register_reset(&sdram_reset, sdram); + ppc_dcr_register(env, SDRAM0_CFGADDR, + sdram, &dcr_read_sdram, &dcr_write_sdram); + ppc_dcr_register(env, SDRAM0_CFGDATA, + sdram, &dcr_read_sdram, &dcr_write_sdram); + if (do_init) + sdram_map_bcr(sdram); +} + +/* + * Split RAM between SDRAM banks. + * + * sdram_bank_sizes[] must be in descending order, that is sizes[i] > sizes[i+1] + * and must be 0-terminated. + * + * The 4xx SDRAM controller supports a small number of banks, and each bank + * must be one of a small set of sizes. The number of banks and the supported + * sizes varies by SoC. + */ +void ppc4xx_sdram_banks(MemoryRegion *ram, int nr_banks, + MemoryRegion ram_memories[], + hwaddr ram_bases[], hwaddr ram_sizes[], + const ram_addr_t sdram_bank_sizes[]) +{ + ram_addr_t size_left = memory_region_size(ram); + ram_addr_t base = 0; + ram_addr_t bank_size; + int i; + int j; + + for (i = 0; i < nr_banks; i++) { + for (j = 0; sdram_bank_sizes[j] != 0; j++) { + bank_size = sdram_bank_sizes[j]; + if (bank_size <= size_left) { + char name[32]; + + ram_bases[i] = base; + ram_sizes[i] = bank_size; + base += bank_size; + size_left -= bank_size; + snprintf(name, sizeof(name), "ppc4xx.sdram%d", i); + memory_region_init_alias(&ram_memories[i], NULL, name, ram, + ram_bases[i], ram_sizes[i]); + break; + } + } + if (!size_left) { + /* No need to use the remaining banks. */ + break; + } + } + + if (size_left) { + ram_addr_t used_size = memory_region_size(ram) - size_left; + GString *s = g_string_new(NULL); + + for (i = 0; sdram_bank_sizes[i]; i++) { + g_string_append_printf(s, "%" PRIi64 "%s", + sdram_bank_sizes[i] / MiB, + sdram_bank_sizes[i + 1] ? ", " : ""); + } + error_report("at most %d bank%s of %s MiB each supported", + nr_banks, nr_banks == 1 ? "" : "s", s->str); + error_printf("Possible valid RAM size: %" PRIi64 " MiB \n", + used_size ? 
used_size / MiB : sdram_bank_sizes[i - 1] / MiB); + + g_string_free(s, true); + exit(EXIT_FAILURE); + } +} + +/*****************************************************************************/ +/* MAL */ + +enum { + MAL0_CFG = 0x180, + MAL0_ESR = 0x181, + MAL0_IER = 0x182, + MAL0_TXCASR = 0x184, + MAL0_TXCARR = 0x185, + MAL0_TXEOBISR = 0x186, + MAL0_TXDEIR = 0x187, + MAL0_RXCASR = 0x190, + MAL0_RXCARR = 0x191, + MAL0_RXEOBISR = 0x192, + MAL0_RXDEIR = 0x193, + MAL0_TXCTP0R = 0x1A0, + MAL0_RXCTP0R = 0x1C0, + MAL0_RCBS0 = 0x1E0, + MAL0_RCBS1 = 0x1E1, +}; + +typedef struct ppc4xx_mal_t ppc4xx_mal_t; +struct ppc4xx_mal_t { + qemu_irq irqs[4]; + uint32_t cfg; + uint32_t esr; + uint32_t ier; + uint32_t txcasr; + uint32_t txcarr; + uint32_t txeobisr; + uint32_t txdeir; + uint32_t rxcasr; + uint32_t rxcarr; + uint32_t rxeobisr; + uint32_t rxdeir; + uint32_t *txctpr; + uint32_t *rxctpr; + uint32_t *rcbs; + uint8_t txcnum; + uint8_t rxcnum; +}; + +static void ppc4xx_mal_reset(void *opaque) +{ + ppc4xx_mal_t *mal; + + mal = opaque; + mal->cfg = 0x0007C000; + mal->esr = 0x00000000; + mal->ier = 0x00000000; + mal->rxcasr = 0x00000000; + mal->rxdeir = 0x00000000; + mal->rxeobisr = 0x00000000; + mal->txcasr = 0x00000000; + mal->txdeir = 0x00000000; + mal->txeobisr = 0x00000000; +} + +static uint32_t dcr_read_mal(void *opaque, int dcrn) +{ + ppc4xx_mal_t *mal; + uint32_t ret; + + mal = opaque; + switch (dcrn) { + case MAL0_CFG: + ret = mal->cfg; + break; + case MAL0_ESR: + ret = mal->esr; + break; + case MAL0_IER: + ret = mal->ier; + break; + case MAL0_TXCASR: + ret = mal->txcasr; + break; + case MAL0_TXCARR: + ret = mal->txcarr; + break; + case MAL0_TXEOBISR: + ret = mal->txeobisr; + break; + case MAL0_TXDEIR: + ret = mal->txdeir; + break; + case MAL0_RXCASR: + ret = mal->rxcasr; + break; + case MAL0_RXCARR: + ret = mal->rxcarr; + break; + case MAL0_RXEOBISR: + ret = mal->rxeobisr; + break; + case MAL0_RXDEIR: + ret = mal->rxdeir; + break; + default: + ret = 0; + break; + } + if (dcrn >= MAL0_TXCTP0R && dcrn < MAL0_TXCTP0R + mal->txcnum) { + ret = mal->txctpr[dcrn - MAL0_TXCTP0R]; + } + if (dcrn >= MAL0_RXCTP0R && dcrn < MAL0_RXCTP0R + mal->rxcnum) { + ret = mal->rxctpr[dcrn - MAL0_RXCTP0R]; + } + if (dcrn >= MAL0_RCBS0 && dcrn < MAL0_RCBS0 + mal->rxcnum) { + ret = mal->rcbs[dcrn - MAL0_RCBS0]; + } + + return ret; +} + +static void dcr_write_mal(void *opaque, int dcrn, uint32_t val) +{ + ppc4xx_mal_t *mal; + + mal = opaque; + switch (dcrn) { + case MAL0_CFG: + if (val & 0x80000000) { + ppc4xx_mal_reset(mal); + } + mal->cfg = val & 0x00FFC087; + break; + case MAL0_ESR: + /* Read/clear */ + mal->esr &= ~val; + break; + case MAL0_IER: + mal->ier = val & 0x0000001F; + break; + case MAL0_TXCASR: + mal->txcasr = val & 0xF0000000; + break; + case MAL0_TXCARR: + mal->txcarr = val & 0xF0000000; + break; + case MAL0_TXEOBISR: + /* Read/clear */ + mal->txeobisr &= ~val; + break; + case MAL0_TXDEIR: + /* Read/clear */ + mal->txdeir &= ~val; + break; + case MAL0_RXCASR: + mal->rxcasr = val & 0xC0000000; + break; + case MAL0_RXCARR: + mal->rxcarr = val & 0xC0000000; + break; + case MAL0_RXEOBISR: + /* Read/clear */ + mal->rxeobisr &= ~val; + break; + case MAL0_RXDEIR: + /* Read/clear */ + mal->rxdeir &= ~val; + break; + } + if (dcrn >= MAL0_TXCTP0R && dcrn < MAL0_TXCTP0R + mal->txcnum) { + mal->txctpr[dcrn - MAL0_TXCTP0R] = val; + } + if (dcrn >= MAL0_RXCTP0R && dcrn < MAL0_RXCTP0R + mal->rxcnum) { + mal->rxctpr[dcrn - MAL0_RXCTP0R] = val; + } + if (dcrn >= MAL0_RCBS0 && dcrn < MAL0_RCBS0 + mal->rxcnum) { + mal->rcbs[dcrn 
- MAL0_RCBS0] = val & 0x000000FF; + } +} + +void ppc4xx_mal_init(CPUPPCState *env, uint8_t txcnum, uint8_t rxcnum, + qemu_irq irqs[4]) +{ + ppc4xx_mal_t *mal; + int i; + + assert(txcnum <= 32 && rxcnum <= 32); + mal = g_malloc0(sizeof(*mal)); + mal->txcnum = txcnum; + mal->rxcnum = rxcnum; + mal->txctpr = g_new0(uint32_t, txcnum); + mal->rxctpr = g_new0(uint32_t, rxcnum); + mal->rcbs = g_new0(uint32_t, rxcnum); + for (i = 0; i < 4; i++) { + mal->irqs[i] = irqs[i]; + } + qemu_register_reset(&ppc4xx_mal_reset, mal); + ppc_dcr_register(env, MAL0_CFG, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_ESR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_IER, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXCASR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXCARR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXEOBISR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_TXDEIR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXCASR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXCARR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXEOBISR, + mal, &dcr_read_mal, &dcr_write_mal); + ppc_dcr_register(env, MAL0_RXDEIR, + mal, &dcr_read_mal, &dcr_write_mal); + for (i = 0; i < txcnum; i++) { + ppc_dcr_register(env, MAL0_TXCTP0R + i, + mal, &dcr_read_mal, &dcr_write_mal); + } + for (i = 0; i < rxcnum; i++) { + ppc_dcr_register(env, MAL0_RXCTP0R + i, + mal, &dcr_read_mal, &dcr_write_mal); + } + for (i = 0; i < rxcnum; i++) { + ppc_dcr_register(env, MAL0_RCBS0 + i, + mal, &dcr_read_mal, &dcr_write_mal); + } +} diff --git a/hw/ppc/ppc4xx_pci.c b/hw/ppc/ppc4xx_pci.c new file mode 100644 index 000000000..304a29349 --- /dev/null +++ b/hw/ppc/ppc4xx_pci.c @@ -0,0 +1,389 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard <hollisb@us.ibm.com> + */ + +/* This file implements emulation of the 32-bit PCI controller found in some + * 4xx SoCs, such as the 440EP. 
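+ * The model is register-level only: the PMM and PTM window registers are
+ * stored (and migrated) but no address translation is set up for them.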
*/ + +#include "qemu/osdep.h" +#include "hw/irq.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "migration/vmstate.h" +#include "qemu/module.h" +#include "sysemu/reset.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "trace.h" +#include "qom/object.h" + +struct PCIMasterMap { + uint32_t la; + uint32_t ma; + uint32_t pcila; + uint32_t pciha; +}; + +struct PCITargetMap { + uint32_t ms; + uint32_t la; +}; + +OBJECT_DECLARE_SIMPLE_TYPE(PPC4xxPCIState, PPC4xx_PCI_HOST_BRIDGE) + +#define PPC4xx_PCI_NR_PMMS 3 +#define PPC4xx_PCI_NR_PTMS 2 + +#define PPC4xx_PCI_NUM_DEVS 5 + +struct PPC4xxPCIState { + PCIHostState parent_obj; + + struct PCIMasterMap pmm[PPC4xx_PCI_NR_PMMS]; + struct PCITargetMap ptm[PPC4xx_PCI_NR_PTMS]; + qemu_irq irq[PPC4xx_PCI_NUM_DEVS]; + + MemoryRegion container; + MemoryRegion iomem; +}; + +#define PCIC0_CFGADDR 0x0 +#define PCIC0_CFGDATA 0x4 + +/* PLB Memory Map (PMM) registers specify which PLB addresses are translated to + * PCI accesses. */ +#define PCIL0_PMM0LA 0x0 +#define PCIL0_PMM0MA 0x4 +#define PCIL0_PMM0PCILA 0x8 +#define PCIL0_PMM0PCIHA 0xc +#define PCIL0_PMM1LA 0x10 +#define PCIL0_PMM1MA 0x14 +#define PCIL0_PMM1PCILA 0x18 +#define PCIL0_PMM1PCIHA 0x1c +#define PCIL0_PMM2LA 0x20 +#define PCIL0_PMM2MA 0x24 +#define PCIL0_PMM2PCILA 0x28 +#define PCIL0_PMM2PCIHA 0x2c + +/* PCI Target Map (PTM) registers specify which PCI addresses are translated to + * PLB accesses. */ +#define PCIL0_PTM1MS 0x30 +#define PCIL0_PTM1LA 0x34 +#define PCIL0_PTM2MS 0x38 +#define PCIL0_PTM2LA 0x3c +#define PCI_REG_BASE 0x800000 +#define PCI_REG_SIZE 0x40 + +#define PCI_ALL_SIZE (PCI_REG_BASE + PCI_REG_SIZE) + +static void ppc4xx_pci_reg_write4(void *opaque, hwaddr offset, + uint64_t value, unsigned size) +{ + struct PPC4xxPCIState *pci = opaque; + + /* We ignore all target attempts at PCI configuration, effectively + * assuming a bidirectional 1:1 mapping of PLB and PCI space. 
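+     *
+     * For illustration (values are a sketch, not taken from this patch):
+     * a guest opening a 256 MiB master window at PLB address 0x80000000,
+     * mapped 1:1 onto PCI, would program roughly
+     *
+     *     PMM0LA    = 0x80000000   (PLB local address)
+     *     PMM0MA    = 0xf0000001   (address mask with the enable bit set)
+     *     PMM0PCILA = 0x80000000   (PCI address, low half)
+     *     PMM0PCIHA = 0x00000000   (PCI address, high half)
+     *
+     * and this model would merely record the four values in pci->pmm[0].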
*/ + + switch (offset) { + case PCIL0_PMM0LA: + pci->pmm[0].la = value; + break; + case PCIL0_PMM0MA: + pci->pmm[0].ma = value; + break; + case PCIL0_PMM0PCIHA: + pci->pmm[0].pciha = value; + break; + case PCIL0_PMM0PCILA: + pci->pmm[0].pcila = value; + break; + + case PCIL0_PMM1LA: + pci->pmm[1].la = value; + break; + case PCIL0_PMM1MA: + pci->pmm[1].ma = value; + break; + case PCIL0_PMM1PCIHA: + pci->pmm[1].pciha = value; + break; + case PCIL0_PMM1PCILA: + pci->pmm[1].pcila = value; + break; + + case PCIL0_PMM2LA: + pci->pmm[2].la = value; + break; + case PCIL0_PMM2MA: + pci->pmm[2].ma = value; + break; + case PCIL0_PMM2PCIHA: + pci->pmm[2].pciha = value; + break; + case PCIL0_PMM2PCILA: + pci->pmm[2].pcila = value; + break; + + case PCIL0_PTM1MS: + pci->ptm[0].ms = value; + break; + case PCIL0_PTM1LA: + pci->ptm[0].la = value; + break; + case PCIL0_PTM2MS: + pci->ptm[1].ms = value; + break; + case PCIL0_PTM2LA: + pci->ptm[1].la = value; + break; + + default: + printf("%s: unhandled PCI internal register 0x%lx\n", __func__, + (unsigned long)offset); + break; + } +} + +static uint64_t ppc4xx_pci_reg_read4(void *opaque, hwaddr offset, + unsigned size) +{ + struct PPC4xxPCIState *pci = opaque; + uint32_t value; + + switch (offset) { + case PCIL0_PMM0LA: + value = pci->pmm[0].la; + break; + case PCIL0_PMM0MA: + value = pci->pmm[0].ma; + break; + case PCIL0_PMM0PCIHA: + value = pci->pmm[0].pciha; + break; + case PCIL0_PMM0PCILA: + value = pci->pmm[0].pcila; + break; + + case PCIL0_PMM1LA: + value = pci->pmm[1].la; + break; + case PCIL0_PMM1MA: + value = pci->pmm[1].ma; + break; + case PCIL0_PMM1PCIHA: + value = pci->pmm[1].pciha; + break; + case PCIL0_PMM1PCILA: + value = pci->pmm[1].pcila; + break; + + case PCIL0_PMM2LA: + value = pci->pmm[2].la; + break; + case PCIL0_PMM2MA: + value = pci->pmm[2].ma; + break; + case PCIL0_PMM2PCIHA: + value = pci->pmm[2].pciha; + break; + case PCIL0_PMM2PCILA: + value = pci->pmm[2].pcila; + break; + + case PCIL0_PTM1MS: + value = pci->ptm[0].ms; + break; + case PCIL0_PTM1LA: + value = pci->ptm[0].la; + break; + case PCIL0_PTM2MS: + value = pci->ptm[1].ms; + break; + case PCIL0_PTM2LA: + value = pci->ptm[1].la; + break; + + default: + printf("%s: invalid PCI internal register 0x%lx\n", __func__, + (unsigned long)offset); + value = 0; + } + + return value; +} + +static const MemoryRegionOps pci_reg_ops = { + .read = ppc4xx_pci_reg_read4, + .write = ppc4xx_pci_reg_write4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + +static void ppc4xx_pci_reset(void *opaque) +{ + struct PPC4xxPCIState *pci = opaque; + + memset(pci->pmm, 0, sizeof(pci->pmm)); + memset(pci->ptm, 0, sizeof(pci->ptm)); +} + +/* On Bamboo, all pins from each slot are tied to a single board IRQ. This + * may need further refactoring for other boards. */ +static int ppc4xx_pci_map_irq(PCIDevice *pci_dev, int irq_num) +{ + int slot = PCI_SLOT(pci_dev->devfn); + + trace_ppc4xx_pci_map_irq(pci_dev->devfn, irq_num, slot); + + return slot > 0 ? 
slot - 1 : PPC4xx_PCI_NUM_DEVS - 1; +} + +static void ppc4xx_pci_set_irq(void *opaque, int irq_num, int level) +{ + qemu_irq *pci_irqs = opaque; + + trace_ppc4xx_pci_set_irq(irq_num); + assert(irq_num >= 0 && irq_num < PPC4xx_PCI_NUM_DEVS); + qemu_set_irq(pci_irqs[irq_num], level); +} + +static const VMStateDescription vmstate_pci_master_map = { + .name = "pci_master_map", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(la, struct PCIMasterMap), + VMSTATE_UINT32(ma, struct PCIMasterMap), + VMSTATE_UINT32(pcila, struct PCIMasterMap), + VMSTATE_UINT32(pciha, struct PCIMasterMap), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_pci_target_map = { + .name = "pci_target_map", + .version_id = 0, + .minimum_version_id = 0, + .fields = (VMStateField[]) { + VMSTATE_UINT32(ms, struct PCITargetMap), + VMSTATE_UINT32(la, struct PCITargetMap), + VMSTATE_END_OF_LIST() + } +}; + +static const VMStateDescription vmstate_ppc4xx_pci = { + .name = "ppc4xx_pci", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_ARRAY(pmm, PPC4xxPCIState, PPC4xx_PCI_NR_PMMS, 1, + vmstate_pci_master_map, + struct PCIMasterMap), + VMSTATE_STRUCT_ARRAY(ptm, PPC4xxPCIState, PPC4xx_PCI_NR_PTMS, 1, + vmstate_pci_target_map, + struct PCITargetMap), + VMSTATE_END_OF_LIST() + } +}; + +/* XXX Interrupt acknowledge cycles not supported. */ +static void ppc4xx_pcihost_realize(DeviceState *dev, Error **errp) +{ + SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + PPC4xxPCIState *s; + PCIHostState *h; + PCIBus *b; + int i; + + h = PCI_HOST_BRIDGE(dev); + s = PPC4xx_PCI_HOST_BRIDGE(dev); + + for (i = 0; i < ARRAY_SIZE(s->irq); i++) { + sysbus_init_irq(sbd, &s->irq[i]); + } + + b = pci_register_root_bus(dev, NULL, ppc4xx_pci_set_irq, + ppc4xx_pci_map_irq, s->irq, get_system_memory(), + get_system_io(), 0, ARRAY_SIZE(s->irq), + TYPE_PCI_BUS); + h->bus = b; + + pci_create_simple(b, 0, "ppc4xx-host-bridge"); + + /* XXX split into 2 memory regions, one for config space, one for regs */ + memory_region_init(&s->container, OBJECT(s), "pci-container", PCI_ALL_SIZE); + memory_region_init_io(&h->conf_mem, OBJECT(s), &pci_host_conf_le_ops, h, + "pci-conf-idx", 4); + memory_region_init_io(&h->data_mem, OBJECT(s), &pci_host_data_le_ops, h, + "pci-conf-data", 4); + memory_region_init_io(&s->iomem, OBJECT(s), &pci_reg_ops, s, + "pci.reg", PCI_REG_SIZE); + memory_region_add_subregion(&s->container, PCIC0_CFGADDR, &h->conf_mem); + memory_region_add_subregion(&s->container, PCIC0_CFGDATA, &h->data_mem); + memory_region_add_subregion(&s->container, PCI_REG_BASE, &s->iomem); + sysbus_init_mmio(sbd, &s->container); + qemu_register_reset(ppc4xx_pci_reset, s); +} + +static void ppc4xx_host_bridge_class_init(ObjectClass *klass, void *data) +{ + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->desc = "Host bridge"; + k->vendor_id = PCI_VENDOR_ID_IBM; + k->device_id = PCI_DEVICE_ID_IBM_440GX; + k->class_id = PCI_CLASS_BRIDGE_OTHER; + /* + * PCI-facing part of the host bridge, not usable without the + * host-facing part, which can't be device_add'ed, yet. 
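+     * It is created internally by ppc4xx_pcihost_realize() via
+     * pci_create_simple(), which is why user_creatable is cleared here.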
+ */ + dc->user_creatable = false; +} + +static const TypeInfo ppc4xx_host_bridge_info = { + .name = "ppc4xx-host-bridge", + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(PCIDevice), + .class_init = ppc4xx_host_bridge_class_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { }, + }, +}; + +static void ppc4xx_pcihost_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = ppc4xx_pcihost_realize; + dc->vmsd = &vmstate_ppc4xx_pci; +} + +static const TypeInfo ppc4xx_pcihost_info = { + .name = TYPE_PPC4xx_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(PPC4xxPCIState), + .class_init = ppc4xx_pcihost_class_init, +}; + +static void ppc4xx_pci_register_types(void) +{ + type_register_static(&ppc4xx_pcihost_info); + type_register_static(&ppc4xx_host_bridge_info); +} + +type_init(ppc4xx_pci_register_types) diff --git a/hw/ppc/ppc_booke.c b/hw/ppc/ppc_booke.c new file mode 100644 index 000000000..10b643861 --- /dev/null +++ b/hw/ppc/ppc_booke.c @@ -0,0 +1,369 @@ +/* + * QEMU PowerPC Booke hardware System Emulator + * + * Copyright (c) 2011 AdaCore + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "hw/ppc/ppc.h" +#include "qemu/timer.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "hw/loader.h" +#include "kvm_ppc.h" + + +/* Timer Control Register */ + +#define TCR_WP_SHIFT 30 /* Watchdog Timer Period */ +#define TCR_WP_MASK (0x3U << TCR_WP_SHIFT) +#define TCR_WRC_SHIFT 28 /* Watchdog Timer Reset Control */ +#define TCR_WRC_MASK (0x3U << TCR_WRC_SHIFT) +#define TCR_WIE (1U << 27) /* Watchdog Timer Interrupt Enable */ +#define TCR_DIE (1U << 26) /* Decrementer Interrupt Enable */ +#define TCR_FP_SHIFT 24 /* Fixed-Interval Timer Period */ +#define TCR_FP_MASK (0x3U << TCR_FP_SHIFT) +#define TCR_FIE (1U << 23) /* Fixed-Interval Timer Interrupt Enable */ +#define TCR_ARE (1U << 22) /* Auto-Reload Enable */ + +/* Timer Control Register (e500 specific fields) */ + +#define TCR_E500_FPEXT_SHIFT 13 /* Fixed-Interval Timer Period Extension */ +#define TCR_E500_FPEXT_MASK (0xf << TCR_E500_FPEXT_SHIFT) +#define TCR_E500_WPEXT_SHIFT 17 /* Watchdog Timer Period Extension */ +#define TCR_E500_WPEXT_MASK (0xf << TCR_E500_WPEXT_SHIFT) + +/* Timer Status Register */ + +#define TSR_FIS (1U << 26) /* Fixed-Interval Timer Interrupt Status */ +#define TSR_DIS (1U << 27) /* Decrementer Interrupt Status */ +#define TSR_WRS_SHIFT 28 /* Watchdog Timer Reset Status */ +#define TSR_WRS_MASK (0x3U << TSR_WRS_SHIFT) +#define TSR_WIS (1U << 30) /* Watchdog Timer Interrupt Status */ +#define TSR_ENW (1U << 31) /* Enable Next Watchdog Timer */ + +typedef struct booke_timer_t booke_timer_t; +struct booke_timer_t { + + uint64_t fit_next; + QEMUTimer *fit_timer; + + uint64_t wdt_next; + QEMUTimer *wdt_timer; + + uint32_t flags; +}; + +static void booke_update_irq(PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + ppc_set_irq(cpu, PPC_INTERRUPT_DECR, + (env->spr[SPR_BOOKE_TSR] & TSR_DIS + && env->spr[SPR_BOOKE_TCR] & TCR_DIE)); + + ppc_set_irq(cpu, PPC_INTERRUPT_WDT, + (env->spr[SPR_BOOKE_TSR] & TSR_WIS + && env->spr[SPR_BOOKE_TCR] & TCR_WIE)); + + ppc_set_irq(cpu, PPC_INTERRUPT_FIT, + (env->spr[SPR_BOOKE_TSR] & TSR_FIS + && env->spr[SPR_BOOKE_TCR] & TCR_FIE)); +} + +/* Return the location of the bit of time base at which the FIT will raise an + interrupt */ +static uint8_t booke_get_fit_target(CPUPPCState *env, ppc_tb_t *tb_env) +{ + uint8_t fp = (env->spr[SPR_BOOKE_TCR] & TCR_FP_MASK) >> TCR_FP_SHIFT; + + if (tb_env->flags & PPC_TIMER_E500) { + /* e500 Fixed-interval timer period extension */ + uint32_t fpext = (env->spr[SPR_BOOKE_TCR] & TCR_E500_FPEXT_MASK) + >> TCR_E500_FPEXT_SHIFT; + fp = 63 - (fp | fpext << 2); + } else { + fp = env->fit_period[fp]; + } + + return fp; +} + +/* Return the location of the bit of time base at which the WDT will raise an + interrupt */ +static uint8_t booke_get_wdt_target(CPUPPCState *env, ppc_tb_t *tb_env) +{ + uint8_t wp = (env->spr[SPR_BOOKE_TCR] & TCR_WP_MASK) >> TCR_WP_SHIFT; + + if (tb_env->flags & PPC_TIMER_E500) { + /* e500 Watchdog timer period extension */ + uint32_t wpext = (env->spr[SPR_BOOKE_TCR] & TCR_E500_WPEXT_MASK) + >> TCR_E500_WPEXT_SHIFT; + wp = 63 - (wp | wpext << 2); + } else { + wp = env->wdt_period[wp]; + } + + return wp; +} + +static void booke_update_fixed_timer(CPUPPCState *env, + uint8_t target_bit, + uint64_t *next, + QEMUTimer *timer, + int tsr_bit) +{ + ppc_tb_t *tb_env = env->tb_env; + uint64_t delta_tick, ticks = 0; + uint64_t tb; + uint64_t period; + uint64_t now; + + if (!(env->spr[SPR_BOOKE_TSR] & tsr_bit)) { + /* + * Don't arm the timer again when the guest has 
the current + * interrupt still pending. Wait for it to ack it. + */ + return; + } + + now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + tb = cpu_ppc_get_tb(tb_env, now, tb_env->tb_offset); + period = 1ULL << target_bit; + delta_tick = period - (tb & (period - 1)); + + /* the timer triggers only when the selected bit toggles from 0 to 1 */ + if (tb & period) { + ticks = period; + } + + if (ticks + delta_tick < ticks) { + /* Overflow, so assume the biggest number we can express. */ + ticks = UINT64_MAX; + } else { + ticks += delta_tick; + } + + *next = now + muldiv64(ticks, NANOSECONDS_PER_SECOND, tb_env->tb_freq); + if ((*next < now) || (*next > INT64_MAX)) { + /* Overflow, so assume the biggest number the qemu timer supports. */ + *next = INT64_MAX; + } + + /* XXX: If expire time is now. We can't run the callback because we don't + * have access to it. So we just set the timer one nanosecond later. + */ + + if (*next == now) { + (*next)++; + } else { + /* + * There's no point to fake any granularity that's more fine grained + * than milliseconds. Anything beyond that just overloads the system. + */ + *next = MAX(*next, now + SCALE_MS); + } + + /* Fire the next timer */ + timer_mod(timer, *next); +} + +static void booke_decr_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + env->spr[SPR_BOOKE_TSR] |= TSR_DIS; + booke_update_irq(cpu); + + if (env->spr[SPR_BOOKE_TCR] & TCR_ARE) { + /* Do not reload 0, it is already there. It would just trigger + * the timer again and lead to infinite loop */ + if (env->spr[SPR_BOOKE_DECAR] != 0) { + /* Auto Reload */ + cpu_ppc_store_decr(env, env->spr[SPR_BOOKE_DECAR]); + } + } +} + +static void booke_fit_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + ppc_tb_t *tb_env; + booke_timer_t *booke_timer; + + tb_env = env->tb_env; + booke_timer = tb_env->opaque; + env->spr[SPR_BOOKE_TSR] |= TSR_FIS; + + booke_update_irq(cpu); + + booke_update_fixed_timer(env, + booke_get_fit_target(env, tb_env), + &booke_timer->fit_next, + booke_timer->fit_timer, + TSR_FIS); +} + +static void booke_wdt_cb(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + ppc_tb_t *tb_env; + booke_timer_t *booke_timer; + + tb_env = env->tb_env; + booke_timer = tb_env->opaque; + + /* TODO: There's lots of complicated stuff to do here */ + + booke_update_irq(cpu); + + booke_update_fixed_timer(env, + booke_get_wdt_target(env, tb_env), + &booke_timer->wdt_next, + booke_timer->wdt_timer, + TSR_WIS); +} + +void store_booke_tsr(CPUPPCState *env, target_ulong val) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + booke_timer_t *booke_timer = tb_env->opaque; + + env->spr[SPR_BOOKE_TSR] &= ~val; + kvmppc_clear_tsr_bits(cpu, val); + + if (val & TSR_FIS) { + booke_update_fixed_timer(env, + booke_get_fit_target(env, tb_env), + &booke_timer->fit_next, + booke_timer->fit_timer, + TSR_FIS); + } + + if (val & TSR_WIS) { + booke_update_fixed_timer(env, + booke_get_wdt_target(env, tb_env), + &booke_timer->wdt_next, + booke_timer->wdt_timer, + TSR_WIS); + } + + booke_update_irq(cpu); +} + +void store_booke_tcr(CPUPPCState *env, target_ulong val) +{ + PowerPCCPU *cpu = env_archcpu(env); + ppc_tb_t *tb_env = env->tb_env; + booke_timer_t *booke_timer = tb_env->opaque; + + env->spr[SPR_BOOKE_TCR] = val; + kvmppc_set_tcr(cpu); + + booke_update_irq(cpu); + + booke_update_fixed_timer(env, + booke_get_fit_target(env, tb_env), + &booke_timer->fit_next, + booke_timer->fit_timer, + TSR_FIS); + + 
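+    /*
+     * TCR also holds the watchdog period bits, so a TCR write can move
+     * the watchdog deadline as well; rearm that timer too.
+     */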
booke_update_fixed_timer(env, + booke_get_wdt_target(env, tb_env), + &booke_timer->wdt_next, + booke_timer->wdt_timer, + TSR_WIS); +} + +static void ppc_booke_timer_reset_handle(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + store_booke_tcr(env, 0); + store_booke_tsr(env, -1); +} + +/* + * This function will be called whenever the CPU state changes. + * CPU states are defined "typedef enum RunState". + * Regarding timer, When CPU state changes to running after debug halt + * or similar cases which takes time then in between final watchdog + * expiry happenes. This will cause exit to QEMU and configured watchdog + * action will be taken. To avoid this we always clear the watchdog state when + * state changes to running. + */ +static void cpu_state_change_handler(void *opaque, bool running, RunState state) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + + if (!running) { + return; + } + + /* + * Clear watchdog interrupt condition by clearing TSR. + */ + store_booke_tsr(env, TSR_ENW | TSR_WIS | TSR_WRS_MASK); +} + +void ppc_booke_timers_init(PowerPCCPU *cpu, uint32_t freq, uint32_t flags) +{ + ppc_tb_t *tb_env; + booke_timer_t *booke_timer; + int ret = 0; + + tb_env = g_malloc0(sizeof(ppc_tb_t)); + booke_timer = g_malloc0(sizeof(booke_timer_t)); + + cpu->env.tb_env = tb_env; + tb_env->flags = flags | PPC_TIMER_BOOKE | PPC_DECR_ZERO_TRIGGERED; + + tb_env->tb_freq = freq; + tb_env->decr_freq = freq; + tb_env->opaque = booke_timer; + tb_env->decr_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_decr_cb, cpu); + + booke_timer->fit_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_fit_cb, cpu); + booke_timer->wdt_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, &booke_wdt_cb, cpu); + + ret = kvmppc_booke_watchdog_enable(cpu); + + if (ret) { + /* TODO: Start the QEMU emulated watchdog if not running on KVM. + * Also start the QEMU emulated watchdog if KVM does not support + * emulated watchdog or somehow it is not enabled (supported but + * not enabled is though some bug and requires debugging :)). + */ + } + + qemu_add_vm_change_state_handler(cpu_state_change_handler, cpu); + + qemu_register_reset(ppc_booke_timer_reset_handle, cpu); +} diff --git a/hw/ppc/ppce500_spin.c b/hw/ppc/ppce500_spin.c new file mode 100644 index 000000000..d57b19979 --- /dev/null +++ b/hw/ppc/ppce500_spin.c @@ -0,0 +1,209 @@ +/* + * QEMU PowerPC e500v2 ePAPR spinning code + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Alexander Graf, <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + * This code is not really a device, but models an interface that usually + * firmware takes care of. It's used when QEMU plays the role of firmware. 
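+ *
+ * Each secondary CPU spins on its SpinInfo entry below: once the guest
+ * writes a release address with bit 0 cleared, the CPU is kicked and
+ * resumes at that address with r3 taken from the entry, as described
+ * in the ePAPR specification referenced below.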
+ * + * Specification: + * + * https://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.1.pdf + * + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "qemu/units.h" +#include "hw/hw.h" +#include "hw/sysbus.h" +#include "sysemu/hw_accel.h" +#include "e500.h" +#include "qom/object.h" + +#define MAX_CPUS 32 + +typedef struct spin_info { + uint64_t addr; + uint64_t r3; + uint32_t resv; + uint32_t pir; + uint64_t reserved; +} QEMU_PACKED SpinInfo; + +#define TYPE_E500_SPIN "e500-spin" +OBJECT_DECLARE_SIMPLE_TYPE(SpinState, E500_SPIN) + +struct SpinState { + SysBusDevice parent_obj; + + MemoryRegion iomem; + SpinInfo spin[MAX_CPUS]; +}; + +static void spin_reset(DeviceState *dev) +{ + SpinState *s = E500_SPIN(dev); + int i; + + for (i = 0; i < MAX_CPUS; i++) { + SpinInfo *info = &s->spin[i]; + + stl_p(&info->pir, i); + stq_p(&info->r3, i); + stq_p(&info->addr, 1); + } +} + +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa, + hwaddr len) +{ + ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 1); + hwaddr size; + + size = (booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT); + tlb->mas1 = MAS1_VALID | size; + tlb->mas2 = (va & TARGET_PAGE_MASK) | MAS2_M; + tlb->mas7_3 = pa & TARGET_PAGE_MASK; + tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; + env->tlb_dirty = true; +} + +static void spin_kick(CPUState *cs, run_on_cpu_data data) +{ + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + SpinInfo *curspin = data.host_ptr; + hwaddr map_size = 64 * MiB; + hwaddr map_start; + + cpu_synchronize_state(cs); + stl_p(&curspin->pir, env->spr[SPR_BOOKE_PIR]); + env->nip = ldq_p(&curspin->addr) & (map_size - 1); + env->gpr[3] = ldq_p(&curspin->r3); + env->gpr[4] = 0; + env->gpr[5] = 0; + env->gpr[6] = 0; + env->gpr[7] = map_size; + env->gpr[8] = 0; + env->gpr[9] = 0; + + map_start = ldq_p(&curspin->addr) & ~(map_size - 1); + mmubooke_create_initial_mapping(env, 0, map_start, map_size); + + cs->halted = 0; + cs->exception_index = -1; + cs->stopped = false; + qemu_cpu_kick(cs); +} + +static void spin_write(void *opaque, hwaddr addr, uint64_t value, + unsigned len) +{ + SpinState *s = opaque; + int env_idx = addr / sizeof(SpinInfo); + CPUState *cpu; + SpinInfo *curspin = &s->spin[env_idx]; + uint8_t *curspin_p = (uint8_t*)curspin; + + cpu = qemu_get_cpu(env_idx); + if (cpu == NULL) { + /* Unknown CPU */ + return; + } + + if (cpu->cpu_index == 0) { + /* primary CPU doesn't spin */ + return; + } + + curspin_p = &curspin_p[addr % sizeof(SpinInfo)]; + switch (len) { + case 1: + stb_p(curspin_p, value); + break; + case 2: + stw_p(curspin_p, value); + break; + case 4: + stl_p(curspin_p, value); + break; + } + + if (!(ldq_p(&curspin->addr) & 1)) { + /* run CPU */ + run_on_cpu(cpu, spin_kick, RUN_ON_CPU_HOST_PTR(curspin)); + } +} + +static uint64_t spin_read(void *opaque, hwaddr addr, unsigned len) +{ + SpinState *s = opaque; + uint8_t *spin_p = &((uint8_t*)s->spin)[addr]; + + switch (len) { + case 1: + return ldub_p(spin_p); + case 2: + return lduw_p(spin_p); + case 4: + return ldl_p(spin_p); + default: + hw_error("ppce500: unexpected %s with len = %u", __func__, len); + } +} + +static const MemoryRegionOps spin_rw_ops = { + .read = spin_read, + .write = spin_write, + .endianness = DEVICE_BIG_ENDIAN, +}; + +static void ppce500_spin_initfn(Object *obj) +{ + SysBusDevice *dev = SYS_BUS_DEVICE(obj); + SpinState *s = E500_SPIN(dev); + + memory_region_init_io(&s->iomem, obj, &spin_rw_ops, s, + "e500 spin pv device", 
sizeof(SpinInfo) * MAX_CPUS); + sysbus_init_mmio(dev, &s->iomem); +} + +static void ppce500_spin_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = spin_reset; +} + +static const TypeInfo ppce500_spin_info = { + .name = TYPE_E500_SPIN, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SpinState), + .instance_init = ppce500_spin_initfn, + .class_init = ppce500_spin_class_init, +}; + +static void ppce500_spin_register_types(void) +{ + type_register_static(&ppce500_spin_info); +} + +type_init(ppce500_spin_register_types) diff --git a/hw/ppc/prep.c b/hw/ppc/prep.c new file mode 100644 index 000000000..25a2e86b4 --- /dev/null +++ b/hw/ppc/prep.c @@ -0,0 +1,440 @@ +/* + * QEMU PPC PREP hardware System Emulator + * + * Copyright (c) 2003-2007 Jocelyn Mayer + * Copyright (c) 2017 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "hw/rtc/m48t59.h" +#include "hw/char/serial.h" +#include "hw/block/fdc.h" +#include "net/net.h" +#include "hw/isa/isa.h" +#include "hw/pci/pci.h" +#include "hw/pci/pci_host.h" +#include "hw/ppc/ppc.h" +#include "hw/boards.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "hw/loader.h" +#include "hw/rtc/mc146818rtc.h" +#include "hw/isa/pc87312.h" +#include "hw/qdev-properties.h" +#include "sysemu/kvm.h" +#include "sysemu/reset.h" +#include "trace.h" +#include "elf.h" +#include "qemu/units.h" +#include "kvm_ppc.h" + +/* SMP is not enabled, for now */ +#define MAX_CPUS 1 + +#define MAX_IDE_BUS 2 + +#define CFG_ADDR 0xf0000510 + +#define KERNEL_LOAD_ADDR 0x01000000 +#define INITRD_LOAD_ADDR 0x01800000 + +#define NVRAM_SIZE 0x2000 + +static void fw_cfg_boot_set(void *opaque, const char *boot_device, + Error **errp) +{ + fw_cfg_modify_i16(opaque, FW_CFG_BOOT_DEVICE, boot_device[0]); +} + +static void ppc_prep_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + + cpu_reset(CPU(cpu)); +} + + +/*****************************************************************************/ +/* NVRAM helpers */ +static inline uint32_t nvram_read(Nvram *nvram, uint32_t addr) +{ + NvramClass *k = NVRAM_GET_CLASS(nvram); + return (k->read)(nvram, addr); +} + +static inline void nvram_write(Nvram *nvram, uint32_t addr, uint32_t val) +{ + NvramClass *k = NVRAM_GET_CLASS(nvram); + (k->write)(nvram, addr, val); +} + +static void NVRAM_set_byte(Nvram *nvram, uint32_t addr, uint8_t value) +{ + nvram_write(nvram, addr, value); +} + +static uint8_t NVRAM_get_byte(Nvram *nvram, uint32_t addr) +{ + return nvram_read(nvram, addr); +} + +static void NVRAM_set_word(Nvram *nvram, uint32_t addr, uint16_t value) +{ + nvram_write(nvram, addr, value >> 8); + nvram_write(nvram, addr + 1, value & 0xFF); +} + +static uint16_t NVRAM_get_word(Nvram *nvram, uint32_t addr) +{ + uint16_t tmp; + + tmp = nvram_read(nvram, addr) << 8; + tmp |= nvram_read(nvram, addr + 1); + + return tmp; +} + +static void NVRAM_set_lword(Nvram *nvram, uint32_t addr, uint32_t value) +{ + nvram_write(nvram, addr, value >> 24); + nvram_write(nvram, addr + 1, (value >> 16) & 0xFF); + nvram_write(nvram, addr + 2, (value >> 8) & 0xFF); + nvram_write(nvram, addr + 3, value & 0xFF); +} + +static void NVRAM_set_string(Nvram *nvram, uint32_t addr, const char *str, + uint32_t max) +{ + int i; + + for (i = 0; i < max && str[i] != '\0'; i++) { + nvram_write(nvram, addr + i, str[i]); + } + nvram_write(nvram, addr + i, str[i]); + nvram_write(nvram, addr + max - 1, '\0'); +} + +static uint16_t NVRAM_crc_update (uint16_t prev, uint16_t value) +{ + uint16_t tmp; + uint16_t pd, pd1, pd2; + + tmp = prev >> 8; + pd = prev ^ value; + pd1 = pd & 0x000F; + pd2 = ((pd >> 4) & 0x000F) ^ pd1; + tmp ^= (pd1 << 3) | (pd1 << 8); + tmp ^= pd2 | (pd2 << 7) | (pd2 << 12); + + return tmp; +} + +static uint16_t NVRAM_compute_crc (Nvram *nvram, uint32_t start, uint32_t count) +{ + uint32_t i; + uint16_t crc = 0xFFFF; + int odd; + + odd = count & 1; + count &= ~1; + for (i = 0; i != count; i++) { + crc = NVRAM_crc_update(crc, NVRAM_get_word(nvram, start + i)); + } + if (odd) { + crc = NVRAM_crc_update(crc, NVRAM_get_byte(nvram, start + i) << 8); + } + + return crc; +} + +#define CMDLINE_ADDR 0x017ff000 + +static int PPC_NVRAM_set_params (Nvram *nvram, uint16_t NVRAM_size, + const char *arch, + uint32_t RAM_size, int boot_device, + uint32_t kernel_image, uint32_t kernel_size, + const char *cmdline, + uint32_t 
initrd_image, uint32_t initrd_size, + uint32_t NVRAM_image, + int width, int height, int depth) +{ + uint16_t crc; + + /* Set parameters for Open Hack'Ware BIOS */ + NVRAM_set_string(nvram, 0x00, "QEMU_BIOS", 16); + NVRAM_set_lword(nvram, 0x10, 0x00000002); /* structure v2 */ + NVRAM_set_word(nvram, 0x14, NVRAM_size); + NVRAM_set_string(nvram, 0x20, arch, 16); + NVRAM_set_lword(nvram, 0x30, RAM_size); + NVRAM_set_byte(nvram, 0x34, boot_device); + NVRAM_set_lword(nvram, 0x38, kernel_image); + NVRAM_set_lword(nvram, 0x3C, kernel_size); + if (cmdline) { + /* XXX: put the cmdline in NVRAM too ? */ + pstrcpy_targphys("cmdline", CMDLINE_ADDR, RAM_size - CMDLINE_ADDR, + cmdline); + NVRAM_set_lword(nvram, 0x40, CMDLINE_ADDR); + NVRAM_set_lword(nvram, 0x44, strlen(cmdline)); + } else { + NVRAM_set_lword(nvram, 0x40, 0); + NVRAM_set_lword(nvram, 0x44, 0); + } + NVRAM_set_lword(nvram, 0x48, initrd_image); + NVRAM_set_lword(nvram, 0x4C, initrd_size); + NVRAM_set_lword(nvram, 0x50, NVRAM_image); + + NVRAM_set_word(nvram, 0x54, width); + NVRAM_set_word(nvram, 0x56, height); + NVRAM_set_word(nvram, 0x58, depth); + crc = NVRAM_compute_crc(nvram, 0x00, 0xF8); + NVRAM_set_word(nvram, 0xFC, crc); + + return 0; +} + +static int prep_set_cmos_checksum(DeviceState *dev, void *opaque) +{ + uint16_t checksum = *(uint16_t *)opaque; + ISADevice *rtc; + + if (object_dynamic_cast(OBJECT(dev), TYPE_MC146818_RTC)) { + rtc = ISA_DEVICE(dev); + rtc_set_memory(rtc, 0x2e, checksum & 0xff); + rtc_set_memory(rtc, 0x3e, checksum & 0xff); + rtc_set_memory(rtc, 0x2f, checksum >> 8); + rtc_set_memory(rtc, 0x3f, checksum >> 8); + + object_property_add_alias(qdev_get_machine(), "rtc-time", OBJECT(rtc), + "date"); + } + return 0; +} + +static void ibm_40p_init(MachineState *machine) +{ + const char *bios_name = machine->firmware ?: "openbios-ppc"; + CPUPPCState *env = NULL; + uint16_t cmos_checksum; + PowerPCCPU *cpu; + DeviceState *dev, *i82378_dev; + SysBusDevice *pcihost, *s; + Nvram *m48t59 = NULL; + PCIBus *pci_bus; + ISADevice *isa_dev; + ISABus *isa_bus; + void *fw_cfg; + int i; + uint32_t kernel_base = 0, initrd_base = 0; + long kernel_size = 0, initrd_size = 0; + char boot_device; + + /* init CPU */ + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) { + error_report("only 6xx bus is supported on this machine"); + exit(1); + } + + if (env->flags & POWERPC_FLAG_RTC_CLK) { + /* POWER / PowerPC 601 RTC clock frequency is 7.8125 MHz */ + cpu_ppc_tb_init(env, 7812500UL); + } else { + /* Set time-base frequency to 100 Mhz */ + cpu_ppc_tb_init(env, 100UL * 1000UL * 1000UL); + } + qemu_register_reset(ppc_prep_reset, cpu); + + /* PCI host */ + dev = qdev_new("raven-pcihost"); + qdev_prop_set_string(dev, "bios-name", bios_name); + qdev_prop_set_uint32(dev, "elf-machine", PPC_ELF_MACHINE); + pcihost = SYS_BUS_DEVICE(dev); + object_property_add_child(qdev_get_machine(), "raven", OBJECT(dev)); + sysbus_realize_and_unref(pcihost, &error_fatal); + pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0")); + if (!pci_bus) { + error_report("could not create PCI host controller"); + exit(1); + } + + /* PCI -> ISA bridge */ + i82378_dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(11, 0), "i82378")); + qdev_connect_gpio_out(i82378_dev, 0, + cpu->env.irq_inputs[PPC6xx_INPUT_INT]); + sysbus_connect_irq(pcihost, 0, qdev_get_gpio_in(i82378_dev, 15)); + isa_bus = ISA_BUS(qdev_get_child_bus(i82378_dev, "isa.0")); + + /* Memory controller */ + isa_dev = isa_new("rs6000-mc"); + dev = 
DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "ram-size", machine->ram_size); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + /* RTC */ + isa_dev = isa_new(TYPE_MC146818_RTC); + dev = DEVICE(isa_dev); + qdev_prop_set_int32(dev, "base_year", 1900); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + /* initialize CMOS checksums */ + cmos_checksum = 0x6aa9; + qbus_walk_children(BUS(isa_bus), prep_set_cmos_checksum, NULL, NULL, NULL, + &cmos_checksum); + + /* add some more devices */ + if (defaults_enabled()) { + m48t59 = NVRAM(isa_create_simple(isa_bus, "isa-m48t59")); + + isa_dev = isa_new("cs4231a"); + dev = DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "iobase", 0x830); + qdev_prop_set_uint32(dev, "irq", 10); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + isa_dev = isa_new("pc87312"); + dev = DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "config", 12); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + isa_dev = isa_new("prep-systemio"); + dev = DEVICE(isa_dev); + qdev_prop_set_uint32(dev, "ibm-planar-id", 0xfc); + qdev_prop_set_uint32(dev, "equipment", 0xc0); + isa_realize_and_unref(isa_dev, isa_bus, &error_fatal); + + dev = DEVICE(pci_create_simple(pci_bus, PCI_DEVFN(1, 0), + "lsi53c810")); + lsi53c8xx_handle_legacy_cmdline(dev); + qdev_connect_gpio_out(dev, 0, qdev_get_gpio_in(i82378_dev, 13)); + + /* XXX: s3-trio at PCI_DEVFN(2, 0) */ + pci_vga_init(pci_bus); + + for (i = 0; i < nb_nics; i++) { + pci_nic_init_nofail(&nd_table[i], pci_bus, "pcnet", + i == 0 ? "3" : NULL); + } + } + + /* Prepare firmware configuration for OpenBIOS */ + dev = qdev_new(TYPE_FW_CFG_MEM); + fw_cfg = FW_CFG(dev); + qdev_prop_set_uint32(dev, "data_width", 1); + qdev_prop_set_bit(dev, "dma_enabled", false); + object_property_add_child(OBJECT(qdev_get_machine()), TYPE_FW_CFG, + OBJECT(fw_cfg)); + s = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(s, &error_fatal); + sysbus_mmio_map(s, 0, CFG_ADDR); + sysbus_mmio_map(s, 1, CFG_ADDR + 2); + + if (machine->kernel_filename) { + /* load kernel */ + kernel_base = KERNEL_LOAD_ADDR; + kernel_size = load_image_targphys(machine->kernel_filename, + kernel_base, + machine->ram_size - kernel_base); + if (kernel_size < 0) { + error_report("could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_ADDR, kernel_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_SIZE, kernel_size); + /* load initrd */ + if (machine->initrd_filename) { + initrd_base = INITRD_LOAD_ADDR; + initrd_size = load_image_targphys(machine->initrd_filename, + initrd_base, + machine->ram_size - initrd_base); + if (initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + machine->initrd_filename); + exit(1); + } + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_ADDR, initrd_base); + fw_cfg_add_i32(fw_cfg, FW_CFG_INITRD_SIZE, initrd_size); + } + if (machine->kernel_cmdline && *machine->kernel_cmdline) { + fw_cfg_add_i32(fw_cfg, FW_CFG_KERNEL_CMDLINE, CMDLINE_ADDR); + pstrcpy_targphys("cmdline", CMDLINE_ADDR, TARGET_PAGE_SIZE, + machine->kernel_cmdline); + fw_cfg_add_string(fw_cfg, FW_CFG_CMDLINE_DATA, + machine->kernel_cmdline); + fw_cfg_add_i32(fw_cfg, FW_CFG_CMDLINE_SIZE, + strlen(machine->kernel_cmdline) + 1); + } + boot_device = 'm'; + } else { + boot_device = machine->boot_order[0]; + } + + fw_cfg_add_i16(fw_cfg, FW_CFG_MAX_CPUS, (uint16_t)machine->smp.max_cpus); + fw_cfg_add_i64(fw_cfg, FW_CFG_RAM_SIZE, (uint64_t)machine->ram_size); + fw_cfg_add_i16(fw_cfg, FW_CFG_MACHINE_ID, ARCH_PREP); + + 
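+    /* Expose the display geometry to the firmware. */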
fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_WIDTH, graphic_width); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_HEIGHT, graphic_height); + fw_cfg_add_i16(fw_cfg, FW_CFG_PPC_DEPTH, graphic_depth); + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_IS_KVM, kvm_enabled()); + if (kvm_enabled()) { + uint8_t *hypercall; + + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq()); + hypercall = g_malloc(16); + kvmppc_get_hypercall(env, hypercall, 16); + fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16); + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid()); + } else { + fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, NANOSECONDS_PER_SECOND); + } + fw_cfg_add_i16(fw_cfg, FW_CFG_BOOT_DEVICE, boot_device); + qemu_register_boot_set(fw_cfg_boot_set, fw_cfg); + + /* Prepare firmware configuration for Open Hack'Ware */ + if (m48t59) { + PPC_NVRAM_set_params(m48t59, NVRAM_SIZE, "PREP", machine->ram_size, + boot_device, + kernel_base, kernel_size, + machine->kernel_cmdline, + initrd_base, initrd_size, + /* XXX: need an option to load a NVRAM image */ + 0, + graphic_width, graphic_height, graphic_depth); + } +} + +static void ibm_40p_machine_init(MachineClass *mc) +{ + mc->desc = "IBM RS/6000 7020 (40p)", + mc->init = ibm_40p_init; + mc->max_cpus = 1; + mc->default_ram_size = 128 * MiB; + mc->block_default_type = IF_SCSI; + mc->default_boot_order = "c"; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("604"); + mc->default_display = "std"; +} + +DEFINE_MACHINE("40p", ibm_40p_machine_init) diff --git a/hw/ppc/prep_systemio.c b/hw/ppc/prep_systemio.c new file mode 100644 index 000000000..b2bd78324 --- /dev/null +++ b/hw/ppc/prep_systemio.c @@ -0,0 +1,315 @@ +/* + * QEMU PReP System I/O emulation + * + * Copyright (c) 2017 Hervé Poussineau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "hw/irq.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "exec/address-spaces.h" +#include "qom/object.h" +#include "qemu/error-report.h" /* for error_report() */ +#include "qemu/module.h" +#include "sysemu/runstate.h" +#include "cpu.h" +#include "trace.h" + +#define TYPE_PREP_SYSTEMIO "prep-systemio" +OBJECT_DECLARE_SIMPLE_TYPE(PrepSystemIoState, PREP_SYSTEMIO) + +/* Bit as defined in PowerPC Reference Plaform v1.1, sect. 6.1.5, p. 
132 */ +#define PREP_BIT(n) (1 << (7 - (n))) + +struct PrepSystemIoState { + ISADevice parent_obj; + MemoryRegion ppc_parity_mem; + + qemu_irq non_contiguous_io_map_irq; + uint8_t sreset; /* 0x0092 */ + uint8_t equipment; /* 0x080c */ + uint8_t system_control; /* 0x081c */ + uint8_t iomap_type; /* 0x0850 */ + uint8_t ibm_planar_id; /* 0x0852 */ + qemu_irq softreset_irq; + PortioList portio; +}; + +/* PORT 0092 -- Special Port 92 (Read/Write) */ + +enum { + PORT0092_SOFTRESET = PREP_BIT(7), + PORT0092_LE_MODE = PREP_BIT(6), +}; + +static void prep_port0092_write(void *opaque, uint32_t addr, uint32_t val) +{ + PrepSystemIoState *s = opaque; + + trace_prep_systemio_write(addr, val); + + s->sreset = val & PORT0092_SOFTRESET; + qemu_set_irq(s->softreset_irq, s->sreset); + + if ((val & PORT0092_LE_MODE) != 0) { + /* XXX Not supported yet */ + error_report("little-endian mode not supported"); + vm_stop(RUN_STATE_PAUSED); + } else { + /* Nothing to do */ + } +} + +static uint32_t prep_port0092_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->sreset); + return s->sreset; +} + +/* PORT 0808 -- Hardfile Light Register (Write Only) */ + +enum { + PORT0808_HARDFILE_LIGHT_ON = PREP_BIT(7), +}; + +static void prep_port0808_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0810 -- Password Protect 1 Register (Write Only) */ + +/* reset by port 0x4D in the SIO */ +static void prep_port0810_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0812 -- Password Protect 2 Register (Write Only) */ + +/* reset by port 0x4D in the SIO */ +static void prep_port0812_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0814 -- L2 Invalidate Register (Write Only) */ + +static void prep_port0814_write(void *opaque, uint32_t addr, uint32_t val) +{ + trace_prep_systemio_write(addr, val); +} + +/* PORT 0818 -- Reserved for Keylock (Read Only) */ + +enum { + PORT0818_KEYLOCK_SIGNAL_HIGH = PREP_BIT(7), +}; + +static uint32_t prep_port0818_read(void *opaque, uint32_t addr) +{ + uint32_t val = 0; + trace_prep_systemio_read(addr, val); + return val; +} + +/* PORT 080C -- Equipment */ + +enum { + PORT080C_SCSIFUSE = PREP_BIT(1), + PORT080C_L2_COPYBACK = PREP_BIT(4), + PORT080C_L2_256 = PREP_BIT(5), + PORT080C_UPGRADE_CPU = PREP_BIT(6), + PORT080C_L2 = PREP_BIT(7), +}; + +static uint32_t prep_port080c_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->equipment); + return s->equipment; +} + +/* PORT 081C -- System Control Register (Read/Write) */ + +enum { + PORT081C_FLOPPY_MOTOR_INHIBIT = PREP_BIT(3), + PORT081C_MASK_TEA = PREP_BIT(2), + PORT081C_L2_UPDATE_INHIBIT = PREP_BIT(1), + PORT081C_L2_CACHEMISS_INHIBIT = PREP_BIT(0), +}; + +static void prep_port081c_write(void *opaque, uint32_t addr, uint32_t val) +{ + static const uint8_t mask = PORT081C_FLOPPY_MOTOR_INHIBIT | + PORT081C_MASK_TEA | + PORT081C_L2_UPDATE_INHIBIT | + PORT081C_L2_CACHEMISS_INHIBIT; + PrepSystemIoState *s = opaque; + trace_prep_systemio_write(addr, val); + s->system_control = val & mask; +} + +static uint32_t prep_port081c_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->system_control); + return s->system_control; +} + +/* System Board Identification */ + +static uint32_t prep_port0852_read(void *opaque, uint32_t addr) +{ + 
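+    /* Reports the board identification byte; the 40p machine above sets
+     * the "ibm-planar-id" property to 0xfc. */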
PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->ibm_planar_id); + return s->ibm_planar_id; +} + +/* PORT 0850 -- I/O Map Type Register (Read/Write) */ + +enum { + PORT0850_IOMAP_NONCONTIGUOUS = PREP_BIT(7), +}; + +static uint32_t prep_port0850_read(void *opaque, uint32_t addr) +{ + PrepSystemIoState *s = opaque; + trace_prep_systemio_read(addr, s->iomap_type); + return s->iomap_type; +} + +static void prep_port0850_write(void *opaque, uint32_t addr, uint32_t val) +{ + PrepSystemIoState *s = opaque; + + trace_prep_systemio_write(addr, val); + qemu_set_irq(s->non_contiguous_io_map_irq, + val & PORT0850_IOMAP_NONCONTIGUOUS); + s->iomap_type = val & PORT0850_IOMAP_NONCONTIGUOUS; +} + +static const MemoryRegionPortio ppc_io800_port_list[] = { + { 0x092, 1, 1, .read = prep_port0092_read, + .write = prep_port0092_write, }, + { 0x808, 1, 1, .write = prep_port0808_write, }, + { 0x80c, 1, 1, .read = prep_port080c_read, }, + { 0x810, 1, 1, .write = prep_port0810_write, }, + { 0x812, 1, 1, .write = prep_port0812_write, }, + { 0x814, 1, 1, .write = prep_port0814_write, }, + { 0x818, 1, 1, .read = prep_port0818_read }, + { 0x81c, 1, 1, .read = prep_port081c_read, + .write = prep_port081c_write, }, + { 0x850, 1, 1, .read = prep_port0850_read, + .write = prep_port0850_write, }, + { 0x852, 1, 1, .read = prep_port0852_read, }, + PORTIO_END_OF_LIST() +}; + +static uint64_t ppc_parity_error_readl(void *opaque, hwaddr addr, + unsigned int size) +{ + uint32_t val = 0; + trace_prep_systemio_read((unsigned int)addr, val); + return val; +} + +static void ppc_parity_error_writel(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__); +} + +static const MemoryRegionOps ppc_parity_error_ops = { + .read = ppc_parity_error_readl, + .write = ppc_parity_error_writel, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + }, +}; + +static void prep_systemio_realize(DeviceState *dev, Error **errp) +{ + ISADevice *isa = ISA_DEVICE(dev); + PrepSystemIoState *s = PREP_SYSTEMIO(dev); + PowerPCCPU *cpu; + + qdev_init_gpio_out(dev, &s->non_contiguous_io_map_irq, 1); + s->iomap_type = PORT0850_IOMAP_NONCONTIGUOUS; + qemu_set_irq(s->non_contiguous_io_map_irq, + s->iomap_type & PORT0850_IOMAP_NONCONTIGUOUS); + cpu = POWERPC_CPU(first_cpu); + s->softreset_irq = cpu->env.irq_inputs[PPC6xx_INPUT_HRESET]; + + isa_register_portio_list(isa, &s->portio, 0x0, ppc_io800_port_list, s, + "systemio800"); + + memory_region_init_io(&s->ppc_parity_mem, OBJECT(dev), + &ppc_parity_error_ops, s, "ppc-parity", 0x4); + memory_region_add_subregion(get_system_memory(), 0xbfffeff0, + &s->ppc_parity_mem); +} + +static const VMStateDescription vmstate_prep_systemio = { + .name = "prep_systemio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(sreset, PrepSystemIoState), + VMSTATE_UINT8(system_control, PrepSystemIoState), + VMSTATE_UINT8(iomap_type, PrepSystemIoState), + VMSTATE_END_OF_LIST() + }, +}; + +static Property prep_systemio_properties[] = { + DEFINE_PROP_UINT8("ibm-planar-id", PrepSystemIoState, ibm_planar_id, 0), + DEFINE_PROP_UINT8("equipment", PrepSystemIoState, equipment, 0), + DEFINE_PROP_END_OF_LIST() +}; + +static void prep_systemio_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = prep_systemio_realize; + dc->vmsd = &vmstate_prep_systemio; + device_class_set_props(dc, prep_systemio_properties); +} + +static TypeInfo prep_systemio800_info = 
{ + .name = TYPE_PREP_SYSTEMIO, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(PrepSystemIoState), + .class_init = prep_systemio_class_initfn, +}; + +static void prep_systemio_register_types(void) +{ + type_register_static(&prep_systemio800_info); +} + +type_init(prep_systemio_register_types) diff --git a/hw/ppc/rs6000_mc.c b/hw/ppc/rs6000_mc.c new file mode 100644 index 000000000..c0bc212e9 --- /dev/null +++ b/hw/ppc/rs6000_mc.c @@ -0,0 +1,238 @@ +/* + * QEMU RS/6000 memory controller + * + * Copyright (c) 2017 Hervé Poussineau + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "hw/isa/isa.h" +#include "hw/qdev-properties.h" +#include "migration/vmstate.h" +#include "exec/address-spaces.h" +#include "qapi/error.h" +#include "trace.h" +#include "qom/object.h" + +#define TYPE_RS6000MC "rs6000-mc" +OBJECT_DECLARE_SIMPLE_TYPE(RS6000MCState, RS6000MC) + +struct RS6000MCState { + ISADevice parent_obj; + /* see US patent 5,684,979 for details (expired 2001-11-04) */ + uint32_t ram_size; + bool autoconfigure; + MemoryRegion simm[6]; + unsigned int simm_size[6]; + uint32_t end_address[8]; + uint8_t port0820_index; + PortioList portio; +}; + +/* P0RT 0803 -- SIMM ID Register (32/8 MB) (Read Only) */ + +static uint32_t rs6000mc_port0803_read(void *opaque, uint32_t addr) +{ + RS6000MCState *s = opaque; + uint32_t val = 0; + int socket; + + /* (1 << socket) indicates 32 MB SIMM at given socket */ + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket] == 32) { + val |= (1 << socket); + } + } + + trace_rs6000mc_id_read(addr, val); + return val; +} + +/* PORT 0804 -- SIMM Presence Register (Read Only) */ + +static uint32_t rs6000mc_port0804_read(void *opaque, uint32_t addr) +{ + RS6000MCState *s = opaque; + uint32_t val = 0xff; + int socket; + + /* (1 << socket) indicates SIMM absence at given socket */ + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket]) { + val &= ~(1 << socket); + } + } + s->port0820_index = 0; + + trace_rs6000mc_presence_read(addr, val); + return val; +} + +/* Memory Controller Size Programming Register */ + +static uint32_t rs6000mc_port0820_read(void *opaque, uint32_t addr) +{ + RS6000MCState *s = opaque; + uint32_t val = s->end_address[s->port0820_index] & 0x1f; + s->port0820_index = (s->port0820_index + 1) & 7; + trace_rs6000mc_size_read(addr, val); + return val; +} + +static void rs6000mc_port0820_write(void *opaque, uint32_t addr, uint32_t val) +{ + RS6000MCState *s = opaque; + uint8_t socket = val >> 5; + uint32_t end_address = val & 0x1f; + + trace_rs6000mc_size_write(addr, val); + s->end_address[socket] = end_address; + if (socket > 0 && socket < 7) { + if (s->simm_size[socket - 1]) { + uint32_t size; + uint32_t start_address = 0; + if (socket > 1) { + start_address = s->end_address[socket - 1]; + } + + size = end_address - start_address; + 
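+            /*
+             * Addresses are programmed in 8 MiB granules (note the
+             * "* 8 * MiB" scaling below); a zero size disables the
+             * SIMM window.
+             */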
memory_region_set_enabled(&s->simm[socket - 1], size != 0); + memory_region_set_address(&s->simm[socket - 1], + start_address * 8 * MiB); + } + } +} + +/* Read Memory Parity Error */ + +enum { + PORT0841_NO_ERROR_DETECTED = 0x01, +}; + +static uint32_t rs6000mc_port0841_read(void *opaque, uint32_t addr) +{ + uint32_t val = PORT0841_NO_ERROR_DETECTED; + trace_rs6000mc_parity_read(addr, val); + return val; +} + +static const MemoryRegionPortio rs6000mc_port_list[] = { + { 0x803, 1, 1, .read = rs6000mc_port0803_read }, + { 0x804, 1, 1, .read = rs6000mc_port0804_read }, + { 0x820, 1, 1, .read = rs6000mc_port0820_read, + .write = rs6000mc_port0820_write, }, + { 0x841, 1, 1, .read = rs6000mc_port0841_read }, + PORTIO_END_OF_LIST() +}; + +static void rs6000mc_realize(DeviceState *dev, Error **errp) +{ + RS6000MCState *s = RS6000MC(dev); + int socket = 0; + unsigned int ram_size = s->ram_size / MiB; + Error *local_err = NULL; + + while (socket < 6) { + if (ram_size >= 64) { + s->simm_size[socket] = 32; + s->simm_size[socket + 1] = 32; + ram_size -= 64; + } else if (ram_size >= 16) { + s->simm_size[socket] = 8; + s->simm_size[socket + 1] = 8; + ram_size -= 16; + } else { + /* Not enough memory */ + break; + } + socket += 2; + } + + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket]) { + char name[] = "simm.?"; + name[5] = socket + '0'; + memory_region_init_ram(&s->simm[socket], OBJECT(dev), name, + s->simm_size[socket] * MiB, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + memory_region_add_subregion_overlap(get_system_memory(), 0, + &s->simm[socket], socket); + } + } + if (ram_size) { + /* unable to push all requested RAM in SIMMs */ + error_setg(errp, "RAM size incompatible with this board. " + "Try again with something else, like %" PRId64 " MB", + s->ram_size / MiB - ram_size); + return; + } + + if (s->autoconfigure) { + uint32_t start_address = 0; + for (socket = 0; socket < 6; socket++) { + if (s->simm_size[socket]) { + memory_region_set_enabled(&s->simm[socket], true); + memory_region_set_address(&s->simm[socket], start_address); + start_address += memory_region_size(&s->simm[socket]); + } + } + } + + isa_register_portio_list(ISA_DEVICE(dev), &s->portio, 0x0, + rs6000mc_port_list, s, "rs6000mc"); +} + +static const VMStateDescription vmstate_rs6000mc = { + .name = "rs6000-mc", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_UINT8(port0820_index, RS6000MCState), + VMSTATE_END_OF_LIST() + }, +}; + +static Property rs6000mc_properties[] = { + DEFINE_PROP_UINT32("ram-size", RS6000MCState, ram_size, 0), + DEFINE_PROP_BOOL("auto-configure", RS6000MCState, autoconfigure, true), + DEFINE_PROP_END_OF_LIST() +}; + +static void rs6000mc_class_initfn(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = rs6000mc_realize; + dc->vmsd = &vmstate_rs6000mc; + device_class_set_props(dc, rs6000mc_properties); +} + +static const TypeInfo rs6000mc_info = { + .name = TYPE_RS6000MC, + .parent = TYPE_ISA_DEVICE, + .instance_size = sizeof(RS6000MCState), + .class_init = rs6000mc_class_initfn, +}; + +static void rs6000mc_types(void) +{ + type_register_static(&rs6000mc_info); +} + +type_init(rs6000mc_types) diff --git a/hw/ppc/sam460ex.c b/hw/ppc/sam460ex.c new file mode 100644 index 000000000..0737234d6 --- /dev/null +++ b/hw/ppc/sam460ex.c @@ -0,0 +1,516 @@ +/* + * QEMU aCube Sam460ex board emulation + * + * Copyright (c) 2012 François Revol + * Copyright (c) 2016-2019 BALATON Zoltan + 
* + * This file is derived from hw/ppc440_bamboo.c, + * the copyright for that material belongs to the original owners. + * + * This work is licensed under the GNU GPL license version 2 or later. + * + */ + +#include "qemu/osdep.h" +#include "qemu/units.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/boards.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "sysemu/device_tree.h" +#include "sysemu/block-backend.h" +#include "hw/loader.h" +#include "elf.h" +#include "exec/memory.h" +#include "ppc440.h" +#include "ppc405.h" +#include "hw/block/flash.h" +#include "sysemu/sysemu.h" +#include "sysemu/reset.h" +#include "hw/sysbus.h" +#include "hw/char/serial.h" +#include "hw/i2c/ppc4xx_i2c.h" +#include "hw/i2c/smbus_eeprom.h" +#include "hw/usb/hcd-ehci.h" +#include "hw/ppc/fdt.h" +#include "hw/qdev-properties.h" +#include "hw/intc/ppc-uic.h" + +#include <libfdt.h> + +#define BINARY_DEVICE_TREE_FILE "canyonlands.dtb" +#define UBOOT_FILENAME "u-boot-sam460-20100605.bin" +/* to extract the official U-Boot bin from the updater: */ +/* dd bs=1 skip=$(($(stat -c '%s' updater/updater-460) - 0x80000)) \ + if=updater/updater-460 of=u-boot-sam460-20100605.bin */ + +/* from Sam460 U-Boot include/configs/Sam460ex.h */ +#define FLASH_BASE 0xfff00000 +#define FLASH_BASE_H 0x4 +#define FLASH_SIZE (1 * MiB) +#define UBOOT_LOAD_BASE 0xfff80000 +#define UBOOT_SIZE 0x00080000 +#define UBOOT_ENTRY 0xfffffffc + +/* from U-Boot */ +#define EPAPR_MAGIC (0x45504150) +#define KERNEL_ADDR 0x1000000 +#define FDT_ADDR 0x1800000 +#define RAMDISK_ADDR 0x1900000 + +/* Sam460ex IRQ MAP: + IRQ0 = ETH_INT + IRQ1 = FPGA_INT + IRQ2 = PCI_INT (PCIA, PCIB, PCIC, PCIB) + IRQ3 = FPGA_INT2 + IRQ11 = RTC_INT + IRQ12 = SM502_INT +*/ + +#define CPU_FREQ 1150000000 +#define PLB_FREQ 230000000 +#define OPB_FREQ 115000000 +#define EBC_FREQ 115000000 +#define UART_FREQ 11059200 +#define SDRAM_NR_BANKS 4 + +/* The SoC could also handle 4 GiB but firmware does not work with that. */ +/* Maybe it overflows a signed 32 bit number somewhere? */ +static const ram_addr_t ppc460ex_sdram_bank_sizes[] = { + 2 * GiB, 1 * GiB, 512 * MiB, 256 * MiB, 128 * MiB, 64 * MiB, + 32 * MiB, 0 +}; + +struct boot_info { + uint32_t dt_base; + uint32_t dt_size; + uint32_t entry; +}; + +static int sam460ex_load_uboot(void) +{ + /* + * This first creates 1MiB of flash memory mapped at the end of + * the 32-bit address space (0xFFF00000..0xFFFFFFFF). + * + * If_PFLASH unit 0 is defined, the flash memory is initialized + * from that block backend. + * + * Else, it's initialized to zero. And then 512KiB of ROM get + * mapped on top of its second half (0xFFF80000..0xFFFFFFFF), + * initialized from u-boot-sam460-20100605.bin. + * + * This doesn't smell right. + * + * The physical hardware appears to have 512KiB flash memory. + * + * TODO Figure out what we really need here, and clean this up. + */ + + DriveInfo *dinfo; + + dinfo = drive_get(IF_PFLASH, 0, 0); + if (!pflash_cfi01_register(FLASH_BASE | ((hwaddr)FLASH_BASE_H << 32), + "sam460ex.flash", FLASH_SIZE, + dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, + 64 * KiB, 1, 0x89, 0x18, 0x0000, 0x0, 1)) { + error_report("Error registering flash memory"); + /* XXX: return an error instead? 
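+         * The caller already checks for a negative return value.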
*/ + exit(1); + } + + if (!dinfo) { + /*error_report("No flash image given with the 'pflash' parameter," + " using default u-boot image");*/ + rom_add_file_fixed(UBOOT_FILENAME, + UBOOT_LOAD_BASE | ((hwaddr)FLASH_BASE_H << 32), + -1); + } + + return 0; +} + +static int sam460ex_load_device_tree(hwaddr addr, + uint32_t ramsize, + hwaddr initrd_base, + hwaddr initrd_size, + const char *kernel_cmdline) +{ + uint32_t mem_reg_property[] = { 0, 0, cpu_to_be32(ramsize) }; + char *filename; + int fdt_size; + void *fdt; + uint32_t tb_freq = CPU_FREQ; + uint32_t clock_freq = CPU_FREQ; + int offset; + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE); + if (!filename) { + error_report("Couldn't find dtb file `%s'", BINARY_DEVICE_TREE_FILE); + exit(1); + } + fdt = load_device_tree(filename, &fdt_size); + if (!fdt) { + error_report("Couldn't load dtb file `%s'", filename); + g_free(filename); + exit(1); + } + g_free(filename); + + /* Manipulate device tree in memory. */ + + qemu_fdt_setprop(fdt, "/memory", "reg", mem_reg_property, + sizeof(mem_reg_property)); + + /* default FDT doesn't have a /chosen node... */ + qemu_fdt_add_subnode(fdt, "/chosen"); + + qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", initrd_base); + + qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + + qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", kernel_cmdline); + + /* Copy data from the host device tree into the guest. Since the guest can + * directly access the timebase without host involvement, we must expose + * the correct frequencies. */ + if (kvm_enabled()) { + tb_freq = kvmppc_get_tbfreq(); + clock_freq = kvmppc_get_clockfreq(); + } + + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "clock-frequency", + clock_freq); + qemu_fdt_setprop_cell(fdt, "/cpus/cpu@0", "timebase-frequency", + tb_freq); + + /* Remove cpm node if it exists (it is not emulated) */ + offset = fdt_path_offset(fdt, "/cpm"); + if (offset >= 0) { + _FDT(fdt_nop_node(fdt, offset)); + } + + /* set serial port clocks */ + offset = fdt_node_offset_by_compatible(fdt, -1, "ns16550"); + while (offset >= 0) { + _FDT(fdt_setprop_cell(fdt, offset, "clock-frequency", UART_FREQ)); + offset = fdt_node_offset_by_compatible(fdt, offset, "ns16550"); + } + + /* some more clocks */ + qemu_fdt_setprop_cell(fdt, "/plb", "clock-frequency", + PLB_FREQ); + qemu_fdt_setprop_cell(fdt, "/plb/opb", "clock-frequency", + OPB_FREQ); + qemu_fdt_setprop_cell(fdt, "/plb/opb/ebc", "clock-frequency", + EBC_FREQ); + + rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr); + g_free(fdt); + + return fdt_size; +} + +/* Create reset TLB entries for BookE, mapping only the flash memory. */ +static void mmubooke_create_initial_mapping_uboot(CPUPPCState *env) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + /* on reset the flash is mapped by a shadow TLB, + * but since we don't implement them we need to use + * the same values U-Boot will use to avoid a fault. + */ + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 0x10000000; /* up to 0xffffffff */ + tlb->EPN = 0xf0000000 & TARGET_PAGE_MASK; + tlb->RPN = (0xf0000000 & TARGET_PAGE_MASK) | 0x4; + tlb->PID = 0; +} + +/* Create reset TLB entries for BookE, spanning the 32bit addr space. 
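+ * Only the low 2 GiB are actually covered (tlb->size below is 1 << 31);
+ * the kernel is assumed to install its own mappings early on.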
*/ +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1 << 31; /* up to 0x80000000 */ + tlb->EPN = va & TARGET_PAGE_MASK; + tlb->RPN = pa & TARGET_PAGE_MASK; + tlb->PID = 0; +} + +static void main_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + struct boot_info *bi = env->load_info; + + cpu_reset(CPU(cpu)); + + /* either we have a kernel to boot or we jump to U-Boot */ + if (bi->entry != UBOOT_ENTRY) { + env->gpr[1] = (16 * MiB) - 8; + env->gpr[3] = FDT_ADDR; + env->nip = bi->entry; + + /* Create a mapping for the kernel. */ + mmubooke_create_initial_mapping(env, 0, 0); + env->gpr[6] = tswap32(EPAPR_MAGIC); + env->gpr[7] = (16 * MiB) - 8; /* bi->ima_size; */ + + } else { + env->nip = UBOOT_ENTRY; + mmubooke_create_initial_mapping_uboot(env); + } +} + +static void sam460ex_init(MachineState *machine) +{ + MemoryRegion *address_space_mem = get_system_memory(); + MemoryRegion *isa = g_new(MemoryRegion, 1); + MemoryRegion *ram_memories = g_new(MemoryRegion, SDRAM_NR_BANKS); + hwaddr ram_bases[SDRAM_NR_BANKS] = {0}; + hwaddr ram_sizes[SDRAM_NR_BANKS] = {0}; + MemoryRegion *l2cache_ram = g_new(MemoryRegion, 1); + DeviceState *uic[4]; + qemu_irq mal_irqs[4]; + int i; + PCIBus *pci_bus; + PowerPCCPU *cpu; + CPUPPCState *env; + I2CBus *i2c; + hwaddr entry = UBOOT_ENTRY; + target_long initrd_size = 0; + DeviceState *dev; + SysBusDevice *sbdev; + struct boot_info *boot_info; + uint8_t *spd_data; + int success; + + cpu = POWERPC_CPU(cpu_create(machine->cpu_type)); + env = &cpu->env; + if (env->mmu_model != POWERPC_MMU_BOOKE) { + error_report("Only MMU model BookE is supported by this machine."); + exit(1); + } + + qemu_register_reset(main_cpu_reset, cpu); + boot_info = g_malloc0(sizeof(*boot_info)); + env->load_info = boot_info; + + ppc_booke_timers_init(cpu, CPU_FREQ, 0); + ppc_dcr_init(env, NULL, NULL); + + /* PLB arbitrer */ + ppc4xx_plb_init(env); + + /* interrupt controllers */ + for (i = 0; i < ARRAY_SIZE(uic); i++) { + SysBusDevice *sbd; + /* + * UICs 1, 2 and 3 are cascaded through UIC 0. + * input_ints[n] is the interrupt number on UIC 0 which + * the INT output of UIC n is connected to. The CINT output + * of UIC n connects to input_ints[n] + 1. + * The entry in input_ints[] for UIC 0 is ignored, because UIC 0's + * INT and CINT outputs are connected to the CPU. + */ + const int input_ints[] = { -1, 30, 10, 16 }; + + uic[i] = qdev_new(TYPE_PPC_UIC); + sbd = SYS_BUS_DEVICE(uic[i]); + + qdev_prop_set_uint32(uic[i], "dcr-base", 0xc0 + i * 0x10); + object_property_set_link(OBJECT(uic[i]), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(sbd, &error_fatal); + + if (i == 0) { + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + } else { + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_INT, + qdev_get_gpio_in(uic[0], input_ints[i])); + sysbus_connect_irq(sbd, PPCUIC_OUTPUT_CINT, + qdev_get_gpio_in(uic[0], input_ints[i] + 1)); + } + } + + /* SDRAM controller */ + /* put all RAM on first bank because board has one slot + * and firmware only checks that */ + ppc4xx_sdram_banks(machine->ram, 1, ram_memories, ram_bases, ram_sizes, + ppc460ex_sdram_bank_sizes); + + /* FIXME: does 460EX have ECC interrupts? 
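+     * Nothing is connected in this model either way.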
*/ + ppc440_sdram_init(env, SDRAM_NR_BANKS, ram_memories, + ram_bases, ram_sizes, 1); + + /* IIC controllers and devices */ + dev = sysbus_create_simple(TYPE_PPC4xx_I2C, 0x4ef600700, + qdev_get_gpio_in(uic[0], 2)); + i2c = PPC4xx_I2C(dev)->bus; + /* SPD EEPROM on RAM module */ + spd_data = spd_data_generate(ram_sizes[0] < 128 * MiB ? DDR : DDR2, + ram_sizes[0]); + spd_data[20] = 4; /* SO-DIMM module */ + smbus_eeprom_init_one(i2c, 0x50, spd_data); + /* RTC */ + i2c_slave_create_simple(i2c, "m41t80", 0x68); + + dev = sysbus_create_simple(TYPE_PPC4xx_I2C, 0x4ef600800, + qdev_get_gpio_in(uic[0], 3)); + + /* External bus controller */ + ppc405_ebc_init(env); + + /* CPR */ + ppc4xx_cpr_init(env); + + /* PLB to AHB bridge */ + ppc4xx_ahb_init(env); + + /* System DCRs */ + ppc4xx_sdr_init(env); + + /* MAL */ + for (i = 0; i < ARRAY_SIZE(mal_irqs); i++) { + mal_irqs[0] = qdev_get_gpio_in(uic[2], 3 + i); + } + ppc4xx_mal_init(env, 4, 16, mal_irqs); + + /* DMA */ + ppc4xx_dma_init(env, 0x200); + + /* 256K of L2 cache as memory */ + ppc4xx_l2sram_init(env); + /* FIXME: remove this after fixing l2sram mapping in ppc440_uc.c? */ + memory_region_init_ram(l2cache_ram, NULL, "ppc440.l2cache_ram", 256 * KiB, + &error_abort); + memory_region_add_subregion(address_space_mem, 0x400000000LL, l2cache_ram); + + /* USB */ + sysbus_create_simple(TYPE_PPC4xx_EHCI, 0x4bffd0400, + qdev_get_gpio_in(uic[2], 29)); + dev = qdev_new("sysbus-ohci"); + qdev_prop_set_string(dev, "masterbus", "usb-bus.0"); + qdev_prop_set_uint32(dev, "num-ports", 6); + sbdev = SYS_BUS_DEVICE(dev); + sysbus_realize_and_unref(sbdev, &error_fatal); + sysbus_mmio_map(sbdev, 0, 0x4bffd0000); + sysbus_connect_irq(sbdev, 0, qdev_get_gpio_in(uic[2], 30)); + usb_create_simple(usb_bus_find(-1), "usb-kbd"); + usb_create_simple(usb_bus_find(-1), "usb-mouse"); + + /* PCI bus */ + ppc460ex_pcie_init(env); + /* All PCI irqs are connected to the same UIC pin (cf. UBoot source) */ + dev = sysbus_create_simple("ppc440-pcix-host", 0xc0ec00000, + qdev_get_gpio_in(uic[1], 0)); + pci_bus = PCI_BUS(qdev_get_child_bus(dev, "pci.0")); + + memory_region_init_alias(isa, NULL, "isa_mmio", get_system_io(), + 0, 0x10000); + memory_region_add_subregion(get_system_memory(), 0xc08000000, isa); + + /* PCI devices */ + pci_create_simple(pci_bus, PCI_DEVFN(6, 0), "sm501"); + /* SoC has a single SATA port but we don't emulate that yet + * However, firmware and usual clients have driver for SiI311x + * so add one for convenience by default */ + if (defaults_enabled()) { + pci_create_simple(pci_bus, -1, "sii3112"); + } + + /* SoC has 4 UARTs + * but board has only one wired and two are present in fdt */ + if (serial_hd(0) != NULL) { + serial_mm_init(address_space_mem, 0x4ef600300, 0, + qdev_get_gpio_in(uic[1], 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(0), + DEVICE_BIG_ENDIAN); + } + if (serial_hd(1) != NULL) { + serial_mm_init(address_space_mem, 0x4ef600400, 0, + qdev_get_gpio_in(uic[0], 1), + PPC_SERIAL_MM_BAUDBASE, serial_hd(1), + DEVICE_BIG_ENDIAN); + } + + /* Load U-Boot image. */ + if (!machine->kernel_filename) { + success = sam460ex_load_uboot(); + if (success < 0) { + error_report("could not load firmware"); + exit(1); + } + } + + /* Load kernel. 
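+     * A U-Boot uImage is tried first, then a plain ELF; a raw binary
+     * fallback is still missing (see the XXX below).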
*/ + if (machine->kernel_filename) { + hwaddr loadaddr = LOAD_UIMAGE_LOADADDR_INVALID; + success = load_uimage(machine->kernel_filename, &entry, &loadaddr, + NULL, NULL, NULL); + if (success < 0) { + uint64_t elf_entry; + + success = load_elf(machine->kernel_filename, NULL, NULL, NULL, + &elf_entry, NULL, NULL, NULL, + 1, PPC_ELF_MACHINE, 0, 0); + entry = elf_entry; + } + /* XXX try again as binary */ + if (success < 0) { + error_report("could not load kernel '%s'", + machine->kernel_filename); + exit(1); + } + } + + /* Load initrd. */ + if (machine->initrd_filename) { + initrd_size = load_image_targphys(machine->initrd_filename, + RAMDISK_ADDR, + machine->ram_size - RAMDISK_ADDR); + if (initrd_size < 0) { + error_report("could not load ram disk '%s' at %x", + machine->initrd_filename, RAMDISK_ADDR); + exit(1); + } + } + + /* If we're loading a kernel directly, we must load the device tree too. */ + if (machine->kernel_filename) { + int dt_size; + + dt_size = sam460ex_load_device_tree(FDT_ADDR, machine->ram_size, + RAMDISK_ADDR, initrd_size, + machine->kernel_cmdline); + + boot_info->dt_base = FDT_ADDR; + boot_info->dt_size = dt_size; + } + + boot_info->entry = entry; +} + +static void sam460ex_machine_init(MachineClass *mc) +{ + mc->desc = "aCube Sam460ex"; + mc->init = sam460ex_init; + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("460exb"); + mc->default_ram_size = 512 * MiB; + mc->default_ram_id = "ppc4xx.sdram"; +} + +DEFINE_MACHINE("sam460ex", sam460ex_machine_init) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c new file mode 100644 index 000000000..3b5fd749b --- /dev/null +++ b/hw/ppc/spapr.c @@ -0,0 +1,5136 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * Copyright (c) 2004-2007 Fabrice Bellard + * Copyright (c) 2007 Jocelyn Mayer + * Copyright (c) 2010 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qapi/error.h" +#include "qapi/qapi-events-machine.h" +#include "qapi/qapi-events-qdev.h" +#include "qapi/visitor.h" +#include "sysemu/sysemu.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "sysemu/qtest.h" +#include "sysemu/reset.h" +#include "sysemu/runstate.h" +#include "qemu/log.h" +#include "hw/fw-path-provider.h" +#include "elf.h" +#include "net/net.h" +#include "sysemu/device_tree.h" +#include "sysemu/cpus.h" +#include "sysemu/hw_accel.h" +#include "kvm_ppc.h" +#include "migration/misc.h" +#include "migration/qemu-file-types.h" +#include "migration/global_state.h" +#include "migration/register.h" +#include "migration/blocker.h" +#include "mmu-hash64.h" +#include "mmu-book3s-v3.h" +#include "cpu-models.h" +#include "hw/core/cpu.h" + +#include "hw/ppc/ppc.h" +#include "hw/loader.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/qdev-properties.h" +#include "hw/pci-host/spapr.h" +#include "hw/pci/msi.h" + +#include "hw/pci/pci.h" +#include "hw/scsi/scsi.h" +#include "hw/virtio/virtio-scsi.h" +#include "hw/virtio/vhost-scsi-common.h" + +#include "exec/ram_addr.h" +#include "hw/usb.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "trace.h" +#include "hw/nmi.h" +#include "hw/intc/intc.h" + +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/mem/memory-device.h" +#include "hw/ppc/spapr_tpm_proxy.h" +#include "hw/ppc/spapr_nvdimm.h" +#include "hw/ppc/spapr_numa.h" +#include "hw/ppc/pef.h" + +#include "monitor/monitor.h" + +#include <libfdt.h> + +/* SLOF memory layout: + * + * SLOF raw image loaded at 0, copies its romfs right below the flat + * device-tree, then position SLOF itself 31M below that + * + * So we set FW_OVERHEAD to 40MB which should account for all of that + * and more + * + * We load our kernel at 4M, leaving space for SLOF initial image + */ +#define FDT_MAX_ADDR 0x80000000 /* FDT must stay below that */ +#define FW_MAX_SIZE 0x400000 +#define FW_FILE_NAME "slof.bin" +#define FW_FILE_NAME_VOF "vof.bin" +#define FW_OVERHEAD 0x2800000 +#define KERNEL_LOAD_ADDR FW_MAX_SIZE + +#define MIN_RMA_SLOF (128 * MiB) + +#define PHANDLE_INTC 0x00001111 + +/* These two functions implement the VCPU id numbering: one to compute them + * all and one to identify thread 0 of a VCORE. Any change to the first one + * is likely to have an impact on the second one, so let's keep them close. + */ +static int spapr_vcpu_id(SpaprMachineState *spapr, int cpu_index) +{ + MachineState *ms = MACHINE(spapr); + unsigned int smp_threads = ms->smp.threads; + + assert(spapr->vsmt); + return + (cpu_index / smp_threads) * spapr->vsmt + cpu_index % smp_threads; +} +static bool spapr_is_thread0_in_vcore(SpaprMachineState *spapr, + PowerPCCPU *cpu) +{ + assert(spapr->vsmt); + return spapr_get_vcpu_id(cpu) % spapr->vsmt == 0; +} + +static bool pre_2_10_vmstate_dummy_icp_needed(void *opaque) +{ + /* Dummy entries correspond to unused ICPState objects in older QEMUs, + * and newer QEMUs don't even have them. In both cases, we don't want + * to send anything on the wire. 
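
[Editor's note — illustrative sketch, not part of this patch] The VCPU id numbering above deserves a worked example: ids are allocated in strides of vsmt per virtual core while QEMU's cpu_index is dense in strides of smp.threads, so thread 0 of each vcore always lands on an id divisible by vsmt. A self-contained check of the same arithmetic:

    #include <assert.h>
    #include <stdio.h>

    static int spapr_vcpu_id_ex(int cpu_index, int threads, int vsmt)
    {
        return (cpu_index / threads) * vsmt + cpu_index % threads;
    }

    int main(void)
    {
        /* threads=4, vsmt=8: core 1's threads get ids 8..11 */
        assert(spapr_vcpu_id_ex(4, 4, 8) == 8);
        assert(spapr_vcpu_id_ex(5, 4, 8) == 9);
        /* thread 0 of a vcore: id % vsmt == 0 */
        printf("id(4)=%d id(5)=%d\n",
               spapr_vcpu_id_ex(4, 4, 8), spapr_vcpu_id_ex(5, 4, 8));
        return 0;
    }
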
+ */ + return false; +} + +static const VMStateDescription pre_2_10_vmstate_dummy_icp = { + .name = "icp/server", + .version_id = 1, + .minimum_version_id = 1, + .needed = pre_2_10_vmstate_dummy_icp_needed, + .fields = (VMStateField[]) { + VMSTATE_UNUSED(4), /* uint32_t xirr */ + VMSTATE_UNUSED(1), /* uint8_t pending_priority */ + VMSTATE_UNUSED(1), /* uint8_t mfrr */ + VMSTATE_END_OF_LIST() + }, +}; + +static void pre_2_10_vmstate_register_dummy_icp(int i) +{ + vmstate_register(NULL, i, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +static void pre_2_10_vmstate_unregister_dummy_icp(int i) +{ + vmstate_unregister(NULL, &pre_2_10_vmstate_dummy_icp, + (void *)(uintptr_t) i); +} + +int spapr_max_server_number(SpaprMachineState *spapr) +{ + MachineState *ms = MACHINE(spapr); + + assert(spapr->vsmt); + return DIV_ROUND_UP(ms->smp.max_cpus * spapr->vsmt, ms->smp.threads); +} + +static int spapr_fixup_cpu_smt_dt(void *fdt, int offset, PowerPCCPU *cpu, + int smt_threads) +{ + int i, ret = 0; + uint32_t servers_prop[smt_threads]; + uint32_t gservers_prop[smt_threads * 2]; + int index = spapr_get_vcpu_id(cpu); + + if (cpu->compat_pvr) { + ret = fdt_setprop_cell(fdt, offset, "cpu-version", cpu->compat_pvr); + if (ret < 0) { + return ret; + } + } + + /* Build interrupt servers and gservers properties */ + for (i = 0; i < smt_threads; i++) { + servers_prop[i] = cpu_to_be32(index + i); + /* Hack, direct the group queues back to cpu 0 */ + gservers_prop[i*2] = cpu_to_be32(index + i); + gservers_prop[i*2 + 1] = 0; + } + ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", + servers_prop, sizeof(servers_prop)); + if (ret < 0) { + return ret; + } + ret = fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", + gservers_prop, sizeof(gservers_prop)); + + return ret; +} + +static void spapr_dt_pa_features(SpaprMachineState *spapr, + PowerPCCPU *cpu, + void *fdt, int offset) +{ + uint8_t pa_features_206[] = { 6, 0, + 0xf6, 0x1f, 0xc7, 0x00, 0x80, 0xc0 }; + uint8_t pa_features_207[] = { 24, 0, + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00 }; + uint8_t pa_features_300[] = { 66, 0, + /* 0: MMU|FPU|SLB|RUN|DABR|NX, 1: fri[nzpm]|DABRX|SPRG3|SLB0|PP110 */ + /* 2: VPM|DS205|PPR|DS202|DS206, 3: LSD|URG, SSO, 5: LE|CFAR|EB|LSQ */ + 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, /* 0 - 5 */ + /* 6: DS207 */ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, /* 6 - 11 */ + /* 16: Vector */ + 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, /* 12 - 17 */ + /* 18: Vec. Scalar, 20: Vec. XOR, 22: HTM */ + 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 18 - 23 */ + /* 24: Ext. 
Dec, 26: 64 bit ftrs, 28: PM ftrs */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 24 - 29 */
+        /* 30: MMR, 32: LE atomic, 34: EBB + ext EBB */
+        0x80, 0x00, 0x80, 0x00, 0xC0, 0x00, /* 30 - 35 */
+        /* 36: SPR SO, 38: Copy/Paste, 40: Radix MMU */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 36 - 41 */
+        /* 42: PM, 44: PC RA, 46: SC vec'd */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 42 - 47 */
+        /* 48: SIMD, 50: QP BFP, 52: String */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 48 - 53 */
+        /* 54: DecFP, 56: DecI, 58: SHA */
+        0x80, 0x00, 0x80, 0x00, 0x80, 0x00, /* 54 - 59 */
+        /* 60: NM atomic, 62: RNG */
+        0x80, 0x00, 0x80, 0x00, 0x00, 0x00, /* 60 - 65 */
+    };
+    uint8_t *pa_features = NULL;
+    size_t pa_size;
+
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_06, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_206;
+        pa_size = sizeof(pa_features_206);
+    }
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_2_07, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_207;
+        pa_size = sizeof(pa_features_207);
+    }
+    if (ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, cpu->compat_pvr)) {
+        pa_features = pa_features_300;
+        pa_size = sizeof(pa_features_300);
+    }
+    if (!pa_features) {
+        return;
+    }
+
+    if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
+        /*
+         * Note: we keep CI large pages off by default because a 64K capable
+         * guest provisioned with large pages might otherwise try to map a
+         * QEMU framebuffer (or other kind of memory mapped PCI BAR) using
+         * 64K pages even if that QEMU runs on a 4K host.
+         * We add this bit back here if we are confident this is not an
+         * issue.
+         */
+        pa_features[3] |= 0x20;
+    }
+    if ((spapr_get_cap(spapr, SPAPR_CAP_HTM) != 0) && pa_size > 24) {
+        pa_features[24] |= 0x80; /* Transactional memory support */
+    }
+    if (spapr->cas_pre_isa3_guest && pa_size > 40) {
+        /* Workaround for broken kernels that attempt (guest) radix
+         * mode when they can't handle it, if they see the radix bit set
+         * in pa-features. So hide it from them.
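
[Editor's note — illustrative sketch, not part of this patch] The pa_features arrays above carry a two-byte header ({attribute-byte count, 0}), so attribute byte N lives at array offset N + 2; that is why the HTM bit (attribute byte 22 per the table comments) is set via pa_features[24], and the radix bit (attribute byte 40) is cleared via pa_features[40 + 2]. A small sketch of that indexing:

    #include <stdint.h>
    #include <stdio.h>

    #define PA_HDR 2  /* two header bytes before attribute byte 0 */

    static void pa_set(uint8_t *pa, size_t sz, unsigned byte, uint8_t mask)
    {
        if (byte + PA_HDR < sz) {
            pa[byte + PA_HDR] |= mask;
        }
    }

    static void pa_clear(uint8_t *pa, size_t sz, unsigned byte, uint8_t mask)
    {
        if (byte + PA_HDR < sz) {
            pa[byte + PA_HDR] &= ~mask;
        }
    }

    int main(void)
    {
        uint8_t pa[68] = { 66, 0 };
        pa_set(pa, sizeof(pa), 22, 0x80);   /* advertise HTM          */
        pa_clear(pa, sizeof(pa), 40, 0x80); /* hide radix from guest  */
        printf("attr22=%#x attr40=%#x\n", pa[24], pa[42]);
        return 0;
    }
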
*/ + pa_features[40 + 2] &= ~0x80; /* Radix MMU */ + } + + _FDT((fdt_setprop(fdt, offset, "ibm,pa-features", pa_features, pa_size))); +} + +static hwaddr spapr_node0_size(MachineState *machine) +{ + if (machine->numa_state->num_nodes) { + int i; + for (i = 0; i < machine->numa_state->num_nodes; ++i) { + if (machine->numa_state->nodes[i].node_mem) { + return MIN(pow2floor(machine->numa_state->nodes[i].node_mem), + machine->ram_size); + } + } + } + return machine->ram_size; +} + +static void add_str(GString *s, const gchar *s1) +{ + g_string_append_len(s, s1, strlen(s1) + 1); +} + +static int spapr_dt_memory_node(SpaprMachineState *spapr, void *fdt, int nodeid, + hwaddr start, hwaddr size) +{ + char mem_name[32]; + uint64_t mem_reg_property[2]; + int off; + + mem_reg_property[0] = cpu_to_be64(start); + mem_reg_property[1] = cpu_to_be64(size); + + sprintf(mem_name, "memory@%" HWADDR_PRIx, start); + off = fdt_add_subnode(fdt, 0, mem_name); + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property, + sizeof(mem_reg_property)))); + spapr_numa_write_associativity_dt(spapr, fdt, off, nodeid); + return off; +} + +static uint32_t spapr_pc_dimm_node(MemoryDeviceInfoList *list, ram_addr_t addr) +{ + MemoryDeviceInfoList *info; + + for (info = list; info; info = info->next) { + MemoryDeviceInfo *value = info->value; + + if (value && value->type == MEMORY_DEVICE_INFO_KIND_DIMM) { + PCDIMMDeviceInfo *pcdimm_info = value->u.dimm.data; + + if (addr >= pcdimm_info->addr && + addr < (pcdimm_info->addr + pcdimm_info->size)) { + return pcdimm_info->node; + } + } + } + + return -1; +} + +struct sPAPRDrconfCellV2 { + uint32_t seq_lmbs; + uint64_t base_addr; + uint32_t drc_index; + uint32_t aa_index; + uint32_t flags; +} QEMU_PACKED; + +typedef struct DrconfCellQueue { + struct sPAPRDrconfCellV2 cell; + QSIMPLEQ_ENTRY(DrconfCellQueue) entry; +} DrconfCellQueue; + +static DrconfCellQueue * +spapr_get_drconf_cell(uint32_t seq_lmbs, uint64_t base_addr, + uint32_t drc_index, uint32_t aa_index, + uint32_t flags) +{ + DrconfCellQueue *elem; + + elem = g_malloc0(sizeof(*elem)); + elem->cell.seq_lmbs = cpu_to_be32(seq_lmbs); + elem->cell.base_addr = cpu_to_be64(base_addr); + elem->cell.drc_index = cpu_to_be32(drc_index); + elem->cell.aa_index = cpu_to_be32(aa_index); + elem->cell.flags = cpu_to_be32(flags); + + return elem; +} + +static int spapr_dt_dynamic_memory_v2(SpaprMachineState *spapr, void *fdt, + int offset, MemoryDeviceInfoList *dimms) +{ + MachineState *machine = MACHINE(spapr); + uint8_t *int_buf, *cur_index; + int ret; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint64_t addr, cur_addr, size; + uint32_t nr_boot_lmbs = (machine->device_memory->base / lmb_size); + uint64_t mem_end = machine->device_memory->base + + memory_region_size(&machine->device_memory->mr); + uint32_t node, buf_len, nr_entries = 0; + SpaprDrc *drc; + DrconfCellQueue *elem, *next; + MemoryDeviceInfoList *info; + QSIMPLEQ_HEAD(, DrconfCellQueue) drconf_queue + = QSIMPLEQ_HEAD_INITIALIZER(drconf_queue); + + /* Entry to cover RAM and the gap area */ + elem = spapr_get_drconf_cell(nr_boot_lmbs, 0, 0, -1, + SPAPR_LMB_FLAGS_RESERVED | + SPAPR_LMB_FLAGS_DRC_INVALID); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + + cur_addr = machine->device_memory->base; + for (info = dimms; info; info = info->next) { + PCDIMMDeviceInfo *di = info->value->u.dimm.data; + + addr = di->addr; + size = di->size; + node = di->node; + + /* + * The NVDIMM area is 
hotpluggable after the NVDIMM is unplugged. The + * area is marked hotpluggable in the next iteration for the bigger + * chunk including the NVDIMM occupied area. + */ + if (info->value->type == MEMORY_DEVICE_INFO_KIND_NVDIMM) + continue; + + /* Entry for hot-pluggable area */ + if (cur_addr < addr) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size); + g_assert(drc); + elem = spapr_get_drconf_cell((addr - cur_addr) / lmb_size, + cur_addr, spapr_drc_index(drc), -1, 0); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + } + + /* Entry for DIMM */ + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, addr / lmb_size); + g_assert(drc); + elem = spapr_get_drconf_cell(size / lmb_size, addr, + spapr_drc_index(drc), node, + (SPAPR_LMB_FLAGS_ASSIGNED | + SPAPR_LMB_FLAGS_HOTREMOVABLE)); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + cur_addr = addr + size; + } + + /* Entry for remaining hotpluggable area */ + if (cur_addr < mem_end) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, cur_addr / lmb_size); + g_assert(drc); + elem = spapr_get_drconf_cell((mem_end - cur_addr) / lmb_size, + cur_addr, spapr_drc_index(drc), -1, 0); + QSIMPLEQ_INSERT_TAIL(&drconf_queue, elem, entry); + nr_entries++; + } + + buf_len = nr_entries * sizeof(struct sPAPRDrconfCellV2) + sizeof(uint32_t); + int_buf = cur_index = g_malloc0(buf_len); + *(uint32_t *)int_buf = cpu_to_be32(nr_entries); + cur_index += sizeof(nr_entries); + + QSIMPLEQ_FOREACH_SAFE(elem, &drconf_queue, entry, next) { + memcpy(cur_index, &elem->cell, sizeof(elem->cell)); + cur_index += sizeof(elem->cell); + QSIMPLEQ_REMOVE(&drconf_queue, elem, DrconfCellQueue, entry); + g_free(elem); + } + + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory-v2", int_buf, buf_len); + g_free(int_buf); + if (ret < 0) { + return -1; + } + return 0; +} + +static int spapr_dt_dynamic_memory(SpaprMachineState *spapr, void *fdt, + int offset, MemoryDeviceInfoList *dimms) +{ + MachineState *machine = MACHINE(spapr); + int i, ret; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t device_lmb_start = machine->device_memory->base / lmb_size; + uint32_t nr_lmbs = (machine->device_memory->base + + memory_region_size(&machine->device_memory->mr)) / + lmb_size; + uint32_t *int_buf, *cur_index, buf_len; + + /* + * Allocate enough buffer size to fit in ibm,dynamic-memory + */ + buf_len = (nr_lmbs * SPAPR_DR_LMB_LIST_ENTRY_SIZE + 1) * sizeof(uint32_t); + cur_index = int_buf = g_malloc0(buf_len); + int_buf[0] = cpu_to_be32(nr_lmbs); + cur_index++; + for (i = 0; i < nr_lmbs; i++) { + uint64_t addr = i * lmb_size; + uint32_t *dynamic_memory = cur_index; + + if (i >= device_lmb_start) { + SpaprDrc *drc; + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, i); + g_assert(drc); + + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(spapr_drc_index(drc)); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(spapr_pc_dimm_node(dimms, addr)); + if (memory_region_present(get_system_memory(), addr)) { + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_ASSIGNED); + } else { + dynamic_memory[5] = cpu_to_be32(0); + } + } else { + /* + * LMB information for RMA, boot time RAM and gap b/n RAM and + * device memory region -- all these are marked as reserved + * and as having no valid DRC. 
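
[Editor's note — illustrative sketch, not part of this patch] Each ibm,dynamic-memory entry written above is six big-endian 32-bit cells: address high/low, DRC index, a reserved word, the associativity (NUMA) node, and flags. A standalone sketch of the packing; the 0x8 flag value is assumed to stand in for SPAPR_LMB_FLAGS_ASSIGNED:

    #include <stdint.h>
    #include <stdio.h>

    static void put_be32(uint8_t *p, uint32_t v)
    {
        p[0] = v >> 24; p[1] = v >> 16; p[2] = v >> 8; p[3] = v;
    }

    /* Pack one 24-byte LMB entry the way spapr_dt_dynamic_memory() does. */
    static void pack_lmb(uint8_t out[24], uint64_t addr, uint32_t drc_index,
                         uint32_t node, uint32_t flags)
    {
        put_be32(out +  0, addr >> 32);
        put_be32(out +  4, addr & 0xffffffff);
        put_be32(out +  8, drc_index);
        put_be32(out + 12, 0);          /* reserved */
        put_be32(out + 16, node);
        put_be32(out + 20, flags);
    }

    int main(void)
    {
        uint8_t cell[24];
        pack_lmb(cell, 0x100000000ULL, 42, 0, 0x8 /* assumed ASSIGNED */);
        printf("drc index cells: %02x%02x%02x%02x\n",
               cell[8], cell[9], cell[10], cell[11]);
        return 0;
    }
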
+ */ + dynamic_memory[0] = cpu_to_be32(addr >> 32); + dynamic_memory[1] = cpu_to_be32(addr & 0xffffffff); + dynamic_memory[2] = cpu_to_be32(0); + dynamic_memory[3] = cpu_to_be32(0); /* reserved */ + dynamic_memory[4] = cpu_to_be32(-1); + dynamic_memory[5] = cpu_to_be32(SPAPR_LMB_FLAGS_RESERVED | + SPAPR_LMB_FLAGS_DRC_INVALID); + } + + cur_index += SPAPR_DR_LMB_LIST_ENTRY_SIZE; + } + ret = fdt_setprop(fdt, offset, "ibm,dynamic-memory", int_buf, buf_len); + g_free(int_buf); + if (ret < 0) { + return -1; + } + return 0; +} + +/* + * Adds ibm,dynamic-reconfiguration-memory node. + * Refer to docs/specs/ppc-spapr-hotplug.txt for the documentation + * of this device tree node. + */ +static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, + void *fdt) +{ + MachineState *machine = MACHINE(spapr); + int ret, offset; + uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; + uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32), + cpu_to_be32(lmb_size & 0xffffffff)}; + MemoryDeviceInfoList *dimms = NULL; + + /* + * Don't create the node if there is no device memory + */ + if (machine->ram_size == machine->maxram_size) { + return 0; + } + + offset = fdt_add_subnode(fdt, 0, "ibm,dynamic-reconfiguration-memory"); + + ret = fdt_setprop(fdt, offset, "ibm,lmb-size", prop_lmb_size, + sizeof(prop_lmb_size)); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-flags-mask", 0xff); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, offset, "ibm,memory-preservation-time", 0x0); + if (ret < 0) { + return ret; + } + + /* ibm,dynamic-memory or ibm,dynamic-memory-v2 */ + dimms = qmp_memory_device_list(); + if (spapr_ovec_test(spapr->ov5_cas, OV5_DRMEM_V2)) { + ret = spapr_dt_dynamic_memory_v2(spapr, fdt, offset, dimms); + } else { + ret = spapr_dt_dynamic_memory(spapr, fdt, offset, dimms); + } + qapi_free_MemoryDeviceInfoList(dimms); + + if (ret < 0) { + return ret; + } + + ret = spapr_numa_write_assoc_lookup_arrays(spapr, fdt, offset); + + return ret; +} + +static int spapr_dt_memory(SpaprMachineState *spapr, void *fdt) +{ + MachineState *machine = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + hwaddr mem_start, node_size; + int i, nb_nodes = machine->numa_state->num_nodes; + NodeInfo *nodes = machine->numa_state->nodes; + + for (i = 0, mem_start = 0; i < nb_nodes; ++i) { + if (!nodes[i].node_mem) { + continue; + } + if (mem_start >= machine->ram_size) { + node_size = 0; + } else { + node_size = nodes[i].node_mem; + if (node_size > machine->ram_size - mem_start) { + node_size = machine->ram_size - mem_start; + } + } + if (!mem_start) { + /* spapr_machine_init() checks for rma_size <= node0_size + * already */ + spapr_dt_memory_node(spapr, fdt, i, 0, spapr->rma_size); + mem_start += spapr->rma_size; + node_size -= spapr->rma_size; + } + for ( ; node_size; ) { + hwaddr sizetmp = pow2floor(node_size); + + /* mem_start != 0 here */ + if (ctzl(mem_start) < ctzl(sizetmp)) { + sizetmp = 1ULL << ctzl(mem_start); + } + + spapr_dt_memory_node(spapr, fdt, i, mem_start, sizetmp); + node_size -= sizetmp; + mem_start += sizetmp; + } + } + + /* Generate ibm,dynamic-reconfiguration-memory node if required */ + if (spapr_ovec_test(spapr->ov5_cas, OV5_DRCONF_MEMORY)) { + int ret; + + g_assert(smc->dr_lmb_enabled); + ret = spapr_dt_dynamic_reconfiguration_memory(spapr, fdt); + if (ret) { + return ret; + } + } + + return 0; +} + +static void spapr_dt_cpu(CPUState *cs, void *fdt, int offset, + SpaprMachineState *spapr) +{ + MachineState *ms = 
MACHINE(spapr);
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs);
+    int index = spapr_get_vcpu_id(cpu);
+    uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40),
+                       0xffffffff, 0xffffffff};
+    uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq()
+        : SPAPR_TIMEBASE_FREQ;
+    uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000;
+    uint32_t page_sizes_prop[64];
+    size_t page_sizes_prop_size;
+    unsigned int smp_threads = ms->smp.threads;
+    uint32_t vcpus_per_socket = smp_threads * ms->smp.cores;
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
+    int compat_smt = MIN(smp_threads, ppc_compat_max_vthreads(cpu));
+    SpaprDrc *drc;
+    int drc_index;
+    uint32_t radix_AP_encodings[PPC_PAGE_SIZES_MAX_SZ];
+    int i;
+
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, index);
+    if (drc) {
+        drc_index = spapr_drc_index(drc);
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "reg", index)));
+    _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu")));
+
+    _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR])));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size",
+                           env->dcache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size",
+                           env->icache_line_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size",
+                           env->icache_line_size)));
+
+    if (pcc->l1_dcache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size",
+                               pcc->l1_dcache_size)));
+    } else {
+        warn_report("Unknown L1 dcache size for cpu");
+    }
+    if (pcc->l1_icache_size) {
+        _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size",
+                               pcc->l1_icache_size)));
+    } else {
+        warn_report("Unknown L1 icache size for cpu");
+    }
+
+    _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq)));
+    _FDT((fdt_setprop_cell(fdt, offset, "slb-size", cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", cpu->hash64_opts->slb_size)));
+    _FDT((fdt_setprop_string(fdt, offset, "status", "okay")));
+    _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0)));
+
+    if (ppc_has_spr(cpu, SPR_PURR)) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,purr", 1)));
+    }
+    if (ppc_has_spr(cpu, SPR_SPURR)) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,spurr", 1)));
+    }
+
+    if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes",
+                          segs, sizeof(segs))));
+    }
+
+    /* Advertise VSX (vector extensions) if available
+     *   1 == VMX / Altivec available
+     *   2 == VSX available
+     *
+     * Only CPUs for which we create core types in spapr_cpu_core.c
+     * are possible, and all of those have VMX */
+    if (spapr_get_cap(spapr, SPAPR_CAP_VSX) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 2)));
+    } else {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", 1)));
+    }
+
+    /* Advertise DFP (Decimal Floating Point) if available
+     *   0 / no property == no DFP
+     *   1 == DFP available */
+    if (spapr_get_cap(spapr, SPAPR_CAP_DFP) != 0) {
+        _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1)));
+    }
+
+    page_sizes_prop_size = ppc_create_page_sizes_prop(cpu, page_sizes_prop,
+                                                      sizeof(page_sizes_prop));
+    if (page_sizes_prop_size) {
+        _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes",
+                          page_sizes_prop, page_sizes_prop_size)));
+    }
+
+    spapr_dt_pa_features(spapr, cpu, fdt,
offset);
+
+    _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id",
+                           cs->cpu_index / vcpus_per_socket)));
+
+    _FDT((fdt_setprop(fdt, offset, "ibm,pft-size",
+                      pft_size_prop, sizeof(pft_size_prop))));
+
+    if (ms->numa_state->num_nodes > 1) {
+        _FDT(spapr_numa_fixup_cpu_dt(spapr, fdt, offset, cpu));
+    }
+
+    _FDT(spapr_fixup_cpu_smt_dt(fdt, offset, cpu, compat_smt));
+
+    if (pcc->radix_page_info) {
+        for (i = 0; i < pcc->radix_page_info->count; i++) {
+            radix_AP_encodings[i] =
+                cpu_to_be32(pcc->radix_page_info->entries[i]);
+        }
+        _FDT((fdt_setprop(fdt, offset, "ibm,processor-radix-AP-encodings",
+                          radix_AP_encodings,
+                          pcc->radix_page_info->count *
+                          sizeof(radix_AP_encodings[0]))));
+    }
+
+    /*
+     * We set this property to let the guest know that it can use the large
+     * decrementer and its width in bits.
+     */
+    if (spapr_get_cap(spapr, SPAPR_CAP_LARGE_DECREMENTER) != SPAPR_CAP_OFF) {
+        _FDT((fdt_setprop_u32(fdt, offset, "ibm,dec-bits",
+                              pcc->lrg_decr_bits)));
+    }
+}
+
+static void spapr_dt_cpus(void *fdt, SpaprMachineState *spapr)
+{
+    CPUState **rev;
+    CPUState *cs;
+    int n_cpus;
+    int cpus_offset;
+    int i;
+
+    cpus_offset = fdt_add_subnode(fdt, 0, "cpus");
+    _FDT(cpus_offset);
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#address-cells", 0x1)));
+    _FDT((fdt_setprop_cell(fdt, cpus_offset, "#size-cells", 0x0)));
+
+    /*
+     * We walk the CPUs in reverse order to ensure that CPU DT nodes
+     * created by fdt_add_subnode() end up in the right order in FDT,
+     * so that the guest kernel can enumerate the CPUs correctly.
+     *
+     * The CPU list cannot be traversed in reverse order, so we need
+     * to do extra work.
+     */
+    n_cpus = 0;
+    rev = NULL;
+    CPU_FOREACH(cs) {
+        rev = g_renew(CPUState *, rev, n_cpus + 1);
+        rev[n_cpus++] = cs;
+    }
+
+    for (i = n_cpus - 1; i >= 0; i--) {
+        CPUState *cs = rev[i];
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        int index = spapr_get_vcpu_id(cpu);
+        DeviceClass *dc = DEVICE_GET_CLASS(cs);
+        g_autofree char *nodename = NULL;
+        int offset;
+
+        if (!spapr_is_thread0_in_vcore(spapr, cpu)) {
+            continue;
+        }
+
+        nodename = g_strdup_printf("%s@%x", dc->fw_name, index);
+        offset = fdt_add_subnode(fdt, cpus_offset, nodename);
+        _FDT(offset);
+        spapr_dt_cpu(cs, fdt, offset, spapr);
+    }
+
+    g_free(rev);
+}
+
+static int spapr_dt_rng(void *fdt)
+{
+    int node;
+    int ret;
+
+    node = qemu_fdt_add_subnode(fdt, "/ibm,platform-facilities");
+    if (node <= 0) {
+        return -1;
+    }
+    ret = fdt_setprop_string(fdt, node, "device_type",
+                             "ibm,platform-facilities");
+    ret |= fdt_setprop_cell(fdt, node, "#address-cells", 0x1);
+    ret |= fdt_setprop_cell(fdt, node, "#size-cells", 0x0);
+
+    node = fdt_add_subnode(fdt, node, "ibm,random-v1");
+    if (node <= 0) {
+        return -1;
+    }
+    ret |= fdt_setprop_string(fdt, node, "compatible", "ibm,random");
+
+    return ret ?
-1 : 0; +} + +static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) +{ + MachineState *ms = MACHINE(spapr); + int rtas; + GString *hypertas = g_string_sized_new(256); + GString *qemu_hypertas = g_string_sized_new(256); + uint64_t max_device_addr = MACHINE(spapr)->device_memory->base + + memory_region_size(&MACHINE(spapr)->device_memory->mr); + uint32_t lrdr_capacity[] = { + cpu_to_be32(max_device_addr >> 32), + cpu_to_be32(max_device_addr & 0xffffffff), + cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32), + cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff), + cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), + }; + + _FDT(rtas = fdt_add_subnode(fdt, 0, "rtas")); + + /* hypertas */ + add_str(hypertas, "hcall-pft"); + add_str(hypertas, "hcall-term"); + add_str(hypertas, "hcall-dabr"); + add_str(hypertas, "hcall-interrupt"); + add_str(hypertas, "hcall-tce"); + add_str(hypertas, "hcall-vio"); + add_str(hypertas, "hcall-splpar"); + add_str(hypertas, "hcall-join"); + add_str(hypertas, "hcall-bulk"); + add_str(hypertas, "hcall-set-mode"); + add_str(hypertas, "hcall-sprg0"); + add_str(hypertas, "hcall-copy"); + add_str(hypertas, "hcall-debug"); + add_str(hypertas, "hcall-vphn"); + if (spapr_get_cap(spapr, SPAPR_CAP_RPT_INVALIDATE) == SPAPR_CAP_ON) { + add_str(hypertas, "hcall-rpt-invalidate"); + } + + add_str(qemu_hypertas, "hcall-memop1"); + + if (!kvm_enabled() || kvmppc_spapr_use_multitce()) { + add_str(hypertas, "hcall-multi-tce"); + } + + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + add_str(hypertas, "hcall-hpt-resize"); + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,hypertas-functions", + hypertas->str, hypertas->len)); + g_string_free(hypertas, TRUE); + _FDT(fdt_setprop(fdt, rtas, "qemu,hypertas-functions", + qemu_hypertas->str, qemu_hypertas->len)); + g_string_free(qemu_hypertas, TRUE); + + spapr_numa_write_rtas_dt(spapr, fdt, rtas); + + /* + * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log, + * and 16 bytes per CPU for system reset error log plus an extra 8 bytes. + * + * The system reset requirements are driven by existing Linux and PowerVM + * implementation which (contrary to PAPR) saves r3 in the error log + * structure like machine check, so Linux expects to find the saved r3 + * value at the address in r3 upon FWNMI-enabled sreset interrupt (and + * does not look at the error value). + * + * System reset interrupts are not subject to interlock like machine + * check, so this memory area could be corrupted if the sreset is + * interrupted by a machine check (or vice versa) if it was shared. To + * prevent this, system reset uses per-CPU areas for the sreset save + * area. A system reset that interrupts a system reset handler could + * still overwrite this area, but Linux doesn't try to recover in that + * case anyway. + * + * The extra 8 bytes is required because Linux's FWNMI error log check + * is off-by-one. + * + * RTAS_MIN_SIZE is required for the RTAS blob itself. + */ + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_MIN_SIZE + + RTAS_ERROR_LOG_MAX + + ms->smp.max_cpus * sizeof(uint64_t) * 2 + + sizeof(uint64_t))); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max", + RTAS_ERROR_LOG_MAX)); + _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate", + RTAS_EVENT_SCAN_RATE)); + + g_assert(msi_nonbroken); + _FDT(fdt_setprop(fdt, rtas, "ibm,change-msix-capable", NULL, 0)); + + /* + * According to PAPR, rtas ibm,os-term does not guarantee a return + * back to the guest cpu. 
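
[Editor's note — illustrative sketch, not part of this patch] The rtas-size computation above is easy to sanity-check by hand: the RTAS blob itself, plus the machine-check error log, plus two 8-byte per-CPU system-reset save slots, plus 8 extra bytes for Linux's off-by-one log check. The two constants below are assumptions standing in for the spapr.h definitions:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        const uint32_t RTAS_MIN_SIZE_EX = 20;         /* assumed value */
        const uint32_t RTAS_ERROR_LOG_MAX_EX = 2048;  /* assumed value */
        uint32_t max_cpus = 8;

        uint32_t rtas_size = RTAS_MIN_SIZE_EX + RTAS_ERROR_LOG_MAX_EX
                           + max_cpus * (uint32_t)sizeof(uint64_t) * 2
                           + (uint32_t)sizeof(uint64_t);
        printf("rtas-size = %u bytes\n", rtas_size);  /* 2204 here */
        return 0;
    }
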
+ * + * While an additional ibm,extended-os-term property indicates + * that rtas call return will always occur. Set this property. + */ + _FDT(fdt_setprop(fdt, rtas, "ibm,extended-os-term", NULL, 0)); + + _FDT(fdt_setprop(fdt, rtas, "ibm,lrdr-capacity", + lrdr_capacity, sizeof(lrdr_capacity))); + + spapr_dt_rtas_tokens(fdt, rtas); +} + +/* + * Prepare ibm,arch-vec-5-platform-support, which indicates the MMU + * and the XIVE features that the guest may request and thus the valid + * values for bytes 23..26 of option vector 5: + */ +static void spapr_dt_ov5_platform_support(SpaprMachineState *spapr, void *fdt, + int chosen) +{ + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + char val[2 * 4] = { + 23, 0x00, /* XICS / XIVE mode */ + 24, 0x00, /* Hash/Radix, filled in below. */ + 25, 0x00, /* Hash options: Segment Tables == no, GTSE == no. */ + 26, 0x40, /* Radix options: GTSE == yes. */ + }; + + if (spapr->irq->xics && spapr->irq->xive) { + val[1] = SPAPR_OV5_XIVE_BOTH; + } else if (spapr->irq->xive) { + val[1] = SPAPR_OV5_XIVE_EXPLOIT; + } else { + assert(spapr->irq->xics); + val[1] = SPAPR_OV5_XIVE_LEGACY; + } + + if (!ppc_check_compat(first_ppc_cpu, CPU_POWERPC_LOGICAL_3_00, 0, + first_ppc_cpu->compat_pvr)) { + /* + * If we're in a pre POWER9 compat mode then the guest should + * do hash and use the legacy interrupt mode + */ + val[1] = SPAPR_OV5_XIVE_LEGACY; /* XICS */ + val[3] = 0x00; /* Hash */ + spapr_check_mmu_mode(false); + } else if (kvm_enabled()) { + if (kvmppc_has_cap_mmu_radix() && kvmppc_has_cap_mmu_hash_v3()) { + val[3] = 0x80; /* OV5_MMU_BOTH */ + } else if (kvmppc_has_cap_mmu_radix()) { + val[3] = 0x40; /* OV5_MMU_RADIX_300 */ + } else { + val[3] = 0x00; /* Hash */ + } + } else { + /* V3 MMU supports both hash and radix in tcg (with dynamic switching) */ + val[3] = 0xC0; + } + _FDT(fdt_setprop(fdt, chosen, "ibm,arch-vec-5-platform-support", + val, sizeof(val))); +} + +static void spapr_dt_chosen(SpaprMachineState *spapr, void *fdt, bool reset) +{ + MachineState *machine = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + int chosen; + + _FDT(chosen = fdt_add_subnode(fdt, 0, "chosen")); + + if (reset) { + const char *boot_device = spapr->boot_device; + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + size_t cb = 0; + char *bootlist = get_boot_devices_list(&cb); + + if (machine->kernel_cmdline && machine->kernel_cmdline[0]) { + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", + machine->kernel_cmdline)); + } + + if (spapr->initrd_size) { + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-start", + spapr->initrd_base)); + _FDT(fdt_setprop_cell(fdt, chosen, "linux,initrd-end", + spapr->initrd_base + spapr->initrd_size)); + } + + if (spapr->kernel_size) { + uint64_t kprop[2] = { cpu_to_be64(spapr->kernel_addr), + cpu_to_be64(spapr->kernel_size) }; + + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel", + &kprop, sizeof(kprop))); + if (spapr->kernel_le) { + _FDT(fdt_setprop(fdt, chosen, "qemu,boot-kernel-le", NULL, 0)); + } + } + if (boot_menu) { + _FDT((fdt_setprop_cell(fdt, chosen, "qemu,boot-menu", boot_menu))); + } + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-width", graphic_width)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-height", graphic_height)); + _FDT(fdt_setprop_cell(fdt, chosen, "qemu,graphic-depth", graphic_depth)); + + if (cb && bootlist) { + int i; + + for (i = 0; i < cb; i++) { + if (bootlist[i] == '\n') { + bootlist[i] = ' '; + } + } + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-list", bootlist)); 
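
[Editor's note — illustrative sketch, not part of this patch] The ibm,arch-vec-5-platform-support property built above is a flat list of (option-vector-5 byte index, supported-values mask) pairs for bytes 23..26. A miniature rendering of the same layout; the 0x80 "XIVE both" value is an assumption standing in for SPAPR_OV5_XIVE_BOTH, while 0xC0/0x40 match the MMU and radix-option values used in the code above:

    #include <stdint.h>
    #include <stdio.h>

    struct ov5_support { uint8_t index, mask; };

    static void fill_platform_support(struct ov5_support s[4],
                                      uint8_t xive_mode, uint8_t mmu_mode)
    {
        s[0] = (struct ov5_support){ 23, xive_mode }; /* XICS/XIVE          */
        s[1] = (struct ov5_support){ 24, mmu_mode };  /* hash/radix support */
        s[2] = (struct ov5_support){ 25, 0x00 };      /* hash opts: none    */
        s[3] = (struct ov5_support){ 26, 0x40 };      /* radix: GTSE == yes */
    }

    int main(void)
    {
        struct ov5_support s[4];
        fill_platform_support(s, 0x80 /* assumed */, 0xC0 /* hash+radix */);
        for (int i = 0; i < 4; i++) {
            printf("byte %u -> %#x\n", s[i].index, s[i].mask);
        }
        return 0;
    }
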
+ } + + if (boot_device && strlen(boot_device)) { + _FDT(fdt_setprop_string(fdt, chosen, "qemu,boot-device", boot_device)); + } + + if (!spapr->has_graphics && stdout_path) { + /* + * "linux,stdout-path" and "stdout" properties are + * deprecated by linux kernel. New platforms should only + * use the "stdout-path" property. Set the new property + * and continue using older property to remain compatible + * with the existing firmware. + */ + _FDT(fdt_setprop_string(fdt, chosen, "linux,stdout-path", stdout_path)); + _FDT(fdt_setprop_string(fdt, chosen, "stdout-path", stdout_path)); + } + + /* + * We can deal with BAR reallocation just fine, advertise it + * to the guest + */ + if (smc->linux_pci_probe) { + _FDT(fdt_setprop_cell(fdt, chosen, "linux,pci-probe-only", 0)); + } + + spapr_dt_ov5_platform_support(spapr, fdt, chosen); + + g_free(stdout_path); + g_free(bootlist); + } + + _FDT(spapr_dt_ovec(fdt, chosen, spapr->ov5_cas, "ibm,architecture-vec-5")); +} + +static void spapr_dt_hypervisor(SpaprMachineState *spapr, void *fdt) +{ + /* The /hypervisor node isn't in PAPR - this is a hack to allow PR + * KVM to work under pHyp with some guest co-operation */ + int hypervisor; + uint8_t hypercall[16]; + + _FDT(hypervisor = fdt_add_subnode(fdt, 0, "hypervisor")); + /* indicate KVM hypercall interface */ + _FDT(fdt_setprop_string(fdt, hypervisor, "compatible", "linux,kvm")); + if (kvmppc_has_cap_fixup_hcalls()) { + /* + * Older KVM versions with older guest kernels were broken + * with the magic page, don't allow the guest to map it. + */ + if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, + sizeof(hypercall))) { + _FDT(fdt_setprop(fdt, hypervisor, "hcall-instructions", + hypercall, sizeof(hypercall))); + } + } +} + +void *spapr_build_fdt(SpaprMachineState *spapr, bool reset, size_t space) +{ + MachineState *machine = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(machine); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + uint32_t root_drc_type_mask = 0; + int ret; + void *fdt; + SpaprPhbState *phb; + char *buf; + + fdt = g_malloc0(space); + _FDT((fdt_create_empty_tree(fdt, space))); + + /* Root node */ + _FDT(fdt_setprop_string(fdt, 0, "device_type", "chrp")); + _FDT(fdt_setprop_string(fdt, 0, "model", "IBM pSeries (emulated by qemu)")); + _FDT(fdt_setprop_string(fdt, 0, "compatible", "qemu,pseries")); + + /* Guest UUID & Name*/ + buf = qemu_uuid_unparse_strdup(&qemu_uuid); + _FDT(fdt_setprop_string(fdt, 0, "vm,uuid", buf)); + if (qemu_uuid_set) { + _FDT(fdt_setprop_string(fdt, 0, "system-id", buf)); + } + g_free(buf); + + if (qemu_get_vm_name()) { + _FDT(fdt_setprop_string(fdt, 0, "ibm,partition-name", + qemu_get_vm_name())); + } + + /* Host Model & Serial Number */ + if (spapr->host_model) { + _FDT(fdt_setprop_string(fdt, 0, "host-model", spapr->host_model)); + } else if (smc->broken_host_serial_model && kvmppc_get_host_model(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-model", buf)); + g_free(buf); + } + + if (spapr->host_serial) { + _FDT(fdt_setprop_string(fdt, 0, "host-serial", spapr->host_serial)); + } else if (smc->broken_host_serial_model && kvmppc_get_host_serial(&buf)) { + _FDT(fdt_setprop_string(fdt, 0, "host-serial", buf)); + g_free(buf); + } + + _FDT(fdt_setprop_cell(fdt, 0, "#address-cells", 2)); + _FDT(fdt_setprop_cell(fdt, 0, "#size-cells", 2)); + + /* /interrupt controller */ + spapr_irq_dt(spapr, spapr_max_server_number(spapr), fdt, PHANDLE_INTC); + + ret = spapr_dt_memory(spapr, fdt); + if (ret < 0) { + error_report("couldn't setup memory nodes 
in fdt"); + exit(1); + } + + /* /vdevice */ + spapr_dt_vdevice(spapr->vio_bus, fdt); + + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)) { + ret = spapr_dt_rng(fdt); + if (ret < 0) { + error_report("could not set up rng device in the fdt"); + exit(1); + } + } + + QLIST_FOREACH(phb, &spapr->phbs, list) { + ret = spapr_dt_phb(spapr, phb, PHANDLE_INTC, fdt, NULL); + if (ret < 0) { + error_report("couldn't setup PCI devices in fdt"); + exit(1); + } + } + + spapr_dt_cpus(fdt, spapr); + + /* ibm,drc-indexes and friends */ + if (smc->dr_lmb_enabled) { + root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_LMB; + } + if (smc->dr_phb_enabled) { + root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PHB; + } + if (mc->nvdimm_supported) { + root_drc_type_mask |= SPAPR_DR_CONNECTOR_TYPE_PMEM; + } + if (root_drc_type_mask) { + _FDT(spapr_dt_drc(fdt, 0, NULL, root_drc_type_mask)); + } + + if (mc->has_hotpluggable_cpus) { + int offset = fdt_path_offset(fdt, "/cpus"); + ret = spapr_dt_drc(fdt, offset, NULL, SPAPR_DR_CONNECTOR_TYPE_CPU); + if (ret < 0) { + error_report("Couldn't set up CPU DR device tree properties"); + exit(1); + } + } + + /* /event-sources */ + spapr_dt_events(spapr, fdt); + + /* /rtas */ + spapr_dt_rtas(spapr, fdt); + + /* /chosen */ + spapr_dt_chosen(spapr, fdt, reset); + + /* /hypervisor */ + if (kvm_enabled()) { + spapr_dt_hypervisor(spapr, fdt); + } + + /* Build memory reserve map */ + if (reset) { + if (spapr->kernel_size) { + _FDT((fdt_add_mem_rsv(fdt, spapr->kernel_addr, + spapr->kernel_size))); + } + if (spapr->initrd_size) { + _FDT((fdt_add_mem_rsv(fdt, spapr->initrd_base, + spapr->initrd_size))); + } + } + + /* NVDIMM devices */ + if (mc->nvdimm_supported) { + spapr_dt_persistent_memory(spapr, fdt); + } + + return fdt; +} + +static uint64_t translate_kernel_address(void *opaque, uint64_t addr) +{ + SpaprMachineState *spapr = opaque; + + return (addr & 0x0fffffff) + spapr->kernel_addr; +} + +static void emulate_spapr_hypercall(PPCVirtualHypervisor *vhyp, + PowerPCCPU *cpu) +{ + CPUPPCState *env = &cpu->env; + + /* The TCG path should also be holding the BQL at this point */ + g_assert(qemu_mutex_iothread_locked()); + + if (msr_pr) { + hcall_dprintf("Hypercall made with MSR[PR]=1\n"); + env->gpr[3] = H_PRIVILEGE; + } else { + env->gpr[3] = spapr_hypercall(cpu, env->gpr[3], &env->gpr[4]); + } +} + +struct LPCRSyncState { + target_ulong value; + target_ulong mask; +}; + +static void do_lpcr_sync(CPUState *cs, run_on_cpu_data arg) +{ + struct LPCRSyncState *s = arg.host_ptr; + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + target_ulong lpcr; + + cpu_synchronize_state(cs); + lpcr = env->spr[SPR_LPCR]; + lpcr &= ~s->mask; + lpcr |= s->value; + ppc_store_lpcr(cpu, lpcr); +} + +void spapr_set_all_lpcrs(target_ulong value, target_ulong mask) +{ + CPUState *cs; + struct LPCRSyncState s = { + .value = value, + .mask = mask + }; + CPU_FOREACH(cs) { + run_on_cpu(cs, do_lpcr_sync, RUN_ON_CPU_HOST_PTR(&s)); + } +} + +static void spapr_get_pate(PPCVirtualHypervisor *vhyp, ppc_v3_pate_t *entry) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + /* Copy PATE1:GR into PATE0:HR */ + entry->dw0 = spapr->patb_entry & PATE0_HR; + entry->dw1 = spapr->patb_entry; +} + +#define HPTE(_table, _i) (void *)(((uint64_t *)(_table)) + ((_i) * 2)) +#define HPTE_VALID(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_VALID) +#define HPTE_DIRTY(_hpte) (tswap64(*((uint64_t *)(_hpte))) & HPTE64_V_HPTE_DIRTY) +#define CLEAN_HPTE(_hpte) ((*(uint64_t *)(_hpte)) &= tswap64(~HPTE64_V_HPTE_DIRTY)) 
+#define DIRTY_HPTE(_hpte) ((*(uint64_t *)(_hpte)) |= tswap64(HPTE64_V_HPTE_DIRTY)) + +/* + * Get the fd to access the kernel htab, re-opening it if necessary + */ +static int get_htab_fd(SpaprMachineState *spapr) +{ + Error *local_err = NULL; + + if (spapr->htab_fd >= 0) { + return spapr->htab_fd; + } + + spapr->htab_fd = kvmppc_get_htab_fd(false, 0, &local_err); + if (spapr->htab_fd < 0) { + error_report_err(local_err); + } + + return spapr->htab_fd; +} + +void close_htab_fd(SpaprMachineState *spapr) +{ + if (spapr->htab_fd >= 0) { + close(spapr->htab_fd); + } + spapr->htab_fd = -1; +} + +static hwaddr spapr_hpt_mask(PPCVirtualHypervisor *vhyp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + return HTAB_SIZE(spapr) / HASH_PTEG_SIZE_64 - 1; +} + +static target_ulong spapr_encode_hpt_for_kvm_pr(PPCVirtualHypervisor *vhyp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + assert(kvm_enabled()); + + if (!spapr->htab) { + return 0; + } + + return (target_ulong)(uintptr_t)spapr->htab | (spapr->htab_shift - 18); +} + +static const ppc_hash_pte64_t *spapr_map_hptes(PPCVirtualHypervisor *vhyp, + hwaddr ptex, int n) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + hwaddr pte_offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + /* + * HTAB is controlled by KVM. Fetch into temporary buffer + */ + ppc_hash_pte64_t *hptes = g_malloc(n * HASH_PTE_SIZE_64); + kvmppc_read_hptes(hptes, ptex, n); + return hptes; + } + + /* + * HTAB is controlled by QEMU. Just point to the internally + * accessible PTEG. + */ + return (const ppc_hash_pte64_t *)(spapr->htab + pte_offset); +} + +static void spapr_unmap_hptes(PPCVirtualHypervisor *vhyp, + const ppc_hash_pte64_t *hptes, + hwaddr ptex, int n) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + g_free((void *)hptes); + } + + /* Nothing to do for qemu managed HPT */ +} + +void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex, + uint64_t pte0, uint64_t pte1) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(cpu->vhyp); + hwaddr offset = ptex * HASH_PTE_SIZE_64; + + if (!spapr->htab) { + kvmppc_write_hpte(ptex, pte0, pte1); + } else { + if (pte0 & HPTE64_V_VALID) { + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); + /* + * When setting valid, we write PTE1 first. This ensures + * proper synchronization with the reading code in + * ppc_hash64_pteg_search() + */ + smp_wmb(); + stq_p(spapr->htab + offset, pte0); + } else { + stq_p(spapr->htab + offset, pte0); + /* + * When clearing it we set PTE0 first. 
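
[Editor's note — illustrative sketch, not part of this patch] The ordering rule in spapr_store_hpte() above generalizes to a publish/retract pattern: the word the reader checks first (PTE0, which holds the VALID bit) must be the last store when publishing and the first store when tearing down, with a write barrier in between (smp_wmb() in QEMU; a release fence in portable C11 below):

    #include <stdatomic.h>
    #include <stdint.h>

    static void publish_hpte(_Atomic uint64_t *pte0, _Atomic uint64_t *pte1,
                             uint64_t v0, uint64_t v1)
    {
        atomic_store_explicit(pte1, v1, memory_order_relaxed); /* payload    */
        atomic_thread_fence(memory_order_release);             /* smp_wmb()  */
        atomic_store_explicit(pte0, v0, memory_order_relaxed); /* then valid */
    }

    static void retract_hpte(_Atomic uint64_t *pte0, _Atomic uint64_t *pte1,
                             uint64_t v1)
    {
        atomic_store_explicit(pte0, 0, memory_order_relaxed);  /* clear valid  */
        atomic_thread_fence(memory_order_release);
        atomic_store_explicit(pte1, v1, memory_order_relaxed); /* then payload */
    }

    int main(void)
    {
        _Atomic uint64_t pte0 = 0, pte1 = 0;
        publish_hpte(&pte0, &pte1, 1 /* VALID */, 0x1234);
        retract_hpte(&pte0, &pte1, 0);
        return 0;
    }
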
This ensures proper + * synchronization with the reading code in + * ppc_hash64_pteg_search() + */ + smp_wmb(); + stq_p(spapr->htab + offset + HPTE64_DW1, pte1); + } + } +} + +static void spapr_hpte_set_c(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte1) +{ + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + /* There should always be a hash table when this is called */ + error_report("spapr_hpte_set_c called with no hash table !"); + return; + } + + /* The HW performs a non-atomic byte update */ + stb_p(spapr->htab + offset, (pte1 & 0xff) | 0x80); +} + +static void spapr_hpte_set_r(PPCVirtualHypervisor *vhyp, hwaddr ptex, + uint64_t pte1) +{ + hwaddr offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; + SpaprMachineState *spapr = SPAPR_MACHINE(vhyp); + + if (!spapr->htab) { + /* There should always be a hash table when this is called */ + error_report("spapr_hpte_set_r called with no hash table !"); + return; + } + + /* The HW performs a non-atomic byte update */ + stb_p(spapr->htab + offset, ((pte1 >> 8) & 0xff) | 0x01); +} + +int spapr_hpt_shift_for_ramsize(uint64_t ramsize) +{ + int shift; + + /* We aim for a hash table of size 1/128 the size of RAM (rounded + * up). The PAPR recommendation is actually 1/64 of RAM size, but + * that's much more than is needed for Linux guests */ + shift = ctz64(pow2ceil(ramsize)) - 7; + shift = MAX(shift, 18); /* Minimum architected size */ + shift = MIN(shift, 46); /* Maximum architected size */ + return shift; +} + +void spapr_free_hpt(SpaprMachineState *spapr) +{ + g_free(spapr->htab); + spapr->htab = NULL; + spapr->htab_shift = 0; + close_htab_fd(spapr); +} + +int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp) +{ + ERRP_GUARD(); + long rc; + + /* Clean up any HPT info from a previous boot */ + spapr_free_hpt(spapr); + + rc = kvmppc_reset_htab(shift); + + if (rc == -EOPNOTSUPP) { + error_setg(errp, "HPT not supported in nested guests"); + return -EOPNOTSUPP; + } + + if (rc < 0) { + /* kernel-side HPT needed, but couldn't allocate one */ + error_setg_errno(errp, errno, "Failed to allocate KVM HPT of order %d", + shift); + error_append_hint(errp, "Try smaller maxmem?\n"); + return -errno; + } else if (rc > 0) { + /* kernel-side HPT allocated */ + if (rc != shift) { + error_setg(errp, + "Requested order %d HPT, but kernel allocated order %ld", + shift, rc); + error_append_hint(errp, "Try smaller maxmem?\n"); + return -ENOSPC; + } + + spapr->htab_shift = shift; + spapr->htab = NULL; + } else { + /* kernel-side HPT not needed, allocate in userspace instead */ + size_t size = 1ULL << shift; + int i; + + spapr->htab = qemu_memalign(size, size); + memset(spapr->htab, 0, size); + spapr->htab_shift = shift; + + for (i = 0; i < size / HASH_PTE_SIZE_64; i++) { + DIRTY_HPTE(HPTE(spapr->htab, i)); + } + } + /* We're setting up a hash table, so that means we're not radix */ + spapr->patb_entry = 0; + spapr_set_all_lpcrs(0, LPCR_HR | LPCR_UPRT); + return 0; +} + +void spapr_setup_hpt(SpaprMachineState *spapr) +{ + int hpt_shift; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + hpt_shift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + } else { + uint64_t current_ram_size; + + current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size(); + hpt_shift = spapr_hpt_shift_for_ramsize(current_ram_size); + } + spapr_reallocate_hpt(spapr, hpt_shift, &error_fatal); + + if (kvm_enabled()) { + hwaddr vrma_limit = 
kvmppc_vrma_limit(spapr->htab_shift); + + /* Check our RMA fits in the possible VRMA */ + if (vrma_limit < spapr->rma_size) { + error_report("Unable to create %" HWADDR_PRIu + "MiB RMA (VRMA only allows %" HWADDR_PRIu "MiB", + spapr->rma_size / MiB, vrma_limit / MiB); + exit(EXIT_FAILURE); + } + } +} + +void spapr_check_mmu_mode(bool guest_radix) +{ + if (guest_radix) { + if (kvm_enabled() && !kvmppc_has_cap_mmu_radix()) { + error_report("Guest requested unavailable MMU mode (radix)."); + exit(EXIT_FAILURE); + } + } else { + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() + && !kvmppc_has_cap_mmu_hash_v3()) { + error_report("Guest requested unavailable MMU mode (hash)."); + exit(EXIT_FAILURE); + } + } +} + +static void spapr_machine_reset(MachineState *machine) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(machine); + PowerPCCPU *first_ppc_cpu; + hwaddr fdt_addr; + void *fdt; + int rc; + + pef_kvm_reset(machine->cgs, &error_fatal); + spapr_caps_apply(spapr); + + first_ppc_cpu = POWERPC_CPU(first_cpu); + if (kvm_enabled() && kvmppc_has_cap_mmu_radix() && + ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + /* + * If using KVM with radix mode available, VCPUs can be started + * without a HPT because KVM will start them in radix mode. + * Set the GR bit in PATE so that we know there is no HPT. + */ + spapr->patb_entry = PATE1_GR; + spapr_set_all_lpcrs(LPCR_HR | LPCR_UPRT, LPCR_HR | LPCR_UPRT); + } else { + spapr_setup_hpt(spapr); + } + + qemu_devices_reset(); + + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_new(); + + ppc_set_compat_all(spapr->max_compat_pvr, &error_fatal); + + /* + * This is fixing some of the default configuration of the XIVE + * devices. To be called after the reset of the machine devices. + */ + spapr_irq_reset(spapr, &error_fatal); + + /* + * There is no CAS under qtest. Simulate one to please the code that + * depends on spapr->ov5_cas. This is especially needed to test device + * unplug, so we do that before resetting the DRCs. + */ + if (qtest_enabled()) { + spapr_ovec_cleanup(spapr->ov5_cas); + spapr->ov5_cas = spapr_ovec_clone(spapr->ov5); + } + + /* DRC reset may cause a device to be unplugged. This will cause troubles + * if this device is used by another device (eg, a running vhost backend + * will crash QEMU if the DIMM holding the vring goes away). To avoid such + * situations, we reset DRCs after all devices have been reset. + */ + spapr_drc_reset_all(spapr); + + spapr_clear_pending_events(spapr); + + /* + * We place the device tree just below either the top of the RMA, + * or just below 2GB, whichever is lower, so that it can be + * processed with 32-bit real mode code if necessary + */ + fdt_addr = MIN(spapr->rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE; + + fdt = spapr_build_fdt(spapr, true, FDT_MAX_SIZE); + if (spapr->vof) { + spapr_vof_reset(spapr, fdt, &error_fatal); + /* + * Do not pack the FDT as the client may change properties. + * VOF client does not expect the FDT so we do not load it to the VM. 
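
[Editor's note — illustrative sketch, not part of this patch] The device-tree placement above is a one-liner worth checking with numbers: the FDT goes just below the top of the RMA, or just below 2 GiB, whichever is lower, so 32-bit real-mode code can still reach it. FDT_MAX_SIZE of 1 MiB below is an assumption standing in for the spapr.h constant:

    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint64_t FDT_MAX_ADDR = 0x80000000;  /* from this file        */
        uint64_t FDT_MAX_SIZE = 0x100000;    /* assumed, 1 MiB        */
        uint64_t rma_size = 512ULL << 20;    /* example: 512 MiB RMA  */

        uint64_t fdt_addr = MIN(rma_size, FDT_MAX_ADDR) - FDT_MAX_SIZE;
        printf("fdt at %#llx\n",             /* 0x1ff00000 here */
               (unsigned long long)fdt_addr);
        return 0;
    }
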
+ */ + } else { + rc = fdt_pack(fdt); + /* Should only fail if we've built a corrupted tree */ + assert(rc == 0); + + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + 0, fdt_addr, 0); + cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt)); + } + qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt)); + + g_free(spapr->fdt_blob); + spapr->fdt_size = fdt_totalsize(fdt); + spapr->fdt_initial_size = spapr->fdt_size; + spapr->fdt_blob = fdt; + + /* Set up the entry state */ + first_ppc_cpu->env.gpr[5] = 0; + + spapr->fwnmi_system_reset_addr = -1; + spapr->fwnmi_machine_check_addr = -1; + spapr->fwnmi_machine_check_interlock = -1; + + /* Signal all vCPUs waiting on this condition */ + qemu_cond_broadcast(&spapr->fwnmi_machine_check_interlock_cond); + + migrate_del_blocker(spapr->fwnmi_migration_blocker); +} + +static void spapr_create_nvram(SpaprMachineState *spapr) +{ + DeviceState *dev = qdev_new("spapr-nvram"); + DriveInfo *dinfo = drive_get(IF_PFLASH, 0, 0); + + if (dinfo) { + qdev_prop_set_drive_err(dev, "drive", blk_by_legacy_dinfo(dinfo), + &error_fatal); + } + + qdev_realize_and_unref(dev, &spapr->vio_bus->bus, &error_fatal); + + spapr->nvram = (struct SpaprNvram *)dev; +} + +static void spapr_rtc_create(SpaprMachineState *spapr) +{ + object_initialize_child_with_props(OBJECT(spapr), "rtc", &spapr->rtc, + sizeof(spapr->rtc), TYPE_SPAPR_RTC, + &error_fatal, NULL); + qdev_realize(DEVICE(&spapr->rtc), NULL, &error_fatal); + object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(&spapr->rtc), + "date"); +} + +/* Returns whether we want to use VGA or not */ +static bool spapr_vga_init(PCIBus *pci_bus, Error **errp) +{ + switch (vga_interface_type) { + case VGA_NONE: + return false; + case VGA_DEVICE: + return true; + case VGA_STD: + case VGA_VIRTIO: + case VGA_CIRRUS: + return pci_vga_init(pci_bus) != NULL; + default: + error_setg(errp, + "Unsupported VGA mode, only -vga std or -vga virtio is supported"); + return false; + } +} + +static int spapr_pre_load(void *opaque) +{ + int rc; + + rc = spapr_caps_pre_load(opaque); + if (rc) { + return rc; + } + + return 0; +} + +static int spapr_post_load(void *opaque, int version_id) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + int err = 0; + + err = spapr_caps_post_migration(spapr); + if (err) { + return err; + } + + /* + * In earlier versions, there was no separate qdev for the PAPR + * RTC, so the RTC offset was stored directly in sPAPREnvironment. + * So when migrating from those versions, poke the incoming offset + * value into the RTC device + */ + if (version_id < 3) { + err = spapr_rtc_import_offset(&spapr->rtc, spapr->rtc_offset); + if (err) { + return err; + } + } + + if (kvm_enabled() && spapr->patb_entry) { + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + bool radix = !!(spapr->patb_entry & PATE1_GR); + bool gtse = !!(cpu->env.spr[SPR_LPCR] & LPCR_GTSE); + + /* + * Update LPCR:HR and UPRT as they may not be set properly in + * the stream + */ + spapr_set_all_lpcrs(radix ? 
(LPCR_HR | LPCR_UPRT) : 0,
+                            LPCR_HR | LPCR_UPRT);
+
+        err = kvmppc_configure_v3_mmu(cpu, radix, gtse, spapr->patb_entry);
+        if (err) {
+            error_report("Process table config unsupported by the host");
+            return -EINVAL;
+        }
+    }
+
+    err = spapr_irq_post_load(spapr, version_id);
+    if (err) {
+        return err;
+    }
+
+    return err;
+}
+
+static int spapr_pre_save(void *opaque)
+{
+    int rc;
+
+    rc = spapr_caps_pre_save(opaque);
+    if (rc) {
+        return rc;
+    }
+
+    return 0;
+}
+
+static bool version_before_3(void *opaque, int version_id)
+{
+    return version_id < 3;
+}
+
+static bool spapr_pending_events_needed(void *opaque)
+{
+    SpaprMachineState *spapr = (SpaprMachineState *)opaque;
+    return !QTAILQ_EMPTY(&spapr->pending_events);
+}
+
+static const VMStateDescription vmstate_spapr_event_entry = {
+    .name = "spapr_event_log_entry",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(summary, SpaprEventLogEntry),
+        VMSTATE_UINT32(extended_length, SpaprEventLogEntry),
+        VMSTATE_VBUFFER_ALLOC_UINT32(extended_log, SpaprEventLogEntry, 0,
+                                     NULL, extended_length),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_pending_events = {
+    .name = "spapr_pending_events",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_pending_events_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_QTAILQ_V(pending_events, SpaprMachineState, 1,
+                         vmstate_spapr_event_entry, SpaprEventLogEntry, next),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static bool spapr_ov5_cas_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+    SpaprOptionVector *ov5_mask = spapr_ovec_new();
+    bool cas_needed;
+
+    /* Prior to the introduction of SpaprOptionVector, we had two option
+     * vectors we dealt with: OV5_FORM1_AFFINITY, and OV5_DRCONF_MEMORY.
+     * Both of these options encode machine topology into the device-tree
+     * in such a way that the now-booted OS should still be able to interact
+     * appropriately with QEMU regardless of what options were actually
+     * negotiated on the source side.
+     *
+     * As such, we can avoid migrating the CAS-negotiated options if these
+     * are the only options available on the current machine/platform.
+     * Since these are the only options available for pseries-2.7 and
+     * earlier, this allows us to maintain old->new/new->old migration
+     * compatibility.
+     *
+     * For QEMU 2.8+, there are additional CAS-negotiable options available
+     * via default pseries-2.8 machines and explicit command-line parameters.
+     * Some of these options, like OV5_HP_EVT, *do* require QEMU to be aware
+     * of the actual CAS-negotiated values to continue working properly. For
+     * example, availability of memory unplug depends on knowing whether
+     * OV5_HP_EVT was negotiated via CAS.
+     *
+     * Thus, for any cases where the set of available CAS-negotiable
+     * options extends beyond OV5_FORM1_AFFINITY and OV5_DRCONF_MEMORY, we
+     * include the CAS-negotiated options in the migration stream, unless
+     * they affect boot time behaviour only.
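
[Editor's note — illustrative sketch, not part of this patch] The decision described above reduces to a subset test: migrate the CAS subsection only if the guest negotiated bits outside an always-safe mask. With option vectors modeled as plain bitmasks (the bit values below are arbitrary placeholders, not the real OV5 encodings), the logic is:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool ovec_subset(uint32_t ov, uint32_t mask)
    {
        return (ov & ~mask) == 0;  /* no bits beyond the mask */
    }

    int main(void)
    {
        uint32_t MASK = 0x7;  /* FORM1_AFFINITY | DRCONF_MEMORY | DRMEM_V2 */
        printf("cas_needed=%d\n", !ovec_subset(0x1 | 0x8, MASK)); /* 1 */
        printf("cas_needed=%d\n", !ovec_subset(0x1, MASK));       /* 0 */
        return 0;
    }
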
+ */ + spapr_ovec_set(ov5_mask, OV5_FORM1_AFFINITY); + spapr_ovec_set(ov5_mask, OV5_DRCONF_MEMORY); + spapr_ovec_set(ov5_mask, OV5_DRMEM_V2); + + /* We need extra information if we have any bits outside the mask + * defined above */ + cas_needed = !spapr_ovec_subset(spapr->ov5, ov5_mask); + + spapr_ovec_cleanup(ov5_mask); + + return cas_needed; +} + +static const VMStateDescription vmstate_spapr_ov5_cas = { + .name = "spapr_option_vector_ov5_cas", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_ov5_cas_needed, + .fields = (VMStateField[]) { + VMSTATE_STRUCT_POINTER_V(ov5_cas, SpaprMachineState, 1, + vmstate_spapr_ovec, SpaprOptionVector), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_patb_entry_needed(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + return !!spapr->patb_entry; +} + +static const VMStateDescription vmstate_spapr_patb_entry = { + .name = "spapr_patb_entry", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_patb_entry_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(patb_entry, SpaprMachineState), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_irq_map_needed(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + return spapr->irq_map && !bitmap_empty(spapr->irq_map, spapr->irq_map_nr); +} + +static const VMStateDescription vmstate_spapr_irq_map = { + .name = "spapr_irq_map", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_irq_map_needed, + .fields = (VMStateField[]) { + VMSTATE_BITMAP(irq_map, SpaprMachineState, 0, irq_map_nr), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_dtb_needed(void *opaque) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(opaque); + + return smc->update_dt_enabled; +} + +static int spapr_dtb_pre_load(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + g_free(spapr->fdt_blob); + spapr->fdt_blob = NULL; + spapr->fdt_size = 0; + + return 0; +} + +static const VMStateDescription vmstate_spapr_dtb = { + .name = "spapr_dtb", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_dtb_needed, + .pre_load = spapr_dtb_pre_load, + .fields = (VMStateField[]) { + VMSTATE_UINT32(fdt_initial_size, SpaprMachineState), + VMSTATE_UINT32(fdt_size, SpaprMachineState), + VMSTATE_VBUFFER_ALLOC_UINT32(fdt_blob, SpaprMachineState, 0, NULL, + fdt_size), + VMSTATE_END_OF_LIST() + }, +}; + +static bool spapr_fwnmi_needed(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + return spapr->fwnmi_machine_check_addr != -1; +} + +static int spapr_fwnmi_pre_save(void *opaque) +{ + SpaprMachineState *spapr = (SpaprMachineState *)opaque; + + /* + * Check if machine check handling is in progress and print a + * warning message. + */ + if (spapr->fwnmi_machine_check_interlock != -1) { + warn_report("A machine check is being handled during migration. 
The" + "handler may run and log hardware error on the destination"); + } + + return 0; +} + +static const VMStateDescription vmstate_spapr_fwnmi = { + .name = "spapr_fwnmi", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_fwnmi_needed, + .pre_save = spapr_fwnmi_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT64(fwnmi_system_reset_addr, SpaprMachineState), + VMSTATE_UINT64(fwnmi_machine_check_addr, SpaprMachineState), + VMSTATE_INT32(fwnmi_machine_check_interlock, SpaprMachineState), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr = { + .name = "spapr", + .version_id = 3, + .minimum_version_id = 1, + .pre_load = spapr_pre_load, + .post_load = spapr_post_load, + .pre_save = spapr_pre_save, + .fields = (VMStateField[]) { + /* used to be @next_irq */ + VMSTATE_UNUSED_BUFFER(version_before_3, 0, 4), + + /* RTC offset */ + VMSTATE_UINT64_TEST(rtc_offset, SpaprMachineState, version_before_3), + + VMSTATE_PPC_TIMEBASE_V(tb, SpaprMachineState, 2), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription*[]) { + &vmstate_spapr_ov5_cas, + &vmstate_spapr_patb_entry, + &vmstate_spapr_pending_events, + &vmstate_spapr_cap_htm, + &vmstate_spapr_cap_vsx, + &vmstate_spapr_cap_dfp, + &vmstate_spapr_cap_cfpc, + &vmstate_spapr_cap_sbbc, + &vmstate_spapr_cap_ibs, + &vmstate_spapr_cap_hpt_maxpagesize, + &vmstate_spapr_irq_map, + &vmstate_spapr_cap_nested_kvm_hv, + &vmstate_spapr_dtb, + &vmstate_spapr_cap_large_decr, + &vmstate_spapr_cap_ccf_assist, + &vmstate_spapr_cap_fwnmi, + &vmstate_spapr_fwnmi, + &vmstate_spapr_cap_rpt_invalidate, + NULL + } +}; + +static int htab_save_setup(QEMUFile *f, void *opaque) +{ + SpaprMachineState *spapr = opaque; + + /* "Iteration" header */ + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + } else { + qemu_put_be32(f, spapr->htab_shift); + } + + if (spapr->htab) { + spapr->htab_save_index = 0; + spapr->htab_first_pass = true; + } else { + if (spapr->htab_shift) { + assert(kvm_enabled()); + } + } + + + return 0; +} + +static void htab_save_chunk(QEMUFile *f, SpaprMachineState *spapr, + int chunkstart, int n_valid, int n_invalid) +{ + qemu_put_be32(f, chunkstart); + qemu_put_be16(f, n_valid); + qemu_put_be16(f, n_invalid); + qemu_put_buffer(f, HPTE(spapr->htab, chunkstart), + HASH_PTE_SIZE_64 * n_valid); +} + +static void htab_save_end_marker(QEMUFile *f) +{ + qemu_put_be32(f, 0); + qemu_put_be16(f, 0); + qemu_put_be16(f, 0); +} + +static void htab_save_first_pass(QEMUFile *f, SpaprMachineState *spapr, + int64_t max_ns) +{ + bool has_timeout = max_ns != -1; + int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; + int index = spapr->htab_save_index; + int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + assert(spapr->htab_first_pass); + + do { + int chunkstart; + + /* Consume invalid HPTEs */ + while ((index < htabslots) + && !HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + } + + /* Consume valid HPTEs */ + chunkstart = index; + while ((index < htabslots) && (index - chunkstart < USHRT_MAX) + && HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + } + + if (index > chunkstart) { + int n_valid = index - chunkstart; + + htab_save_chunk(f, spapr, chunkstart, n_valid, 0); + + if (has_timeout && + (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { + break; + } + } + } while ((index < htabslots) && !qemu_file_rate_limit(f)); + + if (index >= htabslots) { + assert(index == htabslots); + index = 0; + 
spapr->htab_first_pass = false; + } + spapr->htab_save_index = index; +} + +static int htab_save_later_pass(QEMUFile *f, SpaprMachineState *spapr, + int64_t max_ns) +{ + bool final = max_ns < 0; + int htabslots = HTAB_SIZE(spapr) / HASH_PTE_SIZE_64; + int examined = 0, sent = 0; + int index = spapr->htab_save_index; + int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + + assert(!spapr->htab_first_pass); + + do { + int chunkstart, invalidstart; + + /* Consume non-dirty HPTEs */ + while ((index < htabslots) + && !HPTE_DIRTY(HPTE(spapr->htab, index))) { + index++; + examined++; + } + + chunkstart = index; + /* Consume valid dirty HPTEs */ + while ((index < htabslots) && (index - chunkstart < USHRT_MAX) + && HPTE_DIRTY(HPTE(spapr->htab, index)) + && HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + examined++; + } + + invalidstart = index; + /* Consume invalid dirty HPTEs */ + while ((index < htabslots) && (index - invalidstart < USHRT_MAX) + && HPTE_DIRTY(HPTE(spapr->htab, index)) + && !HPTE_VALID(HPTE(spapr->htab, index))) { + CLEAN_HPTE(HPTE(spapr->htab, index)); + index++; + examined++; + } + + if (index > chunkstart) { + int n_valid = invalidstart - chunkstart; + int n_invalid = index - invalidstart; + + htab_save_chunk(f, spapr, chunkstart, n_valid, n_invalid); + sent += index - chunkstart; + + if (!final && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) > max_ns) { + break; + } + } + + if (examined >= htabslots) { + break; + } + + if (index >= htabslots) { + assert(index == htabslots); + index = 0; + } + } while ((examined < htabslots) && (!qemu_file_rate_limit(f) || final)); + + if (index >= htabslots) { + assert(index == htabslots); + index = 0; + } + + spapr->htab_save_index = index; + + return (examined >= htabslots) && (sent == 0) ? 
1 : 0; +} + +#define MAX_ITERATION_NS 5000000 /* 5 ms */ +#define MAX_KVM_BUF_SIZE 2048 + +static int htab_save_iterate(QEMUFile *f, void *opaque) +{ + SpaprMachineState *spapr = opaque; + int fd; + int rc = 0; + + /* Iteration header */ + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 1; + } else { + qemu_put_be32(f, 0); + } + + if (!spapr->htab) { + assert(kvm_enabled()); + + fd = get_htab_fd(spapr); + if (fd < 0) { + return fd; + } + + rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, MAX_ITERATION_NS); + if (rc < 0) { + return rc; + } + } else if (spapr->htab_first_pass) { + htab_save_first_pass(f, spapr, MAX_ITERATION_NS); + } else { + rc = htab_save_later_pass(f, spapr, MAX_ITERATION_NS); + } + + htab_save_end_marker(f); + + return rc; +} + +static int htab_save_complete(QEMUFile *f, void *opaque) +{ + SpaprMachineState *spapr = opaque; + int fd; + + /* Iteration header */ + if (!spapr->htab_shift) { + qemu_put_be32(f, -1); + return 0; + } else { + qemu_put_be32(f, 0); + } + + if (!spapr->htab) { + int rc; + + assert(kvm_enabled()); + + fd = get_htab_fd(spapr); + if (fd < 0) { + return fd; + } + + rc = kvmppc_save_htab(f, fd, MAX_KVM_BUF_SIZE, -1); + if (rc < 0) { + return rc; + } + } else { + if (spapr->htab_first_pass) { + htab_save_first_pass(f, spapr, -1); + } + htab_save_later_pass(f, spapr, -1); + } + + /* End marker */ + htab_save_end_marker(f); + + return 0; +} + +static int htab_load(QEMUFile *f, void *opaque, int version_id) +{ + SpaprMachineState *spapr = opaque; + uint32_t section_hdr; + int fd = -1; + Error *local_err = NULL; + + if (version_id < 1 || version_id > 1) { + error_report("htab_load() bad version"); + return -EINVAL; + } + + section_hdr = qemu_get_be32(f); + + if (section_hdr == -1) { + spapr_free_hpt(spapr); + return 0; + } + + if (section_hdr) { + int ret; + + /* First section gives the htab size */ + ret = spapr_reallocate_hpt(spapr, section_hdr, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + return 0; + } + + if (!spapr->htab) { + assert(kvm_enabled()); + + fd = kvmppc_get_htab_fd(true, 0, &local_err); + if (fd < 0) { + error_report_err(local_err); + return fd; + } + } + + while (true) { + uint32_t index; + uint16_t n_valid, n_invalid; + + index = qemu_get_be32(f); + n_valid = qemu_get_be16(f); + n_invalid = qemu_get_be16(f); + + if ((index == 0) && (n_valid == 0) && (n_invalid == 0)) { + /* End of Stream */ + break; + } + + if ((index + n_valid + n_invalid) > + (HTAB_SIZE(spapr) / HASH_PTE_SIZE_64)) { + /* Bad index in stream */ + error_report( + "htab_load() bad index %d (%hd+%hd entries) in htab stream (htab_shift=%d)", + index, n_valid, n_invalid, spapr->htab_shift); + return -EINVAL; + } + + if (spapr->htab) { + if (n_valid) { + qemu_get_buffer(f, HPTE(spapr->htab, index), + HASH_PTE_SIZE_64 * n_valid); + } + if (n_invalid) { + memset(HPTE(spapr->htab, index + n_valid), 0, + HASH_PTE_SIZE_64 * n_invalid); + } + } else { + int rc; + + assert(fd >= 0); + + rc = kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid, + &local_err); + if (rc < 0) { + error_report_err(local_err); + return rc; + } + } + } + + if (!spapr->htab) { + assert(fd >= 0); + close(fd); + } + + return 0; +} + +static void htab_save_cleanup(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + close_htab_fd(spapr); +} + +static SaveVMHandlers savevm_htab_handlers = { + .save_setup = htab_save_setup, + .save_live_iterate = htab_save_iterate, + .save_live_complete_precopy = htab_save_complete, + .save_cleanup = htab_save_cleanup, + 
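+    /*
+     * Stream layout, as produced by htab_save_chunk() and
+     * htab_save_end_marker() above and parsed again by htab_load():
+     * each iteration body is a sequence of chunks of the form
+     *     be32 index | be16 n_valid | be16 n_invalid | n_valid HPTEs
+     * (HASH_PTE_SIZE_64 bytes each), terminated by an all-zero
+     * index/n_valid/n_invalid marker.
+     */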
.load_state = htab_load,
+};
+
+static void spapr_boot_set(void *opaque, const char *boot_device,
+                           Error **errp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(opaque);
+
+    g_free(spapr->boot_device);
+    spapr->boot_device = g_strdup(boot_device);
+}
+
+static void spapr_create_lmb_dr_connectors(SpaprMachineState *spapr)
+{
+    MachineState *machine = MACHINE(spapr);
+    uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
+    uint32_t nr_lmbs = (machine->maxram_size - machine->ram_size) / lmb_size;
+    int i;
+
+    for (i = 0; i < nr_lmbs; i++) {
+        uint64_t addr;
+
+        addr = i * lmb_size + machine->device_memory->base;
+        spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_LMB,
+                               addr / lmb_size);
+    }
+}
+
+/*
+ * If RAM size, maxmem size and individual node mem sizes aren't aligned
+ * to SPAPR_MEMORY_BLOCK_SIZE (256 MiB), then refuse to start the guest
+ * since we can't support such unaligned sizes with DRCONF_MEMORY.
+ */
+static void spapr_validate_node_memory(MachineState *machine, Error **errp)
+{
+    int i;
+
+    if (machine->ram_size % SPAPR_MEMORY_BLOCK_SIZE) {
+        error_setg(errp, "Memory size 0x" RAM_ADDR_FMT
+                   " is not aligned to %" PRIu64 " MiB",
+                   machine->ram_size,
+                   SPAPR_MEMORY_BLOCK_SIZE / MiB);
+        return;
+    }
+
+    if (machine->maxram_size % SPAPR_MEMORY_BLOCK_SIZE) {
+        error_setg(errp, "Maximum memory size 0x" RAM_ADDR_FMT
+                   " is not aligned to %" PRIu64 " MiB",
+                   machine->maxram_size,
+                   SPAPR_MEMORY_BLOCK_SIZE / MiB);
+        return;
+    }
+
+    for (i = 0; i < machine->numa_state->num_nodes; i++) {
+        if (machine->numa_state->nodes[i].node_mem % SPAPR_MEMORY_BLOCK_SIZE) {
+            error_setg(errp,
+                       "Node %d memory size 0x%" PRIx64
+                       " is not aligned to %" PRIu64 " MiB",
+                       i, machine->numa_state->nodes[i].node_mem,
+                       SPAPR_MEMORY_BLOCK_SIZE / MiB);
+            return;
+        }
+    }
+}
+
+/* Find the cpu slot in machine->possible_cpus by core_id */
+static CPUArchId *spapr_find_cpu_slot(MachineState *ms, uint32_t id, int *idx)
+{
+    int index = id / ms->smp.threads;
+
+    if (index >= ms->possible_cpus->len) {
+        return NULL;
+    }
+    if (idx) {
+        *idx = index;
+    }
+    return &ms->possible_cpus->cpus[index];
+}
+
+static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp)
+{
+    MachineState *ms = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    Error *local_err = NULL;
+    bool vsmt_user = !!spapr->vsmt;
+    int kvm_smt = kvmppc_smt_threads();
+    int ret;
+    unsigned int smp_threads = ms->smp.threads;
+
+    if (!kvm_enabled() && (smp_threads > 1)) {
+        error_setg(errp, "TCG cannot support more than 1 thread/core "
+                   "on a pseries machine");
+        return;
+    }
+    if (!is_power_of_2(smp_threads)) {
+        error_setg(errp, "Cannot support %d threads/core on a pseries "
+                   "machine because it must be a power of 2", smp_threads);
+        return;
+    }
+
+    /* Determine the VSMT mode to use: */
+    if (vsmt_user) {
+        if (spapr->vsmt < smp_threads) {
+            error_setg(errp, "Cannot support VSMT mode %d"
+                       " because it must be >= threads/core (%d)",
+                       spapr->vsmt, smp_threads);
+            return;
+        }
+        /* In this case, spapr->vsmt has been set by the command line */
+    } else if (!smc->smp_threads_vsmt) {
+        /*
+         * Default VSMT value is tricky, because we need it to be as
+         * consistent as possible (for migration), but this requires
+         * changing it for at least some existing cases. We pick 8 as
+         * the value that we'd get with KVM on POWER8, the
+         * overwhelmingly common case in production systems.
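+         * Concretely, with smc->smp_threads_vsmt unset: smp_threads of
+         * 1, 2, 4 or 8 all yield vsmt = 8, while 16 yields vsmt = 16.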
+ */ + spapr->vsmt = MAX(8, smp_threads); + } else { + spapr->vsmt = smp_threads; + } + + /* KVM: If necessary, set the SMT mode: */ + if (kvm_enabled() && (spapr->vsmt != kvm_smt)) { + ret = kvmppc_set_smt_threads(spapr->vsmt); + if (ret) { + /* Looks like KVM isn't able to change VSMT mode */ + error_setg(&local_err, + "Failed to set KVM's VSMT mode to %d (errno %d)", + spapr->vsmt, ret); + /* We can live with that if the default one is big enough + * for the number of threads, and a submultiple of the one + * we want. In this case we'll waste some vcpu ids, but + * behaviour will be correct */ + if ((kvm_smt >= smp_threads) && ((spapr->vsmt % kvm_smt) == 0)) { + warn_report_err(local_err); + } else { + if (!vsmt_user) { + error_append_hint(&local_err, + "On PPC, a VM with %d threads/core" + " on a host with %d threads/core" + " requires the use of VSMT mode %d.\n", + smp_threads, kvm_smt, spapr->vsmt); + } + kvmppc_error_append_smt_possible_hint(&local_err); + error_propagate(errp, local_err); + } + } + } + /* else TCG: nothing to do currently */ +} + +static void spapr_init_cpus(SpaprMachineState *spapr) +{ + MachineState *machine = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(machine); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine); + const char *type = spapr_get_cpu_core_type(machine->cpu_type); + const CPUArchIdList *possible_cpus; + unsigned int smp_cpus = machine->smp.cpus; + unsigned int smp_threads = machine->smp.threads; + unsigned int max_cpus = machine->smp.max_cpus; + int boot_cores_nr = smp_cpus / smp_threads; + int i; + + possible_cpus = mc->possible_cpu_arch_ids(machine); + if (mc->has_hotpluggable_cpus) { + if (smp_cpus % smp_threads) { + error_report("smp_cpus (%u) must be multiple of threads (%u)", + smp_cpus, smp_threads); + exit(1); + } + if (max_cpus % smp_threads) { + error_report("max_cpus (%u) must be multiple of threads (%u)", + max_cpus, smp_threads); + exit(1); + } + } else { + if (max_cpus != smp_cpus) { + error_report("This machine version does not support CPU hotplug"); + exit(1); + } + boot_cores_nr = possible_cpus->len; + } + + if (smc->pre_2_10_has_unused_icps) { + int i; + + for (i = 0; i < spapr_max_server_number(spapr); i++) { + /* Dummy entries get deregistered when real ICPState objects + * are registered during CPU core hotplug. 
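+             * Keeping one dummy per possible server number means an
+             * incoming pre-2.10 migration stream, which carries state
+             * for every possible ICP, always finds an instance to load
+             * into.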
+             */
+            pre_2_10_vmstate_register_dummy_icp(i);
+        }
+    }
+
+    for (i = 0; i < possible_cpus->len; i++) {
+        int core_id = i * smp_threads;
+
+        if (mc->has_hotpluggable_cpus) {
+            spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_CPU,
+                                   spapr_vcpu_id(spapr, core_id));
+        }
+
+        if (i < boot_cores_nr) {
+            Object *core = object_new(type);
+            int nr_threads = smp_threads;
+
+            /* Handle the partially filled core for older machine types */
+            if ((i + 1) * smp_threads >= smp_cpus) {
+                nr_threads = smp_cpus - i * smp_threads;
+            }
+
+            object_property_set_int(core, "nr-threads", nr_threads,
+                                    &error_fatal);
+            object_property_set_int(core, CPU_CORE_PROP_CORE_ID, core_id,
+                                    &error_fatal);
+            qdev_realize(DEVICE(core), NULL, &error_fatal);
+
+            object_unref(core);
+        }
+    }
+}
+
+static PCIHostState *spapr_create_default_phb(void)
+{
+    DeviceState *dev;
+
+    dev = qdev_new(TYPE_SPAPR_PCI_HOST_BRIDGE);
+    qdev_prop_set_uint32(dev, "index", 0);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    return PCI_HOST_BRIDGE(dev);
+}
+
+static hwaddr spapr_rma_size(SpaprMachineState *spapr, Error **errp)
+{
+    MachineState *machine = MACHINE(spapr);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    hwaddr rma_size = machine->ram_size;
+    hwaddr node0_size = spapr_node0_size(machine);
+
+    /* RMA has to fit in the first NUMA node */
+    rma_size = MIN(rma_size, node0_size);
+
+    /*
+     * VRMA access is via a special 1TiB SLB mapping, so the RMA can
+     * never exceed that
+     */
+    rma_size = MIN(rma_size, 1 * TiB);
+
+    /*
+     * Clamp the RMA size based on machine type. This is for
+     * migration compatibility with older qemu versions, which limited
+     * the RMA size for complicated and mostly bad reasons.
+     */
+    if (smc->rma_limit) {
+        rma_size = MIN(rma_size, smc->rma_limit);
+    }
+
+    if (rma_size < MIN_RMA_SLOF) {
+        error_setg(errp,
+                   "pSeries SLOF firmware requires >= %" PRId64
+                   " MiB guest RMA (Real Mode Area memory)",
+                   MIN_RMA_SLOF / MiB);
+        return 0;
+    }
+
+    return rma_size;
+}
+
+static void spapr_create_nvdimm_dr_connectors(SpaprMachineState *spapr)
+{
+    MachineState *machine = MACHINE(spapr);
+    int i;
+
+    for (i = 0; i < machine->ram_slots; i++) {
+        spapr_dr_connector_new(OBJECT(spapr), TYPE_SPAPR_DRC_PMEM, i);
+    }
+}
+
+/* pSeries LPAR / sPAPR hardware init */
+static void spapr_machine_init(MachineState *machine)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(machine);
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(machine);
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const char *bios_default = spapr->vof ? FW_FILE_NAME_VOF : FW_FILE_NAME;
+    const char *bios_name = machine->firmware ?: bios_default;
+    const char *kernel_filename = machine->kernel_filename;
+    const char *initrd_filename = machine->initrd_filename;
+    PCIHostState *phb;
+    int i;
+    MemoryRegion *sysmem = get_system_memory();
+    long load_limit, fw_size;
+    char *filename;
+    Error *resize_hpt_err = NULL;
+
+    /*
+     * If Secure VM (PEF) support is configured, then initialize it
+     */
+    pef_kvm_init(machine->cgs, &error_fatal);
+
+    msi_nonbroken = true;
+
+    QLIST_INIT(&spapr->phbs);
+    QTAILQ_INIT(&spapr->pending_dimm_unplugs);
+
+    /* Determine capabilities to run with */
+    spapr_caps_init(spapr);
+
+    kvmppc_check_papr_resize_hpt(&resize_hpt_err);
+    if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DEFAULT) {
+        /*
+         * If the user explicitly requested a mode we should either
+         * supply it, or fail completely (which we do below).
But if + * it's not set explicitly, we reset our mode to something + * that works + */ + if (resize_hpt_err) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + error_free(resize_hpt_err); + resize_hpt_err = NULL; + } else { + spapr->resize_hpt = smc->resize_hpt_default; + } + } + + assert(spapr->resize_hpt != SPAPR_RESIZE_HPT_DEFAULT); + + if ((spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) && resize_hpt_err) { + /* + * User requested HPT resize, but this host can't supply it. Bail out + */ + error_report_err(resize_hpt_err); + exit(1); + } + error_free(resize_hpt_err); + + spapr->rma_size = spapr_rma_size(spapr, &error_fatal); + + /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */ + load_limit = MIN(spapr->rma_size, FDT_MAX_ADDR) - FW_OVERHEAD; + + /* + * VSMT must be set in order to be able to compute VCPU ids, ie to + * call spapr_max_server_number() or spapr_vcpu_id(). + */ + spapr_set_vsmt_mode(spapr, &error_fatal); + + /* Set up Interrupt Controller before we create the VCPUs */ + spapr_irq_init(spapr, &error_fatal); + + /* Set up containers for ibm,client-architecture-support negotiated options + */ + spapr->ov5 = spapr_ovec_new(); + spapr->ov5_cas = spapr_ovec_new(); + + if (smc->dr_lmb_enabled) { + spapr_ovec_set(spapr->ov5, OV5_DRCONF_MEMORY); + spapr_validate_node_memory(machine, &error_fatal); + } + + spapr_ovec_set(spapr->ov5, OV5_FORM1_AFFINITY); + + /* Do not advertise FORM2 NUMA support for pseries-6.1 and older */ + if (!smc->pre_6_2_numa_affinity) { + spapr_ovec_set(spapr->ov5, OV5_FORM2_AFFINITY); + } + + /* advertise support for dedicated HP event source to guests */ + if (spapr->use_hotplug_event_source) { + spapr_ovec_set(spapr->ov5, OV5_HP_EVT); + } + + /* advertise support for HPT resizing */ + if (spapr->resize_hpt != SPAPR_RESIZE_HPT_DISABLED) { + spapr_ovec_set(spapr->ov5, OV5_HPT_RESIZE); + } + + /* advertise support for ibm,dyamic-memory-v2 */ + spapr_ovec_set(spapr->ov5, OV5_DRMEM_V2); + + /* advertise XIVE on POWER9 machines */ + if (spapr->irq->xive) { + spapr_ovec_set(spapr->ov5, OV5_XIVE_EXPLOIT); + } + + /* init CPUs */ + spapr_init_cpus(spapr); + + spapr->gpu_numa_id = spapr_numa_initial_nvgpu_numa_id(machine); + + /* Init numa_assoc_array */ + spapr_numa_associativity_init(spapr, machine); + + if ((!kvm_enabled() || kvmppc_has_cap_mmu_radix()) && + ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_300); + /* KVM and TCG always allow GTSE with radix... */ + spapr_ovec_set(spapr->ov5, OV5_MMU_RADIX_GTSE); + } + /* ... but not with hash (currently). */ + + if (kvm_enabled()) { + /* Enable H_LOGICAL_CI_* so SLOF can talk to in-kernel devices */ + kvmppc_enable_logical_ci_hcalls(); + kvmppc_enable_set_mode_hcall(); + + /* H_CLEAR_MOD/_REF are mandatory in PAPR, but off by default */ + kvmppc_enable_clear_ref_mod_hcalls(); + + /* Enable H_PAGE_INIT */ + kvmppc_enable_h_page_init(); + } + + /* map RAM */ + memory_region_add_subregion(sysmem, 0, machine->ram); + + /* always allocate the device memory information */ + machine->device_memory = g_malloc0(sizeof(*machine->device_memory)); + + /* initialize hotplug memory address space */ + if (machine->ram_size < machine->maxram_size) { + ram_addr_t device_mem_size = machine->maxram_size - machine->ram_size; + /* + * Limit the number of hotpluggable memory slots to half the number + * slots that KVM supports, leaving the other half for PCI and other + * devices. 
However, ensure that the number of slots doesn't drop below 32.
+         */
+        int max_memslots = kvm_enabled() ? kvm_get_max_memslots() / 2 :
+                           SPAPR_MAX_RAM_SLOTS;
+
+        if (max_memslots < SPAPR_MAX_RAM_SLOTS) {
+            max_memslots = SPAPR_MAX_RAM_SLOTS;
+        }
+        if (machine->ram_slots > max_memslots) {
+            error_report("Specified number of memory slots %"
+                         PRIu64 " exceeds max supported %d",
+                         machine->ram_slots, max_memslots);
+            exit(1);
+        }
+
+        machine->device_memory->base = ROUND_UP(machine->ram_size,
+                                                SPAPR_DEVICE_MEM_ALIGN);
+        memory_region_init(&machine->device_memory->mr, OBJECT(spapr),
+                           "device-memory", device_mem_size);
+        memory_region_add_subregion(sysmem, machine->device_memory->base,
+                                    &machine->device_memory->mr);
+    }
+
+    if (smc->dr_lmb_enabled) {
+        spapr_create_lmb_dr_connectors(spapr);
+    }
+
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_ON) {
+        /* Create the error string for live migration blocker */
+        error_setg(&spapr->fwnmi_migration_blocker,
+                   "A machine check is being handled during migration. The"
+                   " handler may run and log hardware error on the destination");
+    }
+
+    if (mc->nvdimm_supported) {
+        spapr_create_nvdimm_dr_connectors(spapr);
+    }
+
+    /* Set up RTAS event infrastructure */
+    spapr_events_init(spapr);
+
+    /* Set up the RTC RTAS interfaces */
+    spapr_rtc_create(spapr);
+
+    /* Set up VIO bus */
+    spapr->vio_bus = spapr_vio_bus_init();
+
+    for (i = 0; serial_hd(i); i++) {
+        spapr_vty_create(spapr->vio_bus, serial_hd(i));
+    }
+
+    /* We always have at least the nvram device on VIO */
+    spapr_create_nvram(spapr);
+
+    /*
+     * Set up hotplug / dynamic-reconfiguration connectors. Top-level
+     * connectors (described in root DT node's "ibm,drc-types" property)
+     * are pre-initialized here. Additional child connectors (such as
+     * connectors for a PHB's PCI slots) are added as needed during their
+     * parent's realization.
+ */ + if (smc->dr_phb_enabled) { + for (i = 0; i < SPAPR_MAX_PHBS; i++) { + spapr_dr_connector_new(OBJECT(machine), TYPE_SPAPR_DRC_PHB, i); + } + } + + /* Set up PCI */ + spapr_pci_rtas_init(); + + phb = spapr_create_default_phb(); + + for (i = 0; i < nb_nics; i++) { + NICInfo *nd = &nd_table[i]; + + if (!nd->model) { + nd->model = g_strdup("spapr-vlan"); + } + + if (g_str_equal(nd->model, "spapr-vlan") || + g_str_equal(nd->model, "ibmveth")) { + spapr_vlan_create(spapr->vio_bus, nd); + } else { + pci_nic_init_nofail(&nd_table[i], phb->bus, nd->model, NULL); + } + } + + for (i = 0; i <= drive_get_max_bus(IF_SCSI); i++) { + spapr_vscsi_create(spapr->vio_bus); + } + + /* Graphics */ + if (spapr_vga_init(phb->bus, &error_fatal)) { + spapr->has_graphics = true; + machine->usb |= defaults_enabled() && !machine->usb_disabled; + } + + if (machine->usb) { + if (smc->use_ohci_by_default) { + pci_create_simple(phb->bus, -1, "pci-ohci"); + } else { + pci_create_simple(phb->bus, -1, "nec-usb-xhci"); + } + + if (spapr->has_graphics) { + USBBus *usb_bus = usb_bus_find(-1); + + usb_create_simple(usb_bus, "usb-kbd"); + usb_create_simple(usb_bus, "usb-mouse"); + } + } + + if (kernel_filename) { + spapr->kernel_size = load_elf(kernel_filename, NULL, + translate_kernel_address, spapr, + NULL, NULL, NULL, NULL, 1, + PPC_ELF_MACHINE, 0, 0); + if (spapr->kernel_size == ELF_LOAD_WRONG_ENDIAN) { + spapr->kernel_size = load_elf(kernel_filename, NULL, + translate_kernel_address, spapr, + NULL, NULL, NULL, NULL, 0, + PPC_ELF_MACHINE, 0, 0); + spapr->kernel_le = spapr->kernel_size > 0; + } + if (spapr->kernel_size < 0) { + error_report("error loading %s: %s", kernel_filename, + load_elf_strerror(spapr->kernel_size)); + exit(1); + } + + /* load initrd */ + if (initrd_filename) { + /* Try to locate the initrd in the gap between the kernel + * and the firmware. Add a bit of space just in case + */ + spapr->initrd_base = (spapr->kernel_addr + spapr->kernel_size + + 0x1ffff) & ~0xffff; + spapr->initrd_size = load_image_targphys(initrd_filename, + spapr->initrd_base, + load_limit + - spapr->initrd_base); + if (spapr->initrd_size < 0) { + error_report("could not load initial ram disk '%s'", + initrd_filename); + exit(1); + } + } + } + + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); + if (!filename) { + error_report("Could not find LPAR firmware '%s'", bios_name); + exit(1); + } + fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE); + if (fw_size <= 0) { + error_report("Could not load LPAR firmware '%s'", filename); + exit(1); + } + g_free(filename); + + /* FIXME: Should register things through the MachineState's qdev + * interface, this is a legacy from the sPAPREnvironment structure + * which predated MachineState but had a similar function */ + vmstate_register(NULL, 0, &vmstate_spapr, spapr); + register_savevm_live("spapr/htab", VMSTATE_INSTANCE_ID_ANY, 1, + &savevm_htab_handlers, spapr); + + qbus_set_hotplug_handler(sysbus_get_default(), OBJECT(machine)); + + qemu_register_boot_set(spapr_boot_set, spapr); + + /* + * Nothing needs to be done to resume a suspended guest because + * suspending does not change the machine state, so no need for + * a ->wakeup method. 
+ */ + qemu_register_wakeup_support(); + + if (kvm_enabled()) { + /* to stop and start vmclock */ + qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change, + &spapr->tb); + + kvmppc_spapr_enable_inkernel_multitce(); + } + + qemu_cond_init(&spapr->fwnmi_machine_check_interlock_cond); + if (spapr->vof) { + spapr->vof->fw_size = fw_size; /* for claim() on itself */ + spapr_register_hypercall(KVMPPC_H_VOF_CLIENT, spapr_h_vof_client); + } +} + +#define DEFAULT_KVM_TYPE "auto" +static int spapr_kvm_type(MachineState *machine, const char *vm_type) +{ + /* + * The use of g_ascii_strcasecmp() for 'hv' and 'pr' is to + * accomodate the 'HV' and 'PV' formats that exists in the + * wild. The 'auto' mode is being introduced already as + * lower-case, thus we don't need to bother checking for + * "AUTO". + */ + if (!vm_type || !strcmp(vm_type, DEFAULT_KVM_TYPE)) { + return 0; + } + + if (!g_ascii_strcasecmp(vm_type, "hv")) { + return 1; + } + + if (!g_ascii_strcasecmp(vm_type, "pr")) { + return 2; + } + + error_report("Unknown kvm-type specified '%s'", vm_type); + exit(1); +} + +/* + * Implementation of an interface to adjust firmware path + * for the bootindex property handling. + */ +static char *spapr_get_fw_dev_path(FWPathProvider *p, BusState *bus, + DeviceState *dev) +{ +#define CAST(type, obj, name) \ + ((type *)object_dynamic_cast(OBJECT(obj), (name))) + SCSIDevice *d = CAST(SCSIDevice, dev, TYPE_SCSI_DEVICE); + SpaprPhbState *phb = CAST(SpaprPhbState, dev, TYPE_SPAPR_PCI_HOST_BRIDGE); + VHostSCSICommon *vsc = CAST(VHostSCSICommon, dev, TYPE_VHOST_SCSI_COMMON); + PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); + + if (d) { + void *spapr = CAST(void, bus->parent, "spapr-vscsi"); + VirtIOSCSI *virtio = CAST(VirtIOSCSI, bus->parent, TYPE_VIRTIO_SCSI); + USBDevice *usb = CAST(USBDevice, bus->parent, TYPE_USB_DEVICE); + + if (spapr) { + /* + * Replace "channel@0/disk@0,0" with "disk@8000000000000000": + * In the top 16 bits of the 64-bit LUN, we use SRP luns of the form + * 0x8000 | (target << 8) | (bus << 5) | lun + * (see the "Logical unit addressing format" table in SAM5) + */ + unsigned id = 0x8000 | (d->id << 8) | (d->channel << 5) | d->lun; + return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev), + (uint64_t)id << 48); + } else if (virtio) { + /* + * We use SRP luns of the form 01000000 | (target << 8) | lun + * in the top 32 bits of the 64-bit LUN + * Note: the quote above is from SLOF and it is wrong, + * the actual binding is: + * swap 0100 or 10 << or 20 << ( target lun-id -- srplun ) + */ + unsigned id = 0x1000000 | (d->id << 16) | d->lun; + if (d->lun >= 256) { + /* Use the LUN "flat space addressing method" */ + id |= 0x4000; + } + return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev), + (uint64_t)id << 32); + } else if (usb) { + /* + * We use SRP luns of the form 01000000 | (usb-port << 16) | lun + * in the top 32 bits of the 64-bit LUN + */ + unsigned usb_port = atoi(usb->port->path); + unsigned id = 0x1000000 | (usb_port << 16) | d->lun; + return g_strdup_printf("%s@%"PRIX64, qdev_fw_name(dev), + (uint64_t)id << 32); + } + } + + /* + * SLOF probes the USB devices, and if it recognizes that the device is a + * storage device, it changes its name to "storage" instead of "usb-host", + * and additionally adds a child node for the SCSI LUN, so the correct + * boot path in SLOF is something like .../storage@1/disk@xxx" instead. 
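+ * As a worked instance of the SRP encodings above: a spapr-vscsi disk
+ * with id 1, channel 0, lun 0 encodes as 0x8000 | (1 << 8) = 0x8100,
+ * which lands in the top 16 bits of the 64-bit LUN and yields the
+ * path component "disk@8100000000000000".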
+ */ + if (strcmp("usb-host", qdev_fw_name(dev)) == 0) { + USBDevice *usbdev = CAST(USBDevice, dev, TYPE_USB_DEVICE); + if (usb_device_is_scsi_storage(usbdev)) { + return g_strdup_printf("storage@%s/disk", usbdev->port->path); + } + } + + if (phb) { + /* Replace "pci" with "pci@800000020000000" */ + return g_strdup_printf("pci@%"PRIX64, phb->buid); + } + + if (vsc) { + /* Same logic as virtio above */ + unsigned id = 0x1000000 | (vsc->target << 16) | vsc->lun; + return g_strdup_printf("disk@%"PRIX64, (uint64_t)id << 32); + } + + if (g_str_equal("pci-bridge", qdev_fw_name(dev))) { + /* SLOF uses "pci" instead of "pci-bridge" for PCI bridges */ + PCIDevice *pcidev = CAST(PCIDevice, dev, TYPE_PCI_DEVICE); + return g_strdup_printf("pci@%x", PCI_SLOT(pcidev->devfn)); + } + + if (pcidev) { + return spapr_pci_fw_dev_name(pcidev); + } + + return NULL; +} + +static char *spapr_get_kvm_type(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->kvm_type); +} + +static void spapr_set_kvm_type(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->kvm_type); + spapr->kvm_type = g_strdup(value); +} + +static bool spapr_get_modern_hotplug_events(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->use_hotplug_event_source; +} + +static void spapr_set_modern_hotplug_events(Object *obj, bool value, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + spapr->use_hotplug_event_source = value; +} + +static bool spapr_get_msix_emulation(Object *obj, Error **errp) +{ + return true; +} + +static char *spapr_get_resize_hpt(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + switch (spapr->resize_hpt) { + case SPAPR_RESIZE_HPT_DEFAULT: + return g_strdup("default"); + case SPAPR_RESIZE_HPT_DISABLED: + return g_strdup("disabled"); + case SPAPR_RESIZE_HPT_ENABLED: + return g_strdup("enabled"); + case SPAPR_RESIZE_HPT_REQUIRED: + return g_strdup("required"); + } + g_assert_not_reached(); +} + +static void spapr_set_resize_hpt(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (strcmp(value, "default") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DEFAULT; + } else if (strcmp(value, "disabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_DISABLED; + } else if (strcmp(value, "enabled") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_ENABLED; + } else if (strcmp(value, "required") == 0) { + spapr->resize_hpt = SPAPR_RESIZE_HPT_REQUIRED; + } else { + error_setg(errp, "Bad value for \"resize-hpt\" property"); + } +} + +static bool spapr_get_vof(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return spapr->vof != NULL; +} + +static void spapr_set_vof(Object *obj, bool value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->vof) { + vof_cleanup(spapr->vof); + g_free(spapr->vof); + spapr->vof = NULL; + } + if (!value) { + return; + } + spapr->vof = g_malloc0(sizeof(*spapr->vof)); +} + +static char *spapr_get_ic_mode(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (spapr->irq == &spapr_irq_xics_legacy) { + return g_strdup("legacy"); + } else if (spapr->irq == &spapr_irq_xics) { + return g_strdup("xics"); + } else if (spapr->irq == &spapr_irq_xive) { + return g_strdup("xive"); + } else if (spapr->irq == &spapr_irq_dual) { + return g_strdup("dual"); + } + 
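+    /* spapr->irq always points at one of the four backends tested above */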
g_assert_not_reached(); +} + +static void spapr_set_ic_mode(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + error_setg(errp, "This machine only uses the legacy XICS backend, don't pass ic-mode"); + return; + } + + /* The legacy IRQ backend can not be set */ + if (strcmp(value, "xics") == 0) { + spapr->irq = &spapr_irq_xics; + } else if (strcmp(value, "xive") == 0) { + spapr->irq = &spapr_irq_xive; + } else if (strcmp(value, "dual") == 0) { + spapr->irq = &spapr_irq_dual; + } else { + error_setg(errp, "Bad value for \"ic-mode\" property"); + } +} + +static char *spapr_get_host_model(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->host_model); +} + +static void spapr_set_host_model(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->host_model); + spapr->host_model = g_strdup(value); +} + +static char *spapr_get_host_serial(Object *obj, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + return g_strdup(spapr->host_serial); +} + +static void spapr_set_host_serial(Object *obj, const char *value, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->host_serial); + spapr->host_serial = g_strdup(value); +} + +static void spapr_instance_init(Object *obj) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + MachineState *ms = MACHINE(spapr); + MachineClass *mc = MACHINE_GET_CLASS(ms); + + /* + * NVDIMM support went live in 5.1 without considering that, in + * other archs, the user needs to enable NVDIMM support with the + * 'nvdimm' machine option and the default behavior is NVDIMM + * support disabled. It is too late to roll back to the standard + * behavior without breaking 5.1 guests. + */ + if (mc->nvdimm_supported) { + ms->nvdimms_state->is_enabled = true; + } + + spapr->htab_fd = -1; + spapr->use_hotplug_event_source = true; + spapr->kvm_type = g_strdup(DEFAULT_KVM_TYPE); + object_property_add_str(obj, "kvm-type", + spapr_get_kvm_type, spapr_set_kvm_type); + object_property_set_description(obj, "kvm-type", + "Specifies the KVM virtualization mode (auto," + " hv, pr). Defaults to 'auto'. 
This mode will use" + " any available KVM module loaded in the host," + " where kvm_hv takes precedence if both kvm_hv and" + " kvm_pr are loaded."); + object_property_add_bool(obj, "modern-hotplug-events", + spapr_get_modern_hotplug_events, + spapr_set_modern_hotplug_events); + object_property_set_description(obj, "modern-hotplug-events", + "Use dedicated hotplug event mechanism in" + " place of standard EPOW events when possible" + " (required for memory hot-unplug support)"); + ppc_compat_add_property(obj, "max-cpu-compat", &spapr->max_compat_pvr, + "Maximum permitted CPU compatibility mode"); + + object_property_add_str(obj, "resize-hpt", + spapr_get_resize_hpt, spapr_set_resize_hpt); + object_property_set_description(obj, "resize-hpt", + "Resizing of the Hash Page Table (enabled, disabled, required)"); + object_property_add_uint32_ptr(obj, "vsmt", + &spapr->vsmt, OBJ_PROP_FLAG_READWRITE); + object_property_set_description(obj, "vsmt", + "Virtual SMT: KVM behaves as if this were" + " the host's SMT mode"); + + object_property_add_bool(obj, "vfio-no-msix-emulation", + spapr_get_msix_emulation, NULL); + + object_property_add_uint64_ptr(obj, "kernel-addr", + &spapr->kernel_addr, OBJ_PROP_FLAG_READWRITE); + object_property_set_description(obj, "kernel-addr", + stringify(KERNEL_LOAD_ADDR) + " for -kernel is the default"); + spapr->kernel_addr = KERNEL_LOAD_ADDR; + + object_property_add_bool(obj, "x-vof", spapr_get_vof, spapr_set_vof); + object_property_set_description(obj, "x-vof", + "Enable Virtual Open Firmware (experimental)"); + + /* The machine class defines the default interrupt controller mode */ + spapr->irq = smc->irq; + object_property_add_str(obj, "ic-mode", spapr_get_ic_mode, + spapr_set_ic_mode); + object_property_set_description(obj, "ic-mode", + "Specifies the interrupt controller mode (xics, xive, dual)"); + + object_property_add_str(obj, "host-model", + spapr_get_host_model, spapr_set_host_model); + object_property_set_description(obj, "host-model", + "Host model to advertise in guest device tree"); + object_property_add_str(obj, "host-serial", + spapr_get_host_serial, spapr_set_host_serial); + object_property_set_description(obj, "host-serial", + "Host serial number to advertise in guest device tree"); +} + +static void spapr_machine_finalizefn(Object *obj) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + g_free(spapr->kvm_type); +} + +void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + PowerPCCPU *cpu = POWERPC_CPU(cs); + CPUPPCState *env = &cpu->env; + + cpu_synchronize_state(cs); + /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */ + if (spapr->fwnmi_system_reset_addr != -1) { + uint64_t rtas_addr, addr; + + /* get rtas addr from fdt */ + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + qemu_system_guest_panicked(NULL); + return; + } + + addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2; + stq_be_phys(&address_space_memory, addr, env->gpr[3]); + stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0); + env->gpr[3] = addr; + } + ppc_cpu_do_system_reset(cs); + if (spapr->fwnmi_system_reset_addr != -1) { + env->nip = spapr->fwnmi_system_reset_addr; + } +} + +static void spapr_nmi(NMIState *n, int cpu_index, Error **errp) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + async_run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL); + } +} + +int spapr_lmb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, 
int *fdt_start_offset, Error **errp) +{ + uint64_t addr; + uint32_t node; + + addr = spapr_drc_index(drc) * SPAPR_MEMORY_BLOCK_SIZE; + node = object_property_get_uint(OBJECT(drc->dev), PC_DIMM_NODE_PROP, + &error_abort); + *fdt_start_offset = spapr_dt_memory_node(spapr, fdt, node, addr, + SPAPR_MEMORY_BLOCK_SIZE); + return 0; +} + +static void spapr_add_lmbs(DeviceState *dev, uint64_t addr_start, uint64_t size, + bool dedicated_hp_event_source) +{ + SpaprDrc *drc; + uint32_t nr_lmbs = size/SPAPR_MEMORY_BLOCK_SIZE; + int i; + uint64_t addr = addr_start; + bool hotplugged = spapr_drc_hotplugged(dev); + + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + /* + * memory_device_get_free_addr() provided a range of free addresses + * that doesn't overlap with any existing mapping at pre-plug. The + * corresponding LMB DRCs are thus assumed to be all attachable. + */ + spapr_drc_attach(drc, dev); + if (!hotplugged) { + spapr_drc_reset(drc); + } + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + /* send hotplug notification to the + * guest only in case of hotplugged memory + */ + if (hotplugged) { + if (dedicated_hp_event_source) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + spapr_hotplug_req_add_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, + spapr_drc_index(drc)); + } else { + spapr_hotplug_req_add_by_count(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs); + } + } +} + +static void spapr_memory_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *ms = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + uint64_t size, addr; + int64_t slot; + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); + + size = memory_device_get_region_size(MEMORY_DEVICE(dev), &error_abort); + + pc_dimm_plug(dimm, MACHINE(ms)); + + if (!is_nvdimm) { + addr = object_property_get_uint(OBJECT(dimm), + PC_DIMM_ADDR_PROP, &error_abort); + spapr_add_lmbs(dev, addr, size, + spapr_ovec_test(ms->ov5_cas, OV5_HP_EVT)); + } else { + slot = object_property_get_int(OBJECT(dimm), + PC_DIMM_SLOT_PROP, &error_abort); + /* We should have valid slot number at this point */ + g_assert(slot >= 0); + spapr_add_nvdimm(dev, slot); + } +} + +static void spapr_memory_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + const SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(hotplug_dev); + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); + bool is_nvdimm = object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM); + PCDIMMDevice *dimm = PC_DIMM(dev); + Error *local_err = NULL; + uint64_t size; + Object *memdev; + hwaddr pagesize; + + if (!smc->dr_lmb_enabled) { + error_setg(errp, "Memory hotplug not supported for this machine"); + return; + } + + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + if (is_nvdimm) { + if (!spapr_nvdimm_validate(hotplug_dev, NVDIMM(dev), size, errp)) { + return; + } + } else if (size % SPAPR_MEMORY_BLOCK_SIZE) { + error_setg(errp, "Hotplugged memory size must be a multiple of " + "%" PRIu64 " MB", SPAPR_MEMORY_BLOCK_SIZE / MiB); + return; + } + + memdev = object_property_get_link(OBJECT(dimm), PC_DIMM_MEMDEV_PROP, + &error_abort); + pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(memdev)); + if (!spapr_check_pagesize(spapr, pagesize, errp)) { + return; + } + + pc_dimm_pre_plug(dimm, MACHINE(hotplug_dev), NULL, errp); +} + 
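The pre-plug/plug pair above reduces every plugged DIMM to a run of SPAPR_MEMORY_BLOCK_SIZE LMBs whose DRC indices are simply the block-aligned guest addresses divided by the block size. A minimal standalone sketch of that bookkeeping, assuming the 256 MiB block size mentioned near spapr_validate_node_memory() and a hypothetical helper name:

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

#define BLOCK_SIZE (256ULL << 20)  /* stands in for SPAPR_MEMORY_BLOCK_SIZE */

/* First LMB DRC index and LMB count for a DIMM mapped at [addr, addr + size) */
static void dimm_to_lmb_drcs(uint64_t addr, uint64_t size,
                             uint32_t *first_drc, uint32_t *nr_lmbs)
{
    /* spapr_memory_pre_plug() rejects sizes that are not block multiples */
    assert(addr % BLOCK_SIZE == 0 && size % BLOCK_SIZE == 0);
    *first_drc = addr / BLOCK_SIZE;
    *nr_lmbs = size / BLOCK_SIZE;
}

int main(void)
{
    uint32_t drc, n;

    /* A 4 GiB DIMM plugged at guest address 32 GiB: 16 LMBs from DRC 128 */
    dimm_to_lmb_drcs(32ULL << 30, 4ULL << 30, &drc, &n);
    printf("first DRC %" PRIu32 ", %" PRIu32 " LMBs\n", drc, n);
    return 0;
}

This mirrors the addr / SPAPR_MEMORY_BLOCK_SIZE arithmetic that spapr_add_lmbs() above and spapr_lmb_dt_populate() apply per DRC.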
+struct SpaprDimmState { + PCDIMMDevice *dimm; + uint32_t nr_lmbs; + QTAILQ_ENTRY(SpaprDimmState) next; +}; + +static SpaprDimmState *spapr_pending_dimm_unplugs_find(SpaprMachineState *s, + PCDIMMDevice *dimm) +{ + SpaprDimmState *dimm_state = NULL; + + QTAILQ_FOREACH(dimm_state, &s->pending_dimm_unplugs, next) { + if (dimm_state->dimm == dimm) { + break; + } + } + return dimm_state; +} + +static SpaprDimmState *spapr_pending_dimm_unplugs_add(SpaprMachineState *spapr, + uint32_t nr_lmbs, + PCDIMMDevice *dimm) +{ + SpaprDimmState *ds = NULL; + + /* + * If this request is for a DIMM whose removal had failed earlier + * (due to guest's refusal to remove the LMBs), we would have this + * dimm already in the pending_dimm_unplugs list. In that + * case don't add again. + */ + ds = spapr_pending_dimm_unplugs_find(spapr, dimm); + if (!ds) { + ds = g_malloc0(sizeof(SpaprDimmState)); + ds->nr_lmbs = nr_lmbs; + ds->dimm = dimm; + QTAILQ_INSERT_HEAD(&spapr->pending_dimm_unplugs, ds, next); + } + return ds; +} + +static void spapr_pending_dimm_unplugs_remove(SpaprMachineState *spapr, + SpaprDimmState *dimm_state) +{ + QTAILQ_REMOVE(&spapr->pending_dimm_unplugs, dimm_state, next); + g_free(dimm_state); +} + +static SpaprDimmState *spapr_recover_pending_dimm_state(SpaprMachineState *ms, + PCDIMMDevice *dimm) +{ + SpaprDrc *drc; + uint64_t size = memory_device_get_region_size(MEMORY_DEVICE(dimm), + &error_abort); + uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + uint32_t avail_lmbs = 0; + uint64_t addr_start, addr; + int i; + + addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + if (drc->dev) { + avail_lmbs++; + } + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm); +} + +void spapr_memory_unplug_rollback(SpaprMachineState *spapr, DeviceState *dev) +{ + SpaprDimmState *ds; + PCDIMMDevice *dimm; + SpaprDrc *drc; + uint32_t nr_lmbs; + uint64_t size, addr_start, addr; + g_autofree char *qapi_error = NULL; + int i; + + if (!dev) { + return; + } + + dimm = PC_DIMM(dev); + ds = spapr_pending_dimm_unplugs_find(spapr, dimm); + + /* + * 'ds == NULL' would mean that the DIMM doesn't have a pending + * unplug state, but one of its DRC is marked as unplug_requested. + * This is bad and weird enough to g_assert() out. + */ + g_assert(ds); + + spapr_pending_dimm_unplugs_remove(spapr, ds); + + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort); + nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + + addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + drc->unplug_requested = false; + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + /* + * Tell QAPI that something happened and the memory + * hotunplug wasn't successful. Keep sending + * MEM_UNPLUG_ERROR even while sending + * DEVICE_UNPLUG_GUEST_ERROR until the deprecation of + * MEM_UNPLUG_ERROR is due. + */ + qapi_error = g_strdup_printf("Memory hotunplug rejected by the guest " + "for device %s", dev->id); + + qapi_event_send_mem_unplug_error(dev->id ? : "", qapi_error); + + qapi_event_send_device_unplug_guest_error(!!dev->id, dev->id, + dev->canonical_path); +} + +/* Callback to be called during DRC release. 
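+ * Called once per LMB DRC of the unplugged DIMM; ds->nr_lmbs counts the
+ * releases down and only the final one runs the unplug handler chain.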
*/ +void spapr_lmb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_ctrl); + SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); + + /* This information will get lost if a migration occurs + * during the unplug process. In this case recover it. */ + if (ds == NULL) { + ds = spapr_recover_pending_dimm_state(spapr, PC_DIMM(dev)); + g_assert(ds); + /* The DRC being examined by the caller at least must be counted */ + g_assert(ds->nr_lmbs); + } + + if (--ds->nr_lmbs) { + return; + } + + /* + * Now that all the LMBs have been removed by the guest, call the + * unplug handler chain. This can never fail. + */ + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); + SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, PC_DIMM(dev)); + + /* We really shouldn't get this far without anything to unplug */ + g_assert(ds); + + pc_dimm_unplug(PC_DIMM(dev), MACHINE(hotplug_dev)); + qdev_unrealize(dev); + spapr_pending_dimm_unplugs_remove(spapr, ds); +} + +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(hotplug_dev); + PCDIMMDevice *dimm = PC_DIMM(dev); + uint32_t nr_lmbs; + uint64_t size, addr_start, addr; + int i; + SpaprDrc *drc; + + if (object_dynamic_cast(OBJECT(dev), TYPE_NVDIMM)) { + error_setg(errp, "nvdimm device hot unplug is not supported yet."); + return; + } + + size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort); + nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE; + + addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP, + &error_abort); + + /* + * An existing pending dimm state for this DIMM means that there is an + * unplug operation in progress, waiting for the spapr_lmb_release + * callback to complete the job (BQL can't cover that far). In this case, + * bail out to avoid detaching DRCs that were already released. + */ + if (spapr_pending_dimm_unplugs_find(spapr, dimm)) { + error_setg(errp, "Memory unplug already in progress for device %s", + dev->id); + return; + } + + spapr_pending_dimm_unplugs_add(spapr, nr_lmbs, dimm); + + addr = addr_start; + for (i = 0; i < nr_lmbs; i++) { + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr / SPAPR_MEMORY_BLOCK_SIZE); + g_assert(drc); + + spapr_drc_unplug_request(drc); + addr += SPAPR_MEMORY_BLOCK_SIZE; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB, + addr_start / SPAPR_MEMORY_BLOCK_SIZE); + spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB, + nr_lmbs, spapr_drc_index(drc)); +} + +/* Callback to be called during DRC release. */ +void spapr_core_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + /* Call the unplug handler chain. This can never fail. 
*/ + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +static void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + MachineState *ms = MACHINE(hotplug_dev); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(ms); + CPUCore *cc = CPU_CORE(dev); + CPUArchId *core_slot = spapr_find_cpu_slot(ms, cc->core_id, NULL); + + if (smc->pre_2_10_has_unused_icps) { + SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + int i; + + for (i = 0; i < cc->nr_threads; i++) { + CPUState *cs = CPU(sc->threads[i]); + + pre_2_10_vmstate_register_dummy_icp(cs->cpu_index); + } + } + + assert(core_slot); + core_slot->cpu = NULL; + qdev_unrealize(dev); +} + +static +void spapr_core_unplug_request(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + int index; + SpaprDrc *drc; + CPUCore *cc = CPU_CORE(dev); + + if (!spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index)) { + error_setg(errp, "Unable to find CPU core with core-id: %d", + cc->core_id); + return; + } + if (index == 0) { + error_setg(errp, "Boot CPU core may not be unplugged"); + return; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, + spapr_vcpu_id(spapr, cc->core_id)); + g_assert(drc); + + if (!spapr_drc_unplug_requested(drc)) { + spapr_drc_unplug_request(drc); + } + + /* + * spapr_hotplug_req_remove_by_index is left unguarded, out of the + * "!spapr_drc_unplug_requested" check, to allow for multiple IRQ + * pulses removing the same CPU. Otherwise, in an failed hotunplug + * attempt (e.g. the kernel will refuse to remove the last online + * CPU), we will never attempt it again because unplug_requested + * will still be 'true' in that case. + */ + spapr_hotplug_req_remove_by_index(drc); +} + +int spapr_core_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + SpaprCpuCore *core = SPAPR_CPU_CORE(drc->dev); + CPUState *cs = CPU(core->threads[0]); + PowerPCCPU *cpu = POWERPC_CPU(cs); + DeviceClass *dc = DEVICE_GET_CLASS(cs); + int id = spapr_get_vcpu_id(cpu); + g_autofree char *nodename = NULL; + int offset; + + nodename = g_strdup_printf("%s@%x", dc->fw_name, id); + offset = fdt_add_subnode(fdt, 0, nodename); + + spapr_dt_cpu(cs, fdt, offset, spapr); + + /* + * spapr_dt_cpu() does not fill the 'name' property in the + * CPU node. The function is called during boot process, before + * and after CAS, and overwriting the 'name' property written + * by SLOF is not allowed. + * + * Write it manually after spapr_dt_cpu(). This makes the hotplug + * CPUs more compatible with the coldplugged ones, which have + * the 'name' property. Linux Kernel also relies on this + * property to identify CPU nodes. 
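+ * (With a hypothetical POWER9 core at id 0x8, the nodename built above
+ * would be "PowerPC,POWER9@8", which is also the value "name" receives.)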
+ */ + _FDT((fdt_setprop_string(fdt, offset, "name", nodename))); + + *fdt_start_offset = offset; + return 0; +} + +static void spapr_core_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + MachineClass *mc = MACHINE_GET_CLASS(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + SpaprCpuCore *core = SPAPR_CPU_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(dev); + CPUState *cs; + SpaprDrc *drc; + CPUArchId *core_slot; + int index; + bool hotplugged = spapr_drc_hotplugged(dev); + int i; + + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); + g_assert(core_slot); /* Already checked in spapr_core_pre_plug() */ + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_CPU, + spapr_vcpu_id(spapr, cc->core_id)); + + g_assert(drc || !mc->has_hotpluggable_cpus); + + if (drc) { + /* + * spapr_core_pre_plug() already buys us this is a brand new + * core being plugged into a free slot. Nothing should already + * be attached to the corresponding DRC. + */ + spapr_drc_attach(drc, dev); + + if (hotplugged) { + /* + * Send hotplug notification interrupt to the guest only + * in case of hotplugged CPUs. + */ + spapr_hotplug_req_add_by_index(drc); + } else { + spapr_drc_reset(drc); + } + } + + core_slot->cpu = OBJECT(dev); + + /* + * Set compatibility mode to match the boot CPU, which was either set + * by the machine reset code or by CAS. This really shouldn't fail at + * this point. + */ + if (hotplugged) { + for (i = 0; i < cc->nr_threads; i++) { + ppc_set_compat(core->threads[i], POWERPC_CPU(first_cpu)->compat_pvr, + &error_abort); + } + } + + if (smc->pre_2_10_has_unused_icps) { + for (i = 0; i < cc->nr_threads; i++) { + cs = CPU(core->threads[i]); + pre_2_10_vmstate_unregister_dummy_icp(cs->cpu_index); + } + } +} + +static void spapr_core_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + MachineState *machine = MACHINE(OBJECT(hotplug_dev)); + MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); + CPUCore *cc = CPU_CORE(dev); + const char *base_core_type = spapr_get_cpu_core_type(machine->cpu_type); + const char *type = object_get_typename(OBJECT(dev)); + CPUArchId *core_slot; + int index; + unsigned int smp_threads = machine->smp.threads; + + if (dev->hotplugged && !mc->has_hotpluggable_cpus) { + error_setg(errp, "CPU hotplug not supported for this machine"); + return; + } + + if (strcmp(base_core_type, type)) { + error_setg(errp, "CPU core type should be %s", base_core_type); + return; + } + + if (cc->core_id % smp_threads) { + error_setg(errp, "invalid core id %d", cc->core_id); + return; + } + + /* + * In general we should have homogeneous threads-per-core, but old + * (pre hotplug support) machine types allow the last core to have + * reduced threads as a compatibility hack for when we allowed + * total vcpus not a multiple of threads-per-core. 
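+ * For instance, 12 vcpus at 8 threads/core used to yield one full
+ * 8-thread core plus a trailing 4-thread core; the check below only
+ * enforces homogeneity on machine types with hotpluggable CPUs.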
+ */ + if (mc->has_hotpluggable_cpus && (cc->nr_threads != smp_threads)) { + error_setg(errp, "invalid nr-threads %d, must be %d", cc->nr_threads, + smp_threads); + return; + } + + core_slot = spapr_find_cpu_slot(MACHINE(hotplug_dev), cc->core_id, &index); + if (!core_slot) { + error_setg(errp, "core id %d out of range", cc->core_id); + return; + } + + if (core_slot->cpu) { + error_setg(errp, "core %d already populated", cc->core_id); + return; + } + + numa_cpu_pre_plug(core_slot, dev, errp); +} + +int spapr_phb_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(drc->dev); + int intc_phandle; + + intc_phandle = spapr_irq_get_phandle(spapr, spapr->fdt_blob, errp); + if (intc_phandle <= 0) { + return -1; + } + + if (spapr_dt_phb(spapr, sphb, intc_phandle, fdt, fdt_start_offset)) { + error_setg(errp, "unable to create FDT node for PHB %d", sphb->index); + return -1; + } + + /* generally SLOF creates these, for hotplug it's up to QEMU */ + _FDT(fdt_setprop_string(fdt, *fdt_start_offset, "name", "pci")); + + return 0; +} + +static bool spapr_phb_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + SpaprDrc *drc; + + if (dev->hotplugged && !smc->dr_phb_enabled) { + error_setg(errp, "PHB hotplug not supported for this machine"); + return false; + } + + if (sphb->index == (uint32_t)-1) { + error_setg(errp, "\"index\" for PAPR PHB is mandatory"); + return false; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + if (drc && drc->dev) { + error_setg(errp, "PHB %d already attached", sphb->index); + return false; + } + + /* + * This will check that sphb->index doesn't exceed the maximum number of + * PHBs for the current machine type. 
+ */ + return + smc->phb_placement(spapr, sphb->index, + &sphb->buid, &sphb->io_win_addr, + &sphb->mem_win_addr, &sphb->mem64_win_addr, + windows_supported, sphb->dma_liobn, + &sphb->nv2_gpa_win_addr, &sphb->nv2_atsd_win_addr, + errp); +} + +static void spapr_phb_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + SpaprDrc *drc; + bool hotplugged = spapr_drc_hotplugged(dev); + + if (!smc->dr_phb_enabled) { + return; + } + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + /* hotplug hooks should check it's enabled before getting this far */ + assert(drc); + + /* spapr_phb_pre_plug() already checked the DRC is attachable */ + spapr_drc_attach(drc, dev); + + if (hotplugged) { + spapr_hotplug_req_add_by_index(drc); + } else { + spapr_drc_reset(drc); + } +} + +void spapr_phb_release(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +static void spapr_phb_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + qdev_unrealize(dev); +} + +static void spapr_phb_unplug_request(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(dev); + SpaprDrc *drc; + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, sphb->index); + assert(drc); + + if (!spapr_drc_unplug_requested(drc)) { + spapr_drc_unplug_request(drc); + spapr_hotplug_req_remove_by_index(drc); + } else { + error_setg(errp, + "PCI Host Bridge unplug already in progress for device %s", + dev->id); + } +} + +static +bool spapr_tpm_proxy_pre_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + + if (spapr->tpm_proxy != NULL) { + error_setg(errp, "Only one TPM proxy can be specified for this machine"); + return false; + } + + return true; +} + +static void spapr_tpm_proxy_plug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(dev); + + /* Already checked in spapr_tpm_proxy_pre_plug() */ + g_assert(spapr->tpm_proxy == NULL); + + spapr->tpm_proxy = tpm_proxy; +} + +static void spapr_tpm_proxy_unplug(HotplugHandler *hotplug_dev, DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(OBJECT(hotplug_dev)); + + qdev_unrealize(dev); + object_unparent(OBJECT(dev)); + spapr->tpm_proxy = NULL; +} + +static void spapr_machine_device_plug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_plug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + spapr_core_plug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) { + spapr_phb_plug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) { + spapr_tpm_proxy_plug(hotplug_dev, dev); + } +} + +static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev, + DeviceState *dev, Error **errp) +{ + if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) { + spapr_memory_unplug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) { + spapr_core_unplug(hotplug_dev, dev); + } else if (object_dynamic_cast(OBJECT(dev), 
TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+        spapr_phb_unplug(hotplug_dev, dev);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        spapr_tpm_proxy_unplug(hotplug_dev, dev);
+    }
+}
+
+bool spapr_memory_hot_unplug_supported(SpaprMachineState *spapr)
+{
+    return spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT) ||
+        /*
+         * CAS will process all pending unplug requests.
+         *
+         * HACK: a guest could theoretically have cleared all bits in OV5,
+         * but none of the guests we care for do.
+         */
+        spapr_ovec_empty(spapr->ov5_cas);
+}
+
+static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
+                                                DeviceState *dev, Error **errp)
+{
+    SpaprMachineState *sms = SPAPR_MACHINE(OBJECT(hotplug_dev));
+    MachineClass *mc = MACHINE_GET_CLASS(sms);
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+        if (spapr_memory_hot_unplug_supported(sms)) {
+            spapr_memory_unplug_request(hotplug_dev, dev, errp);
+        } else {
+            error_setg(errp, "Memory hot unplug not supported for this guest");
+        }
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+        if (!mc->has_hotpluggable_cpus) {
+            error_setg(errp, "CPU hot unplug not supported on this machine");
+            return;
+        }
+        spapr_core_unplug_request(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+        if (!smc->dr_phb_enabled) {
+            error_setg(errp, "PHB hot unplug not supported on this machine");
+            return;
+        }
+        spapr_phb_unplug_request(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        spapr_tpm_proxy_unplug(hotplug_dev, dev);
+    }
+}
+
+static void spapr_machine_device_pre_plug(HotplugHandler *hotplug_dev,
+                                          DeviceState *dev, Error **errp)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
+        spapr_memory_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
+        spapr_core_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE)) {
+        spapr_phb_pre_plug(hotplug_dev, dev, errp);
+    } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        spapr_tpm_proxy_pre_plug(hotplug_dev, dev, errp);
+    }
+}
+
+static HotplugHandler *spapr_get_hotplug_handler(MachineState *machine,
+                                                 DeviceState *dev)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_PCI_HOST_BRIDGE) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_TPM_PROXY)) {
+        return HOTPLUG_HANDLER(machine);
+    }
+    if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
+        PCIDevice *pcidev = PCI_DEVICE(dev);
+        PCIBus *root = pci_device_root_bus(pcidev);
+        SpaprPhbState *phb =
+            (SpaprPhbState *)object_dynamic_cast(OBJECT(BUS(root)->parent),
+                                                 TYPE_SPAPR_PCI_HOST_BRIDGE);
+
+        if (phb) {
+            return HOTPLUG_HANDLER(phb);
+        }
+    }
+    return NULL;
+}
+
+static CpuInstanceProperties
+spapr_cpu_index_to_props(MachineState *machine, unsigned cpu_index)
+{
+    CPUArchId *core_slot;
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+    /* make sure possible_cpus is initialized */
+    mc->possible_cpu_arch_ids(machine);
+    /* get the CPU core slot containing the thread that matches cpu_index */
+    core_slot = spapr_find_cpu_slot(machine, cpu_index, NULL);
+    assert(core_slot);
+    return core_slot->props;
+}
+
+static int64_t spapr_get_default_cpu_node_id(const MachineState *ms, int idx)
+{
+    return idx / ms->smp.cores % ms->numa_state->num_nodes;
+}
+
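+/*
+ * Illustrative sketch (not part of the original patch): the list built
+ * by spapr_possible_cpu_arch_ids() below is how callers enumerate core
+ * slots; the helper name example_dump_possible_cpus is hypothetical and
+ * the block is compiled out.  Note also how spapr_get_default_cpu_node_id()
+ * above distributes whole cores across NUMA nodes: with smp.cores = 4 and
+ * two nodes, core indexes 0..3 land on node 0, 4..7 on node 1, and 8..11
+ * wrap back to node 0.
+ */
+#if 0
+static void example_dump_possible_cpus(MachineState *machine)
+{
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+    const CPUArchIdList *list = mc->possible_cpu_arch_ids(machine);
+    int i;
+
+    for (i = 0; i < list->len; i++) {
+        /* arch_id doubles as the core-id: always a multiple of smp.threads */
+        printf("slot %d: core-id %" PRIu64 " (%s)\n", i,
+               list->cpus[i].arch_id,
+               list->cpus[i].cpu ? "occupied" : "free");
+    }
+}
+#endif
+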
+static const CPUArchIdList *spapr_possible_cpu_arch_ids(MachineState *machine)
+{
+    int i;
+    unsigned int smp_threads = machine->smp.threads;
+    unsigned int smp_cpus = machine->smp.cpus;
+    const char *core_type;
+    int spapr_max_cores = machine->smp.max_cpus / smp_threads;
+    MachineClass *mc = MACHINE_GET_CLASS(machine);
+
+    if (!mc->has_hotpluggable_cpus) {
+        spapr_max_cores = QEMU_ALIGN_UP(smp_cpus, smp_threads) / smp_threads;
+    }
+    if (machine->possible_cpus) {
+        assert(machine->possible_cpus->len == spapr_max_cores);
+        return machine->possible_cpus;
+    }
+
+    core_type = spapr_get_cpu_core_type(machine->cpu_type);
+    if (!core_type) {
+        error_report("Unable to find sPAPR CPU Core definition");
+        exit(1);
+    }
+
+    machine->possible_cpus = g_malloc0(sizeof(CPUArchIdList) +
+                                       sizeof(CPUArchId) * spapr_max_cores);
+    machine->possible_cpus->len = spapr_max_cores;
+    for (i = 0; i < machine->possible_cpus->len; i++) {
+        int core_id = i * smp_threads;
+
+        machine->possible_cpus->cpus[i].type = core_type;
+        machine->possible_cpus->cpus[i].vcpus_count = smp_threads;
+        machine->possible_cpus->cpus[i].arch_id = core_id;
+        machine->possible_cpus->cpus[i].props.has_core_id = true;
+        machine->possible_cpus->cpus[i].props.core_id = core_id;
+    }
+    return machine->possible_cpus;
+}
+
+static bool spapr_phb_placement(SpaprMachineState *spapr, uint32_t index,
+                                uint64_t *buid, hwaddr *pio,
+                                hwaddr *mmio32, hwaddr *mmio64,
+                                unsigned n_dma, uint32_t *liobns,
+                                hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp)
+{
+    /*
+     * New-style PHB window placement.
+     *
+     * Goals: give each PHB a large (1TiB), naturally aligned 64-bit
+     * MMIO window, in addition to 2GiB 32-bit MMIO and 64kiB PIO
+     * windows.
+     *
+     * Some guest kernels can't work with MMIO windows above 1<<46
+     * (64TiB), so we place up to 31 PHBs in the area 32TiB..64TiB
+     *
+     * 32TiB..(32TiB+1984kiB) contains the 64kiB PIO windows for each
+     * PHB stacked together. (32TiB+2GiB)..(32TiB+64GiB) contains the
+     * 2GiB 32-bit MMIO windows for each PHB. Then 33TiB..64TiB has the
+     * 1TiB 64-bit MMIO windows for each PHB.
+     */
+    const uint64_t base_buid = 0x800000020000000ULL;
+    int i;
+
+    /* Sanity check natural alignments */
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_BASE % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_LIMIT % SPAPR_PCI_MEM64_WIN_SIZE) != 0);
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM64_WIN_SIZE % SPAPR_PCI_MEM32_WIN_SIZE) != 0);
+    QEMU_BUILD_BUG_ON((SPAPR_PCI_MEM32_WIN_SIZE % SPAPR_PCI_IO_WIN_SIZE) != 0);
+    /* Sanity check bounds */
+    QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_IO_WIN_SIZE) >
+                      SPAPR_PCI_MEM32_WIN_SIZE);
+    QEMU_BUILD_BUG_ON((SPAPR_MAX_PHBS * SPAPR_PCI_MEM32_WIN_SIZE) >
+                      SPAPR_PCI_MEM64_WIN_SIZE);
+
+    if (index >= SPAPR_MAX_PHBS) {
+        error_setg(errp, "\"index\" for PAPR PHB is too large (max %llu)",
+                   SPAPR_MAX_PHBS - 1);
+        return false;
+    }
+
+    *buid = base_buid + index;
+    for (i = 0; i < n_dma; ++i) {
+        liobns[i] = SPAPR_PCI_LIOBN(index, i);
+    }
+
+    *pio = SPAPR_PCI_BASE + index * SPAPR_PCI_IO_WIN_SIZE;
+    *mmio32 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM32_WIN_SIZE;
+    *mmio64 = SPAPR_PCI_BASE + (index + 1) * SPAPR_PCI_MEM64_WIN_SIZE;
+
+    *nv2gpa = SPAPR_PCI_NV2RAM64_WIN_BASE + index * SPAPR_PCI_NV2RAM64_WIN_SIZE;
+    *nv2atsd = SPAPR_PCI_NV2ATSD_WIN_BASE + index * SPAPR_PCI_NV2ATSD_WIN_SIZE;
+    return true;
+}
+
+static ICSState *spapr_ics_get(XICSFabric *dev, int irq)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(dev);
+
+    return ics_valid_irq(spapr->ics, irq) ?
spapr->ics : NULL; +} + +static void spapr_ics_resend(XICSFabric *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(dev); + + ics_resend(spapr->ics); +} + +static ICPState *spapr_icp_get(XICSFabric *xi, int vcpu_id) +{ + PowerPCCPU *cpu = spapr_find_cpu(vcpu_id); + + return cpu ? spapr_cpu_state(cpu)->icp : NULL; +} + +static void spapr_pic_print_info(InterruptStatsProvider *obj, + Monitor *mon) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + + spapr_irq_print_info(spapr, mon); + monitor_printf(mon, "irqchip: %s\n", + kvm_irqchip_in_kernel() ? "in-kernel" : "emulated"); +} + +/* + * This is a XIVE only operation + */ +static int spapr_match_nvt(XiveFabric *xfb, uint8_t format, + uint8_t nvt_blk, uint32_t nvt_idx, + bool cam_ignore, uint8_t priority, + uint32_t logic_serv, XiveTCTXMatch *match) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(xfb); + XivePresenter *xptr = XIVE_PRESENTER(spapr->active_intc); + XivePresenterClass *xpc = XIVE_PRESENTER_GET_CLASS(xptr); + int count; + + count = xpc->match_nvt(xptr, format, nvt_blk, nvt_idx, cam_ignore, + priority, logic_serv, match); + if (count < 0) { + return count; + } + + /* + * When we implement the save and restore of the thread interrupt + * contexts in the enter/exit CPU handlers of the machine and the + * escalations in QEMU, we should be able to handle non dispatched + * vCPUs. + * + * Until this is done, the sPAPR machine should find at least one + * matching context always. + */ + if (count == 0) { + qemu_log_mask(LOG_GUEST_ERROR, "XIVE: NVT %x/%x is not dispatched\n", + nvt_blk, nvt_idx); + } + + return count; +} + +int spapr_get_vcpu_id(PowerPCCPU *cpu) +{ + return cpu->vcpu_id; +} + +bool spapr_set_vcpu_id(PowerPCCPU *cpu, int cpu_index, Error **errp) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + MachineState *ms = MACHINE(spapr); + int vcpu_id; + + vcpu_id = spapr_vcpu_id(spapr, cpu_index); + + if (kvm_enabled() && !kvm_vcpu_id_is_valid(vcpu_id)) { + error_setg(errp, "Can't create CPU with id %d in KVM", vcpu_id); + error_append_hint(errp, "Adjust the number of cpus to %d " + "or try to raise the number of threads per core\n", + vcpu_id * ms->smp.threads / spapr->vsmt); + return false; + } + + cpu->vcpu_id = vcpu_id; + return true; +} + +PowerPCCPU *spapr_find_cpu(int vcpu_id) +{ + CPUState *cs; + + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + if (spapr_get_vcpu_id(cpu) == vcpu_id) { + return cpu; + } + } + + return NULL; +} + +static void spapr_cpu_exec_enter(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + /* These are only called by TCG, KVM maintains dispatch state */ + + spapr_cpu->prod = false; + if (spapr_cpu->vpa_addr) { + CPUState *cs = CPU(cpu); + uint32_t dispatch; + + dispatch = ldl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER); + dispatch++; + if ((dispatch & 1) != 0) { + qemu_log_mask(LOG_GUEST_ERROR, + "VPA: incorrect dispatch counter value for " + "dispatched partition %u, correcting.\n", dispatch); + dispatch++; + } + stl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER, dispatch); + } +} + +static void spapr_cpu_exec_exit(PPCVirtualHypervisor *vhyp, PowerPCCPU *cpu) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + if (spapr_cpu->vpa_addr) { + CPUState *cs = CPU(cpu); + uint32_t dispatch; + + dispatch = ldl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER); + dispatch++; + if ((dispatch & 1) != 1) { + qemu_log_mask(LOG_GUEST_ERROR, + "VPA: incorrect dispatch counter 
value for "
+                          "preempted partition %u, correcting.\n", dispatch);
+            dispatch++;
+        }
+        stl_be_phys(cs->as,
+                    spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER, dispatch);
+    }
+}
+
+static void spapr_machine_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(oc);
+    FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(oc);
+    NMIClass *nc = NMI_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
+    PPCVirtualHypervisorClass *vhc = PPC_VIRTUAL_HYPERVISOR_CLASS(oc);
+    XICSFabricClass *xic = XICS_FABRIC_CLASS(oc);
+    InterruptStatsProviderClass *ispc = INTERRUPT_STATS_PROVIDER_CLASS(oc);
+    XiveFabricClass *xfc = XIVE_FABRIC_CLASS(oc);
+    VofMachineIfClass *vmc = VOF_MACHINE_CLASS(oc);
+
+    mc->desc = "pSeries Logical Partition (PAPR compliant)";
+    mc->ignore_boot_device_suffixes = true;
+
+    /*
+     * We set up the default / latest behaviour here. The class_init
+     * functions for the specific versioned machine types can override
+     * these details for backwards compatibility.
+     */
+    mc->init = spapr_machine_init;
+    mc->reset = spapr_machine_reset;
+    mc->block_default_type = IF_SCSI;
+
+    /*
+     * Setting max_cpus to INT32_MAX. Both KVM and TCG max_cpus values
+     * should be limited by the host capability instead of being hardcoded.
+     * max_cpus for KVM guests will be checked in kvm_init(), and TCG
+     * guests are welcome to have as many CPUs as the host is capable
+     * of emulating.
+     */
+    mc->max_cpus = INT32_MAX;
+
+    mc->no_parallel = 1;
+    mc->default_boot_order = "";
+    mc->default_ram_size = 512 * MiB;
+    mc->default_ram_id = "ppc_spapr.ram";
+    mc->default_display = "std";
+    mc->kvm_type = spapr_kvm_type;
+    machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
+    mc->pci_allow_0_address = true;
+    assert(!mc->get_hotplug_handler);
+    mc->get_hotplug_handler = spapr_get_hotplug_handler;
+    hc->pre_plug = spapr_machine_device_pre_plug;
+    hc->plug = spapr_machine_device_plug;
+    mc->cpu_index_to_instance_props = spapr_cpu_index_to_props;
+    mc->get_default_cpu_node_id = spapr_get_default_cpu_node_id;
+    mc->possible_cpu_arch_ids = spapr_possible_cpu_arch_ids;
+    hc->unplug_request = spapr_machine_device_unplug_request;
+    hc->unplug = spapr_machine_device_unplug;
+
+    smc->dr_lmb_enabled = true;
+    smc->update_dt_enabled = true;
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power9_v2.0");
+    mc->has_hotpluggable_cpus = true;
+    mc->nvdimm_supported = true;
+    smc->resize_hpt_default = SPAPR_RESIZE_HPT_ENABLED;
+    fwc->get_dev_path = spapr_get_fw_dev_path;
+    nc->nmi_monitor_handler = spapr_nmi;
+    smc->phb_placement = spapr_phb_placement;
+    vhc->hypercall = emulate_spapr_hypercall;
+    vhc->hpt_mask = spapr_hpt_mask;
+    vhc->map_hptes = spapr_map_hptes;
+    vhc->unmap_hptes = spapr_unmap_hptes;
+    vhc->hpte_set_c = spapr_hpte_set_c;
+    vhc->hpte_set_r = spapr_hpte_set_r;
+    vhc->get_pate = spapr_get_pate;
+    vhc->encode_hpt_for_kvm_pr = spapr_encode_hpt_for_kvm_pr;
+    vhc->cpu_exec_enter = spapr_cpu_exec_enter;
+    vhc->cpu_exec_exit = spapr_cpu_exec_exit;
+    xic->ics_get = spapr_ics_get;
+    xic->ics_resend = spapr_ics_resend;
+    xic->icp_get = spapr_icp_get;
+    ispc->print_info = spapr_pic_print_info;
+    /* Force NUMA node memory size to be a multiple of
+     * SPAPR_MEMORY_BLOCK_SIZE (256M) since that's the granularity
+     * in which LMBs are represented and hot-added
+     */
+    mc->numa_mem_align_shift = 28;
+    mc->auto_enable_numa = true;
+
+    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_ON;
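+
+    /*
+     * Illustrative note (not part of the original patch): every entry
+     * in this block is exposed as a machine property by
+     * spapr_caps_add_properties(), so a command line such as
+     *
+     *   qemu-system-ppc64 -machine pseries,cap-htm=on,cap-cfpc=fixed
+     *
+     * overrides the class default; capabilities not given on the
+     * command line keep the values set here (see spapr_caps_init()).
+     */
+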
smc->default_caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND; + smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND; + smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_WORKAROUND; + smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 16; /* 64kiB */ + smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_ON; + smc->default_caps.caps[SPAPR_CAP_RPT_INVALIDATE] = SPAPR_CAP_OFF; + spapr_caps_add_properties(smc); + smc->irq = &spapr_irq_dual; + smc->dr_phb_enabled = true; + smc->linux_pci_probe = true; + smc->smp_threads_vsmt = true; + smc->nr_xirqs = SPAPR_NR_XIRQS; + xfc->match_nvt = spapr_match_nvt; + vmc->client_architecture_support = spapr_vof_client_architecture_support; + vmc->quiesce = spapr_vof_quiesce; + vmc->setprop = spapr_vof_setprop; +} + +static const TypeInfo spapr_machine_info = { + .name = TYPE_SPAPR_MACHINE, + .parent = TYPE_MACHINE, + .abstract = true, + .instance_size = sizeof(SpaprMachineState), + .instance_init = spapr_instance_init, + .instance_finalize = spapr_machine_finalizefn, + .class_size = sizeof(SpaprMachineClass), + .class_init = spapr_machine_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_FW_PATH_PROVIDER }, + { TYPE_NMI }, + { TYPE_HOTPLUG_HANDLER }, + { TYPE_PPC_VIRTUAL_HYPERVISOR }, + { TYPE_XICS_FABRIC }, + { TYPE_INTERRUPT_STATS_PROVIDER }, + { TYPE_XIVE_FABRIC }, + { TYPE_VOF_MACHINE_IF }, + { } + }, +}; + +static void spapr_machine_latest_class_options(MachineClass *mc) +{ + mc->alias = "pseries"; + mc->is_default = true; +} + +#define DEFINE_SPAPR_MACHINE(suffix, verstr, latest) \ + static void spapr_machine_##suffix##_class_init(ObjectClass *oc, \ + void *data) \ + { \ + MachineClass *mc = MACHINE_CLASS(oc); \ + spapr_machine_##suffix##_class_options(mc); \ + if (latest) { \ + spapr_machine_latest_class_options(mc); \ + } \ + } \ + static const TypeInfo spapr_machine_##suffix##_info = { \ + .name = MACHINE_TYPE_NAME("pseries-" verstr), \ + .parent = TYPE_SPAPR_MACHINE, \ + .class_init = spapr_machine_##suffix##_class_init, \ + }; \ + static void spapr_machine_register_##suffix(void) \ + { \ + type_register(&spapr_machine_##suffix##_info); \ + } \ + type_init(spapr_machine_register_##suffix) + +/* + * pseries-6.2 + */ +static void spapr_machine_6_2_class_options(MachineClass *mc) +{ + /* Defaults for the latest behaviour inherited from the base class */ +} + +DEFINE_SPAPR_MACHINE(6_2, "6.2", true); + +/* + * pseries-6.1 + */ +static void spapr_machine_6_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_6_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_1, hw_compat_6_1_len); + smc->pre_6_2_numa_affinity = true; + mc->smp_props.prefer_sockets = true; +} + +DEFINE_SPAPR_MACHINE(6_1, "6.1", false); + +/* + * pseries-6.0 + */ +static void spapr_machine_6_0_class_options(MachineClass *mc) +{ + spapr_machine_6_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_6_0, hw_compat_6_0_len); +} + +DEFINE_SPAPR_MACHINE(6_0, "6.0", false); + +/* + * pseries-5.2 + */ +static void spapr_machine_5_2_class_options(MachineClass *mc) +{ + spapr_machine_6_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_5_2, hw_compat_5_2_len); +} + +DEFINE_SPAPR_MACHINE(5_2, "5.2", false); + +/* + * 
pseries-5.1 + */ +static void spapr_machine_5_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_5_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_5_1, hw_compat_5_1_len); + smc->pre_5_2_numa_associativity = true; +} + +DEFINE_SPAPR_MACHINE(5_1, "5.1", false); + +/* + * pseries-5.0 + */ +static void spapr_machine_5_0_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-5.1-associativity", "on" }, + }; + + spapr_machine_5_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_5_0, hw_compat_5_0_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->numa_mem_supported = true; + smc->pre_5_1_assoc_refpoints = true; +} + +DEFINE_SPAPR_MACHINE(5_0, "5.0", false); + +/* + * pseries-4.2 + */ +static void spapr_machine_4_2_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_5_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_2, hw_compat_4_2_len); + smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF; + smc->default_caps.caps[SPAPR_CAP_FWNMI] = SPAPR_CAP_OFF; + smc->rma_limit = 16 * GiB; + mc->nvdimm_supported = false; +} + +DEFINE_SPAPR_MACHINE(4_2, "4.2", false); + +/* + * pseries-4.1 + */ +static void spapr_machine_4_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + /* Only allow 4kiB and 64kiB IOMMU pagesizes */ + { TYPE_SPAPR_PCI_HOST_BRIDGE, "pgsz", "0x11000" }, + }; + + spapr_machine_4_2_class_options(mc); + smc->linux_pci_probe = false; + smc->smp_threads_vsmt = false; + compat_props_add(mc->compat_props, hw_compat_4_1, hw_compat_4_1_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} + +DEFINE_SPAPR_MACHINE(4_1, "4.1", false); + +/* + * pseries-4.0 + */ +static bool phb_placement_4_0(SpaprMachineState *spapr, uint32_t index, + uint64_t *buid, hwaddr *pio, + hwaddr *mmio32, hwaddr *mmio64, + unsigned n_dma, uint32_t *liobns, + hwaddr *nv2gpa, hwaddr *nv2atsd, Error **errp) +{ + if (!spapr_phb_placement(spapr, index, buid, pio, mmio32, mmio64, n_dma, + liobns, nv2gpa, nv2atsd, errp)) { + return false; + } + + *nv2gpa = 0; + *nv2atsd = 0; + return true; +} +static void spapr_machine_4_0_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_4_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_4_0, hw_compat_4_0_len); + smc->phb_placement = phb_placement_4_0; + smc->irq = &spapr_irq_xics; + smc->pre_4_1_migration = true; +} + +DEFINE_SPAPR_MACHINE(4_0, "4.0", false); + +/* + * pseries-3.1 + */ +static void spapr_machine_3_1_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_4_0_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_3_1, hw_compat_3_1_len); + + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power8_v2.0"); + smc->update_dt_enabled = false; + smc->dr_phb_enabled = false; + smc->broken_host_serial_model = true; + smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; + smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; +} + +DEFINE_SPAPR_MACHINE(3_1, "3.1", false); + +/* + * pseries-3.0 + */ + +static void 
spapr_machine_3_0_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_3_1_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_3_0, hw_compat_3_0_len);
+
+    smc->legacy_irq_allocation = true;
+    smc->nr_xirqs = 0x400;
+    smc->irq = &spapr_irq_xics_legacy;
+}
+
+DEFINE_SPAPR_MACHINE(3_0, "3.0", false);
+
+/*
+ * pseries-2.12
+ */
+static void spapr_machine_2_12_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+    static GlobalProperty compat[] = {
+        { TYPE_POWERPC_CPU, "pre-3.0-migration", "on" },
+        { TYPE_SPAPR_CPU_CORE, "pre-3.0-migration", "on" },
+    };
+
+    spapr_machine_3_0_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_12, hw_compat_2_12_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+    /* We depend on kvm_enabled() to choose a default value for the
+     * hpt-max-page-size capability. Of course we can't do it here
+     * because this is too early and the HW accelerator isn't initialized
+     * yet. Postpone this to machine init (see default_caps_with_cpu()).
+     */
+    smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = 0;
+}
+
+DEFINE_SPAPR_MACHINE(2_12, "2.12", false);
+
+static void spapr_machine_2_12_sxxm_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_2_12_class_options(mc);
+    smc->default_caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_WORKAROUND;
+    smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_WORKAROUND;
+    smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_FIXED_CCD;
+}
+
+DEFINE_SPAPR_MACHINE(2_12_sxxm, "2.12-sxxm", false);
+
+/*
+ * pseries-2.11
+ */
+
+static void spapr_machine_2_11_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+
+    spapr_machine_2_12_class_options(mc);
+    smc->default_caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_ON;
+    compat_props_add(mc->compat_props, hw_compat_2_11, hw_compat_2_11_len);
+}
+
+DEFINE_SPAPR_MACHINE(2_11, "2.11", false);
+
+/*
+ * pseries-2.10
+ */
+
+static void spapr_machine_2_10_class_options(MachineClass *mc)
+{
+    spapr_machine_2_11_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_10, hw_compat_2_10_len);
+}
+
+DEFINE_SPAPR_MACHINE(2_10, "2.10", false);
+
+/*
+ * pseries-2.9
+ */
+
+static void spapr_machine_2_9_class_options(MachineClass *mc)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc);
+    static GlobalProperty compat[] = {
+        { TYPE_POWERPC_CPU, "pre-2.10-migration", "on" },
+    };
+
+    spapr_machine_2_10_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_9, hw_compat_2_9_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+    smc->pre_2_10_has_unused_icps = true;
+    smc->resize_hpt_default = SPAPR_RESIZE_HPT_DISABLED;
+}
+
+DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
+
+/*
+ * pseries-2.8
+ */
+
+static void spapr_machine_2_8_class_options(MachineClass *mc)
+{
+    static GlobalProperty compat[] = {
+        { TYPE_SPAPR_PCI_HOST_BRIDGE, "pcie-extended-configuration-space", "off" },
+    };
+
+    spapr_machine_2_9_class_options(mc);
+    compat_props_add(mc->compat_props, hw_compat_2_8, hw_compat_2_8_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+    mc->numa_mem_align_shift = 23;
+}
+
+DEFINE_SPAPR_MACHINE(2_8, "2.8", false);
+
+/*
+ * pseries-2.7
+ */
+
+static bool phb_placement_2_7(SpaprMachineState *spapr, uint32_t index,
+                              uint64_t *buid, hwaddr *pio,
+                              hwaddr *mmio32, hwaddr *mmio64,
+                              unsigned n_dma, uint32_t *liobns,
+                              hwaddr *nv2gpa, hwaddr *nv2atsd,
Error **errp) +{ + /* Legacy PHB placement for pseries-2.7 and earlier machine types */ + const uint64_t base_buid = 0x800000020000000ULL; + const hwaddr phb_spacing = 0x1000000000ULL; /* 64 GiB */ + const hwaddr mmio_offset = 0xa0000000; /* 2 GiB + 512 MiB */ + const hwaddr pio_offset = 0x80000000; /* 2 GiB */ + const uint32_t max_index = 255; + const hwaddr phb0_alignment = 0x10000000000ULL; /* 1 TiB */ + + uint64_t ram_top = MACHINE(spapr)->ram_size; + hwaddr phb0_base, phb_base; + int i; + + /* Do we have device memory? */ + if (MACHINE(spapr)->maxram_size > ram_top) { + /* Can't just use maxram_size, because there may be an + * alignment gap between normal and device memory regions + */ + ram_top = MACHINE(spapr)->device_memory->base + + memory_region_size(&MACHINE(spapr)->device_memory->mr); + } + + phb0_base = QEMU_ALIGN_UP(ram_top, phb0_alignment); + + if (index > max_index) { + error_setg(errp, "\"index\" for PAPR PHB is too large (max %u)", + max_index); + return false; + } + + *buid = base_buid + index; + for (i = 0; i < n_dma; ++i) { + liobns[i] = SPAPR_PCI_LIOBN(index, i); + } + + phb_base = phb0_base + index * phb_spacing; + *pio = phb_base + pio_offset; + *mmio32 = phb_base + mmio_offset; + /* + * We don't set the 64-bit MMIO window, relying on the PHB's + * fallback behaviour of automatically splitting a large "32-bit" + * window into contiguous 32-bit and 64-bit windows + */ + + *nv2gpa = 0; + *nv2atsd = 0; + return true; +} + +static void spapr_machine_2_7_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0xf80000000", }, + { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem64_win_size", "0", }, + { TYPE_POWERPC_CPU, "pre-2.8-migration", "on", }, + { TYPE_SPAPR_PCI_HOST_BRIDGE, "pre-2.8-migration", "on", }, + }; + + spapr_machine_2_8_class_options(mc); + mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("power7_v2.3"); + mc->default_machine_opts = "modern-hotplug-events=off"; + compat_props_add(mc->compat_props, hw_compat_2_7, hw_compat_2_7_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + smc->phb_placement = phb_placement_2_7; +} + +DEFINE_SPAPR_MACHINE(2_7, "2.7", false); + +/* + * pseries-2.6 + */ + +static void spapr_machine_2_6_class_options(MachineClass *mc) +{ + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "ddw", "off" }, + }; + + spapr_machine_2_7_class_options(mc); + mc->has_hotpluggable_cpus = false; + compat_props_add(mc->compat_props, hw_compat_2_6, hw_compat_2_6_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} + +DEFINE_SPAPR_MACHINE(2_6, "2.6", false); + +/* + * pseries-2.5 + */ + +static void spapr_machine_2_5_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + static GlobalProperty compat[] = { + { "spapr-vlan", "use-rx-buffer-pools", "off" }, + }; + + spapr_machine_2_6_class_options(mc); + smc->use_ohci_by_default = true; + compat_props_add(mc->compat_props, hw_compat_2_5, hw_compat_2_5_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} + +DEFINE_SPAPR_MACHINE(2_5, "2.5", false); + +/* + * pseries-2.4 + */ + +static void spapr_machine_2_4_class_options(MachineClass *mc) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_CLASS(mc); + + spapr_machine_2_5_class_options(mc); + smc->dr_lmb_enabled = false; + compat_props_add(mc->compat_props, hw_compat_2_4, hw_compat_2_4_len); +} + +DEFINE_SPAPR_MACHINE(2_4, "2.4", false); + +/* + 
* pseries-2.3 + */ + +static void spapr_machine_2_3_class_options(MachineClass *mc) +{ + static GlobalProperty compat[] = { + { "spapr-pci-host-bridge", "dynamic-reconfiguration", "off" }, + }; + spapr_machine_2_4_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_2_3, hw_compat_2_3_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); +} +DEFINE_SPAPR_MACHINE(2_3, "2.3", false); + +/* + * pseries-2.2 + */ + +static void spapr_machine_2_2_class_options(MachineClass *mc) +{ + static GlobalProperty compat[] = { + { TYPE_SPAPR_PCI_HOST_BRIDGE, "mem_win_size", "0x20000000" }, + }; + + spapr_machine_2_3_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_2_2, hw_compat_2_2_len); + compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->default_machine_opts = "modern-hotplug-events=off,suppress-vmdesc=on"; +} +DEFINE_SPAPR_MACHINE(2_2, "2.2", false); + +/* + * pseries-2.1 + */ + +static void spapr_machine_2_1_class_options(MachineClass *mc) +{ + spapr_machine_2_2_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_2_1, hw_compat_2_1_len); +} +DEFINE_SPAPR_MACHINE(2_1, "2.1", false); + +static void spapr_machine_register_types(void) +{ + type_register_static(&spapr_machine_info); +} + +type_init(spapr_machine_register_types) diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c new file mode 100644 index 000000000..ed7c077a0 --- /dev/null +++ b/hw/ppc/spapr_caps.c @@ -0,0 +1,944 @@ +/* + * QEMU PowerPC pSeries Logical Partition capabilities handling + * + * Copyright (c) 2017 David Gibson, Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "sysemu/hw_accel.h" +#include "exec/ram_addr.h" +#include "target/ppc/cpu.h" +#include "target/ppc/mmu-hash64.h" +#include "cpu-models.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" +#include "sysemu/tcg.h" + +#include "hw/ppc/spapr.h" + +typedef struct SpaprCapPossible { + int num; /* size of vals array below */ + const char *help; /* help text for vals */ + /* + * Note: + * - because of the way compatibility is determined vals MUST be ordered + * such that later options are a superset of all preceding options. 
+ * - the order of vals must be preserved, that is their index is important, + * however vals may be added to the end of the list so long as the above + * point is observed + */ + const char *vals[]; +} SpaprCapPossible; + +typedef struct SpaprCapabilityInfo { + const char *name; + const char *description; + int index; + + /* Getter and Setter Function Pointers */ + ObjectPropertyAccessor *get; + ObjectPropertyAccessor *set; + const char *type; + /* Possible values if this is a custom string type */ + SpaprCapPossible *possible; + /* Make sure the virtual hardware can support this capability */ + void (*apply)(SpaprMachineState *spapr, uint8_t val, Error **errp); + void (*cpu_apply)(SpaprMachineState *spapr, PowerPCCPU *cpu, + uint8_t val, Error **errp); + bool (*migrate_needed)(void *opaque); +} SpaprCapabilityInfo; + +static void spapr_cap_get_bool(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + bool value = spapr_get_cap(spapr, cap->index) == SPAPR_CAP_ON; + + visit_type_bool(v, name, &value, errp); +} + +static void spapr_cap_set_bool(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + bool value; + + if (!visit_type_bool(v, name, &value, errp)) { + return; + } + + spapr->cmd_line_caps[cap->index] = true; + spapr->eff.caps[cap->index] = value ? SPAPR_CAP_ON : SPAPR_CAP_OFF; +} + + +static void spapr_cap_get_string(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + char *val = NULL; + uint8_t value = spapr_get_cap(spapr, cap->index); + + if (value >= cap->possible->num) { + error_setg(errp, "Invalid value (%d) for cap-%s", value, cap->name); + return; + } + + val = g_strdup(cap->possible->vals[value]); + + visit_type_str(v, name, &val, errp); + g_free(val); +} + +static void spapr_cap_set_string(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + uint8_t i; + char *val; + + if (!visit_type_str(v, name, &val, errp)) { + return; + } + + if (!strcmp(val, "?")) { + error_setg(errp, "%s", cap->possible->help); + goto out; + } + for (i = 0; i < cap->possible->num; i++) { + if (!strcasecmp(val, cap->possible->vals[i])) { + spapr->cmd_line_caps[cap->index] = true; + spapr->eff.caps[cap->index] = i; + goto out; + } + } + + error_setg(errp, "Invalid capability mode \"%s\" for cap-%s", val, + cap->name); +out: + g_free(val); +} + +static void spapr_cap_get_pagesize(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + uint8_t val = spapr_get_cap(spapr, cap->index); + uint64_t pagesize = (1ULL << val); + + visit_type_size(v, name, &pagesize, errp); +} + +static void spapr_cap_set_pagesize(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprCapabilityInfo *cap = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(obj); + uint64_t pagesize; + uint8_t val; + + if (!visit_type_size(v, name, &pagesize, errp)) { + return; + } + + if (!is_power_of_2(pagesize)) { + error_setg(errp, "cap-%s must be a power of 2", cap->name); + return; + } + + val = ctz64(pagesize); + spapr->cmd_line_caps[cap->index] = true; + 
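+    /*
+     * Illustrative note (not part of the original patch): the cap value
+     * stores log2 of the page size.  E.g. cap-hpt-max-page-size=64k
+     * arrives here as pagesize == 0x10000, so val = ctz64(0x10000) = 16,
+     * matching the SPAPR_CAP_HPT_MAXPAGESIZE default of 16 set in
+     * spapr_machine_class_init(); spapr_cap_get_pagesize() reverses
+     * this with 1ULL << val.
+     */
+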
spapr->eff.caps[cap->index] = val; +} + +static void cap_htm_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) +{ + ERRP_GUARD(); + if (!val) { + /* TODO: We don't support disabling htm yet */ + return; + } + if (tcg_enabled()) { + error_setg(errp, "No Transactional Memory support in TCG"); + error_append_hint(errp, "Try appending -machine cap-htm=off\n"); + } else if (kvm_enabled() && !kvmppc_has_cap_htm()) { + error_setg(errp, + "KVM implementation does not support Transactional Memory"); + error_append_hint(errp, "Try appending -machine cap-htm=off\n"); + } +} + +static void cap_vsx_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + CPUPPCState *env = &cpu->env; + + if (!val) { + /* TODO: We don't support disabling vsx yet */ + return; + } + /* Allowable CPUs in spapr_cpu_core.c should already have gotten + * rid of anything that doesn't do VMX */ + g_assert(env->insns_flags & PPC_ALTIVEC); + if (!(env->insns_flags2 & PPC2_VSX)) { + error_setg(errp, "VSX support not available"); + error_append_hint(errp, "Try appending -machine cap-vsx=off\n"); + } +} + +static void cap_dfp_apply(SpaprMachineState *spapr, uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + CPUPPCState *env = &cpu->env; + + if (!val) { + /* TODO: We don't support disabling dfp yet */ + return; + } + if (!(env->insns_flags2 & PPC2_DFP)) { + error_setg(errp, "DFP support not available"); + error_append_hint(errp, "Try appending -machine cap-dfp=off\n"); + } +} + +SpaprCapPossible cap_cfpc_possible = { + .num = 3, + .vals = {"broken", "workaround", "fixed"}, + .help = "broken - no protection, workaround - workaround available," + " fixed - fixed in hardware", +}; + +static void cap_safe_cache_apply(SpaprMachineState *spapr, uint8_t val, + Error **errp) +{ + ERRP_GUARD(); + uint8_t kvm_val = kvmppc_get_cap_safe_cache(); + + if (tcg_enabled() && val) { + /* TCG only supports broken, allow other values and print a warning */ + warn_report("TCG doesn't support requested feature, cap-cfpc=%s", + cap_cfpc_possible.vals[val]); + } else if (kvm_enabled() && (val > kvm_val)) { + error_setg(errp, + "Requested safe cache capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-cfpc=%s\n", + cap_cfpc_possible.vals[kvm_val]); + } +} + +SpaprCapPossible cap_sbbc_possible = { + .num = 3, + .vals = {"broken", "workaround", "fixed"}, + .help = "broken - no protection, workaround - workaround available," + " fixed - fixed in hardware", +}; + +static void cap_safe_bounds_check_apply(SpaprMachineState *spapr, uint8_t val, + Error **errp) +{ + ERRP_GUARD(); + uint8_t kvm_val = kvmppc_get_cap_safe_bounds_check(); + + if (tcg_enabled() && val) { + /* TCG only supports broken, allow other values and print a warning */ + warn_report("TCG doesn't support requested feature, cap-sbbc=%s", + cap_sbbc_possible.vals[val]); + } else if (kvm_enabled() && (val > kvm_val)) { + error_setg(errp, +"Requested safe bounds check capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-sbbc=%s\n", + cap_sbbc_possible.vals[kvm_val]); + } +} + +SpaprCapPossible cap_ibs_possible = { + .num = 5, + /* Note workaround only maintained for compatibility */ + .vals = {"broken", "workaround", "fixed-ibs", "fixed-ccd", "fixed-na"}, + .help = "broken - no protection, workaround - count cache flush" + ", fixed-ibs - indirect branch serialisation," + " fixed-ccd - cache count 
disabled," + " fixed-na - fixed in hardware (no longer applicable)", +}; + +static void cap_safe_indirect_branch_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + uint8_t kvm_val = kvmppc_get_cap_safe_indirect_branch(); + + if (tcg_enabled() && val) { + /* TCG only supports broken, allow other values and print a warning */ + warn_report("TCG doesn't support requested feature, cap-ibs=%s", + cap_ibs_possible.vals[val]); + } else if (kvm_enabled() && (val > kvm_val)) { + error_setg(errp, +"Requested safe indirect branch capability level not supported by KVM"); + error_append_hint(errp, "Try appending -machine cap-ibs=%s\n", + cap_ibs_possible.vals[kvm_val]); + } +} + +#define VALUE_DESC_TRISTATE " (broken, workaround, fixed)" + +bool spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize, + Error **errp) +{ + hwaddr maxpagesize = (1ULL << spapr->eff.caps[SPAPR_CAP_HPT_MAXPAGESIZE]); + + if (!kvmppc_hpt_needs_host_contiguous_pages()) { + return true; + } + + if (maxpagesize > pagesize) { + error_setg(errp, + "Can't support %"HWADDR_PRIu" kiB guest pages with %" + HWADDR_PRIu" kiB host pages with this KVM implementation", + maxpagesize >> 10, pagesize >> 10); + return false; + } + + return true; +} + +static void cap_hpt_maxpagesize_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + if (val < 12) { + error_setg(errp, "Require at least 4kiB hpt-max-page-size"); + return; + } else if (val < 16) { + warn_report("Many guests require at least 64kiB hpt-max-page-size"); + } + + spapr_check_pagesize(spapr, qemu_minrampagesize(), errp); +} + +static bool cap_hpt_maxpagesize_migrate_needed(void *opaque) +{ + return !SPAPR_MACHINE_GET_CLASS(opaque)->pre_4_1_migration; +} + +static bool spapr_pagesize_cb(void *opaque, uint32_t seg_pshift, + uint32_t pshift) +{ + unsigned maxshift = *((unsigned *)opaque); + + assert(pshift >= seg_pshift); + + /* Don't allow the guest to use pages bigger than the configured + * maximum size */ + if (pshift > maxshift) { + return false; + } + + /* For whatever reason, KVM doesn't allow multiple pagesizes + * within a segment, *except* for the case of 16M pages in a 4k or + * 64k segment. 
Always exclude other cases, so that TCG and KVM + * guests see a consistent environment */ + if ((pshift != seg_pshift) && (pshift != 24)) { + return false; + } + + return true; +} + +static void ppc_hash64_filter_pagesizes(PowerPCCPU *cpu, + bool (*cb)(void *, uint32_t, uint32_t), + void *opaque) +{ + PPCHash64Options *opts = cpu->hash64_opts; + int i; + int n = 0; + bool ci_largepage = false; + + assert(opts); + + n = 0; + for (i = 0; i < ARRAY_SIZE(opts->sps); i++) { + PPCHash64SegmentPageSizes *sps = &opts->sps[i]; + int j; + int m = 0; + + assert(n <= i); + + if (!sps->page_shift) { + break; + } + + for (j = 0; j < ARRAY_SIZE(sps->enc); j++) { + PPCHash64PageSize *ps = &sps->enc[j]; + + assert(m <= j); + if (!ps->page_shift) { + break; + } + + if (cb(opaque, sps->page_shift, ps->page_shift)) { + if (ps->page_shift >= 16) { + ci_largepage = true; + } + sps->enc[m++] = *ps; + } + } + + /* Clear rest of the row */ + for (j = m; j < ARRAY_SIZE(sps->enc); j++) { + memset(&sps->enc[j], 0, sizeof(sps->enc[j])); + } + + if (m) { + n++; + } + } + + /* Clear the rest of the table */ + for (i = n; i < ARRAY_SIZE(opts->sps); i++) { + memset(&opts->sps[i], 0, sizeof(opts->sps[i])); + } + + if (!ci_largepage) { + opts->flags &= ~PPC_HASH64_CI_LARGEPAGE; + } +} + +static void cap_hpt_maxpagesize_cpu_apply(SpaprMachineState *spapr, + PowerPCCPU *cpu, + uint8_t val, Error **errp) +{ + unsigned maxshift = val; + + ppc_hash64_filter_pagesizes(cpu, spapr_pagesize_cb, &maxshift); +} + +static void cap_nested_kvm_hv_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + + if (!val) { + /* capability disabled by default */ + return; + } + + if (tcg_enabled()) { + error_setg(errp, "No Nested KVM-HV support in TCG"); + error_append_hint(errp, "Try appending -machine cap-nested-hv=off\n"); + } else if (kvm_enabled()) { + if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "Nested KVM-HV only supported on POWER9"); + error_append_hint(errp, + "Try appending -machine max-cpu-compat=power9\n"); + return; + } + + if (!kvmppc_has_cap_nested_kvm_hv()) { + error_setg(errp, + "KVM implementation does not support Nested KVM-HV"); + error_append_hint(errp, + "Try appending -machine cap-nested-hv=off\n"); + } else if (kvmppc_set_cap_nested_kvm_hv(val) < 0) { + error_setg(errp, "Error enabling cap-nested-hv with KVM"); + error_append_hint(errp, + "Try appending -machine cap-nested-hv=off\n"); + } + } +} + +static void cap_large_decr_apply(SpaprMachineState *spapr, + uint8_t val, Error **errp) +{ + ERRP_GUARD(); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + if (!val) { + return; /* Disabled by default */ + } + + if (tcg_enabled()) { + if (!ppc_check_compat(cpu, CPU_POWERPC_LOGICAL_3_00, 0, + spapr->max_compat_pvr)) { + error_setg(errp, "Large decrementer only supported on POWER9"); + error_append_hint(errp, "Try -cpu POWER9\n"); + return; + } + } else if (kvm_enabled()) { + int kvm_nr_bits = kvmppc_get_cap_large_decr(); + + if (!kvm_nr_bits) { + error_setg(errp, "No large decrementer support"); + error_append_hint(errp, + "Try appending -machine cap-large-decr=off\n"); + } else if (pcc->lrg_decr_bits != kvm_nr_bits) { + error_setg(errp, + "KVM large decrementer size (%d) differs to model (%d)", + kvm_nr_bits, pcc->lrg_decr_bits); + error_append_hint(errp, + "Try appending -machine cap-large-decr=off\n"); + } + } +} + +static void 
cap_large_decr_cpu_apply(SpaprMachineState *spapr,
+                         PowerPCCPU *cpu,
+                         uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+    CPUPPCState *env = &cpu->env;
+    target_ulong lpcr = env->spr[SPR_LPCR];
+
+    if (kvm_enabled()) {
+        if (kvmppc_enable_cap_large_decr(cpu, val)) {
+            error_setg(errp, "No large decrementer support");
+            error_append_hint(errp,
+                              "Try appending -machine cap-large-decr=off\n");
+        }
+    }
+
+    if (val) {
+        lpcr |= LPCR_LD;
+    } else {
+        lpcr &= ~LPCR_LD;
+    }
+    ppc_store_lpcr(cpu, lpcr);
+}
+
+static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
+                                 Error **errp)
+{
+    ERRP_GUARD();
+    uint8_t kvm_val = kvmppc_get_cap_count_cache_flush_assist();
+
+    if (tcg_enabled() && val) {
+        /* TCG doesn't implement anything here, but allow with a warning */
+        warn_report("TCG doesn't support requested feature, cap-ccf-assist=on");
+    } else if (kvm_enabled() && (val > kvm_val)) {
+        uint8_t kvm_ibs = kvmppc_get_cap_safe_indirect_branch();
+
+        if (kvm_ibs == SPAPR_CAP_FIXED_CCD) {
+            /*
+             * If we don't have CCF assist on the host, the assist
+             * instruction is a harmless no-op. It won't correctly
+             * implement the cache count flush *but* if we have
+             * count-cache-disabled in the host, that flush is
+             * unnecessary. So, specifically allow this case. This
+             * allows us to have better performance on POWER9 DD2.3,
+             * while still working on POWER9 DD2.2 and POWER8 host
+             * cpus.
+             */
+            return;
+        }
+        error_setg(errp,
+                   "Requested count cache flush assist capability level not supported by KVM");
+        error_append_hint(errp, "Try appending -machine cap-ccf-assist=off\n");
+    }
+}
+
+static void cap_fwnmi_apply(SpaprMachineState *spapr, uint8_t val,
+                            Error **errp)
+{
+    ERRP_GUARD();
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    if (kvm_enabled()) {
+        if (!kvmppc_get_fwnmi()) {
+            error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts (FWNMI) not supported by KVM.");
+            error_append_hint(errp, "Try appending -machine cap-fwnmi=off\n");
+        }
+    }
+}
+
+static void cap_rpt_invalidate_apply(SpaprMachineState *spapr,
+                                     uint8_t val, Error **errp)
+{
+    ERRP_GUARD();
+
+    if (!val) {
+        /* capability disabled by default */
+        return;
+    }
+
+    if (tcg_enabled()) {
+        error_setg(errp, "No H_RPT_INVALIDATE support in TCG");
+        error_append_hint(errp,
+                          "Try appending -machine cap-rpt-invalidate=off\n");
+    } else if (kvm_enabled()) {
+        if (!kvmppc_has_cap_mmu_radix()) {
+            error_setg(errp, "H_RPT_INVALIDATE only supported on Radix");
+            return;
+        }
+
+        if (!kvmppc_has_cap_rpt_invalidate()) {
+            error_setg(errp,
+                       "KVM implementation does not support H_RPT_INVALIDATE");
+            error_append_hint(errp,
+                              "Try appending -machine cap-rpt-invalidate=off\n");
+        } else {
+            kvmppc_enable_h_rpt_invalidate();
+        }
+    }
+}
+
+SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
+    [SPAPR_CAP_HTM] = {
+        .name = "htm",
+        .description = "Allow Hardware Transactional Memory (HTM)",
+        .index = SPAPR_CAP_HTM,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_htm_apply,
+    },
+    [SPAPR_CAP_VSX] = {
+        .name = "vsx",
+        .description = "Allow Vector Scalar Extensions (VSX)",
+        .index = SPAPR_CAP_VSX,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_vsx_apply,
+    },
+    [SPAPR_CAP_DFP] = {
+        .name = "dfp",
+        .description = "Allow Decimal Floating Point (DFP)",
+        .index = SPAPR_CAP_DFP,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_dfp_apply,
+    },
+    [SPAPR_CAP_CFPC] = {
+        .name = "cfpc",
+        .description =
"Cache Flush on Privilege Change" VALUE_DESC_TRISTATE, + .index = SPAPR_CAP_CFPC, + .get = spapr_cap_get_string, + .set = spapr_cap_set_string, + .type = "string", + .possible = &cap_cfpc_possible, + .apply = cap_safe_cache_apply, + }, + [SPAPR_CAP_SBBC] = { + .name = "sbbc", + .description = "Speculation Barrier Bounds Checking" VALUE_DESC_TRISTATE, + .index = SPAPR_CAP_SBBC, + .get = spapr_cap_get_string, + .set = spapr_cap_set_string, + .type = "string", + .possible = &cap_sbbc_possible, + .apply = cap_safe_bounds_check_apply, + }, + [SPAPR_CAP_IBS] = { + .name = "ibs", + .description = + "Indirect Branch Speculation (broken, workaround, fixed-ibs," + "fixed-ccd, fixed-na)", + .index = SPAPR_CAP_IBS, + .get = spapr_cap_get_string, + .set = spapr_cap_set_string, + .type = "string", + .possible = &cap_ibs_possible, + .apply = cap_safe_indirect_branch_apply, + }, + [SPAPR_CAP_HPT_MAXPAGESIZE] = { + .name = "hpt-max-page-size", + .description = "Maximum page size for Hash Page Table guests", + .index = SPAPR_CAP_HPT_MAXPAGESIZE, + .get = spapr_cap_get_pagesize, + .set = spapr_cap_set_pagesize, + .type = "int", + .apply = cap_hpt_maxpagesize_apply, + .cpu_apply = cap_hpt_maxpagesize_cpu_apply, + .migrate_needed = cap_hpt_maxpagesize_migrate_needed, + }, + [SPAPR_CAP_NESTED_KVM_HV] = { + .name = "nested-hv", + .description = "Allow Nested KVM-HV", + .index = SPAPR_CAP_NESTED_KVM_HV, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_nested_kvm_hv_apply, + }, + [SPAPR_CAP_LARGE_DECREMENTER] = { + .name = "large-decr", + .description = "Allow Large Decrementer", + .index = SPAPR_CAP_LARGE_DECREMENTER, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_large_decr_apply, + .cpu_apply = cap_large_decr_cpu_apply, + }, + [SPAPR_CAP_CCF_ASSIST] = { + .name = "ccf-assist", + .description = "Count Cache Flush Assist via HW Instruction", + .index = SPAPR_CAP_CCF_ASSIST, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_ccf_assist_apply, + }, + [SPAPR_CAP_FWNMI] = { + .name = "fwnmi", + .description = "Implements PAPR FWNMI option", + .index = SPAPR_CAP_FWNMI, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_fwnmi_apply, + }, + [SPAPR_CAP_RPT_INVALIDATE] = { + .name = "rpt-invalidate", + .description = "Allow H_RPT_INVALIDATE", + .index = SPAPR_CAP_RPT_INVALIDATE, + .get = spapr_cap_get_bool, + .set = spapr_cap_set_bool, + .type = "bool", + .apply = cap_rpt_invalidate_apply, + }, +}; + +static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr, + const char *cputype) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprCapabilities caps; + + caps = smc->default_caps; + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_3_00, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF; + } + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_07, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_HTM] = SPAPR_CAP_OFF; + caps.caps[SPAPR_CAP_CFPC] = SPAPR_CAP_BROKEN; + } + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_06_PLUS, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN; + } + + if (!ppc_type_check_compat(cputype, CPU_POWERPC_LOGICAL_2_06, + 0, spapr->max_compat_pvr)) { + caps.caps[SPAPR_CAP_VSX] = SPAPR_CAP_OFF; + caps.caps[SPAPR_CAP_DFP] = SPAPR_CAP_OFF; + caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN; + } + + /* This is for 
pseries-2.12 and older */ + if (smc->default_caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] == 0) { + uint8_t mps; + + if (kvmppc_hpt_needs_host_contiguous_pages()) { + mps = ctz64(qemu_minrampagesize()); + } else { + mps = 34; /* allow everything up to 16GiB, i.e. everything */ + } + + caps.caps[SPAPR_CAP_HPT_MAXPAGESIZE] = mps; + } + + return caps; +} + +int spapr_caps_pre_load(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + /* Set to default so we can tell if this came in with the migration */ + spapr->mig = spapr->def; + return 0; +} + +int spapr_caps_pre_save(void *opaque) +{ + SpaprMachineState *spapr = opaque; + + spapr->mig = spapr->eff; + return 0; +} + +/* This has to be called from the top-level spapr post_load, not the + * caps specific one. Otherwise it wouldn't be called when the source + * caps are all defaults, which could still conflict with overridden + * caps on the destination */ +int spapr_caps_post_migration(SpaprMachineState *spapr) +{ + int i; + bool ok = true; + SpaprCapabilities dstcaps = spapr->eff; + SpaprCapabilities srccaps; + + srccaps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type); + for (i = 0; i < SPAPR_CAP_NUM; i++) { + /* If not default value then assume came in with the migration */ + if (spapr->mig.caps[i] != spapr->def.caps[i]) { + srccaps.caps[i] = spapr->mig.caps[i]; + } + } + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + SpaprCapabilityInfo *info = &capability_table[i]; + + if (srccaps.caps[i] > dstcaps.caps[i]) { + error_report("cap-%s higher level (%d) in incoming stream than on destination (%d)", + info->name, srccaps.caps[i], dstcaps.caps[i]); + ok = false; + } + + if (srccaps.caps[i] < dstcaps.caps[i]) { + warn_report("cap-%s lower level (%d) in incoming stream than on destination (%d)", + info->name, srccaps.caps[i], dstcaps.caps[i]); + } + } + + return ok ? 0 : -EINVAL; +} + +/* Used to generate the migration field and needed function for a spapr cap */ +#define SPAPR_CAP_MIG_STATE(sname, cap) \ +static bool spapr_cap_##sname##_needed(void *opaque) \ +{ \ + SpaprMachineState *spapr = opaque; \ + bool (*needed)(void *opaque) = \ + capability_table[cap].migrate_needed; \ + \ + return needed ? 
needed(opaque) : true && \ + spapr->cmd_line_caps[cap] && \ + (spapr->eff.caps[cap] != \ + spapr->def.caps[cap]); \ +} \ + \ +const VMStateDescription vmstate_spapr_cap_##sname = { \ + .name = "spapr/cap/" #sname, \ + .version_id = 1, \ + .minimum_version_id = 1, \ + .needed = spapr_cap_##sname##_needed, \ + .fields = (VMStateField[]) { \ + VMSTATE_UINT8(mig.caps[cap], \ + SpaprMachineState), \ + VMSTATE_END_OF_LIST() \ + }, \ +} + +SPAPR_CAP_MIG_STATE(htm, SPAPR_CAP_HTM); +SPAPR_CAP_MIG_STATE(vsx, SPAPR_CAP_VSX); +SPAPR_CAP_MIG_STATE(dfp, SPAPR_CAP_DFP); +SPAPR_CAP_MIG_STATE(cfpc, SPAPR_CAP_CFPC); +SPAPR_CAP_MIG_STATE(sbbc, SPAPR_CAP_SBBC); +SPAPR_CAP_MIG_STATE(ibs, SPAPR_CAP_IBS); +SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE); +SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV); +SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER); +SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST); +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI); +SPAPR_CAP_MIG_STATE(rpt_invalidate, SPAPR_CAP_RPT_INVALIDATE); + +void spapr_caps_init(SpaprMachineState *spapr) +{ + SpaprCapabilities default_caps; + int i; + + /* Compute the actual set of caps we should run with */ + default_caps = default_caps_with_cpu(spapr, MACHINE(spapr)->cpu_type); + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + /* Store the defaults */ + spapr->def.caps[i] = default_caps.caps[i]; + /* If not set on the command line then apply the default value */ + if (!spapr->cmd_line_caps[i]) { + spapr->eff.caps[i] = default_caps.caps[i]; + } + } +} + +void spapr_caps_apply(SpaprMachineState *spapr) +{ + int i; + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + SpaprCapabilityInfo *info = &capability_table[i]; + + /* + * If the apply function can't set the desired level and thinks it's + * fatal, it should cause that. + */ + info->apply(spapr, spapr->eff.caps[i], &error_fatal); + } +} + +void spapr_caps_cpu_apply(SpaprMachineState *spapr, PowerPCCPU *cpu) +{ + int i; + + for (i = 0; i < SPAPR_CAP_NUM; i++) { + SpaprCapabilityInfo *info = &capability_table[i]; + + /* + * If the apply function can't set the desired level and thinks it's + * fatal, it should cause that. + */ + if (info->cpu_apply) { + info->cpu_apply(spapr, cpu, spapr->eff.caps[i], &error_fatal); + } + } +} + +void spapr_caps_add_properties(SpaprMachineClass *smc) +{ + ObjectClass *klass = OBJECT_CLASS(smc); + int i; + + for (i = 0; i < ARRAY_SIZE(capability_table); i++) { + SpaprCapabilityInfo *cap = &capability_table[i]; + char *name = g_strdup_printf("cap-%s", cap->name); + char *desc; + + object_class_property_add(klass, name, cap->type, + cap->get, cap->set, + NULL, cap); + + desc = g_strdup_printf("%s", cap->description); + object_class_property_set_description(klass, name, desc); + g_free(name); + g_free(desc); + } +} diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c new file mode 100644 index 000000000..58e7341cb --- /dev/null +++ b/hw/ppc/spapr_cpu_core.c @@ -0,0 +1,391 @@ +/* + * sPAPR CPU core device, acts as container of CPU thread devices. + * + * Copyright (C) 2016 Bharata B Rao <bharata@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
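Stepping back to the SPAPR_CAP_MIG_STATE macro defined above: expanded by hand for the htm case (a mechanical expansion, shown only for readability), the generated needed-function is the following. Note the parse: "&&" binds tighter than "?:", so the command-line and changed-from-default checks form the third operand as a group, and whenever a cap registers a migrate_needed hook, that hook's verdict alone decides whether the subsection is sent.

static bool spapr_cap_htm_needed(void *opaque)
{
    SpaprMachineState *spapr = opaque;
    bool (*needed)(void *opaque) =
        capability_table[SPAPR_CAP_HTM].migrate_needed;

    /* parses as: needed ? needed(opaque)
     *                   : (true && cmd_line_caps[...] && (eff != def)) */
    return needed ? needed(opaque) : true &&
        spapr->cmd_line_caps[SPAPR_CAP_HTM] &&
        (spapr->eff.caps[SPAPR_CAP_HTM] !=
         spapr->def.caps[SPAPR_CAP_HTM]);
}

The macro also emits the matching vmstate_spapr_cap_htm description, whose .needed callback points at this function.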
+ */
+
+#include "qemu/osdep.h"
+#include "hw/cpu/core.h"
+#include "hw/ppc/spapr_cpu_core.h"
+#include "hw/qdev-properties.h"
+#include "migration/vmstate.h"
+#include "target/ppc/cpu.h"
+#include "hw/ppc/spapr.h"
+#include "qapi/error.h"
+#include "sysemu/cpus.h"
+#include "sysemu/kvm.h"
+#include "target/ppc/kvm_ppc.h"
+#include "hw/ppc/ppc.h"
+#include "target/ppc/mmu-hash64.h"
+#include "sysemu/numa.h"
+#include "sysemu/reset.h"
+#include "sysemu/hw_accel.h"
+#include "qemu/error-report.h"
+
+static void spapr_reset_vcpu(PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+    target_ulong lpcr;
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
+    cpu_reset(cs);
+
+    env->spr[SPR_HIOR] = 0;
+
+    lpcr = env->spr[SPR_LPCR];
+
+    /* Set emulated LPCR to not send interrupts to hypervisor. Note that
+     * under KVM, the actual HW LPCR will be set differently by KVM itself;
+     * the settings below ensure proper operation with TCG in the absence
+     * of a real hypervisor.
+     *
+     * Disable Power-saving mode Exit Cause exceptions for the CPU, so
+     * we don't get spurious wakeups before an RTAS start-cpu call.
+     * For the same reason, set PSSCR_EC.
+     */
+    lpcr &= ~(LPCR_VPM1 | LPCR_ISL | LPCR_KBV | pcc->lpcr_pm);
+    lpcr |= LPCR_LPES0 | LPCR_LPES1;
+    env->spr[SPR_PSSCR] |= PSSCR_EC;
+
+    ppc_store_lpcr(cpu, lpcr);
+
+    /* Set a full AMOR so guest can use the AMR as it sees fit */
+    env->spr[SPR_AMOR] = 0xffffffffffffffffull;
+
+    spapr_cpu->vpa_addr = 0;
+    spapr_cpu->slb_shadow_addr = 0;
+    spapr_cpu->slb_shadow_size = 0;
+    spapr_cpu->dtl_addr = 0;
+    spapr_cpu->dtl_size = 0;
+
+    spapr_caps_cpu_apply(spapr, cpu);
+
+    kvm_check_mmu(cpu, &error_fatal);
+
+    spapr_irq_cpu_intc_reset(spapr, cpu);
+}
+
+void spapr_cpu_set_entry_state(PowerPCCPU *cpu, target_ulong nip,
+                               target_ulong r1, target_ulong r3,
+                               target_ulong r4)
+{
+    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
+    CPUPPCState *env = &cpu->env;
+
+    env->nip = nip;
+    env->gpr[1] = r1;
+    env->gpr[3] = r3;
+    env->gpr[4] = r4;
+    kvmppc_set_reg_ppc_online(cpu, 1);
+    CPU(cpu)->halted = 0;
+    /* Enable Power-saving mode Exit Cause exceptions */
+    ppc_store_lpcr(cpu, env->spr[SPR_LPCR] | pcc->lpcr_pm);
+}
+
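spapr_get_cpu_core_type(), defined next, derives the sPAPR core type name from a CPU type name by stripping the CPU-type suffix and re-wrapping the model in the core-type template. A standalone sketch of the same string surgery follows; the literal "-powerpc64-cpu" suffix and "-spapr-cpu-core" pattern are assumptions standing in for POWERPC_CPU_TYPE_SUFFIX and SPAPR_CPU_CORE_TYPE_NAME():

#include <glib.h>
#include <stdio.h>
#include <string.h>

#define CPU_SUFFIX "-powerpc64-cpu"        /* assumed POWERPC_CPU_TYPE_SUFFIX */
#define CORE_NAME(m) m "-spapr-cpu-core"   /* assumed SPAPR_CPU_CORE_TYPE_NAME */

int main(void)
{
    const char *cpu_type = "power9_v2.0" CPU_SUFFIX; /* what -cpu power9_v2.0 resolves to */
    int len = strlen(cpu_type) - strlen(CPU_SUFFIX);
    char *core_type = g_strdup_printf(CORE_NAME("%.*s"), len, cpu_type);

    /* prints: power9_v2.0-powerpc64-cpu -> power9_v2.0-spapr-cpu-core */
    printf("%s -> %s\n", cpu_type, core_type);
    g_free(core_type);
    return 0;
}

The real function additionally validates the result with object_class_by_name(), so an unknown model yields NULL rather than a dangling type name.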
+/*
+ * Return the sPAPR CPU core type for @cpu_type, which essentially is the
+ * CPU model specified with the -cpu cmdline option.
+ */
+const char *spapr_get_cpu_core_type(const char *cpu_type)
+{
+    int len = strlen(cpu_type) - strlen(POWERPC_CPU_TYPE_SUFFIX);
+    char *core_type = g_strdup_printf(SPAPR_CPU_CORE_TYPE_NAME("%.*s"),
+                                      len, cpu_type);
+    ObjectClass *oc = object_class_by_name(core_type);
+
+    g_free(core_type);
+    if (!oc) {
+        return NULL;
+    }
+
+    return object_class_get_name(oc);
+}
+
+static bool slb_shadow_needed(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    return spapr_cpu->slb_shadow_addr != 0;
+}
+
+static const VMStateDescription vmstate_spapr_cpu_slb_shadow = {
+    .name = "spapr_cpu/vpa/slb_shadow",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = slb_shadow_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(slb_shadow_addr, SpaprCpuState),
+        VMSTATE_UINT64(slb_shadow_size, SpaprCpuState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool dtl_needed(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    return spapr_cpu->dtl_addr != 0;
+}
+
+static const VMStateDescription vmstate_spapr_cpu_dtl = {
+    .name = "spapr_cpu/vpa/dtl",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = dtl_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(dtl_addr, SpaprCpuState),
+        VMSTATE_UINT64(dtl_size, SpaprCpuState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool vpa_needed(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    return spapr_cpu->vpa_addr != 0;
+}
+
+static const VMStateDescription vmstate_spapr_cpu_vpa = {
+    .name = "spapr_cpu/vpa",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = vpa_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(vpa_addr, SpaprCpuState),
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_spapr_cpu_slb_shadow,
+        &vmstate_spapr_cpu_dtl,
+        NULL
+    }
+};
+
+static const VMStateDescription vmstate_spapr_cpu_state = {
+    .name = "spapr_cpu",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_spapr_cpu_vpa,
+        NULL
+    }
+};
+
+static void spapr_unrealize_vcpu(PowerPCCPU *cpu, SpaprCpuCore *sc)
+{
+    if (!sc->pre_3_0_migration) {
+        vmstate_unregister(NULL, &vmstate_spapr_cpu_state, cpu->machine_data);
+    }
+    spapr_irq_cpu_intc_destroy(SPAPR_MACHINE(qdev_get_machine()), cpu);
+    qdev_unrealize(DEVICE(cpu));
+}
+
+/*
+ * Called when CPUs are hot-plugged.
+ */
+static void spapr_cpu_core_reset(DeviceState *dev)
+{
+    CPUCore *cc = CPU_CORE(dev);
+    SpaprCpuCore *sc = SPAPR_CPU_CORE(dev);
+    int i;
+
+    for (i = 0; i < cc->nr_threads; i++) {
+        spapr_reset_vcpu(sc->threads[i]);
+    }
+}
+
+/*
+ * Called by the machine reset.
+ */
+static void spapr_cpu_core_reset_handler(void *opaque)
+{
+    spapr_cpu_core_reset(opaque);
+}
+
+static void spapr_delete_vcpu(PowerPCCPU *cpu)
+{
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+
+    cpu->machine_data = NULL;
+    g_free(spapr_cpu);
+    object_unparent(OBJECT(cpu));
+}
+
+static void spapr_cpu_core_unrealize(DeviceState *dev)
+{
+    SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev));
+    CPUCore *cc = CPU_CORE(dev);
+    int i;
+
+    for (i = 0; i < cc->nr_threads; i++) {
+        if (sc->threads[i]) {
+            /*
+             * Since we can get here from the error path of
+             * spapr_cpu_core_realize(), make sure we only unrealize
+             * vCPUs that have already been realized.
+ */ + if (object_property_get_bool(OBJECT(sc->threads[i]), "realized", + &error_abort)) { + spapr_unrealize_vcpu(sc->threads[i], sc); + } + spapr_delete_vcpu(sc->threads[i]); + } + } + g_free(sc->threads); + qemu_unregister_reset(spapr_cpu_core_reset_handler, sc); +} + +static bool spapr_realize_vcpu(PowerPCCPU *cpu, SpaprMachineState *spapr, + SpaprCpuCore *sc, Error **errp) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + + if (!qdev_realize(DEVICE(cpu), NULL, errp)) { + return false; + } + + /* Set time-base frequency to 512 MHz */ + cpu_ppc_tb_init(env, SPAPR_TIMEBASE_FREQ); + + cpu_ppc_set_vhyp(cpu, PPC_VIRTUAL_HYPERVISOR(spapr)); + kvmppc_set_papr(cpu); + + if (spapr_irq_cpu_intc_create(spapr, cpu, errp) < 0) { + qdev_unrealize(DEVICE(cpu)); + return false; + } + + if (!sc->pre_3_0_migration) { + vmstate_register(NULL, cs->cpu_index, &vmstate_spapr_cpu_state, + cpu->machine_data); + } + return true; +} + +static PowerPCCPU *spapr_create_vcpu(SpaprCpuCore *sc, int i, Error **errp) +{ + SpaprCpuCoreClass *scc = SPAPR_CPU_CORE_GET_CLASS(sc); + CPUCore *cc = CPU_CORE(sc); + g_autoptr(Object) obj = NULL; + g_autofree char *id = NULL; + CPUState *cs; + PowerPCCPU *cpu; + + obj = object_new(scc->cpu_type); + + cs = CPU(obj); + cpu = POWERPC_CPU(obj); + /* + * All CPUs start halted. CPU0 is unhalted from the machine level reset code + * and the rest are explicitly started up by the guest using an RTAS call. + */ + cs->start_powered_off = true; + cs->cpu_index = cc->core_id + i; + if (!spapr_set_vcpu_id(cpu, cs->cpu_index, errp)) { + return NULL; + } + + cpu->node_id = sc->node_id; + + id = g_strdup_printf("thread[%d]", i); + object_property_add_child(OBJECT(sc), id, obj); + + cpu->machine_data = g_new0(SpaprCpuState, 1); + + return cpu; +} + +static void spapr_cpu_core_realize(DeviceState *dev, Error **errp) +{ + /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user + * tries to add a sPAPR CPU core to a non-pseries machine. 
+ */ + SpaprMachineState *spapr = + (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(), + TYPE_SPAPR_MACHINE); + SpaprCpuCore *sc = SPAPR_CPU_CORE(OBJECT(dev)); + CPUCore *cc = CPU_CORE(OBJECT(dev)); + int i; + + if (!spapr) { + error_setg(errp, TYPE_SPAPR_CPU_CORE " needs a pseries machine"); + return; + } + + qemu_register_reset(spapr_cpu_core_reset_handler, sc); + sc->threads = g_new0(PowerPCCPU *, cc->nr_threads); + for (i = 0; i < cc->nr_threads; i++) { + sc->threads[i] = spapr_create_vcpu(sc, i, errp); + if (!sc->threads[i] || + !spapr_realize_vcpu(sc->threads[i], spapr, sc, errp)) { + spapr_cpu_core_unrealize(dev); + return; + } + } +} + +static Property spapr_cpu_core_properties[] = { + DEFINE_PROP_INT32("node-id", SpaprCpuCore, node_id, CPU_UNSET_NUMA_NODE_ID), + DEFINE_PROP_BOOL("pre-3.0-migration", SpaprCpuCore, pre_3_0_migration, + false), + DEFINE_PROP_END_OF_LIST() +}; + +static void spapr_cpu_core_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + SpaprCpuCoreClass *scc = SPAPR_CPU_CORE_CLASS(oc); + + dc->realize = spapr_cpu_core_realize; + dc->unrealize = spapr_cpu_core_unrealize; + dc->reset = spapr_cpu_core_reset; + device_class_set_props(dc, spapr_cpu_core_properties); + scc->cpu_type = data; +} + +#define DEFINE_SPAPR_CPU_CORE_TYPE(cpu_model) \ + { \ + .parent = TYPE_SPAPR_CPU_CORE, \ + .class_data = (void *) POWERPC_CPU_TYPE_NAME(cpu_model), \ + .class_init = spapr_cpu_core_class_init, \ + .name = SPAPR_CPU_CORE_TYPE_NAME(cpu_model), \ + } + +static const TypeInfo spapr_cpu_core_type_infos[] = { + { + .name = TYPE_SPAPR_CPU_CORE, + .parent = TYPE_CPU_CORE, + .abstract = true, + .instance_size = sizeof(SpaprCpuCore), + .class_size = sizeof(SpaprCpuCoreClass), + }, + DEFINE_SPAPR_CPU_CORE_TYPE("970_v2.2"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("970mp_v1.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power5+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7_v2.3"), + DEFINE_SPAPR_CPU_CORE_TYPE("power7+_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8_v2.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8e_v2.1"), + DEFINE_SPAPR_CPU_CORE_TYPE("power8nvl_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power9_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power9_v2.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power10_v1.0"), + DEFINE_SPAPR_CPU_CORE_TYPE("power10_v2.0"), +#ifdef CONFIG_KVM + DEFINE_SPAPR_CPU_CORE_TYPE("host"), +#endif +}; + +DEFINE_TYPES(spapr_cpu_core_type_infos) diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c new file mode 100644 index 000000000..f8ac0a10d --- /dev/null +++ b/hw/ppc/spapr_drc.c @@ -0,0 +1,1326 @@ +/* + * QEMU SPAPR Dynamic Reconfiguration Connector Implementation + * + * Copyright IBM Corp. 2014 + * + * Authors: + * Michael Roth <mdroth@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qapi/qmp/qnull.h" +#include "qemu/cutils.h" +#include "hw/ppc/spapr_drc.h" +#include "qom/object.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/qapi-events-qdev.h" +#include "qapi/visitor.h" +#include "qemu/error-report.h" +#include "hw/ppc/spapr.h" /* for RTAS return codes */ +#include "hw/pci-host/spapr.h" /* spapr_phb_remove_pci_device_cb callback */ +#include "hw/ppc/spapr_nvdimm.h" +#include "sysemu/device_tree.h" +#include "sysemu/reset.h" +#include "trace.h" + +#define DRC_CONTAINER_PATH "/dr-connector" +#define DRC_INDEX_TYPE_SHIFT 28 +#define DRC_INDEX_ID_MASK ((1ULL << DRC_INDEX_TYPE_SHIFT) - 1) + +SpaprDrcType spapr_drc_type(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + return 1 << drck->typeshift; +} + +uint32_t spapr_drc_index(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* no set format for a drc index: it only needs to be globally + * unique. this is how we encode the DRC type on bare-metal + * however, so might as well do that here + */ + return (drck->typeshift << DRC_INDEX_TYPE_SHIFT) + | (drc->id & DRC_INDEX_ID_MASK); +} + +static void spapr_drc_release(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + drck->release(drc->dev); + + drc->unplug_requested = false; + g_free(drc->fdt); + drc->fdt = NULL; + drc->fdt_start_offset = 0; + object_property_del(OBJECT(drc), "device"); + drc->dev = NULL; +} + +static uint32_t drc_isolate_physical(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } + + drc->state = SPAPR_DRC_STATE_PHYSICAL_POWERON; + + if (drc->unplug_requested) { + uint32_t drc_index = spapr_drc_index(drc); + trace_spapr_drc_set_isolation_state_finalizing(drc_index); + spapr_drc_release(drc); + } + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_unisolate_physical(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_PHYSICAL_UNISOLATE: + case SPAPR_DRC_STATE_PHYSICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_PHYSICAL_POWERON: + break; /* see below */ + default: + g_assert_not_reached(); + } + + /* cannot unisolate a non-existent resource, and, or resources + * which are in an 'UNUSABLE' allocation state. (PAPR 2.7, + * 13.5.3.5) + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->state = SPAPR_DRC_STATE_PHYSICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_isolate_logical(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + return RTAS_OUT_PARAM_ERROR; /* not allowed */ + default: + g_assert_not_reached(); + } + + /* + * Fail any requests to ISOLATE the LMB DRC if this LMB doesn't + * belong to a DIMM device that is marked for removal. 
+ * + * Currently the guest userspace tool drmgr that drives the memory + * hotplug/unplug will just try to remove a set of 'removable' LMBs + * in response to a hot unplug request that is based on drc-count. + * If the LMB being removed doesn't belong to a DIMM device that is + * actually being unplugged, fail the isolation request here. + */ + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB + && !drc->unplug_requested) { + return RTAS_OUT_HW_ERROR; + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_unisolate_logical(SpaprDrc *drc) +{ + SpaprMachineState *spapr = NULL; + + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + /* + * Unisolating a logical DRC that was marked for unplug + * means that the kernel is refusing the removal. + */ + if (drc->unplug_requested && drc->dev) { + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB) { + spapr = SPAPR_MACHINE(qdev_get_machine()); + + spapr_memory_unplug_rollback(spapr, drc->dev); + } + + drc->unplug_requested = false; + + if (drc->dev->id) { + error_report("Device hotunplug rejected by the guest " + "for device %s", drc->dev->id); + } + + qapi_event_send_device_unplug_guest_error(!!drc->dev->id, + drc->dev->id, + drc->dev->canonical_path); + } + + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); + } + + /* Move to AVAILABLE state should have ensured device was present */ + g_assert(drc->dev); + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNISOLATE; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_set_usable(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + break; /* see below */ + default: + g_assert_not_reached(); + } + + /* if there's no resource/device associated with the DRC, there's + * no way for us to put it in an allocation state consistent with + * being 'USABLE'. 
PAPR 2.7, 13.5.3.4 documents that this should + * result in an RTAS return code of -3 / "no such indicator" + */ + if (!drc->dev) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if (drc->unplug_requested) { + /* Don't allow the guest to move a device away from UNUSABLE + * state when we want to unplug it */ + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_AVAILABLE; + + return RTAS_OUT_SUCCESS; +} + +static uint32_t drc_set_unusable(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return RTAS_OUT_SUCCESS; /* Nothing to do */ + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + break; /* see below */ + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + return RTAS_OUT_NO_SUCH_INDICATOR; /* not allowed */ + default: + g_assert_not_reached(); + } + + drc->state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; + if (drc->unplug_requested) { + uint32_t drc_index = spapr_drc_index(drc); + trace_spapr_drc_set_allocation_state_finalizing(drc_index); + spapr_drc_release(drc); + } + + return RTAS_OUT_SUCCESS; +} + +static char *spapr_drc_name(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* human-readable name for a DRC to encode into the DT + * description. this is mainly only used within a guest in place + * of the unique DRC index. + * + * in the case of VIO/PCI devices, it corresponds to a "location + * code" that maps a logical device/function (DRC index) to a + * physical (or virtual in the case of VIO) location in the system + * by chaining together the "location label" for each + * encapsulating component. + * + * since this is more to do with diagnosing physical hardware + * issues than guest compatibility, we choose location codes/DRC + * names that adhere to the documented format, but avoid encoding + * the entire topology information into the label/code, instead + * just using the location codes based on the labels for the + * endpoints (VIO/PCI adaptor connectors), which is basically just + * "C" followed by an integer ID. + * + * DRC names as documented by PAPR+ v2.7, 13.5.2.4 + * location codes as documented by PAPR+ v2.7, 12.3.1.5 + */ + return g_strdup_printf("%s%d", drck->drc_name_prefix, drc->id); +} + +/* + * dr-entity-sense sensor value + * returned via get-sensor-state RTAS calls + * as expected by state diagram in PAPR+ 2.7, 13.4 + * based on the current allocation/indicator/power states + * for the DR connector. + */ +static SpaprDREntitySense physical_entity_sense(SpaprDrc *drc) +{ + /* this assumes all PCI devices are assigned to a 'live insertion' + * power domain, where QEMU manages power state automatically as + * opposed to the guest. present, non-PCI resources are unaffected + * by power state. + */ + return drc->dev ? 
SPAPR_DR_ENTITY_SENSE_PRESENT + : SPAPR_DR_ENTITY_SENSE_EMPTY; +} + +static SpaprDREntitySense logical_entity_sense(SpaprDrc *drc) +{ + switch (drc->state) { + case SPAPR_DRC_STATE_LOGICAL_UNUSABLE: + return SPAPR_DR_ENTITY_SENSE_UNUSABLE; + case SPAPR_DRC_STATE_LOGICAL_AVAILABLE: + case SPAPR_DRC_STATE_LOGICAL_UNISOLATE: + case SPAPR_DRC_STATE_LOGICAL_CONFIGURED: + g_assert(drc->dev); + return SPAPR_DR_ENTITY_SENSE_PRESENT; + default: + g_assert_not_reached(); + } +} + +static void prop_get_index(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj); + uint32_t value = spapr_drc_index(drc); + visit_type_uint32(v, name, &value, errp); +} + +static void prop_get_fdt(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj); + QNull *null = NULL; + int fdt_offset_next, fdt_offset, fdt_depth; + void *fdt; + + if (!drc->fdt) { + visit_type_null(v, NULL, &null, errp); + qobject_unref(null); + return; + } + + fdt = drc->fdt; + fdt_offset = drc->fdt_start_offset; + fdt_depth = 0; + + do { + const char *name = NULL; + const struct fdt_property *prop = NULL; + int prop_len = 0, name_len = 0; + uint32_t tag; + bool ok; + + tag = fdt_next_tag(fdt, fdt_offset, &fdt_offset_next); + switch (tag) { + case FDT_BEGIN_NODE: + fdt_depth++; + name = fdt_get_name(fdt, fdt_offset, &name_len); + if (!visit_start_struct(v, name, NULL, 0, errp)) { + return; + } + break; + case FDT_END_NODE: + /* shouldn't ever see an FDT_END_NODE before FDT_BEGIN_NODE */ + g_assert(fdt_depth > 0); + ok = visit_check_struct(v, errp); + visit_end_struct(v, NULL); + if (!ok) { + return; + } + fdt_depth--; + break; + case FDT_PROP: { + int i; + prop = fdt_get_property_by_offset(fdt, fdt_offset, &prop_len); + name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + if (!visit_start_list(v, name, NULL, 0, errp)) { + return; + } + for (i = 0; i < prop_len; i++) { + if (!visit_type_uint8(v, NULL, (uint8_t *)&prop->data[i], + errp)) { + return; + } + } + ok = visit_check_list(v, errp); + visit_end_list(v, NULL); + if (!ok) { + return; + } + break; + } + default: + error_report("device FDT in unexpected state: %d", tag); + abort(); + } + fdt_offset = fdt_offset_next; + } while (fdt_depth != 0); +} + +void spapr_drc_attach(SpaprDrc *drc, DeviceState *d) +{ + trace_spapr_drc_attach(spapr_drc_index(drc)); + + g_assert(!drc->dev); + g_assert((drc->state == SPAPR_DRC_STATE_LOGICAL_UNUSABLE) + || (drc->state == SPAPR_DRC_STATE_PHYSICAL_POWERON)); + + drc->dev = d; + + object_property_add_link(OBJECT(drc), "device", + object_get_typename(OBJECT(drc->dev)), + (Object **)(&drc->dev), + NULL, 0); +} + +void spapr_drc_unplug_request(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + trace_spapr_drc_unplug_request(spapr_drc_index(drc)); + + g_assert(drc->dev); + + drc->unplug_requested = true; + + if (drc->state != drck->empty_state) { + trace_spapr_drc_awaiting_quiesce(spapr_drc_index(drc)); + return; + } + + spapr_drc_release(drc); +} + +bool spapr_drc_reset(SpaprDrc *drc) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + bool unplug_completed = false; + + trace_spapr_drc_reset(spapr_drc_index(drc)); + + /* immediately upon reset we can safely assume DRCs whose devices + * are pending removal can be safely removed. 
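An aside on prop_get_fdt() above (the same idiom reappears in rtas_ibm_configure_connector() later in this file): walking a flattened device tree with libfdt boils down to fdt_next_tag() plus a depth counter. A minimal sketch, assuming fdt points at a valid blob and offset sits at an FDT_BEGIN_NODE:

#include <libfdt.h>

/* Visit every node and property of the subtree rooted at 'offset'. */
static void fdt_walk(const void *fdt, int offset)
{
    int depth = 0, next;

    do {
        uint32_t tag = fdt_next_tag(fdt, offset, &next);

        switch (tag) {
        case FDT_BEGIN_NODE:
            depth++;   /* fdt_get_name(fdt, offset, NULL) names the node */
            break;
        case FDT_END_NODE:
            depth--;   /* back up one level; reaching 0 ends the walk */
            break;
        case FDT_PROP:
            /* fdt_get_property_by_offset(fdt, offset, &len) fetches it */
            break;
        default:
            break;     /* FDT_NOP and friends: keep scanning */
        }
        offset = next;
    } while (depth > 0);
}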
+ */ + if (drc->unplug_requested) { + spapr_drc_release(drc); + unplug_completed = true; + } + + if (drc->dev) { + /* A device present at reset is ready to go, same as coldplugged */ + drc->state = drck->ready_state; + /* + * Ensure that we are able to send the FDT fragment again + * via configure-connector call if the guest requests. + */ + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + } else { + drc->state = drck->empty_state; + drc->ccs_offset = -1; + drc->ccs_depth = -1; + } + + return unplug_completed; +} + +static bool spapr_drc_unplug_requested_needed(void *opaque) +{ + return spapr_drc_unplug_requested(opaque); +} + +static const VMStateDescription vmstate_spapr_drc_unplug_requested = { + .name = "spapr_drc/unplug_requested", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_unplug_requested_needed, + .fields = (VMStateField []) { + VMSTATE_BOOL(unplug_requested, SpaprDrc), + VMSTATE_END_OF_LIST() + } +}; + +static bool spapr_drc_needed(void *opaque) +{ + SpaprDrc *drc = opaque; + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* + * If no dev is plugged in there is no need to migrate the DRC state + * nor to reset the DRC at CAS. + */ + if (!drc->dev) { + return false; + } + + /* + * We need to reset the DRC at CAS or to migrate the DRC state if it's + * not equal to the expected long-term state, which is the same as the + * coldplugged initial state, or if an unplug request is pending. + */ + return drc->state != drck->ready_state || + spapr_drc_unplug_requested(drc); +} + +static const VMStateDescription vmstate_spapr_drc = { + .name = "spapr_drc", + .version_id = 1, + .minimum_version_id = 1, + .needed = spapr_drc_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(state, SpaprDrc), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &vmstate_spapr_drc_unplug_requested, + NULL + } +}; + +static void drc_realize(DeviceState *d, Error **errp) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(d); + Object *root_container; + gchar *link_name; + const char *child_name; + + trace_spapr_drc_realize(spapr_drc_index(drc)); + /* NOTE: we do this as part of realize/unrealize due to the fact + * that the guest will communicate with the DRC via RTAS calls + * referencing the global DRC index. 
By unlinking the DRC + * from DRC_CONTAINER_PATH/<drc_index> we effectively make it + * inaccessible by the guest, since lookups rely on this path + * existing in the composition tree + */ + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + link_name = g_strdup_printf("%x", spapr_drc_index(drc)); + child_name = object_get_canonical_path_component(OBJECT(drc)); + trace_spapr_drc_realize_child(spapr_drc_index(drc), child_name); + object_property_add_alias(root_container, link_name, + drc->owner, child_name); + g_free(link_name); + vmstate_register(VMSTATE_IF(drc), spapr_drc_index(drc), &vmstate_spapr_drc, + drc); + trace_spapr_drc_realize_complete(spapr_drc_index(drc)); +} + +static void drc_unrealize(DeviceState *d) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(d); + Object *root_container; + gchar *name; + + trace_spapr_drc_unrealize(spapr_drc_index(drc)); + vmstate_unregister(VMSTATE_IF(drc), &vmstate_spapr_drc, drc); + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + name = g_strdup_printf("%x", spapr_drc_index(drc)); + object_property_del(root_container, name); + g_free(name); +} + +SpaprDrc *spapr_dr_connector_new(Object *owner, const char *type, + uint32_t id) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(object_new(type)); + char *prop_name; + + drc->id = id; + drc->owner = owner; + prop_name = g_strdup_printf("dr-connector[%"PRIu32"]", + spapr_drc_index(drc)); + object_property_add_child(owner, prop_name, OBJECT(drc)); + object_unref(OBJECT(drc)); + qdev_realize(DEVICE(drc), NULL, NULL); + g_free(prop_name); + + return drc; +} + +static void spapr_dr_connector_instance_init(Object *obj) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(obj); + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + object_property_add_uint32_ptr(obj, "id", &drc->id, OBJ_PROP_FLAG_READ); + object_property_add(obj, "index", "uint32", prop_get_index, + NULL, NULL, NULL); + object_property_add(obj, "fdt", "struct", prop_get_fdt, + NULL, NULL, NULL); + drc->state = drck->empty_state; +} + +static void spapr_dr_connector_class_init(ObjectClass *k, void *data) +{ + DeviceClass *dk = DEVICE_CLASS(k); + + dk->realize = drc_realize; + dk->unrealize = drc_unrealize; + /* + * Reason: DR connector needs to be wired to either the machine or to a + * PHB in spapr_dr_connector_new(). 
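For reference, the wiring this refers to happens at the owner's init time. A call-site sketch, where machine and lmb_index are illustrative placeholders (real callers derive the id from the entity's index or address):

/* e.g. at machine init, one connector per hot-pluggable LMB: */
SpaprDrc *drc = spapr_dr_connector_new(OBJECT(machine),     /* owner (illustrative) */
                                       TYPE_SPAPR_DRC_LMB,  /* connector type */
                                       lmb_index);          /* illustrative id */

Note that spapr_dr_connector_new() above already realizes the connector and parents it under the owner, dropping its own reference, so the caller does not manage the DRC's lifetime.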
+ */ + dk->user_creatable = false; +} + +static bool drc_physical_needed(void *opaque) +{ + SpaprDrcPhysical *drcp = (SpaprDrcPhysical *)opaque; + SpaprDrc *drc = SPAPR_DR_CONNECTOR(drcp); + + if ((drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_ACTIVE)) + || (!drc->dev && (drcp->dr_indicator == SPAPR_DR_INDICATOR_INACTIVE))) { + return false; + } + return true; +} + +static const VMStateDescription vmstate_spapr_drc_physical = { + .name = "spapr_drc/physical", + .version_id = 1, + .minimum_version_id = 1, + .needed = drc_physical_needed, + .fields = (VMStateField []) { + VMSTATE_UINT32(dr_indicator, SpaprDrcPhysical), + VMSTATE_END_OF_LIST() + } +}; + +static void drc_physical_reset(void *opaque) +{ + SpaprDrc *drc = SPAPR_DR_CONNECTOR(opaque); + SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(drc); + + if (drc->dev) { + drcp->dr_indicator = SPAPR_DR_INDICATOR_ACTIVE; + } else { + drcp->dr_indicator = SPAPR_DR_INDICATOR_INACTIVE; + } +} + +static void realize_physical(DeviceState *d, Error **errp) +{ + SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(d); + Error *local_err = NULL; + + drc_realize(d, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + + vmstate_register(VMSTATE_IF(drcp), + spapr_drc_index(SPAPR_DR_CONNECTOR(drcp)), + &vmstate_spapr_drc_physical, drcp); + qemu_register_reset(drc_physical_reset, drcp); +} + +static void unrealize_physical(DeviceState *d) +{ + SpaprDrcPhysical *drcp = SPAPR_DRC_PHYSICAL(d); + + drc_unrealize(d); + vmstate_unregister(VMSTATE_IF(drcp), &vmstate_spapr_drc_physical, drcp); + qemu_unregister_reset(drc_physical_reset, drcp); +} + +static void spapr_drc_physical_class_init(ObjectClass *k, void *data) +{ + DeviceClass *dk = DEVICE_CLASS(k); + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + dk->realize = realize_physical; + dk->unrealize = unrealize_physical; + drck->dr_entity_sense = physical_entity_sense; + drck->isolate = drc_isolate_physical; + drck->unisolate = drc_unisolate_physical; + drck->ready_state = SPAPR_DRC_STATE_PHYSICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_PHYSICAL_POWERON; +} + +static void spapr_drc_logical_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->dr_entity_sense = logical_entity_sense; + drck->isolate = drc_isolate_logical; + drck->unisolate = drc_unisolate_logical; + drck->ready_state = SPAPR_DRC_STATE_LOGICAL_CONFIGURED; + drck->empty_state = SPAPR_DRC_STATE_LOGICAL_UNUSABLE; +} + +static void spapr_drc_cpu_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_CPU; + drck->typename = "CPU"; + drck->drc_name_prefix = "CPU "; + drck->release = spapr_core_release; + drck->dt_populate = spapr_core_dt_populate; +} + +static void spapr_drc_pci_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PCI; + drck->typename = "28"; + drck->drc_name_prefix = "C"; + drck->release = spapr_phb_remove_pci_device_cb; + drck->dt_populate = spapr_pci_dt_populate; +} + +static void spapr_drc_lmb_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_LMB; + drck->typename = "MEM"; + drck->drc_name_prefix = "LMB "; + drck->release = spapr_lmb_release; + drck->dt_populate = spapr_lmb_dt_populate; +} + +static void spapr_drc_phb_class_init(ObjectClass *k, void *data) +{ + 
SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PHB; + drck->typename = "PHB"; + drck->drc_name_prefix = "PHB "; + drck->release = spapr_phb_release; + drck->dt_populate = spapr_phb_dt_populate; +} + +static void spapr_drc_pmem_class_init(ObjectClass *k, void *data) +{ + SpaprDrcClass *drck = SPAPR_DR_CONNECTOR_CLASS(k); + + drck->typeshift = SPAPR_DR_CONNECTOR_TYPE_SHIFT_PMEM; + drck->typename = "PMEM"; + drck->drc_name_prefix = "PMEM "; + drck->release = NULL; + drck->dt_populate = spapr_pmem_dt_populate; +} + +static const TypeInfo spapr_dr_connector_info = { + .name = TYPE_SPAPR_DR_CONNECTOR, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprDrc), + .instance_init = spapr_dr_connector_instance_init, + .class_size = sizeof(SpaprDrcClass), + .class_init = spapr_dr_connector_class_init, + .abstract = true, +}; + +static const TypeInfo spapr_drc_physical_info = { + .name = TYPE_SPAPR_DRC_PHYSICAL, + .parent = TYPE_SPAPR_DR_CONNECTOR, + .instance_size = sizeof(SpaprDrcPhysical), + .class_init = spapr_drc_physical_class_init, + .abstract = true, +}; + +static const TypeInfo spapr_drc_logical_info = { + .name = TYPE_SPAPR_DRC_LOGICAL, + .parent = TYPE_SPAPR_DR_CONNECTOR, + .class_init = spapr_drc_logical_class_init, + .abstract = true, +}; + +static const TypeInfo spapr_drc_cpu_info = { + .name = TYPE_SPAPR_DRC_CPU, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .class_init = spapr_drc_cpu_class_init, +}; + +static const TypeInfo spapr_drc_pci_info = { + .name = TYPE_SPAPR_DRC_PCI, + .parent = TYPE_SPAPR_DRC_PHYSICAL, + .class_init = spapr_drc_pci_class_init, +}; + +static const TypeInfo spapr_drc_lmb_info = { + .name = TYPE_SPAPR_DRC_LMB, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .class_init = spapr_drc_lmb_class_init, +}; + +static const TypeInfo spapr_drc_phb_info = { + .name = TYPE_SPAPR_DRC_PHB, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .instance_size = sizeof(SpaprDrc), + .class_init = spapr_drc_phb_class_init, +}; + +static const TypeInfo spapr_drc_pmem_info = { + .name = TYPE_SPAPR_DRC_PMEM, + .parent = TYPE_SPAPR_DRC_LOGICAL, + .class_init = spapr_drc_pmem_class_init, +}; + +/* helper functions for external users */ + +SpaprDrc *spapr_drc_by_index(uint32_t index) +{ + Object *obj; + gchar *name; + + name = g_strdup_printf("%s/%x", DRC_CONTAINER_PATH, index); + obj = object_resolve_path(name, NULL); + g_free(name); + + return !obj ? NULL : SPAPR_DR_CONNECTOR(obj); +} + +SpaprDrc *spapr_drc_by_id(const char *type, uint32_t id) +{ + SpaprDrcClass *drck + = SPAPR_DR_CONNECTOR_CLASS(object_class_by_name(type)); + + return spapr_drc_by_index(drck->typeshift << DRC_INDEX_TYPE_SHIFT + | (id & DRC_INDEX_ID_MASK)); +} + +/** + * spapr_dt_drc + * + * @fdt: libfdt device tree + * @path: path in the DT to generate properties + * @owner: parent Object/DeviceState for which to generate DRC + * descriptions for + * @drc_type_mask: mask of SpaprDrcType values corresponding + * to the types of DRCs to generate entries for + * + * generate OF properties to describe DRC topology/indices to guests + * + * as documented in PAPR+ v2.1, 13.5.2 + */ +int spapr_dt_drc(void *fdt, int offset, Object *owner, uint32_t drc_type_mask) +{ + Object *root_container; + ObjectProperty *prop; + ObjectPropertyIterator iter; + uint32_t drc_count = 0; + GArray *drc_indexes, *drc_power_domains; + GString *drc_names, *drc_types; + int ret; + + /* + * This should really be only called once per node since it overwrites + * the OF properties if they already exist. 
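Back to spapr_drc_by_id() above: the index round trip it performs is plain bit arithmetic. A self-contained sketch; the typeshift value 1 is illustrative, the real values come from the SPAPR_DR_CONNECTOR_TYPE_SHIFT_* definitions:

#include <stdint.h>
#include <stdio.h>

#define DRC_INDEX_TYPE_SHIFT 28
#define DRC_INDEX_ID_MASK ((1ULL << DRC_INDEX_TYPE_SHIFT) - 1)

int main(void)
{
    uint32_t typeshift = 1;  /* illustrative connector type */
    uint32_t id = 5;
    uint32_t index = (typeshift << DRC_INDEX_TYPE_SHIFT) | (id & DRC_INDEX_ID_MASK);

    printf("index = 0x%08x\n", index);  /* 0x10000005 */
    printf("typeshift = %u, id = %u\n",
           index >> DRC_INDEX_TYPE_SHIFT,            /* 1 */
           (uint32_t)(index & DRC_INDEX_ID_MASK));   /* 5 */
    return 0;
}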
+ */ + g_assert(!fdt_get_property(fdt, offset, "ibm,drc-indexes", NULL)); + + /* the first entry of each properties is a 32-bit integer encoding + * the number of elements in the array. we won't know this until + * we complete the iteration through all the matching DRCs, but + * reserve the space now and set the offsets accordingly so we + * can fill them in later. + */ + drc_indexes = g_array_new(false, true, sizeof(uint32_t)); + drc_indexes = g_array_set_size(drc_indexes, 1); + drc_power_domains = g_array_new(false, true, sizeof(uint32_t)); + drc_power_domains = g_array_set_size(drc_power_domains, 1); + drc_names = g_string_set_size(g_string_new(NULL), sizeof(uint32_t)); + drc_types = g_string_set_size(g_string_new(NULL), sizeof(uint32_t)); + + /* aliases for all DRConnector objects will be rooted in QOM + * composition tree at DRC_CONTAINER_PATH + */ + root_container = container_get(object_get_root(), DRC_CONTAINER_PATH); + + object_property_iter_init(&iter, root_container); + while ((prop = object_property_iter_next(&iter))) { + Object *obj; + SpaprDrc *drc; + SpaprDrcClass *drck; + char *drc_name = NULL; + uint32_t drc_index, drc_power_domain; + + if (!strstart(prop->type, "link<", NULL)) { + continue; + } + + obj = object_property_get_link(root_container, prop->name, + &error_abort); + drc = SPAPR_DR_CONNECTOR(obj); + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + if (owner && (drc->owner != owner)) { + continue; + } + + if ((spapr_drc_type(drc) & drc_type_mask) == 0) { + continue; + } + + drc_count++; + + /* ibm,drc-indexes */ + drc_index = cpu_to_be32(spapr_drc_index(drc)); + g_array_append_val(drc_indexes, drc_index); + + /* ibm,drc-power-domains */ + drc_power_domain = cpu_to_be32(-1); + g_array_append_val(drc_power_domains, drc_power_domain); + + /* ibm,drc-names */ + drc_name = spapr_drc_name(drc); + drc_names = g_string_append(drc_names, drc_name); + drc_names = g_string_insert_len(drc_names, -1, "\0", 1); + g_free(drc_name); + + /* ibm,drc-types */ + drc_types = g_string_append(drc_types, drck->typename); + drc_types = g_string_insert_len(drc_types, -1, "\0", 1); + } + + /* now write the drc count into the space we reserved at the + * beginning of the arrays previously + */ + *(uint32_t *)drc_indexes->data = cpu_to_be32(drc_count); + *(uint32_t *)drc_power_domains->data = cpu_to_be32(drc_count); + *(uint32_t *)drc_names->str = cpu_to_be32(drc_count); + *(uint32_t *)drc_types->str = cpu_to_be32(drc_count); + + ret = fdt_setprop(fdt, offset, "ibm,drc-indexes", + drc_indexes->data, + drc_indexes->len * sizeof(uint32_t)); + if (ret) { + error_report("Couldn't create ibm,drc-indexes property"); + goto out; + } + + ret = fdt_setprop(fdt, offset, "ibm,drc-power-domains", + drc_power_domains->data, + drc_power_domains->len * sizeof(uint32_t)); + if (ret) { + error_report("Couldn't finalize ibm,drc-power-domains property"); + goto out; + } + + ret = fdt_setprop(fdt, offset, "ibm,drc-names", + drc_names->str, drc_names->len); + if (ret) { + error_report("Couldn't finalize ibm,drc-names property"); + goto out; + } + + ret = fdt_setprop(fdt, offset, "ibm,drc-types", + drc_types->str, drc_types->len); + if (ret) { + error_report("Couldn't finalize ibm,drc-types property"); + goto out; + } + +out: + g_array_free(drc_indexes, true); + g_array_free(drc_power_domains, true); + g_string_free(drc_names, true); + g_string_free(drc_types, true); + + return ret; +} + +void spapr_drc_reset_all(SpaprMachineState *spapr) +{ + Object *drc_container; + ObjectProperty *prop; + ObjectPropertyIterator 
iter; + + drc_container = container_get(object_get_root(), DRC_CONTAINER_PATH); +restart: + object_property_iter_init(&iter, drc_container); + while ((prop = object_property_iter_next(&iter))) { + SpaprDrc *drc; + + if (!strstart(prop->type, "link<", NULL)) { + continue; + } + drc = SPAPR_DR_CONNECTOR(object_property_get_link(drc_container, + prop->name, + &error_abort)); + + /* + * This will complete any pending plug/unplug requests. + * In case of a unplugged PHB or PCI bridge, this will + * cause some DRCs to be destroyed and thus potentially + * invalidate the iterator. + */ + if (spapr_drc_reset(drc)) { + goto restart; + } + } +} + +/* + * RTAS calls + */ + +static uint32_t rtas_set_isolation_state(uint32_t idx, uint32_t state) +{ + SpaprDrc *drc = spapr_drc_by_index(idx); + SpaprDrcClass *drck; + + if (!drc) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + trace_spapr_drc_set_isolation_state(spapr_drc_index(drc), state); + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + switch (state) { + case SPAPR_DR_ISOLATION_STATE_ISOLATED: + return drck->isolate(drc); + + case SPAPR_DR_ISOLATION_STATE_UNISOLATED: + return drck->unisolate(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } +} + +static uint32_t rtas_set_allocation_state(uint32_t idx, uint32_t state) +{ + SpaprDrc *drc = spapr_drc_by_index(idx); + + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_LOGICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + + trace_spapr_drc_set_allocation_state(spapr_drc_index(drc), state); + + switch (state) { + case SPAPR_DR_ALLOCATION_STATE_USABLE: + return drc_set_usable(drc); + + case SPAPR_DR_ALLOCATION_STATE_UNUSABLE: + return drc_set_unusable(drc); + + default: + return RTAS_OUT_PARAM_ERROR; + } +} + +static uint32_t rtas_set_dr_indicator(uint32_t idx, uint32_t state) +{ + SpaprDrc *drc = spapr_drc_by_index(idx); + + if (!drc || !object_dynamic_cast(OBJECT(drc), TYPE_SPAPR_DRC_PHYSICAL)) { + return RTAS_OUT_NO_SUCH_INDICATOR; + } + if ((state != SPAPR_DR_INDICATOR_INACTIVE) + && (state != SPAPR_DR_INDICATOR_ACTIVE) + && (state != SPAPR_DR_INDICATOR_IDENTIFY) + && (state != SPAPR_DR_INDICATOR_ACTION)) { + return RTAS_OUT_PARAM_ERROR; /* bad state parameter */ + } + + trace_spapr_drc_set_dr_indicator(idx, state); + SPAPR_DRC_PHYSICAL(drc)->dr_indicator = state; + return RTAS_OUT_SUCCESS; +} + +static void rtas_set_indicator(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, + uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t type, idx, state; + uint32_t ret = RTAS_OUT_SUCCESS; + + if (nargs != 3 || nret != 1) { + ret = RTAS_OUT_PARAM_ERROR; + goto out; + } + + type = rtas_ld(args, 0); + idx = rtas_ld(args, 1); + state = rtas_ld(args, 2); + + switch (type) { + case RTAS_SENSOR_TYPE_ISOLATION_STATE: + ret = rtas_set_isolation_state(idx, state); + break; + case RTAS_SENSOR_TYPE_DR: + ret = rtas_set_dr_indicator(idx, state); + break; + case RTAS_SENSOR_TYPE_ALLOCATION_STATE: + ret = rtas_set_allocation_state(idx, state); + break; + default: + ret = RTAS_OUT_NOT_SUPPORTED; + } + +out: + rtas_st(rets, 0, ret); +} + +static void rtas_get_sensor_state(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + uint32_t sensor_type; + uint32_t sensor_index; + uint32_t sensor_state = 0; + SpaprDrc *drc; + SpaprDrcClass *drck; + uint32_t ret = RTAS_OUT_SUCCESS; + + if (nargs != 2 || nret != 2) { + ret = RTAS_OUT_PARAM_ERROR; + goto out; + } + + sensor_type = rtas_ld(args, 
0); + sensor_index = rtas_ld(args, 1); + + if (sensor_type != RTAS_SENSOR_TYPE_ENTITY_SENSE) { + /* currently only DR-related sensors are implemented */ + trace_spapr_rtas_get_sensor_state_not_supported(sensor_index, + sensor_type); + ret = RTAS_OUT_NOT_SUPPORTED; + goto out; + } + + drc = spapr_drc_by_index(sensor_index); + if (!drc) { + trace_spapr_rtas_get_sensor_state_invalid(sensor_index); + ret = RTAS_OUT_PARAM_ERROR; + goto out; + } + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + sensor_state = drck->dr_entity_sense(drc); + +out: + rtas_st(rets, 0, ret); + rtas_st(rets, 1, sensor_state); +} + +/* configure-connector work area offsets, int32_t units for field + * indexes, bytes for field offset/len values. + * + * as documented by PAPR+ v2.7, 13.5.3.5 + */ +#define CC_IDX_NODE_NAME_OFFSET 2 +#define CC_IDX_PROP_NAME_OFFSET 2 +#define CC_IDX_PROP_LEN 3 +#define CC_IDX_PROP_DATA_OFFSET 4 +#define CC_VAL_DATA_OFFSET ((CC_IDX_PROP_DATA_OFFSET + 1) * 4) +#define CC_WA_LEN 4096 + +static void configure_connector_st(target_ulong addr, target_ulong offset, + const void *buf, size_t len) +{ + cpu_physical_memory_write(ppc64_phys_to_real(addr + offset), + buf, MIN(len, CC_WA_LEN - offset)); +} + +static void rtas_ibm_configure_connector(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + uint64_t wa_addr; + uint64_t wa_offset; + uint32_t drc_index; + SpaprDrc *drc; + SpaprDrcClass *drck; + SpaprDRCCResponse resp = SPAPR_DR_CC_RESPONSE_CONTINUE; + int rc; + + if (nargs != 2 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + wa_addr = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 0); + + drc_index = rtas_ld(wa_addr, 0); + drc = spapr_drc_by_index(drc_index); + if (!drc) { + trace_spapr_rtas_ibm_configure_connector_invalid(drc_index); + rc = RTAS_OUT_PARAM_ERROR; + goto out; + } + + if ((drc->state != SPAPR_DRC_STATE_LOGICAL_UNISOLATE) + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_UNISOLATE) + && (drc->state != SPAPR_DRC_STATE_LOGICAL_CONFIGURED) + && (drc->state != SPAPR_DRC_STATE_PHYSICAL_CONFIGURED)) { + /* + * Need to unisolate the device before configuring + * or it should already be in configured state to + * allow configure-connector be called repeatedly. + */ + rc = SPAPR_DR_CC_RESPONSE_NOT_CONFIGURABLE; + goto out; + } + + drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc); + + /* + * This indicates that the kernel is reconfiguring a LMB due to + * a failed hotunplug. Rollback the DIMM unplug process. 
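For orientation, the CC_* constants defined before rtas_ibm_configure_connector() give the 4096-byte work area the following shape for an FDT_PROP response (word indexes are int32_t slots, offsets in bytes; the property name "reg" is illustrative):

    word 0 (byte  0): DRC index, written by the guest when making the call
    word 2 (byte  8): CC_IDX_PROP_NAME_OFFSET -> 20 (CC_VAL_DATA_OFFSET)
    word 3 (byte 12): CC_IDX_PROP_LEN         -> length of the property data
    word 4 (byte 16): CC_IDX_PROP_DATA_OFFSET -> 20 + strlen("reg") + 1 = 24
    byte 20 onward:   "reg\0" followed by the property data itself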
+ */ + if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB && + drc->unplug_requested) { + spapr_memory_unplug_rollback(spapr, drc->dev); + } + + if (!drc->fdt) { + void *fdt; + int fdt_size; + + fdt = create_device_tree(&fdt_size); + + if (drck->dt_populate(drc, spapr, fdt, &drc->fdt_start_offset, + NULL)) { + g_free(fdt); + rc = SPAPR_DR_CC_RESPONSE_ERROR; + goto out; + } + + drc->fdt = fdt; + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + } + + do { + uint32_t tag; + const char *name; + const struct fdt_property *prop; + int fdt_offset_next, prop_len; + + tag = fdt_next_tag(drc->fdt, drc->ccs_offset, &fdt_offset_next); + + switch (tag) { + case FDT_BEGIN_NODE: + drc->ccs_depth++; + name = fdt_get_name(drc->fdt, drc->ccs_offset, NULL); + + /* provide the name of the next OF node */ + wa_offset = CC_VAL_DATA_OFFSET; + rtas_st(wa_addr, CC_IDX_NODE_NAME_OFFSET, wa_offset); + configure_connector_st(wa_addr, wa_offset, name, strlen(name) + 1); + resp = SPAPR_DR_CC_RESPONSE_NEXT_CHILD; + break; + case FDT_END_NODE: + drc->ccs_depth--; + if (drc->ccs_depth == 0) { + uint32_t drc_index = spapr_drc_index(drc); + + /* done sending the device tree, move to configured state */ + trace_spapr_drc_set_configured(drc_index); + drc->state = drck->ready_state; + /* + * Ensure that we are able to send the FDT fragment + * again via configure-connector call if the guest requests. + */ + drc->ccs_offset = drc->fdt_start_offset; + drc->ccs_depth = 0; + fdt_offset_next = drc->fdt_start_offset; + resp = SPAPR_DR_CC_RESPONSE_SUCCESS; + } else { + resp = SPAPR_DR_CC_RESPONSE_PREV_PARENT; + } + break; + case FDT_PROP: + prop = fdt_get_property_by_offset(drc->fdt, drc->ccs_offset, + &prop_len); + name = fdt_string(drc->fdt, fdt32_to_cpu(prop->nameoff)); + + /* provide the name of the next OF property */ + wa_offset = CC_VAL_DATA_OFFSET; + rtas_st(wa_addr, CC_IDX_PROP_NAME_OFFSET, wa_offset); + configure_connector_st(wa_addr, wa_offset, name, strlen(name) + 1); + + /* provide the length and value of the OF property. 
data gets
+             * placed immediately after the NULL terminator of the OF
+             * property's name string
+             */
+            wa_offset += strlen(name) + 1;
+            rtas_st(wa_addr, CC_IDX_PROP_LEN, prop_len);
+            rtas_st(wa_addr, CC_IDX_PROP_DATA_OFFSET, wa_offset);
+            configure_connector_st(wa_addr, wa_offset, prop->data, prop_len);
+            resp = SPAPR_DR_CC_RESPONSE_NEXT_PROPERTY;
+            break;
+        case FDT_END:
+            resp = SPAPR_DR_CC_RESPONSE_ERROR;
+            /* fall through */
+        default:
+            /* keep seeking for an actionable tag */
+            break;
+        }
+        if (drc->ccs_offset >= 0) {
+            drc->ccs_offset = fdt_offset_next;
+        }
+    } while (resp == SPAPR_DR_CC_RESPONSE_CONTINUE);
+
+    rc = resp;
+out:
+    rtas_st(rets, 0, rc);
+}
+
+static void spapr_drc_register_types(void)
+{
+    type_register_static(&spapr_dr_connector_info);
+    type_register_static(&spapr_drc_physical_info);
+    type_register_static(&spapr_drc_logical_info);
+    type_register_static(&spapr_drc_cpu_info);
+    type_register_static(&spapr_drc_pci_info);
+    type_register_static(&spapr_drc_lmb_info);
+    type_register_static(&spapr_drc_phb_info);
+    type_register_static(&spapr_drc_pmem_info);
+
+    spapr_rtas_register(RTAS_SET_INDICATOR, "set-indicator",
+                        rtas_set_indicator);
+    spapr_rtas_register(RTAS_GET_SENSOR_STATE, "get-sensor-state",
+                        rtas_get_sensor_state);
+    spapr_rtas_register(RTAS_IBM_CONFIGURE_CONNECTOR, "ibm,configure-connector",
+                        rtas_ibm_configure_connector);
+}
+type_init(spapr_drc_register_types)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
new file mode 100644
index 000000000..630e86282
--- /dev/null
+++ b/hw/ppc/spapr_events.c
@@ -0,0 +1,1082 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * RTAS events handling
+ *
+ * Copyright (c) 2012 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "sysemu/device_tree.h" +#include "sysemu/runstate.h" + +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/pci/pci.h" +#include "hw/irq.h" +#include "hw/pci-host/spapr.h" +#include "hw/ppc/spapr_drc.h" +#include "qemu/help_option.h" +#include "qemu/bcd.h" +#include "qemu/main-loop.h" +#include "hw/ppc/spapr_ovec.h" +#include <libfdt.h> +#include "migration/blocker.h" + +#define RTAS_LOG_VERSION_MASK 0xff000000 +#define RTAS_LOG_VERSION_6 0x06000000 +#define RTAS_LOG_SEVERITY_MASK 0x00e00000 +#define RTAS_LOG_SEVERITY_ALREADY_REPORTED 0x00c00000 +#define RTAS_LOG_SEVERITY_FATAL 0x00a00000 +#define RTAS_LOG_SEVERITY_ERROR 0x00800000 +#define RTAS_LOG_SEVERITY_ERROR_SYNC 0x00600000 +#define RTAS_LOG_SEVERITY_WARNING 0x00400000 +#define RTAS_LOG_SEVERITY_EVENT 0x00200000 +#define RTAS_LOG_SEVERITY_NO_ERROR 0x00000000 +#define RTAS_LOG_DISPOSITION_MASK 0x00180000 +#define RTAS_LOG_DISPOSITION_FULLY_RECOVERED 0x00000000 +#define RTAS_LOG_DISPOSITION_LIMITED_RECOVERY 0x00080000 +#define RTAS_LOG_DISPOSITION_NOT_RECOVERED 0x00100000 +#define RTAS_LOG_OPTIONAL_PART_PRESENT 0x00040000 +#define RTAS_LOG_INITIATOR_MASK 0x0000f000 +#define RTAS_LOG_INITIATOR_UNKNOWN 0x00000000 +#define RTAS_LOG_INITIATOR_CPU 0x00001000 +#define RTAS_LOG_INITIATOR_PCI 0x00002000 +#define RTAS_LOG_INITIATOR_MEMORY 0x00004000 +#define RTAS_LOG_INITIATOR_HOTPLUG 0x00006000 +#define RTAS_LOG_TARGET_MASK 0x00000f00 +#define RTAS_LOG_TARGET_UNKNOWN 0x00000000 +#define RTAS_LOG_TARGET_CPU 0x00000100 +#define RTAS_LOG_TARGET_PCI 0x00000200 +#define RTAS_LOG_TARGET_MEMORY 0x00000400 +#define RTAS_LOG_TARGET_HOTPLUG 0x00000600 +#define RTAS_LOG_TYPE_MASK 0x000000ff +#define RTAS_LOG_TYPE_OTHER 0x00000000 +#define RTAS_LOG_TYPE_RETRY 0x00000001 +#define RTAS_LOG_TYPE_TCE_ERR 0x00000002 +#define RTAS_LOG_TYPE_INTERN_DEV_FAIL 0x00000003 +#define RTAS_LOG_TYPE_TIMEOUT 0x00000004 +#define RTAS_LOG_TYPE_DATA_PARITY 0x00000005 +#define RTAS_LOG_TYPE_ADDR_PARITY 0x00000006 +#define RTAS_LOG_TYPE_CACHE_PARITY 0x00000007 +#define RTAS_LOG_TYPE_ADDR_INVALID 0x00000008 +#define RTAS_LOG_TYPE_ECC_UNCORR 0x00000009 +#define RTAS_LOG_TYPE_ECC_CORR 0x0000000a +#define RTAS_LOG_TYPE_EPOW 0x00000040 +#define RTAS_LOG_TYPE_HOTPLUG 0x000000e5 + +struct rtas_error_log { + uint32_t summary; + uint32_t extended_length; +} QEMU_PACKED; + +struct rtas_event_log_v6 { + uint8_t b0; +#define RTAS_LOG_V6_B0_VALID 0x80 +#define RTAS_LOG_V6_B0_UNRECOVERABLE_ERROR 0x40 +#define RTAS_LOG_V6_B0_RECOVERABLE_ERROR 0x20 +#define RTAS_LOG_V6_B0_DEGRADED_OPERATION 0x10 +#define RTAS_LOG_V6_B0_PREDICTIVE_ERROR 0x08 +#define RTAS_LOG_V6_B0_NEW_LOG 0x04 +#define RTAS_LOG_V6_B0_BIGENDIAN 0x02 + uint8_t _resv1; + uint8_t b2; +#define RTAS_LOG_V6_B2_POWERPC_FORMAT 0x80 +#define RTAS_LOG_V6_B2_LOG_FORMAT_MASK 0x0f +#define RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT 0x0e + uint8_t _resv2[9]; + uint32_t company; +#define RTAS_LOG_V6_COMPANY_IBM 0x49424d00 /* IBM<null> */ +} QEMU_PACKED; + +struct rtas_event_log_v6_section_header { + uint16_t section_id; + uint16_t section_length; + uint8_t section_version; + uint8_t section_subtype; + uint16_t creator_component_id; +} QEMU_PACKED; + +struct rtas_event_log_v6_maina { +#define RTAS_LOG_V6_SECTION_ID_MAINA 0x5048 /* PH */ + struct rtas_event_log_v6_section_header hdr; + uint32_t creation_date; /* BCD: YYYYMMDD */ + uint32_t creation_time; /* BCD: HHMMSS00 */ + uint8_t _platform1[8]; + char creator_id; + uint8_t 
_resv1[2]; + uint8_t section_count; + uint8_t _resv2[4]; + uint8_t _platform2[8]; + uint32_t plid; + uint8_t _platform3[4]; +} QEMU_PACKED; + +struct rtas_event_log_v6_mainb { +#define RTAS_LOG_V6_SECTION_ID_MAINB 0x5548 /* UH */ + struct rtas_event_log_v6_section_header hdr; + uint8_t subsystem_id; + uint8_t _platform1; + uint8_t event_severity; + uint8_t event_subtype; + uint8_t _platform2[4]; + uint8_t _resv1[2]; + uint16_t action_flags; + uint8_t _resv2[4]; +} QEMU_PACKED; + +struct rtas_event_log_v6_epow { +#define RTAS_LOG_V6_SECTION_ID_EPOW 0x4550 /* EP */ + struct rtas_event_log_v6_section_header hdr; + uint8_t sensor_value; +#define RTAS_LOG_V6_EPOW_ACTION_RESET 0 +#define RTAS_LOG_V6_EPOW_ACTION_WARN_COOLING 1 +#define RTAS_LOG_V6_EPOW_ACTION_WARN_POWER 2 +#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN 3 +#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_HALT 4 +#define RTAS_LOG_V6_EPOW_ACTION_MAIN_ENCLOSURE 5 +#define RTAS_LOG_V6_EPOW_ACTION_POWER_OFF 7 + uint8_t event_modifier; +#define RTAS_LOG_V6_EPOW_MODIFIER_NORMAL 1 +#define RTAS_LOG_V6_EPOW_MODIFIER_ON_UPS 2 +#define RTAS_LOG_V6_EPOW_MODIFIER_CRITICAL 3 +#define RTAS_LOG_V6_EPOW_MODIFIER_TEMPERATURE 4 + uint8_t extended_modifier; +#define RTAS_LOG_V6_EPOW_XMODIFIER_SYSTEM_WIDE 0 +#define RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC 1 + uint8_t _resv; + uint64_t reason_code; +} QEMU_PACKED; + +struct epow_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_maina maina; + struct rtas_event_log_v6_mainb mainb; + struct rtas_event_log_v6_epow epow; +} QEMU_PACKED; + +union drc_identifier { + uint32_t index; + uint32_t count; + struct { + uint32_t count; + uint32_t index; + } count_indexed; + char name[1]; +} QEMU_PACKED; + +struct rtas_event_log_v6_hp { +#define RTAS_LOG_V6_SECTION_ID_HOTPLUG 0x4850 /* HP */ + struct rtas_event_log_v6_section_header hdr; + uint8_t hotplug_type; +#define RTAS_LOG_V6_HP_TYPE_CPU 1 +#define RTAS_LOG_V6_HP_TYPE_MEMORY 2 +#define RTAS_LOG_V6_HP_TYPE_SLOT 3 +#define RTAS_LOG_V6_HP_TYPE_PHB 4 +#define RTAS_LOG_V6_HP_TYPE_PCI 5 +#define RTAS_LOG_V6_HP_TYPE_PMEM 6 + uint8_t hotplug_action; +#define RTAS_LOG_V6_HP_ACTION_ADD 1 +#define RTAS_LOG_V6_HP_ACTION_REMOVE 2 + uint8_t hotplug_identifier; +#define RTAS_LOG_V6_HP_ID_DRC_NAME 1 +#define RTAS_LOG_V6_HP_ID_DRC_INDEX 2 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT 3 +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED 4 + uint8_t reserved; + union drc_identifier drc_id; +} QEMU_PACKED; + +struct hp_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_maina maina; + struct rtas_event_log_v6_mainb mainb; + struct rtas_event_log_v6_hp hp; +} QEMU_PACKED; + +struct rtas_event_log_v6_mc { +#define RTAS_LOG_V6_SECTION_ID_MC 0x4D43 /* MC */ + struct rtas_event_log_v6_section_header hdr; + uint32_t fru_id; + uint32_t proc_id; + uint8_t error_type; +#define RTAS_LOG_V6_MC_TYPE_UE 0 +#define RTAS_LOG_V6_MC_TYPE_SLB 1 +#define RTAS_LOG_V6_MC_TYPE_ERAT 2 +#define RTAS_LOG_V6_MC_TYPE_TLB 4 +#define RTAS_LOG_V6_MC_TYPE_D_CACHE 5 +#define RTAS_LOG_V6_MC_TYPE_I_CACHE 7 + uint8_t sub_err_type; +#define RTAS_LOG_V6_MC_UE_INDETERMINATE 0 +#define RTAS_LOG_V6_MC_UE_IFETCH 1 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH 2 +#define RTAS_LOG_V6_MC_UE_LOAD_STORE 3 +#define RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE 4 +#define RTAS_LOG_V6_MC_SLB_PARITY 0 +#define RTAS_LOG_V6_MC_SLB_MULTIHIT 1 +#define RTAS_LOG_V6_MC_SLB_INDETERMINATE 2 +#define RTAS_LOG_V6_MC_ERAT_PARITY 1 +#define RTAS_LOG_V6_MC_ERAT_MULTIHIT 2 +#define 
RTAS_LOG_V6_MC_ERAT_INDETERMINATE 3 +#define RTAS_LOG_V6_MC_TLB_PARITY 1 +#define RTAS_LOG_V6_MC_TLB_MULTIHIT 2 +#define RTAS_LOG_V6_MC_TLB_INDETERMINATE 3 +/* + * Per PAPR, + * For UE error type, set bit 1 of sub_err_type to indicate effective addr is + * provided. For other error types (SLB/ERAT/TLB), set bit 0 to indicate + * same. + */ +#define RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED 0x40 +#define RTAS_LOG_V6_MC_EA_ADDR_PROVIDED 0x80 + uint8_t reserved_1[6]; + uint64_t effective_address; + uint64_t logical_address; +} QEMU_PACKED; + +struct mc_extended_log { + struct rtas_event_log_v6 v6hdr; + struct rtas_event_log_v6_mc mc; +} QEMU_PACKED; + +struct MC_ierror_table { + unsigned long srr1_mask; + unsigned long srr1_value; + bool nip_valid; /* nip is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_ierror_table mc_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000080000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x00000000000c0000, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000100000, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000140000, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000000081c0000, 0x0000000000180000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_IFETCH, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +struct MC_derror_table { + unsigned long dsisr_value; + bool dar_valid; /* dar is a valid indicator of faulting address */ + uint8_t error_type; + uint8_t error_subtype; + unsigned int initiator; + unsigned int severity; +}; + +static const struct MC_derror_table mc_derror_table[] = { +{ 0x00008000, false, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00004000, true, + RTAS_LOG_V6_MC_TYPE_UE, RTAS_LOG_V6_MC_UE_PAGE_TABLE_WALK_LOAD_STORE, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000800, true, + RTAS_LOG_V6_MC_TYPE_ERAT, RTAS_LOG_V6_MC_ERAT_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000400, true, + RTAS_LOG_V6_MC_TYPE_TLB, RTAS_LOG_V6_MC_TLB_MULTIHIT, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000080, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_MULTIHIT, /* Before PARITY */ + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, }, +{ 0x00000100, true, + RTAS_LOG_V6_MC_TYPE_SLB, RTAS_LOG_V6_MC_SLB_PARITY, + RTAS_LOG_INITIATOR_CPU, RTAS_LOG_SEVERITY_ERROR_SYNC, } }; + +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + +typedef enum EventClass { + EVENT_CLASS_INTERNAL_ERRORS = 0, + EVENT_CLASS_EPOW = 1, + EVENT_CLASS_RESERVED = 2, + EVENT_CLASS_HOT_PLUG = 3, + EVENT_CLASS_IO = 4, + EVENT_CLASS_MAX +} EventClassIndex; +#define EVENT_CLASS_MASK(index) (1 << (31 - index)) + +static const char * const event_names[EVENT_CLASS_MAX] = { + [EVENT_CLASS_INTERNAL_ERRORS] = "internal-errors", + [EVENT_CLASS_EPOW] = 
"epow-events", + [EVENT_CLASS_HOT_PLUG] = "hot-plug-events", + [EVENT_CLASS_IO] = "ibm,io-events", +}; + +struct SpaprEventSource { + int irq; + uint32_t mask; + bool enabled; +}; + +static SpaprEventSource *spapr_event_sources_new(void) +{ + return g_new0(SpaprEventSource, EVENT_CLASS_MAX); +} + +static void spapr_event_sources_register(SpaprEventSource *event_sources, + EventClassIndex index, int irq) +{ + /* we only support 1 irq per event class at the moment */ + g_assert(event_sources); + g_assert(!event_sources[index].enabled); + event_sources[index].irq = irq; + event_sources[index].mask = EVENT_CLASS_MASK(index); + event_sources[index].enabled = true; +} + +static const SpaprEventSource * +spapr_event_sources_get_source(SpaprEventSource *event_sources, + EventClassIndex index) +{ + g_assert(index < EVENT_CLASS_MAX); + g_assert(event_sources); + + return &event_sources[index]; +} + +void spapr_dt_events(SpaprMachineState *spapr, void *fdt) +{ + uint32_t irq_ranges[EVENT_CLASS_MAX * 2]; + int i, count = 0, event_sources; + SpaprEventSource *events = spapr->event_sources; + + g_assert(events); + + _FDT(event_sources = fdt_add_subnode(fdt, 0, "event-sources")); + + for (i = 0, count = 0; i < EVENT_CLASS_MAX; i++) { + int node_offset; + uint32_t interrupts[2]; + const SpaprEventSource *source = + spapr_event_sources_get_source(events, i); + const char *source_name = event_names[i]; + + if (!source->enabled) { + continue; + } + + spapr_dt_irq(interrupts, source->irq, false); + + _FDT(node_offset = fdt_add_subnode(fdt, event_sources, source_name)); + _FDT(fdt_setprop(fdt, node_offset, "interrupts", interrupts, + sizeof(interrupts))); + + irq_ranges[count++] = interrupts[0]; + irq_ranges[count++] = cpu_to_be32(1); + } + + _FDT((fdt_setprop(fdt, event_sources, "interrupt-controller", NULL, 0))); + _FDT((fdt_setprop_cell(fdt, event_sources, "#interrupt-cells", 2))); + _FDT((fdt_setprop(fdt, event_sources, "interrupt-ranges", + irq_ranges, count * sizeof(uint32_t)))); +} + +static const SpaprEventSource * +rtas_event_log_to_source(SpaprMachineState *spapr, int log_type) +{ + const SpaprEventSource *source; + + g_assert(spapr->event_sources); + + switch (log_type) { + case RTAS_LOG_TYPE_HOTPLUG: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_HOT_PLUG); + if (spapr_ovec_test(spapr->ov5_cas, OV5_HP_EVT)) { + g_assert(source->enabled); + break; + } + /* fall through back to epow for legacy hotplug interrupt source */ + case RTAS_LOG_TYPE_EPOW: + source = spapr_event_sources_get_source(spapr->event_sources, + EVENT_CLASS_EPOW); + break; + default: + source = NULL; + } + + return source; +} + +static int rtas_event_log_to_irq(SpaprMachineState *spapr, int log_type) +{ + const SpaprEventSource *source; + + source = rtas_event_log_to_source(spapr, log_type); + g_assert(source); + g_assert(source->enabled); + + return source->irq; +} + +static uint32_t spapr_event_log_entry_type(SpaprEventLogEntry *entry) +{ + return entry->summary & RTAS_LOG_TYPE_MASK; +} + +static void rtas_event_log_queue(SpaprMachineState *spapr, + SpaprEventLogEntry *entry) +{ + QTAILQ_INSERT_TAIL(&spapr->pending_events, entry, next); +} + +static SpaprEventLogEntry *rtas_event_log_dequeue(SpaprMachineState *spapr, + uint32_t event_mask) +{ + SpaprEventLogEntry *entry = NULL; + + QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const SpaprEventSource *source = + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); + + g_assert(source); + if (source->mask & event_mask) { + 
break; + } + } + + if (entry) { + QTAILQ_REMOVE(&spapr->pending_events, entry, next); + } + + return entry; +} + +static bool rtas_event_log_contains(SpaprMachineState *spapr, uint32_t event_mask) +{ + SpaprEventLogEntry *entry = NULL; + + QTAILQ_FOREACH(entry, &spapr->pending_events, next) { + const SpaprEventSource *source = + rtas_event_log_to_source(spapr, + spapr_event_log_entry_type(entry)); + + if (source->mask & event_mask) { + return true; + } + } + + return false; +} + +static uint32_t next_plid; + +static void spapr_init_v6hdr(struct rtas_event_log_v6 *v6hdr) +{ + v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG + | RTAS_LOG_V6_B0_BIGENDIAN; + v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT + | RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT; + v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM); +} + +static void spapr_init_maina(SpaprMachineState *spapr, + struct rtas_event_log_v6_maina *maina, + int section_count) +{ + struct tm tm; + int year; + + maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA); + maina->hdr.section_length = cpu_to_be16(sizeof(*maina)); + /* FIXME: section version, subtype and creator id? */ + spapr_rtc_read(&spapr->rtc, &tm, NULL); + year = tm.tm_year + 1900; + maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24) + | (to_bcd(year % 100) << 16) + | (to_bcd(tm.tm_mon + 1) << 8) + | to_bcd(tm.tm_mday)); + maina->creation_time = cpu_to_be32((to_bcd(tm.tm_hour) << 24) + | (to_bcd(tm.tm_min) << 16) + | (to_bcd(tm.tm_sec) << 8)); + maina->creator_id = 'H'; /* Hypervisor */ + maina->section_count = section_count; + maina->plid = next_plid++; +} + +static void spapr_powerdown_req(Notifier *n, void *opaque) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprEventLogEntry *entry; + struct rtas_event_log_v6 *v6hdr; + struct rtas_event_log_v6_maina *maina; + struct rtas_event_log_v6_mainb *mainb; + struct rtas_event_log_v6_epow *epow; + struct epow_extended_log *new_epow; + + entry = g_new(SpaprEventLogEntry, 1); + new_epow = g_malloc0(sizeof(*new_epow)); + entry->extended_log = new_epow; + + v6hdr = &new_epow->v6hdr; + maina = &new_epow->maina; + mainb = &new_epow->mainb; + epow = &new_epow->epow; + + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_TYPE_EPOW; + entry->extended_length = sizeof(*new_epow); + + spapr_init_v6hdr(v6hdr); + spapr_init_maina(spapr, maina, 3 /* Main-A, Main-B and EPOW */); + + mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB); + mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb)); + /* FIXME: section version, subtype and creator id? */ + mainb->subsystem_id = 0xa0; /* External environment */ + mainb->event_severity = 0x00; /* Informational / non-error */ + mainb->event_subtype = 0xd0; /* Normal shutdown */ + + epow->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_EPOW); + epow->hdr.section_length = cpu_to_be16(sizeof(*epow)); + epow->hdr.section_version = 2; /* includes extended modifier */ + /* FIXME: section subtype and creator id? 
*/ + epow->sensor_value = RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN; + epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL; + epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC; + + rtas_event_log_queue(spapr, entry); + + qemu_irq_pulse(spapr_qirq(spapr, + rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_EPOW))); +} + +static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action, + SpaprDrcType drc_type, + union drc_identifier *drc_id) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprEventLogEntry *entry; + struct hp_extended_log *new_hp; + struct rtas_event_log_v6 *v6hdr; + struct rtas_event_log_v6_maina *maina; + struct rtas_event_log_v6_mainb *mainb; + struct rtas_event_log_v6_hp *hp; + + entry = g_new(SpaprEventLogEntry, 1); + new_hp = g_malloc0(sizeof(struct hp_extended_log)); + entry->extended_log = new_hp; + + v6hdr = &new_hp->v6hdr; + maina = &new_hp->maina; + mainb = &new_hp->mainb; + hp = &new_hp->hp; + + entry->summary = RTAS_LOG_VERSION_6 + | RTAS_LOG_SEVERITY_EVENT + | RTAS_LOG_DISPOSITION_NOT_RECOVERED + | RTAS_LOG_OPTIONAL_PART_PRESENT + | RTAS_LOG_INITIATOR_HOTPLUG + | RTAS_LOG_TYPE_HOTPLUG; + entry->extended_length = sizeof(*new_hp); + + spapr_init_v6hdr(v6hdr); + spapr_init_maina(spapr, maina, 3 /* Main-A, Main-B, HP */); + + mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB); + mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb)); + mainb->subsystem_id = 0x80; /* External environment */ + mainb->event_severity = 0x00; /* Informational / non-error */ + mainb->event_subtype = 0x00; /* Normal shutdown */ + + hp->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_HOTPLUG); + hp->hdr.section_length = cpu_to_be16(sizeof(*hp)); + hp->hdr.section_version = 1; /* includes extended modifier */ + hp->hotplug_action = hp_action; + hp->hotplug_identifier = hp_id; + + switch (drc_type) { + case SPAPR_DR_CONNECTOR_TYPE_PCI: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PCI; + break; + case SPAPR_DR_CONNECTOR_TYPE_LMB: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_MEMORY; + break; + case SPAPR_DR_CONNECTOR_TYPE_CPU: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_CPU; + break; + case SPAPR_DR_CONNECTOR_TYPE_PHB: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PHB; + break; + case SPAPR_DR_CONNECTOR_TYPE_PMEM: + hp->hotplug_type = RTAS_LOG_V6_HP_TYPE_PMEM; + break; + default: + /* we shouldn't be signaling hotplug events for resources + * that don't support them + */ + g_assert(false); + return; + } + + if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) { + hp->drc_id.count = cpu_to_be32(drc_id->count); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) { + hp->drc_id.index = cpu_to_be32(drc_id->index); + } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) { + /* we should not be using count_indexed value unless the guest + * supports dedicated hotplug event source + */ + g_assert(spapr_memory_hot_unplug_supported(spapr)); + hp->drc_id.count_indexed.count = + cpu_to_be32(drc_id->count_indexed.count); + hp->drc_id.count_indexed.index = + cpu_to_be32(drc_id->count_indexed.index); + } + + rtas_event_log_queue(spapr, entry); + + qemu_irq_pulse(spapr_qirq(spapr, + rtas_event_log_to_irq(spapr, RTAS_LOG_TYPE_HOTPLUG))); +} + +void spapr_hotplug_req_add_by_index(SpaprDrc *drc) +{ + SpaprDrcType drc_type = spapr_drc_type(drc); + union drc_identifier drc_id; + + drc_id.index = spapr_drc_index(drc); + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_index(SpaprDrc 
*drc) +{ + SpaprDrcType drc_type = spapr_drc_type(drc); + union drc_identifier drc_id; + + drc_id.index = spapr_drc_index(drc); + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +void spapr_hotplug_req_add_by_count(SpaprDrcType drc_type, + uint32_t count) +{ + union drc_identifier drc_id; + + drc_id.count = count; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_count(SpaprDrcType drc_type, + uint32_t count) +{ + union drc_identifier drc_id; + + drc_id.count = count; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +void spapr_hotplug_req_add_by_count_indexed(SpaprDrcType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id); +} + +void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type, + uint32_t count, uint32_t index) +{ + union drc_identifier drc_id; + + drc_id.count_indexed.count = count; + drc_id.count_indexed.index = index; + spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED, + RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id); +} + +static void spapr_mc_set_ea_provided_flag(struct mc_extended_log *ext_elog) +{ + switch (ext_elog->mc.error_type) { + case RTAS_LOG_V6_MC_TYPE_UE: + ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_UE_EA_ADDR_PROVIDED; + break; + case RTAS_LOG_V6_MC_TYPE_SLB: + case RTAS_LOG_V6_MC_TYPE_ERAT: + case RTAS_LOG_V6_MC_TYPE_TLB: + ext_elog->mc.sub_err_type |= RTAS_LOG_V6_MC_EA_ADDR_PROVIDED; + break; + default: + break; + } +} + +static uint32_t spapr_mce_get_elog_type(PowerPCCPU *cpu, bool recovered, + struct mc_extended_log *ext_elog) +{ + int i; + CPUPPCState *env = &cpu->env; + uint32_t summary; + uint64_t dsisr = env->spr[SPR_DSISR]; + + summary = RTAS_LOG_VERSION_6 | RTAS_LOG_OPTIONAL_PART_PRESENT; + if (recovered) { + summary |= RTAS_LOG_DISPOSITION_FULLY_RECOVERED; + } else { + summary |= RTAS_LOG_DISPOSITION_NOT_RECOVERED; + } + + if (SRR1_MC_LOADSTORE(env->spr[SPR_SRR1])) { + for (i = 0; i < ARRAY_SIZE(mc_derror_table); i++) { + if (!(dsisr & mc_derror_table[i].dsisr_value)) { + continue; + } + + ext_elog->mc.error_type = mc_derror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_derror_table[i].error_subtype; + if (mc_derror_table[i].dar_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->spr[SPR_DAR]); + spapr_mc_set_ea_provided_flag(ext_elog); + } + + summary |= mc_derror_table[i].initiator + | mc_derror_table[i].severity; + + return summary; + } + } else { + for (i = 0; i < ARRAY_SIZE(mc_ierror_table); i++) { + if ((env->spr[SPR_SRR1] & mc_ierror_table[i].srr1_mask) != + mc_ierror_table[i].srr1_value) { + continue; + } + + ext_elog->mc.error_type = mc_ierror_table[i].error_type; + ext_elog->mc.sub_err_type = mc_ierror_table[i].error_subtype; + if (mc_ierror_table[i].nip_valid) { + ext_elog->mc.effective_address = cpu_to_be64(env->nip); + spapr_mc_set_ea_provided_flag(ext_elog); + } + + summary |= mc_ierror_table[i].initiator + | mc_ierror_table[i].severity; + + return summary; + } + } + + summary |= RTAS_LOG_INITIATOR_CPU; + return summary; +} + +static void spapr_mce_dispatch_elog(SpaprMachineState *spapr, PowerPCCPU *cpu, + bool recovered) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = 
&cpu->env; + uint64_t rtas_addr; + struct rtas_error_log log; + struct mc_extended_log *ext_elog; + uint32_t summary; + + ext_elog = g_malloc0(sizeof(*ext_elog)); + summary = spapr_mce_get_elog_type(cpu, recovered, ext_elog); + + log.summary = cpu_to_be32(summary); + log.extended_length = cpu_to_be32(sizeof(*ext_elog)); + + spapr_init_v6hdr(&ext_elog->v6hdr); + ext_elog->mc.hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MC); + ext_elog->mc.hdr.section_length = + cpu_to_be16(sizeof(struct rtas_event_log_v6_mc)); + ext_elog->mc.hdr.section_version = 1; + + /* get rtas addr from fdt */ + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + if (!recovered) { + error_report( +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found."); + qemu_system_guest_panicked(NULL); + } else { + warn_report( +"FWNMI: Unable to deliver machine check to guest: rtas_addr not found. " +"Machine check recovered."); + } + g_free(ext_elog); + return; + } + + /* + * By taking the interlock, we assume that the MCE will be + * delivered to the guest. CAUTION: don't add anything that could + * prevent the MCE to be delivered after this line, otherwise the + * guest won't be able to release the interlock and ultimately + * hang/crash? + */ + spapr->fwnmi_machine_check_interlock = cpu->vcpu_id; + + stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET, + env->gpr[3]); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]), &log, sizeof(log)); + cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET + + sizeof(env->gpr[3]) + sizeof(log), ext_elog, + sizeof(*ext_elog)); + g_free(ext_elog); + + env->gpr[3] = rtas_addr + RTAS_ERROR_LOG_OFFSET; + + ppc_cpu_do_fwnmi_machine_check(cs, spapr->fwnmi_machine_check_addr); +} + +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + CPUState *cs = CPU(cpu); + int ret; + + if (spapr->fwnmi_machine_check_addr == -1) { + /* Non-FWNMI case, deliver it like an architected CPU interrupt. */ + cs->exception_index = POWERPC_EXCP_MCHECK; + ppc_cpu_do_interrupt(cs); + return; + } + + /* Wait for FWNMI interlock. */ + while (spapr->fwnmi_machine_check_interlock != -1) { + /* + * Check whether the same CPU got machine check error + * while still handling the mc error (i.e., before + * that CPU called "ibm,nmi-interlock") + */ + if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) { + if (!recovered) { + error_report( +"FWNMI: Unable to deliver machine check to guest: nested machine check."); + qemu_system_guest_panicked(NULL); + } else { + warn_report( +"FWNMI: Unable to deliver machine check to guest: nested machine check. " +"Machine check recovered."); + } + return; + } + qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond); + if (spapr->fwnmi_machine_check_addr == -1) { + /* + * If the machine was reset while waiting for the interlock, + * abort the delivery. The machine check applies to a context + * that no longer exists, so it wouldn't make sense to deliver + * it now. + */ + return; + } + } + + /* + * Try to block migration while FWNMI is being handled, so the + * machine check handler runs where the information passed to it + * actually makes sense. This shouldn't actually block migration, + * only delay it slightly, assuming migration is retried. If the + * attempt to block fails, carry on. Unfortunately, it always + * fails when running with -only-migrate. 
A proper interface to + * delay migration completion for a bit could avoid that. + */ + ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL); + if (ret == -EBUSY) { + warn_report("Received a fwnmi while migration was in progress"); + } + + spapr_mce_dispatch_elog(spapr, cpu, recovered); +} + +static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t mask, buf, len, event_len; + SpaprEventLogEntry *event; + struct rtas_error_log header; + int i; + + if ((nargs < 6) || (nargs > 7) || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + mask = rtas_ld(args, 2); + buf = rtas_ld(args, 4); + len = rtas_ld(args, 5); + + event = rtas_event_log_dequeue(spapr, mask); + if (!event) { + goto out_no_events; + } + + event_len = event->extended_length + sizeof(header); + + if (event_len < len) { + len = event_len; + } + + header.summary = cpu_to_be32(event->summary); + header.extended_length = cpu_to_be32(event->extended_length); + cpu_physical_memory_write(buf, &header, sizeof(header)); + cpu_physical_memory_write(buf + sizeof(header), event->extended_log, + event->extended_length); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + g_free(event->extended_log); + g_free(event); + + /* according to PAPR+, the IRQ must be left asserted, or re-asserted, if + * there are still pending events to be fetched via check-exception. We + * do the latter here, since our code relies on edge-triggered + * interrupts. + */ + for (i = 0; i < EVENT_CLASS_MAX; i++) { + if (rtas_event_log_contains(spapr, EVENT_CLASS_MASK(i))) { + const SpaprEventSource *source = + spapr_event_sources_get_source(spapr->event_sources, i); + + g_assert(source->enabled); + qemu_irq_pulse(spapr_qirq(spapr, source->irq)); + } + } + + return; + +out_no_events: + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); +} + +static void event_scan(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + int i; + if (nargs != 4 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + for (i = 0; i < EVENT_CLASS_MAX; i++) { + if (rtas_event_log_contains(spapr, EVENT_CLASS_MASK(i))) { + const SpaprEventSource *source = + spapr_event_sources_get_source(spapr->event_sources, i); + + g_assert(source->enabled); + qemu_irq_pulse(spapr_qirq(spapr, source->irq)); + } + } + + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); +} + +void spapr_clear_pending_events(SpaprMachineState *spapr) +{ + SpaprEventLogEntry *entry = NULL, *next_entry; + + QTAILQ_FOREACH_SAFE(entry, &spapr->pending_events, next, next_entry) { + QTAILQ_REMOVE(&spapr->pending_events, entry, next); + g_free(entry->extended_log); + g_free(entry); + } +} + +void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr) +{ + SpaprEventLogEntry *entry = NULL, *next_entry; + + QTAILQ_FOREACH_SAFE(entry, &spapr->pending_events, next, next_entry) { + if (spapr_event_log_entry_type(entry) == RTAS_LOG_TYPE_HOTPLUG) { + QTAILQ_REMOVE(&spapr->pending_events, entry, next); + g_free(entry->extended_log); + g_free(entry); + } + } +} + +void spapr_events_init(SpaprMachineState *spapr) +{ + int epow_irq = SPAPR_IRQ_EPOW; + + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + epow_irq = spapr_irq_findone(spapr, &error_fatal); + } + + spapr_irq_claim(spapr, epow_irq, false, &error_fatal); + + QTAILQ_INIT(&spapr->pending_events); + + spapr->event_sources = 
spapr_event_sources_new(); + + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_EPOW, + epow_irq); + + /* NOTE: if machine supports modern/dedicated hotplug event source, + * we add it to the device-tree unconditionally. This means we may + * have cases where the source is enabled in QEMU, but unused by the + * guest because it does not support modern hotplug events, so we + * take care to rely on checking for negotiation of OV5_HP_EVT option + * before attempting to use it to signal events, rather than simply + * checking that it's enabled. + */ + if (spapr->use_hotplug_event_source) { + int hp_irq = SPAPR_IRQ_HOTPLUG; + + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + hp_irq = spapr_irq_findone(spapr, &error_fatal); + } + + spapr_irq_claim(spapr, hp_irq, false, &error_fatal); + + spapr_event_sources_register(spapr->event_sources, EVENT_CLASS_HOT_PLUG, + hp_irq); + } + + spapr->epow_notifier.notify = spapr_powerdown_req; + qemu_register_powerdown_notifier(&spapr->epow_notifier); + spapr_rtas_register(RTAS_CHECK_EXCEPTION, "check-exception", + check_exception); + spapr_rtas_register(RTAS_EVENT_SCAN, "event-scan", event_scan); +} diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c new file mode 100644 index 000000000..222c1b6bb --- /dev/null +++ b/hw/ppc/spapr_hcall.c @@ -0,0 +1,1557 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "qapi/error.h" +#include "sysemu/hw_accel.h" +#include "sysemu/runstate.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/error-report.h" +#include "exec/exec-all.h" +#include "helper_regs.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "mmu-hash64.h" +#include "cpu-models.h" +#include "trace.h" +#include "kvm_ppc.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/spapr_ovec.h" +#include "hw/ppc/spapr_numa.h" +#include "mmu-book3s-v3.h" +#include "hw/mem/memory-device.h" + +bool is_ram_address(SpaprMachineState *spapr, hwaddr addr) +{ + MachineState *machine = MACHINE(spapr); + DeviceMemoryState *dms = machine->device_memory; + + if (addr < machine->ram_size) { + return true; + } + if ((addr >= dms->base) + && ((addr - dms->base) < memory_region_size(&dms->mr))) { + return true; + } + + return false; +} + +/* Convert a return code from the KVM ioctl()s implementing resize HPT + * into a PAPR hypercall return code */ +static target_ulong resize_hpt_convert_rc(int ret) +{ + if (ret >= 100000) { + return H_LONG_BUSY_ORDER_100_SEC; + } else if (ret >= 10000) { + return H_LONG_BUSY_ORDER_10_SEC; + } else if (ret >= 1000) { + return H_LONG_BUSY_ORDER_1_SEC; + } else if (ret >= 100) { + return H_LONG_BUSY_ORDER_100_MSEC; + } else if (ret >= 10) { + return H_LONG_BUSY_ORDER_10_MSEC; + } else if (ret > 0) { + return H_LONG_BUSY_ORDER_1_MSEC; + } + + switch (ret) { + case 0: + return H_SUCCESS; + case -EPERM: + return H_AUTHORITY; + case -EINVAL: + return H_PARAMETER; + case -ENXIO: + return H_CLOSED; + case -ENOSPC: + return H_PTEG_FULL; + case -EBUSY: + return H_BUSY; + case -ENOMEM: + return H_NO_MEM; + default: + return H_HARDWARE; + } +} + +static target_ulong h_resize_hpt_prepare(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + int shift = args[1]; + uint64_t current_ram_size; + int rc; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + if (!spapr->htab_shift) { + /* Radix guest, no HPT */ + return H_NOT_AVAILABLE; + } + + 
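/* a non-zero shift requests an HPT of 2^shift bytes; only orders 18..46 are acceptable */ +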
trace_spapr_h_resize_hpt_prepare(flags, shift); + + if (flags != 0) { + return H_PARAMETER; + } + + if (shift && ((shift < 18) || (shift > 46))) { + return H_PARAMETER; + } + + current_ram_size = MACHINE(spapr)->ram_size + get_plugged_memory_size(); + + /* We only allow the guest to allocate an HPT one order above what + * we'd normally give them (to stop a small guest claiming a huge + * chunk of resources in the HPT */ + if (shift > (spapr_hpt_shift_for_ramsize(current_ram_size) + 1)) { + return H_RESOURCE; + } + + rc = kvmppc_resize_hpt_prepare(cpu, flags, shift); + if (rc != -ENOSYS) { + return resize_hpt_convert_rc(rc); + } + + if (kvm_enabled()) { + return H_HARDWARE; + } + + return softmmu_resize_hpt_prepare(cpu, spapr, shift); +} + +static void do_push_sregs_to_kvm_pr(CPUState *cs, run_on_cpu_data data) +{ + int ret; + + cpu_synchronize_state(cs); + + ret = kvmppc_put_books_sregs(POWERPC_CPU(cs)); + if (ret < 0) { + error_report("failed to push sregs to KVM: %s", strerror(-ret)); + exit(1); + } +} + +void push_sregs_to_kvm_pr(SpaprMachineState *spapr) +{ + CPUState *cs; + + /* + * This is a hack for the benefit of KVM PR - it abuses the SDR1 + * slot in kvm_sregs to communicate the userspace address of the + * HPT + */ + if (!kvm_enabled() || !spapr->htab) { + return; + } + + CPU_FOREACH(cs) { + run_on_cpu(cs, do_push_sregs_to_kvm_pr, RUN_ON_CPU_NULL); + } +} + +static target_ulong h_resize_hpt_commit(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong shift = args[1]; + int rc; + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_DISABLED) { + return H_AUTHORITY; + } + + if (!spapr->htab_shift) { + /* Radix guest, no HPT */ + return H_NOT_AVAILABLE; + } + + trace_spapr_h_resize_hpt_commit(flags, shift); + + rc = kvmppc_resize_hpt_commit(cpu, flags, shift); + if (rc != -ENOSYS) { + rc = resize_hpt_convert_rc(rc); + if (rc == H_SUCCESS) { + /* Need to set the new htab_shift in the machine state */ + spapr->htab_shift = shift; + } + return rc; + } + + if (kvm_enabled()) { + return H_HARDWARE; + } + + return softmmu_resize_hpt_commit(cpu, spapr, flags, shift); +} + + + +static target_ulong h_set_sprg0(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + cpu_synchronize_state(CPU(cpu)); + cpu->env.spr[SPR_SPRG0] = args[0]; + + return H_SUCCESS; +} + +static target_ulong h_set_dabr(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + if (!ppc_has_spr(cpu, SPR_DABR)) { + return H_HARDWARE; /* DABR register not available */ + } + cpu_synchronize_state(CPU(cpu)); + + if (ppc_has_spr(cpu, SPR_DABRX)) { + cpu->env.spr[SPR_DABRX] = 0x3; /* Use Problem and Privileged state */ + } else if (!(args[0] & 0x4)) { /* Breakpoint Translation set? 
*/ + return H_RESERVED_DABR; + } + + cpu->env.spr[SPR_DABR] = args[0]; + return H_SUCCESS; +} + +static target_ulong h_set_xdabr(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong dabrx = args[1]; + + if (!ppc_has_spr(cpu, SPR_DABR) || !ppc_has_spr(cpu, SPR_DABRX)) { + return H_HARDWARE; + } + + if ((dabrx & ~0xfULL) != 0 || (dabrx & H_DABRX_HYPERVISOR) != 0 + || (dabrx & (H_DABRX_KERNEL | H_DABRX_USER)) == 0) { + return H_PARAMETER; + } + + cpu_synchronize_state(CPU(cpu)); + cpu->env.spr[SPR_DABRX] = dabrx; + cpu->env.spr[SPR_DABR] = args[0]; + + return H_SUCCESS; +} + +static target_ulong h_page_init(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + hwaddr dst = args[1]; + hwaddr src = args[2]; + hwaddr len = TARGET_PAGE_SIZE; + uint8_t *pdst, *psrc; + target_long ret = H_SUCCESS; + + if (flags & ~(H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE + | H_COPY_PAGE | H_ZERO_PAGE)) { + qemu_log_mask(LOG_UNIMP, "h_page_init: Bad flags (" TARGET_FMT_lx "\n", + flags); + return H_PARAMETER; + } + + /* Map-in destination */ + if (!is_ram_address(spapr, dst) || (dst & ~TARGET_PAGE_MASK) != 0) { + return H_PARAMETER; + } + pdst = cpu_physical_memory_map(dst, &len, true); + if (!pdst || len != TARGET_PAGE_SIZE) { + return H_PARAMETER; + } + + if (flags & H_COPY_PAGE) { + /* Map-in source, copy to destination, and unmap source again */ + if (!is_ram_address(spapr, src) || (src & ~TARGET_PAGE_MASK) != 0) { + ret = H_PARAMETER; + goto unmap_out; + } + psrc = cpu_physical_memory_map(src, &len, false); + if (!psrc || len != TARGET_PAGE_SIZE) { + ret = H_PARAMETER; + goto unmap_out; + } + memcpy(pdst, psrc, len); + cpu_physical_memory_unmap(psrc, len, 0, len); + } else if (flags & H_ZERO_PAGE) { + memset(pdst, 0, len); /* Just clear the destination page */ + } + + if (kvm_enabled() && (flags & H_ICACHE_SYNCHRONIZE) != 0) { + kvmppc_dcbst_range(cpu, pdst, len); + } + if (flags & (H_ICACHE_SYNCHRONIZE | H_ICACHE_INVALIDATE)) { + if (kvm_enabled()) { + kvmppc_icbi_range(cpu, pdst, len); + } else { + tb_flush(CPU(cpu)); + } + } + +unmap_out: + cpu_physical_memory_unmap(pdst, TARGET_PAGE_SIZE, 1, len); + return ret; +} + +#define FLAGS_REGISTER_VPA 0x0000200000000000ULL +#define FLAGS_REGISTER_DTL 0x0000400000000000ULL +#define FLAGS_REGISTER_SLBSHADOW 0x0000600000000000ULL +#define FLAGS_DEREGISTER_VPA 0x0000a00000000000ULL +#define FLAGS_DEREGISTER_DTL 0x0000c00000000000ULL +#define FLAGS_DEREGISTER_SLBSHADOW 0x0000e00000000000ULL + +static target_ulong register_vpa(PowerPCCPU *cpu, target_ulong vpa) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + uint16_t size; + uint8_t tmp; + + if (vpa == 0) { + hcall_dprintf("Can't cope with registering a VPA at logical 0\n"); + return H_HARDWARE; + } + + if (vpa % env->dcache_line_size) { + return H_PARAMETER; + } + /* FIXME: bounds check the address */ + + size = lduw_be_phys(cs->as, vpa + 0x4); + + if (size < VPA_MIN_SIZE) { + return H_PARAMETER; + } + + /* VPA is not allowed to cross a page boundary */ + if ((vpa / 4096) != ((vpa + size - 1) / 4096)) { + return H_PARAMETER; + } + + spapr_cpu->vpa_addr = vpa; + + tmp = ldub_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET); + tmp |= VPA_SHARED_PROC_VAL; + stb_phys(cs->as, spapr_cpu->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp); + + return H_SUCCESS; +} + +static target_ulong deregister_vpa(PowerPCCPU *cpu, target_ulong vpa) +{ + 
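/* the VPA must outlive its dependent areas: refuse while the SLB shadow or dispatch trace log is still registered */ +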
SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + if (spapr_cpu->slb_shadow_addr) { + return H_RESOURCE; + } + + if (spapr_cpu->dtl_addr) { + return H_RESOURCE; + } + + spapr_cpu->vpa_addr = 0; + return H_SUCCESS; +} + +static target_ulong register_slb_shadow(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + uint32_t size; + + if (addr == 0) { + hcall_dprintf("Can't cope with SLB shadow at logical 0\n"); + return H_HARDWARE; + } + + size = ldl_be_phys(CPU(cpu)->as, addr + 0x4); + if (size < 0x8) { + return H_PARAMETER; + } + + if ((addr / 4096) != ((addr + size - 1) / 4096)) { + return H_PARAMETER; + } + + if (!spapr_cpu->vpa_addr) { + return H_RESOURCE; + } + + spapr_cpu->slb_shadow_addr = addr; + spapr_cpu->slb_shadow_size = size; + + return H_SUCCESS; +} + +static target_ulong deregister_slb_shadow(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + spapr_cpu->slb_shadow_addr = 0; + spapr_cpu->slb_shadow_size = 0; + return H_SUCCESS; +} + +static target_ulong register_dtl(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + uint32_t size; + + if (addr == 0) { + hcall_dprintf("Can't cope with DTL at logical 0\n"); + return H_HARDWARE; + } + + size = ldl_be_phys(CPU(cpu)->as, addr + 0x4); + + if (size < 48) { + return H_PARAMETER; + } + + if (!spapr_cpu->vpa_addr) { + return H_RESOURCE; + } + + spapr_cpu->dtl_addr = addr; + spapr_cpu->dtl_size = size; + + return H_SUCCESS; +} + +static target_ulong deregister_dtl(PowerPCCPU *cpu, target_ulong addr) +{ + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + spapr_cpu->dtl_addr = 0; + spapr_cpu->dtl_size = 0; + + return H_SUCCESS; +} + +static target_ulong h_register_vpa(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong procno = args[1]; + target_ulong vpa = args[2]; + target_ulong ret = H_PARAMETER; + PowerPCCPU *tcpu; + + tcpu = spapr_find_cpu(procno); + if (!tcpu) { + return H_PARAMETER; + } + + switch (flags) { + case FLAGS_REGISTER_VPA: + ret = register_vpa(tcpu, vpa); + break; + + case FLAGS_DEREGISTER_VPA: + ret = deregister_vpa(tcpu, vpa); + break; + + case FLAGS_REGISTER_SLBSHADOW: + ret = register_slb_shadow(tcpu, vpa); + break; + + case FLAGS_DEREGISTER_SLBSHADOW: + ret = deregister_slb_shadow(tcpu, vpa); + break; + + case FLAGS_REGISTER_DTL: + ret = register_dtl(tcpu, vpa); + break; + + case FLAGS_DEREGISTER_DTL: + ret = deregister_dtl(tcpu, vpa); + break; + } + + return ret; +} + +static target_ulong h_cede(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + env->msr |= (1ULL << MSR_EE); + hreg_compute_hflags(env); + + if (spapr_cpu->prod) { + spapr_cpu->prod = false; + return H_SUCCESS; + } + + if (!cpu_has_work(cs)) { + cs->halted = 1; + cs->exception_index = EXCP_HLT; + cs->exit_request = 1; + } + + return H_SUCCESS; +} + +/* + * Confer to self, aka join. Cede could use the same pattern as well, if + * EXCP_HLT can be changed to EXCP_HALTED. 
+ */ +static target_ulong h_confer_self(PowerPCCPU *cpu) +{ + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu); + + if (spapr_cpu->prod) { + spapr_cpu->prod = false; + return H_SUCCESS; + } + cs->halted = 1; + cs->exception_index = EXCP_HALTED; + cs->exit_request = 1; + + return H_SUCCESS; +} + +static target_ulong h_join(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + CPUState *cs; + bool last_unjoined = true; + + if (env->msr & (1ULL << MSR_EE)) { + return H_BAD_MODE; + } + + /* + * Must not join the last CPU running. Interestingly, no such restriction + * for H_CONFER-to-self, but that is probably not intended to be used + * when H_JOIN is available. + */ + CPU_FOREACH(cs) { + PowerPCCPU *c = POWERPC_CPU(cs); + CPUPPCState *e = &c->env; + if (c == cpu) { + continue; + } + + /* Don't have a way to indicate joined, so use halted && MSR[EE]=0 */ + if (!cs->halted || (e->msr & (1ULL << MSR_EE))) { + last_unjoined = false; + break; + } + } + if (last_unjoined) { + return H_CONTINUE; + } + + return h_confer_self(cpu); +} + +static target_ulong h_confer(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_long target = args[0]; + uint32_t dispatch = args[1]; + CPUState *cs = CPU(cpu); + SpaprCpuState *spapr_cpu; + + /* + * -1 means confer to all other CPUs without dispatch counter check, + * otherwise it's a targeted confer. + */ + if (target != -1) { + PowerPCCPU *target_cpu = spapr_find_cpu(target); + uint32_t target_dispatch; + + if (!target_cpu) { + return H_PARAMETER; + } + + /* + * target == self is a special case, we wait until prodded, without + * dispatch counter check. + */ + if (cpu == target_cpu) { + return h_confer_self(cpu); + } + + spapr_cpu = spapr_cpu_state(target_cpu); + if (!spapr_cpu->vpa_addr || ((dispatch & 1) == 0)) { + return H_SUCCESS; + } + + target_dispatch = ldl_be_phys(cs->as, + spapr_cpu->vpa_addr + VPA_DISPATCH_COUNTER); + if (target_dispatch != dispatch) { + return H_SUCCESS; + } + + /* + * The targeted confer does not do anything special beyond yielding + * the current vCPU, but even this should be better than nothing. + * At least for single-threaded tcg, it gives the target a chance to + * run before we run again. Multi-threaded tcg does not really do + * anything with EXCP_YIELD yet. 
+ */ + } + + cs->exception_index = EXCP_YIELD; + cs->exit_request = 1; + cpu_loop_exit(cs); + + return H_SUCCESS; +} + +static target_ulong h_prod(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_long target = args[0]; + PowerPCCPU *tcpu; + CPUState *cs; + SpaprCpuState *spapr_cpu; + + tcpu = spapr_find_cpu(target); + cs = CPU(tcpu); + if (!cs) { + return H_PARAMETER; + } + + spapr_cpu = spapr_cpu_state(tcpu); + spapr_cpu->prod = true; + cs->halted = 0; + qemu_cpu_kick(cs); + + return H_SUCCESS; +} + +static target_ulong h_rtas(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong rtas_r3 = args[0]; + uint32_t token = rtas_ld(rtas_r3, 0); + uint32_t nargs = rtas_ld(rtas_r3, 1); + uint32_t nret = rtas_ld(rtas_r3, 2); + + return spapr_rtas_call(cpu, spapr, token, nargs, rtas_r3 + 12, + nret, rtas_r3 + 12 + 4*nargs); +} + +static target_ulong h_logical_load(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUState *cs = CPU(cpu); + target_ulong size = args[0]; + target_ulong addr = args[1]; + + switch (size) { + case 1: + args[0] = ldub_phys(cs->as, addr); + return H_SUCCESS; + case 2: + args[0] = lduw_phys(cs->as, addr); + return H_SUCCESS; + case 4: + args[0] = ldl_phys(cs->as, addr); + return H_SUCCESS; + case 8: + args[0] = ldq_phys(cs->as, addr); + return H_SUCCESS; + } + return H_PARAMETER; +} + +static target_ulong h_logical_store(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUState *cs = CPU(cpu); + + target_ulong size = args[0]; + target_ulong addr = args[1]; + target_ulong val = args[2]; + + switch (size) { + case 1: + stb_phys(cs->as, addr, val); + return H_SUCCESS; + case 2: + stw_phys(cs->as, addr, val); + return H_SUCCESS; + case 4: + stl_phys(cs->as, addr, val); + return H_SUCCESS; + case 8: + stq_phys(cs->as, addr, val); + return H_SUCCESS; + } + return H_PARAMETER; +} + +static target_ulong h_logical_memop(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUState *cs = CPU(cpu); + + target_ulong dst = args[0]; /* Destination address */ + target_ulong src = args[1]; /* Source address */ + target_ulong esize = args[2]; /* Element size (0=1,1=2,2=4,3=8) */ + target_ulong count = args[3]; /* Element count */ + target_ulong op = args[4]; /* 0 = copy, 1 = invert */ + uint64_t tmp; + unsigned int mask = (1 << esize) - 1; + int step = 1 << esize; + + if (count > 0x80000000) { + return H_PARAMETER; + } + + if ((dst & mask) || (src & mask) || (op > 1)) { + return H_PARAMETER; + } + + if (dst >= src && dst < (src + (count << esize))) { + dst = dst + ((count - 1) << esize); + src = src + ((count - 1) << esize); + step = -step; + } + + while (count--) { + switch (esize) { + case 0: + tmp = ldub_phys(cs->as, src); + break; + case 1: + tmp = lduw_phys(cs->as, src); + break; + case 2: + tmp = ldl_phys(cs->as, src); + break; + case 3: + tmp = ldq_phys(cs->as, src); + break; + default: + return H_PARAMETER; + } + if (op == 1) { + tmp = ~tmp; + } + switch (esize) { + case 0: + stb_phys(cs->as, dst, tmp); + break; + case 1: + stw_phys(cs->as, dst, tmp); + break; + case 2: + stl_phys(cs->as, dst, tmp); + break; + case 3: + stq_phys(cs->as, dst, tmp); + break; + } + dst = dst + step; + src = src + step; + } + + return H_SUCCESS; +} + +static target_ulong h_logical_icbi(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + /* Nothing to 
do on emulation, KVM will trap this in the kernel */ + return H_SUCCESS; +} + +static target_ulong h_logical_dcbf(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + /* Nothing to do on emulation, KVM will trap this in the kernel */ + return H_SUCCESS; +} + +static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + if (value1) { + return H_P3; + } + if (value2) { + return H_P4; + } + + switch (mflags) { + case H_SET_MODE_ENDIAN_BIG: + spapr_set_all_lpcrs(0, LPCR_ILE); + spapr_pci_switch_vga(spapr, true); + return H_SUCCESS; + + case H_SET_MODE_ENDIAN_LITTLE: + spapr_set_all_lpcrs(LPCR_ILE, LPCR_ILE); + spapr_pci_switch_vga(spapr, false); + return H_SUCCESS; + } + + return H_UNSUPPORTED_FLAG; +} + +static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu, + target_ulong mflags, + target_ulong value1, + target_ulong value2) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + if (!(pcc->insns_flags2 & PPC2_ISA207S)) { + return H_P2; + } + if (value1) { + return H_P3; + } + if (value2) { + return H_P4; + } + + if (mflags == 1) { + /* AIL=1 is reserved in POWER8/POWER9/POWER10 */ + return H_UNSUPPORTED_FLAG; + } + + if (mflags == 2 && (pcc->insns_flags2 & PPC2_ISA310)) { + /* AIL=2 is reserved in POWER10 (ISA v3.1) */ + return H_UNSUPPORTED_FLAG; + } + + spapr_set_all_lpcrs(mflags << LPCR_AIL_SHIFT, LPCR_AIL); + + return H_SUCCESS; +} + +static target_ulong h_set_mode(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong resource = args[1]; + target_ulong ret = H_P2; + + switch (resource) { + case H_SET_MODE_RESOURCE_LE: + ret = h_set_mode_resource_le(cpu, spapr, args[0], args[2], args[3]); + break; + case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: + ret = h_set_mode_resource_addr_trans_mode(cpu, args[0], + args[2], args[3]); + break; + } + + return ret; +} + +static target_ulong h_clean_slb(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n", + opcode, " (H_CLEAN_SLB)"); + return H_FUNCTION; +} + +static target_ulong h_invalidate_pid(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x"TARGET_FMT_lx"%s\n", + opcode, " (H_INVALIDATE_PID)"); + return H_FUNCTION; +} + +static void spapr_check_setup_free_hpt(SpaprMachineState *spapr, + uint64_t patbe_old, uint64_t patbe_new) +{ + /* + * We have 4 Options: + * HASH->HASH || RADIX->RADIX || NOTHING->RADIX : Do Nothing + * HASH->RADIX : Free HPT + * RADIX->HASH : Allocate HPT + * NOTHING->HASH : Allocate HPT + * Note: NOTHING implies the case where we said the guest could choose + * later and so assumed radix and now it's called H_REG_PROC_TBL + */ + + if ((patbe_old & PATE1_GR) == (patbe_new & PATE1_GR)) { + /* We assume RADIX, so this catches all the "Do Nothing" cases */ + } else if (!(patbe_old & PATE1_GR)) { + /* HASH->RADIX : Free HPT */ + spapr_free_hpt(spapr); + } else if (!(patbe_new & PATE1_GR)) { + /* RADIX->HASH || NOTHING->HASH : Allocate HPT */ + spapr_setup_hpt(spapr); + } + return; +} + +#define FLAGS_MASK 0x01FULL +#define FLAG_MODIFY 0x10 +#define FLAG_REGISTER 0x08 +#define FLAG_RADIX 0x04 +#define FLAG_HASH_PROC_TBL 0x02 +#define FLAG_GTSE 0x01 + +static target_ulong h_register_process_table(PowerPCCPU *cpu, + 
SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong proc_tbl = args[1]; + target_ulong page_size = args[2]; + target_ulong table_size = args[3]; + target_ulong update_lpcr = 0; + uint64_t cproc; + + if (flags & ~FLAGS_MASK) { /* Check no reserved bits are set */ + return H_PARAMETER; + } + if (flags & FLAG_MODIFY) { + if (flags & FLAG_REGISTER) { + if (flags & FLAG_RADIX) { /* Register new RADIX process table */ + if (proc_tbl & 0xfff || proc_tbl >> 60) { + return H_P2; + } else if (page_size) { + return H_P3; + } else if (table_size > 24) { + return H_P4; + } + cproc = PATE1_GR | proc_tbl | table_size; + } else { /* Register new HPT process table */ + if (flags & FLAG_HASH_PROC_TBL) { /* Hash with Segment Tables */ + /* TODO - Not Supported */ + /* Technically caused by flag bits => H_PARAMETER */ + return H_PARAMETER; + } else { /* Hash with SLB */ + if (proc_tbl >> 38) { + return H_P2; + } else if (page_size & ~0x7) { + return H_P3; + } else if (table_size > 24) { + return H_P4; + } + } + cproc = (proc_tbl << 25) | page_size << 5 | table_size; + } + + } else { /* Deregister current process table */ + /* + * Set to benign value: (current GR) | 0. This allows + * deregistration in KVM to succeed even if the radix bit + * in flags doesn't match the radix bit in the old PATE. + */ + cproc = spapr->patb_entry & PATE1_GR; + } + } else { /* Maintain current registration */ + if (!(flags & FLAG_RADIX) != !(spapr->patb_entry & PATE1_GR)) { + /* Technically caused by flag bits => H_PARAMETER */ + return H_PARAMETER; /* Existing Process Table Mismatch */ + } + cproc = spapr->patb_entry; + } + + /* Check if we need to setup OR free the hpt */ + spapr_check_setup_free_hpt(spapr, spapr->patb_entry, cproc); + + spapr->patb_entry = cproc; /* Save new process table */ + + /* Update the UPRT, HR and GTSE bits in the LPCR for all cpus */ + if (flags & FLAG_RADIX) /* Radix must use process tables, also set HR */ + update_lpcr |= (LPCR_UPRT | LPCR_HR); + else if (flags & FLAG_HASH_PROC_TBL) /* Hash with process tables */ + update_lpcr |= LPCR_UPRT; + if (flags & FLAG_GTSE) /* Guest translation shootdown enable */ + update_lpcr |= LPCR_GTSE; + + spapr_set_all_lpcrs(update_lpcr, LPCR_UPRT | LPCR_HR | LPCR_GTSE); + + if (kvm_enabled()) { + return kvmppc_configure_v3_mmu(cpu, flags & FLAG_RADIX, + flags & FLAG_GTSE, cproc); + } + return H_SUCCESS; +} + +#define H_SIGNAL_SYS_RESET_ALL -1 +#define H_SIGNAL_SYS_RESET_ALLBUTSELF -2 + +static target_ulong h_signal_sys_reset(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_long target = args[0]; + CPUState *cs; + + if (target < 0) { + /* Broadcast */ + if (target < H_SIGNAL_SYS_RESET_ALLBUTSELF) { + return H_PARAMETER; + } + + CPU_FOREACH(cs) { + PowerPCCPU *c = POWERPC_CPU(cs); + + if (target == H_SIGNAL_SYS_RESET_ALLBUTSELF) { + if (c == cpu) { + continue; + } + } + run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL); + } + return H_SUCCESS; + + } else { + /* Unicast */ + cs = CPU(spapr_find_cpu(target)); + if (cs) { + run_on_cpu(cs, spapr_do_system_reset_on_cpu, RUN_ON_CPU_NULL); + return H_SUCCESS; + } + return H_PARAMETER; + } +} + +/* Returns either a logical PVR or zero if none was found */ +static uint32_t cas_check_pvr(PowerPCCPU *cpu, uint32_t max_compat, + target_ulong *addr, bool *raw_mode_supported) +{ + bool explicit_match = false; /* Matched the CPU's real PVR */ + uint32_t best_compat = 0; + int i; + + /* + * We scan the 
supplied table of PVRs looking for two things + * 1. Is our real CPU PVR in the list? + * 2. What's the "best" listed logical PVR + */ + for (i = 0; i < 512; ++i) { + uint32_t pvr, pvr_mask; + + pvr_mask = ldl_be_phys(&address_space_memory, *addr); + pvr = ldl_be_phys(&address_space_memory, *addr + 4); + *addr += 8; + + if (~pvr_mask & pvr) { + break; /* Terminator record */ + } + + if ((cpu->env.spr[SPR_PVR] & pvr_mask) == (pvr & pvr_mask)) { + explicit_match = true; + } else { + if (ppc_check_compat(cpu, pvr, best_compat, max_compat)) { + best_compat = pvr; + } + } + } + + *raw_mode_supported = explicit_match; + + /* Parsing finished */ + trace_spapr_cas_pvr(cpu->compat_pvr, explicit_match, best_compat); + + return best_compat; +} + +static +target_ulong do_client_architecture_support(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong vec, + target_ulong fdt_bufsize) +{ + target_ulong ov_table; /* Working address in data buffer */ + uint32_t cas_pvr; + SpaprOptionVector *ov1_guest, *ov5_guest; + bool guest_radix; + bool raw_mode_supported = false; + bool guest_xive; + CPUState *cs; + void *fdt; + uint32_t max_compat = spapr->max_compat_pvr; + + /* CAS is supposed to be called early when only the boot vCPU is active. */ + CPU_FOREACH(cs) { + if (cs == CPU(cpu)) { + continue; + } + if (!cs->halted) { + warn_report("guest has multiple active vCPUs at CAS, which is not allowed"); + return H_MULTI_THREADS_ACTIVE; + } + } + + cas_pvr = cas_check_pvr(cpu, max_compat, &vec, &raw_mode_supported); + if (!cas_pvr && (!raw_mode_supported || max_compat)) { + /* + * We couldn't find a suitable compatibility mode, and either + * the guest doesn't support "raw" mode for this CPU, or "raw" + * mode is disabled because a maximum compat mode is set. + */ + error_report("Couldn't negotiate a suitable PVR during CAS"); + return H_HARDWARE; + } + + /* Update CPUs */ + if (cpu->compat_pvr != cas_pvr) { + Error *local_err = NULL; + + if (ppc_set_compat_all(cas_pvr, &local_err) < 0) { + /* We fail to set compat mode (likely because running with KVM PR), + * but maybe we can fallback to raw mode if the guest supports it. + */ + if (!raw_mode_supported) { + error_report_err(local_err); + return H_HARDWARE; + } + error_free(local_err); + } + } + + /* For the future use: here @ov_table points to the first option vector */ + ov_table = vec; + + ov1_guest = spapr_ovec_parse_vector(ov_table, 1); + if (!ov1_guest) { + warn_report("guest didn't provide option vector 1"); + return H_PARAMETER; + } + ov5_guest = spapr_ovec_parse_vector(ov_table, 5); + if (!ov5_guest) { + spapr_ovec_cleanup(ov1_guest); + warn_report("guest didn't provide option vector 5"); + return H_PARAMETER; + } + if (spapr_ovec_test(ov5_guest, OV5_MMU_BOTH)) { + error_report("guest requested hash and radix MMU, which is invalid."); + exit(EXIT_FAILURE); + } + if (spapr_ovec_test(ov5_guest, OV5_XIVE_BOTH)) { + error_report("guest requested an invalid interrupt mode"); + exit(EXIT_FAILURE); + } + + guest_radix = spapr_ovec_test(ov5_guest, OV5_MMU_RADIX_300); + + guest_xive = spapr_ovec_test(ov5_guest, OV5_XIVE_EXPLOIT); + + /* + * HPT resizing is a bit of a special case, because when enabled + * we assume an HPT guest will support it until it says it + * doesn't, instead of assuming it won't support it until it says + * it does. Strictly speaking that approach could break for + * guests which don't make a CAS call, but those are so old we + * don't care about them. 
Without that assumption we'd have to + * make at least a temporary allocation of an HPT sized for max + * memory, which could be impossibly difficult under KVM HV if + * maxram is large. + */ + if (!guest_radix && !spapr_ovec_test(ov5_guest, OV5_HPT_RESIZE)) { + int maxshift = spapr_hpt_shift_for_ramsize(MACHINE(spapr)->maxram_size); + + if (spapr->resize_hpt == SPAPR_RESIZE_HPT_REQUIRED) { + error_report( + "h_client_architecture_support: Guest doesn't support HPT resizing, but resize-hpt=required"); + exit(1); + } + + if (spapr->htab_shift < maxshift) { + /* Guest doesn't know about HPT resizing, so we + * pre-emptively resize for the maximum permitted RAM. At + * the point this is called, nothing should have been + * entered into the existing HPT */ + spapr_reallocate_hpt(spapr, maxshift, &error_fatal); + push_sregs_to_kvm_pr(spapr); + } + } + + /* NOTE: there are actually a number of ov5 bits where input from the + * guest is always zero, and the platform/QEMU enables them independently + * of guest input. To model these properly we'd want some sort of mask, + * but since they only currently apply to memory migration as defined + * by LoPAPR 1.1, 14.5.4.8, which QEMU doesn't implement, we don't need + * to worry about this for now. + */ + + /* full range of negotiated ov5 capabilities */ + spapr_ovec_intersect(spapr->ov5_cas, spapr->ov5, ov5_guest); + spapr_ovec_cleanup(ov5_guest); + + spapr_check_mmu_mode(guest_radix); + + spapr->cas_pre_isa3_guest = !spapr_ovec_test(ov1_guest, OV1_PPC_3_00); + spapr_ovec_cleanup(ov1_guest); + + /* + * Check for NUMA affinity conditions now that we know which NUMA + * affinity the guest will use. + */ + spapr_numa_associativity_check(spapr); + + /* + * Ensure the guest asks for an interrupt mode we support; + * otherwise terminate the boot. + */ + if (guest_xive) { + if (!spapr->irq->xive) { + error_report( +"Guest requested unavailable interrupt mode (XIVE), try the ic-mode=xive or ic-mode=dual machine property"); + exit(EXIT_FAILURE); + } + } else { + if (!spapr->irq->xics) { + error_report( +"Guest requested unavailable interrupt mode (XICS), either don't set the ic-mode machine property or try ic-mode=xics or ic-mode=dual"); + exit(EXIT_FAILURE); + } + } + + spapr_irq_update_active_intc(spapr); + + /* + * Process all pending hot-plug/unplug requests now. An updated full + * rendered FDT will be returned to the guest. + */ + spapr_drc_reset_all(spapr); + spapr_clear_pending_hotplug_events(spapr); + + /* + * If spapr_machine_reset() did not set up a HPT but one is necessary + * (because the guest isn't going to use radix) then set it up here. 
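+     * A set PATE1_GR bit in spapr->patb_entry means the partition table
+     * entry still describes a radix setup, i.e. no HPT was allocated at
+     * reset, so a guest that negotiated hash needs one allocated now.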
+ */ + if ((spapr->patb_entry & PATE1_GR) && !guest_radix) { + /* legacy hash or new hash: */ + spapr_setup_hpt(spapr); + } + + fdt = spapr_build_fdt(spapr, spapr->vof != NULL, fdt_bufsize); + g_free(spapr->fdt_blob); + spapr->fdt_size = fdt_totalsize(fdt); + spapr->fdt_initial_size = spapr->fdt_size; + spapr->fdt_blob = fdt; + + return H_SUCCESS; +} + +static target_ulong h_client_architecture_support(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong vec = ppc64_phys_to_real(args[0]); + target_ulong fdt_buf = args[1]; + target_ulong fdt_bufsize = args[2]; + target_ulong ret; + SpaprDeviceTreeUpdateHeader hdr = { .version_id = 1 }; + + if (fdt_bufsize < sizeof(hdr)) { + error_report("SLOF provided insufficient CAS buffer " + TARGET_FMT_lu " (min: %zu)", fdt_bufsize, sizeof(hdr)); + exit(EXIT_FAILURE); + } + + fdt_bufsize -= sizeof(hdr); + + ret = do_client_architecture_support(cpu, spapr, vec, fdt_bufsize); + if (ret == H_SUCCESS) { + _FDT((fdt_pack(spapr->fdt_blob))); + spapr->fdt_size = fdt_totalsize(spapr->fdt_blob); + spapr->fdt_initial_size = spapr->fdt_size; + + cpu_physical_memory_write(fdt_buf, &hdr, sizeof(hdr)); + cpu_physical_memory_write(fdt_buf + sizeof(hdr), spapr->fdt_blob, + spapr->fdt_size); + trace_spapr_cas_continue(spapr->fdt_size + sizeof(hdr)); + } + + return ret; +} + +target_ulong spapr_vof_client_architecture_support(MachineState *ms, + CPUState *cs, + target_ulong ovec_addr) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + target_ulong ret = do_client_architecture_support(POWERPC_CPU(cs), spapr, + ovec_addr, FDT_MAX_SIZE); + + /* + * This adds stdout and generates phandles for boottime and CAS FDTs. + * It is alright to update the FDT here as do_client_architecture_support() + * does not pack it. 
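+     * The FDT is only packed later, on the SLOF path, in
+     * h_client_architecture_support().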
+ */ + spapr_vof_client_dt_finalize(spapr, spapr->fdt_blob); + + return ret; +} + +static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint64_t characteristics = H_CPU_CHAR_HON_BRANCH_HINTS & + ~H_CPU_CHAR_THR_RECONF_TRIG; + uint64_t behaviour = H_CPU_BEHAV_FAVOUR_SECURITY; + uint8_t safe_cache = spapr_get_cap(spapr, SPAPR_CAP_CFPC); + uint8_t safe_bounds_check = spapr_get_cap(spapr, SPAPR_CAP_SBBC); + uint8_t safe_indirect_branch = spapr_get_cap(spapr, SPAPR_CAP_IBS); + uint8_t count_cache_flush_assist = spapr_get_cap(spapr, + SPAPR_CAP_CCF_ASSIST); + + switch (safe_cache) { + case SPAPR_CAP_WORKAROUND: + characteristics |= H_CPU_CHAR_L1D_FLUSH_ORI30; + characteristics |= H_CPU_CHAR_L1D_FLUSH_TRIG2; + characteristics |= H_CPU_CHAR_L1D_THREAD_PRIV; + behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR; + break; + case SPAPR_CAP_FIXED: + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY; + behaviour |= H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS; + break; + default: /* broken */ + assert(safe_cache == SPAPR_CAP_BROKEN); + behaviour |= H_CPU_BEHAV_L1D_FLUSH_PR; + break; + } + + switch (safe_bounds_check) { + case SPAPR_CAP_WORKAROUND: + characteristics |= H_CPU_CHAR_SPEC_BAR_ORI31; + behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + break; + case SPAPR_CAP_FIXED: + break; + default: /* broken */ + assert(safe_bounds_check == SPAPR_CAP_BROKEN); + behaviour |= H_CPU_BEHAV_BNDS_CHK_SPEC_BAR; + break; + } + + switch (safe_indirect_branch) { + case SPAPR_CAP_FIXED_NA: + break; + case SPAPR_CAP_FIXED_CCD: + characteristics |= H_CPU_CHAR_CACHE_COUNT_DIS; + break; + case SPAPR_CAP_FIXED_IBS: + characteristics |= H_CPU_CHAR_BCCTRL_SERIALISED; + break; + case SPAPR_CAP_WORKAROUND: + behaviour |= H_CPU_BEHAV_FLUSH_COUNT_CACHE; + if (count_cache_flush_assist) { + characteristics |= H_CPU_CHAR_BCCTR_FLUSH_ASSIST; + } + break; + default: /* broken */ + assert(safe_indirect_branch == SPAPR_CAP_BROKEN); + break; + } + + args[0] = characteristics; + args[1] = behaviour; + return H_SUCCESS; +} + +static target_ulong h_update_dt(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong dt = ppc64_phys_to_real(args[0]); + struct fdt_header hdr = { 0 }; + unsigned cb; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + void *fdt; + + cpu_physical_memory_read(dt, &hdr, sizeof(hdr)); + cb = fdt32_to_cpu(hdr.totalsize); + + if (!smc->update_dt_enabled) { + return H_SUCCESS; + } + + /* Check that the fdt did not grow out of proportion */ + if (cb > spapr->fdt_initial_size * 2) { + trace_spapr_update_dt_failed_size(spapr->fdt_initial_size, cb, + fdt32_to_cpu(hdr.magic)); + return H_PARAMETER; + } + + fdt = g_malloc0(cb); + cpu_physical_memory_read(dt, fdt, cb); + + /* Check the fdt consistency */ + if (fdt_check_full(fdt, cb)) { + trace_spapr_update_dt_failed_check(spapr->fdt_initial_size, cb, + fdt32_to_cpu(hdr.magic)); + return H_PARAMETER; + } + + g_free(spapr->fdt_blob); + spapr->fdt_size = cb; + spapr->fdt_blob = fdt; + trace_spapr_update_dt(cb); + + return H_SUCCESS; +} + +static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; +static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1]; +static spapr_hcall_fn svm_hypercall_table[(SVM_HCALL_MAX - SVM_HCALL_BASE) / 4 + 1]; + +void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn) +{ + spapr_hcall_fn *slot; + + if (opcode <= MAX_HCALL_OPCODE) { + assert((opcode & 0x3) == 0); + + slot = 
&papr_hypercall_table[opcode / 4];
+    } else if (opcode >= SVM_HCALL_BASE && opcode <= SVM_HCALL_MAX) {
+        /* we only have SVM-related hcall numbers assigned in multiples of 4 */
+        assert((opcode & 0x3) == 0);
+
+        slot = &svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
+    } else {
+        assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX));
+
+        slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+    }
+
+    assert(!(*slot));
+    *slot = fn;
+}
+
+target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
+                             target_ulong *args)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
+    if ((opcode <= MAX_HCALL_OPCODE)
+        && ((opcode & 0x3) == 0)) {
+        spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
+
+        if (fn) {
+            return fn(cpu, spapr, opcode, args);
+        }
+    } else if ((opcode >= SVM_HCALL_BASE) &&
+               (opcode <= SVM_HCALL_MAX)) {
+        spapr_hcall_fn fn = svm_hypercall_table[(opcode - SVM_HCALL_BASE) / 4];
+
+        if (fn) {
+            return fn(cpu, spapr, opcode, args);
+        }
+    } else if ((opcode >= KVMPPC_HCALL_BASE) &&
+               (opcode <= KVMPPC_HCALL_MAX)) {
+        spapr_hcall_fn fn = kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
+
+        if (fn) {
+            return fn(cpu, spapr, opcode, args);
+        }
+    }
+
+    qemu_log_mask(LOG_UNIMP, "Unimplemented SPAPR hcall 0x" TARGET_FMT_lx "\n",
+                  opcode);
+    return H_FUNCTION;
+}
+
+#ifndef CONFIG_TCG
+static target_ulong h_softmmu(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                              target_ulong opcode, target_ulong *args)
+{
+    g_assert_not_reached();
+}
+
+static void hypercall_register_softmmu(void)
+{
+    /* hcall-pft */
+    spapr_register_hypercall(H_ENTER, h_softmmu);
+    spapr_register_hypercall(H_REMOVE, h_softmmu);
+    spapr_register_hypercall(H_PROTECT, h_softmmu);
+    spapr_register_hypercall(H_READ, h_softmmu);
+
+    /* hcall-bulk */
+    spapr_register_hypercall(H_BULK_REMOVE, h_softmmu);
+}
+#else
+static void hypercall_register_softmmu(void)
+{
+    /* DO NOTHING */
+}
+#endif
+
+static void hypercall_register_types(void)
+{
+    hypercall_register_softmmu();
+
+    /* hcall-hpt-resize */
+    spapr_register_hypercall(H_RESIZE_HPT_PREPARE, h_resize_hpt_prepare);
+    spapr_register_hypercall(H_RESIZE_HPT_COMMIT, h_resize_hpt_commit);
+
+    /* hcall-splpar */
+    spapr_register_hypercall(H_REGISTER_VPA, h_register_vpa);
+    spapr_register_hypercall(H_CEDE, h_cede);
+    spapr_register_hypercall(H_CONFER, h_confer);
+    spapr_register_hypercall(H_PROD, h_prod);
+
+    /* hcall-join */
+    spapr_register_hypercall(H_JOIN, h_join);
+
+    spapr_register_hypercall(H_SIGNAL_SYS_RESET, h_signal_sys_reset);
+
+    /* processor register resource access h-calls */
+    spapr_register_hypercall(H_SET_SPRG0, h_set_sprg0);
+    spapr_register_hypercall(H_SET_DABR, h_set_dabr);
+    spapr_register_hypercall(H_SET_XDABR, h_set_xdabr);
+    spapr_register_hypercall(H_PAGE_INIT, h_page_init);
+    spapr_register_hypercall(H_SET_MODE, h_set_mode);
+
+    /* In Memory Table MMU h-calls */
+    spapr_register_hypercall(H_CLEAN_SLB, h_clean_slb);
+    spapr_register_hypercall(H_INVALIDATE_PID, h_invalidate_pid);
+    spapr_register_hypercall(H_REGISTER_PROC_TBL, h_register_process_table);
+
+    /* hcall-get-cpu-characteristics */
+    spapr_register_hypercall(H_GET_CPU_CHARACTERISTICS,
+                             h_get_cpu_characteristics);
+
+    /* "debugger" hcalls (also used by SLOF). Note: We do -not- differentiate
+     * here between the "CI" and the "CACHE" variants; they will use whatever
+     * mapping attributes qemu is using.
When using KVM, the kernel will + * enforce the attributes more strongly + */ + spapr_register_hypercall(H_LOGICAL_CI_LOAD, h_logical_load); + spapr_register_hypercall(H_LOGICAL_CI_STORE, h_logical_store); + spapr_register_hypercall(H_LOGICAL_CACHE_LOAD, h_logical_load); + spapr_register_hypercall(H_LOGICAL_CACHE_STORE, h_logical_store); + spapr_register_hypercall(H_LOGICAL_ICBI, h_logical_icbi); + spapr_register_hypercall(H_LOGICAL_DCBF, h_logical_dcbf); + spapr_register_hypercall(KVMPPC_H_LOGICAL_MEMOP, h_logical_memop); + + /* qemu/KVM-PPC specific hcalls */ + spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas); + + /* ibm,client-architecture-support support */ + spapr_register_hypercall(KVMPPC_H_CAS, h_client_architecture_support); + + spapr_register_hypercall(KVMPPC_H_UPDATE_DT, h_update_dt); +} + +type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c new file mode 100644 index 000000000..db0107185 --- /dev/null +++ b/hw/ppc/spapr_iommu.c @@ -0,0 +1,718 @@ +/* + * QEMU sPAPR IOMMU (TCE) code + * + * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/log.h" +#include "qemu/module.h" +#include "sysemu/kvm.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" +#include "sysemu/dma.h" +#include "trace.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" + +#include <libfdt.h> + +enum SpaprTceAccess { + SPAPR_TCE_FAULT = 0, + SPAPR_TCE_RO = 1, + SPAPR_TCE_WO = 2, + SPAPR_TCE_RW = 3, +}; + +#define IOMMU_PAGE_SIZE(shift) (1ULL << (shift)) +#define IOMMU_PAGE_MASK(shift) (~(IOMMU_PAGE_SIZE(shift) - 1)) + +static QLIST_HEAD(, SpaprTceTable) spapr_tce_tables; + +SpaprTceTable *spapr_tce_find_by_liobn(target_ulong liobn) +{ + SpaprTceTable *tcet; + + if (liobn & 0xFFFFFFFF00000000ULL) { + hcall_dprintf("Request for out-of-bounds LIOBN 0x" TARGET_FMT_lx "\n", + liobn); + return NULL; + } + + QLIST_FOREACH(tcet, &spapr_tce_tables, list) { + if (tcet->liobn == (uint32_t)liobn) { + return tcet; + } + } + + return NULL; +} + +static IOMMUAccessFlags spapr_tce_iommu_access_flags(uint64_t tce) +{ + switch (tce & SPAPR_TCE_RW) { + case SPAPR_TCE_FAULT: + return IOMMU_NONE; + case SPAPR_TCE_RO: + return IOMMU_RO; + case SPAPR_TCE_WO: + return IOMMU_WO; + default: /* SPAPR_TCE_RW */ + return IOMMU_RW; + } +} + +static uint64_t *spapr_tce_alloc_table(uint32_t liobn, + uint32_t page_shift, + uint64_t bus_offset, + uint32_t nb_table, + int *fd, + bool need_vfio) +{ + uint64_t *table = NULL; + + if (kvm_enabled()) { + table = kvmppc_create_spapr_tce(liobn, page_shift, bus_offset, nb_table, + fd, need_vfio); + } + + if (!table) { + *fd = -1; + table = g_new0(uint64_t, nb_table); + } + + trace_spapr_iommu_new_table(liobn, table, *fd); + + return table; +} + +static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table) +{ + if (!kvm_enabled() || + (kvmppc_remove_spapr_tce(table, fd, nb_table) != 0)) { + g_free(table); + } +} + +/* Called from RCU critical section */ +static IOMMUTLBEntry spapr_tce_translate_iommu(IOMMUMemoryRegion *iommu, + hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu); + uint64_t tce; + IOMMUTLBEntry ret = { + .target_as = &address_space_memory, + .iova = 0, + .translated_addr = 0, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + + if ((addr >> tcet->page_shift) < tcet->nb_table) { + /* Check if we are in bound */ + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + tce = tcet->table[addr >> tcet->page_shift]; + ret.iova = addr & page_mask; + ret.translated_addr = tce & page_mask; + ret.addr_mask = ~page_mask; + ret.perm = spapr_tce_iommu_access_flags(tce); + } + trace_spapr_iommu_xlate(tcet->liobn, addr, ret.translated_addr, ret.perm, + ret.addr_mask); + + return ret; +} + +static void spapr_tce_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) +{ + MemoryRegion *mr = MEMORY_REGION(iommu_mr); + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_GET_CLASS(iommu_mr); + hwaddr addr, granularity; + IOMMUTLBEntry iotlb; + SpaprTceTable *tcet = container_of(iommu_mr, SpaprTceTable, iommu); + + if (tcet->skipping_replay) { + return; + } + + granularity = memory_region_iommu_get_min_page_size(iommu_mr); + + for (addr = 0; addr < memory_region_size(mr); addr += granularity) { + iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, n->iommu_idx); + if (iotlb.perm != IOMMU_NONE) { + n->notify(n, &iotlb); + } + + /* + * if (2^64 - MR size) < granularity, it's possible to get an + * infinite loop here. This should catch such a wraparound. 
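+         * For example, with a 64K granularity, addr = 0xffffffffffff0000
+         * would wrap to addr + granularity == 0, which is < addr, so the
+         * check below ends the loop.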
+         */
+        if ((addr + granularity) < addr) {
+            break;
+        }
+    }
+}
+
+static int spapr_tce_table_pre_save(void *opaque)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(opaque);
+
+    tcet->mig_table = tcet->table;
+    tcet->mig_nb_table = tcet->nb_table;
+
+    trace_spapr_iommu_pre_save(tcet->liobn, tcet->mig_nb_table,
+                               tcet->bus_offset, tcet->page_shift);
+
+    return 0;
+}
+
+static uint64_t spapr_tce_get_min_page_size(IOMMUMemoryRegion *iommu)
+{
+    SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
+
+    return 1ULL << tcet->page_shift;
+}
+
+static int spapr_tce_get_attr(IOMMUMemoryRegion *iommu,
+                              enum IOMMUMemoryRegionAttr attr, void *data)
+{
+    SpaprTceTable *tcet = container_of(iommu, SpaprTceTable, iommu);
+
+    if (attr == IOMMU_ATTR_SPAPR_TCE_FD && kvmppc_has_cap_spapr_vfio()) {
+        *(int *) data = tcet->fd;
+        return 0;
+    }
+
+    return -EINVAL;
+}
+
+static int spapr_tce_notify_flag_changed(IOMMUMemoryRegion *iommu,
+                                         IOMMUNotifierFlag old,
+                                         IOMMUNotifierFlag new,
+                                         Error **errp)
+{
+    struct SpaprTceTable *tbl = container_of(iommu, SpaprTceTable, iommu);
+
+    if (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP) {
+        error_setg(errp, "spapr_tce does not support dev-iotlb yet");
+        return -EINVAL;
+    }
+
+    if (old == IOMMU_NOTIFIER_NONE && new != IOMMU_NOTIFIER_NONE) {
+        spapr_tce_set_need_vfio(tbl, true);
+    } else if (old != IOMMU_NOTIFIER_NONE && new == IOMMU_NOTIFIER_NONE) {
+        spapr_tce_set_need_vfio(tbl, false);
+    }
+    return 0;
+}
+
+static int spapr_tce_table_post_load(void *opaque, int version_id)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(opaque);
+    uint32_t old_nb_table = tcet->nb_table;
+    uint64_t old_bus_offset = tcet->bus_offset;
+    uint32_t old_page_shift = tcet->page_shift;
+
+    if (tcet->vdev) {
+        spapr_vio_set_bypass(tcet->vdev, tcet->bypass);
+    }
+
+    if (tcet->mig_nb_table != tcet->nb_table) {
+        spapr_tce_table_disable(tcet);
+    }
+
+    if (tcet->mig_nb_table) {
+        if (!tcet->nb_table) {
+            spapr_tce_table_enable(tcet, old_page_shift, old_bus_offset,
+                                   tcet->mig_nb_table);
+        }
+
+        memcpy(tcet->table, tcet->mig_table,
+               tcet->nb_table * sizeof(tcet->table[0]));
+
+        g_free(tcet->mig_table);
+        tcet->mig_table = NULL;
+    }
+
+    trace_spapr_iommu_post_load(tcet->liobn, old_nb_table, tcet->nb_table,
+                                tcet->bus_offset, tcet->page_shift);
+
+    return 0;
+}
+
+static bool spapr_tce_table_ex_needed(void *opaque)
+{
+    SpaprTceTable *tcet = opaque;
+
+    return tcet->bus_offset || tcet->page_shift != 0xC;
+}
+
+static const VMStateDescription vmstate_spapr_tce_table_ex = {
+    .name = "spapr_iommu_ex",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_tce_table_ex_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(bus_offset, SpaprTceTable),
+        VMSTATE_UINT32(page_shift, SpaprTceTable),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+static const VMStateDescription vmstate_spapr_tce_table = {
+    .name = "spapr_iommu",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .pre_save = spapr_tce_table_pre_save,
+    .post_load = spapr_tce_table_post_load,
+    .fields = (VMStateField []) {
+        /* Sanity check */
+        VMSTATE_UINT32_EQUAL(liobn, SpaprTceTable, NULL),
+
+        /* IOMMU state */
+        VMSTATE_UINT32(mig_nb_table, SpaprTceTable),
+        VMSTATE_BOOL(bypass, SpaprTceTable),
+        VMSTATE_VARRAY_UINT32_ALLOC(mig_table, SpaprTceTable, mig_nb_table, 0,
+                                    vmstate_info_uint64, uint64_t),
+
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription*[]) {
+        &vmstate_spapr_tce_table_ex,
+        NULL
+    }
+};
+
+static void spapr_tce_table_realize(DeviceState *dev, Error **errp)
+{
+    SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev);
+    Object
*tcetobj = OBJECT(tcet); + gchar *tmp; + + tcet->fd = -1; + tcet->need_vfio = false; + tmp = g_strdup_printf("tce-root-%x", tcet->liobn); + memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX); + g_free(tmp); + + tmp = g_strdup_printf("tce-iommu-%x", tcet->liobn); + memory_region_init_iommu(&tcet->iommu, sizeof(tcet->iommu), + TYPE_SPAPR_IOMMU_MEMORY_REGION, + tcetobj, tmp, 0); + g_free(tmp); + + QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list); + + vmstate_register(VMSTATE_IF(tcet), tcet->liobn, &vmstate_spapr_tce_table, + tcet); +} + +void spapr_tce_set_need_vfio(SpaprTceTable *tcet, bool need_vfio) +{ + size_t table_size = tcet->nb_table * sizeof(uint64_t); + uint64_t *oldtable; + int newfd = -1; + + g_assert(need_vfio != tcet->need_vfio); + + tcet->need_vfio = need_vfio; + + if (!need_vfio || (tcet->fd != -1 && kvmppc_has_cap_spapr_vfio())) { + return; + } + + oldtable = tcet->table; + + tcet->table = spapr_tce_alloc_table(tcet->liobn, + tcet->page_shift, + tcet->bus_offset, + tcet->nb_table, + &newfd, + need_vfio); + memcpy(tcet->table, oldtable, table_size); + + spapr_tce_free_table(oldtable, tcet->fd, tcet->nb_table); + + tcet->fd = newfd; +} + +SpaprTceTable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn) +{ + SpaprTceTable *tcet; + gchar *tmp; + + if (spapr_tce_find_by_liobn(liobn)) { + error_report("Attempted to create TCE table with duplicate" + " LIOBN 0x%x", liobn); + return NULL; + } + + tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE)); + tcet->liobn = liobn; + + tmp = g_strdup_printf("tce-table-%x", liobn); + object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet)); + g_free(tmp); + object_unref(OBJECT(tcet)); + + qdev_realize(DEVICE(tcet), NULL, NULL); + + return tcet; +} + +void spapr_tce_table_enable(SpaprTceTable *tcet, + uint32_t page_shift, uint64_t bus_offset, + uint32_t nb_table) +{ + if (tcet->nb_table) { + warn_report("trying to enable already enabled TCE table"); + return; + } + + tcet->bus_offset = bus_offset; + tcet->page_shift = page_shift; + tcet->nb_table = nb_table; + tcet->table = spapr_tce_alloc_table(tcet->liobn, + tcet->page_shift, + tcet->bus_offset, + tcet->nb_table, + &tcet->fd, + tcet->need_vfio); + + memory_region_set_size(MEMORY_REGION(&tcet->iommu), + (uint64_t)tcet->nb_table << tcet->page_shift); + memory_region_add_subregion(&tcet->root, tcet->bus_offset, + MEMORY_REGION(&tcet->iommu)); +} + +void spapr_tce_table_disable(SpaprTceTable *tcet) +{ + if (!tcet->nb_table) { + return; + } + + memory_region_del_subregion(&tcet->root, MEMORY_REGION(&tcet->iommu)); + memory_region_set_size(MEMORY_REGION(&tcet->iommu), 0); + + spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table); + tcet->fd = -1; + tcet->table = NULL; + tcet->bus_offset = 0; + tcet->page_shift = 0; + tcet->nb_table = 0; +} + +static void spapr_tce_table_unrealize(DeviceState *dev) +{ + SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev); + + vmstate_unregister(VMSTATE_IF(tcet), &vmstate_spapr_tce_table, tcet); + + QLIST_REMOVE(tcet, list); + + spapr_tce_table_disable(tcet); +} + +MemoryRegion *spapr_tce_get_iommu(SpaprTceTable *tcet) +{ + return &tcet->root; +} + +static void spapr_tce_reset(DeviceState *dev) +{ + SpaprTceTable *tcet = SPAPR_TCE_TABLE(dev); + size_t table_size = tcet->nb_table * sizeof(uint64_t); + + if (tcet->nb_table) { + memset(tcet->table, 0, table_size); + } +} + +static target_ulong put_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + target_ulong tce) +{ + IOMMUTLBEvent event; + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + 
unsigned long index = (ioba - tcet->bus_offset) >> tcet->page_shift; + + if (index >= tcet->nb_table) { + hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x" + TARGET_FMT_lx "\n", ioba); + return H_PARAMETER; + } + + tcet->table[index] = tce; + + event.entry.target_as = &address_space_memory, + event.entry.iova = (ioba - tcet->bus_offset) & page_mask; + event.entry.translated_addr = tce & page_mask; + event.entry.addr_mask = ~page_mask; + event.entry.perm = spapr_tce_iommu_access_flags(tce); + event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; + memory_region_notify_iommu(&tcet->iommu, 0, event); + + return H_SUCCESS; +} + +static target_ulong h_put_tce_indirect(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + int i; + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong ioba1 = ioba; + target_ulong tce_list = args[2]; + target_ulong npages = args[3]; + target_ulong ret = H_PARAMETER, tce = 0; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + CPUState *cs = CPU(cpu); + hwaddr page_mask, page_size; + + if (!tcet) { + return H_PARAMETER; + } + + if ((npages > 512) || (tce_list & SPAPR_TCE_PAGE_MASK)) { + return H_PARAMETER; + } + + page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + page_size = IOMMU_PAGE_SIZE(tcet->page_shift); + ioba &= page_mask; + + for (i = 0; i < npages; ++i, ioba += page_size) { + tce = ldq_be_phys(cs->as, tce_list + i * sizeof(target_ulong)); + + ret = put_tce_emu(tcet, ioba, tce); + if (ret) { + break; + } + } + + /* Trace last successful or the first problematic entry */ + i = i ? (i - 1) : 0; + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_indirect(liobn, ioba1, tce_list, i, tce, ret); + } else { + trace_spapr_iommu_indirect(liobn, ioba1, tce_list, i, tce, ret); + } + return ret; +} + +static target_ulong h_stuff_tce(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + int i; + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong tce_value = args[2]; + target_ulong npages = args[3]; + target_ulong ret = H_PARAMETER; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + hwaddr page_mask, page_size; + + if (!tcet) { + return H_PARAMETER; + } + + if (npages > tcet->nb_table) { + return H_PARAMETER; + } + + page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + page_size = IOMMU_PAGE_SIZE(tcet->page_shift); + ioba &= page_mask; + + for (i = 0; i < npages; ++i, ioba += page_size) { + ret = put_tce_emu(tcet, ioba, tce_value); + if (ret) { + break; + } + } + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_stuff(liobn, ioba, tce_value, npages, ret); + } else { + trace_spapr_iommu_stuff(liobn, ioba, tce_value, npages, ret); + } + + return ret; +} + +static target_ulong h_put_tce(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong tce = args[2]; + target_ulong ret = H_PARAMETER; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + + if (tcet) { + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + ioba &= page_mask; + + ret = put_tce_emu(tcet, ioba, tce); + } + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_put(liobn, ioba, tce, ret); + } else { + trace_spapr_iommu_put(liobn, ioba, tce, ret); + } + + return ret; +} + +static target_ulong get_tce_emu(SpaprTceTable *tcet, target_ulong ioba, + target_ulong *tce) +{ + unsigned long index = (ioba - tcet->bus_offset) 
>> tcet->page_shift; + + if (index >= tcet->nb_table) { + hcall_dprintf("spapr_iommu_get_tce on out-of-bounds IOBA 0x" + TARGET_FMT_lx "\n", ioba); + return H_PARAMETER; + } + + *tce = tcet->table[index]; + + return H_SUCCESS; +} + +static target_ulong h_get_tce(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong liobn = args[0]; + target_ulong ioba = args[1]; + target_ulong tce = 0; + target_ulong ret = H_PARAMETER; + SpaprTceTable *tcet = spapr_tce_find_by_liobn(liobn); + + if (tcet) { + hwaddr page_mask = IOMMU_PAGE_MASK(tcet->page_shift); + + ioba &= page_mask; + + ret = get_tce_emu(tcet, ioba, &tce); + if (!ret) { + args[0] = tce; + } + } + if (SPAPR_IS_PCI_LIOBN(liobn)) { + trace_spapr_iommu_pci_get(liobn, ioba, ret, tce); + } else { + trace_spapr_iommu_get(liobn, ioba, ret, tce); + } + + return ret; +} + +int spapr_dma_dt(void *fdt, int node_off, const char *propname, + uint32_t liobn, uint64_t window, uint32_t size) +{ + uint32_t dma_prop[5]; + int ret; + + dma_prop[0] = cpu_to_be32(liobn); + dma_prop[1] = cpu_to_be32(window >> 32); + dma_prop[2] = cpu_to_be32(window & 0xFFFFFFFF); + dma_prop[3] = 0; /* window size is 32 bits */ + dma_prop[4] = cpu_to_be32(size); + + ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-address-cells", 2); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop_cell(fdt, node_off, "ibm,#dma-size-cells", 2); + if (ret < 0) { + return ret; + } + + ret = fdt_setprop(fdt, node_off, propname, dma_prop, sizeof(dma_prop)); + if (ret < 0) { + return ret; + } + + return 0; +} + +int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname, + SpaprTceTable *tcet) +{ + if (!tcet) { + return 0; + } + + return spapr_dma_dt(fdt, node_off, propname, + tcet->liobn, 0, tcet->nb_table << tcet->page_shift); +} + +static void spapr_tce_table_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + dc->realize = spapr_tce_table_realize; + dc->reset = spapr_tce_reset; + dc->unrealize = spapr_tce_table_unrealize; + /* Reason: This is just an internal device for handling the hypercalls */ + dc->user_creatable = false; + + QLIST_INIT(&spapr_tce_tables); + + /* hcall-tce */ + spapr_register_hypercall(H_PUT_TCE, h_put_tce); + spapr_register_hypercall(H_GET_TCE, h_get_tce); + spapr_register_hypercall(H_PUT_TCE_INDIRECT, h_put_tce_indirect); + spapr_register_hypercall(H_STUFF_TCE, h_stuff_tce); +} + +static TypeInfo spapr_tce_table_info = { + .name = TYPE_SPAPR_TCE_TABLE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprTceTable), + .class_init = spapr_tce_table_class_init, +}; + +static void spapr_iommu_memory_region_class_init(ObjectClass *klass, void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = spapr_tce_translate_iommu; + imrc->replay = spapr_tce_replay; + imrc->get_min_page_size = spapr_tce_get_min_page_size; + imrc->notify_flag_changed = spapr_tce_notify_flag_changed; + imrc->get_attr = spapr_tce_get_attr; +} + +static const TypeInfo spapr_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_SPAPR_IOMMU_MEMORY_REGION, + .class_init = spapr_iommu_memory_region_class_init, +}; + +static void register_types(void) +{ + type_register_static(&spapr_tce_table_info); + type_register_static(&spapr_iommu_memory_region_info); +} + +type_init(register_types); diff --git a/hw/ppc/spapr_irq.c b/hw/ppc/spapr_irq.c new file mode 100644 index 000000000..a0d1e1298 --- /dev/null +++ b/hw/ppc/spapr_irq.c @@ -0,0 
+1,599 @@ +/* + * QEMU PowerPC sPAPR IRQ interface + * + * Copyright (c) 2018, IBM Corporation. + * + * This code is licensed under the GPL version 2 or later. See the + * COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/irq.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/spapr_xive.h" +#include "hw/ppc/xics.h" +#include "hw/ppc/xics_spapr.h" +#include "hw/qdev-properties.h" +#include "cpu-models.h" +#include "sysemu/kvm.h" + +#include "trace.h" + +static const TypeInfo spapr_intc_info = { + .name = TYPE_SPAPR_INTC, + .parent = TYPE_INTERFACE, + .class_size = sizeof(SpaprInterruptControllerClass), +}; + +static void spapr_irq_msi_init(SpaprMachineState *spapr) +{ + if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) { + /* Legacy mode doesn't use this allocator */ + return; + } + + spapr->irq_map_nr = spapr_irq_nr_msis(spapr); + spapr->irq_map = bitmap_new(spapr->irq_map_nr); +} + +int spapr_irq_msi_alloc(SpaprMachineState *spapr, uint32_t num, bool align, + Error **errp) +{ + int irq; + + /* + * The 'align_mask' parameter of bitmap_find_next_zero_area() + * should be one less than a power of 2; 0 means no + * alignment. Adapt the 'align' value of the former allocator + * to fit the requirements of bitmap_find_next_zero_area() + */ + align -= 1; + + irq = bitmap_find_next_zero_area(spapr->irq_map, spapr->irq_map_nr, 0, num, + align); + if (irq == spapr->irq_map_nr) { + error_setg(errp, "can't find a free %d-IRQ block", num); + return -1; + } + + bitmap_set(spapr->irq_map, irq, num); + + return irq + SPAPR_IRQ_MSI; +} + +void spapr_irq_msi_free(SpaprMachineState *spapr, int irq, uint32_t num) +{ + bitmap_clear(spapr->irq_map, irq - SPAPR_IRQ_MSI, num); +} + +int spapr_irq_init_kvm(SpaprInterruptControllerInitKvm fn, + SpaprInterruptController *intc, + uint32_t nr_servers, + Error **errp) +{ + Error *local_err = NULL; + + if (kvm_enabled() && kvm_kernel_irqchip_allowed()) { + if (fn(intc, nr_servers, &local_err) < 0) { + if (kvm_kernel_irqchip_required()) { + error_prepend(&local_err, + "kernel_irqchip requested but unavailable: "); + error_propagate(errp, local_err); + return -1; + } + + /* + * We failed to initialize the KVM device, fallback to + * emulated mode + */ + error_prepend(&local_err, + "kernel_irqchip allowed but unavailable: "); + error_append_hint(&local_err, + "Falling back to kernel-irqchip=off\n"); + warn_report_err(local_err); + } + } + + return 0; +} + +/* + * XICS IRQ backend. + */ + +SpaprIrq spapr_irq_xics = { + .xics = true, + .xive = false, +}; + +/* + * XIVE IRQ backend. + */ + +SpaprIrq spapr_irq_xive = { + .xics = false, + .xive = true, +}; + +/* + * Dual XIVE and XICS IRQ backend. + * + * Both interrupt mode, XIVE and XICS, objects are created but the + * machine starts in legacy interrupt mode (XICS). It can be changed + * by the CAS negotiation process and, in that case, the new mode is + * activated after an extra machine reset. + */ + +/* + * Define values in sync with the XIVE and XICS backend + */ +SpaprIrq spapr_irq_dual = { + .xics = true, + .xive = true, +}; + + +static int spapr_irq_check(SpaprMachineState *spapr, Error **errp) +{ + ERRP_GUARD(); + MachineState *machine = MACHINE(spapr); + + /* + * Sanity checks on non-P9 machines. 
On these, XIVE is not + * advertised, see spapr_dt_ov5_platform_support() + */ + if (!ppc_type_check_compat(machine->cpu_type, CPU_POWERPC_LOGICAL_3_00, + 0, spapr->max_compat_pvr)) { + /* + * If the 'dual' interrupt mode is selected, force XICS as CAS + * negotiation is useless. + */ + if (spapr->irq == &spapr_irq_dual) { + spapr->irq = &spapr_irq_xics; + return 0; + } + + /* + * Non-P9 machines using only XIVE is a bogus setup. We have two + * scenarios to take into account because of the compat mode: + * + * 1. POWER7/8 machines should fail to init later on when creating + * the XIVE interrupt presenters because a POWER9 exception + * model is required. + + * 2. POWER9 machines using the POWER8 compat mode won't fail and + * will let the OS boot with a partial XIVE setup : DT + * properties but no hcalls. + * + * To cover both and not confuse the OS, add an early failure in + * QEMU. + */ + if (!spapr->irq->xics) { + error_setg(errp, "XIVE-only machines require a POWER9 CPU"); + return -1; + } + } + + /* + * On a POWER9 host, some older KVM XICS devices cannot be destroyed and + * re-created. Same happens with KVM nested guests. Detect that early to + * avoid QEMU to exit later when the guest reboots. + */ + if (kvm_enabled() && + spapr->irq == &spapr_irq_dual && + kvm_kernel_irqchip_required() && + xics_kvm_has_broken_disconnect()) { + error_setg(errp, + "KVM is incompatible with ic-mode=dual,kernel-irqchip=on"); + error_append_hint(errp, + "This can happen with an old KVM or in a KVM nested guest.\n"); + error_append_hint(errp, + "Try without kernel-irqchip or with kernel-irqchip=off.\n"); + return -1; + } + + return 0; +} + +/* + * sPAPR IRQ frontend routines for devices + */ +#define ALL_INTCS(spapr_) \ + { SPAPR_INTC((spapr_)->ics), SPAPR_INTC((spapr_)->xive), } + +int spapr_irq_cpu_intc_create(SpaprMachineState *spapr, + PowerPCCPU *cpu, Error **errp) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + int rc; + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + rc = sicc->cpu_intc_create(intc, cpu, errp); + if (rc < 0) { + return rc; + } + } + } + + return 0; +} + +void spapr_irq_cpu_intc_reset(SpaprMachineState *spapr, PowerPCCPU *cpu) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + sicc->cpu_intc_reset(intc, cpu); + } + } +} + +void spapr_irq_cpu_intc_destroy(SpaprMachineState *spapr, PowerPCCPU *cpu) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + sicc->cpu_intc_destroy(intc, cpu); + } + } +} + +static void spapr_set_irq(void *opaque, int irq, int level) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(opaque); + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->active_intc); + + sicc->set_irq(spapr->active_intc, irq, level); +} + +void spapr_irq_print_info(SpaprMachineState *spapr, Monitor *mon) +{ + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->active_intc); + + sicc->print_info(spapr->active_intc, mon); +} + +void spapr_irq_dt(SpaprMachineState *spapr, uint32_t nr_servers, + void *fdt, uint32_t phandle) +{ + 
SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->active_intc); + + sicc->dt(spapr->active_intc, nr_servers, fdt, phandle); +} + +uint32_t spapr_irq_nr_msis(SpaprMachineState *spapr) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (smc->legacy_irq_allocation) { + return smc->nr_xirqs; + } else { + return SPAPR_XIRQ_BASE + smc->nr_xirqs - SPAPR_IRQ_MSI; + } +} + +void spapr_irq_init(SpaprMachineState *spapr, Error **errp) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + if (kvm_enabled() && kvm_kernel_irqchip_split()) { + error_setg(errp, "kernel_irqchip split mode not supported on pseries"); + return; + } + + if (spapr_irq_check(spapr, errp) < 0) { + return; + } + + /* Initialize the MSI IRQ allocator. */ + spapr_irq_msi_init(spapr); + + if (spapr->irq->xics) { + Object *obj; + + obj = object_new(TYPE_ICS_SPAPR); + + object_property_add_child(OBJECT(spapr), "ics", obj); + object_property_set_link(obj, ICS_PROP_XICS, OBJECT(spapr), + &error_abort); + object_property_set_int(obj, "nr-irqs", smc->nr_xirqs, &error_abort); + if (!qdev_realize(DEVICE(obj), NULL, errp)) { + return; + } + + spapr->ics = ICS_SPAPR(obj); + } + + if (spapr->irq->xive) { + uint32_t nr_servers = spapr_max_server_number(spapr); + DeviceState *dev; + int i; + + dev = qdev_new(TYPE_SPAPR_XIVE); + qdev_prop_set_uint32(dev, "nr-irqs", smc->nr_xirqs + SPAPR_XIRQ_BASE); + /* + * 8 XIVE END structures per CPU. One for each available + * priority + */ + qdev_prop_set_uint32(dev, "nr-ends", nr_servers << 3); + object_property_set_link(OBJECT(dev), "xive-fabric", OBJECT(spapr), + &error_abort); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + + spapr->xive = SPAPR_XIVE(dev); + + /* Enable the CPU IPIs */ + for (i = 0; i < nr_servers; ++i) { + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(spapr->xive); + + if (sicc->claim_irq(SPAPR_INTC(spapr->xive), SPAPR_IRQ_IPI + i, + false, errp) < 0) { + return; + } + } + + spapr_xive_hcall_init(spapr); + } + + spapr->qirqs = qemu_allocate_irqs(spapr_set_irq, spapr, + smc->nr_xirqs + SPAPR_XIRQ_BASE); + + /* + * Mostly we don't actually need this until reset, except that not + * having this set up can cause VFIO devices to issue a + * false-positive warning during realize(), because they don't yet + * have an in-kernel irq chip. 
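+     * Calling spapr_irq_update_active_intc() here activates the default
+     * controller, which is what connects the in-kernel irqchip early
+     * when KVM is in use.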
+ */ + spapr_irq_update_active_intc(spapr); +} + +int spapr_irq_claim(SpaprMachineState *spapr, int irq, bool lsi, Error **errp) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + int rc; + + assert(irq >= SPAPR_XIRQ_BASE); + assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + + for (i = 0; i < ARRAY_SIZE(intcs); i++) { + SpaprInterruptController *intc = intcs[i]; + if (intc) { + SpaprInterruptControllerClass *sicc = SPAPR_INTC_GET_CLASS(intc); + rc = sicc->claim_irq(intc, irq, lsi, errp); + if (rc < 0) { + return rc; + } + } + } + + return 0; +} + +void spapr_irq_free(SpaprMachineState *spapr, int irq, int num) +{ + SpaprInterruptController *intcs[] = ALL_INTCS(spapr); + int i, j; + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + assert(irq >= SPAPR_XIRQ_BASE); + assert((irq + num) <= (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + + for (i = irq; i < (irq + num); i++) { + for (j = 0; j < ARRAY_SIZE(intcs); j++) { + SpaprInterruptController *intc = intcs[j]; + + if (intc) { + SpaprInterruptControllerClass *sicc + = SPAPR_INTC_GET_CLASS(intc); + sicc->free_irq(intc, i); + } + } + } +} + +qemu_irq spapr_qirq(SpaprMachineState *spapr, int irq) +{ + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + + /* + * This interface is basically for VIO and PHB devices to find the + * right qemu_irq to manipulate, so we only allow access to the + * external irqs for now. Currently anything which needs to + * access the IPIs most naturally gets there via the guest side + * interfaces, we can change this if we need to in future. + */ + assert(irq >= SPAPR_XIRQ_BASE); + assert(irq < (smc->nr_xirqs + SPAPR_XIRQ_BASE)); + + if (spapr->ics) { + assert(ics_valid_irq(spapr->ics, irq)); + } + if (spapr->xive) { + assert(irq < spapr->xive->nr_irqs); + assert(xive_eas_is_valid(&spapr->xive->eat[irq])); + } + + return spapr->qirqs[irq]; +} + +int spapr_irq_post_load(SpaprMachineState *spapr, int version_id) +{ + SpaprInterruptControllerClass *sicc; + + spapr_irq_update_active_intc(spapr); + sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); + return sicc->post_load(spapr->active_intc, version_id); +} + +void spapr_irq_reset(SpaprMachineState *spapr, Error **errp) +{ + assert(!spapr->irq_map || bitmap_empty(spapr->irq_map, spapr->irq_map_nr)); + + spapr_irq_update_active_intc(spapr); +} + +int spapr_irq_get_phandle(SpaprMachineState *spapr, void *fdt, Error **errp) +{ + const char *nodename = "interrupt-controller"; + int offset, phandle; + + offset = fdt_subnode_offset(fdt, 0, nodename); + if (offset < 0) { + error_setg(errp, "Can't find node \"%s\": %s", + nodename, fdt_strerror(offset)); + return -1; + } + + phandle = fdt_get_phandle(fdt, offset); + if (!phandle) { + error_setg(errp, "Can't get phandle of node \"%s\"", nodename); + return -1; + } + + return phandle; +} + +static void set_active_intc(SpaprMachineState *spapr, + SpaprInterruptController *new_intc) +{ + SpaprInterruptControllerClass *sicc; + uint32_t nr_servers = spapr_max_server_number(spapr); + + assert(new_intc); + + if (new_intc == spapr->active_intc) { + /* Nothing to do */ + return; + } + + if (spapr->active_intc) { + sicc = SPAPR_INTC_GET_CLASS(spapr->active_intc); + if (sicc->deactivate) { + sicc->deactivate(spapr->active_intc); + } + } + + sicc = SPAPR_INTC_GET_CLASS(new_intc); + if (sicc->activate) { + sicc->activate(new_intc, nr_servers, &error_fatal); + } + + spapr->active_intc = new_intc; + + /* + * We've changed the kernel irqchip, let VFIO devices 
know they + * need to readjust. + */ + kvm_irqchip_change_notify(); +} + +void spapr_irq_update_active_intc(SpaprMachineState *spapr) +{ + SpaprInterruptController *new_intc; + + if (!spapr->ics) { + /* + * XXX before we run CAS, ov5_cas is initialized empty, which + * indicates XICS, even if we have ic-mode=xive. TODO: clean + * up the CAS path so that we have a clearer way of handling + * this. + */ + new_intc = SPAPR_INTC(spapr->xive); + } else if (spapr->ov5_cas + && spapr_ovec_test(spapr->ov5_cas, OV5_XIVE_EXPLOIT)) { + new_intc = SPAPR_INTC(spapr->xive); + } else { + new_intc = SPAPR_INTC(spapr->ics); + } + + set_active_intc(spapr, new_intc); +} + +/* + * XICS legacy routines - to deprecate one day + */ + +static int ics_find_free_block(ICSState *ics, int num, int alignnum) +{ + int first, i; + + for (first = 0; first < ics->nr_irqs; first += alignnum) { + if (num > (ics->nr_irqs - first)) { + return -1; + } + for (i = first; i < first + num; ++i) { + if (!ics_irq_free(ics, i)) { + break; + } + } + if (i == (first + num)) { + return first; + } + } + + return -1; +} + +int spapr_irq_find(SpaprMachineState *spapr, int num, bool align, Error **errp) +{ + ICSState *ics = spapr->ics; + int first = -1; + + assert(ics); + + /* + * MSIMesage::data is used for storing VIRQ so + * it has to be aligned to num to support multiple + * MSI vectors. MSI-X is not affected by this. + * The hint is used for the first IRQ, the rest should + * be allocated continuously. + */ + if (align) { + assert((num == 1) || (num == 2) || (num == 4) || + (num == 8) || (num == 16) || (num == 32)); + first = ics_find_free_block(ics, num, num); + } else { + first = ics_find_free_block(ics, num, 1); + } + + if (first < 0) { + error_setg(errp, "can't find a free %d-IRQ block", num); + return -1; + } + + return first + ics->offset; +} + +SpaprIrq spapr_irq_xics_legacy = { + .xics = true, + .xive = false, +}; + +static void spapr_irq_register_types(void) +{ + type_register_static(&spapr_intc_info); +} + +type_init(spapr_irq_register_types) diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c new file mode 100644 index 000000000..e9ef7e764 --- /dev/null +++ b/hw/ppc/spapr_numa.c @@ -0,0 +1,697 @@ +/* + * QEMU PowerPC pSeries Logical Partition NUMA associativity handling + * + * Copyright IBM Corp. 2020 + * + * Authors: + * Daniel Henrique Barboza <danielhb413@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "hw/ppc/spapr_numa.h" +#include "hw/pci-host/spapr.h" +#include "hw/ppc/fdt.h" + +/* Moved from hw/ppc/spapr_pci_nvlink2.c */ +#define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) + +/* + * Retrieves max_dist_ref_points of the current NUMA affinity. + */ +static int get_max_dist_ref_points(SpaprMachineState *spapr) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_DIST_REF_POINTS; + } + + return FORM1_DIST_REF_POINTS; +} + +/* + * Retrieves numa_assoc_size of the current NUMA affinity. + */ +static int get_numa_assoc_size(SpaprMachineState *spapr) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return FORM2_NUMA_ASSOC_SIZE; + } + + return FORM1_NUMA_ASSOC_SIZE; +} + +/* + * Retrieves vcpu_assoc_size of the current NUMA affinity. + * + * vcpu_assoc_size is the size of ibm,associativity array + * for CPUs, which has an extra element (vcpu_id) in the end. 
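+ * For example, with FORM1 (4 reference points) each per-node array has
+ * 5 elements, so vcpu_assoc_size is 6; with FORM2 ({1, numa_id}) it is 3.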
+ */
+static int get_vcpu_assoc_size(SpaprMachineState *spapr)
+{
+    return get_numa_assoc_size(spapr) + 1;
+}
+
+/*
+ * Retrieves the ibm,associativity array of NUMA node 'node_id'
+ * for the current NUMA affinity.
+ */
+static const uint32_t *get_associativity(SpaprMachineState *spapr, int node_id)
+{
+    if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) {
+        return spapr->FORM2_assoc_array[node_id];
+    }
+    return spapr->FORM1_assoc_array[node_id];
+}
+
+/*
+ * Wrapper that returns node distance from ms->numa_state->nodes
+ * after handling edge cases where the distance might be absent.
+ */
+static int get_numa_distance(MachineState *ms, int src, int dst)
+{
+    NodeInfo *numa_info = ms->numa_state->nodes;
+    int ret = numa_info[src].distance[dst];
+
+    if (ret != 0) {
+        return ret;
+    }
+
+    /*
+     * In case QEMU adds a default NUMA single node when the user
+     * did not add any, or when the user did not supply distances,
+     * the distance will be absent (zero). Return local/remote
+     * distance in this case.
+     */
+    if (src == dst) {
+        return NUMA_DISTANCE_MIN;
+    }
+
+    return NUMA_DISTANCE_DEFAULT;
+}
+
+static bool spapr_numa_is_symmetrical(MachineState *ms)
+{
+    int nb_numa_nodes = ms->numa_state->num_nodes;
+    int src, dst;
+
+    for (src = 0; src < nb_numa_nodes; src++) {
+        for (dst = src; dst < nb_numa_nodes; dst++) {
+            if (get_numa_distance(ms, src, dst) !=
+                get_numa_distance(ms, dst, src)) {
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+/*
+ * NVLink2-connected GPU RAM needs to be placed on a separate NUMA node.
+ * We assign a new numa ID per GPU in spapr_pci_collect_nvgpu() which is
+ * called from the vPHB reset handler, so we initialize the counter here.
+ * If no NUMA is configured from the QEMU side, we start from 1 as GPU RAM
+ * must be equally distant from any other node.
+ * The final value of spapr->gpu_numa_id is going to be written to
+ * max-associativity-domains in spapr_build_fdt().
+ */
+unsigned int spapr_numa_initial_nvgpu_numa_id(MachineState *machine)
+{
+    return MAX(1, machine->numa_state->num_nodes);
+}
+
+/*
+ * This function translates the user-supplied distances into
+ * what the kernel understands as possible values: 10
+ * (local distance), 20, 40, 80 and 160, and returns the equivalent
+ * NUMA level for each. The current heuristic is:
+ * - local distance (10) returns numa_level = 0x4, meaning there is
+ *   no rounding for local distance
+ * - distances between 11 and 30 inclusive -> rounded to 20,
+ *   numa_level = 0x3
+ * - distances between 31 and 60 inclusive -> rounded to 40,
+ *   numa_level = 0x2
+ * - distances between 61 and 120 inclusive -> rounded to 80,
+ *   numa_level = 0x1
+ * - everything above 120 returns numa_level = 0 to indicate that
+ *   there is no match. This will be calculated as distance = 160
+ *   by the kernel (as of v5.9)
+ */
+static uint8_t spapr_numa_get_numa_level(uint8_t distance)
+{
+    if (distance == 10) {
+        return 0x4;
+    } else if (distance >= 11 && distance <= 30) {
+        return 0x3;
+    } else if (distance >= 31 && distance <= 60) {
+        return 0x2;
+    } else if (distance >= 61 && distance <= 120) {
+        return 0x1;
+    }
+
+    return 0;
+}
+
+static void spapr_numa_define_FORM1_domains(SpaprMachineState *spapr)
+{
+    MachineState *ms = MACHINE(spapr);
+    int nb_numa_nodes = ms->numa_state->num_nodes;
+    int src, dst, i, j;
+
+    /*
+     * Fill all associativity domains of non-zero NUMA nodes with
+     * node_id. This is required because the default value (0) is
+     * considered a match with associativity domains of node 0.
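+     * For example, with FORM1 node 1 starts as { 4, 1, 1, 1, 1 } and
+     * node 2 as { 4, 2, 2, 2, 2 }, so neither spuriously matches the
+     * all-zero domains { 4, 0, 0, 0, 0 } of node 0.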
+ */ + for (i = 1; i < nb_numa_nodes; i++) { + for (j = 1; j < FORM1_DIST_REF_POINTS; j++) { + spapr->FORM1_assoc_array[i][j] = cpu_to_be32(i); + } + } + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = src; dst < nb_numa_nodes; dst++) { + /* + * This is how the associativity domain between A and B + * is calculated: + * + * - get the distance D between them + * - get the correspondent NUMA level 'n_level' for D + * - all associativity arrays were initialized with their own + * numa_ids, and we're calculating the distance in node_id + * ascending order, starting from node id 0 (the first node + * retrieved by numa_state). This will have a cascade effect in + * the algorithm because the associativity domains that node 0 + * defines will be carried over to other nodes, and node 1 + * associativities will be carried over after taking node 0 + * associativities into account, and so on. This happens because + * we'll assign assoc_src as the associativity domain of dst + * as well, for all NUMA levels beyond and including n_level. + * + * The PPC kernel expects the associativity domains of node 0 to + * be always 0, and this algorithm will grant that by default. + */ + uint8_t distance = get_numa_distance(ms, src, dst); + uint8_t n_level = spapr_numa_get_numa_level(distance); + uint32_t assoc_src; + + /* + * n_level = 0 means that the distance is greater than our last + * rounded value (120). In this case there is no NUMA level match + * between src and dst and we can skip the remaining of the loop. + * + * The Linux kernel will assume that the distance between src and + * dst, in this case of no match, is 10 (local distance) doubled + * for each NUMA it didn't match. We have FORM1_DIST_REF_POINTS + * levels (4), so this gives us 10*2*2*2*2 = 160. + * + * This logic can be seen in the Linux kernel source code, as of + * v5.9, in arch/powerpc/mm/numa.c, function __node_distance(). + */ + if (n_level == 0) { + continue; + } + + /* + * We must assign all assoc_src to dst, starting from n_level + * and going up to 0x1. + */ + for (i = n_level; i > 0; i--) { + assoc_src = spapr->FORM1_assoc_array[src][i]; + spapr->FORM1_assoc_array[dst][i] = assoc_src; + } + } + } + +} + +static void spapr_numa_FORM1_affinity_check(MachineState *machine) +{ + int i; + + /* + * Check we don't have a memory-less/cpu-less NUMA node + * Firmware relies on the existing memory/cpu topology to provide the + * NUMA topology to the kernel. + * And the linux kernel needs to know the NUMA topology at start + * to be able to hotplug CPUs later. + */ + if (machine->numa_state->num_nodes) { + for (i = 0; i < machine->numa_state->num_nodes; ++i) { + /* check for memory-less node */ + if (machine->numa_state->nodes[i].node_mem == 0) { + CPUState *cs; + int found = 0; + /* check for cpu-less node */ + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + if (cpu->node_id == i) { + found = 1; + break; + } + } + /* memory-less and cpu-less node */ + if (!found) { + error_report( +"Memory-less/cpu-less nodes are not supported with FORM1 NUMA (node %d)", i); + exit(EXIT_FAILURE); + } + } + } + } + + if (!spapr_numa_is_symmetrical(machine)) { + error_report( +"Asymmetrical NUMA topologies aren't supported in the pSeries machine using FORM1 NUMA"); + exit(EXIT_FAILURE); + } +} + +/* + * Set NUMA machine state data based on FORM1 affinity semantics. 
+ */
+static void spapr_numa_FORM1_affinity_init(SpaprMachineState *spapr,
+                                           MachineState *machine)
+{
+    SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+    int nb_numa_nodes = machine->numa_state->num_nodes;
+    int i, j, max_nodes_with_gpus;
+
+    /*
+     * For all associativity arrays: first position is the size,
+     * position FORM1_DIST_REF_POINTS is always the numa_id,
+     * represented by the index 'i'.
+     *
+     * This will break on sparse NUMA setups, when/if QEMU starts
+     * to support them, because there will be no more guarantee that
+     * 'i' will be a valid node_id set by the user.
+     */
+    for (i = 0; i < nb_numa_nodes; i++) {
+        spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+        spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+    }
+
+    /*
+     * Initialize NVLink GPU associativity arrays. We know that
+     * the first GPU will take the first available NUMA id, and
+     * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine.
+     * At this point we're not sure if there are GPUs or not, but
+     * let's initialize the associativity arrays and allow NVLink
+     * GPUs to be handled like regular NUMA nodes later on.
+     */
+    max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM;
+
+    for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) {
+        spapr->FORM1_assoc_array[i][0] = cpu_to_be32(FORM1_DIST_REF_POINTS);
+
+        for (j = 1; j < FORM1_DIST_REF_POINTS; j++) {
+            uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ?
+                                 SPAPR_GPU_NUMA_ID : cpu_to_be32(i);
+            spapr->FORM1_assoc_array[i][j] = gpu_assoc;
+        }
+
+        spapr->FORM1_assoc_array[i][FORM1_DIST_REF_POINTS] = cpu_to_be32(i);
+    }
+
+    /*
+     * Guests pseries-5.1 and older use zeroed associativity domains,
+     * i.e. no domain definition based on NUMA distance input.
+     *
+     * The same applies to guests that have only one NUMA node.
+     */
+    if (smc->pre_5_2_numa_associativity ||
+        machine->numa_state->num_nodes <= 1) {
+        return;
+    }
+
+    spapr_numa_define_FORM1_domains(spapr);
+}
+
+/*
+ * Initialize the NUMA FORM2 machine state data.
+ */
+static void spapr_numa_FORM2_affinity_init(SpaprMachineState *spapr)
+{
+    int i;
+
+    /*
+     * For all resources but CPUs, FORM2 associativity arrays will
+     * be a size 2 array with the following format:
+     *
+     *     ibm,associativity = {1, numa_id}
+     *
+     * CPUs will write an additional 'vcpu_id' on top of the arrays
+     * being initialized here. 'numa_id' is represented by the
+     * index 'i' of the loop.
+     *
+     * Given that this initialization is also valid for GPU associativity
+     * arrays, handle everything in one single step by populating the
+     * arrays up to NUMA_NODES_MAX_NUM.
+     */
+    for (i = 0; i < NUMA_NODES_MAX_NUM; i++) {
+        spapr->FORM2_assoc_array[i][0] = cpu_to_be32(1);
+        spapr->FORM2_assoc_array[i][1] = cpu_to_be32(i);
+    }
+}
+
+void spapr_numa_associativity_init(SpaprMachineState *spapr,
+                                   MachineState *machine)
+{
+    spapr_numa_FORM1_affinity_init(spapr, machine);
+    spapr_numa_FORM2_affinity_init(spapr);
+}
+
+void spapr_numa_associativity_check(SpaprMachineState *spapr)
+{
+    /*
+     * FORM2 does not have any restrictions we need to handle
+     * at CAS time, for now.
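+     * (FORM1, in contrast, rejects memory-less/cpu-less nodes and
+     * asymmetric distance matrices in spapr_numa_FORM1_affinity_check().)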
+ */ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + return; + } + + spapr_numa_FORM1_affinity_check(MACHINE(spapr)); +} + +void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, + int offset, int nodeid) +{ + const uint32_t *associativity = get_associativity(spapr, nodeid); + + _FDT((fdt_setprop(fdt, offset, "ibm,associativity", + associativity, + get_numa_assoc_size(spapr) * sizeof(uint32_t)))); +} + +static uint32_t *spapr_numa_get_vcpu_assoc(SpaprMachineState *spapr, + PowerPCCPU *cpu) +{ + const uint32_t *associativity = get_associativity(spapr, cpu->node_id); + int max_distance_ref_points = get_max_dist_ref_points(spapr); + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + uint32_t *vcpu_assoc = g_new(uint32_t, vcpu_assoc_size); + int index = spapr_get_vcpu_id(cpu); + + /* + * VCPUs have an extra 'cpu_id' value in ibm,associativity + * compared to other resources. Increment the size at index + * 0, put cpu_id last, then copy the remaining associativity + * domains. + */ + vcpu_assoc[0] = cpu_to_be32(max_distance_ref_points + 1); + vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); + memcpy(vcpu_assoc + 1, associativity + 1, + (vcpu_assoc_size - 2) * sizeof(uint32_t)); + + return vcpu_assoc; +} + +int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, + int offset, PowerPCCPU *cpu) +{ + g_autofree uint32_t *vcpu_assoc = NULL; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + + vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, cpu); + + /* Advertise NUMA via ibm,associativity */ + return fdt_setprop(fdt, offset, "ibm,associativity", vcpu_assoc, + vcpu_assoc_size * sizeof(uint32_t)); +} + + +int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, + int offset) +{ + MachineState *machine = MACHINE(spapr); + int max_distance_ref_points = get_max_dist_ref_points(spapr); + int nb_numa_nodes = machine->numa_state->num_nodes; + int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; + uint32_t *int_buf, *cur_index, buf_len; + int ret, i; + + /* ibm,associativity-lookup-arrays */ + buf_len = (nr_nodes * max_distance_ref_points + 2) * sizeof(uint32_t); + cur_index = int_buf = g_malloc0(buf_len); + int_buf[0] = cpu_to_be32(nr_nodes); + /* Number of entries per associativity list */ + int_buf[1] = cpu_to_be32(max_distance_ref_points); + cur_index += 2; + for (i = 0; i < nr_nodes; i++) { + /* + * For the lookup-array we use the ibm,associativity array of the + * current NUMA affinity, without the first element (size). 
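+ *
+ * A sketch of the final property for a hypothetical two-node
+ * machine with max_distance_ref_points == 4, reusing the example
+ * FORM1 domains from above:
+ *
+ *     ibm,associativity-lookup-arrays = < 2 4  0 0 0 0  0 0 0 1 >
+ *
+ * where 2 is nr_nodes, 4 is the entries-per-list count, and the
+ * remaining cells are each node's domains minus the size element.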
+ */ + const uint32_t *associativity = get_associativity(spapr, i); + memcpy(cur_index, ++associativity, + sizeof(uint32_t) * max_distance_ref_points); + cur_index += max_distance_ref_points; + } + ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, + (cur_index - int_buf) * sizeof(uint32_t)); + g_free(int_buf); + + return ret; +} + +static void spapr_numa_FORM1_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + uint32_t number_nvgpus_nodes = spapr->gpu_numa_id - + spapr_numa_initial_nvgpu_numa_id(ms); + uint32_t refpoints[] = { + cpu_to_be32(0x4), + cpu_to_be32(0x3), + cpu_to_be32(0x2), + cpu_to_be32(0x1), + }; + uint32_t nr_refpoints = ARRAY_SIZE(refpoints); + uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes; + uint32_t maxdomains[] = { + cpu_to_be32(4), + cpu_to_be32(maxdomain), + cpu_to_be32(maxdomain), + cpu_to_be32(maxdomain), + cpu_to_be32(maxdomain) + }; + + if (smc->pre_5_2_numa_associativity || + ms->numa_state->num_nodes <= 1) { + uint32_t legacy_refpoints[] = { + cpu_to_be32(0x4), + cpu_to_be32(0x4), + cpu_to_be32(0x2), + }; + uint32_t legacy_maxdomain = spapr->gpu_numa_id > 1 ? 1 : 0; + uint32_t legacy_maxdomains[] = { + cpu_to_be32(4), + cpu_to_be32(legacy_maxdomain), + cpu_to_be32(legacy_maxdomain), + cpu_to_be32(legacy_maxdomain), + cpu_to_be32(spapr->gpu_numa_id), + }; + + G_STATIC_ASSERT(sizeof(legacy_refpoints) <= sizeof(refpoints)); + G_STATIC_ASSERT(sizeof(legacy_maxdomains) <= sizeof(maxdomains)); + + nr_refpoints = 3; + + memcpy(refpoints, legacy_refpoints, sizeof(legacy_refpoints)); + memcpy(maxdomains, legacy_maxdomains, sizeof(legacy_maxdomains)); + + /* pseries-5.0 and older reference-points array is {0x4, 0x4} */ + if (smc->pre_5_1_assoc_refpoints) { + nr_refpoints = 2; + } + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, nr_refpoints * sizeof(refpoints[0]))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); +} + +static void spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + int nb_numa_nodes = ms->numa_state->num_nodes; + int distance_table_entries = nb_numa_nodes * nb_numa_nodes; + g_autofree uint32_t *lookup_index_table = NULL; + g_autofree uint8_t *distance_table = NULL; + int src, dst, i, distance_table_size; + + /* + * ibm,numa-lookup-index-table: array with length and a + * list of NUMA ids present in the guest. + */ + lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1); + lookup_index_table[0] = cpu_to_be32(nb_numa_nodes); + + for (i = 0; i < nb_numa_nodes; i++) { + lookup_index_table[i + 1] = cpu_to_be32(i); + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table", + lookup_index_table, + (nb_numa_nodes + 1) * sizeof(uint32_t))); + + /* + * ibm,numa-distance-table: contains all node distances. First + * element is the size of the table as uint32, followed up + * by all the uint8 distances from the first NUMA node, then all + * distances from the second NUMA node and so on. + * + * ibm,numa-lookup-index-table is used by guest to navigate this + * array because NUMA ids can be sparse (node 0 is the first, + * node 8 is the second ...). 
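+ *
+ * A sketch for a hypothetical two-node guest with a symmetrical
+ * inter-node distance of 20:
+ *
+ *     ibm,numa-lookup-index-table = < 2 0 1 >
+ *     ibm,numa-distance-table = < 4 [ 10 20 20 10 ] >
+ *
+ * where 4 is the uint32 entry count and the bracketed values are
+ * the uint8 distances, laid out row by row in node order.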
+ */ + distance_table_size = distance_table_entries * sizeof(uint8_t) + + sizeof(uint32_t); + distance_table = g_new0(uint8_t, distance_table_size); + stl_be_p(distance_table, distance_table_entries); + + /* Skip the uint32_t array length at the start */ + i = sizeof(uint32_t); + + for (src = 0; src < nb_numa_nodes; src++) { + for (dst = 0; dst < nb_numa_nodes; dst++) { + distance_table[i++] = get_numa_distance(ms, src, dst); + } + } + + _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table", + distance_table, distance_table_size)); +} + +/* + * This helper could be compressed in a single function with + * FORM1 logic since we're setting the same DT values, with the + * difference being a call to spapr_numa_FORM2_write_rtas_tables() + * in the end. The separation was made to avoid clogging FORM1 code + * which already has to deal with compat modes from previous + * QEMU machine types. + */ +static void spapr_numa_FORM2_write_rtas_dt(SpaprMachineState *spapr, + void *fdt, int rtas) +{ + MachineState *ms = MACHINE(spapr); + uint32_t number_nvgpus_nodes = spapr->gpu_numa_id - + spapr_numa_initial_nvgpu_numa_id(ms); + + /* + * In FORM2, ibm,associativity-reference-points will point to + * the element in the ibm,associativity array that contains the + * primary domain index (for FORM2, the first element). + * + * This value (in our case, the numa-id) is then used as an index + * to retrieve all other attributes of the node (distance, + * bandwidth, latency) via ibm,numa-lookup-index-table and other + * ibm,numa-*-table properties. + */ + uint32_t refpoints[] = { cpu_to_be32(1) }; + + uint32_t maxdomain = ms->numa_state->num_nodes + number_nvgpus_nodes; + uint32_t maxdomains[] = { cpu_to_be32(1), cpu_to_be32(maxdomain) }; + + _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", + refpoints, sizeof(refpoints))); + + _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", + maxdomains, sizeof(maxdomains))); + + spapr_numa_FORM2_write_rtas_tables(spapr, fdt, rtas); +} + +/* + * Helper that writes ibm,associativity-reference-points and + * max-associativity-domains in the RTAS pointed by @rtas + * in the DT @fdt. + */ +void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) +{ + if (spapr_ovec_test(spapr->ov5_cas, OV5_FORM2_AFFINITY)) { + spapr_numa_FORM2_write_rtas_dt(spapr, fdt, rtas); + return; + } + + spapr_numa_FORM1_write_rtas_dt(spapr, fdt, rtas); +} + +static target_ulong h_home_node_associativity(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + g_autofree uint32_t *vcpu_assoc = NULL; + target_ulong flags = args[0]; + target_ulong procno = args[1]; + PowerPCCPU *tcpu; + int idx, assoc_idx; + int vcpu_assoc_size = get_vcpu_assoc_size(spapr); + + /* only support procno from H_REGISTER_VPA */ + if (flags != 0x1) { + return H_FUNCTION; + } + + tcpu = spapr_find_cpu(procno); + if (tcpu == NULL) { + return H_P2; + } + + /* + * Given that we want to be flexible with the sizes and indexes, + * we must consider that there is a hard limit of how many + * associativities domain we can fit in R4 up to R9, which would be + * 12 associativity domains for vcpus. Assert and bail if that's + * not the case. 
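+ *
+ * As a worked example (hypothetical contents): with
+ * vcpu_assoc = { 5, 0, 0, 0, 1, 8 } (size, four domains, vcpu_id),
+ * the loop below yields R4 = ASSOCIATIVITY(0, 0),
+ * R5 = ASSOCIATIVITY(0, 1), R6 = ASSOCIATIVITY(8, -1) and
+ * R7..R9 = ASSOCIATIVITY(-1, -1): six registers, two 32-bit
+ * domains each, hence the limit of 12.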
+ */
+ g_assert((vcpu_assoc_size - 1) <= 12);
+
+ vcpu_assoc = spapr_numa_get_vcpu_assoc(spapr, tcpu);
+ /* assoc_idx starts at 1 to skip associativity size */
+ assoc_idx = 1;
+
+#define ASSOCIATIVITY(a, b) (((uint64_t)(a) << 32) | \
+ ((uint64_t)(b) & 0xffffffff))
+
+ for (idx = 0; idx < 6; idx++) {
+ int32_t a, b;
+
+ /*
+ * vcpu_assoc[] will contain the associativity domains for tcpu,
+ * including tcpu->node_id and procno, meaning that we don't
+ * need to use these variables here.
+ *
+ * We'll read 2 values at a time to fill up the ASSOCIATIVITY()
+ * macro. The ternary will fill the remaining registers with -1
+ * after we've gone through vcpu_assoc[].
+ */
+ a = assoc_idx < vcpu_assoc_size ?
+ be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
+ b = assoc_idx < vcpu_assoc_size ?
+ be32_to_cpu(vcpu_assoc[assoc_idx++]) : -1;
+
+ args[idx] = ASSOCIATIVITY(a, b);
+ }
+#undef ASSOCIATIVITY
+
+ return H_SUCCESS;
+}
+
+static void spapr_numa_register_types(void)
+{
+ /* Virtual Processor Home Node */
+ spapr_register_hypercall(H_HOME_NODE_ASSOCIATIVITY,
+ h_home_node_associativity);
+}
+
+type_init(spapr_numa_register_types)
diff --git a/hw/ppc/spapr_nvdimm.c b/hw/ppc/spapr_nvdimm.c
new file mode 100644
index 000000000..91de1052f
--- /dev/null
+++ b/hw/ppc/spapr_nvdimm.c
@@ -0,0 +1,528 @@
+/*
+ * QEMU PAPR Storage Class Memory Interfaces
+ *
+ * Copyright (c) 2019-2020, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/ppc/spapr_drc.h"
+#include "hw/ppc/spapr_nvdimm.h"
+#include "hw/mem/nvdimm.h"
+#include "qemu/nvdimm-utils.h"
+#include "hw/ppc/fdt.h"
+#include "qemu/range.h"
+#include "hw/ppc/spapr_numa.h"
+
+/* DIMM health bitmap indicators. Taken from kernel's papr_scm.c */
+/* SCM device is unable to persist memory contents */
+#define PAPR_PMEM_UNARMED PPC_BIT(0)
+
+/*
+ * The nvdimm size should be aligned to SCM block size.
+ * The SCM block size should be aligned to SPAPR_MEMORY_BLOCK_SIZE
+ * so that SCM regions do not overlap with DIMM memory regions.
+ * The SCM devices can have variable block sizes. For now, fix the
+ * block size to the minimum value.
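+ *
+ * (Numerically, assuming SPAPR_MEMORY_BLOCK_SIZE is 256 MiB: a 4 GiB
+ * backing device provides 4096 / 256 = 16 SCM blocks, while e.g. a
+ * 1000 MiB device would be rejected by the size alignment check in
+ * spapr_nvdimm_validate() below.)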
+ */ +#define SPAPR_MINIMUM_SCM_BLOCK_SIZE SPAPR_MEMORY_BLOCK_SIZE + +/* Have an explicit check for alignment */ +QEMU_BUILD_BUG_ON(SPAPR_MINIMUM_SCM_BLOCK_SIZE % SPAPR_MEMORY_BLOCK_SIZE); + +bool spapr_nvdimm_validate(HotplugHandler *hotplug_dev, NVDIMMDevice *nvdimm, + uint64_t size, Error **errp) +{ + const MachineClass *mc = MACHINE_GET_CLASS(hotplug_dev); + const MachineState *ms = MACHINE(hotplug_dev); + g_autofree char *uuidstr = NULL; + QemuUUID uuid; + int ret; + + if (!mc->nvdimm_supported) { + error_setg(errp, "NVDIMM hotplug not supported for this machine"); + return false; + } + + if (!ms->nvdimms_state->is_enabled) { + error_setg(errp, "nvdimm device found but 'nvdimm=off' was set"); + return false; + } + + if (object_property_get_int(OBJECT(nvdimm), NVDIMM_LABEL_SIZE_PROP, + &error_abort) == 0) { + error_setg(errp, "PAPR requires NVDIMM devices to have label-size set"); + return false; + } + + if (size % SPAPR_MINIMUM_SCM_BLOCK_SIZE) { + error_setg(errp, "PAPR requires NVDIMM memory size (excluding label)" + " to be a multiple of %" PRIu64 "MB", + SPAPR_MINIMUM_SCM_BLOCK_SIZE / MiB); + return false; + } + + uuidstr = object_property_get_str(OBJECT(nvdimm), NVDIMM_UUID_PROP, + &error_abort); + ret = qemu_uuid_parse(uuidstr, &uuid); + g_assert(!ret); + + if (qemu_uuid_is_null(&uuid)) { + error_setg(errp, "NVDIMM device requires the uuid to be set"); + return false; + } + + return true; +} + + +void spapr_add_nvdimm(DeviceState *dev, uint64_t slot) +{ + SpaprDrc *drc; + bool hotplugged = spapr_drc_hotplugged(dev); + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); + g_assert(drc); + + /* + * pc_dimm_get_free_slot() provided a free slot at pre-plug. The + * corresponding DRC is thus assumed to be attachable. + */ + spapr_drc_attach(drc, dev); + + if (hotplugged) { + spapr_hotplug_req_add_by_index(drc); + } +} + +static int spapr_dt_nvdimm(SpaprMachineState *spapr, void *fdt, + int parent_offset, NVDIMMDevice *nvdimm) +{ + int child_offset; + char *buf; + SpaprDrc *drc; + uint32_t drc_idx; + uint32_t node = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_NODE_PROP, + &error_abort); + uint64_t slot = object_property_get_uint(OBJECT(nvdimm), PC_DIMM_SLOT_PROP, + &error_abort); + uint64_t lsize = nvdimm->label_size; + uint64_t size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + NULL); + + drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PMEM, slot); + g_assert(drc); + + drc_idx = spapr_drc_index(drc); + + buf = g_strdup_printf("ibm,pmemory@%x", drc_idx); + child_offset = fdt_add_subnode(fdt, parent_offset, buf); + g_free(buf); + + _FDT(child_offset); + + _FDT((fdt_setprop_cell(fdt, child_offset, "reg", drc_idx))); + _FDT((fdt_setprop_string(fdt, child_offset, "compatible", "ibm,pmemory"))); + _FDT((fdt_setprop_string(fdt, child_offset, "device_type", "ibm,pmemory"))); + + spapr_numa_write_associativity_dt(spapr, fdt, child_offset, node); + + buf = qemu_uuid_unparse_strdup(&nvdimm->uuid); + _FDT((fdt_setprop_string(fdt, child_offset, "ibm,unit-guid", buf))); + g_free(buf); + + _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,my-drc-index", drc_idx))); + + _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,block-size", + SPAPR_MINIMUM_SCM_BLOCK_SIZE))); + _FDT((fdt_setprop_u64(fdt, child_offset, "ibm,number-of-blocks", + size / SPAPR_MINIMUM_SCM_BLOCK_SIZE))); + _FDT((fdt_setprop_cell(fdt, child_offset, "ibm,metadata-size", lsize))); + + _FDT((fdt_setprop_string(fdt, child_offset, "ibm,pmem-application", + "operating-system"))); + _FDT(fdt_setprop(fdt, child_offset, 
"ibm,cache-flush-required", NULL, 0)); + + return child_offset; +} + +int spapr_pmem_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + NVDIMMDevice *nvdimm = NVDIMM(drc->dev); + + *fdt_start_offset = spapr_dt_nvdimm(spapr, fdt, 0, nvdimm); + + return 0; +} + +void spapr_dt_persistent_memory(SpaprMachineState *spapr, void *fdt) +{ + int offset = fdt_subnode_offset(fdt, 0, "ibm,persistent-memory"); + GSList *iter, *nvdimms = nvdimm_get_device_list(); + + if (offset < 0) { + offset = fdt_add_subnode(fdt, 0, "ibm,persistent-memory"); + _FDT(offset); + _FDT((fdt_setprop_cell(fdt, offset, "#address-cells", 0x1))); + _FDT((fdt_setprop_cell(fdt, offset, "#size-cells", 0x0))); + _FDT((fdt_setprop_string(fdt, offset, "device_type", + "ibm,persistent-memory"))); + } + + /* Create DT entries for cold plugged NVDIMM devices */ + for (iter = nvdimms; iter; iter = iter->next) { + NVDIMMDevice *nvdimm = iter->data; + + spapr_dt_nvdimm(spapr, fdt, offset, nvdimm); + } + g_slist_free(nvdimms); + + return; +} + +static target_ulong h_scm_read_metadata(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t offset = args[1]; + uint64_t len = args[2]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + NVDIMMDevice *nvdimm; + NVDIMMClass *ddc; + uint64_t data = 0; + uint8_t buf[8] = { 0 }; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + if (len != 1 && len != 2 && + len != 4 && len != 8) { + return H_P3; + } + + nvdimm = NVDIMM(drc->dev); + if ((offset + len < offset) || + (nvdimm->label_size < len + offset)) { + return H_P2; + } + + ddc = NVDIMM_GET_CLASS(nvdimm); + ddc->read_label_data(nvdimm, buf, len, offset); + + switch (len) { + case 1: + data = ldub_p(buf); + break; + case 2: + data = lduw_be_p(buf); + break; + case 4: + data = ldl_be_p(buf); + break; + case 8: + data = ldq_be_p(buf); + break; + default: + g_assert_not_reached(); + } + + args[0] = data; + + return H_SUCCESS; +} + +static target_ulong h_scm_write_metadata(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t offset = args[1]; + uint64_t data = args[2]; + uint64_t len = args[3]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + NVDIMMDevice *nvdimm; + NVDIMMClass *ddc; + uint8_t buf[8] = { 0 }; + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + if (len != 1 && len != 2 && + len != 4 && len != 8) { + return H_P4; + } + + nvdimm = NVDIMM(drc->dev); + if ((offset + len < offset) || + (nvdimm->label_size < len + offset)) { + return H_P2; + } + + switch (len) { + case 1: + if (data & 0xffffffffffffff00) { + return H_P2; + } + stb_p(buf, data); + break; + case 2: + if (data & 0xffffffffffff0000) { + return H_P2; + } + stw_be_p(buf, data); + break; + case 4: + if (data & 0xffffffff00000000) { + return H_P2; + } + stl_be_p(buf, data); + break; + case 8: + stq_be_p(buf, data); + break; + default: + g_assert_not_reached(); + } + + ddc = NVDIMM_GET_CLASS(nvdimm); + ddc->write_label_data(nvdimm, buf, len, offset); + + return H_SUCCESS; +} + +static target_ulong h_scm_bind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + uint32_t drc_index = args[0]; + uint64_t starting_idx = args[1]; + uint64_t no_of_scm_blocks_to_bind = args[2]; + uint64_t 
target_logical_mem_addr = args[3];
+ uint64_t continue_token = args[4];
+ uint64_t size;
+ uint64_t total_no_of_scm_blocks;
+ SpaprDrc *drc = spapr_drc_by_index(drc_index);
+ hwaddr addr;
+ NVDIMMDevice *nvdimm;
+
+ if (!drc || !drc->dev ||
+ spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+ return H_PARAMETER;
+ }
+
+ /*
+ * Currently the continue token should be zero: QEMU has already
+ * bound everything, and this hcall doesn't return H_BUSY.
+ */
+ if (continue_token > 0) {
+ return H_P5;
+ }
+
+ /* Currently qemu assigns the address. */
+ if (target_logical_mem_addr != 0xffffffffffffffff) {
+ return H_OVERLAP;
+ }
+
+ nvdimm = NVDIMM(drc->dev);
+
+ size = object_property_get_uint(OBJECT(nvdimm),
+ PC_DIMM_SIZE_PROP, &error_abort);
+
+ total_no_of_scm_blocks = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+
+ if (starting_idx > total_no_of_scm_blocks) {
+ return H_P2;
+ }
+
+ if (((starting_idx + no_of_scm_blocks_to_bind) < starting_idx) ||
+ ((starting_idx + no_of_scm_blocks_to_bind) > total_no_of_scm_blocks)) {
+ return H_P3;
+ }
+
+ addr = object_property_get_uint(OBJECT(nvdimm),
+ PC_DIMM_ADDR_PROP, &error_abort);
+
+ addr += starting_idx * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+
+ /* Already bound; return the target logical address in R5 */
+ args[1] = addr;
+ args[2] = no_of_scm_blocks_to_bind;
+
+ return H_SUCCESS;
+}
+
+static target_ulong h_scm_unbind_mem(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ uint32_t drc_index = args[0];
+ uint64_t starting_scm_logical_addr = args[1];
+ uint64_t no_of_scm_blocks_to_unbind = args[2];
+ uint64_t continue_token = args[3];
+ uint64_t size_to_unbind;
+ Range blockrange = range_empty;
+ Range nvdimmrange = range_empty;
+ SpaprDrc *drc = spapr_drc_by_index(drc_index);
+ NVDIMMDevice *nvdimm;
+ uint64_t size, addr;
+
+ if (!drc || !drc->dev ||
+ spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) {
+ return H_PARAMETER;
+ }
+
+ /* continue_token should be zero as this hcall doesn't return H_BUSY. */
+ if (continue_token > 0) {
+ return H_P4;
+ }
+
+ /* Check if starting_scm_logical_addr is block aligned */
+ if (!QEMU_IS_ALIGNED(starting_scm_logical_addr,
+ SPAPR_MINIMUM_SCM_BLOCK_SIZE)) {
+ return H_P2;
+ }
+
+ size_to_unbind = no_of_scm_blocks_to_unbind * SPAPR_MINIMUM_SCM_BLOCK_SIZE;
+ if (no_of_scm_blocks_to_unbind == 0 || no_of_scm_blocks_to_unbind !=
+ size_to_unbind / SPAPR_MINIMUM_SCM_BLOCK_SIZE) {
+ return H_P3;
+ }
+
+ nvdimm = NVDIMM(drc->dev);
+ size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP,
+ &error_abort);
+ addr = object_property_get_int(OBJECT(nvdimm), PC_DIMM_ADDR_PROP,
+ &error_abort);
+
+ range_init_nofail(&nvdimmrange, addr, size);
+ range_init_nofail(&blockrange, starting_scm_logical_addr, size_to_unbind);
+
+ if (!range_contains_range(&nvdimmrange, &blockrange)) {
+ return H_P3;
+ }
+
+ args[1] = no_of_scm_blocks_to_unbind;
+
+ /* let unplug take care of actual unbind */
+ return H_SUCCESS;
+}
+
+#define H_UNBIND_SCOPE_ALL 0x1
+#define H_UNBIND_SCOPE_DRC 0x2
+
+static target_ulong h_scm_unbind_all(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ target_ulong opcode, target_ulong *args)
+{
+ uint64_t target_scope = args[0];
+ uint32_t drc_index = args[1];
+ uint64_t continue_token = args[2];
+ NVDIMMDevice *nvdimm;
+ uint64_t size;
+ uint64_t no_of_scm_blocks_unbound = 0;
+
+ /* continue_token should be zero as this hcall doesn't return H_BUSY.
*/ + if (continue_token > 0) { + return H_P4; + } + + if (target_scope == H_UNBIND_SCOPE_DRC) { + SpaprDrc *drc = spapr_drc_by_index(drc_index); + + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_P2; + } + + nvdimm = NVDIMM(drc->dev); + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + &error_abort); + + no_of_scm_blocks_unbound = size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; + } else if (target_scope == H_UNBIND_SCOPE_ALL) { + GSList *list, *nvdimms; + + nvdimms = nvdimm_get_device_list(); + for (list = nvdimms; list; list = list->next) { + nvdimm = list->data; + size = object_property_get_int(OBJECT(nvdimm), PC_DIMM_SIZE_PROP, + &error_abort); + + no_of_scm_blocks_unbound += size / SPAPR_MINIMUM_SCM_BLOCK_SIZE; + } + g_slist_free(nvdimms); + } else { + return H_PARAMETER; + } + + args[1] = no_of_scm_blocks_unbound; + + /* let unplug take care of actual unbind */ + return H_SUCCESS; +} + +static target_ulong h_scm_health(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + + NVDIMMDevice *nvdimm; + uint64_t hbitmap = 0; + uint32_t drc_index = args[0]; + SpaprDrc *drc = spapr_drc_by_index(drc_index); + const uint64_t hbitmap_mask = PAPR_PMEM_UNARMED; + + + /* Ensure that the drc is valid & is valid PMEM dimm and is plugged in */ + if (!drc || !drc->dev || + spapr_drc_type(drc) != SPAPR_DR_CONNECTOR_TYPE_PMEM) { + return H_PARAMETER; + } + + nvdimm = NVDIMM(drc->dev); + + /* Update if the nvdimm is unarmed and send its status via health bitmaps */ + if (object_property_get_bool(OBJECT(nvdimm), NVDIMM_UNARMED_PROP, NULL)) { + hbitmap |= PAPR_PMEM_UNARMED; + } + + /* Update the out args with health bitmap/mask */ + args[0] = hbitmap; + args[1] = hbitmap_mask; + + return H_SUCCESS; +} + +static void spapr_scm_register_types(void) +{ + /* qemu/scm specific hcalls */ + spapr_register_hypercall(H_SCM_READ_METADATA, h_scm_read_metadata); + spapr_register_hypercall(H_SCM_WRITE_METADATA, h_scm_write_metadata); + spapr_register_hypercall(H_SCM_BIND_MEM, h_scm_bind_mem); + spapr_register_hypercall(H_SCM_UNBIND_MEM, h_scm_unbind_mem); + spapr_register_hypercall(H_SCM_UNBIND_ALL, h_scm_unbind_all); + spapr_register_hypercall(H_SCM_HEALTH, h_scm_health); +} + +type_init(spapr_scm_register_types) diff --git a/hw/ppc/spapr_ovec.c b/hw/ppc/spapr_ovec.c new file mode 100644 index 000000000..b2567caa5 --- /dev/null +++ b/hw/ppc/spapr_ovec.c @@ -0,0 +1,241 @@ +/* + * QEMU SPAPR Architecture Option Vector Helper Functions + * + * Copyright IBM Corp. 2016 + * + * Authors: + * Bharata B Rao <bharata@linux.vnet.ibm.com> + * Michael Roth <mdroth@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "hw/ppc/spapr_ovec.h" +#include "migration/vmstate.h" +#include "qemu/bitmap.h" +#include "exec/address-spaces.h" +#include "qemu/error-report.h" +#include "trace.h" +#include <libfdt.h> + +#define OV_MAXBYTES 256 /* not including length byte */ +#define OV_MAXBITS (OV_MAXBYTES * BITS_PER_BYTE) + +/* we *could* work with bitmaps directly, but handling the bitmap privately + * allows us to more safely make assumptions about the bitmap size and + * simplify the calling code somewhat + */ +struct SpaprOptionVector { + unsigned long *bitmap; + int32_t bitmap_size; /* only used for migration */ +}; + +const VMStateDescription vmstate_spapr_ovec = { + .name = "spapr_option_vector", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_BITMAP(bitmap, SpaprOptionVector, 1, bitmap_size), + VMSTATE_END_OF_LIST() + } +}; + +SpaprOptionVector *spapr_ovec_new(void) +{ + SpaprOptionVector *ov; + + ov = g_new0(SpaprOptionVector, 1); + ov->bitmap = bitmap_new(OV_MAXBITS); + ov->bitmap_size = OV_MAXBITS; + + return ov; +} + +SpaprOptionVector *spapr_ovec_clone(SpaprOptionVector *ov_orig) +{ + SpaprOptionVector *ov; + + g_assert(ov_orig); + + ov = spapr_ovec_new(); + bitmap_copy(ov->bitmap, ov_orig->bitmap, OV_MAXBITS); + + return ov; +} + +void spapr_ovec_intersect(SpaprOptionVector *ov, + SpaprOptionVector *ov1, + SpaprOptionVector *ov2) +{ + g_assert(ov); + g_assert(ov1); + g_assert(ov2); + + bitmap_and(ov->bitmap, ov1->bitmap, ov2->bitmap, OV_MAXBITS); +} + +/* returns true if ov1 has a subset of bits in ov2 */ +bool spapr_ovec_subset(SpaprOptionVector *ov1, SpaprOptionVector *ov2) +{ + unsigned long *tmp = bitmap_new(OV_MAXBITS); + bool result; + + g_assert(ov1); + g_assert(ov2); + + bitmap_andnot(tmp, ov1->bitmap, ov2->bitmap, OV_MAXBITS); + result = bitmap_empty(tmp, OV_MAXBITS); + + g_free(tmp); + + return result; +} + +void spapr_ovec_cleanup(SpaprOptionVector *ov) +{ + if (ov) { + g_free(ov->bitmap); + g_free(ov); + } +} + +void spapr_ovec_set(SpaprOptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert(bitnr < OV_MAXBITS); + + set_bit(bitnr, ov->bitmap); +} + +void spapr_ovec_clear(SpaprOptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert(bitnr < OV_MAXBITS); + + clear_bit(bitnr, ov->bitmap); +} + +bool spapr_ovec_test(SpaprOptionVector *ov, long bitnr) +{ + g_assert(ov); + g_assert(bitnr < OV_MAXBITS); + + return test_bit(bitnr, ov->bitmap) ? 
true : false; +} + +bool spapr_ovec_empty(SpaprOptionVector *ov) +{ + g_assert(ov); + + return bitmap_empty(ov->bitmap, OV_MAXBITS); +} + +static void guest_byte_to_bitmap(uint8_t entry, unsigned long *bitmap, + long bitmap_offset) +{ + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (entry & (1 << (BITS_PER_BYTE - 1 - i))) { + bitmap_set(bitmap, bitmap_offset + i, 1); + } + } +} + +static uint8_t guest_byte_from_bitmap(unsigned long *bitmap, long bitmap_offset) +{ + uint8_t entry = 0; + int i; + + for (i = 0; i < BITS_PER_BYTE; i++) { + if (test_bit(bitmap_offset + i, bitmap)) { + entry |= (1 << (BITS_PER_BYTE - 1 - i)); + } + } + + return entry; +} + +static target_ulong vector_addr(target_ulong table_addr, int vector) +{ + uint16_t vector_count, vector_len; + int i; + + vector_count = ldub_phys(&address_space_memory, table_addr) + 1; + if (vector > vector_count) { + return 0; + } + table_addr++; /* skip nr option vectors */ + + for (i = 0; i < vector - 1; i++) { + vector_len = ldub_phys(&address_space_memory, table_addr) + 1; + table_addr += vector_len + 1; /* bit-vector + length byte */ + } + return table_addr; +} + +SpaprOptionVector *spapr_ovec_parse_vector(target_ulong table_addr, int vector) +{ + SpaprOptionVector *ov; + target_ulong addr; + uint16_t vector_len; + int i; + + g_assert(table_addr); + g_assert(vector >= 1); /* vector numbering starts at 1 */ + + addr = vector_addr(table_addr, vector); + if (!addr) { + /* specified vector isn't present */ + return NULL; + } + + vector_len = ldub_phys(&address_space_memory, addr++) + 1; + g_assert(vector_len <= OV_MAXBYTES); + ov = spapr_ovec_new(); + + for (i = 0; i < vector_len; i++) { + uint8_t entry = ldub_phys(&address_space_memory, addr + i); + if (entry) { + trace_spapr_ovec_parse_vector(vector, i + 1, vector_len, entry); + guest_byte_to_bitmap(entry, ov->bitmap, i * BITS_PER_BYTE); + } + } + + return ov; +} + +int spapr_dt_ovec(void *fdt, int fdt_offset, + SpaprOptionVector *ov, const char *name) +{ + uint8_t vec[OV_MAXBYTES + 1]; + uint16_t vec_len; + unsigned long lastbit; + int i; + + g_assert(ov); + + lastbit = find_last_bit(ov->bitmap, OV_MAXBITS); + /* if no bits are set, include at least 1 byte of the vector so we can + * still encoded this in the device tree while abiding by the same + * encoding/sizing expected in ibm,client-architecture-support + */ + vec_len = (lastbit == OV_MAXBITS) ? 1 : lastbit / BITS_PER_BYTE + 1; + g_assert(vec_len <= OV_MAXBYTES); + /* guest expects vector len encoded as vec_len - 1, since the length byte + * is assumed and not included, and the first byte of the vector + * is assumed as well + */ + vec[0] = vec_len - 1; + + for (i = 1; i < vec_len + 1; i++) { + vec[i] = guest_byte_from_bitmap(ov->bitmap, (i - 1) * BITS_PER_BYTE); + if (vec[i]) { + trace_spapr_ovec_populate_dt(i, vec_len, vec[i]); + } + } + + return fdt_setprop(fdt, fdt_offset, name, vec, vec_len + 1); +} diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c new file mode 100644 index 000000000..5bfd4aa9e --- /dev/null +++ b/hw/ppc/spapr_pci.c @@ -0,0 +1,2530 @@ +/* + * QEMU sPAPR PCI host originated from Uninorth PCI host + * + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation. + * Copyright (C) 2011 David Gibson, IBM Corporation. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "hw/irq.h" +#include "hw/sysbus.h" +#include "migration/vmstate.h" +#include "hw/pci/pci.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/pci/pci_host.h" +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "exec/ram_addr.h" +#include <libfdt.h> +#include "trace.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "qapi/qmp/qerror.h" +#include "hw/ppc/fdt.h" +#include "hw/pci/pci_bridge.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_ids.h" +#include "hw/ppc/spapr_drc.h" +#include "hw/qdev-properties.h" +#include "sysemu/device_tree.h" +#include "sysemu/kvm.h" +#include "sysemu/hostmem.h" +#include "sysemu/numa.h" +#include "hw/ppc/spapr_numa.h" +#include "qemu/log.h" + +/* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */ +#define RTAS_QUERY_FN 0 +#define RTAS_CHANGE_FN 1 +#define RTAS_RESET_FN 2 +#define RTAS_CHANGE_MSI_FN 3 +#define RTAS_CHANGE_MSIX_FN 4 + +/* Interrupt types to return on RTAS_CHANGE_* */ +#define RTAS_TYPE_MSI 1 +#define RTAS_TYPE_MSIX 2 + +SpaprPhbState *spapr_pci_find_phb(SpaprMachineState *spapr, uint64_t buid) +{ + SpaprPhbState *sphb; + + QLIST_FOREACH(sphb, &spapr->phbs, list) { + if (sphb->buid != buid) { + continue; + } + return sphb; + } + + return NULL; +} + +PCIDevice *spapr_pci_find_dev(SpaprMachineState *spapr, uint64_t buid, + uint32_t config_addr) +{ + SpaprPhbState *sphb = spapr_pci_find_phb(spapr, buid); + PCIHostState *phb = PCI_HOST_BRIDGE(sphb); + int bus_num = (config_addr >> 16) & 0xFF; + int devfn = (config_addr >> 8) & 0xFF; + + if (!phb) { + return NULL; + } + + return pci_find_device(phb->bus, bus_num, devfn); +} + +static uint32_t rtas_pci_cfgaddr(uint32_t arg) +{ + /* This handles the encoding of extended config space addresses */ + return ((arg >> 20) & 0xf00) | (arg & 0xff); +} + +static void finish_read_pci_config(SpaprMachineState *spapr, uint64_t buid, + uint32_t addr, uint32_t size, + target_ulong rets) +{ + PCIDevice *pci_dev; + uint32_t val; + + if ((size != 1) && (size != 2) && (size != 4)) { + /* access must be 1, 2 or 4 bytes */ + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + pci_dev = spapr_pci_find_dev(spapr, buid, addr); + addr = rtas_pci_cfgaddr(addr); + + if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) { + /* Access must be to a valid device, within bounds and + * naturally aligned */ + 
rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + val = pci_host_config_read_common(pci_dev, addr, + pci_config_size(pci_dev), size); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, val); +} + +static void rtas_ibm_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint64_t buid; + uint32_t size, addr; + + if ((nargs != 4) || (nret != 2)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + buid = rtas_ldq(args, 1); + size = rtas_ld(args, 3); + addr = rtas_ld(args, 0); + + finish_read_pci_config(spapr, buid, addr, size, rets); +} + +static void rtas_read_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t size, addr; + + if ((nargs != 2) || (nret != 2)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + size = rtas_ld(args, 1); + addr = rtas_ld(args, 0); + + finish_read_pci_config(spapr, 0, addr, size, rets); +} + +static void finish_write_pci_config(SpaprMachineState *spapr, uint64_t buid, + uint32_t addr, uint32_t size, + uint32_t val, target_ulong rets) +{ + PCIDevice *pci_dev; + + if ((size != 1) && (size != 2) && (size != 4)) { + /* access must be 1, 2 or 4 bytes */ + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + pci_dev = spapr_pci_find_dev(spapr, buid, addr); + addr = rtas_pci_cfgaddr(addr); + + if (!pci_dev || (addr % size) || (addr >= pci_config_size(pci_dev))) { + /* Access must be to a valid device, within bounds and + * naturally aligned */ + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + pci_host_config_write_common(pci_dev, addr, pci_config_size(pci_dev), + val, size); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_ibm_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint64_t buid; + uint32_t val, size, addr; + + if ((nargs != 5) || (nret != 1)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + buid = rtas_ldq(args, 1); + val = rtas_ld(args, 4); + size = rtas_ld(args, 3); + addr = rtas_ld(args, 0); + + finish_write_pci_config(spapr, buid, addr, size, val, rets); +} + +static void rtas_write_pci_config(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint32_t val, size, addr; + + if ((nargs != 3) || (nret != 1)) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + + val = rtas_ld(args, 2); + size = rtas_ld(args, 1); + addr = rtas_ld(args, 0); + + finish_write_pci_config(spapr, 0, addr, size, val, rets); +} + +/* + * Set MSI/MSIX message data. + * This is required for msi_notify()/msix_notify() which + * will write at the addresses via spapr_msi_write(). + * + * If hwaddr == 0, all entries will have .data == first_irq i.e. + * table will be reset. 
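+ *
+ * For example (hypothetical numbers): an MSI-X setup with
+ * first_irq = 4400 and req_num = 3 programs vectors 0..2 with
+ * .data = 4400, 4401 and 4402; with addr == 0 the per-vector
+ * increment is skipped, so every entry keeps .data == first_irq.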
+ */
+static void spapr_msi_setmsg(PCIDevice *pdev, hwaddr addr, bool msix,
+ unsigned first_irq, unsigned req_num)
+{
+ unsigned i;
+ MSIMessage msg = { .address = addr, .data = first_irq };
+
+ if (!msix) {
+ msi_set_message(pdev, msg);
+ trace_spapr_pci_msi_setup(pdev->name, 0, msg.address);
+ return;
+ }
+
+ for (i = 0; i < req_num; ++i) {
+ msix_set_message(pdev, i, msg);
+ trace_spapr_pci_msi_setup(pdev->name, i, msg.address);
+ if (addr) {
+ ++msg.data;
+ }
+ }
+}
+
+static void rtas_ibm_change_msi(PowerPCCPU *cpu, SpaprMachineState *spapr,
+ uint32_t token, uint32_t nargs,
+ target_ulong args, uint32_t nret,
+ target_ulong rets)
+{
+ SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr);
+ uint32_t config_addr = rtas_ld(args, 0);
+ uint64_t buid = rtas_ldq(args, 1);
+ unsigned int func = rtas_ld(args, 3);
+ unsigned int req_num = rtas_ld(args, 4); /* 0 == remove all */
+ unsigned int seq_num = rtas_ld(args, 5);
+ unsigned int ret_intr_type;
+ unsigned int irq, max_irqs = 0;
+ SpaprPhbState *phb = NULL;
+ PCIDevice *pdev = NULL;
+ SpaprPciMsi *msi;
+ int *config_addr_key;
+ Error *err = NULL;
+ int i;
+
+ /* Find SpaprPhbState */
+ phb = spapr_pci_find_phb(spapr, buid);
+ if (phb) {
+ pdev = spapr_pci_find_dev(spapr, buid, config_addr);
+ }
+ if (!phb || !pdev) {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ switch (func) {
+ case RTAS_CHANGE_FN:
+ if (msi_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSI;
+ } else if (msix_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSIX;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ case RTAS_CHANGE_MSI_FN:
+ if (msi_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSI;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ case RTAS_CHANGE_MSIX_FN:
+ if (msix_present(pdev)) {
+ ret_intr_type = RTAS_TYPE_MSIX;
+ } else {
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+ break;
+ default:
+ error_report("rtas_ibm_change_msi(%u) is not implemented", func);
+ rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+ return;
+ }
+
+ msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr);
+
+ /* Releasing MSIs */
+ if (!req_num) {
+ if (!msi) {
+ trace_spapr_pci_msi("Releasing wrong config", config_addr);
+ rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+ return;
+ }
+
+ if (msi_present(pdev)) {
+ spapr_msi_setmsg(pdev, 0, false, 0, 0);
+ }
+ if (msix_present(pdev)) {
+ spapr_msi_setmsg(pdev, 0, true, 0, 0);
+ }
+ g_hash_table_remove(phb->msi, &config_addr);
+
+ trace_spapr_pci_msi("Released MSIs", config_addr);
+ rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+ rtas_st(rets, 1, 0);
+ return;
+ }
+
+ /* Enabling MSI */
+
+ /* Check if the device supports as many IRQs as requested */
+ if (ret_intr_type == RTAS_TYPE_MSI) {
+ max_irqs = msi_nr_vectors_allocated(pdev);
+ } else if (ret_intr_type == RTAS_TYPE_MSIX) {
+ max_irqs = pdev->msix_entries_nr;
+ }
+ if (!max_irqs) {
+ error_report("Requested interrupt type %d is not enabled for device %x",
+ ret_intr_type, config_addr);
+ rtas_st(rets, 0, -1); /* Hardware error */
+ return;
+ }
+ /* Correct the number if the guest asked for too many */
+ if (req_num > max_irqs) {
+ trace_spapr_pci_msi_retry(config_addr, req_num, max_irqs);
+ req_num = max_irqs;
+ irq = 0; /* to avoid misleading trace */
+ goto out;
+ }
+
+ /* Allocate MSIs */
+ if (smc->legacy_irq_allocation) {
+ irq = spapr_irq_find(spapr, req_num, ret_intr_type == RTAS_TYPE_MSI,
+ &err);
+ } else {
+ irq = spapr_irq_msi_alloc(spapr, req_num,
+ ret_intr_type == RTAS_TYPE_MSI, &err);
+ }
+ if (err)
{ + error_reportf_err(err, "Can't allocate MSIs for device %x: ", + config_addr); + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + for (i = 0; i < req_num; i++) { + spapr_irq_claim(spapr, irq + i, false, &err); + if (err) { + if (i) { + spapr_irq_free(spapr, irq, i); + } + if (!smc->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, irq, req_num); + } + error_reportf_err(err, "Can't allocate MSIs for device %x: ", + config_addr); + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + } + + /* Release previous MSIs */ + if (msi) { + g_hash_table_remove(phb->msi, &config_addr); + } + + /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */ + spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX, + irq, req_num); + + /* Add MSI device to cache */ + msi = g_new(SpaprPciMsi, 1); + msi->first_irq = irq; + msi->num = req_num; + config_addr_key = g_new(int, 1); + *config_addr_key = config_addr; + g_hash_table_insert(phb->msi, config_addr_key, msi); + +out: + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, req_num); + rtas_st(rets, 2, ++seq_num); + if (nret > 3) { + rtas_st(rets, 3, ret_intr_type); + } + + trace_spapr_pci_rtas_ibm_change_msi(config_addr, func, req_num, irq); +} + +static void rtas_ibm_query_interrupt_source_number(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, + uint32_t nargs, + target_ulong args, + uint32_t nret, + target_ulong rets) +{ + uint32_t config_addr = rtas_ld(args, 0); + uint64_t buid = rtas_ldq(args, 1); + unsigned int intr_src_num = -1, ioa_intr_num = rtas_ld(args, 3); + SpaprPhbState *phb = NULL; + PCIDevice *pdev = NULL; + SpaprPciMsi *msi; + + /* Find SpaprPhbState */ + phb = spapr_pci_find_phb(spapr, buid); + if (phb) { + pdev = spapr_pci_find_dev(spapr, buid, config_addr); + } + if (!phb || !pdev) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* Find device descriptor and start IRQ */ + msi = (SpaprPciMsi *) g_hash_table_lookup(phb->msi, &config_addr); + if (!msi || !msi->first_irq || !msi->num || (ioa_intr_num >= msi->num)) { + trace_spapr_pci_msi("Failed to return vector", config_addr); + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + intr_src_num = msi->first_irq + ioa_intr_num; + trace_spapr_pci_rtas_ibm_query_interrupt_source_number(ioa_intr_num, + intr_src_num); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, intr_src_num); + rtas_st(rets, 2, 1);/* 0 == level; 1 == edge */ +} + +static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint32_t addr, option; + uint64_t buid; + int ret; + + if ((nargs != 4) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + addr = rtas_ld(args, 0); + option = rtas_ld(args, 3); + + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option); + rtas_st(rets, 0, ret); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + PCIDevice *pdev; + uint32_t addr, option; + uint64_t buid; + + if ((nargs != 4) || (nret != 2)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + 
sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + /* + * We always have PE address of form "00BB0001". "BB" + * represents the bus number of PE's primary bus. + */ + option = rtas_ld(args, 3); + switch (option) { + case RTAS_GET_PE_ADDR: + addr = rtas_ld(args, 0); + pdev = spapr_pci_find_dev(spapr, buid, addr); + if (!pdev) { + goto param_error_exit; + } + + rtas_st(rets, 1, (pci_bus_num(pci_get_bus(pdev)) << 16) + 1); + break; + case RTAS_GET_PE_MODE: + rtas_st(rets, 1, RTAS_PE_MODE_SHARED); + break; + default: + goto param_error_exit; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + int state, ret; + + if ((nargs != 3) || (nret != 4 && nret != 5)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_get_state(sphb, &state); + rtas_st(rets, 0, ret); + if (ret != RTAS_OUT_SUCCESS) { + return; + } + + rtas_st(rets, 1, state); + rtas_st(rets, 2, RTAS_EEH_SUPPORT); + rtas_st(rets, 3, RTAS_EEH_PE_UNAVAIL_INFO); + if (nret >= 5) { + rtas_st(rets, 4, RTAS_EEH_PE_RECOVER_INFO); + } + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint32_t option; + uint64_t buid; + int ret; + + if ((nargs != 4) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + option = rtas_ld(args, 3); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_reset(sphb, option); + rtas_st(rets, 0, ret); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_configure_pe(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + int ret; + + if ((nargs != 3) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + ret = spapr_phb_vfio_eeh_configure(sphb); + rtas_st(rets, 0, ret); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +/* To support it later */ +static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + SpaprPhbState *sphb; + int option; + uint64_t buid; + + if ((nargs != 8) || (nret != 1)) { + goto param_error_exit; + } + + buid = rtas_ldq(args, 1); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb) { + goto param_error_exit; + } + + if (!spapr_phb_eeh_available(sphb)) { + goto param_error_exit; + } + + option = rtas_ld(args, 7); + switch (option) { + case 
RTAS_SLOT_TEMP_ERR_LOG: + case RTAS_SLOT_PERM_ERR_LOG: + break; + default: + goto param_error_exit; + } + + /* We don't have error log yet */ + rtas_st(rets, 0, RTAS_OUT_NO_ERRORS_FOUND); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void pci_spapr_set_irq(void *opaque, int irq_num, int level) +{ + /* + * Here we use the number returned by pci_swizzle_map_irq_fn to find a + * corresponding qemu_irq. + */ + SpaprPhbState *phb = opaque; + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + + trace_spapr_pci_lsi_set(phb->dtbusname, irq_num, phb->lsi_table[irq_num].irq); + qemu_set_irq(spapr_qirq(spapr, phb->lsi_table[irq_num].irq), level); +} + +static PCIINTxRoute spapr_route_intx_pin_to_irq(void *opaque, int pin) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(opaque); + PCIINTxRoute route; + + route.mode = PCI_INTX_ENABLED; + route.irq = sphb->lsi_table[pin].irq; + + return route; +} + +static uint64_t spapr_msi_read(void *opaque, hwaddr addr, unsigned size) +{ + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid access\n", __func__); + return 0; +} + +/* + * MSI/MSIX memory region implementation. + * The handler handles both MSI and MSIX. + * The vector number is encoded in least bits in data. + */ +static void spapr_msi_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + SpaprMachineState *spapr = opaque; + uint32_t irq = data; + + trace_spapr_pci_msi_write(addr, data, irq); + + qemu_irq_pulse(spapr_qirq(spapr, irq)); +} + +static const MemoryRegionOps spapr_msi_ops = { + /* + * .read result is undefined by PCI spec. + * define .read method to avoid assert failure in memory_region_init_io + */ + .read = spapr_msi_read, + .write = spapr_msi_write, + .endianness = DEVICE_LITTLE_ENDIAN +}; + +/* + * PHB PCI device + */ +static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn) +{ + SpaprPhbState *phb = opaque; + + return &phb->iommu_as; +} + +static char *spapr_phb_vfio_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) +{ + g_autofree char *path = NULL; + g_autofree char *host = NULL; + g_autofree char *devspec = NULL; + char *buf = NULL; + + /* Get the PCI VFIO host id */ + host = object_property_get_str(OBJECT(pdev), "host", NULL); + if (!host) { + return NULL; + } + + /* Construct the path of the file that will give us the DT location */ + path = g_strdup_printf("/sys/bus/pci/devices/%s/devspec", host); + if (!g_file_get_contents(path, &devspec, NULL, NULL)) { + return NULL; + } + + /* Construct and read from host device tree the loc-code */ + path = g_strdup_printf("/proc/device-tree%s/ibm,loc-code", devspec); + if (!g_file_get_contents(path, &buf, NULL, NULL)) { + return NULL; + } + return buf; +} + +static char *spapr_phb_get_loc_code(SpaprPhbState *sphb, PCIDevice *pdev) +{ + char *buf; + const char *devtype = "qemu"; + uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)))); + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + buf = spapr_phb_vfio_get_loc_code(sphb, pdev); + if (buf) { + return buf; + } + devtype = "vfio"; + } + /* + * For emulated devices and VFIO-failure case, make up + * the loc-code. 
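+ *
+ * A made-up loc-code looks like, e.g. (hypothetical device),
+ * "qemu_virtio-net-pci:0000:00:02.0" for an emulated virtio NIC
+ * behind the index-0 PHB on bus 0, slot 2, function 0.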
+ */ + buf = g_strdup_printf("%s_%s:%04x:%02x:%02x.%x", + devtype, pdev->name, sphb->index, busnr, + PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn)); + return buf; +} + +/* Macros to operate with address in OF binding to PCI */ +#define b_x(x, p, l) (((x) & ((1<<(l))-1)) << (p)) +#define b_n(x) b_x((x), 31, 1) /* 0 if relocatable */ +#define b_p(x) b_x((x), 30, 1) /* 1 if prefetchable */ +#define b_t(x) b_x((x), 29, 1) /* 1 if the address is aliased */ +#define b_ss(x) b_x((x), 24, 2) /* the space code */ +#define b_bbbbbbbb(x) b_x((x), 16, 8) /* bus number */ +#define b_ddddd(x) b_x((x), 11, 5) /* device number */ +#define b_fff(x) b_x((x), 8, 3) /* function number */ +#define b_rrrrrrrr(x) b_x((x), 0, 8) /* register number */ + +/* for 'reg' OF properties */ +#define RESOURCE_CELLS_SIZE 2 +#define RESOURCE_CELLS_ADDRESS 3 + +typedef struct ResourceFields { + uint32_t phys_hi; + uint32_t phys_mid; + uint32_t phys_lo; + uint32_t size_hi; + uint32_t size_lo; +} QEMU_PACKED ResourceFields; + +typedef struct ResourceProps { + ResourceFields reg[8]; + uint32_t reg_len; +} ResourceProps; + +/* fill in the 'reg' OF properties for + * a PCI device. 'reg' describes resource requirements for a + * device's IO/MEM regions. + * + * the property is an array of ('phys-addr', 'size') pairs describing + * the addressable regions of the PCI device, where 'phys-addr' is a + * RESOURCE_CELLS_ADDRESS-tuple of 32-bit integers corresponding to + * (phys.hi, phys.mid, phys.lo), and 'size' is a + * RESOURCE_CELLS_SIZE-tuple corresponding to (size.hi, size.lo). + * + * phys.hi = 0xYYXXXXZZ, where: + * 0xYY = npt000ss + * ||| | + * ||| +-- space code + * ||| | + * ||| + 00 if configuration space + * ||| + 01 if IO region, + * ||| + 10 if 32-bit MEM region + * ||| + 11 if 64-bit MEM region + * ||| + * ||+------ for non-relocatable IO: 1 if aliased + * || for relocatable IO: 1 if below 64KB + * || for MEM: 1 if below 1MB + * |+------- 1 if region is prefetchable + * +-------- 1 if region is non-relocatable + * 0xXXXX = bbbbbbbb dddddfff, encoding bus, slot, and function + * bits respectively + * 0xZZ = rrrrrrrr, the register number of the BAR corresponding + * to the region + * + * phys.mid and phys.lo correspond respectively to the hi/lo portions + * of the actual address of the region. + * + * note also that addresses defined in this property are, at least + * for PAPR guests, relative to the PHBs IO/MEM windows, and + * correspond directly to the addresses in the BARs. + * + * in accordance with PCI Bus Binding to Open Firmware, + * IEEE Std 1275-1994, section 4.1.1, as implemented by PAPR+ v2.7, + * Appendix C. 
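+ *
+ * worked example (hypothetical device at 00:05.0 with a 32-bit MEM
+ * BAR at config offset 0x14):
+ *
+ *     phys.hi = b_ss(2) | b_ddddd(5) | b_rrrrrrrr(0x14)
+ *             = 0x02000000 | 0x00002800 | 0x00000014
+ *             = 0x02002814
+ *
+ * with phys.mid and phys.lo left at zero, since BAR addresses are
+ * relocatable and the property does not encode the current BAR value.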
+ */ +static void populate_resource_props(PCIDevice *d, ResourceProps *rp) +{ + int bus_num = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(d)))); + uint32_t dev_id = (b_bbbbbbbb(bus_num) | + b_ddddd(PCI_SLOT(d->devfn)) | + b_fff(PCI_FUNC(d->devfn))); + ResourceFields *reg; + int i, reg_idx = 0; + + /* config space region */ + reg = &rp->reg[reg_idx++]; + reg->phys_hi = cpu_to_be32(dev_id); + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = 0; + reg->size_lo = 0; + + for (i = 0; i < PCI_NUM_REGIONS; i++) { + if (!d->io_regions[i].size) { + continue; + } + + reg = &rp->reg[reg_idx++]; + + reg->phys_hi = cpu_to_be32(dev_id | b_rrrrrrrr(pci_bar(d, i))); + if (d->io_regions[i].type & PCI_BASE_ADDRESS_SPACE_IO) { + reg->phys_hi |= cpu_to_be32(b_ss(1)); + } else if (d->io_regions[i].type & PCI_BASE_ADDRESS_MEM_TYPE_64) { + reg->phys_hi |= cpu_to_be32(b_ss(3)); + } else { + reg->phys_hi |= cpu_to_be32(b_ss(2)); + } + reg->phys_mid = 0; + reg->phys_lo = 0; + reg->size_hi = cpu_to_be32(d->io_regions[i].size >> 32); + reg->size_lo = cpu_to_be32(d->io_regions[i].size); + } + + rp->reg_len = reg_idx * sizeof(ResourceFields); +} + +typedef struct PCIClass PCIClass; +typedef struct PCISubClass PCISubClass; +typedef struct PCIIFace PCIIFace; + +struct PCIIFace { + int iface; + const char *name; +}; + +struct PCISubClass { + int subclass; + const char *name; + const PCIIFace *iface; +}; + +struct PCIClass { + const char *name; + const PCISubClass *subc; +}; + +static const PCISubClass undef_subclass[] = { + { PCI_CLASS_NOT_DEFINED_VGA, "display", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mass_subclass[] = { + { PCI_CLASS_STORAGE_SCSI, "scsi", NULL }, + { PCI_CLASS_STORAGE_IDE, "ide", NULL }, + { PCI_CLASS_STORAGE_FLOPPY, "fdc", NULL }, + { PCI_CLASS_STORAGE_IPI, "ipi", NULL }, + { PCI_CLASS_STORAGE_RAID, "raid", NULL }, + { PCI_CLASS_STORAGE_ATA, "ata", NULL }, + { PCI_CLASS_STORAGE_SATA, "sata", NULL }, + { PCI_CLASS_STORAGE_SAS, "sas", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass net_subclass[] = { + { PCI_CLASS_NETWORK_ETHERNET, "ethernet", NULL }, + { PCI_CLASS_NETWORK_TOKEN_RING, "token-ring", NULL }, + { PCI_CLASS_NETWORK_FDDI, "fddi", NULL }, + { PCI_CLASS_NETWORK_ATM, "atm", NULL }, + { PCI_CLASS_NETWORK_ISDN, "isdn", NULL }, + { PCI_CLASS_NETWORK_WORLDFIP, "worldfip", NULL }, + { PCI_CLASS_NETWORK_PICMG214, "picmg", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass displ_subclass[] = { + { PCI_CLASS_DISPLAY_VGA, "vga", NULL }, + { PCI_CLASS_DISPLAY_XGA, "xga", NULL }, + { PCI_CLASS_DISPLAY_3D, "3d-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass media_subclass[] = { + { PCI_CLASS_MULTIMEDIA_VIDEO, "video", NULL }, + { PCI_CLASS_MULTIMEDIA_AUDIO, "sound", NULL }, + { PCI_CLASS_MULTIMEDIA_PHONE, "telephony", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass mem_subclass[] = { + { PCI_CLASS_MEMORY_RAM, "memory", NULL }, + { PCI_CLASS_MEMORY_FLASH, "flash", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass bridg_subclass[] = { + { PCI_CLASS_BRIDGE_HOST, "host", NULL }, + { PCI_CLASS_BRIDGE_ISA, "isa", NULL }, + { PCI_CLASS_BRIDGE_EISA, "eisa", NULL }, + { PCI_CLASS_BRIDGE_MC, "mca", NULL }, + { PCI_CLASS_BRIDGE_PCI, "pci", NULL }, + { PCI_CLASS_BRIDGE_PCMCIA, "pcmcia", NULL }, + { PCI_CLASS_BRIDGE_NUBUS, "nubus", NULL }, + { PCI_CLASS_BRIDGE_CARDBUS, "cardbus", NULL }, + { PCI_CLASS_BRIDGE_RACEWAY, "raceway", NULL }, + { PCI_CLASS_BRIDGE_PCI_SEMITP, "semi-transparent-pci", NULL }, + 
{ PCI_CLASS_BRIDGE_IB_PCI, "infiniband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass comm_subclass[] = { + { PCI_CLASS_COMMUNICATION_SERIAL, "serial", NULL }, + { PCI_CLASS_COMMUNICATION_PARALLEL, "parallel", NULL }, + { PCI_CLASS_COMMUNICATION_MULTISERIAL, "multiport-serial", NULL }, + { PCI_CLASS_COMMUNICATION_MODEM, "modem", NULL }, + { PCI_CLASS_COMMUNICATION_GPIB, "gpib", NULL }, + { PCI_CLASS_COMMUNICATION_SC, "smart-card", NULL }, + { 0xFF, NULL, NULL, }, +}; + +static const PCIIFace pic_iface[] = { + { PCI_CLASS_SYSTEM_PIC_IOAPIC, "io-apic" }, + { PCI_CLASS_SYSTEM_PIC_IOXAPIC, "io-xapic" }, + { 0xFF, NULL }, +}; + +static const PCISubClass sys_subclass[] = { + { PCI_CLASS_SYSTEM_PIC, "interrupt-controller", pic_iface }, + { PCI_CLASS_SYSTEM_DMA, "dma-controller", NULL }, + { PCI_CLASS_SYSTEM_TIMER, "timer", NULL }, + { PCI_CLASS_SYSTEM_RTC, "rtc", NULL }, + { PCI_CLASS_SYSTEM_PCI_HOTPLUG, "hot-plug-controller", NULL }, + { PCI_CLASS_SYSTEM_SDHCI, "sd-host-controller", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass inp_subclass[] = { + { PCI_CLASS_INPUT_KEYBOARD, "keyboard", NULL }, + { PCI_CLASS_INPUT_PEN, "pen", NULL }, + { PCI_CLASS_INPUT_MOUSE, "mouse", NULL }, + { PCI_CLASS_INPUT_SCANNER, "scanner", NULL }, + { PCI_CLASS_INPUT_GAMEPORT, "gameport", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass dock_subclass[] = { + { PCI_CLASS_DOCKING_GENERIC, "dock", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass cpu_subclass[] = { + { PCI_CLASS_PROCESSOR_PENTIUM, "pentium", NULL }, + { PCI_CLASS_PROCESSOR_POWERPC, "powerpc", NULL }, + { PCI_CLASS_PROCESSOR_MIPS, "mips", NULL }, + { PCI_CLASS_PROCESSOR_CO, "co-processor", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIIFace usb_iface[] = { + { PCI_CLASS_SERIAL_USB_UHCI, "usb-uhci" }, + { PCI_CLASS_SERIAL_USB_OHCI, "usb-ohci", }, + { PCI_CLASS_SERIAL_USB_EHCI, "usb-ehci" }, + { PCI_CLASS_SERIAL_USB_XHCI, "usb-xhci" }, + { PCI_CLASS_SERIAL_USB_UNKNOWN, "usb-unknown" }, + { PCI_CLASS_SERIAL_USB_DEVICE, "usb-device" }, + { 0xFF, NULL }, +}; + +static const PCISubClass ser_subclass[] = { + { PCI_CLASS_SERIAL_FIREWIRE, "firewire", NULL }, + { PCI_CLASS_SERIAL_ACCESS, "access-bus", NULL }, + { PCI_CLASS_SERIAL_SSA, "ssa", NULL }, + { PCI_CLASS_SERIAL_USB, "usb", usb_iface }, + { PCI_CLASS_SERIAL_FIBER, "fibre-channel", NULL }, + { PCI_CLASS_SERIAL_SMBUS, "smb", NULL }, + { PCI_CLASS_SERIAL_IB, "infiniband", NULL }, + { PCI_CLASS_SERIAL_IPMI, "ipmi", NULL }, + { PCI_CLASS_SERIAL_SERCOS, "sercos", NULL }, + { PCI_CLASS_SERIAL_CANBUS, "canbus", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass wrl_subclass[] = { + { PCI_CLASS_WIRELESS_IRDA, "irda", NULL }, + { PCI_CLASS_WIRELESS_CIR, "consumer-ir", NULL }, + { PCI_CLASS_WIRELESS_RF_CONTROLLER, "rf-controller", NULL }, + { PCI_CLASS_WIRELESS_BLUETOOTH, "bluetooth", NULL }, + { PCI_CLASS_WIRELESS_BROADBAND, "broadband", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass sat_subclass[] = { + { PCI_CLASS_SATELLITE_TV, "satellite-tv", NULL }, + { PCI_CLASS_SATELLITE_AUDIO, "satellite-audio", NULL }, + { PCI_CLASS_SATELLITE_VOICE, "satellite-voice", NULL }, + { PCI_CLASS_SATELLITE_DATA, "satellite-data", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass crypt_subclass[] = { + { PCI_CLASS_CRYPT_NETWORK, "network-encryption", NULL }, + { PCI_CLASS_CRYPT_ENTERTAINMENT, + "entertainment-encryption", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCISubClass spc_subclass[] = { + { 
PCI_CLASS_SP_DPIO, "dpio", NULL }, + { PCI_CLASS_SP_PERF, "counter", NULL }, + { PCI_CLASS_SP_SYNCH, "measurement", NULL }, + { PCI_CLASS_SP_MANAGEMENT, "management-card", NULL }, + { 0xFF, NULL, NULL }, +}; + +static const PCIClass pci_classes[] = { + { "legacy-device", undef_subclass }, + { "mass-storage", mass_subclass }, + { "network", net_subclass }, + { "display", displ_subclass, }, + { "multimedia-device", media_subclass }, + { "memory-controller", mem_subclass }, + { "unknown-bridge", bridg_subclass }, + { "communication-controller", comm_subclass}, + { "system-peripheral", sys_subclass }, + { "input-controller", inp_subclass }, + { "docking-station", dock_subclass }, + { "cpu", cpu_subclass }, + { "serial-bus", ser_subclass }, + { "wireless-controller", wrl_subclass }, + { "intelligent-io", NULL }, + { "satellite-device", sat_subclass }, + { "encryption", crypt_subclass }, + { "data-processing-controller", spc_subclass }, +}; + +static const char *dt_name_from_class(uint8_t class, uint8_t subclass, + uint8_t iface) +{ + const PCIClass *pclass; + const PCISubClass *psubclass; + const PCIIFace *piface; + const char *name; + + if (class >= ARRAY_SIZE(pci_classes)) { + return "pci"; + } + + pclass = pci_classes + class; + name = pclass->name; + + if (pclass->subc == NULL) { + return name; + } + + psubclass = pclass->subc; + while ((psubclass->subclass & 0xff) != 0xff) { + if ((psubclass->subclass & 0xff) == subclass) { + name = psubclass->name; + break; + } + psubclass++; + } + + piface = psubclass->iface; + if (piface == NULL) { + return name; + } + while ((piface->iface & 0xff) != 0xff) { + if ((piface->iface & 0xff) == iface) { + name = piface->name; + break; + } + piface++; + } + + return name; +} + +/* + * DRC helper functions + */ + +static uint32_t drc_id_from_devfn(SpaprPhbState *phb, + uint8_t chassis, int32_t devfn) +{ + return (phb->index << 16) | (chassis << 8) | devfn; +} + +static SpaprDrc *drc_from_devfn(SpaprPhbState *phb, + uint8_t chassis, int32_t devfn) +{ + return spapr_drc_by_id(TYPE_SPAPR_DRC_PCI, + drc_id_from_devfn(phb, chassis, devfn)); +} + +static uint8_t chassis_from_bus(PCIBus *bus) +{ + if (pci_bus_is_root(bus)) { + return 0; + } else { + PCIDevice *bridge = pci_bridge_get_device(bus); + + return object_property_get_uint(OBJECT(bridge), "chassis_nr", + &error_abort); + } +} + +static SpaprDrc *drc_from_dev(SpaprPhbState *phb, PCIDevice *dev) +{ + uint8_t chassis = chassis_from_bus(pci_get_bus(dev)); + + return drc_from_devfn(phb, chassis, dev->devfn); +} + +static void add_drcs(SpaprPhbState *phb, PCIBus *bus) +{ + Object *owner; + int i; + uint8_t chassis; + + if (!phb->dr_enabled) { + return; + } + + chassis = chassis_from_bus(bus); + + if (pci_bus_is_root(bus)) { + owner = OBJECT(phb); + } else { + owner = OBJECT(pci_bridge_get_device(bus)); + } + + for (i = 0; i < PCI_SLOT_MAX * PCI_FUNC_MAX; i++) { + spapr_dr_connector_new(owner, TYPE_SPAPR_DRC_PCI, + drc_id_from_devfn(phb, chassis, i)); + } +} + +static void remove_drcs(SpaprPhbState *phb, PCIBus *bus) +{ + int i; + uint8_t chassis; + + if (!phb->dr_enabled) { + return; + } + + chassis = chassis_from_bus(bus); + + for (i = PCI_SLOT_MAX * PCI_FUNC_MAX - 1; i >= 0; i--) { + SpaprDrc *drc = drc_from_devfn(phb, chassis, i); + + if (drc) { + object_unparent(OBJECT(drc)); + } + } +} + +typedef struct PciWalkFdt { + void *fdt; + int offset; + SpaprPhbState *sphb; + int err; +} PciWalkFdt; + +static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset); + +static void 
spapr_dt_pci_device_cb(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PciWalkFdt *p = opaque; + int err; + + if (p->err) { + /* Something's already broken, don't keep going */ + return; + } + + err = spapr_dt_pci_device(p->sphb, pdev, p->fdt, p->offset); + if (err < 0) { + p->err = err; + } +} + +/* Augment PCI device node with bridge specific information */ +static int spapr_dt_pci_bus(SpaprPhbState *sphb, PCIBus *bus, + void *fdt, int offset) +{ + Object *owner; + PciWalkFdt cbinfo = { + .fdt = fdt, + .offset = offset, + .sphb = sphb, + .err = 0, + }; + int ret; + + _FDT(fdt_setprop_cell(fdt, offset, "#address-cells", + RESOURCE_CELLS_ADDRESS)); + _FDT(fdt_setprop_cell(fdt, offset, "#size-cells", + RESOURCE_CELLS_SIZE)); + + assert(bus); + pci_for_each_device_under_bus_reverse(bus, spapr_dt_pci_device_cb, &cbinfo); + if (cbinfo.err) { + return cbinfo.err; + } + + if (pci_bus_is_root(bus)) { + owner = OBJECT(sphb); + } else { + owner = OBJECT(pci_bridge_get_device(bus)); + } + + ret = spapr_dt_drc(fdt, offset, owner, + SPAPR_DR_CONNECTOR_TYPE_PCI); + if (ret) { + return ret; + } + + return offset; +} + +char *spapr_pci_fw_dev_name(PCIDevice *dev) +{ + const gchar *basename; + int slot = PCI_SLOT(dev->devfn); + int func = PCI_FUNC(dev->devfn); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); + + basename = dt_name_from_class((ccode >> 16) & 0xff, (ccode >> 8) & 0xff, + ccode & 0xff); + + if (func != 0) { + return g_strdup_printf("%s@%x,%x", basename, slot, func); + } else { + return g_strdup_printf("%s@%x", basename, slot); + } +} + +/* create OF node for pci device and required OF DT properties */ +static int spapr_dt_pci_device(SpaprPhbState *sphb, PCIDevice *dev, + void *fdt, int parent_offset) +{ + int offset; + g_autofree gchar *nodename = spapr_pci_fw_dev_name(dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev); + ResourceProps rp; + SpaprDrc *drc = drc_from_dev(sphb, dev); + uint32_t vendor_id = pci_default_read_config(dev, PCI_VENDOR_ID, 2); + uint32_t device_id = pci_default_read_config(dev, PCI_DEVICE_ID, 2); + uint32_t revision_id = pci_default_read_config(dev, PCI_REVISION_ID, 1); + uint32_t ccode = pci_default_read_config(dev, PCI_CLASS_PROG, 3); + uint32_t irq_pin = pci_default_read_config(dev, PCI_INTERRUPT_PIN, 1); + uint32_t subsystem_id = pci_default_read_config(dev, PCI_SUBSYSTEM_ID, 2); + uint32_t subsystem_vendor_id = + pci_default_read_config(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); + uint32_t cache_line_size = + pci_default_read_config(dev, PCI_CACHE_LINE_SIZE, 1); + uint32_t pci_status = pci_default_read_config(dev, PCI_STATUS, 2); + gchar *loc_code; + + _FDT(offset = fdt_add_subnode(fdt, parent_offset, nodename)); + + /* in accordance with PAPR+ v2.7 13.6.3, Table 181 */ + _FDT(fdt_setprop_cell(fdt, offset, "vendor-id", vendor_id)); + _FDT(fdt_setprop_cell(fdt, offset, "device-id", device_id)); + _FDT(fdt_setprop_cell(fdt, offset, "revision-id", revision_id)); + + _FDT(fdt_setprop_cell(fdt, offset, "class-code", ccode)); + if (irq_pin) { + _FDT(fdt_setprop_cell(fdt, offset, "interrupts", irq_pin)); + } + + if (subsystem_id) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-id", subsystem_id)); + } + + if (subsystem_vendor_id) { + _FDT(fdt_setprop_cell(fdt, offset, "subsystem-vendor-id", + subsystem_vendor_id)); + } + + _FDT(fdt_setprop_cell(fdt, offset, "cache-line-size", cache_line_size)); + + + /* the following fdt cells are masked off the pci status register */ + _FDT(fdt_setprop_cell(fdt, offset, "devsel-speed", + PCI_STATUS_DEVSEL_MASK & 
pci_status)); + + if (pci_status & PCI_STATUS_FAST_BACK) { + _FDT(fdt_setprop(fdt, offset, "fast-back-to-back", NULL, 0)); + } + if (pci_status & PCI_STATUS_66MHZ) { + _FDT(fdt_setprop(fdt, offset, "66mhz-capable", NULL, 0)); + } + if (pci_status & PCI_STATUS_UDF) { + _FDT(fdt_setprop(fdt, offset, "udf-supported", NULL, 0)); + } + + loc_code = spapr_phb_get_loc_code(sphb, dev); + _FDT(fdt_setprop_string(fdt, offset, "ibm,loc-code", loc_code)); + g_free(loc_code); + + if (drc) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", + spapr_drc_index(drc))); + } + + if (msi_present(dev)) { + uint32_t max_msi = msi_nr_vectors_allocated(dev); + if (max_msi) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi", max_msi)); + } + } + if (msix_present(dev)) { + uint32_t max_msix = dev->msix_entries_nr; + if (max_msix) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,req#msi-x", max_msix)); + } + } + + populate_resource_props(dev, &rp); + _FDT(fdt_setprop(fdt, offset, "reg", (uint8_t *)rp.reg, rp.reg_len)); + + if (sphb->pcie_ecs && pci_is_express(dev)) { + _FDT(fdt_setprop_cell(fdt, offset, "ibm,pci-config-space-type", 0x1)); + } + + spapr_phb_nvgpu_populate_pcidev_dt(dev, fdt, offset, sphb); + + if (!pc->is_bridge) { + /* Properties only for non-bridges */ + uint32_t min_grant = pci_default_read_config(dev, PCI_MIN_GNT, 1); + uint32_t max_latency = pci_default_read_config(dev, PCI_MAX_LAT, 1); + _FDT(fdt_setprop_cell(fdt, offset, "min-grant", min_grant)); + _FDT(fdt_setprop_cell(fdt, offset, "max-latency", max_latency)); + return offset; + } else { + PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev)); + + return spapr_dt_pci_bus(sphb, sec_bus, fdt, offset); + } +} + +/* Callback to be called during DRC release. */ +void spapr_phb_remove_pci_device_cb(DeviceState *dev) +{ + HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(dev); + + hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort); + object_unparent(OBJECT(dev)); +} + +int spapr_pci_dt_populate(SpaprDrc *drc, SpaprMachineState *spapr, + void *fdt, int *fdt_start_offset, Error **errp) +{ + HotplugHandler *plug_handler = qdev_get_hotplug_handler(drc->dev); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(plug_handler); + PCIDevice *pdev = PCI_DEVICE(drc->dev); + + *fdt_start_offset = spapr_dt_pci_device(sphb, pdev, fdt, 0); + return 0; +} + +static void spapr_pci_bridge_plug(SpaprPhbState *phb, + PCIBridge *bridge) +{ + PCIBus *bus = pci_bridge_get_sec_bus(bridge); + + add_drcs(phb, bus); +} + +/* Returns non-zero if the value of "chassis_nr" is already in use */ +static int check_chassis_nr(Object *obj, void *opaque) +{ + int new_chassis_nr = + object_property_get_uint(opaque, "chassis_nr", &error_abort); + int chassis_nr = + object_property_get_uint(obj, "chassis_nr", NULL); + + if (!object_dynamic_cast(obj, TYPE_PCI_BRIDGE)) { + return 0; + } + + /* Skip unsupported bridge types */ + if (!chassis_nr) { + return 0; + } + + /* Skip self */ + if (obj == opaque) { + return 0; + } + + return chassis_nr == new_chassis_nr; +} + +static bool bridge_has_valid_chassis_nr(Object *bridge, Error **errp) +{ + int chassis_nr = + object_property_get_uint(bridge, "chassis_nr", NULL); + + /* + * slotid_cap_init() already ensures that "chassis_nr" isn't null for + * standard PCI bridges, so this really tells if "chassis_nr" is present + * or not. 
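+     * (A bridge model that does not expose a chassis number is
+     * rejected just below, with a hint to use the standard pci-bridge
+     * device instead.)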
+ */ + if (!chassis_nr) { + error_setg(errp, "PCI Bridge lacks a \"chassis_nr\" property"); + error_append_hint(errp, "Try -device pci-bridge instead.\n"); + return false; + } + + /* We want unique values for "chassis_nr" */ + if (object_child_foreach_recursive(object_get_root(), check_chassis_nr, + bridge)) { + error_setg(errp, "Bridge chassis %d already in use", chassis_nr); + return false; + } + + return true; +} + +static void spapr_pci_pre_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev); + SpaprDrc *drc = drc_from_dev(phb, pdev); + PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))); + uint32_t slotnr = PCI_SLOT(pdev->devfn); + + if (!phb->dr_enabled) { + /* if this is a hotplug operation initiated by the user + * we need to let them know it's not enabled + */ + if (plugged_dev->hotplugged) { + error_setg(errp, QERR_BUS_NO_HOTPLUG, + object_get_typename(OBJECT(phb))); + return; + } + } + + if (pc->is_bridge) { + if (!bridge_has_valid_chassis_nr(OBJECT(plugged_dev), errp)) { + return; + } + } + + /* Following the QEMU convention used for PCIe multifunction + * hotplug, we do not allow functions to be hotplugged to a + * slot that already has function 0 present + */ + if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] && + PCI_FUNC(pdev->devfn) != 0) { + error_setg(errp, "PCI: slot %d function 0 already occupied by %s," + " additional functions can no longer be exposed to guest.", + slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name); + } + + if (drc && drc->dev) { + error_setg(errp, "PCI: slot %d already occupied by %s", slotnr, + pci_get_function_0(PCI_DEVICE(drc->dev))->name); + return; + } +} + +static void spapr_pci_plug(HotplugHandler *plug_handler, + DeviceState *plugged_dev, Error **errp) +{ + SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler)); + PCIDevice *pdev = PCI_DEVICE(plugged_dev); + PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev); + SpaprDrc *drc = drc_from_dev(phb, pdev); + uint32_t slotnr = PCI_SLOT(pdev->devfn); + + /* + * If DR is disabled we don't need to do anything in the case of + * hotplug or coldplug callbacks. + */ + if (!phb->dr_enabled) { + return; + } + + g_assert(drc); + + if (pc->is_bridge) { + spapr_pci_bridge_plug(phb, PCI_BRIDGE(plugged_dev)); + } + + /* spapr_pci_pre_plug() already checked the DRC is attachable */ + spapr_drc_attach(drc, DEVICE(pdev)); + + /* If this is function 0, signal hotplug for all the device functions. + * Otherwise defer sending the hotplug event. 
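+     * The deferred events are delivered below: once function 0 is
+     * attached, the loop scans all eight functions of the slot and
+     * raises a hotplug request for every DRC that senses PRESENT.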
+     */
+    if (!spapr_drc_hotplugged(plugged_dev)) {
+        spapr_drc_reset(drc);
+    } else if (PCI_FUNC(pdev->devfn) == 0) {
+        int i;
+        uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
+
+        for (i = 0; i < 8; i++) {
+            SpaprDrc *func_drc;
+            SpaprDrcClass *func_drck;
+            SpaprDREntitySense state;
+
+            func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
+            func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+            state = func_drck->dr_entity_sense(func_drc);
+
+            if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+                spapr_hotplug_req_add_by_index(func_drc);
+            }
+        }
+    }
+}
+
+static void spapr_pci_bridge_unplug(SpaprPhbState *phb,
+                                    PCIBridge *bridge)
+{
+    PCIBus *bus = pci_bridge_get_sec_bus(bridge);
+
+    remove_drcs(phb, bus);
+}
+
+static void spapr_pci_unplug(HotplugHandler *plug_handler,
+                             DeviceState *plugged_dev, Error **errp)
+{
+    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+
+    /* Some guest versions do not wait for completion of a device
+     * cleanup (generally done asynchronously by the kernel) before
+     * signaling to QEMU that the device is safe, but instead sleep
+     * for some 'safe' period of time. Unfortunately, on a busy host
+     * this sleep isn't guaranteed to be long enough, resulting in
+     * bad things like IRQ lines being left asserted during final
+     * device removal. To deal with this we call reset just prior
+     * to finalizing the device, which will put the device back into
+     * an 'idle' state, as the device cleanup code expects.
+     */
+    pci_device_reset(PCI_DEVICE(plugged_dev));
+
+    if (pc->is_bridge) {
+        spapr_pci_bridge_unplug(phb, PCI_BRIDGE(plugged_dev));
+        return;
+    }
+
+    qdev_unrealize(plugged_dev);
+}
+
+static void spapr_pci_unplug_request(HotplugHandler *plug_handler,
+                                     DeviceState *plugged_dev, Error **errp)
+{
+    SpaprPhbState *phb = SPAPR_PCI_HOST_BRIDGE(DEVICE(plug_handler));
+    PCIDevice *pdev = PCI_DEVICE(plugged_dev);
+    SpaprDrc *drc = drc_from_dev(phb, pdev);
+
+    if (!phb->dr_enabled) {
+        error_setg(errp, QERR_BUS_NO_HOTPLUG,
+                   object_get_typename(OBJECT(phb)));
+        return;
+    }
+
+    g_assert(drc);
+    g_assert(drc->dev == plugged_dev);
+
+    if (!spapr_drc_unplug_requested(drc)) {
+        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(plugged_dev);
+        uint32_t slotnr = PCI_SLOT(pdev->devfn);
+        SpaprDrc *func_drc;
+        SpaprDrcClass *func_drck;
+        SpaprDREntitySense state;
+        int i;
+        uint8_t chassis = chassis_from_bus(pci_get_bus(pdev));
+
+        if (pc->is_bridge) {
+            error_setg(errp, "PCI: Hot unplug of PCI bridges not supported");
+            return;
+        }
+        if (object_property_get_uint(OBJECT(pdev), "nvlink2-tgt", NULL)) {
+            error_setg(errp, "PCI: Cannot unplug NVLink2 devices");
+            return;
+        }
+
+        /* ensure any other present functions are pending unplug */
+        if (PCI_FUNC(pdev->devfn) == 0) {
+            for (i = 1; i < 8; i++) {
+                func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i));
+                func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+                state = func_drck->dr_entity_sense(func_drc);
+                if (state == SPAPR_DR_ENTITY_SENSE_PRESENT
+                    && !spapr_drc_unplug_requested(func_drc)) {
+                    /*
+                     * Attempting to remove function 0 of a multifunction
+                     * device will cascade into removing all child
+                     * functions, even if their unplug wasn't requested
+                     * beforehand.
+                     */
+                    spapr_drc_unplug_request(func_drc);
+                }
+            }
+        }
+
+        spapr_drc_unplug_request(drc);
+
+        /* If this isn't function 0, defer the unplug event;
otherwise signal removal + * for all present functions + */ + if (PCI_FUNC(pdev->devfn) == 0) { + for (i = 7; i >= 0; i--) { + func_drc = drc_from_devfn(phb, chassis, PCI_DEVFN(slotnr, i)); + func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc); + state = func_drck->dr_entity_sense(func_drc); + if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) { + spapr_hotplug_req_remove_by_index(func_drc); + } + } + } + } else { + error_setg(errp, + "PCI device unplug already in progress for device %s", + drc->dev->id); + } +} + +static void spapr_phb_finalizefn(Object *obj) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(obj); + + g_free(sphb->dtbusname); + sphb->dtbusname = NULL; +} + +static void spapr_phb_unrealize(DeviceState *dev) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SysBusDevice *s = SYS_BUS_DEVICE(dev); + PCIHostState *phb = PCI_HOST_BRIDGE(s); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(phb); + SpaprTceTable *tcet; + int i; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + + spapr_phb_nvgpu_free(sphb); + + if (sphb->msi) { + g_hash_table_unref(sphb->msi); + sphb->msi = NULL; + } + + /* + * Remove IO/MMIO subregions and aliases, rest should get cleaned + * via PHB's unrealize->object_finalize + */ + for (i = windows_supported - 1; i >= 0; i--) { + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); + if (tcet) { + memory_region_del_subregion(&sphb->iommu_root, + spapr_tce_get_iommu(tcet)); + } + } + + remove_drcs(sphb, phb->bus); + + for (i = PCI_NUM_PINS - 1; i >= 0; i--) { + if (sphb->lsi_table[i].irq) { + spapr_irq_free(spapr, sphb->lsi_table[i].irq, 1); + sphb->lsi_table[i].irq = 0; + } + } + + QLIST_REMOVE(sphb, list); + + memory_region_del_subregion(&sphb->iommu_root, &sphb->msiwindow); + + /* + * An attached PCI device may have memory listeners, eg. VFIO PCI. We have + * unmapped all sections. Remove the listeners now, before destroying the + * address space. + */ + address_space_remove_listeners(&sphb->iommu_as); + address_space_destroy(&sphb->iommu_as); + + qbus_set_hotplug_handler(BUS(phb->bus), NULL); + pci_unregister_root_bus(phb->bus); + + memory_region_del_subregion(get_system_memory(), &sphb->iowindow); + if (sphb->mem64_win_pciaddr != (hwaddr)-1) { + memory_region_del_subregion(get_system_memory(), &sphb->mem64window); + } + memory_region_del_subregion(get_system_memory(), &sphb->mem32window); +} + +static void spapr_phb_destroy_msi(gpointer opaque) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); + SpaprPciMsi *msi = opaque; + + if (!smc->legacy_irq_allocation) { + spapr_irq_msi_free(spapr, msi->first_irq, msi->num); + } + spapr_irq_free(spapr, msi->first_irq, msi->num); + g_free(msi); +} + +static void spapr_phb_realize(DeviceState *dev, Error **errp) +{ + ERRP_GUARD(); + /* We don't use SPAPR_MACHINE() in order to exit gracefully if the user + * tries to add a sPAPR PHB to a non-pseries machine. + */ + SpaprMachineState *spapr = + (SpaprMachineState *) object_dynamic_cast(qdev_get_machine(), + TYPE_SPAPR_MACHINE); + SpaprMachineClass *smc = spapr ? 
SPAPR_MACHINE_GET_CLASS(spapr) : NULL; + SysBusDevice *s = SYS_BUS_DEVICE(dev); + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(s); + PCIHostState *phb = PCI_HOST_BRIDGE(s); + MachineState *ms = MACHINE(spapr); + char *namebuf; + int i; + PCIBus *bus; + uint64_t msi_window_size = 4096; + SpaprTceTable *tcet; + const unsigned windows_supported = spapr_phb_windows_supported(sphb); + + if (!spapr) { + error_setg(errp, TYPE_SPAPR_PCI_HOST_BRIDGE " needs a pseries machine"); + return; + } + + assert(sphb->index != (uint32_t)-1); /* checked in spapr_phb_pre_plug() */ + + if (sphb->mem64_win_size != 0) { + if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + error_setg(errp, "32-bit memory window of size 0x%"HWADDR_PRIx + " (max 2 GiB)", sphb->mem_win_size); + return; + } + + /* 64-bit window defaults to identity mapping */ + sphb->mem64_win_pciaddr = sphb->mem64_win_addr; + } else if (sphb->mem_win_size > SPAPR_PCI_MEM32_WIN_SIZE) { + /* + * For compatibility with old configuration, if no 64-bit MMIO + * window is specified, but the ordinary (32-bit) memory + * window is specified as > 2GiB, we treat it as a 2GiB 32-bit + * window, with a 64-bit MMIO window following on immediately + * afterwards + */ + sphb->mem64_win_size = sphb->mem_win_size - SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_addr = sphb->mem_win_addr + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem64_win_pciaddr = + SPAPR_PCI_MEM_WIN_BUS_OFFSET + SPAPR_PCI_MEM32_WIN_SIZE; + sphb->mem_win_size = SPAPR_PCI_MEM32_WIN_SIZE; + } + + if (spapr_pci_find_phb(spapr, sphb->buid)) { + SpaprPhbState *s; + + error_setg(errp, "PCI host bridges must have unique indexes"); + error_append_hint(errp, "The following indexes are already in use:"); + QLIST_FOREACH(s, &spapr->phbs, list) { + error_append_hint(errp, " %d", s->index); + } + error_append_hint(errp, "\nTry another value for the index property\n"); + return; + } + + if (sphb->numa_node != -1 && + (sphb->numa_node >= MAX_NODES || + !ms->numa_state->nodes[sphb->numa_node].present)) { + error_setg(errp, "Invalid NUMA node ID for PCI host bridge"); + return; + } + + sphb->dtbusname = g_strdup_printf("pci@%" PRIx64, sphb->buid); + + /* Initialize memory regions */ + namebuf = g_strdup_printf("%s.mmio", sphb->dtbusname); + memory_region_init(&sphb->memspace, OBJECT(sphb), namebuf, UINT64_MAX); + g_free(namebuf); + + namebuf = g_strdup_printf("%s.mmio32-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem32window, OBJECT(sphb), + namebuf, &sphb->memspace, + SPAPR_PCI_MEM_WIN_BUS_OFFSET, sphb->mem_win_size); + g_free(namebuf); + memory_region_add_subregion(get_system_memory(), sphb->mem_win_addr, + &sphb->mem32window); + + if (sphb->mem64_win_size != 0) { + namebuf = g_strdup_printf("%s.mmio64-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->mem64window, OBJECT(sphb), + namebuf, &sphb->memspace, + sphb->mem64_win_pciaddr, sphb->mem64_win_size); + g_free(namebuf); + + memory_region_add_subregion(get_system_memory(), + sphb->mem64_win_addr, + &sphb->mem64window); + } + + /* Initialize IO regions */ + namebuf = g_strdup_printf("%s.io", sphb->dtbusname); + memory_region_init(&sphb->iospace, OBJECT(sphb), + namebuf, SPAPR_PCI_IO_WIN_SIZE); + g_free(namebuf); + + namebuf = g_strdup_printf("%s.io-alias", sphb->dtbusname); + memory_region_init_alias(&sphb->iowindow, OBJECT(sphb), namebuf, + &sphb->iospace, 0, SPAPR_PCI_IO_WIN_SIZE); + g_free(namebuf); + memory_region_add_subregion(get_system_memory(), sphb->io_win_addr, + &sphb->iowindow); + + bus = pci_register_root_bus(dev, NULL, + 
pci_spapr_set_irq, pci_swizzle_map_irq_fn, sphb,
+                                &sphb->memspace, &sphb->iospace,
+                                PCI_DEVFN(0, 0), PCI_NUM_PINS,
+                                TYPE_PCI_BUS);
+
+    /*
+     * Despite resembling a vanilla PCI bus in most ways, the PAPR
+     * para-virtualized PCI bus *does* permit PCI-E extended config
+     * space access
+     */
+    if (sphb->pcie_ecs) {
+        bus->flags |= PCI_BUS_EXTENDED_CONFIG_SPACE;
+    }
+    phb->bus = bus;
+    qbus_set_hotplug_handler(BUS(phb->bus), OBJECT(sphb));
+
+    /*
+     * Initialize PHB address space.
+     * By default there will be at least one subregion for the default
+     * 32bit DMA window.
+     * Later the guest might want to create another DMA window
+     * which will become another memory subregion.
+     */
+    namebuf = g_strdup_printf("%s.iommu-root", sphb->dtbusname);
+    memory_region_init(&sphb->iommu_root, OBJECT(sphb),
+                       namebuf, UINT64_MAX);
+    g_free(namebuf);
+    address_space_init(&sphb->iommu_as, &sphb->iommu_root,
+                       sphb->dtbusname);
+
+    /*
+     * As MSI/MSIX interrupts are triggered by writes to the MSI/MSIX
+     * vectors, we need to allocate some memory to catch those writes
+     * coming from msi_notify()/msix_notify().
+     * As MSIMessage:addr is going to be the same and MSIMessage:data
+     * is going to be a VIRQ number, only 4 bytes of the MSI MR will
+     * be used.
+     *
+     * For KVM we want to ensure that this memory is a full page so that
+     * our memory slot is of page size granularity.
+     */
+    if (kvm_enabled()) {
+        msi_window_size = qemu_real_host_page_size;
+    }
+
+    memory_region_init_io(&sphb->msiwindow, OBJECT(sphb), &spapr_msi_ops, spapr,
+                          "msi", msi_window_size);
+    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
+                                &sphb->msiwindow);
+
+    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
+
+    pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
+
+    QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
+
+    /* Initialize the LSI table */
+    for (i = 0; i < PCI_NUM_PINS; i++) {
+        int irq = SPAPR_IRQ_PCI_LSI + sphb->index * PCI_NUM_PINS + i;
+
+        if (smc->legacy_irq_allocation) {
+            irq = spapr_irq_findone(spapr, errp);
+            if (irq < 0) {
+                error_prepend(errp, "can't allocate LSIs: ");
+                /*
+                 * Older machines will never support PHB hotplug, i.e. this
+                 * is an init-only path and QEMU will terminate. No need to
+                 * rollback.
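+                 * (Machine types using the modern, fixed LSI numbering
+                 * computed above never take this branch.)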
+ */ + return; + } + } + + if (spapr_irq_claim(spapr, irq, true, errp) < 0) { + error_prepend(errp, "can't allocate LSIs: "); + goto unrealize; + } + + sphb->lsi_table[i].irq = irq; + } + + /* allocate connectors for child PCI devices */ + add_drcs(sphb, phb->bus); + + /* DMA setup */ + for (i = 0; i < windows_supported; ++i) { + tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn[i]); + if (!tcet) { + error_setg(errp, "Creating window#%d failed for %s", + i, sphb->dtbusname); + goto unrealize; + } + memory_region_add_subregion(&sphb->iommu_root, 0, + spapr_tce_get_iommu(tcet)); + } + + sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, + spapr_phb_destroy_msi); + return; + +unrealize: + spapr_phb_unrealize(dev); +} + +static int spapr_phb_children_reset(Object *child, void *opaque) +{ + DeviceState *dev = (DeviceState *) object_dynamic_cast(child, TYPE_DEVICE); + + if (dev) { + device_legacy_reset(dev); + } + + return 0; +} + +void spapr_phb_dma_reset(SpaprPhbState *sphb) +{ + int i; + SpaprTceTable *tcet; + + for (i = 0; i < SPAPR_PCI_DMA_MAX_WINDOWS; ++i) { + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[i]); + + if (tcet && tcet->nb_table) { + spapr_tce_table_disable(tcet); + } + } + + /* Register default 32bit DMA window */ + tcet = spapr_tce_find_by_liobn(sphb->dma_liobn[0]); + spapr_tce_table_enable(tcet, SPAPR_TCE_PAGE_SHIFT, sphb->dma_win_addr, + sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT); +} + +static void spapr_phb_reset(DeviceState *qdev) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev); + Error *err = NULL; + + spapr_phb_dma_reset(sphb); + spapr_phb_nvgpu_free(sphb); + spapr_phb_nvgpu_setup(sphb, &err); + if (err) { + error_report_err(err); + } + + /* Reset the IOMMU state */ + object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL); + + if (spapr_phb_eeh_available(SPAPR_PCI_HOST_BRIDGE(qdev))) { + spapr_phb_vfio_reset(qdev); + } + + g_hash_table_remove_all(sphb->msi); +} + +static Property spapr_phb_properties[] = { + DEFINE_PROP_UINT32("index", SpaprPhbState, index, -1), + DEFINE_PROP_UINT64("mem_win_size", SpaprPhbState, mem_win_size, + SPAPR_PCI_MEM32_WIN_SIZE), + DEFINE_PROP_UINT64("mem64_win_size", SpaprPhbState, mem64_win_size, + SPAPR_PCI_MEM64_WIN_SIZE), + DEFINE_PROP_UINT64("io_win_size", SpaprPhbState, io_win_size, + SPAPR_PCI_IO_WIN_SIZE), + DEFINE_PROP_BOOL("dynamic-reconfiguration", SpaprPhbState, dr_enabled, + true), + /* Default DMA window is 0..1GB */ + DEFINE_PROP_UINT64("dma_win_addr", SpaprPhbState, dma_win_addr, 0), + DEFINE_PROP_UINT64("dma_win_size", SpaprPhbState, dma_win_size, 0x40000000), + DEFINE_PROP_UINT64("dma64_win_addr", SpaprPhbState, dma64_win_addr, + 0x800000000000000ULL), + DEFINE_PROP_BOOL("ddw", SpaprPhbState, ddw_enabled, true), + DEFINE_PROP_UINT64("pgsz", SpaprPhbState, page_size_mask, + (1ULL << 12) | (1ULL << 16) + | (1ULL << 21) | (1ULL << 24)), + DEFINE_PROP_UINT32("numa_node", SpaprPhbState, numa_node, -1), + DEFINE_PROP_BOOL("pre-2.8-migration", SpaprPhbState, + pre_2_8_migration, false), + DEFINE_PROP_BOOL("pcie-extended-configuration-space", SpaprPhbState, + pcie_ecs, true), + DEFINE_PROP_UINT64("gpa", SpaprPhbState, nv2_gpa_win_addr, 0), + DEFINE_PROP_UINT64("atsd", SpaprPhbState, nv2_atsd_win_addr, 0), + DEFINE_PROP_BOOL("pre-5.1-associativity", SpaprPhbState, + pre_5_1_assoc, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static const VMStateDescription vmstate_spapr_pci_lsi = { + .name = "spapr_pci/lsi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + 
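+        /* The LSI number must match on source and destination;
+         * VMSTATE_UINT32_EQUAL fails the migration otherwise. */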
VMSTATE_UINT32_EQUAL(irq, SpaprPciLsi, NULL), + + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_spapr_pci_msi = { + .name = "spapr_pci/msi", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField []) { + VMSTATE_UINT32(key, SpaprPciMsiMig), + VMSTATE_UINT32(value.first_irq, SpaprPciMsiMig), + VMSTATE_UINT32(value.num, SpaprPciMsiMig), + VMSTATE_END_OF_LIST() + }, +}; + +static int spapr_pci_pre_save(void *opaque) +{ + SpaprPhbState *sphb = opaque; + GHashTableIter iter; + gpointer key, value; + int i; + + if (sphb->pre_2_8_migration) { + sphb->mig_liobn = sphb->dma_liobn[0]; + sphb->mig_mem_win_addr = sphb->mem_win_addr; + sphb->mig_mem_win_size = sphb->mem_win_size; + sphb->mig_io_win_addr = sphb->io_win_addr; + sphb->mig_io_win_size = sphb->io_win_size; + + if ((sphb->mem64_win_size != 0) + && (sphb->mem64_win_addr + == (sphb->mem_win_addr + sphb->mem_win_size))) { + sphb->mig_mem_win_size += sphb->mem64_win_size; + } + } + + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = g_hash_table_size(sphb->msi); + if (!sphb->msi_devs_num) { + return 0; + } + sphb->msi_devs = g_new(SpaprPciMsiMig, sphb->msi_devs_num); + + g_hash_table_iter_init(&iter, sphb->msi); + for (i = 0; g_hash_table_iter_next(&iter, &key, &value); ++i) { + sphb->msi_devs[i].key = *(uint32_t *) key; + sphb->msi_devs[i].value = *(SpaprPciMsi *) value; + } + + return 0; +} + +static int spapr_pci_post_save(void *opaque) +{ + SpaprPhbState *sphb = opaque; + + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = 0; + return 0; +} + +static int spapr_pci_post_load(void *opaque, int version_id) +{ + SpaprPhbState *sphb = opaque; + gpointer key, value; + int i; + + for (i = 0; i < sphb->msi_devs_num; ++i) { + key = g_memdup(&sphb->msi_devs[i].key, + sizeof(sphb->msi_devs[i].key)); + value = g_memdup(&sphb->msi_devs[i].value, + sizeof(sphb->msi_devs[i].value)); + g_hash_table_insert(sphb->msi, key, value); + } + g_free(sphb->msi_devs); + sphb->msi_devs = NULL; + sphb->msi_devs_num = 0; + + return 0; +} + +static bool pre_2_8_migration(void *opaque, int version_id) +{ + SpaprPhbState *sphb = opaque; + + return sphb->pre_2_8_migration; +} + +static const VMStateDescription vmstate_spapr_pci = { + .name = "spapr_pci", + .version_id = 2, + .minimum_version_id = 2, + .pre_save = spapr_pci_pre_save, + .post_save = spapr_pci_post_save, + .post_load = spapr_pci_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT64_EQUAL(buid, SpaprPhbState, NULL), + VMSTATE_UINT32_TEST(mig_liobn, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_addr, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_mem_win_size, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_addr, SpaprPhbState, pre_2_8_migration), + VMSTATE_UINT64_TEST(mig_io_win_size, SpaprPhbState, pre_2_8_migration), + VMSTATE_STRUCT_ARRAY(lsi_table, SpaprPhbState, PCI_NUM_PINS, 0, + vmstate_spapr_pci_lsi, SpaprPciLsi), + VMSTATE_INT32(msi_devs_num, SpaprPhbState), + VMSTATE_STRUCT_VARRAY_ALLOC(msi_devs, SpaprPhbState, msi_devs_num, 0, + vmstate_spapr_pci_msi, SpaprPciMsiMig), + VMSTATE_END_OF_LIST() + }, +}; + +static const char *spapr_phb_root_bus_path(PCIHostState *host_bridge, + PCIBus *rootbus) +{ + SpaprPhbState *sphb = SPAPR_PCI_HOST_BRIDGE(host_bridge); + + return sphb->dtbusname; +} + +static void spapr_phb_class_init(ObjectClass *klass, void *data) +{ + PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + 
HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass); + + hc->root_bus_path = spapr_phb_root_bus_path; + dc->realize = spapr_phb_realize; + dc->unrealize = spapr_phb_unrealize; + device_class_set_props(dc, spapr_phb_properties); + dc->reset = spapr_phb_reset; + dc->vmsd = &vmstate_spapr_pci; + /* Supported by TYPE_SPAPR_MACHINE */ + dc->user_creatable = true; + set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); + hp->pre_plug = spapr_pci_pre_plug; + hp->plug = spapr_pci_plug; + hp->unplug = spapr_pci_unplug; + hp->unplug_request = spapr_pci_unplug_request; +} + +static const TypeInfo spapr_phb_info = { + .name = TYPE_SPAPR_PCI_HOST_BRIDGE, + .parent = TYPE_PCI_HOST_BRIDGE, + .instance_size = sizeof(SpaprPhbState), + .instance_finalize = spapr_phb_finalizefn, + .class_init = spapr_phb_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static void spapr_phb_pci_enumerate_bridge(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + unsigned int *bus_no = opaque; + PCIBus *sec_bus = NULL; + + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + (*bus_no)++; + pci_default_write_config(pdev, PCI_PRIMARY_BUS, pci_dev_bus_num(pdev), 1); + pci_default_write_config(pdev, PCI_SECONDARY_BUS, *bus_no, 1); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_enumerate_bridge, + bus_no); + pci_default_write_config(pdev, PCI_SUBORDINATE_BUS, *bus_no, 1); +} + +static void spapr_phb_pci_enumerate(SpaprPhbState *phb) +{ + PCIBus *bus = PCI_HOST_BRIDGE(phb)->bus; + unsigned int bus_no = 0; + + pci_for_each_device_under_bus(bus, spapr_phb_pci_enumerate_bridge, + &bus_no); + +} + +int spapr_dt_phb(SpaprMachineState *spapr, SpaprPhbState *phb, + uint32_t intc_phandle, void *fdt, int *node_offset) +{ + int bus_off, i, j, ret; + uint32_t bus_range[] = { cpu_to_be32(0), cpu_to_be32(0xff) }; + struct { + uint32_t hi; + uint64_t child; + uint64_t parent; + uint64_t size; + } QEMU_PACKED ranges[] = { + { + cpu_to_be32(b_ss(1)), cpu_to_be64(0), + cpu_to_be64(phb->io_win_addr), + cpu_to_be64(memory_region_size(&phb->iospace)), + }, + { + cpu_to_be32(b_ss(2)), cpu_to_be64(SPAPR_PCI_MEM_WIN_BUS_OFFSET), + cpu_to_be64(phb->mem_win_addr), + cpu_to_be64(phb->mem_win_size), + }, + { + cpu_to_be32(b_ss(3)), cpu_to_be64(phb->mem64_win_pciaddr), + cpu_to_be64(phb->mem64_win_addr), + cpu_to_be64(phb->mem64_win_size), + }, + }; + const unsigned sizeof_ranges = + (phb->mem64_win_size ? 
3 : 2) * sizeof(ranges[0]);
+    uint64_t bus_reg[] = { cpu_to_be64(phb->buid), 0 };
+    uint32_t interrupt_map_mask[] = {
+        cpu_to_be32(b_ddddd(-1)|b_fff(0)), 0x0, 0x0, cpu_to_be32(-1)};
+    uint32_t interrupt_map[PCI_SLOT_MAX * PCI_NUM_PINS][7];
+    uint32_t ddw_applicable[] = {
+        cpu_to_be32(RTAS_IBM_QUERY_PE_DMA_WINDOW),
+        cpu_to_be32(RTAS_IBM_CREATE_PE_DMA_WINDOW),
+        cpu_to_be32(RTAS_IBM_REMOVE_PE_DMA_WINDOW)
+    };
+    uint32_t ddw_extensions[] = {
+        cpu_to_be32(1),
+        cpu_to_be32(RTAS_IBM_RESET_PE_DMA_WINDOW)
+    };
+    SpaprTceTable *tcet;
+    SpaprDrc *drc;
+    Error *err = NULL;
+
+    /* Start populating the FDT */
+    _FDT(bus_off = fdt_add_subnode(fdt, 0, phb->dtbusname));
+    if (node_offset) {
+        *node_offset = bus_off;
+    }
+
+    /* Write PHB properties */
+    _FDT(fdt_setprop_string(fdt, bus_off, "device_type", "pci"));
+    _FDT(fdt_setprop_string(fdt, bus_off, "compatible", "IBM,Logical_PHB"));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "#interrupt-cells", 0x1));
+    _FDT(fdt_setprop(fdt, bus_off, "used-by-rtas", NULL, 0));
+    _FDT(fdt_setprop(fdt, bus_off, "bus-range", &bus_range, sizeof(bus_range)));
+    _FDT(fdt_setprop(fdt, bus_off, "ranges", &ranges, sizeof_ranges));
+    _FDT(fdt_setprop(fdt, bus_off, "reg", &bus_reg, sizeof(bus_reg)));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pci-config-space-type", 0x1));
+    _FDT(fdt_setprop_cell(fdt, bus_off, "ibm,pe-total-#msi",
+                          spapr_irq_nr_msis(spapr)));
+
+    /* Dynamic DMA window */
+    if (phb->ddw_enabled) {
+        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-applicable", &ddw_applicable,
+                         sizeof(ddw_applicable)));
+        _FDT(fdt_setprop(fdt, bus_off, "ibm,ddw-extensions",
+                         &ddw_extensions, sizeof(ddw_extensions)));
+    }
+
+    /* Advertise NUMA via ibm,associativity */
+    if (phb->numa_node != -1) {
+        spapr_numa_write_associativity_dt(spapr, fdt, bus_off, phb->numa_node);
+    }
+
+    /* Build the interrupt-map; this must match what is done
+     * in pci_swizzle_map_irq_fn
+     */
+    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map-mask",
+                     &interrupt_map_mask, sizeof(interrupt_map_mask)));
+    for (i = 0; i < PCI_SLOT_MAX; i++) {
+        for (j = 0; j < PCI_NUM_PINS; j++) {
+            uint32_t *irqmap = interrupt_map[i*PCI_NUM_PINS + j];
+            int lsi_num = pci_swizzle(i, j);
+
+            irqmap[0] = cpu_to_be32(b_ddddd(i)|b_fff(0));
+            irqmap[1] = 0;
+            irqmap[2] = 0;
+            irqmap[3] = cpu_to_be32(j+1);
+            irqmap[4] = cpu_to_be32(intc_phandle);
+            spapr_dt_irq(&irqmap[5], phb->lsi_table[lsi_num].irq, true);
+        }
+    }
+    /* Write interrupt map */
+    _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
+                     sizeof(interrupt_map)));
+
+    tcet = spapr_tce_find_by_liobn(phb->dma_liobn[0]);
+    if (!tcet) {
+        return -1;
+    }
+    spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
+                 tcet->liobn, tcet->bus_offset,
+                 tcet->nb_table << tcet->page_shift);
+
+    drc = spapr_drc_by_id(TYPE_SPAPR_DRC_PHB, phb->index);
+    if (drc) {
+        uint32_t drc_index = cpu_to_be32(spapr_drc_index(drc));
+
+        _FDT(fdt_setprop(fdt, bus_off, "ibm,my-drc-index", &drc_index,
+                         sizeof(drc_index)));
+    }
+
+    /* Walk the bridges and program the bus numbers */
+    spapr_phb_pci_enumerate(phb);
+    _FDT(fdt_setprop_cell(fdt, bus_off, "qemu,phb-enumerated", 0x1));
+
+    /* Walk the bridge and subordinate buses */
+    ret = spapr_dt_pci_bus(phb, PCI_HOST_BRIDGE(phb)->bus, fdt, bus_off);
+    if (ret < 0) {
+        return ret;
+    }
+
+    spapr_phb_nvgpu_populate_dt(phb, fdt, bus_off, &err);
+    if (err) {
+        error_report_err(err);
+    }
+    spapr_phb_nvgpu_ram_populate_dt(phb, fdt);
+
+    return 0;
+}
+
+void spapr_pci_rtas_init(void)
+{
+    spapr_rtas_register(RTAS_READ_PCI_CONFIG, "read-pci-config",
rtas_read_pci_config); + spapr_rtas_register(RTAS_WRITE_PCI_CONFIG, "write-pci-config", + rtas_write_pci_config); + spapr_rtas_register(RTAS_IBM_READ_PCI_CONFIG, "ibm,read-pci-config", + rtas_ibm_read_pci_config); + spapr_rtas_register(RTAS_IBM_WRITE_PCI_CONFIG, "ibm,write-pci-config", + rtas_ibm_write_pci_config); + if (msi_nonbroken) { + spapr_rtas_register(RTAS_IBM_QUERY_INTERRUPT_SOURCE_NUMBER, + "ibm,query-interrupt-source-number", + rtas_ibm_query_interrupt_source_number); + spapr_rtas_register(RTAS_IBM_CHANGE_MSI, "ibm,change-msi", + rtas_ibm_change_msi); + } + + spapr_rtas_register(RTAS_IBM_SET_EEH_OPTION, + "ibm,set-eeh-option", + rtas_ibm_set_eeh_option); + spapr_rtas_register(RTAS_IBM_GET_CONFIG_ADDR_INFO2, + "ibm,get-config-addr-info2", + rtas_ibm_get_config_addr_info2); + spapr_rtas_register(RTAS_IBM_READ_SLOT_RESET_STATE2, + "ibm,read-slot-reset-state2", + rtas_ibm_read_slot_reset_state2); + spapr_rtas_register(RTAS_IBM_SET_SLOT_RESET, + "ibm,set-slot-reset", + rtas_ibm_set_slot_reset); + spapr_rtas_register(RTAS_IBM_CONFIGURE_PE, + "ibm,configure-pe", + rtas_ibm_configure_pe); + spapr_rtas_register(RTAS_IBM_SLOT_ERROR_DETAIL, + "ibm,slot-error-detail", + rtas_ibm_slot_error_detail); +} + +static void spapr_pci_register_types(void) +{ + type_register_static(&spapr_phb_info); +} + +type_init(spapr_pci_register_types) + +static int spapr_switch_one_vga(DeviceState *dev, void *opaque) +{ + bool be = *(bool *)opaque; + + if (object_dynamic_cast(OBJECT(dev), "VGA") + || object_dynamic_cast(OBJECT(dev), "secondary-vga") + || object_dynamic_cast(OBJECT(dev), "bochs-display") + || object_dynamic_cast(OBJECT(dev), "virtio-vga")) { + object_property_set_bool(OBJECT(dev), "big-endian-framebuffer", be, + &error_abort); + } + return 0; +} + +void spapr_pci_switch_vga(SpaprMachineState *spapr, bool big_endian) +{ + SpaprPhbState *sphb; + + /* + * For backward compatibility with existing guests, we switch + * the endianness of the VGA controller when changing the guest + * interrupt mode + */ + QLIST_FOREACH(sphb, &spapr->phbs, list) { + BusState *bus = &PCI_HOST_BRIDGE(sphb)->bus->qbus; + qbus_walk_children(bus, spapr_switch_one_vga, NULL, NULL, NULL, + &big_endian); + } +} diff --git a/hw/ppc/spapr_pci_nvlink2.c b/hw/ppc/spapr_pci_nvlink2.c new file mode 100644 index 000000000..7fb0cf4d0 --- /dev/null +++ b/hw/ppc/spapr_pci_nvlink2.c @@ -0,0 +1,445 @@ +/* + * QEMU sPAPR PCI for NVLink2 pass through + * + * Copyright (c) 2019 Alexey Kardashevskiy, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "hw/pci/pci.h" +#include "hw/pci-host/spapr.h" +#include "hw/ppc/spapr_numa.h" +#include "qemu/error-report.h" +#include "hw/ppc/fdt.h" +#include "hw/pci/pci_bridge.h" + +#define PHANDLE_PCIDEV(phb, pdev) (0x12000000 | \ + (((phb)->index) << 16) | ((pdev)->devfn)) +#define PHANDLE_GPURAM(phb, n) (0x110000FF | ((n) << 8) | \ + (((phb)->index) << 16)) +#define PHANDLE_NVLINK(phb, gn, nn) (0x00130000 | (((phb)->index) << 8) | \ + ((gn) << 4) | (nn)) + +typedef struct SpaprPhbPciNvGpuSlot { + uint64_t tgt; + uint64_t gpa; + unsigned numa_id; + PCIDevice *gpdev; + int linknum; + struct { + uint64_t atsd_gpa; + PCIDevice *npdev; + uint32_t link_speed; + } links[NVGPU_MAX_LINKS]; +} SpaprPhbPciNvGpuSlot; + +struct SpaprPhbPciNvGpuConfig { + uint64_t nv2_ram_current; + uint64_t nv2_atsd_current; + int num; /* number of non empty (i.e. tgt!=0) entries in slots[] */ + SpaprPhbPciNvGpuSlot slots[NVGPU_MAX_NUM]; + Error *err; +}; + +static SpaprPhbPciNvGpuSlot * +spapr_nvgpu_get_slot(SpaprPhbPciNvGpuConfig *nvgpus, uint64_t tgt) +{ + int i; + + /* Search for partially collected "slot" */ + for (i = 0; i < nvgpus->num; ++i) { + if (nvgpus->slots[i].tgt == tgt) { + return &nvgpus->slots[i]; + } + } + + if (nvgpus->num == ARRAY_SIZE(nvgpus->slots)) { + return NULL; + } + + i = nvgpus->num; + nvgpus->slots[i].tgt = tgt; + ++nvgpus->num; + + return &nvgpus->slots[i]; +} + +static void spapr_pci_collect_nvgpu(SpaprPhbPciNvGpuConfig *nvgpus, + PCIDevice *pdev, uint64_t tgt, + MemoryRegion *mr, Error **errp) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + SpaprMachineState *spapr = SPAPR_MACHINE(machine); + SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); + + if (!nvslot) { + error_setg(errp, "Found too many GPUs per vPHB"); + return; + } + g_assert(!nvslot->gpdev); + nvslot->gpdev = pdev; + + nvslot->gpa = nvgpus->nv2_ram_current; + nvgpus->nv2_ram_current += memory_region_size(mr); + nvslot->numa_id = spapr->gpu_numa_id; + ++spapr->gpu_numa_id; +} + +static void spapr_pci_collect_nvnpu(SpaprPhbPciNvGpuConfig *nvgpus, + PCIDevice *pdev, uint64_t tgt, + MemoryRegion *mr, Error **errp) +{ + SpaprPhbPciNvGpuSlot *nvslot = spapr_nvgpu_get_slot(nvgpus, tgt); + int j; + + if (!nvslot) { + error_setg(errp, "Found too many NVLink bridges per vPHB"); + return; + } + + j = nvslot->linknum; + if (j == ARRAY_SIZE(nvslot->links)) { + error_setg(errp, "Found too many NVLink bridges per GPU"); + return; + } + ++nvslot->linknum; + + g_assert(!nvslot->links[j].npdev); + nvslot->links[j].npdev = pdev; + nvslot->links[j].atsd_gpa = nvgpus->nv2_atsd_current; + nvgpus->nv2_atsd_current += memory_region_size(mr); + nvslot->links[j].link_speed = + object_property_get_uint(OBJECT(pdev), "nvlink2-link-speed", NULL); +} + +static void spapr_phb_pci_collect_nvgpu(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + PCIBus *sec_bus; + Object *po = OBJECT(pdev); + uint64_t tgt = object_property_get_uint(po, "nvlink2-tgt", NULL); + + if (tgt) { + Error *local_err = NULL; + SpaprPhbPciNvGpuConfig *nvgpus = opaque; + Object *mr_gpu = object_property_get_link(po, "nvlink2-mr[0]", NULL); + Object *mr_npu = object_property_get_link(po, "nvlink2-atsd-mr[0]", 
+ NULL); + + g_assert(mr_gpu || mr_npu); + if (mr_gpu) { + spapr_pci_collect_nvgpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_gpu), + &local_err); + } else { + spapr_pci_collect_nvnpu(nvgpus, pdev, tgt, MEMORY_REGION(mr_npu), + &local_err); + } + error_propagate(&nvgpus->err, local_err); + } + if ((pci_default_read_config(pdev, PCI_HEADER_TYPE, 1) != + PCI_HEADER_TYPE_BRIDGE)) { + return; + } + + sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(pdev)); + if (!sec_bus) { + return; + } + + pci_for_each_device_under_bus(sec_bus, spapr_phb_pci_collect_nvgpu, opaque); +} + +void spapr_phb_nvgpu_setup(SpaprPhbState *sphb, Error **errp) +{ + int i, j, valid_gpu_num; + PCIBus *bus; + + /* Search for GPUs and NPUs */ + if (!sphb->nv2_gpa_win_addr || !sphb->nv2_atsd_win_addr) { + return; + } + + sphb->nvgpus = g_new0(SpaprPhbPciNvGpuConfig, 1); + sphb->nvgpus->nv2_ram_current = sphb->nv2_gpa_win_addr; + sphb->nvgpus->nv2_atsd_current = sphb->nv2_atsd_win_addr; + + bus = PCI_HOST_BRIDGE(sphb)->bus; + pci_for_each_device_under_bus(bus, spapr_phb_pci_collect_nvgpu, + sphb->nvgpus); + + if (sphb->nvgpus->err) { + error_propagate(errp, sphb->nvgpus->err); + sphb->nvgpus->err = NULL; + goto cleanup_exit; + } + + /* Add found GPU RAM and ATSD MRs if found */ + for (i = 0, valid_gpu_num = 0; i < sphb->nvgpus->num; ++i) { + Object *nvmrobj; + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + + if (!nvslot->gpdev) { + continue; + } + nvmrobj = object_property_get_link(OBJECT(nvslot->gpdev), + "nvlink2-mr[0]", NULL); + /* ATSD is pointless without GPU RAM MR so skip those */ + if (!nvmrobj) { + continue; + } + + ++valid_gpu_num; + memory_region_add_subregion(get_system_memory(), nvslot->gpa, + MEMORY_REGION(nvmrobj)); + + for (j = 0; j < nvslot->linknum; ++j) { + Object *atsdmrobj; + + atsdmrobj = object_property_get_link(OBJECT(nvslot->links[j].npdev), + "nvlink2-atsd-mr[0]", NULL); + if (!atsdmrobj) { + continue; + } + memory_region_add_subregion(get_system_memory(), + nvslot->links[j].atsd_gpa, + MEMORY_REGION(atsdmrobj)); + } + } + + if (valid_gpu_num) { + return; + } + /* We did not find any interesting GPU */ +cleanup_exit: + g_free(sphb->nvgpus); + sphb->nvgpus = NULL; +} + +void spapr_phb_nvgpu_free(SpaprPhbState *sphb) +{ + int i, j; + + if (!sphb->nvgpus) { + return; + } + + for (i = 0; i < sphb->nvgpus->num; ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev), + "nvlink2-mr[0]", NULL); + + if (nv_mrobj) { + memory_region_del_subregion(get_system_memory(), + MEMORY_REGION(nv_mrobj)); + } + for (j = 0; j < nvslot->linknum; ++j) { + PCIDevice *npdev = nvslot->links[j].npdev; + Object *atsd_mrobj; + atsd_mrobj = object_property_get_link(OBJECT(npdev), + "nvlink2-atsd-mr[0]", NULL); + if (atsd_mrobj) { + memory_region_del_subregion(get_system_memory(), + MEMORY_REGION(atsd_mrobj)); + } + } + } + g_free(sphb->nvgpus); + sphb->nvgpus = NULL; +} + +void spapr_phb_nvgpu_populate_dt(SpaprPhbState *sphb, void *fdt, int bus_off, + Error **errp) +{ + int i, j, atsdnum = 0; + uint64_t atsd[8]; /* The existing limitation of known guests */ + + if (!sphb->nvgpus) { + return; + } + + for (i = 0; (i < sphb->nvgpus->num) && (atsdnum < ARRAY_SIZE(atsd)); ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + + if (!nvslot->gpdev) { + continue; + } + for (j = 0; j < nvslot->linknum; ++j) { + if (!nvslot->links[j].atsd_gpa) { + continue; + } + + if (atsdnum == ARRAY_SIZE(atsd)) { + error_report("Only %"PRIuPTR" ATSD registers 
supported", + ARRAY_SIZE(atsd)); + break; + } + atsd[atsdnum] = cpu_to_be64(nvslot->links[j].atsd_gpa); + ++atsdnum; + } + } + + if (!atsdnum) { + error_setg(errp, "No ATSD registers found"); + return; + } + + if (!spapr_phb_eeh_available(sphb)) { + /* + * ibm,mmio-atsd contains ATSD registers; these belong to an NPU PHB + * which we do not emulate as a separate device. Instead we put + * ibm,mmio-atsd to the vPHB with GPU and make sure that we do not + * put GPUs from different IOMMU groups to the same vPHB to ensure + * that the guest will use ATSDs from the corresponding NPU. + */ + error_setg(errp, "ATSD requires separate vPHB per GPU IOMMU group"); + return; + } + + _FDT((fdt_setprop(fdt, bus_off, "ibm,mmio-atsd", atsd, + atsdnum * sizeof(atsd[0])))); +} + +void spapr_phb_nvgpu_ram_populate_dt(SpaprPhbState *sphb, void *fdt) +{ + int i, j, linkidx, npuoff; + char *npuname; + + if (!sphb->nvgpus) { + return; + } + + npuname = g_strdup_printf("npuphb%d", sphb->index); + npuoff = fdt_add_subnode(fdt, 0, npuname); + _FDT(npuoff); + _FDT(fdt_setprop_cell(fdt, npuoff, "#address-cells", 1)); + _FDT(fdt_setprop_cell(fdt, npuoff, "#size-cells", 0)); + /* Advertise NPU as POWER9 so the guest can enable NPU2 contexts */ + _FDT((fdt_setprop_string(fdt, npuoff, "compatible", "ibm,power9-npu"))); + g_free(npuname); + + for (i = 0, linkidx = 0; i < sphb->nvgpus->num; ++i) { + for (j = 0; j < sphb->nvgpus->slots[i].linknum; ++j) { + char *linkname = g_strdup_printf("link@%d", linkidx); + int off = fdt_add_subnode(fdt, npuoff, linkname); + + _FDT(off); + /* _FDT((fdt_setprop_cell(fdt, off, "reg", linkidx))); */ + _FDT((fdt_setprop_string(fdt, off, "compatible", + "ibm,npu-link"))); + _FDT((fdt_setprop_cell(fdt, off, "phandle", + PHANDLE_NVLINK(sphb, i, j)))); + _FDT((fdt_setprop_cell(fdt, off, "ibm,npu-link-index", linkidx))); + g_free(linkname); + ++linkidx; + } + } + + /* Add memory nodes for GPU RAM and mark them unusable */ + for (i = 0; i < sphb->nvgpus->num; ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + Object *nv_mrobj = object_property_get_link(OBJECT(nvslot->gpdev), + "nvlink2-mr[0]", + &error_abort); + uint64_t size = object_property_get_uint(nv_mrobj, "size", NULL); + uint64_t mem_reg[2] = { cpu_to_be64(nvslot->gpa), cpu_to_be64(size) }; + char *mem_name = g_strdup_printf("memory@%"PRIx64, nvslot->gpa); + int off = fdt_add_subnode(fdt, 0, mem_name); + + _FDT(off); + _FDT((fdt_setprop_string(fdt, off, "device_type", "memory"))); + _FDT((fdt_setprop(fdt, off, "reg", mem_reg, sizeof(mem_reg)))); + + spapr_numa_write_associativity_dt(SPAPR_MACHINE(qdev_get_machine()), + fdt, off, nvslot->numa_id); + + _FDT((fdt_setprop_string(fdt, off, "compatible", + "ibm,coherent-device-memory"))); + + mem_reg[1] = cpu_to_be64(0); + _FDT((fdt_setprop(fdt, off, "linux,usable-memory", mem_reg, + sizeof(mem_reg)))); + _FDT((fdt_setprop_cell(fdt, off, "phandle", + PHANDLE_GPURAM(sphb, i)))); + g_free(mem_name); + } + +} + +void spapr_phb_nvgpu_populate_pcidev_dt(PCIDevice *dev, void *fdt, int offset, + SpaprPhbState *sphb) +{ + int i, j; + + if (!sphb->nvgpus) { + return; + } + + for (i = 0; i < sphb->nvgpus->num; ++i) { + SpaprPhbPciNvGpuSlot *nvslot = &sphb->nvgpus->slots[i]; + + /* Skip "slot" without attached GPU */ + if (!nvslot->gpdev) { + continue; + } + if (dev == nvslot->gpdev) { + uint32_t npus[nvslot->linknum]; + + for (j = 0; j < nvslot->linknum; ++j) { + PCIDevice *npdev = nvslot->links[j].npdev; + + npus[j] = cpu_to_be32(PHANDLE_PCIDEV(sphb, npdev)); + } + _FDT(fdt_setprop(fdt, 
offset, "ibm,npu", npus, + j * sizeof(npus[0]))); + _FDT((fdt_setprop_cell(fdt, offset, "phandle", + PHANDLE_PCIDEV(sphb, dev)))); + continue; + } + + for (j = 0; j < nvslot->linknum; ++j) { + if (dev != nvslot->links[j].npdev) { + continue; + } + + _FDT((fdt_setprop_cell(fdt, offset, "phandle", + PHANDLE_PCIDEV(sphb, dev)))); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,gpu", + PHANDLE_PCIDEV(sphb, nvslot->gpdev))); + _FDT((fdt_setprop_cell(fdt, offset, "ibm,nvlink", + PHANDLE_NVLINK(sphb, i, j)))); + /* + * If we ever want to emulate GPU RAM at the same location as on + * the host - here is the encoding GPA->TGT: + * + * gta = ((sphb->nv2_gpa >> 42) & 0x1) << 42; + * gta |= ((sphb->nv2_gpa >> 45) & 0x3) << 43; + * gta |= ((sphb->nv2_gpa >> 49) & 0x3) << 45; + * gta |= sphb->nv2_gpa & ((1UL << 43) - 1); + */ + _FDT(fdt_setprop_cell(fdt, offset, "memory-region", + PHANDLE_GPURAM(sphb, i))); + _FDT(fdt_setprop_u64(fdt, offset, "ibm,device-tgt-addr", + nvslot->tgt)); + _FDT(fdt_setprop_cell(fdt, offset, "ibm,nvlink-speed", + nvslot->links[j].link_speed)); + } + } +} diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c new file mode 100644 index 000000000..2a76b4e0b --- /dev/null +++ b/hw/ppc/spapr_pci_vfio.c @@ -0,0 +1,217 @@ +/* + * QEMU sPAPR PCI host for VFIO + * + * Copyright (c) 2011-2014 Alexey Kardashevskiy, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include <linux/vfio.h> +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "hw/pci/msix.h" +#include "hw/vfio/vfio.h" +#include "qemu/error-report.h" + +bool spapr_phb_eeh_available(SpaprPhbState *sphb) +{ + return vfio_eeh_as_ok(&sphb->iommu_as); +} + +static void spapr_phb_vfio_eeh_reenable(SpaprPhbState *sphb) +{ + vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE); +} + +void spapr_phb_vfio_reset(DeviceState *qdev) +{ + /* + * The PE might be in frozen state. To reenable the EEH + * functionality on it will clean the frozen state, which + * ensures that the contained PCI devices will work properly + * after reboot. + */ + spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev)); +} + +static void spapr_eeh_pci_find_device(PCIBus *bus, PCIDevice *pdev, + void *opaque) +{ + bool *found = opaque; + + if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + *found = true; + } +} + +int spapr_phb_vfio_eeh_set_option(SpaprPhbState *sphb, + unsigned int addr, int option) +{ + uint32_t op; + int ret; + + switch (option) { + case RTAS_EEH_DISABLE: + op = VFIO_EEH_PE_DISABLE; + break; + case RTAS_EEH_ENABLE: { + PCIHostState *phb; + bool found = false; + + /* + * The EEH functionality is enabled per sphb level instead of + * per PCI device. We have already identified this specific sphb + * based on buid passed as argument to ibm,set-eeh-option rtas + * call. Now we just need to check the validity of the PCI + * pass-through devices (vfio-pci) under this sphb bus. 
+ * We have already validated that all the devices under this sphb + * are from the same iommu group (within same PE) before coming here. + * + * Prior to linux commit 98ba956f6a389 ("powerpc/pseries/eeh: + * Rework device EEH PE determination") the kernel would call + * eeh-set-option for each device in the PE using the device's + * config_address as the argument rather than the PE address. + * Hence, checking whether the supplied config_addr matches this + * PHB would cause issues with older kernel versions (v5.9 and + * older). If we return an error from eeh-set-option when the + * argument isn't a valid PE address then older kernels (v5.9 and + * older) will interpret that as EEH not being supported. + */ + phb = PCI_HOST_BRIDGE(sphb); + pci_for_each_device(phb->bus, (addr >> 16) & 0xFF, + spapr_eeh_pci_find_device, &found); + + if (!found) { + return RTAS_OUT_PARAM_ERROR; + } + + op = VFIO_EEH_PE_ENABLE; + break; + } + case RTAS_EEH_THAW_IO: + op = VFIO_EEH_PE_UNFREEZE_IO; + break; + case RTAS_EEH_THAW_DMA: + op = VFIO_EEH_PE_UNFREEZE_DMA; + break; + default: + return RTAS_OUT_PARAM_ERROR; + } + + ret = vfio_eeh_as_op(&sphb->iommu_as, op); + if (ret < 0) { + return RTAS_OUT_HW_ERROR; + } + + return RTAS_OUT_SUCCESS; +} + +int spapr_phb_vfio_eeh_get_state(SpaprPhbState *sphb, int *state) +{ + int ret; + + ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_GET_STATE); + if (ret < 0) { + return RTAS_OUT_PARAM_ERROR; + } + + *state = ret; + return RTAS_OUT_SUCCESS; +} + +static void spapr_phb_vfio_eeh_clear_dev_msix(PCIBus *bus, + PCIDevice *pdev, + void *opaque) +{ + /* Check if the device is a VFIO PCI device */ + if (!object_dynamic_cast(OBJECT(pdev), "vfio-pci")) { + return; + } + + /* + * The MSIx table will be cleaned out by reset. We need to + * disable it so that it can be reenabled properly. Also, + * the cached MSIx table should be cleared as it's not + * reflecting the contents in hardware.
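+ * (A reading of the code below: the read-modify-write of + * PCI_MSIX_FLAGS goes through pci_host_config_read_common()/ + * pci_host_config_write_common() so the access is routed through + * the device's config-space handlers and reaches the vfio-backed + * hardware, not just QEMU's cached copy of config space.)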
+ */ + if (msix_enabled(pdev)) { + uint16_t flags; + + flags = pci_host_config_read_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), 2); + flags &= ~PCI_MSIX_FLAGS_ENABLE; + pci_host_config_write_common(pdev, + pdev->msix_cap + PCI_MSIX_FLAGS, + pci_config_size(pdev), flags, 2); + } + + msix_reset(pdev); +} + +static void spapr_phb_vfio_eeh_clear_bus_msix(PCIBus *bus, void *opaque) +{ + pci_for_each_device_under_bus(bus, spapr_phb_vfio_eeh_clear_dev_msix, + NULL); +} + +static void spapr_phb_vfio_eeh_pre_reset(SpaprPhbState *sphb) +{ + PCIHostState *phb = PCI_HOST_BRIDGE(sphb); + + pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL); +} + +int spapr_phb_vfio_eeh_reset(SpaprPhbState *sphb, int option) +{ + uint32_t op; + int ret; + + switch (option) { + case RTAS_SLOT_RESET_DEACTIVATE: + op = VFIO_EEH_PE_RESET_DEACTIVATE; + break; + case RTAS_SLOT_RESET_HOT: + spapr_phb_vfio_eeh_pre_reset(sphb); + op = VFIO_EEH_PE_RESET_HOT; + break; + case RTAS_SLOT_RESET_FUNDAMENTAL: + spapr_phb_vfio_eeh_pre_reset(sphb); + op = VFIO_EEH_PE_RESET_FUNDAMENTAL; + break; + default: + return RTAS_OUT_PARAM_ERROR; + } + + ret = vfio_eeh_as_op(&sphb->iommu_as, op); + if (ret < 0) { + return RTAS_OUT_HW_ERROR; + } + + return RTAS_OUT_SUCCESS; +} + +int spapr_phb_vfio_eeh_configure(SpaprPhbState *sphb) +{ + int ret; + + ret = vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_CONFIGURE); + if (ret < 0) { + return RTAS_OUT_PARAM_ERROR; + } + + return RTAS_OUT_SUCCESS; +} diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c new file mode 100644 index 000000000..df5c4b968 --- /dev/null +++ b/hw/ppc/spapr_rng.c @@ -0,0 +1,162 @@ +/* + * QEMU sPAPR random number generator "device" for H_RANDOM hypercall + * + * Copyright 2015 Thomas Huth, Red Hat Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "sysemu/device_tree.h" +#include "sysemu/rng.h" +#include "hw/ppc/spapr.h" +#include "hw/qdev-properties.h" +#include "kvm_ppc.h" +#include "qom/object.h" + +OBJECT_DECLARE_SIMPLE_TYPE(SpaprRngState, SPAPR_RNG) + +struct SpaprRngState { + /*< private >*/ + DeviceState ds; + RngBackend *backend; + bool use_kvm; +}; + +struct HRandomData { + QemuSemaphore sem; + union { + uint64_t v64; + uint8_t v8[8]; + } val; + int received; +}; +typedef struct HRandomData HRandomData; + +/* Callback function for the RngBackend */ +static void random_recv(void *dest, const void *src, size_t size) +{ + HRandomData *hrdp = dest; + + if (src && size > 0) { + assert(size + hrdp->received <= sizeof(hrdp->val.v8)); + memcpy(&hrdp->val.v8[hrdp->received], src, size); + hrdp->received += size; + } + + qemu_sem_post(&hrdp->sem); +} + +/* Handler for the H_RANDOM hypercall */ +static target_ulong h_random(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + SpaprRngState *rngstate; + HRandomData hrdata; + + rngstate = SPAPR_RNG(object_resolve_path_type("", TYPE_SPAPR_RNG, NULL)); + + if (!rngstate || !rngstate->backend) { + return H_HARDWARE; + } + + qemu_sem_init(&hrdata.sem, 0); + hrdata.val.v64 = 0; + hrdata.received = 0; + + while (hrdata.received < 8) { + rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received, + random_recv, &hrdata); + qemu_mutex_unlock_iothread(); + qemu_sem_wait(&hrdata.sem); + qemu_mutex_lock_iothread(); + } + + qemu_sem_destroy(&hrdata.sem); + args[0] = hrdata.val.v64; + + return H_SUCCESS; +} + +static void spapr_rng_instance_init(Object *obj) +{ + if (object_resolve_path_type("", TYPE_SPAPR_RNG, NULL) != NULL) { + error_report("spapr-rng cannot be instantiated twice!"); + return; + } + + object_property_set_description(obj, "rng", + "ID of the random number generator backend"); +} + +static void spapr_rng_realize(DeviceState *dev, Error **errp) +{ + + SpaprRngState *rngstate = SPAPR_RNG(dev); + + if (rngstate->use_kvm) { + if (kvmppc_enable_hwrng() == 0) { + return; + } + /* + * If the user specified both use-kvm and a backend, we fall back to + * the backend now. If not, provide an appropriate error message.
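+ * A typical invocation pairs this device with an RNG backend + * object, e.g. (illustrative command line): + * -object rng-random,filename=/dev/urandom,id=rng0 + * -device spapr-rng,rng=rng0 + * while "-device spapr-rng,use-kvm=true" requests the in-kernel + * H_RANDOM implementation instead.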
+ */ + if (!rngstate->backend) { + error_setg(errp, "Could not initialize in-kernel H_RANDOM call!"); + return; + } + } + + if (rngstate->backend) { + spapr_register_hypercall(H_RANDOM, h_random); + } else { + error_setg(errp, "spapr-rng needs an RNG backend!"); + } +} + +static Property spapr_rng_properties[] = { + DEFINE_PROP_BOOL("use-kvm", SpaprRngState, use_kvm, false), + DEFINE_PROP_LINK("rng", SpaprRngState, backend, TYPE_RNG_BACKEND, + RngBackend *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_rng_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = spapr_rng_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, spapr_rng_properties); + dc->hotpluggable = false; +} + +static const TypeInfo spapr_rng_info = { + .name = TYPE_SPAPR_RNG, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprRngState), + .instance_init = spapr_rng_instance_init, + .class_init = spapr_rng_class_init, +}; + +static void spapr_rng_register_type(void) +{ + type_register_static(&spapr_rng_info); +} +type_init(spapr_rng_register_type) diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c new file mode 100644 index 000000000..b476382ae --- /dev/null +++ b/hw/ppc/spapr_rtas.c @@ -0,0 +1,636 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator + * + * Hypercall based emulated RTAS + * + * Copyright (c) 2010-2011 David Gibson, IBM Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/error-report.h" +#include "sysemu/sysemu.h" +#include "sysemu/device_tree.h" +#include "sysemu/cpus.h" +#include "sysemu/hw_accel.h" +#include "sysemu/runstate.h" +#include "kvm_ppc.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_rtas.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/ppc.h" + +#include <libfdt.h> +#include "hw/ppc/spapr_drc.h" +#include "qemu/cutils.h" +#include "trace.h" +#include "hw/ppc/fdt.h" +#include "target/ppc/mmu-hash64.h" +#include "target/ppc/mmu-book3s-v3.h" +#include "migration/blocker.h" +#include "helper_regs.h" + +static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + uint8_t c = rtas_ld(args, 0); + SpaprVioDevice *sdev = vty_lookup(spapr, 0); + + if (!sdev) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + } else { + vty_putchars(sdev, &c, sizeof(c)); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + } +} + +static void rtas_power_off(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (nargs != 2 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + cpu_stop_current(); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_system_reboot(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (nargs != 0 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_query_cpu_stopped_state(PowerPCCPU *cpu_, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong id; + PowerPCCPU *cpu; + + if (nargs != 1 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + id = rtas_ld(args, 0); + cpu = spapr_find_cpu(id); + if (cpu != NULL) { + if (CPU(cpu)->halted) { + rtas_st(rets, 1, 0); + } else { + rtas_st(rets, 1, 2); + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + } + + /* Didn't find a matching cpu */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_start_cpu(PowerPCCPU *callcpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong id, start, r3; + PowerPCCPU *newcpu; + CPUPPCState *env; + target_ulong lpcr; + target_ulong caller_lpcr; + + if (nargs != 3 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + id = rtas_ld(args, 0); + start = rtas_ld(args, 1); + r3 = rtas_ld(args, 2); + + newcpu = spapr_find_cpu(id); + if (!newcpu) { + /* Didn't find a matching cpu */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + env = &newcpu->env; + + if (!CPU(newcpu)->halted) { + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + } + + cpu_synchronize_state(CPU(newcpu)); + + env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME); + hreg_compute_hflags(env); + + caller_lpcr = callcpu->env.spr[SPR_LPCR]; + lpcr = env->spr[SPR_LPCR]; + + /* Set ILE the same way */ + lpcr = (lpcr & ~LPCR_ILE) | (caller_lpcr & LPCR_ILE); + + /* Set AIL the same way */ + lpcr = (lpcr & ~LPCR_AIL) | (caller_lpcr & LPCR_AIL); + + if (env->mmu_model == POWERPC_MMU_3_00) { + 
/* + * New cpus are expected to start in the same radix/hash mode + * as the existing CPUs + */ + if (ppc64_v3_radix(callcpu)) { + lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR; + } else { + lpcr &= ~(LPCR_UPRT | LPCR_GTSE | LPCR_HR); + } + env->spr[SPR_PSSCR] &= ~PSSCR_EC; + } + ppc_store_lpcr(newcpu, lpcr); + + /* + * Set the timebase offset of the new CPU to that of the invoking + * CPU. This helps hotplugged CPU to have the correct timebase + * offset. + */ + newcpu->env.tb_env->tb_offset = callcpu->env.tb_env->tb_offset; + + spapr_cpu_set_entry_state(newcpu, start, 0, r3, 0); + + qemu_cpu_kick(CPU(newcpu)); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + + /* Disable Power-saving mode Exit Cause exceptions for the CPU. + * This could deliver an interrupt on a dying CPU and crash the + * guest. + * For the same reason, set PSSCR_EC. + */ + ppc_store_lpcr(cpu, env->spr[SPR_LPCR] & ~pcc->lpcr_pm); + env->spr[SPR_PSSCR] |= PSSCR_EC; + cs->halted = 1; + kvmppc_set_reg_ppc_online(cpu, 0); + qemu_cpu_kick(cs); +} + +static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + CPUState *cs; + + if (nargs != 0 || nret != 1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + CPU_FOREACH(cs) { + PowerPCCPU *c = POWERPC_CPU(cs); + CPUPPCState *e = &c->env; + if (c == cpu) { + continue; + } + + /* See h_join */ + if (!cs->halted || (e->msr & (1ULL << MSR_EE))) { + rtas_st(rets, 0, H_MULTI_THREADS_ACTIVE); + return; + } + } + + qemu_system_suspend_request(); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static inline int sysparm_st(target_ulong addr, target_ulong len, + const void *val, uint16_t vallen) +{ + hwaddr phys = ppc64_phys_to_real(addr); + + if (len < 2) { + return RTAS_OUT_SYSPARM_PARAM_ERROR; + } + stw_be_phys(&address_space_memory, phys, vallen); + cpu_physical_memory_write(phys + 2, val, MIN(len - 2, vallen)); + return RTAS_OUT_SUCCESS; +} + +static void rtas_ibm_get_system_parameter(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu); + MachineState *ms = MACHINE(spapr); + target_ulong parameter = rtas_ld(args, 0); + target_ulong buffer = rtas_ld(args, 1); + target_ulong length = rtas_ld(args, 2); + target_ulong ret; + + switch (parameter) { + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: { + char *param_val = g_strdup_printf("MaxEntCap=%d," + "DesMem=%" PRIu64 "," + "DesProcs=%d," + "MaxPlatProcs=%d", + ms->smp.max_cpus, + ms->ram_size / MiB, + ms->smp.cpus, + ms->smp.max_cpus); + if (pcc->n_host_threads > 0) { + char *hostthr_val, *old = param_val; + + /* + * Add HostThrs property. This property is not present in PAPR but + * is expected by some guests to communicate the number of physical + * host threads per core on the system so that they can scale + * information which varies based on the thread configuration. 
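+ * With those values filled in, the returned string looks roughly + * like (illustrative values only): + * MaxEntCap=8,DesMem=4096,DesProcs=4,MaxPlatProcs=8,HostThrs=8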
+ */ + hostthr_val = g_strdup_printf(",HostThrs=%d", pcc->n_host_threads); + param_val = g_strconcat(param_val, hostthr_val, NULL); + g_free(hostthr_val); + g_free(old); + } + ret = sysparm_st(buffer, length, param_val, strlen(param_val) + 1); + g_free(param_val); + break; + } + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: { + uint8_t param_val = DIAGNOSTICS_RUN_MODE_DISABLED; + + ret = sysparm_st(buffer, length, ¶m_val, sizeof(param_val)); + break; + } + case RTAS_SYSPARM_UUID: + ret = sysparm_st(buffer, length, (unsigned char *)&qemu_uuid, + (qemu_uuid_set ? 16 : 0)); + break; + default: + ret = RTAS_OUT_NOT_SUPPORTED; + } + + rtas_st(rets, 0, ret); +} + +static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong parameter = rtas_ld(args, 0); + target_ulong ret = RTAS_OUT_NOT_SUPPORTED; + + switch (parameter) { + case RTAS_SYSPARM_SPLPAR_CHARACTERISTICS: + case RTAS_SYSPARM_DIAGNOSTICS_RUN_MODE: + case RTAS_SYSPARM_UUID: + ret = RTAS_OUT_NOT_AUTHORIZED; + break; + } + + rtas_st(rets, 0, ret); +} + +static void rtas_ibm_os_term(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + target_ulong msgaddr = rtas_ld(args, 0); + char msg[512]; + + cpu_physical_memory_read(msgaddr, msg, sizeof(msg) - 1); + msg[sizeof(msg) - 1] = 0; + + error_report("OS terminated: %s", msg); + qemu_system_guest_panicked(NULL); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_set_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + int32_t power_domain; + + if (nargs != 2 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* we currently only use a single, "live insert" powerdomain for + * hotplugged/dlpar'd resources, so the power is always live/full (100) + */ + power_domain = rtas_ld(args, 0); + if (power_domain != -1) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, 100); +} + +static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, uint32_t nret, + target_ulong rets) +{ + int32_t power_domain; + + if (nargs != 1 || nret != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* we currently only use a single, "live insert" powerdomain for + * hotplugged/dlpar'd resources, so the power is always live/full (100) + */ + power_domain = rtas_ld(args, 0); + if (power_domain != -1) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, 100); +} + +static void rtas_ibm_nmi_register(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + hwaddr rtas_addr; + target_ulong sreset_addr, mce_addr; + + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + rtas_addr = spapr_get_rtas_addr(); + if (!rtas_addr) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + sreset_addr = rtas_ld(args, 0); + mce_addr = rtas_ld(args, 1); + + /* PAPR requires these are in the first 32M of memory and within RMA */ + if (sreset_addr >= 32 * MiB || sreset_addr >= spapr->rma_size || + mce_addr >= 32 * MiB || 
mce_addr >= spapr->rma_size) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (kvm_enabled()) { + if (kvmppc_set_fwnmi(cpu) < 0) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + } + + spapr->fwnmi_system_reset_addr = sreset_addr; + spapr->fwnmi_machine_check_addr = mce_addr; + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI) == SPAPR_CAP_OFF) { + rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED); + return; + } + + if (spapr->fwnmi_machine_check_addr == -1) { + qemu_log_mask(LOG_GUEST_ERROR, +"FWNMI: ibm,nmi-interlock RTAS called with FWNMI not registered.\n"); + + /* NMI register not called */ + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (spapr->fwnmi_machine_check_interlock != cpu->vcpu_id) { + /* + * The vCPU that hit the NMI should invoke "ibm,nmi-interlock" + * This should be PARAM_ERROR, but Linux calls "ibm,nmi-interlock" + * for system reset interrupts, despite them not being interlocked. + * PowerVM silently ignores this and returns success here. Returning + * failure causes Linux to print the error "FWNMI: nmi-interlock + * failed: -3", although no other apparent ill effects, this is a + * regression for the user when enabling FWNMI. So for now, match + * PowerVM. When most Linux clients are fixed, this could be + * changed. + */ + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + } + + /* + * vCPU issuing "ibm,nmi-interlock" is done with NMI handling, + * hence unset fwnmi_machine_check_interlock. + */ + spapr->fwnmi_machine_check_interlock = -1; + qemu_cond_signal(&spapr->fwnmi_machine_check_interlock_cond); + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + migrate_del_blocker(spapr->fwnmi_migration_blocker); +} + +static struct rtas_call { + const char *name; + spapr_rtas_fn fn; +} rtas_table[RTAS_TOKEN_MAX - RTAS_TOKEN_BASE]; + +target_ulong spapr_rtas_call(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + if ((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)) { + struct rtas_call *call = rtas_table + (token - RTAS_TOKEN_BASE); + + if (call->fn) { + call->fn(cpu, spapr, token, nargs, args, nret, rets); + return H_SUCCESS; + } + } + + /* HACK: Some Linux early debug code uses RTAS display-character, + * but assumes the token value is 0xa (which it is on some real + * machines) without looking it up in the device tree. 
This + * special case makes this work */ + if (token == 0xa) { + rtas_display_character(cpu, spapr, 0xa, nargs, args, nret, rets); + return H_SUCCESS; + } + + hcall_dprintf("Unknown RTAS token 0x%x\n", token); + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return H_PARAMETER; +} + +uint64_t qtest_rtas_call(char *cmd, uint32_t nargs, uint64_t args, + uint32_t nret, uint64_t rets) +{ + int token; + + for (token = 0; token < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; token++) { + if (strcmp(cmd, rtas_table[token].name) == 0) { + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + PowerPCCPU *cpu = POWERPC_CPU(first_cpu); + + rtas_table[token].fn(cpu, spapr, token + RTAS_TOKEN_BASE, + nargs, args, nret, rets); + return H_SUCCESS; + } + } + return H_PARAMETER; +} + +void spapr_rtas_register(int token, const char *name, spapr_rtas_fn fn) +{ + assert((token >= RTAS_TOKEN_BASE) && (token < RTAS_TOKEN_MAX)); + + token -= RTAS_TOKEN_BASE; + + assert(!name || !rtas_table[token].name); + + rtas_table[token].name = name; + rtas_table[token].fn = fn; +} + +void spapr_dt_rtas_tokens(void *fdt, int rtas) +{ + int i; + + for (i = 0; i < RTAS_TOKEN_MAX - RTAS_TOKEN_BASE; i++) { + struct rtas_call *call = &rtas_table[i]; + + if (!call->name) { + continue; + } + + _FDT(fdt_setprop_cell(fdt, rtas, call->name, i + RTAS_TOKEN_BASE)); + } +} + +hwaddr spapr_get_rtas_addr(void) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); + int rtas_node; + const fdt32_t *rtas_data; + void *fdt = spapr->fdt_blob; + + /* fetch rtas addr from fdt */ + rtas_node = fdt_path_offset(fdt, "/rtas"); + if (rtas_node < 0) { + return 0; + } + + rtas_data = fdt_getprop(fdt, rtas_node, "linux,rtas-base", NULL); + if (!rtas_data) { + return 0; + } + + /* + * We assume that the OS called RTAS instantiate-rtas, but some other + * OS might call RTAS instantiate-rtas-64 instead. This is fine for now, + * as SLOF only supports the 32-bit variant.
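+ * ("linux,rtas-base" is a single 32-bit cell, which is why the one + * fdt32_to_cpu() below suffices to recover the address.)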
+ */ + return (hwaddr)fdt32_to_cpu(*rtas_data); +} + +static void core_rtas_register_types(void) +{ + spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character", + rtas_display_character); + spapr_rtas_register(RTAS_POWER_OFF, "power-off", rtas_power_off); + spapr_rtas_register(RTAS_SYSTEM_REBOOT, "system-reboot", + rtas_system_reboot); + spapr_rtas_register(RTAS_QUERY_CPU_STOPPED_STATE, "query-cpu-stopped-state", + rtas_query_cpu_stopped_state); + spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu); + spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self); + spapr_rtas_register(RTAS_IBM_SUSPEND_ME, "ibm,suspend-me", + rtas_ibm_suspend_me); + spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER, + "ibm,get-system-parameter", + rtas_ibm_get_system_parameter); + spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER, + "ibm,set-system-parameter", + rtas_ibm_set_system_parameter); + spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term", + rtas_ibm_os_term); + spapr_rtas_register(RTAS_SET_POWER_LEVEL, "set-power-level", + rtas_set_power_level); + spapr_rtas_register(RTAS_GET_POWER_LEVEL, "get-power-level", + rtas_get_power_level); + spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register", + rtas_ibm_nmi_register); + spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock", + rtas_ibm_nmi_interlock); +} + +type_init(core_rtas_register_types) diff --git a/hw/ppc/spapr_rtas_ddw.c b/hw/ppc/spapr_rtas_ddw.c new file mode 100644 index 000000000..3e826e130 --- /dev/null +++ b/hw/ppc/spapr_rtas_ddw.c @@ -0,0 +1,291 @@ +/* + * QEMU sPAPR Dynamic DMA windows support + * + * Copyright (c) 2015 Alexey Kardashevskiy, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, + * or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qemu/module.h" +#include "hw/ppc/spapr.h" +#include "hw/pci-host/spapr.h" +#include "trace.h" + +static int spapr_phb_get_active_win_num_cb(Object *child, void *opaque) +{ + SpaprTceTable *tcet; + + tcet = (SpaprTceTable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + if (tcet && tcet->nb_table) { + ++*(unsigned *)opaque; + } + return 0; +} + +static unsigned spapr_phb_get_active_win_num(SpaprPhbState *sphb) +{ + unsigned ret = 0; + + object_child_foreach(OBJECT(sphb), spapr_phb_get_active_win_num_cb, &ret); + + return ret; +} + +static int spapr_phb_get_free_liobn_cb(Object *child, void *opaque) +{ + SpaprTceTable *tcet; + + tcet = (SpaprTceTable *) object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE); + if (tcet && !tcet->nb_table) { + *(uint32_t *)opaque = tcet->liobn; + return 1; + } + return 0; +} + +static unsigned spapr_phb_get_free_liobn(SpaprPhbState *sphb) +{ + uint32_t liobn = 0; + + object_child_foreach(OBJECT(sphb), spapr_phb_get_free_liobn_cb, &liobn); + + return liobn; +} + +static uint32_t spapr_page_mask_to_query_mask(uint64_t page_mask) +{ + int i; + uint32_t mask = 0; + const struct { int shift; uint32_t mask; } masks[] = { + { 12, RTAS_DDW_PGSIZE_4K }, + { 16, RTAS_DDW_PGSIZE_64K }, + { 24, RTAS_DDW_PGSIZE_16M }, + { 25, RTAS_DDW_PGSIZE_32M }, + { 26, RTAS_DDW_PGSIZE_64M }, + { 27, RTAS_DDW_PGSIZE_128M }, + { 28, RTAS_DDW_PGSIZE_256M }, + { 34, RTAS_DDW_PGSIZE_16G }, + }; + + for (i = 0; i < ARRAY_SIZE(masks); ++i) { + if (page_mask & (1ULL << masks[i].shift)) { + mask |= masks[i].mask; + } + } + + return mask; +} + +static void rtas_ibm_query_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + uint32_t avail, addr, pgmask = 0; + + if ((nargs != 3) || (nret != 5)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + /* Translate page mask to LoPAPR format */ + pgmask = spapr_page_mask_to_query_mask(sphb->page_size_mask); + + avail = SPAPR_PCI_DMA_MAX_WINDOWS - spapr_phb_get_active_win_num(sphb); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, avail); + rtas_st(rets, 2, 0x80000000); /* The largest window we can possibly have */ + rtas_st(rets, 3, pgmask); + rtas_st(rets, 4, 0); /* DMA migration mask, not supported */ + + trace_spapr_iommu_ddw_query(buid, addr, avail, 0x80000000, pgmask); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_create_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + SpaprTceTable *tcet = NULL; + uint32_t addr, page_shift, window_shift, liobn; + uint64_t buid, win_addr; + int windows; + + if ((nargs != 5) || (nret != 4)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + page_shift = rtas_ld(args, 3); + window_shift = rtas_ld(args, 4); + liobn = spapr_phb_get_free_liobn(sphb); + windows = spapr_phb_get_active_win_num(sphb); + + if (!(sphb->page_size_mask & (1ULL << page_shift)) || + 
(window_shift < page_shift)) { + goto param_error_exit; + } + + if (!liobn || !sphb->ddw_enabled || windows == SPAPR_PCI_DMA_MAX_WINDOWS) { + goto hw_error_exit; + } + + tcet = spapr_tce_find_by_liobn(liobn); + if (!tcet) { + goto hw_error_exit; + } + + win_addr = (windows == 0) ? sphb->dma_win_addr : sphb->dma64_win_addr; + /* + * We have just created a window and we know for a fact that it is empty; + * use a hack to avoid iterating over the table as it is quite possible + * to have billions of TCEs, all empty. + * Note that we cannot delay this to the first H_PUT_TCE as this hcall is + * most likely to be handled in KVM so QEMU just does not know if it + * happened. + */ + tcet->skipping_replay = true; + spapr_tce_table_enable(tcet, page_shift, win_addr, + 1ULL << (window_shift - page_shift)); + tcet->skipping_replay = false; + if (!tcet->nb_table) { + goto hw_error_exit; + } + + trace_spapr_iommu_ddw_create(buid, addr, 1ULL << page_shift, + 1ULL << window_shift, tcet->bus_offset, liobn); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, liobn); + rtas_st(rets, 2, tcet->bus_offset >> 32); + rtas_st(rets, 3, tcet->bus_offset & ((uint32_t) -1)); + + return; + +hw_error_exit: + rtas_st(rets, 0, RTAS_OUT_HW_ERROR); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_remove_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + SpaprTceTable *tcet; + uint32_t liobn; + + if ((nargs != 1) || (nret != 1)) { + goto param_error_exit; + } + + liobn = rtas_ld(args, 0); + tcet = spapr_tce_find_by_liobn(liobn); + if (!tcet) { + goto param_error_exit; + } + + sphb = SPAPR_PCI_HOST_BRIDGE(OBJECT(tcet)->parent); + if (!sphb || !sphb->ddw_enabled || !tcet->nb_table) { + goto param_error_exit; + } + + spapr_tce_table_disable(tcet); + trace_spapr_iommu_ddw_remove(liobn); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void rtas_ibm_reset_pe_dma_window(PowerPCCPU *cpu, + SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprPhbState *sphb; + uint64_t buid; + uint32_t addr; + + if ((nargs != 3) || (nret != 1)) { + goto param_error_exit; + } + + buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2); + addr = rtas_ld(args, 0); + sphb = spapr_pci_find_phb(spapr, buid); + if (!sphb || !sphb->ddw_enabled) { + goto param_error_exit; + } + + spapr_phb_dma_reset(sphb); + trace_spapr_iommu_ddw_reset(buid, addr); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + + return; + +param_error_exit: + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); +} + +static void spapr_rtas_ddw_init(void) +{ + spapr_rtas_register(RTAS_IBM_QUERY_PE_DMA_WINDOW, + "ibm,query-pe-dma-window", + rtas_ibm_query_pe_dma_window); + spapr_rtas_register(RTAS_IBM_CREATE_PE_DMA_WINDOW, + "ibm,create-pe-dma-window", + rtas_ibm_create_pe_dma_window); + spapr_rtas_register(RTAS_IBM_REMOVE_PE_DMA_WINDOW, + "ibm,remove-pe-dma-window", + rtas_ibm_remove_pe_dma_window); + spapr_rtas_register(RTAS_IBM_RESET_PE_DMA_WINDOW, + "ibm,reset-pe-dma-window", + rtas_ibm_reset_pe_dma_window); +} + +type_init(spapr_rtas_ddw_init) diff --git a/hw/ppc/spapr_rtc.c b/hw/ppc/spapr_rtc.c new file mode 100644 index 000000000..fba4dfca3 --- /dev/null +++ b/hw/ppc/spapr_rtc.c @@ -0,0 +1,190 @@ +/* + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware 
System Emulator + * + * RTAS Real Time Clock + * + * Copyright (c) 2010-2011 David Gibson, IBM Corporation. + * Copyright 2014 David Gibson, Red Hat. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/timer.h" +#include "sysemu/sysemu.h" +#include "hw/ppc/spapr.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qapi/qapi-events-misc-target.h" +#include "qemu/cutils.h" +#include "qemu/module.h" + +void spapr_rtc_read(SpaprRtcState *rtc, struct tm *tm, uint32_t *ns) +{ + int64_t host_ns = qemu_clock_get_ns(rtc_clock); + int64_t guest_ns; + time_t guest_s; + + assert(rtc); + + guest_ns = host_ns + rtc->ns_offset; + guest_s = guest_ns / NANOSECONDS_PER_SECOND; + + if (tm) { + gmtime_r(&guest_s, tm); + } + if (ns) { + *ns = guest_ns; + } +} + +int spapr_rtc_import_offset(SpaprRtcState *rtc, int64_t legacy_offset) +{ + if (!rtc) { + return -ENODEV; + } + + rtc->ns_offset = legacy_offset * NANOSECONDS_PER_SECOND; + + return 0; +} + +static void rtas_get_time_of_day(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + struct tm tm; + uint32_t ns; + + if ((nargs != 0) || (nret != 8)) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + spapr_rtc_read(&spapr->rtc, &tm, &ns); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); + rtas_st(rets, 1, tm.tm_year + 1900); + rtas_st(rets, 2, tm.tm_mon + 1); + rtas_st(rets, 3, tm.tm_mday); + rtas_st(rets, 4, tm.tm_hour); + rtas_st(rets, 5, tm.tm_min); + rtas_st(rets, 6, tm.tm_sec); + rtas_st(rets, 7, ns); +} + +static void rtas_set_time_of_day(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, uint32_t nargs, + target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprRtcState *rtc = &spapr->rtc; + struct tm tm; + time_t new_s; + int64_t host_ns; + + if ((nargs != 7) || (nret != 1)) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + tm.tm_year = rtas_ld(args, 0) - 1900; + tm.tm_mon = rtas_ld(args, 1) - 1; + tm.tm_mday = rtas_ld(args, 2); + tm.tm_hour = rtas_ld(args, 3); + tm.tm_min = rtas_ld(args, 4); + tm.tm_sec = rtas_ld(args, 5); + + new_s = mktimegm(&tm); + if (new_s == -1) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + /* Generate a monitor event for the change */ + qapi_event_send_rtc_change(qemu_timedate_diff(&tm)); + + host_ns = qemu_clock_get_ns(rtc_clock); + + rtc->ns_offset = (new_s * NANOSECONDS_PER_SECOND) - 
host_ns; + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void spapr_rtc_qom_date(Object *obj, struct tm *current_tm, Error **errp) +{ + spapr_rtc_read(SPAPR_RTC(obj), current_tm, NULL); +} + +static void spapr_rtc_realize(DeviceState *dev, Error **errp) +{ + SpaprRtcState *rtc = SPAPR_RTC(dev); + struct tm tm; + time_t host_s; + int64_t rtc_ns; + + /* Initialize the RTAS RTC from host time */ + + qemu_get_timedate(&tm, 0); + host_s = mktimegm(&tm); + rtc_ns = qemu_clock_get_ns(rtc_clock); + rtc->ns_offset = host_s * NANOSECONDS_PER_SECOND - rtc_ns; + + object_property_add_tm(OBJECT(rtc), "date", spapr_rtc_qom_date); +} + +static const VMStateDescription vmstate_spapr_rtc = { + .name = "spapr/rtc", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_INT64(ns_offset, SpaprRtcState), + VMSTATE_END_OF_LIST() + }, +}; + +static void spapr_rtc_class_init(ObjectClass *oc, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(oc); + + dc->realize = spapr_rtc_realize; + dc->vmsd = &vmstate_spapr_rtc; + /* Reason: This is an internal device only for handling the hypercalls */ + dc->user_creatable = false; + + spapr_rtas_register(RTAS_GET_TIME_OF_DAY, "get-time-of-day", + rtas_get_time_of_day); + spapr_rtas_register(RTAS_SET_TIME_OF_DAY, "set-time-of-day", + rtas_set_time_of_day); +} + +static const TypeInfo spapr_rtc_info = { + .name = TYPE_SPAPR_RTC, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprRtcState), + .class_init = spapr_rtc_class_init, +}; + +static void spapr_rtc_register_types(void) +{ + type_register_static(&spapr_rtc_info); +} +type_init(spapr_rtc_register_types) diff --git a/hw/ppc/spapr_softmmu.c b/hw/ppc/spapr_softmmu.c new file mode 100644 index 000000000..4ee03c83e --- /dev/null +++ b/hw/ppc/spapr_softmmu.c @@ -0,0 +1,612 @@ +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include "cpu.h" +#include "helper_regs.h" +#include "hw/ppc/spapr.h" +#include "mmu-hash64.h" +#include "mmu-book3s-v3.h" + +static inline bool valid_ptex(PowerPCCPU *cpu, target_ulong ptex) +{ + /* + * hash value/pteg group index is normalized by HPT mask + */ + if (((ptex & ~7ULL) / HPTES_PER_GROUP) & ~ppc_hash64_hpt_mask(cpu)) { + return false; + } + return true; +} + +static target_ulong h_enter(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong pteh = args[2]; + target_ulong ptel = args[3]; + unsigned apshift; + target_ulong raddr; + target_ulong slot; + const ppc_hash_pte64_t *hptes; + + apshift = ppc_hash64_hpte_page_shift_noslb(cpu, pteh, ptel); + if (!apshift) { + /* Bad page size encoding */ + return H_PARAMETER; + } + + raddr = (ptel & HPTE64_R_RPN) & ~((1ULL << apshift) - 1); + + if (is_ram_address(spapr, raddr)) { + /* Regular RAM - should have WIMG=0010 */ + if ((ptel & HPTE64_R_WIMG) != HPTE64_R_M) { + return H_PARAMETER; + } + } else { + target_ulong wimg_flags; + /* Looks like an IO address */ + /* FIXME: What WIMG combinations could be sensible for IO? + * For now we allow WIMG=010x, but are there others? */ + /* FIXME: Should we check against registered IO addresses? 
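+ * (As written, the check below requires I=1 with W=0 and accepts + * either value of M, i.e. caching-inhibited mappings whether or + * not memory-coherence is set; the G bit is not inspected at all.)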
*/ + wimg_flags = (ptel & (HPTE64_R_W | HPTE64_R_I | HPTE64_R_M)); + + if (wimg_flags != HPTE64_R_I && + wimg_flags != (HPTE64_R_I | HPTE64_R_M)) { + return H_PARAMETER; + } + } + + pteh &= ~0x60ULL; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + slot = ptex & 7ULL; + ptex = ptex & ~7ULL; + + if (likely((flags & H_EXACT) == 0)) { + hptes = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + for (slot = 0; slot < 8; slot++) { + if (!(ppc_hash64_hpte0(cpu, hptes, slot) & HPTE64_V_VALID)) { + break; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + if (slot == 8) { + return H_PTEG_FULL; + } + } else { + hptes = ppc_hash64_map_hptes(cpu, ptex + slot, 1); + if (ppc_hash64_hpte0(cpu, hptes, 0) & HPTE64_V_VALID) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex + slot, 1); + return H_PTEG_FULL; + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + } + + spapr_store_hpte(cpu, ptex + slot, pteh | HPTE64_V_HPTE_DIRTY, ptel); + + args[0] = ptex + slot; + return H_SUCCESS; +} + +typedef enum { + REMOVE_SUCCESS = 0, + REMOVE_NOT_FOUND = 1, + REMOVE_PARM = 2, + REMOVE_HW = 3, +} RemoveResult; + +static RemoveResult remove_hpte(PowerPCCPU *cpu + , target_ulong ptex, + target_ulong avpn, + target_ulong flags, + target_ulong *vp, target_ulong *rp) +{ + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return REMOVE_PARM; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) || + ((flags & H_ANDCOND) && (v & avpn) != 0)) { + return REMOVE_NOT_FOUND; + } + *vp = v; + *rp = r; + spapr_store_hpte(cpu, ptex, HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + return REMOVE_SUCCESS; +} + +static target_ulong h_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + RemoveResult ret; + + ret = remove_hpte(cpu, ptex, avpn, flags, + &args[0], &args[1]); + + switch (ret) { + case REMOVE_SUCCESS: + check_tlb_flush(env, true); + return H_SUCCESS; + + case REMOVE_NOT_FOUND: + return H_NOT_FOUND; + + case REMOVE_PARM: + return H_PARAMETER; + + case REMOVE_HW: + return H_HARDWARE; + } + + g_assert_not_reached(); +} + +#define H_BULK_REMOVE_TYPE 0xc000000000000000ULL +#define H_BULK_REMOVE_REQUEST 0x4000000000000000ULL +#define H_BULK_REMOVE_RESPONSE 0x8000000000000000ULL +#define H_BULK_REMOVE_END 0xc000000000000000ULL +#define H_BULK_REMOVE_CODE 0x3000000000000000ULL +#define H_BULK_REMOVE_SUCCESS 0x0000000000000000ULL +#define H_BULK_REMOVE_NOT_FOUND 0x1000000000000000ULL +#define H_BULK_REMOVE_PARM 0x2000000000000000ULL +#define H_BULK_REMOVE_HW 0x3000000000000000ULL +#define H_BULK_REMOVE_RC 0x0c00000000000000ULL +#define H_BULK_REMOVE_FLAGS 0x0300000000000000ULL +#define H_BULK_REMOVE_ABSOLUTE 0x0000000000000000ULL +#define H_BULK_REMOVE_ANDCOND 0x0100000000000000ULL +#define H_BULK_REMOVE_AVPN 0x0200000000000000ULL +#define H_BULK_REMOVE_PTEX 0x00ffffffffffffffULL + +#define H_BULK_REMOVE_MAX_BATCH 4 + +static target_ulong h_bulk_remove(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + int i; + target_ulong rc = H_SUCCESS; + + for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { + 
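+ /* + * Each batch entry is a (tsh, tsl) pair: tsh packs the request + * type, flags and PTE index according to the H_BULK_REMOVE_* masks + * above and is rewritten in place to carry the response code; tsl + * holds the AVPN value used for H_AVPN/H_ANDCOND matching in + * remove_hpte(). + */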
target_ulong *tsh = &args[i*2]; + target_ulong tsl = args[i*2 + 1]; + target_ulong v, r, ret; + + if ((*tsh & H_BULK_REMOVE_TYPE) == H_BULK_REMOVE_END) { + break; + } else if ((*tsh & H_BULK_REMOVE_TYPE) != H_BULK_REMOVE_REQUEST) { + return H_PARAMETER; + } + + *tsh &= H_BULK_REMOVE_PTEX | H_BULK_REMOVE_FLAGS; + *tsh |= H_BULK_REMOVE_RESPONSE; + + if ((*tsh & H_BULK_REMOVE_ANDCOND) && (*tsh & H_BULK_REMOVE_AVPN)) { + *tsh |= H_BULK_REMOVE_PARM; + return H_PARAMETER; + } + + ret = remove_hpte(cpu, *tsh & H_BULK_REMOVE_PTEX, tsl, + (*tsh & H_BULK_REMOVE_FLAGS) >> 26, + &v, &r); + + *tsh |= ret << 60; + + switch (ret) { + case REMOVE_SUCCESS: + *tsh |= (r & (HPTE64_R_C | HPTE64_R_R)) << 43; + break; + + case REMOVE_PARM: + rc = H_PARAMETER; + goto exit; + + case REMOVE_HW: + rc = H_HARDWARE; + goto exit; + } + } + exit: + check_tlb_flush(env, true); + + return rc; +} + +static target_ulong h_protect(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + CPUPPCState *env = &cpu->env; + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + target_ulong avpn = args[2]; + const ppc_hash_pte64_t *hptes; + target_ulong v, r; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, 1); + v = ppc_hash64_hpte0(cpu, hptes, 0); + r = ppc_hash64_hpte1(cpu, hptes, 0); + ppc_hash64_unmap_hptes(cpu, hptes, ptex, 1); + + if ((v & HPTE64_V_VALID) == 0 || + ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) { + return H_NOT_FOUND; + } + + r &= ~(HPTE64_R_PP0 | HPTE64_R_PP | HPTE64_R_N | + HPTE64_R_KEY_HI | HPTE64_R_KEY_LO); + r |= (flags << 55) & HPTE64_R_PP0; + r |= (flags << 48) & HPTE64_R_KEY_HI; + r |= flags & (HPTE64_R_PP | HPTE64_R_N | HPTE64_R_KEY_LO); + spapr_store_hpte(cpu, ptex, + (v & ~HPTE64_V_VALID) | HPTE64_V_HPTE_DIRTY, 0); + ppc_hash64_tlb_flush_hpte(cpu, ptex, v, r); + /* Flush the tlb */ + check_tlb_flush(env, true); + /* Don't need a memory barrier, due to qemu's global lock */ + spapr_store_hpte(cpu, ptex, v | HPTE64_V_HPTE_DIRTY, r); + return H_SUCCESS; +} + +static target_ulong h_read(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong flags = args[0]; + target_ulong ptex = args[1]; + int i, ridx, n_entries = 1; + const ppc_hash_pte64_t *hptes; + + if (!valid_ptex(cpu, ptex)) { + return H_PARAMETER; + } + + if (flags & H_READ_4) { + /* Clear the two low order bits */ + ptex &= ~(3ULL); + n_entries = 4; + } + + hptes = ppc_hash64_map_hptes(cpu, ptex, n_entries); + for (i = 0, ridx = 0; i < n_entries; i++) { + args[ridx++] = ppc_hash64_hpte0(cpu, hptes, i); + args[ridx++] = ppc_hash64_hpte1(cpu, hptes, i); + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, n_entries); + + return H_SUCCESS; +} + +struct SpaprPendingHpt { + /* These fields are read-only after initialization */ + int shift; + QemuThread thread; + + /* These fields are protected by the BQL */ + bool complete; + + /* These fields are private to the preparation thread if + * !complete, otherwise protected by the BQL */ + int ret; + void *hpt; +}; + +static void free_pending_hpt(SpaprPendingHpt *pending) +{ + if (pending->hpt) { + qemu_vfree(pending->hpt); + } + + g_free(pending); +} + +static void *hpt_prepare_thread(void *opaque) +{ + SpaprPendingHpt *pending = opaque; + size_t size = 1ULL << pending->shift; + + pending->hpt = qemu_try_memalign(size, size); + if (pending->hpt) { + memset(pending->hpt, 0, size); + pending->ret = H_SUCCESS; + } else { + pending->ret = H_NO_MEM; + } + + 
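+ /* + * Reacquire the BQL before publishing the result: the allocation + * above ran outside the lock, and the prepare may have been + * cancelled (spapr->pending_hpt repointed) while it was running. + */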
qemu_mutex_lock_iothread(); + + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { + /* Ready to go */ + pending->complete = true; + } else { + /* We've been cancelled, clean ourselves up */ + free_pending_hpt(pending); + } + + qemu_mutex_unlock_iothread(); + return NULL; +} + +/* Must be called with BQL held */ +static void cancel_hpt_prepare(SpaprMachineState *spapr) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + /* Let the thread know it's cancelled */ + spapr->pending_hpt = NULL; + + if (!pending) { + /* Nothing to do */ + return; + } + + if (!pending->complete) { + /* thread will clean itself up */ + return; + } + + free_pending_hpt(pending); +} + +target_ulong softmmu_resize_hpt_prepare(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + + if (pending) { + /* something already in progress */ + if (pending->shift == shift) { + /* and it's suitable */ + if (pending->complete) { + return pending->ret; + } else { + return H_LONG_BUSY_ORDER_100_MSEC; + } + } + + /* not suitable, cancel and replace */ + cancel_hpt_prepare(spapr); + } + + if (!shift) { + /* nothing to do */ + return H_SUCCESS; + } + + /* start new prepare */ + + pending = g_new0(SpaprPendingHpt, 1); + pending->shift = shift; + pending->ret = H_HARDWARE; + + qemu_thread_create(&pending->thread, "sPAPR HPT prepare", + hpt_prepare_thread, pending, QEMU_THREAD_DETACHED); + + spapr->pending_hpt = pending; + + /* In theory we could estimate the time more accurately based on + * the new size, but there's not much point */ + return H_LONG_BUSY_ORDER_100_MSEC; +} + +static uint64_t new_hpte_load0(void *htab, uint64_t pteg, int slot) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + return ldq_p(addr); +} + +static void new_hpte_store(void *htab, uint64_t pteg, int slot, + uint64_t pte0, uint64_t pte1) +{ + uint8_t *addr = htab; + + addr += pteg * HASH_PTEG_SIZE_64; + addr += slot * HASH_PTE_SIZE_64; + + stq_p(addr, pte0); + stq_p(addr + HPTE64_DW1, pte1); +} + +static int rehash_hpte(PowerPCCPU *cpu, + const ppc_hash_pte64_t *hptes, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize, + uint64_t pteg, int slot) +{ + uint64_t old_hash_mask = (oldsize >> 7) - 1; + uint64_t new_hash_mask = (newsize >> 7) - 1; + target_ulong pte0 = ppc_hash64_hpte0(cpu, hptes, slot); + target_ulong pte1; + uint64_t avpn; + unsigned base_pg_shift; + uint64_t hash, new_pteg, replace_pte0; + + if (!(pte0 & HPTE64_V_VALID) || !(pte0 & HPTE64_V_BOLTED)) { + return H_SUCCESS; + } + + pte1 = ppc_hash64_hpte1(cpu, hptes, slot); + + base_pg_shift = ppc_hash64_hpte_page_shift_noslb(cpu, pte0, pte1); + assert(base_pg_shift); /* H_ENTER shouldn't allow a bad encoding */ + avpn = HPTE64_V_AVPN_VAL(pte0) & ~(((1ULL << base_pg_shift) - 1) >> 23); + + if (pte0 & HPTE64_V_SECONDARY) { + pteg = ~pteg; + } + + if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_256M) { + uint64_t offset, vsid; + + /* We only have 28 - 23 bits of offset in avpn */ + offset = (avpn & 0x1f) << 23; + vsid = avpn >> 5; + /* We can find more bits from the pteg value */ + if (base_pg_shift < 23) { + offset |= ((vsid ^ pteg) & old_hash_mask) << base_pg_shift; + } + + hash = vsid ^ (offset >> base_pg_shift); + } else if ((pte0 & HPTE64_V_SSIZE) == HPTE64_V_SSIZE_1T) { + uint64_t offset, vsid; + + /* We only have 40 - 23 bits of seg_off in avpn */ + offset = (avpn & 0x1ffff) << 23; + vsid = avpn >> 17; + if (base_pg_shift < 23) { + offset |= 
((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) + << base_pg_shift; + } + + hash = vsid ^ (vsid << 25) ^ (offset >> base_pg_shift); + } else { + error_report("rehash_pte: Bad segment size in HPTE"); + return H_HARDWARE; + } + + new_pteg = hash & new_hash_mask; + if (pte0 & HPTE64_V_SECONDARY) { + assert(~pteg == (hash & old_hash_mask)); + new_pteg = ~new_pteg; + } else { + assert(pteg == (hash & old_hash_mask)); + } + assert((oldsize != newsize) || (pteg == new_pteg)); + replace_pte0 = new_hpte_load0(new_hpt, new_pteg, slot); + /* + * Strictly speaking, we don't need all these tests, since we only + * ever rehash bolted HPTEs. We might in future handle non-bolted + * HPTEs, though so make the logic correct for those cases as + * well. + */ + if (replace_pte0 & HPTE64_V_VALID) { + assert(newsize < oldsize); + if (replace_pte0 & HPTE64_V_BOLTED) { + if (pte0 & HPTE64_V_BOLTED) { + /* Bolted collision, nothing we can do */ + return H_PTEG_FULL; + } else { + /* Discard this hpte */ + return H_SUCCESS; + } + } + } + + new_hpte_store(new_hpt, new_pteg, slot, pte0, pte1); + return H_SUCCESS; +} + +static int rehash_hpt(PowerPCCPU *cpu, + void *old_hpt, uint64_t oldsize, + void *new_hpt, uint64_t newsize) +{ + uint64_t n_ptegs = oldsize >> 7; + uint64_t pteg; + int slot; + int rc; + + for (pteg = 0; pteg < n_ptegs; pteg++) { + hwaddr ptex = pteg * HPTES_PER_GROUP; + const ppc_hash_pte64_t *hptes + = ppc_hash64_map_hptes(cpu, ptex, HPTES_PER_GROUP); + + if (!hptes) { + return H_HARDWARE; + } + + for (slot = 0; slot < HPTES_PER_GROUP; slot++) { + rc = rehash_hpte(cpu, hptes, old_hpt, oldsize, new_hpt, newsize, + pteg, slot); + if (rc != H_SUCCESS) { + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + return rc; + } + } + ppc_hash64_unmap_hptes(cpu, hptes, ptex, HPTES_PER_GROUP); + } + + return H_SUCCESS; +} + +target_ulong softmmu_resize_hpt_commit(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong flags, + target_ulong shift) +{ + SpaprPendingHpt *pending = spapr->pending_hpt; + int rc; + size_t newsize; + + if (flags != 0) { + return H_PARAMETER; + } + + if (!pending || (pending->shift != shift)) { + /* no matching prepare */ + return H_CLOSED; + } + + if (!pending->complete) { + /* prepare has not completed */ + return H_BUSY; + } + + /* Shouldn't have got past PREPARE without an HPT */ + g_assert(spapr->htab_shift); + + newsize = 1ULL << pending->shift; + rc = rehash_hpt(cpu, spapr->htab, HTAB_SIZE(spapr), + pending->hpt, newsize); + if (rc == H_SUCCESS) { + qemu_vfree(spapr->htab); + spapr->htab = pending->hpt; + spapr->htab_shift = pending->shift; + + push_sregs_to_kvm_pr(spapr); + + pending->hpt = NULL; /* so it's not free()d */ + } + + /* Clean up */ + spapr->pending_hpt = NULL; + free_pending_hpt(pending); + + return rc; +} + +static void hypercall_register_types(void) +{ + /* hcall-pft */ + spapr_register_hypercall(H_ENTER, h_enter); + spapr_register_hypercall(H_REMOVE, h_remove); + spapr_register_hypercall(H_PROTECT, h_protect); + spapr_register_hypercall(H_READ, h_read); + + /* hcall-bulk */ + spapr_register_hypercall(H_BULK_REMOVE, h_bulk_remove); + +} + +type_init(hypercall_register_types) diff --git a/hw/ppc/spapr_tpm_proxy.c b/hw/ppc/spapr_tpm_proxy.c new file mode 100644 index 000000000..245408674 --- /dev/null +++ b/hw/ppc/spapr_tpm_proxy.c @@ -0,0 +1,177 @@ +/* + * SPAPR TPM Proxy/Hypercall + * + * Copyright IBM Corp. 
2019 + * + * Authors: + * Michael Roth <mdroth@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "sysemu/reset.h" +#include "hw/ppc/spapr.h" +#include "hw/qdev-properties.h" +#include "trace.h" + +#define TPM_SPAPR_BUFSIZE 4096 + +enum { + TPM_COMM_OP_EXECUTE = 1, + TPM_COMM_OP_CLOSE_SESSION = 2, +}; + +static void spapr_tpm_proxy_reset(void *opaque) +{ + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(opaque); + + if (tpm_proxy->host_fd != -1) { + close(tpm_proxy->host_fd); + tpm_proxy->host_fd = -1; + } +} + +static ssize_t tpm_execute(SpaprTpmProxy *tpm_proxy, target_ulong *args) +{ + uint64_t data_in = ppc64_phys_to_real(args[1]); + target_ulong data_in_size = args[2]; + uint64_t data_out = ppc64_phys_to_real(args[3]); + target_ulong data_out_size = args[4]; + uint8_t buf_in[TPM_SPAPR_BUFSIZE]; + uint8_t buf_out[TPM_SPAPR_BUFSIZE]; + ssize_t ret; + + trace_spapr_tpm_execute(data_in, data_in_size, data_out, data_out_size); + + if (data_in_size > TPM_SPAPR_BUFSIZE) { + error_report("invalid TPM input buffer size: " TARGET_FMT_lu, + data_in_size); + return H_P3; + } + + if (data_out_size < TPM_SPAPR_BUFSIZE) { + error_report("invalid TPM output buffer size: " TARGET_FMT_lu, + data_out_size); + return H_P5; + } + + if (tpm_proxy->host_fd == -1) { + tpm_proxy->host_fd = open(tpm_proxy->host_path, O_RDWR); + if (tpm_proxy->host_fd == -1) { + error_report("failed to open TPM device %s: %d", + tpm_proxy->host_path, errno); + return H_RESOURCE; + } + } + + cpu_physical_memory_read(data_in, buf_in, data_in_size); + + do { + ret = write(tpm_proxy->host_fd, buf_in, data_in_size); + if (ret > 0) { + data_in_size -= ret; + } + } while ((ret >= 0 && data_in_size > 0) || (ret == -1 && errno == EINTR)); + + if (ret == -1) { + error_report("failed to write to TPM device %s: %d", + tpm_proxy->host_path, errno); + return H_RESOURCE; + } + + do { + ret = read(tpm_proxy->host_fd, buf_out, data_out_size); + } while (ret == 0 || (ret == -1 && errno == EINTR)); + + if (ret == -1) { + error_report("failed to read from TPM device %s: %d", + tpm_proxy->host_path, errno); + return H_RESOURCE; + } + + cpu_physical_memory_write(data_out, buf_out, ret); + args[0] = ret; + + return H_SUCCESS; +} + +static target_ulong h_tpm_comm(PowerPCCPU *cpu, + SpaprMachineState *spapr, + target_ulong opcode, + target_ulong *args) +{ + target_ulong op = args[0]; + SpaprTpmProxy *tpm_proxy = spapr->tpm_proxy; + + if (!tpm_proxy) { + error_report("TPM proxy not available"); + return H_FUNCTION; + } + + trace_spapr_h_tpm_comm(tpm_proxy->host_path, op); + + switch (op) { + case TPM_COMM_OP_EXECUTE: + return tpm_execute(tpm_proxy, args); + case TPM_COMM_OP_CLOSE_SESSION: + spapr_tpm_proxy_reset(tpm_proxy); + return H_SUCCESS; + default: + return H_PARAMETER; + } +} + +static void spapr_tpm_proxy_realize(DeviceState *d, Error **errp) +{ + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(d); + + if (tpm_proxy->host_path == NULL) { + error_setg(errp, "must specify 'host-path' option for device"); + return; + } + + tpm_proxy->host_fd = -1; + qemu_register_reset(spapr_tpm_proxy_reset, tpm_proxy); +} + +static void spapr_tpm_proxy_unrealize(DeviceState *d) +{ + SpaprTpmProxy *tpm_proxy = SPAPR_TPM_PROXY(d); + + qemu_unregister_reset(spapr_tpm_proxy_reset, tpm_proxy); +} + +static Property spapr_tpm_proxy_properties[] = { + 
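+ /* Path to the host's TPM character device, e.g. /dev/tpmrm0. */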
DEFINE_PROP_STRING("host-path", SpaprTpmProxy, host_path), + DEFINE_PROP_END_OF_LIST(), +}; + +static void spapr_tpm_proxy_class_init(ObjectClass *k, void *data) +{ + DeviceClass *dk = DEVICE_CLASS(k); + + dk->realize = spapr_tpm_proxy_realize; + dk->unrealize = spapr_tpm_proxy_unrealize; + dk->user_creatable = true; + device_class_set_props(dk, spapr_tpm_proxy_properties); +} + +static const TypeInfo spapr_tpm_proxy_info = { + .name = TYPE_SPAPR_TPM_PROXY, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprTpmProxy), + .class_init = spapr_tpm_proxy_class_init, +}; + +static void spapr_tpm_proxy_register_types(void) +{ + type_register_static(&spapr_tpm_proxy_info); + spapr_register_hypercall(SVM_H_TPM_COMM, h_tpm_comm); +} + +type_init(spapr_tpm_proxy_register_types) diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c new file mode 100644 index 000000000..b975ed29c --- /dev/null +++ b/hw/ppc/spapr_vio.c @@ -0,0 +1,741 @@ +/* + * QEMU sPAPR VIO code + * + * Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com> + * Based on the s390 virtio bus code: + * Copyright (c) 2009 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include "qemu/log.h" +#include "hw/loader.h" +#include "elf.h" +#include "hw/sysbus.h" +#include "sysemu/kvm.h" +#include "sysemu/device_tree.h" +#include "kvm_ppc.h" +#include "migration/vmstate.h" + +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/fdt.h" +#include "trace.h" + +#include <libfdt.h> + +#define SPAPR_VIO_REG_BASE 0x71000000 + +static char *spapr_vio_get_dev_name(DeviceState *qdev) +{ + SpaprVioDevice *dev = VIO_SPAPR_DEVICE(qdev); + SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + + /* Device tree style name device@reg */ + return g_strdup_printf("%s@%x", pc->dt_name, dev->reg); +} + +static void spapr_vio_bus_class_init(ObjectClass *klass, void *data) +{ + BusClass *k = BUS_CLASS(klass); + + k->get_dev_path = spapr_vio_get_dev_name; + k->get_fw_dev_path = spapr_vio_get_dev_name; +} + +static const TypeInfo spapr_vio_bus_info = { + .name = TYPE_SPAPR_VIO_BUS, + .parent = TYPE_BUS, + .class_init = spapr_vio_bus_class_init, + .instance_size = sizeof(SpaprVioBus), +}; + +SpaprVioDevice *spapr_vio_find_by_reg(SpaprVioBus *bus, uint32_t reg) +{ + BusChild *kid; + SpaprVioDevice *dev = NULL; + + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + dev = (SpaprVioDevice *)kid->child; + if (dev->reg == reg) { + return dev; + } + } + + return NULL; +} + +static int vio_make_devnode(SpaprVioDevice *dev, + void *fdt) +{ + SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + int vdevice_off, node_off, ret; + char *dt_name; + const char *dt_compatible; + + vdevice_off = fdt_path_offset(fdt, "/vdevice"); + if (vdevice_off < 0) { + return vdevice_off; + } + + dt_name = spapr_vio_get_dev_name(DEVICE(dev)); + node_off = fdt_add_subnode(fdt, vdevice_off, dt_name); + g_free(dt_name); + if (node_off < 0) { + return node_off; + } + + ret = fdt_setprop_cell(fdt, node_off, "reg", dev->reg); + if (ret < 0) { + return ret; + } + + if (pc->dt_type) { + ret = fdt_setprop_string(fdt, node_off, "device_type", + pc->dt_type); + if (ret < 0) { + return ret; + } + } + + if (pc->get_dt_compatible) { + dt_compatible = pc->get_dt_compatible(dev); + } else { + dt_compatible = pc->dt_compatible; + } + + if (dt_compatible) { + ret = fdt_setprop_string(fdt, node_off, "compatible", + dt_compatible); + if (ret < 0) { + return ret; + } + } + + if (dev->irq) { + uint32_t ints_prop[2]; + + spapr_dt_irq(ints_prop, dev->irq, false); + ret = fdt_setprop(fdt, node_off, "interrupts", ints_prop, + sizeof(ints_prop)); + if (ret < 0) { + return ret; + } + } + + ret = spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->tcet); + if (ret < 0) { + return ret; + } + + if (pc->devnode) { + ret = (pc->devnode)(dev, fdt, node_off); + if (ret < 0) { + return ret; + } + } + + return node_off; +} + +/* + * CRQ handling + */ +static target_ulong h_reg_crq(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *args) +{ + target_ulong reg = args[0]; + target_ulong queue_addr = args[1]; + target_ulong queue_len = args[2]; + SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg); + + if (!dev) { + hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg); + return H_PARAMETER; + } + + /* We can't grok a queue size bigger than 256M for now */ + if (queue_len < 0x1000 || queue_len > 0x10000000) { + hcall_dprintf("Queue size too small or too big (0x" TARGET_FMT_lx + ")\n", queue_len); + return H_PARAMETER; + } + + /* Check queue 
alignment */
+    if (queue_addr & 0xfff) {
+        hcall_dprintf("Queue not aligned (0x" TARGET_FMT_lx ")\n", queue_addr);
+        return H_PARAMETER;
+    }
+
+    /* Check if device supports CRQs */
+    if (!dev->crq.SendFunc) {
+        hcall_dprintf("Device does not support CRQ\n");
+        return H_NOT_FOUND;
+    }
+
+    /* Already a queue? */
+    if (dev->crq.qsize) {
+        hcall_dprintf("CRQ already registered\n");
+        return H_RESOURCE;
+    }
+    dev->crq.qladdr = queue_addr;
+    dev->crq.qsize = queue_len;
+    dev->crq.qnext = 0;
+
+    trace_spapr_vio_h_reg_crq(reg, queue_addr, queue_len);
+    return H_SUCCESS;
+}
+
+static target_ulong free_crq(SpaprVioDevice *dev)
+{
+    dev->crq.qladdr = 0;
+    dev->crq.qsize = 0;
+    dev->crq.qnext = 0;
+
+    trace_spapr_vio_free_crq(dev->reg);
+
+    return H_SUCCESS;
+}
+
+static target_ulong h_free_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong reg = args[0];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+
+    if (!dev) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
+        return H_PARAMETER;
+    }
+
+    return free_crq(dev);
+}
+
+static target_ulong h_send_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                               target_ulong opcode, target_ulong *args)
+{
+    target_ulong reg = args[0];
+    target_ulong msg_hi = args[1];
+    target_ulong msg_lo = args[2];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+    uint64_t crq_mangle[2];
+
+    if (!dev) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
+        return H_PARAMETER;
+    }
+    crq_mangle[0] = cpu_to_be64(msg_hi);
+    crq_mangle[1] = cpu_to_be64(msg_lo);
+
+    if (dev->crq.SendFunc) {
+        return dev->crq.SendFunc(dev, (uint8_t *)crq_mangle);
+    }
+
+    return H_HARDWARE;
+}
+
+static target_ulong h_enable_crq(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode, target_ulong *args)
+{
+    target_ulong reg = args[0];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+
+    if (!dev) {
+        hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
+        return H_PARAMETER;
+    }
+
+    return 0;
+}
+
+/* Returns negative error, 0 success, or positive: queue full */
+int spapr_vio_send_crq(SpaprVioDevice *dev, uint8_t *crq)
+{
+    int rc;
+    uint8_t byte;
+
+    if (!dev->crq.qsize) {
+        error_report("spapr_vio_send_crq on uninitialized queue");
+        return -1;
+    }
+
+    /*
+     * Each CRQ entry is 16 bytes; a slot is free while its first byte is
+     * zero, so probe that byte first. Maybe do a fast path for KVM just
+     * writing to the pages.
+     */
+    rc = spapr_vio_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
+    if (rc) {
+        return rc;
+    }
+    if (byte != 0) {
+        return 1;
+    }
+
+    /* Write the payload half first, then the header carrying the valid byte */
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
+                             &crq[8], 8);
+    if (rc) {
+        return rc;
+    }
+
+    kvmppc_eieio();
+
+    rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
+    if (rc) {
+        return rc;
+    }
+
+    dev->crq.qnext = (dev->crq.qnext + 16) % dev->crq.qsize;
+
+    if (dev->signal_state & 1) {
+        spapr_vio_irq_pulse(dev);
+    }
+
+    return 0;
+}
+
+/* "quiesce" handling */
+
+static void spapr_vio_quiesce_one(SpaprVioDevice *dev)
+{
+    if (dev->tcet) {
+        device_cold_reset(DEVICE(dev->tcet));
+    }
+    free_crq(dev);
+}
+
+void spapr_vio_set_bypass(SpaprVioDevice *dev, bool bypass)
+{
+    if (!dev->tcet) {
+        return;
+    }
+
+    memory_region_set_enabled(&dev->mrbypass, bypass);
+    memory_region_set_enabled(spapr_tce_get_iommu(dev->tcet), !bypass);
+
+    dev->tcet->bypass = bypass;
+}
+
+static void rtas_set_tce_bypass(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                uint32_t token,
+                                uint32_t nargs, target_ulong args,
+                                uint32_t nret, target_ulong rets)
+{
+    SpaprVioBus
*bus = spapr->vio_bus; + SpaprVioDevice *dev; + uint32_t unit, enable; + + if (nargs != 2) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + unit = rtas_ld(args, 0); + enable = rtas_ld(args, 1); + dev = spapr_vio_find_by_reg(bus, unit); + if (!dev) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + if (!dev->tcet) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + spapr_vio_set_bypass(dev, !!enable); + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static void rtas_quiesce(PowerPCCPU *cpu, SpaprMachineState *spapr, + uint32_t token, + uint32_t nargs, target_ulong args, + uint32_t nret, target_ulong rets) +{ + SpaprVioBus *bus = spapr->vio_bus; + BusChild *kid; + SpaprVioDevice *dev = NULL; + + if (nargs != 0) { + rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR); + return; + } + + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + dev = (SpaprVioDevice *)kid->child; + spapr_vio_quiesce_one(dev); + } + + rtas_st(rets, 0, RTAS_OUT_SUCCESS); +} + +static SpaprVioDevice *reg_conflict(SpaprVioDevice *dev) +{ + SpaprVioBus *bus = SPAPR_VIO_BUS(dev->qdev.parent_bus); + BusChild *kid; + SpaprVioDevice *other; + + /* + * Check for a device other than the given one which is already + * using the requested address. We have to open code this because + * the given dev might already be in the list. + */ + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + other = VIO_SPAPR_DEVICE(kid->child); + + if (other != dev && other->reg == dev->reg) { + return other; + } + } + + return 0; +} + +static void spapr_vio_busdev_reset(DeviceState *qdev) +{ + SpaprVioDevice *dev = VIO_SPAPR_DEVICE(qdev); + SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + + /* Shut down the request queue and TCEs if necessary */ + spapr_vio_quiesce_one(dev); + + dev->signal_state = 0; + + spapr_vio_set_bypass(dev, false); + if (pc->reset) { + pc->reset(dev); + } +} + +/* + * The register property of a VIO device is defined in libvirt using + * 0x1000 as a base register number plus a 0x1000 increment. For the + * VIO tty device, the base number is changed to 0x30000000. QEMU uses + * a base register number of 0x71000000 and then a simple increment. + * + * The formula below tries to compute a unique index number from the + * register value that will be used to define the IRQ number of the + * VIO device. + * + * A maximum of 256 VIO devices is covered. Collisions are possible + * but they will be detected when the IRQ is claimed. + */ +static inline uint32_t spapr_vio_reg_to_irq(uint32_t reg) +{ + uint32_t irq; + + if (reg >= SPAPR_VIO_REG_BASE) { + /* + * VIO device register values when allocated by QEMU. For + * these, we simply mask the high bits to fit the overall + * range: [0x00 - 0xff]. + * + * The nvram VIO device (reg=0x71000000) is a static device of + * the pseries machine and so is always allocated by QEMU. Its + * IRQ number is 0x0. + */ + irq = reg & 0xff; + + } else if (reg >= 0x30000000) { + /* + * VIO tty devices register values, when allocated by libvirt, + * are mapped in range [0xf0 - 0xff], gives us a maximum of 16 + * vtys. + */ + irq = 0xf0 | ((reg >> 12) & 0xf); + + } else { + /* + * Other VIO devices register values, when allocated by + * libvirt, should be mapped in range [0x00 - 0xef]. Conflicts + * will be detected when IRQ is claimed. 
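+ *
+ * Worked examples of the mapping below: reg 0x71000000 (QEMU-allocated)
+ * yields irq 0x00; reg 0x30000000 (first libvirt vty) yields 0xf0;
+ * reg 0x2000 (libvirt, non-vty) yields 0x02. SPAPR_IRQ_VIO is then
+ * ORed into the result.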
+ */
+        irq = (reg >> 12) & 0xff;
+    }
+
+    return SPAPR_IRQ_VIO | irq;
+}
+
+static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+    SpaprVioDevice *dev = (SpaprVioDevice *)qdev;
+    SpaprVioDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+    char *id;
+
+    if (dev->reg != -1) {
+        /*
+         * Explicitly assigned address, just verify that no-one else
+         * is using it. We have to open code this rather than using
+         * spapr_vio_find_by_reg() because the device itself is
+         * already in the list.
+         */
+        SpaprVioDevice *other = reg_conflict(dev);
+
+        if (other) {
+            error_setg(errp, "%s and %s devices conflict at address %#x",
+                       object_get_typename(OBJECT(qdev)),
+                       object_get_typename(OBJECT(&other->qdev)),
+                       dev->reg);
+            return;
+        }
+    } else {
+        /* Need to assign an address */
+        SpaprVioBus *bus = SPAPR_VIO_BUS(dev->qdev.parent_bus);
+
+        do {
+            dev->reg = bus->next_reg++;
+        } while (reg_conflict(dev));
+    }
+
+    /* Don't overwrite ids assigned on the command line */
+    if (!dev->qdev.id) {
+        id = spapr_vio_get_dev_name(DEVICE(dev));
+        dev->qdev.id = id;
+    }
+
+    dev->irq = spapr_vio_reg_to_irq(dev->reg);
+
+    if (SPAPR_MACHINE_GET_CLASS(spapr)->legacy_irq_allocation) {
+        int irq = spapr_irq_findone(spapr, errp);
+
+        if (irq < 0) {
+            return;
+        }
+        dev->irq = irq;
+    }
+
+    if (spapr_irq_claim(spapr, dev->irq, false, errp) < 0) {
+        return;
+    }
+
+    if (pc->rtce_window_size) {
+        uint32_t liobn = SPAPR_VIO_LIOBN(dev->reg);
+
+        memory_region_init(&dev->mrroot, OBJECT(dev), "iommu-spapr-root",
+                           MACHINE(spapr)->ram_size);
+        memory_region_init_alias(&dev->mrbypass, OBJECT(dev),
+                                 "iommu-spapr-bypass", get_system_memory(),
+                                 0, MACHINE(spapr)->ram_size);
+        memory_region_add_subregion_overlap(&dev->mrroot, 0, &dev->mrbypass, 1);
+        address_space_init(&dev->as, &dev->mrroot, qdev->id);
+
+        dev->tcet = spapr_tce_new_table(qdev, liobn);
+        spapr_tce_table_enable(dev->tcet, SPAPR_TCE_PAGE_SHIFT, 0,
+                               pc->rtce_window_size >> SPAPR_TCE_PAGE_SHIFT);
+        dev->tcet->vdev = dev;
+        memory_region_add_subregion_overlap(&dev->mrroot, 0,
+                                            spapr_tce_get_iommu(dev->tcet), 2);
+    }
+
+    pc->realize(dev, errp);
+}
+
+static target_ulong h_vio_signal(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                                 target_ulong opcode,
+                                 target_ulong *args)
+{
+    target_ulong reg = args[0];
+    target_ulong mode = args[1];
+    SpaprVioDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
+    SpaprVioDeviceClass *pc;
+
+    if (!dev) {
+        return H_PARAMETER;
+    }
+
+    pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
+
+    if (mode & ~pc->signal_mask) {
+        return H_PARAMETER;
+    }
+
+    dev->signal_state = mode;
+
+    return H_SUCCESS;
+}
+
+SpaprVioBus *spapr_vio_bus_init(void)
+{
+    SpaprVioBus *bus;
+    BusState *qbus;
+    DeviceState *dev;
+
+    /* Create bridge device */
+    dev = qdev_new(TYPE_SPAPR_VIO_BRIDGE);
+    sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal);
+
+    /* Create bus on bridge device */
+    qbus = qbus_new(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio");
+    bus = SPAPR_VIO_BUS(qbus);
+    bus->next_reg = SPAPR_VIO_REG_BASE;
+
+    /* hcall-vio */
+    spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
+
+    /* hcall-crq */
+    spapr_register_hypercall(H_REG_CRQ, h_reg_crq);
+    spapr_register_hypercall(H_FREE_CRQ, h_free_crq);
+    spapr_register_hypercall(H_SEND_CRQ, h_send_crq);
+    spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
+
+    /* RTAS calls */
+    spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
+                        rtas_set_tce_bypass);
+    spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
+
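+    /*
+     * Minimal usage sketch (mirrors what the pseries machine does):
+     *
+     *   spapr->vio_bus = spapr_vio_bus_init();
+     *   dev = qdev_new("spapr-vty");
+     *   qdev_prop_set_chr(dev, "chardev", serial_hd(0));
+     *   qdev_realize_and_unref(dev, &spapr->vio_bus->bus, &error_fatal);
+     */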
+ return bus; +} + +static void spapr_vio_bridge_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->fw_name = "vdevice"; +} + +static const TypeInfo spapr_vio_bridge_info = { + .name = TYPE_SPAPR_VIO_BRIDGE, + .parent = TYPE_SYS_BUS_DEVICE, + .class_init = spapr_vio_bridge_class_init, +}; + +const VMStateDescription vmstate_spapr_vio = { + .name = "spapr_vio", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + /* Sanity check */ + VMSTATE_UINT32_EQUAL(reg, SpaprVioDevice, NULL), + VMSTATE_UINT32_EQUAL(irq, SpaprVioDevice, NULL), + + /* General VIO device state */ + VMSTATE_UINT64(signal_state, SpaprVioDevice), + VMSTATE_UINT64(crq.qladdr, SpaprVioDevice), + VMSTATE_UINT32(crq.qsize, SpaprVioDevice), + VMSTATE_UINT32(crq.qnext, SpaprVioDevice), + + VMSTATE_END_OF_LIST() + }, +}; + +static void vio_spapr_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *k = DEVICE_CLASS(klass); + k->realize = spapr_vio_busdev_realize; + k->reset = spapr_vio_busdev_reset; + k->bus_type = TYPE_SPAPR_VIO_BUS; +} + +static const TypeInfo spapr_vio_type_info = { + .name = TYPE_VIO_SPAPR_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(SpaprVioDevice), + .abstract = true, + .class_size = sizeof(SpaprVioDeviceClass), + .class_init = vio_spapr_device_class_init, +}; + +static void spapr_vio_register_types(void) +{ + type_register_static(&spapr_vio_bus_info); + type_register_static(&spapr_vio_bridge_info); + type_register_static(&spapr_vio_type_info); +} + +type_init(spapr_vio_register_types) + +static int compare_reg(const void *p1, const void *p2) +{ + SpaprVioDevice const *dev1, *dev2; + + dev1 = (SpaprVioDevice *)*(DeviceState **)p1; + dev2 = (SpaprVioDevice *)*(DeviceState **)p2; + + if (dev1->reg < dev2->reg) { + return -1; + } + if (dev1->reg == dev2->reg) { + return 0; + } + + /* dev1->reg > dev2->reg */ + return 1; +} + +void spapr_dt_vdevice(SpaprVioBus *bus, void *fdt) +{ + DeviceState *qdev, **qdevs; + BusChild *kid; + int i, num, ret = 0; + int node; + + _FDT(node = fdt_add_subnode(fdt, 0, "vdevice")); + + _FDT(fdt_setprop_string(fdt, node, "device_type", "vdevice")); + _FDT(fdt_setprop_string(fdt, node, "compatible", "IBM,vdevice")); + _FDT(fdt_setprop_cell(fdt, node, "#address-cells", 1)); + _FDT(fdt_setprop_cell(fdt, node, "#size-cells", 0)); + _FDT(fdt_setprop_cell(fdt, node, "#interrupt-cells", 2)); + _FDT(fdt_setprop(fdt, node, "interrupt-controller", NULL, 0)); + + /* Count qdevs on the bus list */ + num = 0; + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + num++; + } + + /* Copy out into an array of pointers */ + qdevs = g_new(DeviceState *, num); + num = 0; + QTAILQ_FOREACH(kid, &bus->bus.children, sibling) { + qdevs[num++] = kid->child; + } + + /* Sort the array */ + qsort(qdevs, num, sizeof(qdev), compare_reg); + + /* Hack alert. Give the devices to libfdt in reverse order, we happen + * to know that will mean they are in forward order in the tree. 
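+ * (fdt_add_subnode() splices each new child in before the parent's
+ * existing subnodes, so the last node added comes out first.)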
*/ + for (i = num - 1; i >= 0; i--) { + SpaprVioDevice *dev = (SpaprVioDevice *)(qdevs[i]); + SpaprVioDeviceClass *vdc = VIO_SPAPR_DEVICE_GET_CLASS(dev); + + ret = vio_make_devnode(dev, fdt); + if (ret < 0) { + error_report("Couldn't create device node /vdevice/%s@%"PRIx32, + vdc->dt_name, dev->reg); + exit(1); + } + } + + g_free(qdevs); +} + +gchar *spapr_vio_stdout_path(SpaprVioBus *bus) +{ + SpaprVioDevice *dev; + char *name, *path; + + dev = spapr_vty_get_default(bus); + if (!dev) { + return NULL; + } + + name = spapr_vio_get_dev_name(DEVICE(dev)); + path = g_strdup_printf("/vdevice/%s", name); + + g_free(name); + return path; +} diff --git a/hw/ppc/spapr_vof.c b/hw/ppc/spapr_vof.c new file mode 100644 index 000000000..40ce8fe00 --- /dev/null +++ b/hw/ppc/spapr_vof.c @@ -0,0 +1,167 @@ +/* + * SPAPR machine hooks to Virtual Open Firmware, + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qapi/error.h" +#include "hw/ppc/spapr.h" +#include "hw/ppc/spapr_vio.h" +#include "hw/ppc/spapr_cpu_core.h" +#include "hw/ppc/fdt.h" +#include "hw/ppc/vof.h" +#include "sysemu/sysemu.h" +#include "qom/qom-qobject.h" +#include "trace.h" + +target_ulong spapr_h_vof_client(PowerPCCPU *cpu, SpaprMachineState *spapr, + target_ulong opcode, target_ulong *_args) +{ + int ret = vof_client_call(MACHINE(spapr), spapr->vof, spapr->fdt_blob, + ppc64_phys_to_real(_args[0])); + + if (ret) { + return H_PARAMETER; + } + return H_SUCCESS; +} + +void spapr_vof_client_dt_finalize(SpaprMachineState *spapr, void *fdt) +{ + char *stdout_path = spapr_vio_stdout_path(spapr->vio_bus); + + vof_build_dt(fdt, spapr->vof); + + if (spapr->vof->bootargs) { + int chosen; + + _FDT(chosen = fdt_path_offset(fdt, "/chosen")); + /* + * If the client did not change "bootargs", spapr_dt_chosen() must have + * stored machine->kernel_cmdline in it before getting here. + */ + _FDT(fdt_setprop_string(fdt, chosen, "bootargs", spapr->vof->bootargs)); + } + + /* + * SLOF-less setup requires an open instance of stdout for early + * kernel printk. By now all phandles are settled so we can open + * the default serial console. + */ + if (stdout_path) { + _FDT(vof_client_open_store(fdt, spapr->vof, "/chosen", "stdout", + stdout_path)); + } +} + +void spapr_vof_reset(SpaprMachineState *spapr, void *fdt, Error **errp) +{ + target_ulong stack_ptr; + Vof *vof = spapr->vof; + PowerPCCPU *first_ppc_cpu = POWERPC_CPU(first_cpu); + + vof_init(vof, spapr->rma_size, errp); + + stack_ptr = vof_claim(vof, 0, VOF_STACK_SIZE, VOF_STACK_SIZE); + if (stack_ptr == -1) { + error_setg(errp, "Memory allocation for stack failed"); + return; + } + /* Stack grows downwards plus reserve space for the minimum stack frame */ + stack_ptr += VOF_STACK_SIZE - 0x20; + + if (spapr->kernel_size && + vof_claim(vof, spapr->kernel_addr, spapr->kernel_size, 0) == -1) { + error_setg(errp, "Memory for kernel is in use"); + return; + } + + if (spapr->initrd_size && + vof_claim(vof, spapr->initrd_base, spapr->initrd_size, 0) == -1) { + error_setg(errp, "Memory for initramdisk is in use"); + return; + } + + spapr_vof_client_dt_finalize(spapr, fdt); + + spapr_cpu_set_entry_state(first_ppc_cpu, SPAPR_ENTRY_POINT, + stack_ptr, spapr->initrd_base, + spapr->initrd_size); + /* VOF is 32bit BE so enforce MSR here */ + first_ppc_cpu->env.msr &= ~((1ULL << MSR_SF) | (1ULL << MSR_LE)); + + /* + * At this point the expected allocation map is: + * + * 0..c38 - the initial firmware + * 8000..10000 - stack + * 400000.. - kernel + * 3ea0000.. 
- initramdisk + * + * We skip writing FDT as nothing expects it; OF client interface is + * going to be used for reading the device tree. + */ +} + +void spapr_vof_quiesce(MachineState *ms) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + spapr->fdt_size = fdt_totalsize(spapr->fdt_blob); + spapr->fdt_initial_size = spapr->fdt_size; +} + +bool spapr_vof_setprop(MachineState *ms, const char *path, const char *propname, + void *val, int vallen) +{ + SpaprMachineState *spapr = SPAPR_MACHINE(ms); + + /* + * We only allow changing properties which we know how to update in QEMU + * OR + * the ones which we know that they need to survive during "quiesce". + */ + + if (strcmp(path, "/rtas") == 0) { + if (strcmp(propname, "linux,rtas-base") == 0 || + strcmp(propname, "linux,rtas-entry") == 0) { + /* These need to survive quiesce so let them store in the FDT */ + return true; + } + } + + if (strcmp(path, "/chosen") == 0) { + if (strcmp(propname, "bootargs") == 0) { + Vof *vof = spapr->vof; + + g_free(vof->bootargs); + vof->bootargs = g_strndup(val, vallen); + return true; + } + if (strcmp(propname, "linux,initrd-start") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_base = ldl_be_p(val); + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_base = ldq_be_p(val); + return true; + } + return false; + } + if (strcmp(propname, "linux,initrd-end") == 0) { + if (vallen == sizeof(uint32_t)) { + spapr->initrd_size = ldl_be_p(val) - spapr->initrd_base; + return true; + } + if (vallen == sizeof(uint64_t)) { + spapr->initrd_size = ldq_be_p(val) - spapr->initrd_base; + return true; + } + return false; + } + } + + return true; +} diff --git a/hw/ppc/trace-events b/hw/ppc/trace-events new file mode 100644 index 000000000..3bf43fa34 --- /dev/null +++ b/hw/ppc/trace-events @@ -0,0 +1,143 @@ +# See docs/devel/tracing.rst for syntax documentation. 
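+#
+# Each line declares one event as <name>(<C argument list>) followed by a
+# printf-style format string; the build generates a trace_<name>() helper
+# per event, e.g. spapr_vio.c calls trace_spapr_vio_free_crq(dev->reg).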
+ +# spapr_pci.c +spapr_pci_msi(const char *msg, uint32_t ca) "%s (cfg=0x%x)" +spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) "dev\"%s\" vector %u, addr=0x%"PRIx64 +spapr_pci_rtas_ibm_change_msi(unsigned cfg, unsigned func, unsigned req, unsigned first) "cfgaddr 0x%x func %u, requested %u, first irq %u" +spapr_pci_rtas_ibm_query_interrupt_source_number(unsigned ioa, unsigned intr) "queries for #%u, IRQ%u" +spapr_pci_msi_write(uint64_t addr, uint64_t data, uint32_t dt_irq) "@0x%"PRIx64"<=0x%"PRIx64" IRQ %u" +spapr_pci_lsi_set(const char *busname, int pin, uint32_t irq) "%s PIN%d IRQ %u" +spapr_pci_msi_retry(unsigned config_addr, unsigned req_num, unsigned max_irqs) "Guest device at 0x%x asked %u, have only %u" + +# spapr_hcall.c +spapr_cas_continue(unsigned long n) "Copy changes to the guest: %ld bytes" +spapr_cas_pvr(uint32_t cur_pvr, bool explicit_match, uint32_t new_pvr) "current=0x%x, explicit_match=%u, new=0x%x" +spapr_h_resize_hpt_prepare(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 +spapr_h_resize_hpt_commit(uint64_t flags, uint64_t shift) "flags=0x%"PRIx64", shift=%"PRIu64 +spapr_update_dt(unsigned cb) "New blob %u bytes" +spapr_update_dt_failed_size(unsigned cbold, unsigned cbnew, unsigned magic) "Old blob %u bytes, new blob %u bytes, magic 0x%x" +spapr_update_dt_failed_check(unsigned cbold, unsigned cbnew, unsigned magic) "Old blob %u bytes, new blob %u bytes, magic 0x%x" + +# spapr_tpm_proxy.c +spapr_h_tpm_comm(const char *device_path, uint64_t operation) "tpm_device_path=%s operation=0x%"PRIx64 +spapr_tpm_execute(uint64_t data_in, uint64_t data_in_sz, uint64_t data_out, uint64_t data_out_sz) "data_in=0x%"PRIx64", data_in_sz=%"PRIu64", data_out=0x%"PRIx64", data_out_sz=%"PRIu64 + +# spapr_iommu.c +spapr_iommu_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 +spapr_iommu_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 +spapr_iommu_pci_put(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tce=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, uint64_t ret, uint64_t tce) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" ret=%"PRId64" tce=0x%"PRIx64 +spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t iobaN, uint64_t tceN, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64 +spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, uint64_t npages, uint64_t ret) "liobn=0x%"PRIx64" ioba=0x%"PRIx64" tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64 +spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, unsigned pgsize) "liobn=0x%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=0x%x" +spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=0x%"PRIx64" table=%p fd=%d" +spapr_iommu_pre_save(uint64_t liobn, uint32_t nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" bus_offset=0x%"PRIx64" 
ps=%"PRIu32 +spapr_iommu_post_load(uint64_t liobn, uint32_t pre_nb, uint32_t post_nb, uint64_t offs, uint32_t ps) "liobn=%"PRIx64" %"PRIx32" => 0x%"PRIx32" bus_offset=0x%"PRIx64" ps=%"PRIu32 + +# spapr_rtas_ddw.c +spapr_iommu_ddw_query(uint64_t buid, uint32_t cfgaddr, unsigned wa, uint64_t win_size, uint32_t pgmask) "buid=0x%"PRIx64" addr=0x%"PRIx32", %u windows available, max window size=0x%"PRIx64", mask=0x%"PRIx32 +spapr_iommu_ddw_create(uint64_t buid, uint32_t cfgaddr, uint64_t pg_size, uint64_t req_size, uint64_t start, uint32_t liobn) "buid=0x%"PRIx64" addr=0x%"PRIx32", page size=0x%"PRIx64", requested=0x%"PRIx64", start addr=0x%"PRIx64", liobn=0x%"PRIx32 +spapr_iommu_ddw_remove(uint32_t liobn) "liobn=0x%"PRIx32 +spapr_iommu_ddw_reset(uint64_t buid, uint32_t cfgaddr) "buid=0x%"PRIx64" addr=0x%"PRIx32 + +# spapr_drc.c +spapr_drc_set_isolation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%"PRIx32 +spapr_drc_set_isolation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_dr_indicator(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x" +spapr_drc_set_allocation_state(uint32_t index, int state) "drc: 0x%"PRIx32", state: 0x%x" +spapr_drc_set_allocation_state_finalizing(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_set_configured(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_attach(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_unplug_request(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_awaiting_quiesce(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_reset(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_realize(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_realize_child(uint32_t index, const char *childname) "drc: 0x%"PRIx32", child name: %s" +spapr_drc_realize_complete(uint32_t index) "drc: 0x%"PRIx32 +spapr_drc_unrealize(uint32_t index) "drc: 0x%"PRIx32 + +# spapr_ovec.c +spapr_ovec_parse_vector(int vector, int byte, uint16_t vec_len, uint8_t entry) "read guest vector %2d, byte %3d / %3d: 0x%.2x" +spapr_ovec_populate_dt(int byte, uint16_t vec_len, uint8_t entry) "encoding guest vector byte %3d / %3d: 0x%.2x" + +# spapr_drc.c +spapr_rtas_get_sensor_state_not_supported(uint32_t index, uint32_t type) "sensor index: 0x%"PRIx32", type: %"PRIu32 +spapr_rtas_get_sensor_state_invalid(uint32_t index) "sensor index: 0x%"PRIx32 +spapr_rtas_ibm_configure_connector_invalid(uint32_t index) "DRC index: 0x%"PRIx32 + +# spapr_vio.c +spapr_vio_h_reg_crq(uint64_t reg, uint64_t queue_addr, uint64_t queue_len) "CRQ for dev 0x%" PRIx64 " registered at 0x%" PRIx64 "/0x%" PRIx64 +spapr_vio_free_crq(uint32_t reg) "CRQ for dev 0x%" PRIx32 " freed" + +# vof.c +vof_error_str_truncated(const char *s, int len) "%s truncated to %d" +vof_error_param(const char *method, int nargscheck, int nretcheck, int nargs, int nret) "%s takes/returns %d/%d, not %d/%d" +vof_error_unknown_service(const char *service, int nargs, int nret) "\"%s\" args=%d rets=%d" +vof_error_unknown_method(const char *method) "\"%s\"" +vof_error_unknown_ihandle_close(uint32_t ih) "ih=0x%x" +vof_error_unknown_path(const char *path) "\"%s\"" +vof_error_write(uint32_t ih) "ih=0x%x" +vof_finddevice(const char *path, uint32_t ph) "\"%s\" => ph=0x%x" +vof_claim(uint32_t virt, uint32_t size, uint32_t align, uint32_t ret) "virt=0x%x size=0x%x align=0x%x => 0x%x" +vof_release(uint32_t virt, uint32_t size, uint32_t ret) "virt=0x%x size=0x%x => 0x%x" +vof_method(uint32_t ihandle, const char *method, uint32_t param, uint32_t ret, uint32_t ret2) "ih=0x%x \"%s\"(0x%x) => 0x%x 0x%x" +vof_getprop(uint32_t ph, const char *prop, uint32_t ret, const 
char *val) "ph=0x%x \"%s\" => len=%d [%s]" +vof_getproplen(uint32_t ph, const char *prop, uint32_t ret) "ph=0x%x \"%s\" => len=%d" +vof_setprop(uint32_t ph, const char *prop, const char *val, uint32_t vallen, uint32_t ret) "ph=0x%x \"%s\" [%s] len=%d => ret=%d" +vof_open(const char *path, uint32_t ph, uint32_t ih) "%s ph=0x%x => ih=0x%x" +vof_interpret(const char *cmd, uint32_t param1, uint32_t param2, uint32_t ret, uint32_t ret2) "[%s] 0x%x 0x%x => 0x%x 0x%x" +vof_package_to_path(uint32_t ph, const char *tmp, int ret) "ph=0x%x => %s len=%d" +vof_instance_to_path(uint32_t ih, uint32_t ph, const char *tmp, int ret) "ih=0x%x ph=0x%x => %s len=%d" +vof_instance_to_package(uint32_t ih, uint32_t ph) "ih=0x%x => ph=0x%x" +vof_write(uint32_t ih, unsigned cb, const char *msg) "ih=0x%x [%u] \"%s\"" +vof_avail(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 +vof_claimed(uint64_t start, uint64_t end, uint64_t size) "0x%"PRIx64"..0x%"PRIx64" size=0x%"PRIx64 + +# ppc.c +ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) "adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)" +ppc_tb_load(uint64_t tb) "tb 0x%016" PRIx64 +ppc_tb_store(uint64_t tb, uint64_t offset) "tb 0x%016" PRIx64 " offset 0x%08" PRIx64 + +ppc_decr_load(uint64_t tb) "decr 0x%016" PRIx64 +ppc_decr_excp(const char *action) "%s decrementer" +ppc_decr_store(uint32_t nr_bits, uint64_t decr, uint64_t value) "%d-bit 0x%016" PRIx64 " => 0x%016" PRIx64 + +ppc4xx_fit(uint32_t ir, uint64_t tcr, uint64_t tsr) "ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc4xx_pit_stop(void) "" +ppc4xx_pit_start(uint64_t reload) "PIT 0x%016" PRIx64 +ppc4xx_pit(uint32_t ar, uint32_t ir, uint64_t tcr, uint64_t tsr, uint64_t reload) "ar %d ir %d TCR 0x%" PRIx64 " TSR 0x%" PRIx64 " PIT 0x%016" PRIx64 +ppc4xx_wdt(uint64_t tcr, uint64_t tsr) "TCR 0x%" PRIx64 " TSR 0x%" PRIx64 +ppc40x_store_pit(uint64_t value) "val 0x%" PRIx64 +ppc40x_set_tb_clk(uint32_t value) "new frequency %" PRIu32 +ppc40x_timers_init(uint32_t value) "frequency %" PRIu32 + +ppc_irq_set(void *env, uint32_t pin, uint32_t level) "env [%p] pin %d level %d" +ppc_irq_set_exit(void *env, uint32_t n_IRQ, uint32_t level, uint32_t pending, uint32_t request) "env [%p] n_IRQ %d level %d => pending 0x%08" PRIx32 " req 0x%08" PRIx32 +ppc_irq_set_state(const char *name, uint32_t level) "\"%s\" level %d" +ppc_irq_reset(const char *name) "%s" +ppc_irq_cpu(const char *action) "%s" + +# prep_systemio.c +prep_systemio_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +prep_systemio_write(uint32_t addr, uint32_t val) "write addr=0x%x val=0x%x" + +# rs6000_mc.c +rs6000mc_id_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +rs6000mc_presence_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +rs6000mc_size_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" +rs6000mc_size_write(uint32_t addr, uint32_t val) "write addr=0x%x val=0x%x" +rs6000mc_parity_read(uint32_t addr, uint32_t val) "read addr=0x%x val=0x%x" + +# ppc4xx_pci.c +ppc4xx_pci_map_irq(int32_t devfn, int irq_num, int slot) "devfn 0x%x irq %d -> %d" +ppc4xx_pci_set_irq(int irq_num) "PCI irq %d" + +# ppc440_pcix.c +ppc440_pcix_map_irq(int32_t devfn, int irq_num, int slot) "devfn 0x%x irq %d -> %d" +ppc440_pcix_set_irq(int irq_num) "PCI irq %d" +ppc440_pcix_update_pim(int idx, uint64_t size, uint64_t la) "Added window %d of size=0x%" PRIx64 " to CPU=0x%" PRIx64 +ppc440_pcix_update_pom(int idx, uint32_t size, uint64_t la, uint64_t pcia) "Added window %d 
of size=0x%x from CPU=0x%" PRIx64 " to PCI=0x%" PRIx64 +ppc440_pcix_reg_read(uint64_t addr, uint32_t val) "addr 0x%" PRIx64 " = 0x%" PRIx32 +ppc440_pcix_reg_write(uint64_t addr, uint32_t val, uint32_t size) "addr 0x%" PRIx64 " = 0x%" PRIx32 " size 0x%" PRIx32 diff --git a/hw/ppc/trace.h b/hw/ppc/trace.h new file mode 100644 index 000000000..87c4198e6 --- /dev/null +++ b/hw/ppc/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_ppc.h" diff --git a/hw/ppc/virtex_ml507.c b/hw/ppc/virtex_ml507.c new file mode 100644 index 000000000..9c575403b --- /dev/null +++ b/hw/ppc/virtex_ml507.c @@ -0,0 +1,316 @@ +/* + * Model of Xilinx Virtex5 ML507 PPC-440 refdesign. + * + * Copyright (c) 2010 Edgar E. Iglesias. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/datadir.h" +#include "qemu/units.h" +#include "cpu.h" +#include "hw/sysbus.h" +#include "hw/char/serial.h" +#include "hw/block/flash.h" +#include "sysemu/sysemu.h" +#include "sysemu/reset.h" +#include "hw/boards.h" +#include "sysemu/device_tree.h" +#include "hw/loader.h" +#include "elf.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/option.h" + +#include "hw/intc/ppc-uic.h" +#include "hw/ppc/ppc.h" +#include "hw/ppc/ppc4xx.h" +#include "hw/qdev-properties.h" +#include "ppc405.h" + +#define EPAPR_MAGIC (0x45504150) +#define FLASH_SIZE (16 * MiB) + +#define INTC_BASEADDR 0x81800000 +#define UART16550_BASEADDR 0x83e01003 +#define TIMER_BASEADDR 0x83c00000 +#define PFLASH_BASEADDR 0xfc000000 + +#define TIMER_IRQ 3 +#define UART16550_IRQ 9 + +static struct boot_info +{ + uint32_t bootstrap_pc; + uint32_t cmdline; + uint32_t fdt; + uint32_t ima_size; + void *vfdt; +} boot_info; + +/* Create reset TLB entries for BookE, spanning the 32bit addr space. 
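+ * Two 2 GiB entries do it: entry 0 maps the 0..0x7fffffff EA range onto
+ * the given physical base, entry 1 identity-maps 0x80000000..0xffffffff,
+ * both with read/write/execute permission.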
*/ +static void mmubooke_create_initial_mapping(CPUPPCState *env, + target_ulong va, + hwaddr pa) +{ + ppcemb_tlb_t *tlb = &env->tlb.tlbe[0]; + + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0x80000000 */ + tlb->EPN = va & TARGET_PAGE_MASK; + tlb->RPN = pa & TARGET_PAGE_MASK; + tlb->PID = 0; + + tlb = &env->tlb.tlbe[1]; + tlb->attr = 0; + tlb->prot = PAGE_VALID | ((PAGE_READ | PAGE_WRITE | PAGE_EXEC) << 4); + tlb->size = 1U << 31; /* up to 0xffffffff */ + tlb->EPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->RPN = 0x80000000 & TARGET_PAGE_MASK; + tlb->PID = 0; +} + +static PowerPCCPU *ppc440_init_xilinx(const char *cpu_type, uint32_t sysclk) +{ + PowerPCCPU *cpu; + CPUPPCState *env; + DeviceState *uicdev; + SysBusDevice *uicsbd; + + cpu = POWERPC_CPU(cpu_create(cpu_type)); + env = &cpu->env; + + ppc_booke_timers_init(cpu, sysclk, 0/* no flags */); + + ppc_dcr_init(env, NULL, NULL); + + /* interrupt controller */ + uicdev = qdev_new(TYPE_PPC_UIC); + uicsbd = SYS_BUS_DEVICE(uicdev); + + object_property_set_link(OBJECT(uicdev), "cpu", OBJECT(cpu), + &error_fatal); + sysbus_realize_and_unref(uicsbd, &error_fatal); + + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_INT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_INT]); + sysbus_connect_irq(uicsbd, PPCUIC_OUTPUT_CINT, + ((qemu_irq *)env->irq_inputs)[PPC40x_INPUT_CINT]); + + /* This board doesn't wire anything up to the inputs of the UIC. */ + return cpu; +} + +static void main_cpu_reset(void *opaque) +{ + PowerPCCPU *cpu = opaque; + CPUPPCState *env = &cpu->env; + struct boot_info *bi = env->load_info; + + cpu_reset(CPU(cpu)); + /* Linux Kernel Parameters (passing device tree): + * r3: pointer to the fdt + * r4: 0 + * r5: 0 + * r6: epapr magic + * r7: size of IMA in bytes + * r8: 0 + * r9: 0 + */ + env->gpr[1] = (16 * MiB) - 8; + /* Provide a device-tree. */ + env->gpr[3] = bi->fdt; + env->nip = bi->bootstrap_pc; + + /* Create a mapping for the kernel. */ + mmubooke_create_initial_mapping(env, 0, 0); + env->gpr[6] = tswap32(EPAPR_MAGIC); + env->gpr[7] = bi->ima_size; +} + +#define BINARY_DEVICE_TREE_FILE "virtex-ml507.dtb" +static int xilinx_load_device_tree(hwaddr addr, + uint32_t ramsize, + hwaddr initrd_base, + hwaddr initrd_size, + const char *kernel_cmdline) +{ + char *path; + int fdt_size; + void *fdt = NULL; + int r; + const char *dtb_filename; + + dtb_filename = current_machine->dtb; + if (dtb_filename) { + fdt = load_device_tree(dtb_filename, &fdt_size); + if (!fdt) { + error_report("Error while loading device tree file '%s'", + dtb_filename); + } + } else { + /* Try the local "ppc.dtb" override. 
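+ * (a ppc.dtb file in the current working directory) first, then fall
+ * back to the BINARY_DEVICE_TREE_FILE shipped with QEMU.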
*/ + fdt = load_device_tree("ppc.dtb", &fdt_size); + if (!fdt) { + path = qemu_find_file(QEMU_FILE_TYPE_BIOS, BINARY_DEVICE_TREE_FILE); + if (path) { + fdt = load_device_tree(path, &fdt_size); + g_free(path); + } + } + } + if (!fdt) { + return 0; + } + + r = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-start", + initrd_base); + if (r < 0) { + error_report("couldn't set /chosen/linux,initrd-start"); + } + + r = qemu_fdt_setprop_cell(fdt, "/chosen", "linux,initrd-end", + (initrd_base + initrd_size)); + if (r < 0) { + error_report("couldn't set /chosen/linux,initrd-end"); + } + + r = qemu_fdt_setprop_string(fdt, "/chosen", "bootargs", kernel_cmdline); + if (r < 0) + fprintf(stderr, "couldn't set /chosen/bootargs\n"); + cpu_physical_memory_write(addr, fdt, fdt_size); + g_free(fdt); + return fdt_size; +} + +static void virtex_init(MachineState *machine) +{ + const char *kernel_filename = machine->kernel_filename; + const char *kernel_cmdline = machine->kernel_cmdline; + hwaddr initrd_base = 0; + int initrd_size = 0; + MemoryRegion *address_space_mem = get_system_memory(); + DeviceState *dev; + PowerPCCPU *cpu; + CPUPPCState *env; + hwaddr ram_base = 0; + DriveInfo *dinfo; + qemu_irq irq[32], *cpu_irq; + int kernel_size; + int i; + + /* init CPUs */ + cpu = ppc440_init_xilinx(machine->cpu_type, 400000000); + env = &cpu->env; + + if (env->mmu_model != POWERPC_MMU_BOOKE) { + error_report("MMU model %i not supported by this machine", + env->mmu_model); + exit(1); + } + + qemu_register_reset(main_cpu_reset, cpu); + + memory_region_add_subregion(address_space_mem, ram_base, machine->ram); + + dinfo = drive_get(IF_PFLASH, 0, 0); + pflash_cfi01_register(PFLASH_BASEADDR, "virtex.flash", FLASH_SIZE, + dinfo ? blk_by_legacy_dinfo(dinfo) : NULL, + 64 * KiB, 1, 0x89, 0x18, 0x0000, 0x0, 1); + + cpu_irq = (qemu_irq *) &env->irq_inputs[PPC40x_INPUT_INT]; + dev = qdev_new("xlnx.xps-intc"); + qdev_prop_set_uint32(dev, "kind-of-intr", 0); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, INTC_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, cpu_irq[0]); + for (i = 0; i < 32; i++) { + irq[i] = qdev_get_gpio_in(dev, i); + } + + serial_mm_init(address_space_mem, UART16550_BASEADDR, 2, irq[UART16550_IRQ], + 115200, serial_hd(0), DEVICE_LITTLE_ENDIAN); + + /* 2 timers at irq 2 @ 62 Mhz. */ + dev = qdev_new("xlnx.xps-timer"); + qdev_prop_set_uint32(dev, "one-timer-only", 0); + qdev_prop_set_uint32(dev, "clock-frequency", 62 * 1000000); + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, TIMER_BASEADDR); + sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0, irq[TIMER_IRQ]); + + if (kernel_filename) { + uint64_t entry, high; + hwaddr boot_offset; + + /* Boots a kernel elf binary. */ + kernel_size = load_elf(kernel_filename, NULL, NULL, NULL, + &entry, NULL, &high, NULL, 1, PPC_ELF_MACHINE, + 0, 0); + boot_info.bootstrap_pc = entry & 0x00ffffff; + + if (kernel_size < 0) { + boot_offset = 0x1200000; + /* If we failed loading ELF's try a raw image. */ + kernel_size = load_image_targphys(kernel_filename, + boot_offset, + machine->ram_size); + boot_info.bootstrap_pc = boot_offset; + high = boot_info.bootstrap_pc + kernel_size + 8192; + } + + boot_info.ima_size = kernel_size; + + /* Load initrd. 
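+ * It is placed at the first 4-byte-aligned address past the kernel;
+ * a load failure is fatal rather than silently booting without it.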
*/
+    if (machine->initrd_filename) {
+        initrd_base = high = ROUND_UP(high, 4);
+        initrd_size = load_image_targphys(machine->initrd_filename,
+                                          high, machine->ram_size - high);
+
+        if (initrd_size < 0) {
+            error_report("couldn't load ram disk '%s'",
+                         machine->initrd_filename);
+            exit(1);
+        }
+        high = ROUND_UP(high + initrd_size, 4);
+    }
+
+    /* Provide a device-tree. */
+    boot_info.fdt = high + (8192 * 2);
+    boot_info.fdt &= ~8191;
+
+    xilinx_load_device_tree(boot_info.fdt, machine->ram_size,
+                            initrd_base, initrd_size,
+                            kernel_cmdline);
+    }
+    env->load_info = &boot_info;
+}
+
+static void virtex_machine_init(MachineClass *mc)
+{
+    mc->desc = "Xilinx Virtex ML507 reference design";
+    mc->init = virtex_init;
+    mc->default_cpu_type = POWERPC_CPU_TYPE_NAME("440-xilinx");
+    mc->default_ram_id = "ram";
+}
+
+DEFINE_MACHINE("virtex-ml507", virtex_machine_init)
diff --git a/hw/ppc/vof.c b/hw/ppc/vof.c
new file mode 100644
index 000000000..73adc44ec
--- /dev/null
+++ b/hw/ppc/vof.c
@@ -0,0 +1,1062 @@
+/*
+ * QEMU PowerPC Virtual Open Firmware.
+ *
+ * This implements the client interface from OpenFirmware IEEE1275 on the
+ * QEMU side to leave only a very basic firmware in the VM.
+ *
+ * Copyright (c) 2021 IBM Corporation.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/range.h"
+#include "qemu/units.h"
+#include "qemu/log.h"
+#include "qapi/error.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "hw/ppc/vof.h"
+#include "hw/ppc/fdt.h"
+#include "sysemu/runstate.h"
+#include "qom/qom-qobject.h"
+#include "trace.h"
+
+#include <libfdt.h>
+
+/*
+ * OF 1275 "nextprop" description suggests it is 32 bytes max but
+ * LoPAPR defines "ibm,query-interrupt-source-number" which is 33 chars long.
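+ * The 64-byte limit below leaves headroom for either.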
+ */ +#define OF_PROPNAME_LEN_MAX 64 + +#define VOF_MAX_PATH 256 +#define VOF_MAX_SETPROPLEN 2048 +#define VOF_MAX_METHODLEN 256 +#define VOF_MAX_FORTHCODE 256 +#define VOF_VTY_BUF_SIZE 256 + +typedef struct { + uint64_t start; + uint64_t size; +} OfClaimed; + +typedef struct { + char *path; /* the path used to open the instance */ + uint32_t phandle; +} OfInstance; + +static int readstr(hwaddr pa, char *buf, int size) +{ + if (VOF_MEM_READ(pa, buf, size) != MEMTX_OK) { + return -1; + } + if (strnlen(buf, size) == size) { + buf[size - 1] = '\0'; + trace_vof_error_str_truncated(buf, size); + return -1; + } + return 0; +} + +static bool cmpservice(const char *s, unsigned nargs, unsigned nret, + const char *s1, unsigned nargscheck, unsigned nretcheck) +{ + if (strcmp(s, s1)) { + return false; + } + if ((nargscheck && (nargs != nargscheck)) || + (nretcheck && (nret != nretcheck))) { + trace_vof_error_param(s, nargscheck, nretcheck, nargs, nret); + return false; + } + + return true; +} + +static void prop_format(char *tval, int tlen, const void *prop, int len) +{ + int i; + const unsigned char *c; + char *t; + const char bin[] = "..."; + + for (i = 0, c = prop; i < len; ++i, ++c) { + if (*c == '\0' && i == len - 1) { + strncpy(tval, prop, tlen - 1); + return; + } + if (*c < 0x20 || *c >= 0x80) { + break; + } + } + + for (i = 0, c = prop, t = tval; i < len; ++i, ++c) { + if (t >= tval + tlen - sizeof(bin) - 1 - 2 - 1) { + strcpy(t, bin); + return; + } + if (i && i % 4 == 0 && i != len - 1) { + strcat(t, " "); + ++t; + } + t += sprintf(t, "%02X", *c & 0xFF); + } +} + +static int get_path(const void *fdt, int offset, char *buf, int len) +{ + int ret; + + ret = fdt_get_path(fdt, offset, buf, len - 1); + if (ret < 0) { + return ret; + } + + buf[len - 1] = '\0'; + + return strlen(buf) + 1; +} + +static int phandle_to_path(const void *fdt, uint32_t ph, char *buf, int len) +{ + int ret; + + ret = fdt_node_offset_by_phandle(fdt, ph); + if (ret < 0) { + return ret; + } + + return get_path(fdt, ret, buf, len); +} + +static int path_offset(const void *fdt, const char *path) +{ + g_autofree char *p = NULL; + char *at; + + /* + * https://www.devicetree.org/open-firmware/bindings/ppc/release/ppc-2_1.html#HDR16 + * + * "Conversion from numeric representation to text representation shall use + * the lower case forms of the hexadecimal digits in the range a..f, + * suppressing leading zeros". + */ + p = g_strdup(path); + for (at = strchr(p, '@'); at && *at; ) { + if (*at == '/') { + at = strchr(at, '@'); + } else { + *at = tolower(*at); + ++at; + } + } + + return fdt_path_offset(fdt, p); +} + +static uint32_t vof_finddevice(const void *fdt, uint32_t nodeaddr) +{ + char fullnode[VOF_MAX_PATH]; + uint32_t ret = PROM_ERROR; + int offset; + + if (readstr(nodeaddr, fullnode, sizeof(fullnode))) { + return (uint32_t) ret; + } + + offset = path_offset(fdt, fullnode); + if (offset >= 0) { + ret = fdt_get_phandle(fdt, offset); + } + trace_vof_finddevice(fullnode, ret); + return ret; +} + +static const void *getprop(const void *fdt, int nodeoff, const char *propname, + int *proplen, bool *write0) +{ + const char *unit, *prop; + const void *ret = fdt_getprop(fdt, nodeoff, propname, proplen); + + if (ret) { + if (write0) { + *write0 = false; + } + return ret; + } + + if (strcmp(propname, "name")) { + return NULL; + } + /* + * We return a value for "name" from path if queried but property does not + * exist. @proplen does not include the unit part in this case. 
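+ * E.g. querying "name" on "/memory@0" returns "memory" with *proplen = 7,
+ * counting the terminating zero which the caller is told to write via
+ * @write0.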
+ */ + prop = fdt_get_name(fdt, nodeoff, proplen); + if (!prop) { + *proplen = 0; + return NULL; + } + + unit = memchr(prop, '@', *proplen); + if (unit) { + *proplen = unit - prop; + } + *proplen += 1; + + /* + * Since it might be cut at "@" and there will be no trailing zero + * in the prop buffer, tell the caller to write zero at the end. + */ + if (write0) { + *write0 = true; + } + return prop; +} + +static uint32_t vof_getprop(const void *fdt, uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + char trval[64] = ""; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + bool write0; + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, &write0); + if (prop) { + const char zero = 0; + int cb = MIN(proplen, vallen); + + if (VOF_MEM_WRITE(valaddr, prop, cb) != MEMTX_OK || + /* if that was "name" with a unit address, overwrite '@' with '0' */ + (write0 && + cb == proplen && + VOF_MEM_WRITE(valaddr + cb - 1, &zero, 1) != MEMTX_OK)) { + ret = PROM_ERROR; + } else { + /* + * OF1275 says: + * "Size is either the actual size of the property, or -1 if name + * does not exist", hence returning proplen instead of cb. + */ + ret = proplen; + /* Do not format a value if tracepoint is silent, for performance */ + if (trace_event_get_state(TRACE_VOF_GETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), prop, ret); + } + } + } else { + ret = PROM_ERROR; + } + trace_vof_getprop(nodeph, propname, ret, trval); + + return ret; +} + +static uint32_t vof_getproplen(const void *fdt, uint32_t nodeph, uint32_t pname) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = 0; + int proplen = 0; + const void *prop; + int nodeoff = fdt_node_offset_by_phandle(fdt, nodeph); + + if (nodeoff < 0) { + return PROM_ERROR; + } + if (readstr(pname, propname, sizeof(propname))) { + return PROM_ERROR; + } + prop = getprop(fdt, nodeoff, propname, &proplen, NULL); + if (prop) { + ret = proplen; + } else { + ret = PROM_ERROR; + } + trace_vof_getproplen(nodeph, propname, ret); + + return ret; +} + +static uint32_t vof_setprop(MachineState *ms, void *fdt, Vof *vof, + uint32_t nodeph, uint32_t pname, + uint32_t valaddr, uint32_t vallen) +{ + char propname[OF_PROPNAME_LEN_MAX + 1]; + uint32_t ret = PROM_ERROR; + int offset, rc; + char trval[64] = ""; + char nodepath[VOF_MAX_PATH] = ""; + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + VofMachineIfClass *vmc; + g_autofree char *val = NULL; + + if (vallen > VOF_MAX_SETPROPLEN) { + goto trace_exit; + } + if (readstr(pname, propname, sizeof(propname))) { + goto trace_exit; + } + offset = fdt_node_offset_by_phandle(fdt, nodeph); + if (offset < 0) { + goto trace_exit; + } + rc = get_path(fdt, offset, nodepath, sizeof(nodepath)); + if (rc <= 0) { + goto trace_exit; + } + + val = g_malloc0(vallen); + if (VOF_MEM_READ(valaddr, val, vallen) != MEMTX_OK) { + goto trace_exit; + } + + if (!vmo) { + goto trace_exit; + } + + vmc = VOF_MACHINE_GET_CLASS(vmo); + if (!vmc->setprop || !vmc->setprop(ms, nodepath, propname, val, vallen)) { + goto trace_exit; + } + + rc = fdt_setprop(fdt, offset, propname, val, vallen); + if (rc) { + goto trace_exit; + } + + if (trace_event_get_state(TRACE_VOF_SETPROP) && + qemu_loglevel_mask(LOG_TRACE)) { + prop_format(trval, sizeof(trval), val, vallen); + } + ret = 
vallen; + +trace_exit: + trace_vof_setprop(nodeph, propname, trval, vallen, ret); + + return ret; +} + +static uint32_t vof_nextprop(const void *fdt, uint32_t phandle, + uint32_t prevaddr, uint32_t nameaddr) +{ + int offset, nodeoff = fdt_node_offset_by_phandle(fdt, phandle); + char prev[OF_PROPNAME_LEN_MAX + 1]; + const char *tmp; + + if (readstr(prevaddr, prev, sizeof(prev))) { + return PROM_ERROR; + } + + fdt_for_each_property_offset(offset, fdt, nodeoff) { + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + if (prev[0] == '\0' || strcmp(prev, tmp) == 0) { + if (prev[0] != '\0') { + offset = fdt_next_property_offset(fdt, offset); + if (offset < 0) { + return 0; + } + } + if (!fdt_getprop_by_offset(fdt, offset, &tmp, NULL)) { + return 0; + } + + if (VOF_MEM_WRITE(nameaddr, tmp, strlen(tmp) + 1) != MEMTX_OK) { + return PROM_ERROR; + } + return 1; + } + } + + return 0; +} + +static uint32_t vof_peer(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc; + + if (phandle == 0) { + rc = fdt_path_offset(fdt, "/"); + } else { + rc = fdt_next_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + } + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_child(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_first_subnode(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_parent(const void *fdt, uint32_t phandle) +{ + uint32_t ret = 0; + int rc = fdt_parent_offset(fdt, fdt_node_offset_by_phandle(fdt, phandle)); + + if (rc >= 0) { + ret = fdt_get_phandle(fdt, rc); + } + + return ret; +} + +static uint32_t vof_do_open(void *fdt, Vof *vof, int offset, const char *path) +{ + uint32_t ret = PROM_ERROR; + OfInstance *inst = NULL; + + if (vof->of_instance_last == 0xFFFFFFFF) { + /* We do not recycle ihandles yet */ + goto trace_exit; + } + + inst = g_new0(OfInstance, 1); + inst->phandle = fdt_get_phandle(fdt, offset); + g_assert(inst->phandle); + ++vof->of_instance_last; + + inst->path = g_strdup(path); + g_hash_table_insert(vof->of_instances, + GINT_TO_POINTER(vof->of_instance_last), + inst); + ret = vof->of_instance_last; + +trace_exit: + trace_vof_open(path, inst ? inst->phandle : 0, ret); + + return ret; +} + +uint32_t vof_client_open_store(void *fdt, Vof *vof, const char *nodename, + const char *prop, const char *path) +{ + int offset, node = fdt_path_offset(fdt, nodename); + uint32_t inst; + + offset = fdt_path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + inst = vof_do_open(fdt, vof, offset, path); + + return fdt_setprop_cell(fdt, node, prop, inst) >= 0 ? 
0 : PROM_ERROR; +} + +static uint32_t vof_open(void *fdt, Vof *vof, uint32_t pathaddr) +{ + char path[VOF_MAX_PATH]; + int offset; + + if (readstr(pathaddr, path, sizeof(path))) { + return PROM_ERROR; + } + + offset = path_offset(fdt, path); + if (offset < 0) { + trace_vof_error_unknown_path(path); + return PROM_ERROR; + } + + return vof_do_open(fdt, vof, offset, path); +} + +static void vof_close(Vof *vof, uint32_t ihandle) +{ + if (!g_hash_table_remove(vof->of_instances, GINT_TO_POINTER(ihandle))) { + trace_vof_error_unknown_ihandle_close(ihandle); + } +} + +static uint32_t vof_instance_to_package(Vof *vof, uint32_t ihandle) +{ + gpointer instp = g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + uint32_t ret = PROM_ERROR; + + if (instp) { + ret = ((OfInstance *)instp)->phandle; + } + trace_vof_instance_to_package(ihandle, ret); + + return ret; +} + +static uint32_t vof_package_to_path(const void *fdt, uint32_t phandle, + uint32_t buf, uint32_t len) +{ + int rc; + char tmp[VOF_MAX_PATH] = ""; + + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + + trace_vof_package_to_path(phandle, tmp, rc); + + return rc > 0 ? (uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_instance_to_path(void *fdt, Vof *vof, uint32_t ihandle, + uint32_t buf, uint32_t len) +{ + int rc = -1; + uint32_t phandle = vof_instance_to_package(vof, ihandle); + char tmp[VOF_MAX_PATH] = ""; + + if (phandle != -1) { + rc = phandle_to_path(fdt, phandle, tmp, sizeof(tmp)); + if (rc > 0) { + if (VOF_MEM_WRITE(buf, tmp, rc) != MEMTX_OK) { + rc = -1; + } + } + } + trace_vof_instance_to_path(ihandle, phandle, tmp, rc); + + return rc > 0 ? (uint32_t)rc : PROM_ERROR; +} + +static uint32_t vof_write(Vof *vof, uint32_t ihandle, uint32_t buf, + uint32_t len) +{ + char tmp[VOF_VTY_BUF_SIZE]; + unsigned cb; + OfInstance *inst = (OfInstance *) + g_hash_table_lookup(vof->of_instances, GINT_TO_POINTER(ihandle)); + + if (!inst) { + trace_vof_error_write(ihandle); + return PROM_ERROR; + } + + for ( ; len > 0; len -= cb) { + cb = MIN(len, sizeof(tmp) - 1); + if (VOF_MEM_READ(buf, tmp, cb) != MEMTX_OK) { + return PROM_ERROR; + } + + /* FIXME: there is no backend(s) yet so just call a trace */ + if (trace_event_get_state(TRACE_VOF_WRITE) && + qemu_loglevel_mask(LOG_TRACE)) { + tmp[cb] = '\0'; + trace_vof_write(ihandle, cb, tmp); + } + } + + return len; +} + +static void vof_claimed_dump(GArray *claimed) +{ + int i; + OfClaimed c; + + if (trace_event_get_state(TRACE_VOF_CLAIMED) && + qemu_loglevel_mask(LOG_TRACE)) { + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + trace_vof_claimed(c.start, c.start + c.size, c.size); + } + } +} + +static bool vof_claim_avail(GArray *claimed, uint64_t virt, uint64_t size) +{ + int i; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (ranges_overlap(c.start, c.size, virt, size)) { + return false; + } + } + + return true; +} + +static void vof_claim_add(GArray *claimed, uint64_t virt, uint64_t size) +{ + OfClaimed newclaim; + + newclaim.start = virt; + newclaim.size = size; + g_array_append_val(claimed, newclaim); +} + +static gint of_claimed_compare_func(gconstpointer a, gconstpointer b) +{ + return ((OfClaimed *)a)->start - ((OfClaimed *)b)->start; +} + +static void vof_dt_memory_available(void *fdt, GArray *claimed, uint64_t base) +{ + int i, n, offset, proplen = 0, sc, ac; + target_ulong mem0_end; + const uint8_t 
*mem0_reg; + g_autofree uint8_t *avail = NULL; + uint8_t *availcur; + + if (!fdt || !claimed) { + return; + } + + offset = fdt_path_offset(fdt, "/"); + _FDT(offset); + ac = fdt_address_cells(fdt, offset); + g_assert(ac == 1 || ac == 2); + sc = fdt_size_cells(fdt, offset); + g_assert(sc == 1 || sc == 2); + + offset = fdt_path_offset(fdt, "/memory@0"); + _FDT(offset); + + mem0_reg = fdt_getprop(fdt, offset, "reg", &proplen); + g_assert(mem0_reg && proplen == sizeof(uint32_t) * (ac + sc)); + if (sc == 2) { + mem0_end = be64_to_cpu(*(uint64_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } else { + mem0_end = be32_to_cpu(*(uint32_t *)(mem0_reg + sizeof(uint32_t) * ac)); + } + + g_array_sort(claimed, of_claimed_compare_func); + vof_claimed_dump(claimed); + + /* + * VOF resides in the first page so we do not need to check if there is + * available memory before the first claimed block + */ + g_assert(claimed->len && (g_array_index(claimed, OfClaimed, 0).start == 0)); + + avail = g_malloc0(sizeof(uint32_t) * (ac + sc) * claimed->len); + for (i = 0, n = 0, availcur = avail; i < claimed->len; ++i) { + OfClaimed c = g_array_index(claimed, OfClaimed, i); + uint64_t start, size; + + start = c.start + c.size; + if (i < claimed->len - 1) { + OfClaimed cn = g_array_index(claimed, OfClaimed, i + 1); + + size = cn.start - start; + } else { + size = mem0_end - start; + } + + if (ac == 2) { + *(uint64_t *) availcur = cpu_to_be64(start); + } else { + *(uint32_t *) availcur = cpu_to_be32(start); + } + availcur += sizeof(uint32_t) * ac; + if (sc == 2) { + *(uint64_t *) availcur = cpu_to_be64(size); + } else { + *(uint32_t *) availcur = cpu_to_be32(size); + } + availcur += sizeof(uint32_t) * sc; + + if (size) { + trace_vof_avail(c.start + c.size, c.start + c.size + size, size); + ++n; + } + } + _FDT((fdt_setprop(fdt, offset, "available", avail, availcur - avail))); +} + +/* + * OF1275: + * "Allocates size bytes of memory. If align is zero, the allocated range + * begins at the virtual address virt. Otherwise, an aligned address is + * automatically chosen and the input argument virt is ignored". + * + * In other words, exactly one of @virt and @align is non-zero. 
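+ * + * For example (hypothetical values): claim(virt=0x3000000, size=0x1000, + * align=0) succeeds only if [0x3000000, 0x3001000) overlaps no existing + * claim, whereas claim(virt=0, size=0x1000, align=0x10000) ignores @virt + * and returns the next free 64KiB-aligned range below @top_addr.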
+ */ +uint64_t vof_claim(Vof *vof, uint64_t virt, uint64_t size, + uint64_t align) +{ + uint64_t ret; + + if (size == 0) { + ret = -1; + } else if (align == 0) { + if (!vof_claim_avail(vof->claimed, virt, size)) { + ret = -1; + } else { + ret = virt; + } + } else { + vof->claimed_base = QEMU_ALIGN_UP(vof->claimed_base, align); + while (1) { + if (vof->claimed_base >= vof->top_addr) { + error_report("Out of RMA memory for the OF client"); + return -1; + } + if (vof_claim_avail(vof->claimed, vof->claimed_base, size)) { + break; + } + vof->claimed_base += size; + } + ret = vof->claimed_base; + } + + if (ret != -1) { + vof->claimed_base = MAX(vof->claimed_base, ret + size); + vof_claim_add(vof->claimed, ret, size); + } + trace_vof_claim(virt, size, align, ret); + + return ret; +} + +static uint32_t vof_release(Vof *vof, uint64_t virt, uint64_t size) +{ + uint32_t ret = PROM_ERROR; + int i; + GArray *claimed = vof->claimed; + OfClaimed c; + + for (i = 0; i < claimed->len; ++i) { + c = g_array_index(claimed, OfClaimed, i); + if (c.start == virt && c.size == size) { + g_array_remove_index(claimed, i); + ret = 0; + break; + } + } + + trace_vof_release(virt, size, ret); + + return ret; +} + +static void vof_instantiate_rtas(Error **errp) +{ + error_setg(errp, "The firmware should have instantiated RTAS"); +} + +static uint32_t vof_call_method(MachineState *ms, Vof *vof, uint32_t methodaddr, + uint32_t ihandle, uint32_t param1, + uint32_t param2, uint32_t param3, + uint32_t param4, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char method[VOF_MAX_METHODLEN] = ""; + OfInstance *inst; + + if (!ihandle) { + goto trace_exit; + } + + inst = (OfInstance *)g_hash_table_lookup(vof->of_instances, + GINT_TO_POINTER(ihandle)); + if (!inst) { + goto trace_exit; + } + + if (readstr(methodaddr, method, sizeof(method))) { + goto trace_exit; + } + + if (strcmp(inst->path, "/") == 0) { + if (strcmp(method, "ibm,client-architecture-support") == 0) { + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + g_assert(vmc->client_architecture_support); + ret = (uint32_t)vmc->client_architecture_support(ms, first_cpu, + param1); + } + + *ret2 = 0; + } + } else if (strcmp(inst->path, "/rtas") == 0) { + if (strcmp(method, "instantiate-rtas") == 0) { + vof_instantiate_rtas(&error_fatal); + ret = 0; + *ret2 = param1; /* rtas-base */ + } + } else { + trace_vof_error_unknown_method(method); + } + +trace_exit: + trace_vof_method(ihandle, method, param1, ret, *ret2); + + return ret; +} + +static uint32_t vof_call_interpret(uint32_t cmdaddr, uint32_t param1, + uint32_t param2, uint32_t *ret2) +{ + uint32_t ret = PROM_ERROR; + char cmd[VOF_MAX_FORTHCODE] = ""; + + /* There is no "interpret" implementation yet, so just emit a trace */ + readstr(cmdaddr, cmd, sizeof(cmd)); + trace_vof_interpret(cmd, param1, param2, ret, *ret2); + + return ret; +} + +static void vof_quiesce(MachineState *ms, void *fdt, Vof *vof) +{ + Object *vmo = object_dynamic_cast(OBJECT(ms), TYPE_VOF_MACHINE_IF); + /* No further FDT changes are expected after "quiesce", so pack the FDT now */ + int rc = fdt_pack(fdt); + + assert(rc == 0); + + if (vmo) { + VofMachineIfClass *vmc = VOF_MACHINE_GET_CLASS(vmo); + + if (vmc->quiesce) { + vmc->quiesce(ms); + } + } + + vof_claimed_dump(vof->claimed); +} + +static uint32_t vof_client_handle(MachineState *ms, void *fdt, Vof *vof, + const char *service, + uint32_t *args, unsigned nargs, + uint32_t *rets, unsigned nrets) +{ + uint32_t ret = 0; + + /* @nrets
includes the value which this function returns */ +#define cmpserv(s, a, r) \ + cmpservice(service, nargs, nrets, (s), (a), (r)) + + if (cmpserv("finddevice", 1, 1)) { + ret = vof_finddevice(fdt, args[0]); + } else if (cmpserv("getprop", 4, 1)) { + ret = vof_getprop(fdt, args[0], args[1], args[2], args[3]); + } else if (cmpserv("getproplen", 2, 1)) { + ret = vof_getproplen(fdt, args[0], args[1]); + } else if (cmpserv("setprop", 4, 1)) { + ret = vof_setprop(ms, fdt, vof, args[0], args[1], args[2], args[3]); + } else if (cmpserv("nextprop", 3, 1)) { + ret = vof_nextprop(fdt, args[0], args[1], args[2]); + } else if (cmpserv("peer", 1, 1)) { + ret = vof_peer(fdt, args[0]); + } else if (cmpserv("child", 1, 1)) { + ret = vof_child(fdt, args[0]); + } else if (cmpserv("parent", 1, 1)) { + ret = vof_parent(fdt, args[0]); + } else if (cmpserv("open", 1, 1)) { + ret = vof_open(fdt, vof, args[0]); + } else if (cmpserv("close", 1, 0)) { + vof_close(vof, args[0]); + } else if (cmpserv("instance-to-package", 1, 1)) { + ret = vof_instance_to_package(vof, args[0]); + } else if (cmpserv("package-to-path", 3, 1)) { + ret = vof_package_to_path(fdt, args[0], args[1], args[2]); + } else if (cmpserv("instance-to-path", 3, 1)) { + ret = vof_instance_to_path(fdt, vof, args[0], args[1], args[2]); + } else if (cmpserv("write", 3, 1)) { + ret = vof_write(vof, args[0], args[1], args[2]); + } else if (cmpserv("claim", 3, 1)) { + uint64_t ret64 = vof_claim(vof, args[0], args[1], args[2]); + + if (ret64 < 0x100000000UL) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + ret = (uint32_t)ret64; + } else { + if (ret64 != -1) { + vof_release(vof, ret64, args[1]); + } + ret = PROM_ERROR; + } + } else if (cmpserv("release", 2, 0)) { + ret = vof_release(vof, args[0], args[1]); + if (ret != PROM_ERROR) { + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); + } + } else if (cmpserv("call-method", 0, 0)) { + ret = vof_call_method(ms, vof, args[0], args[1], args[2], args[3], + args[4], args[5], rets); + } else if (cmpserv("interpret", 0, 0)) { + ret = vof_call_interpret(args[0], args[1], args[2], rets); + } else if (cmpserv("milliseconds", 0, 1)) { + ret = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); + } else if (cmpserv("quiesce", 0, 0)) { + vof_quiesce(ms, fdt, vof); + } else if (cmpserv("exit", 0, 0)) { + error_report("Stopped as the VM requested \"exit\""); + vm_stop(RUN_STATE_PAUSED); + } else { + trace_vof_error_unknown_service(service, nargs, nrets); + ret = -1; + } + +#undef cmpserv + + return ret; +} + +/* Defined as Big Endian */ +struct prom_args { + uint32_t service; + uint32_t nargs; + uint32_t nret; + uint32_t args[10]; +} QEMU_PACKED; + +int vof_client_call(MachineState *ms, Vof *vof, void *fdt, + target_ulong args_real) +{ + struct prom_args args_be; + uint32_t args[ARRAY_SIZE(args_be.args)]; + uint32_t rets[ARRAY_SIZE(args_be.args)] = { 0 }, ret; + char service[64]; + unsigned nargs, nret, i; + + if (VOF_MEM_READ(args_real, &args_be, sizeof(args_be)) != MEMTX_OK) { + return -EINVAL; + } + nargs = be32_to_cpu(args_be.nargs); + if (nargs >= ARRAY_SIZE(args_be.args)) { + return -EINVAL; + } + + if (VOF_MEM_READ(be32_to_cpu(args_be.service), service, sizeof(service)) != + MEMTX_OK) { + return -EINVAL; + } + if (strnlen(service, sizeof(service)) == sizeof(service)) { + /* The service name is too long */ + return -EINVAL; + } + + for (i = 0; i < nargs; ++i) { + args[i] = be32_to_cpu(args_be.args[i]); + } + + nret = be32_to_cpu(args_be.nret); + if (nret > ARRAY_SIZE(args_be.args) - nargs) { + return
-EINVAL; + } + ret = vof_client_handle(ms, fdt, vof, service, args, nargs, rets, nret); + if (!nret) { + return 0; + } + + /* @nrets includes the value which this function returns */ + args_be.args[nargs] = cpu_to_be32(ret); + for (i = 1; i < nret; ++i) { + args_be.args[nargs + i] = cpu_to_be32(rets[i - 1]); + } + + if (VOF_MEM_WRITE(args_real + offsetof(struct prom_args, args[nargs]), + args_be.args + nargs, sizeof(args_be.args[0]) * nret) != + MEMTX_OK) { + return -EINVAL; + } + + return 0; +} + +static void vof_instance_free(gpointer data) +{ + OfInstance *inst = (OfInstance *)data; + + g_free(inst->path); + g_free(inst); +} + +void vof_init(Vof *vof, uint64_t top_addr, Error **errp) +{ + vof_cleanup(vof); + + vof->of_instances = g_hash_table_new_full(g_direct_hash, g_direct_equal, + NULL, vof_instance_free); + vof->claimed = g_array_new(false, false, sizeof(OfClaimed)); + + /* Keep allocations in 32bit as the client interface ABI can only return 32bit cells */ + vof->top_addr = MIN(top_addr, 4 * GiB); + if (vof_claim(vof, 0, vof->fw_size, 0) == -1) { + error_setg(errp, "Memory for firmware is in use"); + } +} + +void vof_cleanup(Vof *vof) +{ + if (vof->claimed) { + g_array_unref(vof->claimed); + } + if (vof->of_instances) { + g_hash_table_unref(vof->of_instances); + } + vof->claimed = NULL; + vof->of_instances = NULL; +} + +void vof_build_dt(void *fdt, Vof *vof) +{ + uint32_t phandle = fdt_get_max_phandle(fdt); + int offset, proplen = 0; + const void *prop; + + /* Assign phandles to nodes without predefined phandles (like XICS/XIVE) */ + for (offset = fdt_next_node(fdt, -1, NULL); + offset >= 0; + offset = fdt_next_node(fdt, offset, NULL)) { + prop = fdt_getprop(fdt, offset, "phandle", &proplen); + if (prop) { + continue; + } + ++phandle; + _FDT(fdt_setprop_cell(fdt, offset, "phandle", phandle)); + } + + vof_dt_memory_available(fdt, vof->claimed, vof->claimed_base); +} + +static const TypeInfo vof_machine_if_info = { + .name = TYPE_VOF_MACHINE_IF, + .parent = TYPE_INTERFACE, + .class_size = sizeof(VofMachineIfClass), +}; + +static void vof_machine_if_register_types(void) +{ + type_register_static(&vof_machine_if_info); +} +type_init(vof_machine_if_register_types)
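For reference, the client-interface buffer that vof_client_call() parses is a packed array of 32-bit big-endian cells: the guest address of the service name, the input and return cell counts, and then the cells themselves. The sketch below is not part of the patch; it only illustrates how a guest client might lay out a "finddevice" call before entering VOF. The ci_args and ci_prepare_finddevice names are hypothetical, and <endian.h> assumes a glibc-like libc.

#include <endian.h>   /* htobe32()/be32toh() on glibc-like systems */
#include <stdint.h>
#include <string.h>

/* Mirrors struct prom_args above: every cell is big-endian. */
struct ci_args {
    uint32_t service;   /* guest address of the service name string */
    uint32_t nargs;     /* number of input cells */
    uint32_t nret;      /* number of return cells */
    uint32_t args[10];  /* args[0..nargs-1] inputs, args[nargs..] returns */
};

/* "finddevice" takes one input (a path address) and one return (a phandle). */
static void ci_prepare_finddevice(struct ci_args *ci, uint32_t service_addr,
                                  uint32_t path_addr)
{
    memset(ci, 0, sizeof(*ci));
    ci->service = htobe32(service_addr); /* points at "finddevice\0" */
    ci->nargs = htobe32(1);
    ci->nret = htobe32(1);
    ci->args[0] = htobe32(path_addr);    /* points at e.g. "/memory@0\0" */
    /* After the call, be32toh(ci->args[1]) holds the phandle or -1. */
}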
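This matches what vof_client_call() writes back: the handler's own result lands in the first return cell at args[nargs], and any extra cells from rets[] follow it, which is exactly what the "@nrets includes the value which this function returns" comment describes.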