aboutsummaryrefslogtreecommitdiffstats
path: root/hw/intc/ioapic.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/intc/ioapic.c')
-rw-r--r--hw/intc/ioapic.c513
1 files changed, 513 insertions, 0 deletions
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
new file mode 100644
index 000000000..264262959
--- /dev/null
+++ b/hw/intc/ioapic.c
@@ -0,0 +1,513 @@
+/*
+ * ioapic.c IOAPIC emulation logic
+ *
+ * Copyright (c) 2004-2005 Fabrice Bellard
+ *
+ * Split the ioapic logic from apic.c
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "monitor/monitor.h"
+#include "hw/i386/apic.h"
+#include "hw/i386/ioapic.h"
+#include "hw/i386/ioapic_internal.h"
+#include "hw/i386/x86.h"
+#include "hw/intc/i8259.h"
+#include "hw/pci/msi.h"
+#include "hw/qdev-properties.h"
+#include "sysemu/kvm.h"
+#include "sysemu/sysemu.h"
+#include "hw/i386/apic-msidef.h"
+#include "hw/i386/x86-iommu.h"
+#include "trace.h"
+
+#define APIC_DELIVERY_MODE_SHIFT 8
+#define APIC_POLARITY_SHIFT 14
+#define APIC_TRIG_MODE_SHIFT 15
+
+static IOAPICCommonState *ioapics[MAX_IOAPICS];
+
+/* global variable from ioapic_common.c */
+extern int ioapic_no;
+
+struct ioapic_entry_info {
+ /* fields parsed from IOAPIC entries */
+ uint8_t masked;
+ uint8_t trig_mode;
+ uint16_t dest_idx;
+ uint8_t dest_mode;
+ uint8_t delivery_mode;
+ uint8_t vector;
+
+ /* MSI message generated from above parsed fields */
+ uint32_t addr;
+ uint32_t data;
+};
+
+static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info)
+{
+ memset(info, 0, sizeof(*info));
+ info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1;
+ info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1;
+ /*
+ * By default, this would be dest_id[8] + reserved[8]. When IR
+ * is enabled, this would be interrupt_index[15] +
+ * interrupt_format[1]. This field never means anything, but
+ * only used to generate corresponding MSI.
+ */
+ info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff;
+ info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1;
+ info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \
+ & IOAPIC_DM_MASK;
+ if (info->delivery_mode == IOAPIC_DM_EXTINT) {
+ info->vector = pic_read_irq(isa_pic);
+ } else {
+ info->vector = entry & IOAPIC_VECTOR_MASK;
+ }
+
+ info->addr = APIC_DEFAULT_ADDRESS | \
+ (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \
+ (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT);
+ info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \
+ (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \
+ (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT);
+}
+
+static void ioapic_service(IOAPICCommonState *s)
+{
+ AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as;
+ struct ioapic_entry_info info;
+ uint8_t i;
+ uint32_t mask;
+ uint64_t entry;
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+ mask = 1 << i;
+ if (s->irr & mask) {
+ int coalesce = 0;
+
+ entry = s->ioredtbl[i];
+ ioapic_entry_parse(entry, &info);
+ if (!info.masked) {
+ if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
+ s->irr &= ~mask;
+ } else {
+ coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR;
+ trace_ioapic_set_remote_irr(i);
+ s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR;
+ }
+
+ if (coalesce) {
+ /* We are level triggered interrupts, and the
+ * guest should be still working on previous one,
+ * so skip it. */
+ continue;
+ }
+
+#ifdef CONFIG_KVM
+ if (kvm_irqchip_is_split()) {
+ if (info.trig_mode == IOAPIC_TRIGGER_EDGE) {
+ kvm_set_irq(kvm_state, i, 1);
+ kvm_set_irq(kvm_state, i, 0);
+ } else {
+ kvm_set_irq(kvm_state, i, 1);
+ }
+ continue;
+ }
+#endif
+
+ /* No matter whether IR is enabled, we translate
+ * the IOAPIC message into a MSI one, and its
+ * address space will decide whether we need a
+ * translation. */
+ stl_le_phys(ioapic_as, info.addr, info.data);
+ }
+ }
+ }
+}
+
+#define SUCCESSIVE_IRQ_MAX_COUNT 10000
+
+static void delayed_ioapic_service_cb(void *opaque)
+{
+ IOAPICCommonState *s = opaque;
+
+ ioapic_service(s);
+}
+
+static void ioapic_set_irq(void *opaque, int vector, int level)
+{
+ IOAPICCommonState *s = opaque;
+
+ /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps
+ * to GSI 2. GSI maps to ioapic 1-1. This is not
+ * the cleanest way of doing it but it should work. */
+
+ trace_ioapic_set_irq(vector, level);
+ ioapic_stat_update_irq(s, vector, level);
+ if (vector == 0) {
+ vector = 2;
+ }
+ if (vector < IOAPIC_NUM_PINS) {
+ uint32_t mask = 1 << vector;
+ uint64_t entry = s->ioredtbl[vector];
+
+ if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) ==
+ IOAPIC_TRIGGER_LEVEL) {
+ /* level triggered */
+ if (level) {
+ s->irr |= mask;
+ if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
+ ioapic_service(s);
+ }
+ } else {
+ s->irr &= ~mask;
+ }
+ } else {
+ /* According to the 82093AA manual, we must ignore edge requests
+ * if the input pin is masked. */
+ if (level && !(entry & IOAPIC_LVT_MASKED)) {
+ s->irr |= mask;
+ ioapic_service(s);
+ }
+ }
+ }
+}
+
+static void ioapic_update_kvm_routes(IOAPICCommonState *s)
+{
+#ifdef CONFIG_KVM
+ int i;
+
+ if (kvm_irqchip_is_split()) {
+ for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+ MSIMessage msg;
+ struct ioapic_entry_info info;
+ ioapic_entry_parse(s->ioredtbl[i], &info);
+ if (!info.masked) {
+ msg.address = info.addr;
+ msg.data = info.data;
+ kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL);
+ }
+ }
+ kvm_irqchip_commit_routes(kvm_state);
+ }
+#endif
+}
+
+#ifdef CONFIG_KVM
+static void ioapic_iec_notifier(void *private, bool global,
+ uint32_t index, uint32_t mask)
+{
+ IOAPICCommonState *s = (IOAPICCommonState *)private;
+ /* For simplicity, we just update all the routes */
+ ioapic_update_kvm_routes(s);
+}
+#endif
+
+void ioapic_eoi_broadcast(int vector)
+{
+ IOAPICCommonState *s;
+ uint64_t entry;
+ int i, n;
+
+ trace_ioapic_eoi_broadcast(vector);
+
+ for (i = 0; i < MAX_IOAPICS; i++) {
+ s = ioapics[i];
+ if (!s) {
+ continue;
+ }
+ for (n = 0; n < IOAPIC_NUM_PINS; n++) {
+ entry = s->ioredtbl[n];
+
+ if ((entry & IOAPIC_VECTOR_MASK) != vector ||
+ ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) {
+ continue;
+ }
+
+#ifdef CONFIG_KVM
+ /*
+ * When IOAPIC is in the userspace while APIC is still in
+ * the kernel (i.e., split irqchip), we have a trick to
+ * kick the resamplefd logic for registered irqfds from
+ * userspace to deactivate the IRQ. When that happens, it
+ * means the irq bypassed userspace IOAPIC (so the irr and
+ * remote-irr of the table entry should be bypassed too
+ * even if interrupt come). Still kick the resamplefds if
+ * they're bound to the IRQ, to make sure to EOI the
+ * interrupt for the hardware correctly.
+ *
+ * Note: We still need to go through the irr & remote-irr
+ * operations below because we don't know whether there're
+ * emulated devices that are using/sharing the same IRQ.
+ */
+ kvm_resample_fd_notify(n);
+#endif
+
+ if (!(entry & IOAPIC_LVT_REMOTE_IRR)) {
+ continue;
+ }
+
+ trace_ioapic_clear_remote_irr(n, vector);
+ s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR;
+
+ if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) {
+ ++s->irq_eoi[n];
+ if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) {
+ /*
+ * Real hardware does not deliver the interrupt immediately
+ * during eoi broadcast, and this lets a buggy guest make
+ * slow progress even if it does not correctly handle a
+ * level-triggered interrupt. Emulate this behavior if we
+ * detect an interrupt storm.
+ */
+ s->irq_eoi[n] = 0;
+ timer_mod_anticipate(s->delayed_ioapic_service_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+ NANOSECONDS_PER_SECOND / 100);
+ trace_ioapic_eoi_delayed_reassert(n);
+ } else {
+ ioapic_service(s);
+ }
+ } else {
+ s->irq_eoi[n] = 0;
+ }
+ }
+ }
+}
+
+static uint64_t
+ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size)
+{
+ IOAPICCommonState *s = opaque;
+ int index;
+ uint32_t val = 0;
+
+ addr &= 0xff;
+
+ switch (addr) {
+ case IOAPIC_IOREGSEL:
+ val = s->ioregsel;
+ break;
+ case IOAPIC_IOWIN:
+ if (size != 4) {
+ break;
+ }
+ switch (s->ioregsel) {
+ case IOAPIC_REG_ID:
+ case IOAPIC_REG_ARB:
+ val = s->id << IOAPIC_ID_SHIFT;
+ break;
+ case IOAPIC_REG_VER:
+ val = s->version |
+ ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT);
+ break;
+ default:
+ index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
+ if (index >= 0 && index < IOAPIC_NUM_PINS) {
+ if (s->ioregsel & 1) {
+ val = s->ioredtbl[index] >> 32;
+ } else {
+ val = s->ioredtbl[index] & 0xffffffff;
+ }
+ }
+ }
+ break;
+ }
+
+ trace_ioapic_mem_read(addr, s->ioregsel, size, val);
+
+ return val;
+}
+
+/*
+ * This is to satisfy the hack in Linux kernel. One hack of it is to
+ * simulate clearing the Remote IRR bit of IOAPIC entry using the
+ * following:
+ *
+ * "For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during
+ * this."
+ *
+ * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701)
+ *
+ * This is based on the assumption that, Remote IRR bit will be
+ * cleared by IOAPIC hardware when configured as edge-triggered
+ * interrupts.
+ *
+ * Without this, level-triggered interrupts in IR mode might fail to
+ * work correctly.
+ */
+static inline void
+ioapic_fix_edge_remote_irr(uint64_t *entry)
+{
+ if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) {
+ /* Edge-triggered interrupts, make sure remote IRR is zero */
+ *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR);
+ }
+}
+
+static void
+ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val,
+ unsigned int size)
+{
+ IOAPICCommonState *s = opaque;
+ int index;
+
+ addr &= 0xff;
+ trace_ioapic_mem_write(addr, s->ioregsel, size, val);
+
+ switch (addr) {
+ case IOAPIC_IOREGSEL:
+ s->ioregsel = val;
+ break;
+ case IOAPIC_IOWIN:
+ if (size != 4) {
+ break;
+ }
+ switch (s->ioregsel) {
+ case IOAPIC_REG_ID:
+ s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK;
+ break;
+ case IOAPIC_REG_VER:
+ case IOAPIC_REG_ARB:
+ break;
+ default:
+ index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1;
+ if (index >= 0 && index < IOAPIC_NUM_PINS) {
+ uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS;
+ if (s->ioregsel & 1) {
+ s->ioredtbl[index] &= 0xffffffff;
+ s->ioredtbl[index] |= (uint64_t)val << 32;
+ } else {
+ s->ioredtbl[index] &= ~0xffffffffULL;
+ s->ioredtbl[index] |= val;
+ }
+ /* restore RO bits */
+ s->ioredtbl[index] &= IOAPIC_RW_BITS;
+ s->ioredtbl[index] |= ro_bits;
+ s->irq_eoi[index] = 0;
+ ioapic_fix_edge_remote_irr(&s->ioredtbl[index]);
+ ioapic_service(s);
+ }
+ }
+ break;
+ case IOAPIC_EOI:
+ /* Explicit EOI is only supported for IOAPIC version 0x20 */
+ if (size != 4 || s->version != 0x20) {
+ break;
+ }
+ ioapic_eoi_broadcast(val);
+ break;
+ }
+
+ ioapic_update_kvm_routes(s);
+}
+
+static const MemoryRegionOps ioapic_io_ops = {
+ .read = ioapic_mem_read,
+ .write = ioapic_mem_write,
+ .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static void ioapic_machine_done_notify(Notifier *notifier, void *data)
+{
+#ifdef CONFIG_KVM
+ IOAPICCommonState *s = container_of(notifier, IOAPICCommonState,
+ machine_done);
+
+ if (kvm_irqchip_is_split()) {
+ X86IOMMUState *iommu = x86_iommu_get_default();
+ if (iommu) {
+ /* Register this IOAPIC with IOMMU IEC notifier, so that
+ * when there are IR invalidates, we can be notified to
+ * update kernel IR cache. */
+ x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s);
+ }
+ }
+#endif
+}
+
+#define IOAPIC_VER_DEF 0x20
+
+static void ioapic_realize(DeviceState *dev, Error **errp)
+{
+ IOAPICCommonState *s = IOAPIC_COMMON(dev);
+
+ if (s->version != 0x11 && s->version != 0x20) {
+ error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 "
+ "(default: 0x%x).", IOAPIC_VER_DEF);
+ return;
+ }
+
+ memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s,
+ "ioapic", 0x1000);
+
+ s->delayed_ioapic_service_timer =
+ timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s);
+
+ qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS);
+
+ ioapics[ioapic_no] = s;
+ s->machine_done.notify = ioapic_machine_done_notify;
+ qemu_add_machine_init_done_notifier(&s->machine_done);
+}
+
+static void ioapic_unrealize(DeviceState *dev)
+{
+ IOAPICCommonState *s = IOAPIC_COMMON(dev);
+
+ timer_free(s->delayed_ioapic_service_timer);
+}
+
+static Property ioapic_properties[] = {
+ DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void ioapic_class_init(ObjectClass *klass, void *data)
+{
+ IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass);
+ DeviceClass *dc = DEVICE_CLASS(klass);
+
+ k->realize = ioapic_realize;
+ k->unrealize = ioapic_unrealize;
+ /*
+ * If APIC is in kernel, we need to update the kernel cache after
+ * migration, otherwise first 24 gsi routes will be invalid.
+ */
+ k->post_load = ioapic_update_kvm_routes;
+ dc->reset = ioapic_reset_common;
+ device_class_set_props(dc, ioapic_properties);
+}
+
+static const TypeInfo ioapic_info = {
+ .name = TYPE_IOAPIC,
+ .parent = TYPE_IOAPIC_COMMON,
+ .instance_size = sizeof(IOAPICCommonState),
+ .class_init = ioapic_class_init,
+};
+
+static void ioapic_register_types(void)
+{
+ type_register_static(&ioapic_info);
+}
+
+type_init(ioapic_register_types)