diff options
Diffstat (limited to 'hw/intc/ioapic.c')
-rw-r--r-- | hw/intc/ioapic.c | 513 |
1 files changed, 513 insertions, 0 deletions
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c new file mode 100644 index 000000000..264262959 --- /dev/null +++ b/hw/intc/ioapic.c @@ -0,0 +1,513 @@ +/* + * ioapic.c IOAPIC emulation logic + * + * Copyright (c) 2004-2005 Fabrice Bellard + * + * Split the ioapic logic from apic.c + * Xiantao Zhang <xiantao.zhang@intel.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "monitor/monitor.h" +#include "hw/i386/apic.h" +#include "hw/i386/ioapic.h" +#include "hw/i386/ioapic_internal.h" +#include "hw/i386/x86.h" +#include "hw/intc/i8259.h" +#include "hw/pci/msi.h" +#include "hw/qdev-properties.h" +#include "sysemu/kvm.h" +#include "sysemu/sysemu.h" +#include "hw/i386/apic-msidef.h" +#include "hw/i386/x86-iommu.h" +#include "trace.h" + +#define APIC_DELIVERY_MODE_SHIFT 8 +#define APIC_POLARITY_SHIFT 14 +#define APIC_TRIG_MODE_SHIFT 15 + +static IOAPICCommonState *ioapics[MAX_IOAPICS]; + +/* global variable from ioapic_common.c */ +extern int ioapic_no; + +struct ioapic_entry_info { + /* fields parsed from IOAPIC entries */ + uint8_t masked; + uint8_t trig_mode; + uint16_t dest_idx; + uint8_t dest_mode; + uint8_t delivery_mode; + uint8_t vector; + + /* MSI message generated from above parsed fields */ + uint32_t addr; + uint32_t data; +}; + +static void ioapic_entry_parse(uint64_t entry, struct ioapic_entry_info *info) +{ + memset(info, 0, sizeof(*info)); + info->masked = (entry >> IOAPIC_LVT_MASKED_SHIFT) & 1; + info->trig_mode = (entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1; + /* + * By default, this would be dest_id[8] + reserved[8]. When IR + * is enabled, this would be interrupt_index[15] + + * interrupt_format[1]. This field never means anything, but + * only used to generate corresponding MSI. + */ + info->dest_idx = (entry >> IOAPIC_LVT_DEST_IDX_SHIFT) & 0xffff; + info->dest_mode = (entry >> IOAPIC_LVT_DEST_MODE_SHIFT) & 1; + info->delivery_mode = (entry >> IOAPIC_LVT_DELIV_MODE_SHIFT) \ + & IOAPIC_DM_MASK; + if (info->delivery_mode == IOAPIC_DM_EXTINT) { + info->vector = pic_read_irq(isa_pic); + } else { + info->vector = entry & IOAPIC_VECTOR_MASK; + } + + info->addr = APIC_DEFAULT_ADDRESS | \ + (info->dest_idx << MSI_ADDR_DEST_IDX_SHIFT) | \ + (info->dest_mode << MSI_ADDR_DEST_MODE_SHIFT); + info->data = (info->vector << MSI_DATA_VECTOR_SHIFT) | \ + (info->trig_mode << MSI_DATA_TRIGGER_SHIFT) | \ + (info->delivery_mode << MSI_DATA_DELIVERY_MODE_SHIFT); +} + +static void ioapic_service(IOAPICCommonState *s) +{ + AddressSpace *ioapic_as = X86_MACHINE(qdev_get_machine())->ioapic_as; + struct ioapic_entry_info info; + uint8_t i; + uint32_t mask; + uint64_t entry; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + mask = 1 << i; + if (s->irr & mask) { + int coalesce = 0; + + entry = s->ioredtbl[i]; + ioapic_entry_parse(entry, &info); + if (!info.masked) { + if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { + s->irr &= ~mask; + } else { + coalesce = s->ioredtbl[i] & IOAPIC_LVT_REMOTE_IRR; + trace_ioapic_set_remote_irr(i); + s->ioredtbl[i] |= IOAPIC_LVT_REMOTE_IRR; + } + + if (coalesce) { + /* We are level triggered interrupts, and the + * guest should be still working on previous one, + * so skip it. */ + continue; + } + +#ifdef CONFIG_KVM + if (kvm_irqchip_is_split()) { + if (info.trig_mode == IOAPIC_TRIGGER_EDGE) { + kvm_set_irq(kvm_state, i, 1); + kvm_set_irq(kvm_state, i, 0); + } else { + kvm_set_irq(kvm_state, i, 1); + } + continue; + } +#endif + + /* No matter whether IR is enabled, we translate + * the IOAPIC message into a MSI one, and its + * address space will decide whether we need a + * translation. */ + stl_le_phys(ioapic_as, info.addr, info.data); + } + } + } +} + +#define SUCCESSIVE_IRQ_MAX_COUNT 10000 + +static void delayed_ioapic_service_cb(void *opaque) +{ + IOAPICCommonState *s = opaque; + + ioapic_service(s); +} + +static void ioapic_set_irq(void *opaque, int vector, int level) +{ + IOAPICCommonState *s = opaque; + + /* ISA IRQs map to GSI 1-1 except for IRQ0 which maps + * to GSI 2. GSI maps to ioapic 1-1. This is not + * the cleanest way of doing it but it should work. */ + + trace_ioapic_set_irq(vector, level); + ioapic_stat_update_irq(s, vector, level); + if (vector == 0) { + vector = 2; + } + if (vector < IOAPIC_NUM_PINS) { + uint32_t mask = 1 << vector; + uint64_t entry = s->ioredtbl[vector]; + + if (((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) == + IOAPIC_TRIGGER_LEVEL) { + /* level triggered */ + if (level) { + s->irr |= mask; + if (!(entry & IOAPIC_LVT_REMOTE_IRR)) { + ioapic_service(s); + } + } else { + s->irr &= ~mask; + } + } else { + /* According to the 82093AA manual, we must ignore edge requests + * if the input pin is masked. */ + if (level && !(entry & IOAPIC_LVT_MASKED)) { + s->irr |= mask; + ioapic_service(s); + } + } + } +} + +static void ioapic_update_kvm_routes(IOAPICCommonState *s) +{ +#ifdef CONFIG_KVM + int i; + + if (kvm_irqchip_is_split()) { + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + MSIMessage msg; + struct ioapic_entry_info info; + ioapic_entry_parse(s->ioredtbl[i], &info); + if (!info.masked) { + msg.address = info.addr; + msg.data = info.data; + kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); + } + } + kvm_irqchip_commit_routes(kvm_state); + } +#endif +} + +#ifdef CONFIG_KVM +static void ioapic_iec_notifier(void *private, bool global, + uint32_t index, uint32_t mask) +{ + IOAPICCommonState *s = (IOAPICCommonState *)private; + /* For simplicity, we just update all the routes */ + ioapic_update_kvm_routes(s); +} +#endif + +void ioapic_eoi_broadcast(int vector) +{ + IOAPICCommonState *s; + uint64_t entry; + int i, n; + + trace_ioapic_eoi_broadcast(vector); + + for (i = 0; i < MAX_IOAPICS; i++) { + s = ioapics[i]; + if (!s) { + continue; + } + for (n = 0; n < IOAPIC_NUM_PINS; n++) { + entry = s->ioredtbl[n]; + + if ((entry & IOAPIC_VECTOR_MASK) != vector || + ((entry >> IOAPIC_LVT_TRIGGER_MODE_SHIFT) & 1) != IOAPIC_TRIGGER_LEVEL) { + continue; + } + +#ifdef CONFIG_KVM + /* + * When IOAPIC is in the userspace while APIC is still in + * the kernel (i.e., split irqchip), we have a trick to + * kick the resamplefd logic for registered irqfds from + * userspace to deactivate the IRQ. When that happens, it + * means the irq bypassed userspace IOAPIC (so the irr and + * remote-irr of the table entry should be bypassed too + * even if interrupt come). Still kick the resamplefds if + * they're bound to the IRQ, to make sure to EOI the + * interrupt for the hardware correctly. + * + * Note: We still need to go through the irr & remote-irr + * operations below because we don't know whether there're + * emulated devices that are using/sharing the same IRQ. + */ + kvm_resample_fd_notify(n); +#endif + + if (!(entry & IOAPIC_LVT_REMOTE_IRR)) { + continue; + } + + trace_ioapic_clear_remote_irr(n, vector); + s->ioredtbl[n] = entry & ~IOAPIC_LVT_REMOTE_IRR; + + if (!(entry & IOAPIC_LVT_MASKED) && (s->irr & (1 << n))) { + ++s->irq_eoi[n]; + if (s->irq_eoi[n] >= SUCCESSIVE_IRQ_MAX_COUNT) { + /* + * Real hardware does not deliver the interrupt immediately + * during eoi broadcast, and this lets a buggy guest make + * slow progress even if it does not correctly handle a + * level-triggered interrupt. Emulate this behavior if we + * detect an interrupt storm. + */ + s->irq_eoi[n] = 0; + timer_mod_anticipate(s->delayed_ioapic_service_timer, + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + + NANOSECONDS_PER_SECOND / 100); + trace_ioapic_eoi_delayed_reassert(n); + } else { + ioapic_service(s); + } + } else { + s->irq_eoi[n] = 0; + } + } + } +} + +static uint64_t +ioapic_mem_read(void *opaque, hwaddr addr, unsigned int size) +{ + IOAPICCommonState *s = opaque; + int index; + uint32_t val = 0; + + addr &= 0xff; + + switch (addr) { + case IOAPIC_IOREGSEL: + val = s->ioregsel; + break; + case IOAPIC_IOWIN: + if (size != 4) { + break; + } + switch (s->ioregsel) { + case IOAPIC_REG_ID: + case IOAPIC_REG_ARB: + val = s->id << IOAPIC_ID_SHIFT; + break; + case IOAPIC_REG_VER: + val = s->version | + ((IOAPIC_NUM_PINS - 1) << IOAPIC_VER_ENTRIES_SHIFT); + break; + default: + index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1; + if (index >= 0 && index < IOAPIC_NUM_PINS) { + if (s->ioregsel & 1) { + val = s->ioredtbl[index] >> 32; + } else { + val = s->ioredtbl[index] & 0xffffffff; + } + } + } + break; + } + + trace_ioapic_mem_read(addr, s->ioregsel, size, val); + + return val; +} + +/* + * This is to satisfy the hack in Linux kernel. One hack of it is to + * simulate clearing the Remote IRR bit of IOAPIC entry using the + * following: + * + * "For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during + * this." + * + * (See linux kernel __eoi_ioapic_pin() comment in commit c0205701) + * + * This is based on the assumption that, Remote IRR bit will be + * cleared by IOAPIC hardware when configured as edge-triggered + * interrupts. + * + * Without this, level-triggered interrupts in IR mode might fail to + * work correctly. + */ +static inline void +ioapic_fix_edge_remote_irr(uint64_t *entry) +{ + if (!(*entry & IOAPIC_LVT_TRIGGER_MODE)) { + /* Edge-triggered interrupts, make sure remote IRR is zero */ + *entry &= ~((uint64_t)IOAPIC_LVT_REMOTE_IRR); + } +} + +static void +ioapic_mem_write(void *opaque, hwaddr addr, uint64_t val, + unsigned int size) +{ + IOAPICCommonState *s = opaque; + int index; + + addr &= 0xff; + trace_ioapic_mem_write(addr, s->ioregsel, size, val); + + switch (addr) { + case IOAPIC_IOREGSEL: + s->ioregsel = val; + break; + case IOAPIC_IOWIN: + if (size != 4) { + break; + } + switch (s->ioregsel) { + case IOAPIC_REG_ID: + s->id = (val >> IOAPIC_ID_SHIFT) & IOAPIC_ID_MASK; + break; + case IOAPIC_REG_VER: + case IOAPIC_REG_ARB: + break; + default: + index = (s->ioregsel - IOAPIC_REG_REDTBL_BASE) >> 1; + if (index >= 0 && index < IOAPIC_NUM_PINS) { + uint64_t ro_bits = s->ioredtbl[index] & IOAPIC_RO_BITS; + if (s->ioregsel & 1) { + s->ioredtbl[index] &= 0xffffffff; + s->ioredtbl[index] |= (uint64_t)val << 32; + } else { + s->ioredtbl[index] &= ~0xffffffffULL; + s->ioredtbl[index] |= val; + } + /* restore RO bits */ + s->ioredtbl[index] &= IOAPIC_RW_BITS; + s->ioredtbl[index] |= ro_bits; + s->irq_eoi[index] = 0; + ioapic_fix_edge_remote_irr(&s->ioredtbl[index]); + ioapic_service(s); + } + } + break; + case IOAPIC_EOI: + /* Explicit EOI is only supported for IOAPIC version 0x20 */ + if (size != 4 || s->version != 0x20) { + break; + } + ioapic_eoi_broadcast(val); + break; + } + + ioapic_update_kvm_routes(s); +} + +static const MemoryRegionOps ioapic_io_ops = { + .read = ioapic_mem_read, + .write = ioapic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void ioapic_machine_done_notify(Notifier *notifier, void *data) +{ +#ifdef CONFIG_KVM + IOAPICCommonState *s = container_of(notifier, IOAPICCommonState, + machine_done); + + if (kvm_irqchip_is_split()) { + X86IOMMUState *iommu = x86_iommu_get_default(); + if (iommu) { + /* Register this IOAPIC with IOMMU IEC notifier, so that + * when there are IR invalidates, we can be notified to + * update kernel IR cache. */ + x86_iommu_iec_register_notifier(iommu, ioapic_iec_notifier, s); + } + } +#endif +} + +#define IOAPIC_VER_DEF 0x20 + +static void ioapic_realize(DeviceState *dev, Error **errp) +{ + IOAPICCommonState *s = IOAPIC_COMMON(dev); + + if (s->version != 0x11 && s->version != 0x20) { + error_setg(errp, "IOAPIC only supports version 0x11 or 0x20 " + "(default: 0x%x).", IOAPIC_VER_DEF); + return; + } + + memory_region_init_io(&s->io_memory, OBJECT(s), &ioapic_io_ops, s, + "ioapic", 0x1000); + + s->delayed_ioapic_service_timer = + timer_new_ns(QEMU_CLOCK_VIRTUAL, delayed_ioapic_service_cb, s); + + qdev_init_gpio_in(dev, ioapic_set_irq, IOAPIC_NUM_PINS); + + ioapics[ioapic_no] = s; + s->machine_done.notify = ioapic_machine_done_notify; + qemu_add_machine_init_done_notifier(&s->machine_done); +} + +static void ioapic_unrealize(DeviceState *dev) +{ + IOAPICCommonState *s = IOAPIC_COMMON(dev); + + timer_free(s->delayed_ioapic_service_timer); +} + +static Property ioapic_properties[] = { + DEFINE_PROP_UINT8("version", IOAPICCommonState, version, IOAPIC_VER_DEF), + DEFINE_PROP_END_OF_LIST(), +}; + +static void ioapic_class_init(ObjectClass *klass, void *data) +{ + IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + k->realize = ioapic_realize; + k->unrealize = ioapic_unrealize; + /* + * If APIC is in kernel, we need to update the kernel cache after + * migration, otherwise first 24 gsi routes will be invalid. + */ + k->post_load = ioapic_update_kvm_routes; + dc->reset = ioapic_reset_common; + device_class_set_props(dc, ioapic_properties); +} + +static const TypeInfo ioapic_info = { + .name = TYPE_IOAPIC, + .parent = TYPE_IOAPIC_COMMON, + .instance_size = sizeof(IOAPICCommonState), + .class_init = ioapic_class_init, +}; + +static void ioapic_register_types(void) +{ + type_register_static(&ioapic_info); +} + +type_init(ioapic_register_types) |