diff options
author | Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> | 2023-10-10 11:40:56 +0000 |
---|---|---|
committer | Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> | 2023-10-10 11:40:56 +0000 |
commit | e02cda008591317b1625707ff8e115a4841aa889 (patch) | |
tree | aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /hw/i386/kvm | |
parent | cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff) |
Introduce Virtio-loopback epsilon release:
The epsilon release introduces a new compatibility layer which makes the virtio-loopback
design work with QEMU and rust-vmm vhost-user backends without requiring any
changes.
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'hw/i386/kvm')
-rw-r--r-- | hw/i386/kvm/apic.c | 271 | ||||
-rw-r--r-- | hw/i386/kvm/clock.c | 350 | ||||
-rw-r--r-- | hw/i386/kvm/i8254.c | 337 | ||||
-rw-r--r-- | hw/i386/kvm/i8259.c | 167 | ||||
-rw-r--r-- | hw/i386/kvm/ioapic.c | 165 | ||||
-rw-r--r-- | hw/i386/kvm/meson.build | 8 |
6 files changed, 1298 insertions, 0 deletions
diff --git a/hw/i386/kvm/apic.c b/hw/i386/kvm/apic.c new file mode 100644 index 000000000..1e89ca089 --- /dev/null +++ b/hw/i386/kvm/apic.c @@ -0,0 +1,271 @@ +/* + * KVM in-kernel APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/module.h" +#include "hw/i386/apic_internal.h" +#include "hw/pci/msi.h" +#include "sysemu/hw_accel.h" +#include "sysemu/kvm.h" +#include "kvm/kvm_i386.h" + +static inline void kvm_apic_set_reg(struct kvm_lapic_state *kapic, + int reg_id, uint32_t val) +{ + *((uint32_t *)(kapic->regs + (reg_id << 4))) = val; +} + +static inline uint32_t kvm_apic_get_reg(struct kvm_lapic_state *kapic, + int reg_id) +{ + return *((uint32_t *)(kapic->regs + (reg_id << 4))); +} + +static void kvm_put_apic_state(APICCommonState *s, struct kvm_lapic_state *kapic) +{ + int i; + + memset(kapic, 0, sizeof(*kapic)); + if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) { + kvm_apic_set_reg(kapic, 0x2, s->initial_apic_id); + } else { + kvm_apic_set_reg(kapic, 0x2, s->id << 24); + } + kvm_apic_set_reg(kapic, 0x8, s->tpr); + kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24); + kvm_apic_set_reg(kapic, 0xe, s->dest_mode << 28 | 0x0fffffff); + kvm_apic_set_reg(kapic, 0xf, s->spurious_vec); + for (i = 0; i < 8; i++) { + kvm_apic_set_reg(kapic, 0x10 + i, s->isr[i]); + kvm_apic_set_reg(kapic, 0x18 + i, s->tmr[i]); + kvm_apic_set_reg(kapic, 0x20 + i, s->irr[i]); + } + kvm_apic_set_reg(kapic, 0x28, s->esr); + kvm_apic_set_reg(kapic, 0x30, s->icr[0]); + kvm_apic_set_reg(kapic, 0x31, s->icr[1]); + for (i = 0; i < APIC_LVT_NB; i++) { + kvm_apic_set_reg(kapic, 0x32 + i, s->lvt[i]); + } + kvm_apic_set_reg(kapic, 0x38, s->initial_count); + kvm_apic_set_reg(kapic, 0x3e, s->divide_conf); +} + +void kvm_get_apic_state(DeviceState *dev, struct 
kvm_lapic_state *kapic) +{ + APICCommonState *s = APIC_COMMON(dev); + int i, v; + + if (kvm_has_x2apic_api() && s->apicbase & MSR_IA32_APICBASE_EXTD) { + assert(kvm_apic_get_reg(kapic, 0x2) == s->initial_apic_id); + } else { + s->id = kvm_apic_get_reg(kapic, 0x2) >> 24; + } + s->tpr = kvm_apic_get_reg(kapic, 0x8); + s->arb_id = kvm_apic_get_reg(kapic, 0x9); + s->log_dest = kvm_apic_get_reg(kapic, 0xd) >> 24; + s->dest_mode = kvm_apic_get_reg(kapic, 0xe) >> 28; + s->spurious_vec = kvm_apic_get_reg(kapic, 0xf); + for (i = 0; i < 8; i++) { + s->isr[i] = kvm_apic_get_reg(kapic, 0x10 + i); + s->tmr[i] = kvm_apic_get_reg(kapic, 0x18 + i); + s->irr[i] = kvm_apic_get_reg(kapic, 0x20 + i); + } + s->esr = kvm_apic_get_reg(kapic, 0x28); + s->icr[0] = kvm_apic_get_reg(kapic, 0x30); + s->icr[1] = kvm_apic_get_reg(kapic, 0x31); + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kvm_apic_get_reg(kapic, 0x32 + i); + } + s->initial_count = kvm_apic_get_reg(kapic, 0x38); + s->divide_conf = kvm_apic_get_reg(kapic, 0x3e); + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + apic_next_timer(s, s->initial_count_load_time); +} + +static void kvm_apic_set_base(APICCommonState *s, uint64_t val) +{ + s->apicbase = val; +} + +static void kvm_apic_set_tpr(APICCommonState *s, uint8_t val) +{ + s->tpr = (val & 0x0f) << 4; +} + +static uint8_t kvm_apic_get_tpr(APICCommonState *s) +{ + return s->tpr >> 4; +} + +static void kvm_apic_enable_tpr_reporting(APICCommonState *s, bool enable) +{ + struct kvm_tpr_access_ctl ctl = { + .enabled = enable + }; + + kvm_vcpu_ioctl(CPU(s->cpu), KVM_TPR_ACCESS_REPORTING, &ctl); +} + +static void kvm_apic_vapic_base_update(APICCommonState *s) +{ + struct kvm_vapic_addr vapid_addr = { + .vapic_addr = s->vapic_paddr, + }; + int ret; + + ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_VAPIC_ADDR, &vapid_addr); + if (ret < 0) { + fprintf(stderr, "KVM: setting 
VAPIC address failed (%s)\n", + strerror(-ret)); + abort(); + } +} + +static void kvm_apic_put(CPUState *cs, run_on_cpu_data data) +{ + APICCommonState *s = data.host_ptr; + struct kvm_lapic_state kapic; + int ret; + + kvm_put_apicbase(s->cpu, s->apicbase); + kvm_put_apic_state(s, &kapic); + + ret = kvm_vcpu_ioctl(CPU(s->cpu), KVM_SET_LAPIC, &kapic); + if (ret < 0) { + fprintf(stderr, "KVM_SET_LAPIC failed: %s\n", strerror(-ret)); + abort(); + } +} + +static void kvm_apic_post_load(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void do_inject_external_nmi(CPUState *cpu, run_on_cpu_data data) +{ + APICCommonState *s = data.host_ptr; + uint32_t lvt; + int ret; + + cpu_synchronize_state(cpu); + + lvt = s->lvt[APIC_LVT_LINT1]; + if (!(lvt & APIC_LVT_MASKED) && ((lvt >> 8) & 7) == APIC_DM_NMI) { + ret = kvm_vcpu_ioctl(cpu, KVM_NMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", + strerror(-ret)); + } + } +} + +static void kvm_apic_external_nmi(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), do_inject_external_nmi, RUN_ON_CPU_HOST_PTR(s)); +} + +static void kvm_send_msi(MSIMessage *msg) +{ + int ret; + + /* + * The message has already passed through interrupt remapping if enabled, + * but the legacy extended destination ID in low bits still needs to be + * handled. 
+ */ + msg->address = kvm_swizzle_msi_ext_dest_id(msg->address); + + ret = kvm_irqchip_send_msi(kvm_state, *msg); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n", + strerror(-ret)); + } +} + +static uint64_t kvm_apic_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void kvm_apic_mem_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + + kvm_send_msi(&msg); +} + +static const MemoryRegionOps kvm_apic_io_ops = { + .read = kvm_apic_mem_read, + .write = kvm_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void kvm_apic_reset(APICCommonState *s) +{ + /* Not used by KVM, which uses the CPU mp_state instead. */ + s->wait_for_sipi = 0; + + run_on_cpu(CPU(s->cpu), kvm_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void kvm_apic_realize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + memory_region_init_io(&s->io_memory, OBJECT(s), &kvm_apic_io_ops, s, + "kvm-apic-msi", APIC_SPACE_SIZE); + + assert(kvm_has_gsi_routing()); + msi_nonbroken = true; +} + +static void kvm_apic_unrealize(DeviceState *dev) +{ +} + +static void kvm_apic_class_init(ObjectClass *klass, void *data) +{ + APICCommonClass *k = APIC_COMMON_CLASS(klass); + + k->realize = kvm_apic_realize; + k->unrealize = kvm_apic_unrealize; + k->reset = kvm_apic_reset; + k->set_base = kvm_apic_set_base; + k->set_tpr = kvm_apic_set_tpr; + k->get_tpr = kvm_apic_get_tpr; + k->post_load = kvm_apic_post_load; + k->enable_tpr_reporting = kvm_apic_enable_tpr_reporting; + k->vapic_base_update = kvm_apic_vapic_base_update; + k->external_nmi = kvm_apic_external_nmi; + k->send_msi = kvm_send_msi; +} + +static const TypeInfo kvm_apic_info = { + .name = "kvm-apic", + .parent = TYPE_APIC_COMMON, + .instance_size = sizeof(APICCommonState), + .class_init = kvm_apic_class_init, +}; + +static void kvm_apic_register_types(void) +{ + 
type_register_static(&kvm_apic_info); +} + +type_init(kvm_apic_register_types) diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c new file mode 100644 index 000000000..df70b4a03 --- /dev/null +++ b/hw/i386/kvm/clock.c @@ -0,0 +1,350 @@ +/* + * QEMU KVM support, paravirtual clock device + * + * Copyright (C) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu/osdep.h" +#include "qemu/host-utils.h" +#include "qemu/module.h" +#include "sysemu/kvm.h" +#include "sysemu/runstate.h" +#include "sysemu/hw_accel.h" +#include "kvm/kvm_i386.h" +#include "migration/vmstate.h" +#include "hw/sysbus.h" +#include "hw/kvm/clock.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" + +#include <linux/kvm.h> +#include "standard-headers/asm-x86/kvm_para.h" +#include "qom/object.h" + +#define TYPE_KVM_CLOCK "kvmclock" +OBJECT_DECLARE_SIMPLE_TYPE(KVMClockState, KVM_CLOCK) + +struct KVMClockState { + /*< private >*/ + SysBusDevice busdev; + /*< public >*/ + + uint64_t clock; + bool clock_valid; + + /* whether the 'clock' value was obtained in the 'paused' state */ + bool runstate_paused; + + /* whether machine type supports reliable KVM_GET_CLOCK */ + bool mach_use_reliable_get_clock; + + /* whether the 'clock' value was obtained in a host with + * reliable KVM_GET_CLOCK */ + bool clock_is_reliable; +}; + +struct pvclock_vcpu_time_info { + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; + uint64_t system_time; + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + uint8_t flags; + uint8_t pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +static uint64_t kvmclock_current_nsec(KVMClockState *s) +{ + CPUState *cpu = first_cpu; + CPUX86State *env = cpu->env_ptr; + 
hwaddr kvmclock_struct_pa; + uint64_t migration_tsc = env->tsc; + struct pvclock_vcpu_time_info time; + uint64_t delta; + uint64_t nsec_lo; + uint64_t nsec_hi; + uint64_t nsec; + + cpu_synchronize_state(cpu); + + if (!(env->system_time_msr & 1ULL)) { + /* KVM clock not active */ + return 0; + } + + kvmclock_struct_pa = env->system_time_msr & ~1ULL; + cpu_physical_memory_read(kvmclock_struct_pa, &time, sizeof(time)); + + assert(time.tsc_timestamp <= migration_tsc); + delta = migration_tsc - time.tsc_timestamp; + if (time.tsc_shift < 0) { + delta >>= -time.tsc_shift; + } else { + delta <<= time.tsc_shift; + } + + mulu64(&nsec_lo, &nsec_hi, delta, time.tsc_to_system_mul); + nsec = (nsec_lo >> 32) | (nsec_hi << 32); + return nsec + time.system_time; +} + +static void kvm_update_clock(KVMClockState *s) +{ + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(-ret)); + abort(); + } + s->clock = data.clock; + + /* If kvm_has_adjust_clock_stable() is false, KVM_GET_CLOCK returns + * essentially CLOCK_MONOTONIC plus a guest-specific adjustment. This + * can drift from the TSC-based value that is computed by the guest, + * so we need to go through kvmclock_current_nsec(). If + * kvm_has_adjust_clock_stable() is true, and the flags contain + * KVM_CLOCK_TSC_STABLE, then KVM_GET_CLOCK returns a TSC-based value + * and kvmclock_current_nsec() is not necessary. + * + * Here, however, we need not check KVM_CLOCK_TSC_STABLE. This is because: + * + * - if the host has disabled the kvmclock master clock, the guest already + * has protection against time going backwards. 
This "safety net" is only + * absent when kvmclock is stable; + * + * - therefore, we can replace a check like + * + * if last KVM_GET_CLOCK was not reliable then + * read from memory + * + * with + * + * if last KVM_GET_CLOCK was not reliable && masterclock is enabled + * read from memory + * + * However: + * + * - if kvm_has_adjust_clock_stable() returns false, the left side is + * always true (KVM_GET_CLOCK is never reliable), and the right side is + * unknown (because we don't have data.flags). We must assume it's true + * and read from memory. + * + * - if kvm_has_adjust_clock_stable() returns true, the result of the && + * is always false (masterclock is enabled iff KVM_GET_CLOCK is reliable) + * + * So we can just use this instead: + * + * if !kvm_has_adjust_clock_stable() then + * read from memory + */ + s->clock_is_reliable = kvm_has_adjust_clock_stable(); +} + +static void do_kvmclock_ctrl(CPUState *cpu, run_on_cpu_data data) +{ + int ret = kvm_vcpu_ioctl(cpu, KVM_KVMCLOCK_CTRL, 0); + + if (ret && ret != -EINVAL) { + fprintf(stderr, "%s: %s\n", __func__, strerror(-ret)); + } +} + +static void kvmclock_vm_state_change(void *opaque, bool running, + RunState state) +{ + KVMClockState *s = opaque; + CPUState *cpu; + int cap_clock_ctrl = kvm_check_extension(kvm_state, KVM_CAP_KVMCLOCK_CTRL); + int ret; + + if (running) { + struct kvm_clock_data data = {}; + + /* + * If the host where s->clock was read did not support reliable + * KVM_GET_CLOCK, read kvmclock value from memory. 
+ */ + if (!s->clock_is_reliable) { + uint64_t pvclock_via_mem = kvmclock_current_nsec(s); + /* We can't rely on the saved clock value, just discard it */ + if (pvclock_via_mem) { + s->clock = pvclock_via_mem; + } + } + + s->clock_valid = false; + + data.clock = s->clock; + ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(-ret)); + abort(); + } + + if (!cap_clock_ctrl) { + return; + } + CPU_FOREACH(cpu) { + run_on_cpu(cpu, do_kvmclock_ctrl, RUN_ON_CPU_NULL); + } + } else { + + if (s->clock_valid) { + return; + } + + s->runstate_paused = runstate_check(RUN_STATE_PAUSED); + + kvm_synchronize_all_tsc(); + + kvm_update_clock(s); + /* + * If the VM is stopped, declare the clock state valid to + * avoid re-reading it on next vmsave (which would return + * a different value). Will be reset when the VM is continued. + */ + s->clock_valid = true; + } +} + +static void kvmclock_realize(DeviceState *dev, Error **errp) +{ + KVMClockState *s = KVM_CLOCK(dev); + + if (!kvm_enabled()) { + error_setg(errp, "kvmclock device requires KVM"); + return; + } + + kvm_update_clock(s); + + qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); +} + +static bool kvmclock_clock_is_reliable_needed(void *opaque) +{ + KVMClockState *s = opaque; + + return s->mach_use_reliable_get_clock; +} + +static const VMStateDescription kvmclock_reliable_get_clock = { + .name = "kvmclock/clock_is_reliable", + .version_id = 1, + .minimum_version_id = 1, + .needed = kvmclock_clock_is_reliable_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(clock_is_reliable, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +/* + * When migrating, assume the source has an unreliable + * KVM_GET_CLOCK unless told otherwise. 
+ */ +static int kvmclock_pre_load(void *opaque) +{ + KVMClockState *s = opaque; + + s->clock_is_reliable = false; + + return 0; +} + +/* + * When migrating a running guest, read the clock just + * before migration, so that the guest clock counts + * during the events between: + * + * * vm_stop() + * * + * * pre_save() + * + * This reduces kvmclock difference on migration from 5s + * to 0.1s (when max_downtime == 5s), because sending the + * final pages of memory (which happens between vm_stop() + * and pre_save()) takes max_downtime. + */ +static int kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + + if (!s->runstate_paused) { + kvm_update_clock(s); + } + + return 0; +} + +static const VMStateDescription kvmclock_vmsd = { + .name = "kvmclock", + .version_id = 1, + .minimum_version_id = 1, + .pre_load = kvmclock_pre_load, + .pre_save = kvmclock_pre_save, + .fields = (VMStateField[]) { + VMSTATE_UINT64(clock, KVMClockState), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * []) { + &kvmclock_reliable_get_clock, + NULL + } +}; + +static Property kvmclock_properties[] = { + DEFINE_PROP_BOOL("x-mach-use-reliable-get-clock", KVMClockState, + mach_use_reliable_get_clock, true), + DEFINE_PROP_END_OF_LIST(), +}; + +static void kvmclock_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->realize = kvmclock_realize; + dc->vmsd = &kvmclock_vmsd; + device_class_set_props(dc, kvmclock_properties); +} + +static const TypeInfo kvmclock_info = { + .name = TYPE_KVM_CLOCK, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(KVMClockState), + .class_init = kvmclock_class_init, +}; + +/* Note: Must be called after VCPU initialization. 
*/ +void kvmclock_create(bool create_always) +{ + X86CPU *cpu = X86_CPU(first_cpu); + + if (!kvm_enabled() || !kvm_has_adjust_clock()) + return; + + if (create_always || + cpu->env.features[FEAT_KVM] & ((1ULL << KVM_FEATURE_CLOCKSOURCE) | + (1ULL << KVM_FEATURE_CLOCKSOURCE2))) { + sysbus_create_simple(TYPE_KVM_CLOCK, -1, NULL); + } +} + +static void kvmclock_register_types(void) +{ + type_register_static(&kvmclock_info); +} + +type_init(kvmclock_register_types) diff --git a/hw/i386/kvm/i8254.c b/hw/i386/kvm/i8254.c new file mode 100644 index 000000000..191a26fa5 --- /dev/null +++ b/hw/i386/kvm/i8254.c @@ -0,0 +1,337 @@ +/* + * KVM in-kernel PIT (i8254) support + * + * Copyright (c) 2003-2004 Fabrice Bellard + * Copyright (c) 2012 Jan Kiszka, Siemens AG + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" +#include <linux/kvm.h> +#include "qapi/qapi-types-machine.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qemu/timer.h" +#include "sysemu/runstate.h" +#include "hw/timer/i8254.h" +#include "hw/timer/i8254_internal.h" +#include "hw/qdev-properties-system.h" +#include "sysemu/kvm.h" +#include "qom/object.h" + +#define KVM_PIT_REINJECT_BIT 0 + +#define CALIBRATION_ROUNDS 3 + +typedef struct KVMPITClass KVMPITClass; +typedef struct KVMPITState KVMPITState; +DECLARE_OBJ_CHECKERS(KVMPITState, KVMPITClass, + KVM_PIT, TYPE_KVM_I8254) + +struct KVMPITState { + PITCommonState parent_obj; + + LostTickPolicy lost_tick_policy; + bool vm_stopped; + int64_t kernel_clock_offset; +}; + +struct KVMPITClass { + PITCommonClass parent_class; + + DeviceRealize parent_realize; +}; + +static void kvm_pit_update_clock_offset(KVMPITState *s) +{ + int64_t offset, clock_offset; + struct timespec ts; + int i; + + /* + * Measure the delta between CLOCK_MONOTONIC, the base used for + * kvm_pit_channel_state::count_load_time, and QEMU_CLOCK_VIRTUAL. Take the + * minimum of several samples to filter out scheduling noise. + */ + clock_offset = INT64_MAX; + for (i = 0; i < CALIBRATION_ROUNDS; i++) { + offset = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + clock_gettime(CLOCK_MONOTONIC, &ts); + offset -= ts.tv_nsec; + offset -= (int64_t)ts.tv_sec * 1000000000; + if (uabs64(offset) < uabs64(clock_offset)) { + clock_offset = offset; + } + } + s->kernel_clock_offset = clock_offset; +} + +static void kvm_pit_get(PITCommonState *pit) +{ + KVMPITState *s = KVM_PIT(pit); + struct kvm_pit_state2 kpit; + struct kvm_pit_channel_state *kchan; + struct PITChannelState *sc; + int i, ret; + + /* No need to re-read the state if VM is stopped. 
*/ + if (s->vm_stopped) { + return; + } + + if (kvm_has_pit_state2()) { + ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, &kpit); + if (ret < 0) { + fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(-ret)); + abort(); + } + pit->channels[0].irq_disabled = kpit.flags & KVM_PIT_FLAGS_HPET_LEGACY; + } else { + /* + * kvm_pit_state2 is superset of kvm_pit_state struct, + * so we can use it for KVM_GET_PIT as well. + */ + ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT, &kpit); + if (ret < 0) { + fprintf(stderr, "KVM_GET_PIT failed: %s\n", strerror(-ret)); + abort(); + } + } + for (i = 0; i < 3; i++) { + kchan = &kpit.channels[i]; + sc = &pit->channels[i]; + sc->count = kchan->count; + sc->latched_count = kchan->latched_count; + sc->count_latched = kchan->count_latched; + sc->status_latched = kchan->status_latched; + sc->status = kchan->status; + sc->read_state = kchan->read_state; + sc->write_state = kchan->write_state; + sc->write_latch = kchan->write_latch; + sc->rw_mode = kchan->rw_mode; + sc->mode = kchan->mode; + sc->bcd = kchan->bcd; + sc->gate = kchan->gate; + sc->count_load_time = kchan->count_load_time + s->kernel_clock_offset; + } + + sc = &pit->channels[0]; + sc->next_transition_time = + pit_get_next_transition_time(sc, sc->count_load_time); +} + +static void kvm_pit_put(PITCommonState *pit) +{ + KVMPITState *s = KVM_PIT(pit); + struct kvm_pit_state2 kpit = {}; + struct kvm_pit_channel_state *kchan; + struct PITChannelState *sc; + int i, ret; + + /* The offset keeps changing as long as the VM is stopped. */ + if (s->vm_stopped) { + kvm_pit_update_clock_offset(s); + } + + kpit.flags = pit->channels[0].irq_disabled ? 
KVM_PIT_FLAGS_HPET_LEGACY : 0; + for (i = 0; i < 3; i++) { + kchan = &kpit.channels[i]; + sc = &pit->channels[i]; + kchan->count = sc->count; + kchan->latched_count = sc->latched_count; + kchan->count_latched = sc->count_latched; + kchan->status_latched = sc->status_latched; + kchan->status = sc->status; + kchan->read_state = sc->read_state; + kchan->write_state = sc->write_state; + kchan->write_latch = sc->write_latch; + kchan->rw_mode = sc->rw_mode; + kchan->mode = sc->mode; + kchan->bcd = sc->bcd; + kchan->gate = sc->gate; + kchan->count_load_time = sc->count_load_time - s->kernel_clock_offset; + } + + ret = kvm_vm_ioctl(kvm_state, + kvm_has_pit_state2() ? KVM_SET_PIT2 : KVM_SET_PIT, + &kpit); + if (ret < 0) { + fprintf(stderr, "%s failed: %s\n", + kvm_has_pit_state2() ? "KVM_SET_PIT2" : "KVM_SET_PIT", + strerror(-ret)); + abort(); + } +} + +static void kvm_pit_set_gate(PITCommonState *s, PITChannelState *sc, int val) +{ + kvm_pit_get(s); + + switch (sc->mode) { + default: + case 0: + case 4: + /* XXX: just disable/enable counting */ + break; + case 1: + case 2: + case 3: + case 5: + if (sc->gate < val) { + /* restart counting on rising edge */ + sc->count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + } + break; + } + sc->gate = val; + + kvm_pit_put(s); +} + +static void kvm_pit_get_channel_info(PITCommonState *s, PITChannelState *sc, + PITChannelInfo *info) +{ + kvm_pit_get(s); + + pit_get_channel_info_common(s, sc, info); +} + +static void kvm_pit_reset(DeviceState *dev) +{ + PITCommonState *s = PIT_COMMON(dev); + + pit_reset_common(s); + + kvm_pit_put(s); +} + +static void kvm_pit_irq_control(void *opaque, int n, int enable) +{ + PITCommonState *pit = opaque; + PITChannelState *s = &pit->channels[0]; + + kvm_pit_get(pit); + + s->irq_disabled = !enable; + + kvm_pit_put(pit); +} + +static void kvm_pit_vm_state_change(void *opaque, bool running, + RunState state) +{ + KVMPITState *s = opaque; + + if (running) { + kvm_pit_update_clock_offset(s); + 
kvm_pit_put(PIT_COMMON(s)); + s->vm_stopped = false; + } else { + kvm_pit_update_clock_offset(s); + kvm_pit_get(PIT_COMMON(s)); + s->vm_stopped = true; + } +} + +static void kvm_pit_realizefn(DeviceState *dev, Error **errp) +{ + PITCommonState *pit = PIT_COMMON(dev); + KVMPITClass *kpc = KVM_PIT_GET_CLASS(dev); + KVMPITState *s = KVM_PIT(pit); + struct kvm_pit_config config = { + .flags = 0, + }; + int ret; + + if (kvm_check_extension(kvm_state, KVM_CAP_PIT2)) { + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT2, &config); + } else { + ret = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT); + } + if (ret < 0) { + error_setg(errp, "Create kernel PIC irqchip failed: %s", + strerror(-ret)); + return; + } + switch (s->lost_tick_policy) { + case LOST_TICK_POLICY_DELAY: + break; /* enabled by default */ + case LOST_TICK_POLICY_DISCARD: + if (kvm_check_extension(kvm_state, KVM_CAP_REINJECT_CONTROL)) { + struct kvm_reinject_control control = { .pit_reinject = 0 }; + + ret = kvm_vm_ioctl(kvm_state, KVM_REINJECT_CONTROL, &control); + if (ret < 0) { + error_setg(errp, + "Can't disable in-kernel PIT reinjection: %s", + strerror(-ret)); + return; + } + } + break; + default: + error_setg(errp, "Lost tick policy not supported."); + return; + } + + memory_region_init_io(&pit->ioports, OBJECT(dev), NULL, NULL, "kvm-pit", 4); + + qdev_init_gpio_in(dev, kvm_pit_irq_control, 1); + + qemu_add_vm_change_state_handler(kvm_pit_vm_state_change, s); + + kpc->parent_realize(dev, errp); +} + +static Property kvm_pit_properties[] = { + DEFINE_PROP_UINT32("iobase", PITCommonState, iobase, -1), + DEFINE_PROP_LOSTTICKPOLICY("lost_tick_policy", KVMPITState, + lost_tick_policy, LOST_TICK_POLICY_DELAY), + DEFINE_PROP_END_OF_LIST(), +}; + +static void kvm_pit_class_init(ObjectClass *klass, void *data) +{ + KVMPITClass *kpc = KVM_PIT_CLASS(klass); + PITCommonClass *k = PIT_COMMON_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_parent_realize(dc, kvm_pit_realizefn, + 
&kpc->parent_realize); + k->set_channel_gate = kvm_pit_set_gate; + k->get_channel_info = kvm_pit_get_channel_info; + dc->reset = kvm_pit_reset; + device_class_set_props(dc, kvm_pit_properties); +} + +static const TypeInfo kvm_pit_info = { + .name = TYPE_KVM_I8254, + .parent = TYPE_PIT_COMMON, + .instance_size = sizeof(KVMPITState), + .class_init = kvm_pit_class_init, + .class_size = sizeof(KVMPITClass), +}; + +static void kvm_pit_register(void) +{ + type_register_static(&kvm_pit_info); +} + +type_init(kvm_pit_register) diff --git a/hw/i386/kvm/i8259.c b/hw/i386/kvm/i8259.c new file mode 100644 index 000000000..d61bae4dc --- /dev/null +++ b/hw/i386/kvm/i8259.c @@ -0,0 +1,167 @@ +/* + * KVM in-kernel PIC (i8259) support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/isa/i8259_internal.h" +#include "hw/intc/i8259.h" +#include "qemu/module.h" +#include "hw/i386/apic_internal.h" +#include "hw/irq.h" +#include "sysemu/kvm.h" +#include "qom/object.h" + +#define TYPE_KVM_I8259 "kvm-i8259" +typedef struct KVMPICClass KVMPICClass; +DECLARE_CLASS_CHECKERS(KVMPICClass, KVM_PIC, + TYPE_KVM_I8259) + +/** + * KVMPICClass: + * @parent_realize: The parent's realizefn. + */ +struct KVMPICClass { + PICCommonClass parent_class; + + DeviceRealize parent_realize; +}; + +static void kvm_pic_get(PICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + int ret; + + chip.chip_id = s->master ? 
KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; + ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } + + kpic = &chip.chip.pic; + + s->last_irr = kpic->last_irr; + s->irr = kpic->irr; + s->imr = kpic->imr; + s->isr = kpic->isr; + s->priority_add = kpic->priority_add; + s->irq_base = kpic->irq_base; + s->read_reg_select = kpic->read_reg_select; + s->poll = kpic->poll; + s->special_mask = kpic->special_mask; + s->init_state = kpic->init_state; + s->auto_eoi = kpic->auto_eoi; + s->rotate_on_auto_eoi = kpic->rotate_on_auto_eoi; + s->special_fully_nested_mode = kpic->special_fully_nested_mode; + s->init4 = kpic->init4; + s->elcr = kpic->elcr; + s->elcr_mask = kpic->elcr_mask; +} + +static void kvm_pic_put(PICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_pic_state *kpic; + int ret; + + chip.chip_id = s->master ? KVM_IRQCHIP_PIC_MASTER : KVM_IRQCHIP_PIC_SLAVE; + + kpic = &chip.chip.pic; + + kpic->last_irr = s->last_irr; + kpic->irr = s->irr; + kpic->imr = s->imr; + kpic->isr = s->isr; + kpic->priority_add = s->priority_add; + kpic->irq_base = s->irq_base; + kpic->read_reg_select = s->read_reg_select; + kpic->poll = s->poll; + kpic->special_mask = s->special_mask; + kpic->init_state = s->init_state; + kpic->auto_eoi = s->auto_eoi; + kpic->rotate_on_auto_eoi = s->rotate_on_auto_eoi; + kpic->special_fully_nested_mode = s->special_fully_nested_mode; + kpic->init4 = s->init4; + kpic->elcr = s->elcr; + kpic->elcr_mask = s->elcr_mask; + + ret = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } +} + +static void kvm_pic_reset(DeviceState *dev) +{ + PICCommonState *s = PIC_COMMON(dev); + + s->elcr = 0; + pic_reset_common(s); + + kvm_pic_put(s); +} + +static void kvm_pic_set_irq(void *opaque, int irq, int level) +{ + int delivered; + + pic_stat_update_irq(irq, level); + 
delivered = kvm_set_irq(kvm_state, irq, level); + apic_report_irq_delivered(delivered); +} +/* Realize hook for the KVM PIC: initialise the "kvm-pic" (size 2) and "kvm-elcr" (size 1) I/O regions, then call the parent realize that kvm_i8259_class_init() saved in KVMPICClass::parent_realize. */ +static void kvm_pic_realize(DeviceState *dev, Error **errp) +{ + PICCommonState *s = PIC_COMMON(dev); + KVMPICClass *kpc = KVM_PIC_GET_CLASS(dev); +/* Both regions get NULL ops and a NULL opaque; presumably the in-kernel PIC services these ports so QEMU installs no handlers -- TODO confirm. */ + memory_region_init_io(&s->base_io, OBJECT(dev), NULL, NULL, "kvm-pic", 2); + memory_region_init_io(&s->elcr_io, OBJECT(dev), NULL, NULL, "kvm-elcr", 1); + + kpc->parent_realize(dev, errp); +} +/* Create two TYPE_KVM_I8259 chips on @bus (third argument true for the first, false for the second -- presumably master and slave; confirm against i8259_init_chip()) and return ISA_NUM_IRQS qemu_irq lines that are all handled by kvm_pic_set_irq() with a NULL opaque. */ +qemu_irq *kvm_i8259_init(ISABus *bus) +{ + i8259_init_chip(TYPE_KVM_I8259, bus, true); + i8259_init_chip(TYPE_KVM_I8259, bus, false); + + return qemu_allocate_irqs(kvm_pic_set_irq, NULL, ISA_NUM_IRQS); +} +/* Class init: install kvm_pic_reset as the device reset handler, interpose kvm_pic_realize while saving the parent's realize in kpc->parent_realize, and use kvm_pic_get / kvm_pic_put as the pre_save / post_load hooks. */ +static void kvm_i8259_class_init(ObjectClass *klass, void *data) +{ + KVMPICClass *kpc = KVM_PIC_CLASS(klass); + PICCommonClass *k = PIC_COMMON_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->reset = kvm_pic_reset; + device_class_set_parent_realize(dc, kvm_pic_realize, &kpc->parent_realize); + k->pre_save = kvm_pic_get; + k->post_load = kvm_pic_put; +} +/* QOM type description: TYPE_KVM_I8259 derives from TYPE_PIC_COMMON, its instance size is sizeof(PICCommonState) and its class size is sizeof(KVMPICClass). */ +static const TypeInfo kvm_i8259_info = { + .name = TYPE_KVM_I8259, + .parent = TYPE_PIC_COMMON, + .instance_size = sizeof(PICCommonState), + .class_init = kvm_i8259_class_init, + .class_size = sizeof(KVMPICClass), +}; +/* Register the TYPE_KVM_I8259 type; hooked up through type_init() below. */ +static void kvm_pic_register_types(void) +{ + type_register_static(&kvm_i8259_info); +} + +type_init(kvm_pic_register_types) diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c new file mode 100644 index 000000000..ee7c8ef68 --- /dev/null +++ b/hw/i386/kvm/ioapic.c @@ -0,0 +1,165 @@ +/* + * KVM in-kernel IOAPIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory.
+ */ +#include "qemu/osdep.h" +#include "monitor/monitor.h" +#include "hw/i386/x86.h" +#include "hw/qdev-properties.h" +#include "hw/i386/ioapic_internal.h" +#include "hw/i386/apic_internal.h" +#include "sysemu/kvm.h" + +/* PC utility function: program the KVM IRQ routing table. Asserts that GSI routing is available, adds routes for i = 0..7 (skipping 2) to KVM_IRQCHIP_PIC_MASTER with last argument i, for i = 8..15 to KVM_IRQCHIP_PIC_SLAVE with last argument i - 8 and, when @pci_enabled is true, for i = 0..23 to KVM_IRQCHIP_IOAPIC (i == 0 passes 2 as last argument, i == 2 is skipped, all others pass i), then commits the routes. The second argument is presumably the GSI and the last one the chip pin -- confirm against kvm_irqchip_add_irq_route(). */ +void kvm_pc_setup_irq_routing(bool pci_enabled) +{ + KVMState *s = kvm_state; + int i; + + assert(kvm_has_gsi_routing()); + for (i = 0; i < 8; ++i) { + if (i == 2) { + continue; + } + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_MASTER, i); + } + for (i = 8; i < 16; ++i) { + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_PIC_SLAVE, i - 8); + } + if (pci_enabled) { + for (i = 0; i < 24; ++i) { + if (i == 0) { + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, 2); + } else if (i != 2) { + kvm_irqchip_add_irq_route(s, i, KVM_IRQCHIP_IOAPIC, i); + } + } + } + kvm_irqchip_commit_routes(s); +} +/* KVM IOAPIC device state: the common IOAPIC state plus kvm_gsi_base, which is exposed as the "gsi_base" property and added to the pin number in kvm_ioapic_set_irq(). */ +typedef struct KVMIOAPICState KVMIOAPICState; + +struct KVMIOAPICState { + IOAPICCommonState ioapic; + uint32_t kvm_gsi_base; +}; +/* Fetch the IOAPIC state from the kernel with KVM_GET_IRQCHIP and copy id, ioregsel, irr and the IOAPIC_NUM_PINS redirection table entries into @s. Prints the error to stderr and aborts if the ioctl fails. Installed as the pre_save hook in kvm_ioapic_class_init(). */ +static void kvm_ioapic_get(IOAPICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int ret, i; + + chip.chip_id = KVM_IRQCHIP_IOAPIC; + ret = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_GET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } + + kioapic = &chip.chip.ioapic; + + s->id = kioapic->id; + s->ioregsel = kioapic->ioregsel; + s->irr = kioapic->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + s->ioredtbl[i] = kioapic->redirtbl[i].bits; + } +} +/* Counterpart of kvm_ioapic_get(): fill a kvm_ioapic_state from @s (base_address is taken from s->busdev.mmio[0].addr) and load it into the kernel with KVM_SET_IRQCHIP. Prints the error to stderr and aborts if the ioctl fails. Installed as the post_load hook and also called from kvm_ioapic_reset(). */ +static void kvm_ioapic_put(IOAPICCommonState *s) +{ + struct kvm_irqchip chip; + struct kvm_ioapic_state *kioapic; + int ret, i; +/* NOTE(review): chip is not zero-initialised, so any member not assigned below reaches the ioctl with indeterminate contents -- confirm the kernel ignores such members. */ + chip.chip_id = KVM_IRQCHIP_IOAPIC; + kioapic = &chip.chip.ioapic; + + kioapic->id = s->id; + kioapic->ioregsel = s->ioregsel; + kioapic->base_address = s->busdev.mmio[0].addr; + kioapic->irr = s->irr; + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + kioapic->redirtbl[i].bits = s->ioredtbl[i]; + } + + ret = kvm_vm_ioctl(kvm_state,
KVM_SET_IRQCHIP, &chip); + if (ret < 0) { + fprintf(stderr, "KVM_SET_IRQCHIP failed: %s\n", strerror(-ret)); + abort(); + } +} +/* Device reset handler: reset the common IOAPIC state with ioapic_reset_common(), then push that state to the kernel with kvm_ioapic_put(). */ +static void kvm_ioapic_reset(DeviceState *dev) +{ + IOAPICCommonState *s = IOAPIC_COMMON(dev); + + ioapic_reset_common(dev); + kvm_ioapic_put(s); +} +/* GPIO input handler registered in kvm_ioapic_realize(): @opaque is the KVMIOAPICState. Calls ioapic_stat_update_irq() for pin @irq, passes @level to kvm_set_irq() for line kvm_gsi_base + @irq, and hands the return value to apic_report_irq_delivered(). */ +static void kvm_ioapic_set_irq(void *opaque, int irq, int level) +{ + KVMIOAPICState *s = opaque; + IOAPICCommonState *common = IOAPIC_COMMON(s); + int delivered; + + ioapic_stat_update_irq(common, irq, level); + delivered = kvm_set_irq(kvm_state, s->kvm_gsi_base + irq, level); + apic_report_irq_delivered(delivered); +} +/* Realize hook (assigned to IOAPICCommonClass::realize in kvm_ioapic_class_init()): initialise the 0x1000-sized "kvm-ioapic" I/O region with NULL ops and a NULL opaque, set the reported version and register IOAPIC_NUM_PINS GPIO inputs handled by kvm_ioapic_set_irq(). @errp is not used here. */ +static void kvm_ioapic_realize(DeviceState *dev, Error **errp) +{ + IOAPICCommonState *s = IOAPIC_COMMON(dev); + + memory_region_init_io(&s->io_memory, OBJECT(dev), NULL, NULL, "kvm-ioapic", 0x1000); + /* + * KVM ioapic only supports 0x11 now. This will only be used when + * we want to dump ioapic version. + */ + s->version = 0x11; + + qdev_init_gpio_in(dev, kvm_ioapic_set_irq, IOAPIC_NUM_PINS); +} +/* qdev properties: "gsi_base" is a uint32 stored in KVMIOAPICState::kvm_gsi_base; the trailing 0 is its default value. */ +static Property kvm_ioapic_properties[] = { + DEFINE_PROP_UINT32("gsi_base", KVMIOAPICState, kvm_gsi_base, 0), + DEFINE_PROP_END_OF_LIST() +}; +/* Class init: install the realize, pre_save and post_load hooks on the common IOAPIC class, set the device reset handler and attach kvm_ioapic_properties. */ +static void kvm_ioapic_class_init(ObjectClass *klass, void *data) +{ + IOAPICCommonClass *k = IOAPIC_COMMON_CLASS(klass); + DeviceClass *dc = DEVICE_CLASS(klass); + + k->realize = kvm_ioapic_realize; + k->pre_save = kvm_ioapic_get; + k->post_load = kvm_ioapic_put; + dc->reset = kvm_ioapic_reset; + device_class_set_props(dc, kvm_ioapic_properties); +} +/* QOM type description: TYPE_KVM_IOAPIC derives from TYPE_IOAPIC_COMMON and its instance size is sizeof(KVMIOAPICState). */ +static const TypeInfo kvm_ioapic_info = { + .name = TYPE_KVM_IOAPIC, + .parent = TYPE_IOAPIC_COMMON, + .instance_size = sizeof(KVMIOAPICState), + .class_init = kvm_ioapic_class_init, +}; +/* Register the TYPE_KVM_IOAPIC type; hooked up through type_init() below. */ +static void kvm_ioapic_register_types(void) +{ + type_register_static(&kvm_ioapic_info); +} + +type_init(kvm_ioapic_register_types) diff --git a/hw/i386/kvm/meson.build b/hw/i386/kvm/meson.build new file mode 100644 index 000000000..95467f1de --- /dev/null +++ b/hw/i386/kvm/meson.build @@ -0,0
+1,8 @@ +i386_kvm_ss = ss.source_set() +i386_kvm_ss.add(files('clock.c')) +i386_kvm_ss.add(when: 'CONFIG_APIC', if_true: files('apic.c')) +i386_kvm_ss.add(when: 'CONFIG_I8254', if_true: files('i8254.c')) +i386_kvm_ss.add(when: 'CONFIG_I8259', if_true: files('i8259.c')) +i386_kvm_ss.add(when: 'CONFIG_IOAPIC', if_true: files('ioapic.c')) + +i386_ss.add_all(when: 'CONFIG_KVM', if_true: i386_kvm_ss) |