diff options
Diffstat (limited to 'linux-headers/linux')
-rw-r--r-- | linux-headers/linux/kvm.h | 2007 | ||||
-rw-r--r-- | linux-headers/linux/mman.h | 44 | ||||
-rw-r--r-- | linux-headers/linux/psci.h | 119 | ||||
-rw-r--r-- | linux-headers/linux/psp-sev.h | 166 | ||||
-rw-r--r-- | linux-headers/linux/userfaultfd.h | 302 | ||||
-rw-r--r-- | linux-headers/linux/vfio.h | 1370 | ||||
-rw-r--r-- | linux-headers/linux/vfio_ccw.h | 56 | ||||
-rw-r--r-- | linux-headers/linux/vfio_zdev.h | 78 | ||||
-rw-r--r-- | linux-headers/linux/vhost.h | 153 | ||||
-rw-r--r-- | linux-headers/linux/vhost_types.h | 1 | ||||
-rw-r--r-- | linux-headers/linux/virtio_config.h | 1 | ||||
-rw-r--r-- | linux-headers/linux/virtio_ring.h | 1 |
12 files changed, 4298 insertions, 0 deletions
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h new file mode 100644 index 000000000..bcaf66cc4 --- /dev/null +++ b/linux-headers/linux/kvm.h @@ -0,0 +1,2007 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_KVM_H +#define __LINUX_KVM_H + +/* + * Userspace interface for /dev/kvm - kernel based virtual machine + * + * Note: you must update KVM_API_VERSION if you change this interface. + */ + +#include <linux/const.h> +#include <linux/types.h> + +#include <linux/ioctl.h> +#include <asm/kvm.h> + +#define KVM_API_VERSION 12 + +/* *** Deprecated interfaces *** */ + +#define KVM_TRC_SHIFT 16 + +#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT) +#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1)) + +#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01) +#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02) +#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01) + +#define KVM_TRC_HEAD_SIZE 12 +#define KVM_TRC_CYCLE_SIZE 8 +#define KVM_TRC_EXTRA_MAX 7 + +#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02) +#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03) +#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04) +#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05) +#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06) +#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07) +#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08) +#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09) +#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A) +#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B) +#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C) +#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D) +#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E) +#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F) +#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10) +#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11) +#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) +#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) +#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) +#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16) +#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17) +#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) +#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) + +struct kvm_user_trace_setup { + __u32 buf_size; + __u32 buf_nr; +}; + +#define __KVM_DEPRECATED_MAIN_W_0x06 \ + _IOW(KVMIO, 0x06, struct kvm_user_trace_setup) +#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07) +#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08) + +#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq) + +struct kvm_breakpoint { + __u32 enabled; + __u32 padding; + __u64 address; +}; + +struct kvm_debug_guest { + __u32 enabled; + __u32 pad; + struct kvm_breakpoint breakpoints[4]; + __u32 singlestep; +}; + +#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest) + +/* *** End of deprecated interfaces *** */ + + +/* for KVM_CREATE_MEMORY_REGION */ +struct kvm_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ +}; + +/* for KVM_SET_USER_MEMORY_REGION */ +struct kvm_userspace_memory_region { + __u32 slot; + __u32 flags; + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; /* start of the userspace allocated memory */ +}; + +/* + * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace, + * other bits are reserved for kvm internal use which are defined in + * include/linux/kvm_host.h. + */ +#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) +#define KVM_MEM_READONLY (1UL << 1) + +/* for KVM_IRQ_LINE */ +struct kvm_irq_level { + /* + * ACPI gsi notion of irq. + * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47.. + * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23.. + * For ARM: See Documentation/virt/kvm/api.rst + */ + union { + __u32 irq; + __s32 status; + }; + __u32 level; +}; + + +struct kvm_irqchip { + __u32 chip_id; + __u32 pad; + union { + char dummy[512]; /* reserving space */ +#ifdef __KVM_HAVE_PIT + struct kvm_pic_state pic; +#endif +#ifdef __KVM_HAVE_IOAPIC + struct kvm_ioapic_state ioapic; +#endif + } chip; +}; + +/* for KVM_CREATE_PIT2 */ +struct kvm_pit_config { + __u32 flags; + __u32 pad[15]; +}; + +#define KVM_PIT_SPEAKER_DUMMY 1 + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +#define KVM_S390_CMMA_PEEK (1 << 0) + +/** + * kvm_s390_cmma_log - Used for CMMA migration. + * + * Used both for input and output. + * + * @start_gfn: Guest page number to start from. + * @count: Size of the result buffer. + * @flags: Control operation mode via KVM_S390_CMMA_* flags + * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty + * pages are still remaining. + * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set + * in the PGSTE. + * @values: Pointer to the values buffer. + * + * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls. + */ +struct kvm_s390_cmma_log { + __u64 start_gfn; + __u32 count; + __u32 flags; + union { + __u64 remaining; + __u64 mask; + }; + __u64 values; +}; + +struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 +#define KVM_EXIT_HYPERV_HCALL 2 +#define KVM_EXIT_HYPERV_SYNDBG 3 + __u32 type; + __u32 pad1; + union { + struct { + __u32 msr; + __u32 pad2; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + struct { + __u64 input; + __u64 result; + __u64 params[2]; + } hcall; + struct { + __u32 msr; + __u32 pad2; + __u64 control; + __u64 status; + __u64 send_page; + __u64 recv_page; + __u64 pending_page; + } syndbg; + } u; +}; + +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + +#define KVM_S390_GET_SKEYS_NONE 1 +#define KVM_S390_SKEYS_MAX 1048576 + +#define KVM_EXIT_UNKNOWN 0 +#define KVM_EXIT_EXCEPTION 1 +#define KVM_EXIT_IO 2 +#define KVM_EXIT_HYPERCALL 3 +#define KVM_EXIT_DEBUG 4 +#define KVM_EXIT_HLT 5 +#define KVM_EXIT_MMIO 6 +#define KVM_EXIT_IRQ_WINDOW_OPEN 7 +#define KVM_EXIT_SHUTDOWN 8 +#define KVM_EXIT_FAIL_ENTRY 9 +#define KVM_EXIT_INTR 10 +#define KVM_EXIT_SET_TPR 11 +#define KVM_EXIT_TPR_ACCESS 12 +#define KVM_EXIT_S390_SIEIC 13 +#define KVM_EXIT_S390_RESET 14 +#define KVM_EXIT_DCR 15 /* deprecated */ +#define KVM_EXIT_NMI 16 +#define KVM_EXIT_INTERNAL_ERROR 17 +#define KVM_EXIT_OSI 18 +#define KVM_EXIT_PAPR_HCALL 19 +#define KVM_EXIT_S390_UCONTROL 20 +#define KVM_EXIT_WATCHDOG 21 +#define KVM_EXIT_S390_TSCH 22 +#define KVM_EXIT_EPR 23 +#define KVM_EXIT_SYSTEM_EVENT 24 +#define KVM_EXIT_S390_STSI 25 +#define KVM_EXIT_IOAPIC_EOI 26 +#define KVM_EXIT_HYPERV 27 +#define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 +#define KVM_EXIT_DIRTY_RING_FULL 31 +#define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 + +/* For KVM_EXIT_INTERNAL_ERROR */ +/* Emulate instruction failed. */ +#define KVM_INTERNAL_ERROR_EMULATION 1 +/* Encounter unexpected simultaneous exceptions. */ +#define KVM_INTERNAL_ERROR_SIMUL_EX 2 +/* Encounter unexpected vm-exit due to delivery event. */ +#define KVM_INTERNAL_ERROR_DELIVERY_EV 3 +/* Encounter unexpected vm-exit reason */ +#define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4 + +/* Flags that describe what fields in emulation_failure hold valid data. */ +#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) + +/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ +struct kvm_run { + /* in */ + __u8 request_interrupt_window; + __u8 immediate_exit; + __u8 padding1[6]; + + /* out */ + __u32 exit_reason; + __u8 ready_for_interrupt_injection; + __u8 if_flag; + __u16 flags; + + /* in (pre_kvm_run), out (post_kvm_run) */ + __u64 cr8; + __u64 apic_base; + +#ifdef __KVM_S390 + /* the processor status word for s390 */ + __u64 psw_mask; /* psw upper half */ + __u64 psw_addr; /* psw lower half */ +#endif + union { + /* KVM_EXIT_UNKNOWN */ + struct { + __u64 hardware_exit_reason; + } hw; + /* KVM_EXIT_FAIL_ENTRY */ + struct { + __u64 hardware_entry_failure_reason; + __u32 cpu; + } fail_entry; + /* KVM_EXIT_EXCEPTION */ + struct { + __u32 exception; + __u32 error_code; + } ex; + /* KVM_EXIT_IO */ + struct { +#define KVM_EXIT_IO_IN 0 +#define KVM_EXIT_IO_OUT 1 + __u8 direction; + __u8 size; /* bytes */ + __u16 port; + __u32 count; + __u64 data_offset; /* relative to kvm_run start */ + } io; + /* KVM_EXIT_DEBUG */ + struct { + struct kvm_debug_exit_arch arch; + } debug; + /* KVM_EXIT_MMIO */ + struct { + __u64 phys_addr; + __u8 data[8]; + __u32 len; + __u8 is_write; + } mmio; + /* KVM_EXIT_HYPERCALL */ + struct { + __u64 nr; + __u64 args[6]; + __u64 ret; + __u32 longmode; + __u32 pad; + } hypercall; + /* KVM_EXIT_TPR_ACCESS */ + struct { + __u64 rip; + __u32 is_write; + __u32 pad; + } tpr_access; + /* KVM_EXIT_S390_SIEIC */ + struct { + __u8 icptcode; + __u16 ipa; + __u32 ipb; + } s390_sieic; + /* KVM_EXIT_S390_RESET */ +#define KVM_S390_RESET_POR 1 +#define KVM_S390_RESET_CLEAR 2 +#define KVM_S390_RESET_SUBSYSTEM 4 +#define KVM_S390_RESET_CPU_INIT 8 +#define KVM_S390_RESET_IPL 16 + __u64 s390_reset_flags; + /* KVM_EXIT_S390_UCONTROL */ + struct { + __u64 trans_exc_code; + __u32 pgm_code; + } s390_ucontrol; + /* KVM_EXIT_DCR (deprecated) */ + struct { + __u32 dcrn; + __u32 data; + __u8 is_write; + } dcr; + /* KVM_EXIT_INTERNAL_ERROR */ + struct { + __u32 suberror; + /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */ + __u32 ndata; + __u64 data[16]; + } internal; + /* + * KVM_INTERNAL_ERROR_EMULATION + * + * "struct emulation_failure" is an overlay of "struct internal" + * that is used for the KVM_INTERNAL_ERROR_EMULATION sub-type of + * KVM_EXIT_INTERNAL_ERROR. Note, unlike other internal error + * sub-types, this struct is ABI! It also needs to be backwards + * compatible with "struct internal". Take special care that + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; + __u8 insn_size; + __u8 insn_bytes[15]; + } emulation_failure; + /* KVM_EXIT_OSI */ + struct { + __u64 gprs[32]; + } osi; + /* KVM_EXIT_PAPR_HCALL */ + struct { + __u64 nr; + __u64 ret; + __u64 args[9]; + } papr_hcall; + /* KVM_EXIT_S390_TSCH */ + struct { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; + __u32 ipb; + __u8 dequeued; + } s390_tsch; + /* KVM_EXIT_EPR */ + struct { + __u32 epr; + } epr; + /* KVM_EXIT_SYSTEM_EVENT */ + struct { +#define KVM_SYSTEM_EVENT_SHUTDOWN 1 +#define KVM_SYSTEM_EVENT_RESET 2 +#define KVM_SYSTEM_EVENT_CRASH 3 + __u32 type; + __u64 flags; + } system_event; + /* KVM_EXIT_S390_STSI */ + struct { + __u64 addr; + __u8 ar; + __u8 reserved; + __u8 fc; + __u8 sel1; + __u16 sel2; + } s390_stsi; + /* KVM_EXIT_IOAPIC_EOI */ + struct { + __u8 vector; + } eoi; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; + /* KVM_EXIT_ARM_NISV */ + struct { + __u64 esr_iss; + __u64 fault_ipa; + } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; + /* Fix the size of the union. */ + char padding[256]; + }; + + /* 2048 is the size of the char array used to bound/pad the size + * of the union that holds sync regs. + */ + #define SYNC_REGS_SIZE_BYTES 2048 + /* + * shared registers between kvm and userspace. + * kvm_valid_regs specifies the register classes set by the host + * kvm_dirty_regs specified the register classes dirtied by userspace + * struct kvm_sync_regs is architecture specific, as well as the + * bits for kvm_valid_regs and kvm_dirty_regs + */ + __u64 kvm_valid_regs; + __u64 kvm_dirty_regs; + union { + struct kvm_sync_regs regs; + char padding[SYNC_REGS_SIZE_BYTES]; + } s; +}; + +/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */ + +struct kvm_coalesced_mmio_zone { + __u64 addr; + __u32 size; + union { + __u32 pad; + __u32 pio; + }; +}; + +struct kvm_coalesced_mmio { + __u64 phys_addr; + __u32 len; + union { + __u32 pad; + __u32 pio; + }; + __u8 data[8]; +}; + +struct kvm_coalesced_mmio_ring { + __u32 first, last; + struct kvm_coalesced_mmio coalesced_mmio[0]; +}; + +#define KVM_COALESCED_MMIO_MAX \ + ((PAGE_SIZE - sizeof(struct kvm_coalesced_mmio_ring)) / \ + sizeof(struct kvm_coalesced_mmio)) + +/* for KVM_TRANSLATE */ +struct kvm_translation { + /* in */ + __u64 linear_address; + + /* out */ + __u64 physical_address; + __u8 valid; + __u8 writeable; + __u8 usermode; + __u8 pad[5]; +}; + +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + union { + __u8 ar; /* the access register number */ + __u32 sida_offset; /* offset into the sida */ + __u8 reserved[32]; /* should be set to 0 */ + }; +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +#define KVM_S390_MEMOP_SIDA_READ 2 +#define KVM_S390_MEMOP_SIDA_WRITE 3 +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) + +/* for KVM_INTERRUPT */ +struct kvm_interrupt { + /* in */ + __u32 irq; +}; + +/* for KVM_GET_DIRTY_LOG */ +struct kvm_dirty_log { + __u32 slot; + __u32 padding1; + union { + void *dirty_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + +/* for KVM_CLEAR_DIRTY_LOG */ +struct kvm_clear_dirty_log { + __u32 slot; + __u32 num_pages; + __u64 first_page; + union { + void *dirty_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + +/* for KVM_SET_SIGNAL_MASK */ +struct kvm_signal_mask { + __u32 len; + __u8 sigset[0]; +}; + +/* for KVM_TPR_ACCESS_REPORTING */ +struct kvm_tpr_access_ctl { + __u32 enabled; + __u32 flags; + __u32 reserved[8]; +}; + +/* for KVM_SET_VAPIC_ADDR */ +struct kvm_vapic_addr { + __u64 vapic_addr; +}; + +/* for KVM_SET_MP_STATE */ + +/* not all states are valid on all architectures */ +#define KVM_MP_STATE_RUNNABLE 0 +#define KVM_MP_STATE_UNINITIALIZED 1 +#define KVM_MP_STATE_INIT_RECEIVED 2 +#define KVM_MP_STATE_HALTED 3 +#define KVM_MP_STATE_SIPI_RECEIVED 4 +#define KVM_MP_STATE_STOPPED 5 +#define KVM_MP_STATE_CHECK_STOP 6 +#define KVM_MP_STATE_OPERATING 7 +#define KVM_MP_STATE_LOAD 8 +#define KVM_MP_STATE_AP_RESET_HOLD 9 + +struct kvm_mp_state { + __u32 mp_state; +}; + +struct kvm_s390_psw { + __u64 mask; + __u64 addr; +}; + +/* valid values for type in kvm_s390_interrupt */ +#define KVM_S390_SIGP_STOP 0xfffe0000u +#define KVM_S390_PROGRAM_INT 0xfffe0001u +#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u +#define KVM_S390_RESTART 0xfffe0003u +#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u +#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u +#define KVM_S390_MCHK 0xfffe1000u +#define KVM_S390_INT_CLOCK_COMP 0xffff1004u +#define KVM_S390_INT_CPU_TIMER 0xffff1005u +#define KVM_S390_INT_VIRTIO 0xffff2603u +#define KVM_S390_INT_SERVICE 0xffff2401u +#define KVM_S390_INT_EMERGENCY 0xffff1201u +#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u +/* Anything below 0xfffe0000u is taken by INT_IO */ +#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \ + (((schid)) | \ + ((ssid) << 16) | \ + ((cssid) << 18) | \ + ((ai) << 26)) +#define KVM_S390_INT_IO_MIN 0x00000000u +#define KVM_S390_INT_IO_MAX 0xfffdffffu +#define KVM_S390_INT_IO_AI_MASK 0x04000000u + + +struct kvm_s390_interrupt { + __u32 type; + __u32 parm; + __u64 parm64; +}; + +struct kvm_s390_io_info { + __u16 subchannel_id; + __u16 subchannel_nr; + __u32 io_int_parm; + __u32 io_int_word; +}; + +struct kvm_s390_ext_info { + __u32 ext_params; + __u32 pad; + __u64 ext_params2; +}; + +struct kvm_s390_pgm_info { + __u64 trans_exc_code; + __u64 mon_code; + __u64 per_address; + __u32 data_exc_code; + __u16 code; + __u16 mon_class_nr; + __u8 per_code; + __u8 per_atmid; + __u8 exc_access_id; + __u8 per_access_id; + __u8 op_access_id; +#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01 +#define KVM_S390_PGM_FLAGS_ILC_0 0x02 +#define KVM_S390_PGM_FLAGS_ILC_1 0x04 +#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06 +#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08 + __u8 flags; + __u8 pad[2]; +}; + +struct kvm_s390_prefix_info { + __u32 address; +}; + +struct kvm_s390_extcall_info { + __u16 code; +}; + +struct kvm_s390_emerg_info { + __u16 code; +}; + +#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01 +struct kvm_s390_stop_info { + __u32 flags; +}; + +struct kvm_s390_mchk_info { + __u64 cr14; + __u64 mcic; + __u64 failing_storage_address; + __u32 ext_damage_code; + __u32 pad; + __u8 fixed_logout[16]; +}; + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; /* will stay unused for compatibility reasons */ + __u32 len; + __u32 reserved[4]; /* will stay unused for compatibility reasons */ +}; + +/* for KVM_SET_GUEST_DEBUG */ + +#define KVM_GUESTDBG_ENABLE 0x00000001 +#define KVM_GUESTDBG_SINGLESTEP 0x00000002 + +struct kvm_guest_debug { + __u32 control; + __u32 pad; + struct kvm_guest_debug_arch arch; +}; + +enum { + kvm_ioeventfd_flag_nr_datamatch, + kvm_ioeventfd_flag_nr_pio, + kvm_ioeventfd_flag_nr_deassign, + kvm_ioeventfd_flag_nr_virtio_ccw_notify, + kvm_ioeventfd_flag_nr_fast_mmio, + kvm_ioeventfd_flag_nr_max, +}; + +#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch) +#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio) +#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign) +#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \ + (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify) + +#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1) + +struct kvm_ioeventfd { + __u64 datamatch; + __u64 addr; /* legal pio/mmio address */ + __u32 len; /* 1, 2, 4, or 8 bytes; or 0 to ignore length */ + __s32 fd; + __u32 flags; + __u8 pad[36]; +}; + +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HLT (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HLT | \ + KVM_X86_DISABLE_EXITS_PAUSE | \ + KVM_X86_DISABLE_EXITS_CSTATE) + +/* for KVM_ENABLE_CAP */ +struct kvm_enable_cap { + /* in */ + __u32 cap; + __u32 flags; + __u64 args[4]; + __u8 pad[64]; +}; + +/* for KVM_PPC_GET_PVINFO */ + +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + +struct kvm_ppc_pvinfo { + /* out */ + __u32 flags; + __u32 hcall[4]; + __u8 pad[108]; +}; + +/* for KVM_PPC_GET_SMMU_INFO */ +#define KVM_PPC_PAGE_SIZES_MAX_SZ 8 + +struct kvm_ppc_one_page_size { + __u32 page_shift; /* Page shift (or 0) */ + __u32 pte_enc; /* Encoding in the HPTE (>>12) */ +}; + +struct kvm_ppc_one_seg_page_size { + __u32 page_shift; /* Base page shift of segment (or 0) */ + __u32 slb_enc; /* SLB encoding for BookS */ + struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +#define KVM_PPC_PAGE_SIZES_REAL 0x00000001 +#define KVM_PPC_1T_SEGMENTS 0x00000002 +#define KVM_PPC_NO_HASH 0x00000004 + +struct kvm_ppc_smmu_info { + __u64 flags; + __u32 slb_size; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ + struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; +}; + +/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */ +struct kvm_ppc_resize_hpt { + __u64 flags; + __u32 shift; + __u32 pad; +}; + +#define KVMIO 0xAE + +/* machine type bits, to be used as argument to KVM_CREATE_VM */ +#define KVM_VM_S390_UCONTROL 1 + +/* on ppc, 0 indicate default, 1 should force HV and 2 PR */ +#define KVM_VM_PPC_HV 1 +#define KVM_VM_PPC_PR 2 + +/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */ +#define KVM_VM_MIPS_AUTO 0 +#define KVM_VM_MIPS_VZ 1 +#define KVM_VM_MIPS_TE 2 + +#define KVM_S390_SIE_PAGE_OFFSET 1 + +/* + * On arm64, machine type can be used to request the physical + * address size for the VM. Bits[7-0] are reserved for the guest + * PA size shift (i.e, log2(PA_Size)). For backward compatibility, + * value 0 implies the default IPA size, 40bits. + */ +#define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL +#define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ + ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) +/* + * ioctls for /dev/kvm fds: + */ +#define KVM_GET_API_VERSION _IO(KVMIO, 0x00) +#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */ +#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list) + +#define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06) +/* + * Check if a kvm extension is available. Argument is extension number, + * return is 1 (yes) or 0 (no, sorry). + */ +#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03) +/* + * Get size for mmap(vcpu_fd) + */ +#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ +#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) +#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06 +#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07 +#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08 +#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2) +#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list) + +/* + * Extension capability list. + */ +#define KVM_CAP_IRQCHIP 0 +#define KVM_CAP_HLT 1 +#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2 +#define KVM_CAP_USER_MEMORY 3 +#define KVM_CAP_SET_TSS_ADDR 4 +#define KVM_CAP_VAPIC 6 +#define KVM_CAP_EXT_CPUID 7 +#define KVM_CAP_CLOCKSOURCE 8 +#define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */ +#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ +#define KVM_CAP_PIT 11 +#define KVM_CAP_NOP_IO_DELAY 12 +#define KVM_CAP_PV_MMU 13 +#define KVM_CAP_MP_STATE 14 +#define KVM_CAP_COALESCED_MMIO 15 +#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */ +#define KVM_CAP_IOMMU 18 +/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21 +#define KVM_CAP_USER_NMI 22 +#ifdef __KVM_HAVE_GUEST_DEBUG +#define KVM_CAP_SET_GUEST_DEBUG 23 +#endif +#ifdef __KVM_HAVE_PIT +#define KVM_CAP_REINJECT_CONTROL 24 +#endif +#define KVM_CAP_IRQ_ROUTING 25 +#define KVM_CAP_IRQ_INJECT_STATUS 26 +#define KVM_CAP_ASSIGN_DEV_IRQ 29 +/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ +#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 +#ifdef __KVM_HAVE_MCE +#define KVM_CAP_MCE 31 +#endif +#define KVM_CAP_IRQFD 32 +#ifdef __KVM_HAVE_PIT +#define KVM_CAP_PIT2 33 +#endif +#define KVM_CAP_SET_BOOT_CPU_ID 34 +#ifdef __KVM_HAVE_PIT_STATE2 +#define KVM_CAP_PIT_STATE2 35 +#endif +#define KVM_CAP_IOEVENTFD 36 +#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37 +#ifdef __KVM_HAVE_XEN_HVM +#define KVM_CAP_XEN_HVM 38 +#endif +#define KVM_CAP_ADJUST_CLOCK 39 +#define KVM_CAP_INTERNAL_ERROR_DATA 40 +#ifdef __KVM_HAVE_VCPU_EVENTS +#define KVM_CAP_VCPU_EVENTS 41 +#endif +#define KVM_CAP_S390_PSW 42 +#define KVM_CAP_PPC_SEGSTATE 43 +#define KVM_CAP_HYPERV 44 +#define KVM_CAP_HYPERV_VAPIC 45 +#define KVM_CAP_HYPERV_SPIN 46 +#define KVM_CAP_PCI_SEGMENT 47 +#define KVM_CAP_PPC_PAIRED_SINGLES 48 +#define KVM_CAP_INTR_SHADOW 49 +#ifdef __KVM_HAVE_DEBUGREGS +#define KVM_CAP_DEBUGREGS 50 +#endif +#define KVM_CAP_X86_ROBUST_SINGLESTEP 51 +#define KVM_CAP_PPC_OSI 52 +#define KVM_CAP_PPC_UNSET_IRQ 53 +#define KVM_CAP_ENABLE_CAP 54 +#ifdef __KVM_HAVE_XSAVE +#define KVM_CAP_XSAVE 55 +#endif +#ifdef __KVM_HAVE_XCRS +#define KVM_CAP_XCRS 56 +#endif +#define KVM_CAP_PPC_GET_PVINFO 57 +#define KVM_CAP_PPC_IRQ_LEVEL 58 +#define KVM_CAP_ASYNC_PF 59 +#define KVM_CAP_TSC_CONTROL 60 +#define KVM_CAP_GET_TSC_KHZ 61 +#define KVM_CAP_PPC_BOOKE_SREGS 62 +#define KVM_CAP_SPAPR_TCE 63 +#define KVM_CAP_PPC_SMT 64 +#define KVM_CAP_PPC_RMA 65 +#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ +#define KVM_CAP_PPC_HIOR 67 +#define KVM_CAP_PPC_PAPR 68 +#define KVM_CAP_SW_TLB 69 +#define KVM_CAP_ONE_REG 70 +#define KVM_CAP_S390_GMAP 71 +#define KVM_CAP_TSC_DEADLINE_TIMER 72 +#define KVM_CAP_S390_UCONTROL 73 +#define KVM_CAP_SYNC_REGS 74 +#define KVM_CAP_PCI_2_3 75 +#define KVM_CAP_KVMCLOCK_CTRL 76 +#define KVM_CAP_SIGNAL_MSI 77 +#define KVM_CAP_PPC_GET_SMMU_INFO 78 +#define KVM_CAP_S390_COW 79 +#define KVM_CAP_PPC_ALLOC_HTAB 80 +#define KVM_CAP_READONLY_MEM 81 +#define KVM_CAP_IRQFD_RESAMPLE 82 +#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 +#define KVM_CAP_PPC_HTAB_FD 84 +#define KVM_CAP_S390_CSS_SUPPORT 85 +#define KVM_CAP_PPC_EPR 86 +#define KVM_CAP_ARM_PSCI 87 +#define KVM_CAP_ARM_SET_DEVICE_ADDR 88 +#define KVM_CAP_DEVICE_CTRL 89 +#define KVM_CAP_IRQ_MPIC 90 +#define KVM_CAP_PPC_RTAS 91 +#define KVM_CAP_IRQ_XICS 92 +#define KVM_CAP_ARM_EL1_32BIT 93 +#define KVM_CAP_SPAPR_MULTITCE 94 +#define KVM_CAP_EXT_EMUL_CPUID 95 +#define KVM_CAP_HYPERV_TIME 96 +#define KVM_CAP_IOAPIC_POLARITY_IGNORED 97 +#define KVM_CAP_ENABLE_CAP_VM 98 +#define KVM_CAP_S390_IRQCHIP 99 +#define KVM_CAP_IOEVENTFD_NO_LENGTH 100 +#define KVM_CAP_VM_ATTRIBUTES 101 +#define KVM_CAP_ARM_PSCI_0_2 102 +#define KVM_CAP_PPC_FIXUP_HCALL 103 +#define KVM_CAP_PPC_ENABLE_HCALL 104 +#define KVM_CAP_CHECK_EXTENSION_VM 105 +#define KVM_CAP_S390_USER_SIGP 106 +#define KVM_CAP_S390_VECTOR_REGISTERS 107 +#define KVM_CAP_S390_MEM_OP 108 +#define KVM_CAP_S390_USER_STSI 109 +#define KVM_CAP_S390_SKEYS 110 +#define KVM_CAP_MIPS_FPU 111 +#define KVM_CAP_MIPS_MSA 112 +#define KVM_CAP_S390_INJECT_IRQ 113 +#define KVM_CAP_S390_IRQ_STATE 114 +#define KVM_CAP_PPC_HWRNG 115 +#define KVM_CAP_DISABLE_QUIRKS 116 +#define KVM_CAP_X86_SMM 117 +#define KVM_CAP_MULTI_ADDRESS_SPACE 118 +#define KVM_CAP_GUEST_DEBUG_HW_BPS 119 +#define KVM_CAP_GUEST_DEBUG_HW_WPS 120 +#define KVM_CAP_SPLIT_IRQCHIP 121 +#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 +#define KVM_CAP_HYPERV_SYNIC 123 +#define KVM_CAP_S390_RI 124 +#define KVM_CAP_SPAPR_TCE_64 125 +#define KVM_CAP_ARM_PMU_V3 126 +#define KVM_CAP_VCPU_ATTRIBUTES 127 +#define KVM_CAP_MAX_VCPU_ID 128 +#define KVM_CAP_X2APIC_API 129 +#define KVM_CAP_S390_USER_INSTR0 130 +#define KVM_CAP_MSI_DEVID 131 +#define KVM_CAP_PPC_HTM 132 +#define KVM_CAP_SPAPR_RESIZE_HPT 133 +#define KVM_CAP_PPC_MMU_RADIX 134 +#define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 +#define KVM_CAP_MIPS_VZ 137 +#define KVM_CAP_MIPS_TE 138 +#define KVM_CAP_MIPS_64BIT 139 +#define KVM_CAP_S390_GS 140 +#define KVM_CAP_S390_AIS 141 +#define KVM_CAP_SPAPR_TCE_VFIO 142 +#define KVM_CAP_X86_DISABLE_EXITS 143 +#define KVM_CAP_ARM_USER_IRQ 144 +#define KVM_CAP_S390_CMMA_MIGRATION 145 +#define KVM_CAP_PPC_FWNMI 146 +#define KVM_CAP_PPC_SMT_POSSIBLE 147 +#define KVM_CAP_HYPERV_SYNIC2 148 +#define KVM_CAP_HYPERV_VP_INDEX 149 +#define KVM_CAP_S390_AIS_MIGRATION 150 +#define KVM_CAP_PPC_GET_CPU_CHAR 151 +#define KVM_CAP_S390_BPB 152 +#define KVM_CAP_GET_MSR_FEATURES 153 +#define KVM_CAP_HYPERV_EVENTFD 154 +#define KVM_CAP_HYPERV_TLBFLUSH 155 +#define KVM_CAP_S390_HPAGE_1M 156 +#define KVM_CAP_NESTED_STATE 157 +#define KVM_CAP_ARM_INJECT_SERROR_ESR 158 +#define KVM_CAP_MSR_PLATFORM_INFO 159 +#define KVM_CAP_PPC_NESTED_HV 160 +#define KVM_CAP_HYPERV_SEND_IPI 161 +#define KVM_CAP_COALESCED_PIO 162 +#define KVM_CAP_HYPERV_ENLIGHTENED_VMCS 163 +#define KVM_CAP_EXCEPTION_PAYLOAD 164 +#define KVM_CAP_ARM_VM_IPA_SIZE 165 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT 166 /* Obsolete */ +#define KVM_CAP_HYPERV_CPUID 167 +#define KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 168 +#define KVM_CAP_PPC_IRQ_XIVE 169 +#define KVM_CAP_ARM_SVE 170 +#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 +#define KVM_CAP_ARM_PTRAUTH_GENERIC 172 +#define KVM_CAP_PMU_EVENT_FILTER 173 +#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174 +#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175 +#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176 +#define KVM_CAP_ARM_NISV_TO_USER 177 +#define KVM_CAP_ARM_INJECT_EXT_DABT 178 +#define KVM_CAP_S390_VCPU_RESETS 179 +#define KVM_CAP_S390_PROTECTED 180 +#define KVM_CAP_PPC_SECURE_GUEST 181 +#define KVM_CAP_HALT_POLL 182 +#define KVM_CAP_ASYNC_PF_INT 183 +#define KVM_CAP_LAST_CPU 184 +#define KVM_CAP_SMALLER_MAXPHYADDR 185 +#define KVM_CAP_S390_DIAG318 186 +#define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 +#define KVM_CAP_SYS_HYPERV_CPUID 191 +#define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 +#define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 +#define KVM_CAP_HYPERV_ENFORCE_CPUID 199 +#define KVM_CAP_SREGS2 200 +#define KVM_CAP_EXIT_HYPERCALL 201 +#define KVM_CAP_PPC_RPT_INVALIDATE 202 +#define KVM_CAP_BINARY_STATS_FD 203 +#define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 +#define KVM_CAP_ARM_MTE 205 + +#ifdef KVM_CAP_IRQ_ROUTING + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + union { + __u32 pad; + __u32 devid; + }; +}; + +struct kvm_irq_routing_s390_adapter { + __u64 ind_addr; + __u64 summary_addr; + __u64 ind_offset; + __u32 summary_offset; + __u32 adapter_id; +}; + +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 +#define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; + __u32 pad[8]; + } u; +}; + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + +#endif + +#ifdef KVM_CAP_MCE +/* x86 MCE */ +struct kvm_x86_mce { + __u64 status; + __u64 addr; + __u64 misc; + __u64 mcg_status; + __u8 bank; + __u8 pad1[7]; + __u64 pad2[3]; +}; +#endif + +#ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) + +struct kvm_xen_hvm_config { + __u32 flags; + __u32 msr; + __u64 blob_addr_32; + __u64 blob_addr_64; + __u8 blob_size_32; + __u8 blob_size_64; + __u8 pad2[30]; +}; +#endif + +#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0) +/* + * Available with KVM_CAP_IRQFD_RESAMPLE + * + * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies + * the irqfd to operate in resampling mode for level triggered interrupt + * emulation. See Documentation/virt/kvm/api.rst. + */ +#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1) + +struct kvm_irqfd { + __u32 fd; + __u32 gsi; + __u32 flags; + __u32 resamplefd; + __u8 pad[16]; +}; + +/* For KVM_CAP_ADJUST_CLOCK */ + +/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ +#define KVM_CLOCK_TSC_STABLE 2 + +struct kvm_clock_data { + __u64 clock; + __u32 flags; + __u32 pad[9]; +}; + +/* For KVM_CAP_SW_TLB */ + +#define KVM_MMU_FSL_BOOKE_NOHV 0 +#define KVM_MMU_FSL_BOOKE_HV 1 + +struct kvm_config_tlb { + __u64 params; + __u64 array; + __u32 mmu_type; + __u32 array_len; +}; + +struct kvm_dirty_tlb { + __u64 bitmap; + __u32 num_dirty; +}; + +/* Available with KVM_CAP_ONE_REG */ + +#define KVM_REG_ARCH_MASK 0xff00000000000000ULL +#define KVM_REG_GENERIC 0x0000000000000000ULL + +/* + * Architecture specific registers are to be defined in arch headers and + * ORed with the arch identifier. + */ +#define KVM_REG_PPC 0x1000000000000000ULL +#define KVM_REG_X86 0x2000000000000000ULL +#define KVM_REG_IA64 0x3000000000000000ULL +#define KVM_REG_ARM 0x4000000000000000ULL +#define KVM_REG_S390 0x5000000000000000ULL +#define KVM_REG_ARM64 0x6000000000000000ULL +#define KVM_REG_MIPS 0x7000000000000000ULL +#define KVM_REG_RISCV 0x8000000000000000ULL + +#define KVM_REG_SIZE_SHIFT 52 +#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL +#define KVM_REG_SIZE_U8 0x0000000000000000ULL +#define KVM_REG_SIZE_U16 0x0010000000000000ULL +#define KVM_REG_SIZE_U32 0x0020000000000000ULL +#define KVM_REG_SIZE_U64 0x0030000000000000ULL +#define KVM_REG_SIZE_U128 0x0040000000000000ULL +#define KVM_REG_SIZE_U256 0x0050000000000000ULL +#define KVM_REG_SIZE_U512 0x0060000000000000ULL +#define KVM_REG_SIZE_U1024 0x0070000000000000ULL +#define KVM_REG_SIZE_U2048 0x0080000000000000ULL + +struct kvm_reg_list { + __u64 n; /* number of regs */ + __u64 reg[0]; +}; + +struct kvm_one_reg { + __u64 id; + __u64 addr; +}; + +#define KVM_MSI_VALID_DEVID (1U << 0) +struct kvm_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 flags; + __u32 devid; + __u8 pad[12]; +}; + +struct kvm_arm_device_addr { + __u64 id; + __u64 addr; +}; + +/* + * Device control API, available with KVM_CAP_DEVICE_CTRL + */ +#define KVM_CREATE_DEVICE_TEST 1 + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +#define KVM_DEV_VFIO_GROUP 1 +#define KVM_DEV_VFIO_GROUP_ADD 1 +#define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3 + +enum kvm_device_type { + KVM_DEV_TYPE_FSL_MPIC_20 = 1, +#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20 + KVM_DEV_TYPE_FSL_MPIC_42, +#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42 + KVM_DEV_TYPE_XICS, +#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS + KVM_DEV_TYPE_VFIO, +#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO + KVM_DEV_TYPE_ARM_VGIC_V2, +#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2 + KVM_DEV_TYPE_FLIC, +#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC + KVM_DEV_TYPE_ARM_VGIC_V3, +#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 + KVM_DEV_TYPE_ARM_VGIC_ITS, +#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS + KVM_DEV_TYPE_XIVE, +#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE + KVM_DEV_TYPE_ARM_PV_TIME, +#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME + KVM_DEV_TYPE_MAX, +}; + +struct kvm_vfio_spapr_tce { + __s32 groupfd; + __s32 tablefd; +}; + +/* + * ioctls for VM fds + */ +#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region) +/* + * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns + * a vcpu fd. + */ +#define KVM_CREATE_VCPU _IO(KVMIO, 0x41) +#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) +/* KVM_SET_MEMORY_ALIAS is obsolete: */ +#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) +#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44) +#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45) +#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \ + struct kvm_userspace_memory_region) +#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) +#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) + +/* enable ucontrol for s390 */ +struct kvm_s390_ucas_mapping { + __u64 user_addr; + __u64 vcpu_addr; + __u64 length; +}; +#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping) +#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping) +#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long) + +/* Device model IOC */ +#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) +#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) +#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip) +#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip) +#define KVM_CREATE_PIT _IO(KVMIO, 0x64) +#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state) +#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state) +#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level) +#define KVM_REGISTER_COALESCED_MMIO \ + _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone) +#define KVM_UNREGISTER_COALESCED_MMIO \ + _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone) +#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ + struct kvm_assigned_pci_dev) +#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ +#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70 +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) +#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) +#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ + struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \ + struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \ + struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) +#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd) +#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config) +#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78) +#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd) +#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config) +#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data) +#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data) +/* Available with KVM_CAP_PIT_STATE2 */ +#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2) +#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2) +/* Available with KVM_CAP_PPC_GET_PVINFO */ +#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo) +/* Available with KVM_CAP_TSC_CONTROL */ +#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) +#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) +/* Available with KVM_CAP_PCI_2_3 */ +#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ + struct kvm_assigned_pci_dev) +/* Available with KVM_CAP_SIGNAL_MSI */ +#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi) +/* Available with KVM_CAP_PPC_GET_SMMU_INFO */ +#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) +/* Available with KVM_CAP_PPC_ALLOC_HTAB */ +#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) +#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) +#define KVM_CREATE_SPAPR_TCE_64 _IOW(KVMIO, 0xa8, \ + struct kvm_create_spapr_tce_64) +/* Available with KVM_CAP_RMA */ +#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_PPC_HTAB_FD */ +#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) +/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */ +#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr) +/* Available with KVM_CAP_PPC_RTAS */ +#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args) +/* Available with KVM_CAP_SPAPR_RESIZE_HPT */ +#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt) +#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt) +/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */ +#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg) +/* Available with KVM_CAP_PPC_RADIX_MMU */ +#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) +/* Available with KVM_CAP_PPC_GET_CPU_CHAR */ +#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) +/* Available with KVM_CAP_PMU_EVENT_FILTER */ +#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) +#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3) +#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags) + +/* ioctl for vm fd */ +#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device) + +/* ioctls for fds returned by KVM_CREATE_DEVICE */ +#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr) +#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr) +#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr) + +/* + * ioctls for vcpu fds + */ +#define KVM_RUN _IO(KVMIO, 0x80) +#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs) +#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs) +#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs) +#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs) +#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation) +#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt) +/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */ +#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87 +#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs) +#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs) +#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid) +#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask) +#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu) +#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu) +#define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state) +#define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state) +#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2) +#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2) +/* Available with KVM_CAP_VAPIC */ +#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl) +/* Available with KVM_CAP_VAPIC */ +#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr) +/* valid for virtual machine (for floating interrupt)_and_ vcpu */ +#define KVM_S390_INTERRUPT _IOW(KVMIO, 0x94, struct kvm_s390_interrupt) +/* store status for s390 */ +#define KVM_S390_STORE_STATUS_NOADDR (-1ul) +#define KVM_S390_STORE_STATUS_PREFIXED (-2ul) +#define KVM_S390_STORE_STATUS _IOW(KVMIO, 0x95, unsigned long) +/* initial ipl psw for s390 */ +#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw) +/* initial reset for s390 */ +#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97) +#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state) +#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state) +/* Available with KVM_CAP_USER_NMI */ +#define KVM_NMI _IO(KVMIO, 0x9a) +/* Available with KVM_CAP_SET_GUEST_DEBUG */ +#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug) +/* MCE for x86 */ +#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64) +#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64) +#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce) +/* Available with KVM_CAP_VCPU_EVENTS */ +#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events) +#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events) +/* Available with KVM_CAP_DEBUGREGS */ +#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs) +#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs) +/* + * vcpu version available with KVM_ENABLE_CAP + * vm version available with KVM_CAP_ENABLE_CAP_VM + */ +#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap) +/* Available with KVM_CAP_XSAVE */ +#define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) +#define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave) +/* Available with KVM_CAP_XCRS */ +#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) +#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) +/* Available with KVM_CAP_SW_TLB */ +#define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) +/* Available with KVM_CAP_ONE_REG */ +#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg) +#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) +/* VM is being stopped by host */ +#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad) +#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) +#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) +#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) +/* Available with KVM_CAP_S390_MEM_OP */ +#define KVM_S390_MEM_OP _IOW(KVMIO, 0xb1, struct kvm_s390_mem_op) +/* Available with KVM_CAP_S390_SKEYS */ +#define KVM_S390_GET_SKEYS _IOW(KVMIO, 0xb2, struct kvm_s390_skeys) +#define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) +/* Available with KVM_CAP_S390_INJECT_IRQ */ +#define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq) +/* Available with KVM_CAP_S390_IRQ_STATE */ +#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) +#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) +/* Available with KVM_CAP_X86_SMM */ +#define KVM_SMI _IO(KVMIO, 0xb7) +/* Available with KVM_CAP_S390_CMMA_MIGRATION */ +#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log) +#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log) +/* Memory Encryption Commands */ +#define KVM_MEMORY_ENCRYPT_OP _IOWR(KVMIO, 0xba, unsigned long) + +struct kvm_enc_region { + __u64 addr; + __u64 size; +}; + +#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region) +#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region) + +/* Available with KVM_CAP_HYPERV_EVENTFD */ +#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd) + +/* Available with KVM_CAP_NESTED_STATE */ +#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state) +#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state) + +/* Available with KVM_CAP_MANUAL_DIRTY_LOG_PROTECT_2 */ +#define KVM_CLEAR_DIRTY_LOG _IOWR(KVMIO, 0xc0, struct kvm_clear_dirty_log) + +/* Available with KVM_CAP_HYPERV_CPUID (vcpu) / KVM_CAP_SYS_HYPERV_CPUID (system) */ +#define KVM_GET_SUPPORTED_HV_CPUID _IOWR(KVMIO, 0xc1, struct kvm_cpuid2) + +/* Available with KVM_CAP_ARM_SVE */ +#define KVM_ARM_VCPU_FINALIZE _IOW(KVMIO, 0xc2, int) + +/* Available with KVM_CAP_S390_VCPU_RESETS */ +#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) +#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + +struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +}; + +struct kvm_s390_pv_unp { + __u64 addr; + __u64 size; + __u64 tweak; +}; + +enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, + KVM_PV_SET_SEC_PARMS, + KVM_PV_UNPACK, + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, +}; + +struct kvm_pv_cmd { + __u32 cmd; /* Command to be executed */ + __u16 rc; /* Ultravisor return code */ + __u16 rrc; /* Ultravisor return reason code */ + __u64 data; /* Data or address */ + __u32 flags; /* flags for future extensions. Must be 0 for now */ + __u32 reserved[3]; +}; + +/* Available with KVM_CAP_S390_PROTECTED */ +#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) + +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + +/* Available with KVM_CAP_DIRTY_LOG_RING */ +#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) + +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2) +#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 + +/* Secure Encrypted Virtualization command */ +enum sev_cmd_id { + /* Guest initialization commands */ + KVM_SEV_INIT = 0, + KVM_SEV_ES_INIT, + /* Guest launch commands */ + KVM_SEV_LAUNCH_START, + KVM_SEV_LAUNCH_UPDATE_DATA, + KVM_SEV_LAUNCH_UPDATE_VMSA, + KVM_SEV_LAUNCH_SECRET, + KVM_SEV_LAUNCH_MEASURE, + KVM_SEV_LAUNCH_FINISH, + /* Guest migration commands (outgoing) */ + KVM_SEV_SEND_START, + KVM_SEV_SEND_UPDATE_DATA, + KVM_SEV_SEND_UPDATE_VMSA, + KVM_SEV_SEND_FINISH, + /* Guest migration commands (incoming) */ + KVM_SEV_RECEIVE_START, + KVM_SEV_RECEIVE_UPDATE_DATA, + KVM_SEV_RECEIVE_UPDATE_VMSA, + KVM_SEV_RECEIVE_FINISH, + /* Guest status and debug commands */ + KVM_SEV_GUEST_STATUS, + KVM_SEV_DBG_DECRYPT, + KVM_SEV_DBG_ENCRYPT, + /* Guest certificates commands */ + KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, + + KVM_SEV_NR_MAX, +}; + +struct kvm_sev_cmd { + __u32 id; + __u64 data; + __u32 error; + __u32 sev_fd; +}; + +struct kvm_sev_launch_start { + __u32 handle; + __u32 policy; + __u64 dh_uaddr; + __u32 dh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_launch_update_data { + __u64 uaddr; + __u32 len; +}; + + +struct kvm_sev_launch_secret { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_launch_measure { + __u64 uaddr; + __u32 len; +}; + +struct kvm_sev_guest_status { + __u32 handle; + __u32 policy; + __u32 state; +}; + +struct kvm_sev_dbg { + __u64 src_uaddr; + __u64 dst_uaddr; + __u32 len; +}; + +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) + +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; + __u32 segnr; + union { + __u32 reserved[11]; + }; +}; + +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +#define KVM_DEV_IRQ_HOST_MASK 0x00ff +#define KVM_DEV_IRQ_GUEST_MASK 0xff00 + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; /* ignored (legacy field) */ + __u32 guest_irq; + __u32 flags; + union { + __u32 reserved[12]; + }; +}; + +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 256 +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +/* Available with KVM_CAP_ARM_USER_IRQ */ + +/* Bits for run->s.regs.device_irq_level */ +#define KVM_ARM_DEV_EL1_VTIMER (1 << 0) +#define KVM_ARM_DEV_EL1_PTIMER (1 << 1) +#define KVM_ARM_DEV_PMU (1 << 2) + +struct kvm_hyperv_eventfd { + __u32 conn_id; + __s32 fd; + __u32 flags; + __u32 padding[3]; +}; + +#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff +#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0) + +#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) +#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) + +/* + * Arch needs to define the macro after implementing the dirty ring + * feature. KVM_DIRTY_LOG_PAGE_OFFSET should be defined as the + * starting page offset of the dirty ring structures. + */ +#ifndef KVM_DIRTY_LOG_PAGE_OFFSET +#define KVM_DIRTY_LOG_PAGE_OFFSET 0 +#endif + +/* + * KVM dirty GFN flags, defined as: + * + * |---------------+---------------+--------------| + * | bit 1 (reset) | bit 0 (dirty) | Status | + * |---------------+---------------+--------------| + * | 0 | 0 | Invalid GFN | + * | 0 | 1 | Dirty GFN | + * | 1 | X | GFN to reset | + * |---------------+---------------+--------------| + * + * Lifecycle of a dirty GFN goes like: + * + * dirtied harvested reset + * 00 -----------> 01 -------------> 1X -------+ + * ^ | + * | | + * +------------------------------------------+ + * + * The userspace program is only responsible for the 01->1X state + * conversion after harvesting an entry. Also, it must not skip any + * dirty bits, so that dirty bits are always harvested in sequence. + */ +#define KVM_DIRTY_GFN_F_DIRTY _BITUL(0) +#define KVM_DIRTY_GFN_F_RESET _BITUL(1) +#define KVM_DIRTY_GFN_F_MASK 0x3 + +/* + * KVM dirty rings should be mapped at KVM_DIRTY_LOG_PAGE_OFFSET of + * per-vcpu mmaped regions as an array of struct kvm_dirty_gfn. The + * size of the gfn buffer is decided by the first argument when + * enabling KVM_CAP_DIRTY_LOG_RING. + */ +struct kvm_dirty_gfn { + __u32 flags; + __u32 slot; + __u64 offset; +}; + +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + +/** + * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. + * @flags: Some extra information for header, always 0 for now. + * @name_size: The size in bytes of the memory which contains statistics + * name string including trailing '\0'. The memory is allocated + * at the send of statistics descriptor. + * @num_desc: The number of statistics the vm or vcpu has. + * @id_offset: The offset of the vm/vcpu stats' id string in the file pointed + * by vm/vcpu stats fd. + * @desc_offset: The offset of the vm/vcpu stats' descriptor block in the file + * pointd by vm/vcpu stats fd. + * @data_offset: The offset of the vm/vcpu stats' data block in the file + * pointed by vm/vcpu stats fd. + * + * This is the header userspace needs to read from stats fd before any other + * readings. It is used by userspace to discover all the information about the + * vm/vcpu's binary statistics. + * Userspace reads this header from the start of the vm/vcpu's stats fd. + */ +struct kvm_stats_header { + __u32 flags; + __u32 name_size; + __u32 num_desc; + __u32 id_offset; + __u32 desc_offset; + __u32 data_offset; +}; + +#define KVM_STATS_TYPE_SHIFT 0 +#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK + +#define KVM_STATS_UNIT_SHIFT 4 +#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_NONE (0x0 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT) +#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES + +#define KVM_STATS_BASE_SHIFT 8 +#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT) +#define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2 + +/** + * struct kvm_stats_desc - Descriptor of a KVM statistics. + * @flags: Annotations of the stats, like type, unit, etc. + * @exponent: Used together with @flags to determine the unit. + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in + * struture kvm or kvm_vcpu. + * @unused: Unused field for future usage. Always 0 for now. + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. + */ +struct kvm_stats_desc { + __u32 flags; + __s16 exponent; + __u16 size; + __u32 offset; + __u32 unused; + char name[]; +}; + +#define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + +#endif /* __LINUX_KVM_H */ diff --git a/linux-headers/linux/mman.h b/linux-headers/linux/mman.h new file mode 100644 index 000000000..434986fbe --- /dev/null +++ b/linux-headers/linux/mman.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_MMAN_H +#define _LINUX_MMAN_H + +#include <asm/mman.h> +#include <asm-generic/hugetlb_encode.h> + +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 +#define MREMAP_DONTUNMAP 4 + +#define OVERCOMMIT_GUESS 0 +#define OVERCOMMIT_ALWAYS 1 +#define OVERCOMMIT_NEVER 2 + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ + +/* + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB +#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB +#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB +#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + +#endif /* _LINUX_MMAN_H */ diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h new file mode 100644 index 000000000..a6772d508 --- /dev/null +++ b/linux-headers/linux/psci.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * ARM Power State and Coordination Interface (PSCI) header + * + * This header holds common PSCI defines and macros shared + * by: ARM kernel, ARM64 kernel, KVM ARM/ARM64 and user space. + * + * Copyright (C) 2014 Linaro Ltd. + * Author: Anup Patel <anup.patel@linaro.org> + */ + +#ifndef _LINUX_PSCI_H +#define _LINUX_PSCI_H + +/* + * PSCI v0.1 interface + * + * The PSCI v0.1 function numbers are implementation defined. + * + * Only PSCI return values such as: SUCCESS, NOT_SUPPORTED, + * INVALID_PARAMS, and DENIED defined below are applicable + * to PSCI v0.1. + */ + +/* PSCI v0.2 interface */ +#define PSCI_0_2_FN_BASE 0x84000000 +#define PSCI_0_2_FN(n) (PSCI_0_2_FN_BASE + (n)) +#define PSCI_0_2_64BIT 0x40000000 +#define PSCI_0_2_FN64_BASE \ + (PSCI_0_2_FN_BASE + PSCI_0_2_64BIT) +#define PSCI_0_2_FN64(n) (PSCI_0_2_FN64_BASE + (n)) + +#define PSCI_0_2_FN_PSCI_VERSION PSCI_0_2_FN(0) +#define PSCI_0_2_FN_CPU_SUSPEND PSCI_0_2_FN(1) +#define PSCI_0_2_FN_CPU_OFF PSCI_0_2_FN(2) +#define PSCI_0_2_FN_CPU_ON PSCI_0_2_FN(3) +#define PSCI_0_2_FN_AFFINITY_INFO PSCI_0_2_FN(4) +#define PSCI_0_2_FN_MIGRATE PSCI_0_2_FN(5) +#define PSCI_0_2_FN_MIGRATE_INFO_TYPE PSCI_0_2_FN(6) +#define PSCI_0_2_FN_MIGRATE_INFO_UP_CPU PSCI_0_2_FN(7) +#define PSCI_0_2_FN_SYSTEM_OFF PSCI_0_2_FN(8) +#define PSCI_0_2_FN_SYSTEM_RESET PSCI_0_2_FN(9) + +#define PSCI_0_2_FN64_CPU_SUSPEND PSCI_0_2_FN64(1) +#define PSCI_0_2_FN64_CPU_ON PSCI_0_2_FN64(3) +#define PSCI_0_2_FN64_AFFINITY_INFO PSCI_0_2_FN64(4) +#define PSCI_0_2_FN64_MIGRATE PSCI_0_2_FN64(5) +#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + +#define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) +#define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) +#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) +#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) + +#define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) +#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) + +/* PSCI v0.2 power state encoding for CPU_SUSPEND function */ +#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +#define PSCI_0_2_POWER_STATE_ID_SHIFT 0 +#define PSCI_0_2_POWER_STATE_TYPE_SHIFT 16 +#define PSCI_0_2_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_0_2_POWER_STATE_TYPE_SHIFT) +#define PSCI_0_2_POWER_STATE_AFFL_SHIFT 24 +#define PSCI_0_2_POWER_STATE_AFFL_MASK \ + (0x3 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + +/* PSCI extended power state encoding for CPU_SUSPEND function */ +#define PSCI_1_0_EXT_POWER_STATE_ID_MASK 0xfffffff +#define PSCI_1_0_EXT_POWER_STATE_ID_SHIFT 0 +#define PSCI_1_0_EXT_POWER_STATE_TYPE_SHIFT 30 +#define PSCI_1_0_EXT_POWER_STATE_TYPE_MASK \ + (0x1 << PSCI_1_0_EXT_POWER_STATE_TYPE_SHIFT) + +/* PSCI v0.2 affinity level state returned by AFFINITY_INFO */ +#define PSCI_0_2_AFFINITY_LEVEL_ON 0 +#define PSCI_0_2_AFFINITY_LEVEL_OFF 1 +#define PSCI_0_2_AFFINITY_LEVEL_ON_PENDING 2 + +/* PSCI v0.2 multicore support in Trusted OS returned by MIGRATE_INFO_TYPE */ +#define PSCI_0_2_TOS_UP_MIGRATE 0 +#define PSCI_0_2_TOS_UP_NO_MIGRATE 1 +#define PSCI_0_2_TOS_MP 2 + +/* PSCI version decoding (independent of PSCI version) */ +#define PSCI_VERSION_MAJOR_SHIFT 16 +#define PSCI_VERSION_MINOR_MASK \ + ((1U << PSCI_VERSION_MAJOR_SHIFT) - 1) +#define PSCI_VERSION_MAJOR_MASK ~PSCI_VERSION_MINOR_MASK +#define PSCI_VERSION_MAJOR(ver) \ + (((ver) & PSCI_VERSION_MAJOR_MASK) >> PSCI_VERSION_MAJOR_SHIFT) +#define PSCI_VERSION_MINOR(ver) \ + ((ver) & PSCI_VERSION_MINOR_MASK) +#define PSCI_VERSION(maj, min) \ + ((((maj) << PSCI_VERSION_MAJOR_SHIFT) & PSCI_VERSION_MAJOR_MASK) | \ + ((min) & PSCI_VERSION_MINOR_MASK)) + +/* PSCI features decoding (>=1.0) */ +#define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT 1 +#define PSCI_1_0_FEATURES_CPU_SUSPEND_PF_MASK \ + (0x1 << PSCI_1_0_FEATURES_CPU_SUSPEND_PF_SHIFT) + +#define PSCI_1_0_OS_INITIATED BIT(0) +#define PSCI_1_0_SUSPEND_MODE_PC 0 +#define PSCI_1_0_SUSPEND_MODE_OSI 1 + +/* PSCI return values (inclusive of all PSCI versions) */ +#define PSCI_RET_SUCCESS 0 +#define PSCI_RET_NOT_SUPPORTED -1 +#define PSCI_RET_INVALID_PARAMS -2 +#define PSCI_RET_DENIED -3 +#define PSCI_RET_ALREADY_ON -4 +#define PSCI_RET_ON_PENDING -5 +#define PSCI_RET_INTERNAL_FAILURE -6 +#define PSCI_RET_NOT_PRESENT -7 +#define PSCI_RET_DISABLED -8 +#define PSCI_RET_INVALID_ADDRESS -9 + +#endif /* _LINUX_PSCI_H */ diff --git a/linux-headers/linux/psp-sev.h b/linux-headers/linux/psp-sev.h new file mode 100644 index 000000000..51d8b3940 --- /dev/null +++ b/linux-headers/linux/psp-sev.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Userspace interface for AMD Secure Encrypted Virtualization (SEV) + * platform management commands. + * + * Copyright (C) 2016-2017 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh <brijesh.singh@amd.com> + * + * SEV API specification is available at: https://developer.amd.com/sev/ + */ + +#ifndef __PSP_SEV_USER_H__ +#define __PSP_SEV_USER_H__ + +#include <linux/types.h> + +/** + * SEV platform commands + */ +enum { + SEV_FACTORY_RESET = 0, + SEV_PLATFORM_STATUS, + SEV_PEK_GEN, + SEV_PEK_CSR, + SEV_PDH_GEN, + SEV_PDH_CERT_EXPORT, + SEV_PEK_CERT_IMPORT, + SEV_GET_ID, /* This command is deprecated, use SEV_GET_ID2 */ + SEV_GET_ID2, + + SEV_MAX, +}; + +/** + * SEV Firmware status code + */ +typedef enum { + SEV_RET_SUCCESS = 0, + SEV_RET_INVALID_PLATFORM_STATE, + SEV_RET_INVALID_GUEST_STATE, + SEV_RET_INAVLID_CONFIG, + SEV_RET_INVALID_LEN, + SEV_RET_ALREADY_OWNED, + SEV_RET_INVALID_CERTIFICATE, + SEV_RET_POLICY_FAILURE, + SEV_RET_INACTIVE, + SEV_RET_INVALID_ADDRESS, + SEV_RET_BAD_SIGNATURE, + SEV_RET_BAD_MEASUREMENT, + SEV_RET_ASID_OWNED, + SEV_RET_INVALID_ASID, + SEV_RET_WBINVD_REQUIRED, + SEV_RET_DFFLUSH_REQUIRED, + SEV_RET_INVALID_GUEST, + SEV_RET_INVALID_COMMAND, + SEV_RET_ACTIVE, + SEV_RET_HWSEV_RET_PLATFORM, + SEV_RET_HWSEV_RET_UNSAFE, + SEV_RET_UNSUPPORTED, + SEV_RET_INVALID_PARAM, + SEV_RET_RESOURCE_LIMIT, + SEV_RET_SECURE_DATA_INVALID, + SEV_RET_MAX, +} sev_ret_code; + +/** + * struct sev_user_data_status - PLATFORM_STATUS command parameters + * + * @major: major API version + * @minor: minor API version + * @state: platform state + * @flags: platform config flags + * @build: firmware build id for API version + * @guest_count: number of active guests + */ +struct sev_user_data_status { + __u8 api_major; /* Out */ + __u8 api_minor; /* Out */ + __u8 state; /* Out */ + __u32 flags; /* Out */ + __u8 build; /* Out */ + __u32 guest_count; /* Out */ +} __attribute__((packed)); + +#define SEV_STATUS_FLAGS_CONFIG_ES 0x0100 + +/** + * struct sev_user_data_pek_csr - PEK_CSR command parameters + * + * @address: PEK certificate chain + * @length: length of certificate + */ +struct sev_user_data_pek_csr { + __u64 address; /* In */ + __u32 length; /* In/Out */ +} __attribute__((packed)); + +/** + * struct sev_user_data_cert_import - PEK_CERT_IMPORT command parameters + * + * @pek_address: PEK certificate chain + * @pek_len: length of PEK certificate + * @oca_address: OCA certificate chain + * @oca_len: length of OCA certificate + */ +struct sev_user_data_pek_cert_import { + __u64 pek_cert_address; /* In */ + __u32 pek_cert_len; /* In */ + __u64 oca_cert_address; /* In */ + __u32 oca_cert_len; /* In */ +} __attribute__((packed)); + +/** + * struct sev_user_data_pdh_cert_export - PDH_CERT_EXPORT command parameters + * + * @pdh_address: PDH certificate address + * @pdh_len: length of PDH certificate + * @cert_chain_address: PDH certificate chain + * @cert_chain_len: length of PDH certificate chain + */ +struct sev_user_data_pdh_cert_export { + __u64 pdh_cert_address; /* In */ + __u32 pdh_cert_len; /* In/Out */ + __u64 cert_chain_address; /* In */ + __u32 cert_chain_len; /* In/Out */ +} __attribute__((packed)); + +/** + * struct sev_user_data_get_id - GET_ID command parameters (deprecated) + * + * @socket1: Buffer to pass unique ID of first socket + * @socket2: Buffer to pass unique ID of second socket + */ +struct sev_user_data_get_id { + __u8 socket1[64]; /* Out */ + __u8 socket2[64]; /* Out */ +} __attribute__((packed)); + +/** + * struct sev_user_data_get_id2 - GET_ID command parameters + * @address: Buffer to store unique ID + * @length: length of the unique ID + */ +struct sev_user_data_get_id2 { + __u64 address; /* In */ + __u32 length; /* In/Out */ +} __attribute__((packed)); + +/** + * struct sev_issue_cmd - SEV ioctl parameters + * + * @cmd: SEV commands to execute + * @opaque: pointer to the command structure + * @error: SEV FW return code on failure + */ +struct sev_issue_cmd { + __u32 cmd; /* In */ + __u64 data; /* In */ + __u32 error; /* Out */ +} __attribute__((packed)); + +#define SEV_IOC_TYPE 'S' +#define SEV_ISSUE_CMD _IOWR(SEV_IOC_TYPE, 0x0, struct sev_issue_cmd) + +#endif /* __PSP_USER_SEV_H */ diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h new file mode 100644 index 000000000..8479af5f4 --- /dev/null +++ b/linux-headers/linux/userfaultfd.h @@ -0,0 +1,302 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * include/linux/userfaultfd.h + * + * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> + * Copyright (C) 2015 Red Hat, Inc. + * + */ + +#ifndef _LINUX_USERFAULTFD_H +#define _LINUX_USERFAULTFD_H + +#include <linux/types.h> + +/* + * If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In + * userfaultfd.h we assumed the kernel was reading (instead _IOC_READ + * means the userland is reading). + */ +#define UFFD_API ((__u64)0xAA) +#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \ + UFFDIO_REGISTER_MODE_WP | \ + UFFDIO_REGISTER_MODE_MINOR) +#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \ + UFFD_FEATURE_EVENT_FORK | \ + UFFD_FEATURE_EVENT_REMAP | \ + UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_UNMAP | \ + UFFD_FEATURE_MISSING_HUGETLBFS | \ + UFFD_FEATURE_MISSING_SHMEM | \ + UFFD_FEATURE_SIGBUS | \ + UFFD_FEATURE_THREAD_ID | \ + UFFD_FEATURE_MINOR_HUGETLBFS | \ + UFFD_FEATURE_MINOR_SHMEM) +#define UFFD_API_IOCTLS \ + ((__u64)1 << _UFFDIO_REGISTER | \ + (__u64)1 << _UFFDIO_UNREGISTER | \ + (__u64)1 << _UFFDIO_API) +#define UFFD_API_RANGE_IOCTLS \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_ZEROPAGE | \ + (__u64)1 << _UFFDIO_WRITEPROTECT | \ + (__u64)1 << _UFFDIO_CONTINUE) +#define UFFD_API_RANGE_IOCTLS_BASIC \ + ((__u64)1 << _UFFDIO_WAKE | \ + (__u64)1 << _UFFDIO_COPY | \ + (__u64)1 << _UFFDIO_CONTINUE) + +/* + * Valid ioctl command number range with this API is from 0x00 to + * 0x3F. UFFDIO_API is the fixed number, everything else can be + * changed by implementing a different UFFD_API. If sticking to the + * same UFFD_API more ioctl can be added and userland will be aware of + * which ioctl the running kernel implements through the ioctl command + * bitmask written by the UFFDIO_API. + */ +#define _UFFDIO_REGISTER (0x00) +#define _UFFDIO_UNREGISTER (0x01) +#define _UFFDIO_WAKE (0x02) +#define _UFFDIO_COPY (0x03) +#define _UFFDIO_ZEROPAGE (0x04) +#define _UFFDIO_WRITEPROTECT (0x06) +#define _UFFDIO_CONTINUE (0x07) +#define _UFFDIO_API (0x3F) + +/* userfaultfd ioctl ids */ +#define UFFDIO 0xAA +#define UFFDIO_API _IOWR(UFFDIO, _UFFDIO_API, \ + struct uffdio_api) +#define UFFDIO_REGISTER _IOWR(UFFDIO, _UFFDIO_REGISTER, \ + struct uffdio_register) +#define UFFDIO_UNREGISTER _IOR(UFFDIO, _UFFDIO_UNREGISTER, \ + struct uffdio_range) +#define UFFDIO_WAKE _IOR(UFFDIO, _UFFDIO_WAKE, \ + struct uffdio_range) +#define UFFDIO_COPY _IOWR(UFFDIO, _UFFDIO_COPY, \ + struct uffdio_copy) +#define UFFDIO_ZEROPAGE _IOWR(UFFDIO, _UFFDIO_ZEROPAGE, \ + struct uffdio_zeropage) +#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \ + struct uffdio_writeprotect) +#define UFFDIO_CONTINUE _IOWR(UFFDIO, _UFFDIO_CONTINUE, \ + struct uffdio_continue) + +/* read() structure */ +struct uffd_msg { + __u8 event; + + __u8 reserved1; + __u16 reserved2; + __u32 reserved3; + + union { + struct { + __u64 flags; + __u64 address; + union { + __u32 ptid; + } feat; + } pagefault; + + struct { + __u32 ufd; + } fork; + + struct { + __u64 from; + __u64 to; + __u64 len; + } remap; + + struct { + __u64 start; + __u64 end; + } remove; + + struct { + /* unused reserved fields */ + __u64 reserved1; + __u64 reserved2; + __u64 reserved3; + } reserved; + } arg; +} __attribute__((packed)); + +/* + * Start at 0x12 and not at 0 to be more strict against bugs. + */ +#define UFFD_EVENT_PAGEFAULT 0x12 +#define UFFD_EVENT_FORK 0x13 +#define UFFD_EVENT_REMAP 0x14 +#define UFFD_EVENT_REMOVE 0x15 +#define UFFD_EVENT_UNMAP 0x16 + +/* flags for UFFD_EVENT_PAGEFAULT */ +#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ +#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */ +#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */ + +struct uffdio_api { + /* userland asks for an API number and the features to enable */ + __u64 api; + /* + * Kernel answers below with the all available features for + * the API, this notifies userland of which events and/or + * which flags for each event are enabled in the current + * kernel. + * + * Note: UFFD_EVENT_PAGEFAULT and UFFD_PAGEFAULT_FLAG_WRITE + * are to be considered implicitly always enabled in all kernels as + * long as the uffdio_api.api requested matches UFFD_API. + * + * UFFD_FEATURE_MISSING_HUGETLBFS means an UFFDIO_REGISTER + * with UFFDIO_REGISTER_MODE_MISSING mode will succeed on + * hugetlbfs virtual memory ranges. Adding or not adding + * UFFD_FEATURE_MISSING_HUGETLBFS to uffdio_api.features has + * no real functional effect after UFFDIO_API returns, but + * it's only useful for an initial feature set probe at + * UFFDIO_API time. There are two ways to use it: + * + * 1) by adding UFFD_FEATURE_MISSING_HUGETLBFS to the + * uffdio_api.features before calling UFFDIO_API, an error + * will be returned by UFFDIO_API on a kernel without + * hugetlbfs missing support + * + * 2) the UFFD_FEATURE_MISSING_HUGETLBFS can not be added in + * uffdio_api.features and instead it will be set by the + * kernel in the uffdio_api.features if the kernel supports + * it, so userland can later check if the feature flag is + * present in uffdio_api.features after UFFDIO_API + * succeeded. + * + * UFFD_FEATURE_MISSING_SHMEM works the same as + * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem + * (i.e. tmpfs and other shmem based APIs). + * + * UFFD_FEATURE_SIGBUS feature means no page-fault + * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead + * a SIGBUS signal will be sent to the faulting process. + * + * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will + * be returned, if feature is not requested 0 will be returned. + * + * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults + * can be intercepted (via REGISTER_MODE_MINOR) for + * hugetlbfs-backed pages. + * + * UFFD_FEATURE_MINOR_SHMEM indicates the same support as + * UFFD_FEATURE_MINOR_HUGETLBFS, but for shmem-backed pages instead. + */ +#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) +#define UFFD_FEATURE_EVENT_FORK (1<<1) +#define UFFD_FEATURE_EVENT_REMAP (1<<2) +#define UFFD_FEATURE_EVENT_REMOVE (1<<3) +#define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) +#define UFFD_FEATURE_MISSING_SHMEM (1<<5) +#define UFFD_FEATURE_EVENT_UNMAP (1<<6) +#define UFFD_FEATURE_SIGBUS (1<<7) +#define UFFD_FEATURE_THREAD_ID (1<<8) +#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9) +#define UFFD_FEATURE_MINOR_SHMEM (1<<10) + __u64 features; + + __u64 ioctls; +}; + +struct uffdio_range { + __u64 start; + __u64 len; +}; + +struct uffdio_register { + struct uffdio_range range; +#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0) +#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1) +#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2) + __u64 mode; + + /* + * kernel answers which ioctl commands are available for the + * range, keep at the end as the last 8 bytes aren't read. + */ + __u64 ioctls; +}; + +struct uffdio_copy { + __u64 dst; + __u64 src; + __u64 len; +#define UFFDIO_COPY_MODE_DONTWAKE ((__u64)1<<0) + /* + * UFFDIO_COPY_MODE_WP will map the page write protected on + * the fly. UFFDIO_COPY_MODE_WP is available only if the + * write protected ioctl is implemented for the range + * according to the uffdio_register.ioctls. + */ +#define UFFDIO_COPY_MODE_WP ((__u64)1<<1) + __u64 mode; + + /* + * "copy" is written by the ioctl and must be at the end: the + * copy_from_user will not read the last 8 bytes. + */ + __s64 copy; +}; + +struct uffdio_zeropage { + struct uffdio_range range; +#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * "zeropage" is written by the ioctl and must be at the end: + * the copy_from_user will not read the last 8 bytes. + */ + __s64 zeropage; +}; + +struct uffdio_writeprotect { + struct uffdio_range range; +/* + * UFFDIO_WRITEPROTECT_MODE_WP: set the flag to write protect a range, + * unset the flag to undo protection of a range which was previously + * write protected. + * + * UFFDIO_WRITEPROTECT_MODE_DONTWAKE: set the flag to avoid waking up + * any wait thread after the operation succeeds. + * + * NOTE: Write protecting a region (WP=1) is unrelated to page faults, + * therefore DONTWAKE flag is meaningless with WP=1. Removing write + * protection (WP=0) in response to a page fault wakes the faulting + * task unless DONTWAKE is set. + */ +#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0) +#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1) + __u64 mode; +}; + +struct uffdio_continue { + struct uffdio_range range; +#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0) + __u64 mode; + + /* + * Fields below here are written by the ioctl and must be at the end: + * the copy_from_user will not read past here. + */ + __s64 mapped; +}; + +/* + * Flags for the userfaultfd(2) system call itself. + */ + +/* + * Create a userfaultfd that can handle page faults only in user mode. + */ +#define UFFD_USER_MODE_ONLY 1 + +#endif /* _LINUX_USERFAULTFD_H */ diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h new file mode 100644 index 000000000..e680594f2 --- /dev/null +++ b/linux-headers/linux/vfio.h @@ -0,0 +1,1370 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * VFIO API definition + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson <alex.williamson@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef VFIO_H +#define VFIO_H + +#include <linux/types.h> +#include <linux/ioctl.h> + +#define VFIO_API_VERSION 0 + + +/* Kernel & User level defines for VFIO IOCTLs. */ + +/* Extensions */ + +#define VFIO_TYPE1_IOMMU 1 +#define VFIO_SPAPR_TCE_IOMMU 2 +#define VFIO_TYPE1v2_IOMMU 3 +/* + * IOMMU enforces DMA cache coherence (ex. PCIe NoSnoop stripping). This + * capability is subject to change as groups are added or removed. + */ +#define VFIO_DMA_CC_IOMMU 4 + +/* Check if EEH is supported */ +#define VFIO_EEH 5 + +/* Two-stage IOMMU */ +#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */ + +#define VFIO_SPAPR_TCE_v2_IOMMU 7 + +/* + * The No-IOMMU IOMMU offers no translation or isolation for devices and + * supports no ioctls outside of VFIO_CHECK_EXTENSION. Use of VFIO's No-IOMMU + * code will taint the host kernel and should be used with extreme caution. + */ +#define VFIO_NOIOMMU_IOMMU 8 + +/* Supports VFIO_DMA_UNMAP_FLAG_ALL */ +#define VFIO_UNMAP_ALL 9 + +/* Supports the vaddr flag for DMA map and unmap */ +#define VFIO_UPDATE_VADDR 10 + +/* + * The IOCTL interface is designed for extensibility by embedding the + * structure length (argsz) and flags into structures passed between + * kernel and userspace. We therefore use the _IO() macro for these + * defines to avoid implicitly embedding a size into the ioctl request. + * As structure fields are added, argsz will increase to match and flag + * bits will be defined to indicate additional fields with valid data. + * It's *always* the caller's responsibility to indicate the size of + * the structure passed by setting argsz appropriately. + */ + +#define VFIO_TYPE (';') +#define VFIO_BASE 100 + +/* + * For extension of INFO ioctls, VFIO makes use of a capability chain + * designed after PCI/e capabilities. A flag bit indicates whether + * this capability chain is supported and a field defined in the fixed + * structure defines the offset of the first capability in the chain. + * This field is only valid when the corresponding bit in the flags + * bitmap is set. This offset field is relative to the start of the + * INFO buffer, as is the next field within each capability header. + * The id within the header is a shared address space per INFO ioctl, + * while the version field is specific to the capability id. The + * contents following the header are specific to the capability id. + */ +struct vfio_info_cap_header { + __u16 id; /* Identifies capability */ + __u16 version; /* Version specific to the capability ID */ + __u32 next; /* Offset of next capability */ +}; + +/* + * Callers of INFO ioctls passing insufficiently sized buffers will see + * the capability chain flag bit set, a zero value for the first capability + * offset (if available within the provided argsz), and argsz will be + * updated to report the necessary buffer size. For compatibility, the + * INFO ioctl will not report error in this case, but the capability chain + * will not be available. + */ + +/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */ + +/** + * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0) + * + * Report the version of the VFIO API. This allows us to bump the entire + * API version should we later need to add or change features in incompatible + * ways. + * Return: VFIO_API_VERSION + * Availability: Always + */ +#define VFIO_GET_API_VERSION _IO(VFIO_TYPE, VFIO_BASE + 0) + +/** + * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32) + * + * Check whether an extension is supported. + * Return: 0 if not supported, 1 (or some other positive integer) if supported. + * Availability: Always + */ +#define VFIO_CHECK_EXTENSION _IO(VFIO_TYPE, VFIO_BASE + 1) + +/** + * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32) + * + * Set the iommu to the given type. The type must be supported by an + * iommu driver as verified by calling CHECK_EXTENSION using the same + * type. A group must be set to this file descriptor before this + * ioctl is available. The IOMMU interfaces enabled by this call are + * specific to the value set. + * Return: 0 on success, -errno on failure + * Availability: When VFIO group attached + */ +#define VFIO_SET_IOMMU _IO(VFIO_TYPE, VFIO_BASE + 2) + +/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */ + +/** + * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3, + * struct vfio_group_status) + * + * Retrieve information about the group. Fills in provided + * struct vfio_group_info. Caller sets argsz. + * Return: 0 on succes, -errno on failure. + * Availability: Always + */ +struct vfio_group_status { + __u32 argsz; + __u32 flags; +#define VFIO_GROUP_FLAGS_VIABLE (1 << 0) +#define VFIO_GROUP_FLAGS_CONTAINER_SET (1 << 1) +}; +#define VFIO_GROUP_GET_STATUS _IO(VFIO_TYPE, VFIO_BASE + 3) + +/** + * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32) + * + * Set the container for the VFIO group to the open VFIO file + * descriptor provided. Groups may only belong to a single + * container. Containers may, at their discretion, support multiple + * groups. Only when a container is set are all of the interfaces + * of the VFIO file descriptor and the VFIO group file descriptor + * available to the user. + * Return: 0 on success, -errno on failure. + * Availability: Always + */ +#define VFIO_GROUP_SET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 4) + +/** + * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5) + * + * Remove the group from the attached container. This is the + * opposite of the SET_CONTAINER call and returns the group to + * an initial state. All device file descriptors must be released + * prior to calling this interface. When removing the last group + * from a container, the IOMMU will be disabled and all state lost, + * effectively also returning the VFIO file descriptor to an initial + * state. + * Return: 0 on success, -errno on failure. + * Availability: When attached to container + */ +#define VFIO_GROUP_UNSET_CONTAINER _IO(VFIO_TYPE, VFIO_BASE + 5) + +/** + * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char) + * + * Return a new file descriptor for the device object described by + * the provided string. The string should match a device listed in + * the devices subdirectory of the IOMMU group sysfs entry. The + * group containing the device must already be added to this context. + * Return: new file descriptor on success, -errno on failure. + * Availability: When attached to container + */ +#define VFIO_GROUP_GET_DEVICE_FD _IO(VFIO_TYPE, VFIO_BASE + 6) + +/* --------------- IOCTLs for DEVICE file descriptors --------------- */ + +/** + * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7, + * struct vfio_device_info) + * + * Retrieve information about the device. Fills in provided + * struct vfio_device_info. Caller sets argsz. + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_info { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FLAGS_RESET (1 << 0) /* Device supports reset */ +#define VFIO_DEVICE_FLAGS_PCI (1 << 1) /* vfio-pci device */ +#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2) /* vfio-platform device */ +#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */ +#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */ +#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */ +#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */ +#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */ + __u32 num_regions; /* Max region index + 1 */ + __u32 num_irqs; /* Max IRQ index + 1 */ + __u32 cap_offset; /* Offset within info struct of first cap */ +}; +#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7) + +/* + * Vendor driver using Mediated device framework should provide device_api + * attribute in supported type attribute groups. Device API string should be one + * of the following corresponding to device flags in vfio_device_info structure. + */ + +#define VFIO_DEVICE_API_PCI_STRING "vfio-pci" +#define VFIO_DEVICE_API_PLATFORM_STRING "vfio-platform" +#define VFIO_DEVICE_API_AMBA_STRING "vfio-amba" +#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw" +#define VFIO_DEVICE_API_AP_STRING "vfio-ap" + +/* + * The following capabilities are unique to s390 zPCI devices. Their contents + * are further-defined in vfio_zdev.h + */ +#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 +#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 +#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 +#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 + +/** + * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8, + * struct vfio_region_info) + * + * Retrieve information about a device region. Caller provides + * struct vfio_region_info with index value set. Caller sets argsz. + * Implementation of region mapping is bus driver specific. This is + * intended to describe MMIO, I/O port, as well as bus specific + * regions (ex. PCI config space). Zero sized regions may be used + * to describe unimplemented regions (ex. unimplemented PCI BARs). + * Return: 0 on success, -errno on failure. + */ +struct vfio_region_info { + __u32 argsz; + __u32 flags; +#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ +#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ +#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ +#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ + __u32 index; /* Region index */ + __u32 cap_offset; /* Offset within info struct of first cap */ + __u64 size; /* Region size (bytes) */ + __u64 offset; /* Region offset from start of device fd */ +}; +#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) + +/* + * The sparse mmap capability allows finer granularity of specifying areas + * within a region with mmap support. When specified, the user should only + * mmap the offset ranges specified by the areas array. mmaps outside of the + * areas specified may fail (such as the range covering a PCI MSI-X table) or + * may result in improper device behavior. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 + +struct vfio_region_sparse_mmap_area { + __u64 offset; /* Offset of mmap'able area within region */ + __u64 size; /* Size of mmap'able area */ +}; + +struct vfio_region_info_cap_sparse_mmap { + struct vfio_info_cap_header header; + __u32 nr_areas; + __u32 reserved; + struct vfio_region_sparse_mmap_area areas[]; +}; + +/* + * The device specific type capability allows regions unique to a specific + * device or class of devices to be exposed. This helps solve the problem for + * vfio bus drivers of defining which region indexes correspond to which region + * on the device, without needing to resort to static indexes, as done by + * vfio-pci. For instance, if we were to go back in time, we might remove + * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes + * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd + * make a "VGA" device specific type to describe the VGA access space. This + * means that non-VGA devices wouldn't need to waste this index, and thus the + * address space associated with it due to implementation of device file + * descriptor offsets in vfio-pci. + * + * The current implementation is now part of the user ABI, so we can't use this + * for VGA, but there are other upcoming use cases, such as opregions for Intel + * IGD devices and framebuffers for vGPU devices. We missed VGA, but we'll + * use this for future additions. + * + * The structure below defines version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_TYPE 2 + +struct vfio_region_info_cap_type { + struct vfio_info_cap_header header; + __u32 type; /* global per bus driver */ + __u32 subtype; /* type specific */ +}; + +/* + * List of region types, global per bus driver. + * If you introduce a new type, please add it here. + */ + +/* PCI region type containing a PCI vendor part */ +#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) +#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) +#define VFIO_REGION_TYPE_GFX (1) +#define VFIO_REGION_TYPE_CCW (2) +#define VFIO_REGION_TYPE_MIGRATION (3) + +/* sub-types for VFIO_REGION_TYPE_PCI_* */ + +/* 8086 vendor PCI sub-types */ +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) + +/* 10de vendor PCI sub-types */ +/* + * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space. + * + * Deprecated, region no longer provided + */ +#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1) + +/* 1014 vendor PCI sub-types */ +/* + * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU + * to do TLB invalidation on a GPU. + * + * Deprecated, region no longer provided + */ +#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1) + +/* sub-types for VFIO_REGION_TYPE_GFX */ +#define VFIO_REGION_SUBTYPE_GFX_EDID (1) + +/** + * struct vfio_region_gfx_edid - EDID region layout. + * + * Set display link state and EDID blob. + * + * The EDID blob has monitor information such as brand, name, serial + * number, physical size, supported video modes and more. + * + * This special region allows userspace (typically qemu) set a virtual + * EDID for the virtual monitor, which allows a flexible display + * configuration. + * + * For the edid blob spec look here: + * https://en.wikipedia.org/wiki/Extended_Display_Identification_Data + * + * On linux systems you can find the EDID blob in sysfs: + * /sys/class/drm/${card}/${connector}/edid + * + * You can use the edid-decode ulility (comes with xorg-x11-utils) to + * decode the EDID blob. + * + * @edid_offset: location of the edid blob, relative to the + * start of the region (readonly). + * @edid_max_size: max size of the edid blob (readonly). + * @edid_size: actual edid size (read/write). + * @link_state: display link state (read/write). + * VFIO_DEVICE_GFX_LINK_STATE_UP: Monitor is turned on. + * VFIO_DEVICE_GFX_LINK_STATE_DOWN: Monitor is turned off. + * @max_xres: max display width (0 == no limitation, readonly). + * @max_yres: max display height (0 == no limitation, readonly). + * + * EDID update protocol: + * (1) set link-state to down. + * (2) update edid blob and size. + * (3) set link-state to up. + */ +struct vfio_region_gfx_edid { + __u32 edid_offset; + __u32 edid_max_size; + __u32 edid_size; + __u32 max_xres; + __u32 max_yres; + __u32 link_state; +#define VFIO_DEVICE_GFX_LINK_STATE_UP 1 +#define VFIO_DEVICE_GFX_LINK_STATE_DOWN 2 +}; + +/* sub-types for VFIO_REGION_TYPE_CCW */ +#define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD (1) +#define VFIO_REGION_SUBTYPE_CCW_SCHIB (2) +#define VFIO_REGION_SUBTYPE_CCW_CRW (3) + +/* sub-types for VFIO_REGION_TYPE_MIGRATION */ +#define VFIO_REGION_SUBTYPE_MIGRATION (1) + +/* + * The structure vfio_device_migration_info is placed at the 0th offset of + * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related + * migration information. Field accesses from this structure are only supported + * at their native width and alignment. Otherwise, the result is undefined and + * vendor drivers should return an error. + * + * device_state: (read/write) + * - The user application writes to this field to inform the vendor driver + * about the device state to be transitioned to. + * - The vendor driver should take the necessary actions to change the + * device state. After successful transition to a given state, the + * vendor driver should return success on write(device_state, state) + * system call. If the device state transition fails, the vendor driver + * should return an appropriate -errno for the fault condition. + * - On the user application side, if the device state transition fails, + * that is, if write(device_state, state) returns an error, read + * device_state again to determine the current state of the device from + * the vendor driver. + * - The vendor driver should return previous state of the device unless + * the vendor driver has encountered an internal error, in which case + * the vendor driver may report the device_state VFIO_DEVICE_STATE_ERROR. + * - The user application must use the device reset ioctl to recover the + * device from VFIO_DEVICE_STATE_ERROR state. If the device is + * indicated to be in a valid device state by reading device_state, the + * user application may attempt to transition the device to any valid + * state reachable from the current state or terminate itself. + * + * device_state consists of 3 bits: + * - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear, + * it indicates the _STOP state. When the device state is changed to + * _STOP, driver should stop the device before write() returns. + * - If bit 1 is set, it indicates the _SAVING state, which means that the + * driver should start gathering device state information that will be + * provided to the VFIO user application to save the device's state. + * - If bit 2 is set, it indicates the _RESUMING state, which means that + * the driver should prepare to resume the device. Data provided through + * the migration region should be used to resume the device. + * Bits 3 - 31 are reserved for future use. To preserve them, the user + * application should perform a read-modify-write operation on this + * field when modifying the specified bits. + * + * +------- _RESUMING + * |+------ _SAVING + * ||+----- _RUNNING + * ||| + * 000b => Device Stopped, not saving or resuming + * 001b => Device running, which is the default state + * 010b => Stop the device & save the device state, stop-and-copy state + * 011b => Device running and save the device state, pre-copy state + * 100b => Device stopped and the device state is resuming + * 101b => Invalid state + * 110b => Error state + * 111b => Invalid state + * + * State transitions: + * + * _RESUMING _RUNNING Pre-copy Stop-and-copy _STOP + * (100b) (001b) (011b) (010b) (000b) + * 0. Running or default state + * | + * + * 1. Normal Shutdown (optional) + * |------------------------------------->| + * + * 2. Save the state or suspend + * |------------------------->|---------->| + * + * 3. Save the state during live migration + * |----------->|------------>|---------->| + * + * 4. Resuming + * |<---------| + * + * 5. Resumed + * |--------->| + * + * 0. Default state of VFIO device is _RUNNING when the user application starts. + * 1. During normal shutdown of the user application, the user application may + * optionally change the VFIO device state from _RUNNING to _STOP. This + * transition is optional. The vendor driver must support this transition but + * must not require it. + * 2. When the user application saves state or suspends the application, the + * device state transitions from _RUNNING to stop-and-copy and then to _STOP. + * On state transition from _RUNNING to stop-and-copy, driver must stop the + * device, save the device state and send it to the application through the + * migration region. The sequence to be followed for such transition is given + * below. + * 3. In live migration of user application, the state transitions from _RUNNING + * to pre-copy, to stop-and-copy, and to _STOP. + * On state transition from _RUNNING to pre-copy, the driver should start + * gathering the device state while the application is still running and send + * the device state data to application through the migration region. + * On state transition from pre-copy to stop-and-copy, the driver must stop + * the device, save the device state and send it to the user application + * through the migration region. + * Vendor drivers must support the pre-copy state even for implementations + * where no data is provided to the user before the stop-and-copy state. The + * user must not be required to consume all migration data before the device + * transitions to a new state, including the stop-and-copy state. + * The sequence to be followed for above two transitions is given below. + * 4. To start the resuming phase, the device state should be transitioned from + * the _RUNNING to the _RESUMING state. + * In the _RESUMING state, the driver should use the device state data + * received through the migration region to resume the device. + * 5. After providing saved device data to the driver, the application should + * change the state from _RESUMING to _RUNNING. + * + * reserved: + * Reads on this field return zero and writes are ignored. + * + * pending_bytes: (read only) + * The number of pending bytes still to be migrated from the vendor driver. + * + * data_offset: (read only) + * The user application should read data_offset field from the migration + * region. The user application should read the device data from this + * offset within the migration region during the _SAVING state or write + * the device data during the _RESUMING state. See below for details of + * sequence to be followed. + * + * data_size: (read/write) + * The user application should read data_size to get the size in bytes of + * the data copied in the migration region during the _SAVING state and + * write the size in bytes of the data copied in the migration region + * during the _RESUMING state. + * + * The format of the migration region is as follows: + * ------------------------------------------------------------------ + * |vfio_device_migration_info| data section | + * | | /////////////////////////////// | + * ------------------------------------------------------------------ + * ^ ^ + * offset 0-trapped part data_offset + * + * The structure vfio_device_migration_info is always followed by the data + * section in the region, so data_offset will always be nonzero. The offset + * from where the data is copied is decided by the kernel driver. The data + * section can be trapped, mmapped, or partitioned, depending on how the kernel + * driver defines the data section. The data section partition can be defined + * as mapped by the sparse mmap capability. If mmapped, data_offset must be + * page aligned, whereas initial section which contains the + * vfio_device_migration_info structure, might not end at the offset, which is + * page aligned. The user is not required to access through mmap regardless + * of the capabilities of the region mmap. + * The vendor driver should determine whether and how to partition the data + * section. The vendor driver should return data_offset accordingly. + * + * The sequence to be followed while in pre-copy state and stop-and-copy state + * is as follows: + * a. Read pending_bytes, indicating the start of a new iteration to get device + * data. Repeated read on pending_bytes at this stage should have no side + * effects. + * If pending_bytes == 0, the user application should not iterate to get data + * for that device. + * If pending_bytes > 0, perform the following steps. + * b. Read data_offset, indicating that the vendor driver should make data + * available through the data section. The vendor driver should return this + * read operation only after data is available from (region + data_offset) + * to (region + data_offset + data_size). + * c. Read data_size, which is the amount of data in bytes available through + * the migration region. + * Read on data_offset and data_size should return the offset and size of + * the current buffer if the user application reads data_offset and + * data_size more than once here. + * d. Read data_size bytes of data from (region + data_offset) from the + * migration region. + * e. Process the data. + * f. Read pending_bytes, which indicates that the data from the previous + * iteration has been read. If pending_bytes > 0, go to step b. + * + * The user application can transition from the _SAVING|_RUNNING + * (pre-copy state) to the _SAVING (stop-and-copy) state regardless of the + * number of pending bytes. The user application should iterate in _SAVING + * (stop-and-copy) until pending_bytes is 0. + * + * The sequence to be followed while _RESUMING device state is as follows: + * While data for this device is available, repeat the following steps: + * a. Read data_offset from where the user application should write data. + * b. Write migration data starting at the migration region + data_offset for + * the length determined by data_size from the migration source. + * c. Write data_size, which indicates to the vendor driver that data is + * written in the migration region. Vendor driver must return this write + * operations on consuming data. Vendor driver should apply the + * user-provided migration region data to the device resume state. + * + * If an error occurs during the above sequences, the vendor driver can return + * an error code for next read() or write() operation, which will terminate the + * loop. The user application should then take the next necessary action, for + * example, failing migration or terminating the user application. + * + * For the user application, data is opaque. The user application should write + * data in the same order as the data is received and the data should be of + * same transaction size at the source. + */ + +struct vfio_device_migration_info { + __u32 device_state; /* VFIO device state */ +#define VFIO_DEVICE_STATE_STOP (0) +#define VFIO_DEVICE_STATE_RUNNING (1 << 0) +#define VFIO_DEVICE_STATE_SAVING (1 << 1) +#define VFIO_DEVICE_STATE_RESUMING (1 << 2) +#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \ + VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + +#define VFIO_DEVICE_STATE_VALID(state) \ + (state & VFIO_DEVICE_STATE_RESUMING ? \ + (state & VFIO_DEVICE_STATE_MASK) == VFIO_DEVICE_STATE_RESUMING : 1) + +#define VFIO_DEVICE_STATE_IS_ERROR(state) \ + ((state & VFIO_DEVICE_STATE_MASK) == (VFIO_DEVICE_STATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING)) + +#define VFIO_DEVICE_STATE_SET_ERROR(state) \ + ((state & ~VFIO_DEVICE_STATE_MASK) | VFIO_DEVICE_SATE_SAVING | \ + VFIO_DEVICE_STATE_RESUMING) + + __u32 reserved; + __u64 pending_bytes; + __u64 data_offset; + __u64 data_size; +}; + +/* + * The MSIX mappable capability informs that MSIX data of a BAR can be mmapped + * which allows direct access to non-MSIX registers which happened to be within + * the same system page. + * + * Even though the userspace gets direct access to the MSIX data, the existing + * VFIO_DEVICE_SET_IRQS interface must still be used for MSIX configuration. + */ +#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3 + +/* + * Capability with compressed real address (aka SSA - small system address) + * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing + * and by the userspace to associate a NVLink bridge with a GPU. + * + * Deprecated, capability no longer provided + */ +#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4 + +struct vfio_region_info_cap_nvlink2_ssatgt { + struct vfio_info_cap_header header; + __u64 tgt; +}; + +/* + * Capability with an NVLink link speed. The value is read by + * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed" + * property in the device tree. The value is fixed in the hardware + * and failing to provide the correct value results in the link + * not working with no indication from the driver why. + * + * Deprecated, capability no longer provided + */ +#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5 + +struct vfio_region_info_cap_nvlink2_lnkspd { + struct vfio_info_cap_header header; + __u32 link_speed; + __u32 __pad; +}; + +/** + * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, + * struct vfio_irq_info) + * + * Retrieve information about a device IRQ. Caller provides + * struct vfio_irq_info with index value set. Caller sets argsz. + * Implementation of IRQ mapping is bus driver specific. Indexes + * using multiple IRQs are primarily intended to support MSI-like + * interrupt blocks. Zero count irq blocks may be used to describe + * unimplemented interrupt types. + * + * The EVENTFD flag indicates the interrupt index supports eventfd based + * signaling. + * + * The MASKABLE flags indicates the index supports MASK and UNMASK + * actions described below. + * + * AUTOMASKED indicates that after signaling, the interrupt line is + * automatically masked by VFIO and the user needs to unmask the line + * to receive new interrupts. This is primarily intended to distinguish + * level triggered interrupts. + * + * The NORESIZE flag indicates that the interrupt lines within the index + * are setup as a set and new subindexes cannot be enabled without first + * disabling the entire index. This is used for interrupts like PCI MSI + * and MSI-X where the driver may only use a subset of the available + * indexes, but VFIO needs to enable a specific number of vectors + * upfront. In the case of MSI-X, where the user can enable MSI-X and + * then add and unmask vectors, it's up to userspace to make the decision + * whether to allocate the maximum supported number of vectors or tear + * down setup and incrementally increase the vectors as each is enabled. + */ +struct vfio_irq_info { + __u32 argsz; + __u32 flags; +#define VFIO_IRQ_INFO_EVENTFD (1 << 0) +#define VFIO_IRQ_INFO_MASKABLE (1 << 1) +#define VFIO_IRQ_INFO_AUTOMASKED (1 << 2) +#define VFIO_IRQ_INFO_NORESIZE (1 << 3) + __u32 index; /* IRQ index */ + __u32 count; /* Number of IRQs within this index */ +}; +#define VFIO_DEVICE_GET_IRQ_INFO _IO(VFIO_TYPE, VFIO_BASE + 9) + +/** + * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set) + * + * Set signaling, masking, and unmasking of interrupts. Caller provides + * struct vfio_irq_set with all fields set. 'start' and 'count' indicate + * the range of subindexes being specified. + * + * The DATA flags specify the type of data provided. If DATA_NONE, the + * operation performs the specified action immediately on the specified + * interrupt(s). For example, to unmask AUTOMASKED interrupt [0,0]: + * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1. + * + * DATA_BOOL allows sparse support for the same on arrays of interrupts. + * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]): + * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3, + * data = {1,0,1} + * + * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd. + * A value of -1 can be used to either de-assign interrupts if already + * assigned or skip un-assigned interrupts. For example, to set an eventfd + * to be trigger for interrupts [0,0] and [0,2]: + * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3, + * data = {fd1, -1, fd2} + * If index [0,1] is previously set, two count = 1 ioctls calls would be + * required to set [0,0] and [0,2] without changing [0,1]. + * + * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used + * with ACTION_TRIGGER to perform kernel level interrupt loopback testing + * from userspace (ie. simulate hardware triggering). + * + * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER + * enables the interrupt index for the device. Individual subindex interrupts + * can be disabled using the -1 value for DATA_EVENTFD or the index can be + * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0. + * + * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while + * ACTION_TRIGGER specifies kernel->user signaling. + */ +struct vfio_irq_set { + __u32 argsz; + __u32 flags; +#define VFIO_IRQ_SET_DATA_NONE (1 << 0) /* Data not present */ +#define VFIO_IRQ_SET_DATA_BOOL (1 << 1) /* Data is bool (u8) */ +#define VFIO_IRQ_SET_DATA_EVENTFD (1 << 2) /* Data is eventfd (s32) */ +#define VFIO_IRQ_SET_ACTION_MASK (1 << 3) /* Mask interrupt */ +#define VFIO_IRQ_SET_ACTION_UNMASK (1 << 4) /* Unmask interrupt */ +#define VFIO_IRQ_SET_ACTION_TRIGGER (1 << 5) /* Trigger interrupt */ + __u32 index; + __u32 start; + __u32 count; + __u8 data[]; +}; +#define VFIO_DEVICE_SET_IRQS _IO(VFIO_TYPE, VFIO_BASE + 10) + +#define VFIO_IRQ_SET_DATA_TYPE_MASK (VFIO_IRQ_SET_DATA_NONE | \ + VFIO_IRQ_SET_DATA_BOOL | \ + VFIO_IRQ_SET_DATA_EVENTFD) +#define VFIO_IRQ_SET_ACTION_TYPE_MASK (VFIO_IRQ_SET_ACTION_MASK | \ + VFIO_IRQ_SET_ACTION_UNMASK | \ + VFIO_IRQ_SET_ACTION_TRIGGER) +/** + * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11) + * + * Reset a device. + */ +#define VFIO_DEVICE_RESET _IO(VFIO_TYPE, VFIO_BASE + 11) + +/* + * The VFIO-PCI bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. + */ + +enum { + VFIO_PCI_BAR0_REGION_INDEX, + VFIO_PCI_BAR1_REGION_INDEX, + VFIO_PCI_BAR2_REGION_INDEX, + VFIO_PCI_BAR3_REGION_INDEX, + VFIO_PCI_BAR4_REGION_INDEX, + VFIO_PCI_BAR5_REGION_INDEX, + VFIO_PCI_ROM_REGION_INDEX, + VFIO_PCI_CONFIG_REGION_INDEX, + /* + * Expose VGA regions defined for PCI base class 03, subclass 00. + * This includes I/O port ranges 0x3b0 to 0x3bb and 0x3c0 to 0x3df + * as well as the MMIO range 0xa0000 to 0xbffff. Each implemented + * range is found at it's identity mapped offset from the region + * offset, for example 0x3b0 is region_info.offset + 0x3b0. Areas + * between described ranges are unimplemented. + */ + VFIO_PCI_VGA_REGION_INDEX, + VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */ + /* device specific cap to define content. */ +}; + +enum { + VFIO_PCI_INTX_IRQ_INDEX, + VFIO_PCI_MSI_IRQ_INDEX, + VFIO_PCI_MSIX_IRQ_INDEX, + VFIO_PCI_ERR_IRQ_INDEX, + VFIO_PCI_REQ_IRQ_INDEX, + VFIO_PCI_NUM_IRQS +}; + +/* + * The vfio-ccw bus driver makes use of the following fixed region and + * IRQ index mapping. Unimplemented regions return a size of zero. + * Unimplemented IRQ types return a count of zero. + */ + +enum { + VFIO_CCW_CONFIG_REGION_INDEX, + VFIO_CCW_NUM_REGIONS +}; + +enum { + VFIO_CCW_IO_IRQ_INDEX, + VFIO_CCW_CRW_IRQ_INDEX, + VFIO_CCW_REQ_IRQ_INDEX, + VFIO_CCW_NUM_IRQS +}; + +/** + * VFIO_DEVICE_GET_PCI_HOT_RESET_INFO - _IORW(VFIO_TYPE, VFIO_BASE + 12, + * struct vfio_pci_hot_reset_info) + * + * Return: 0 on success, -errno on failure: + * -enospc = insufficient buffer, -enodev = unsupported for device. + */ +struct vfio_pci_dependent_device { + __u32 group_id; + __u16 segment; + __u8 bus; + __u8 devfn; /* Use PCI_SLOT/PCI_FUNC */ +}; + +struct vfio_pci_hot_reset_info { + __u32 argsz; + __u32 flags; + __u32 count; + struct vfio_pci_dependent_device devices[]; +}; + +#define VFIO_DEVICE_GET_PCI_HOT_RESET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +/** + * VFIO_DEVICE_PCI_HOT_RESET - _IOW(VFIO_TYPE, VFIO_BASE + 13, + * struct vfio_pci_hot_reset) + * + * Return: 0 on success, -errno on failure. + */ +struct vfio_pci_hot_reset { + __u32 argsz; + __u32 flags; + __u32 count; + __s32 group_fds[]; +}; + +#define VFIO_DEVICE_PCI_HOT_RESET _IO(VFIO_TYPE, VFIO_BASE + 13) + +/** + * VFIO_DEVICE_QUERY_GFX_PLANE - _IOW(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_device_query_gfx_plane) + * + * Set the drm_plane_type and flags, then retrieve the gfx plane info. + * + * flags supported: + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_DMABUF are set + * to ask if the mdev supports dma-buf. 0 on support, -EINVAL on no + * support for dma-buf. + * - VFIO_GFX_PLANE_TYPE_PROBE and VFIO_GFX_PLANE_TYPE_REGION are set + * to ask if the mdev supports region. 0 on support, -EINVAL on no + * support for region. + * - VFIO_GFX_PLANE_TYPE_DMABUF or VFIO_GFX_PLANE_TYPE_REGION is set + * with each call to query the plane info. + * - Others are invalid and return -EINVAL. + * + * Note: + * 1. Plane could be disabled by guest. In that case, success will be + * returned with zero-initialized drm_format, size, width and height + * fields. + * 2. x_hot/y_hot is set to 0xFFFFFFFF if no hotspot information available + * + * Return: 0 on success, -errno on other failure. + */ +struct vfio_device_gfx_plane_info { + __u32 argsz; + __u32 flags; +#define VFIO_GFX_PLANE_TYPE_PROBE (1 << 0) +#define VFIO_GFX_PLANE_TYPE_DMABUF (1 << 1) +#define VFIO_GFX_PLANE_TYPE_REGION (1 << 2) + /* in */ + __u32 drm_plane_type; /* type of plane: DRM_PLANE_TYPE_* */ + /* out */ + __u32 drm_format; /* drm format of plane */ + __u64 drm_format_mod; /* tiled mode */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page*/ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane*/ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + union { + __u32 region_index; /* region index */ + __u32 dmabuf_id; /* dma-buf id */ + }; +}; + +#define VFIO_DEVICE_QUERY_GFX_PLANE _IO(VFIO_TYPE, VFIO_BASE + 14) + +/** + * VFIO_DEVICE_GET_GFX_DMABUF - _IOW(VFIO_TYPE, VFIO_BASE + 15, __u32) + * + * Return a new dma-buf file descriptor for an exposed guest framebuffer + * described by the provided dmabuf_id. The dmabuf_id is returned from VFIO_ + * DEVICE_QUERY_GFX_PLANE as a token of the exposed guest framebuffer. + */ + +#define VFIO_DEVICE_GET_GFX_DMABUF _IO(VFIO_TYPE, VFIO_BASE + 15) + +/** + * VFIO_DEVICE_IOEVENTFD - _IOW(VFIO_TYPE, VFIO_BASE + 16, + * struct vfio_device_ioeventfd) + * + * Perform a write to the device at the specified device fd offset, with + * the specified data and width when the provided eventfd is triggered. + * vfio bus drivers may not support this for all regions, for all widths, + * or at all. vfio-pci currently only enables support for BAR regions, + * excluding the MSI-X vector table. + * + * Return: 0 on success, -errno on failure. + */ +struct vfio_device_ioeventfd { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_IOEVENTFD_8 (1 << 0) /* 1-byte write */ +#define VFIO_DEVICE_IOEVENTFD_16 (1 << 1) /* 2-byte write */ +#define VFIO_DEVICE_IOEVENTFD_32 (1 << 2) /* 4-byte write */ +#define VFIO_DEVICE_IOEVENTFD_64 (1 << 3) /* 8-byte write */ +#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf) + __u64 offset; /* device fd offset of write */ + __u64 data; /* data to be written */ + __s32 fd; /* -1 for de-assignment */ +}; + +#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16) + +/** + * VFIO_DEVICE_FEATURE - _IORW(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_device_feature) + * + * Get, set, or probe feature data of the device. The feature is selected + * using the FEATURE_MASK portion of the flags field. Support for a feature + * can be probed by setting both the FEATURE_MASK and PROBE bits. A probe + * may optionally include the GET and/or SET bits to determine read vs write + * access of the feature respectively. Probing a feature will return success + * if the feature is supported and all of the optionally indicated GET/SET + * methods are supported. The format of the data portion of the structure is + * specific to the given feature. The data portion is not required for + * probing. GET and SET are mutually exclusive, except for use with PROBE. + * + * Return 0 on success, -errno on failure. + */ +struct vfio_device_feature { + __u32 argsz; + __u32 flags; +#define VFIO_DEVICE_FEATURE_MASK (0xffff) /* 16-bit feature index */ +#define VFIO_DEVICE_FEATURE_GET (1 << 16) /* Get feature into data[] */ +#define VFIO_DEVICE_FEATURE_SET (1 << 17) /* Set feature from data[] */ +#define VFIO_DEVICE_FEATURE_PROBE (1 << 18) /* Probe feature support */ + __u8 data[]; +}; + +#define VFIO_DEVICE_FEATURE _IO(VFIO_TYPE, VFIO_BASE + 17) + +/* + * Provide support for setting a PCI VF Token, which is used as a shared + * secret between PF and VF drivers. This feature may only be set on a + * PCI SR-IOV PF when SR-IOV is enabled on the PF and there are no existing + * open VFs. Data provided when setting this feature is a 16-byte array + * (__u8 b[16]), representing a UUID. + */ +#define VFIO_DEVICE_FEATURE_PCI_VF_TOKEN (0) + +/* -------- API for Type1 VFIO IOMMU -------- */ + +/** + * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info) + * + * Retrieve information about the IOMMU object. Fills in provided + * struct vfio_iommu_info. Caller sets argsz. + * + * XXX Should we do these by CHECK_EXTENSION too? + */ +struct vfio_iommu_type1_info { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_INFO_PGSIZES (1 << 0) /* supported page sizes info */ +#define VFIO_IOMMU_INFO_CAPS (1 << 1) /* Info supports caps */ + __u64 iova_pgsizes; /* Bitmap of supported page sizes */ + __u32 cap_offset; /* Offset within info struct of first cap */ +}; + +/* + * The IOVA capability allows to report the valid IOVA range(s) + * excluding any non-relaxable reserved regions exposed by + * devices attached to the container. Any DMA map attempt + * outside the valid iova range will return error. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE 1 + +struct vfio_iova_range { + __u64 start; + __u64 end; +}; + +struct vfio_iommu_type1_info_cap_iova_range { + struct vfio_info_cap_header header; + __u32 nr_iovas; + __u32 reserved; + struct vfio_iova_range iova_ranges[]; +}; + +/* + * The migration capability allows to report supported features for migration. + * + * The structures below define version 1 of this capability. + * + * The existence of this capability indicates that IOMMU kernel driver supports + * dirty page logging. + * + * pgsize_bitmap: Kernel driver returns bitmap of supported page sizes for dirty + * page logging. + * max_dirty_bitmap_size: Kernel driver returns maximum supported dirty bitmap + * size in bytes that can be used by user applications when getting the dirty + * bitmap. + */ +#define VFIO_IOMMU_TYPE1_INFO_CAP_MIGRATION 2 + +struct vfio_iommu_type1_info_cap_migration { + struct vfio_info_cap_header header; + __u32 flags; + __u64 pgsize_bitmap; + __u64 max_dirty_bitmap_size; /* in bytes */ +}; + +/* + * The DMA available capability allows to report the current number of + * simultaneously outstanding DMA mappings that are allowed. + * + * The structure below defines version 1 of this capability. + * + * avail: specifies the current number of outstanding DMA mappings allowed. + */ +#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3 + +struct vfio_iommu_type1_info_dma_avail { + struct vfio_info_cap_header header; + __u32 avail; +}; + +#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +/** + * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map) + * + * Map process virtual addresses to IO virtual addresses using the + * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required. + * + * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and + * unblock translation of host virtual addresses in the iova range. The vaddr + * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR. To + * maintain memory consistency within the user application, the updated vaddr + * must address the same memory object as originally mapped. Failure to do so + * will result in user memory corruption and/or device misbehavior. iova and + * size must match those in the original MAP_DMA call. Protection is not + * changed, and the READ & WRITE flags must be 0. + */ +struct vfio_iommu_type1_dma_map { + __u32 argsz; + __u32 flags; +#define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device */ +#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */ +#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2) + __u64 vaddr; /* Process virtual address */ + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; + +#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13) + +struct vfio_bitmap { + __u64 pgsize; /* page size for bitmap in bytes */ + __u64 size; /* in bytes */ + __u64 *data; /* one bit per page */ +}; + +/** + * VFIO_IOMMU_UNMAP_DMA - _IOWR(VFIO_TYPE, VFIO_BASE + 14, + * struct vfio_dma_unmap) + * + * Unmap IO virtual addresses using the provided struct vfio_dma_unmap. + * Caller sets argsz. The actual unmapped size is returned in the size + * field. No guarantee is made to the user that arbitrary unmaps of iova + * or size different from those used in the original mapping call will + * succeed. + * + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap + * before unmapping IO virtual addresses. When this flag is set, the user must + * provide a struct vfio_bitmap in data[]. User must provide zero-allocated + * memory via vfio_bitmap.data and its size in the vfio_bitmap.size field. + * A bit in the bitmap represents one page, of user provided page size in + * vfio_bitmap.pgsize field, consecutively starting from iova offset. Bit set + * indicates that the page at that offset from iova is dirty. A Bitmap of the + * pages in the range of unmapped size is returned in the user-provided + * vfio_bitmap.data. + * + * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses. iova and size + * must be 0. This cannot be combined with the get-dirty-bitmap flag. + * + * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host + * virtual addresses in the iova range. Tasks that attempt to translate an + * iova's vaddr will block. DMA to already-mapped pages continues. This + * cannot be combined with the get-dirty-bitmap flag. + */ +struct vfio_iommu_type1_dma_unmap { + __u32 argsz; + __u32 flags; +#define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0) +#define VFIO_DMA_UNMAP_FLAG_ALL (1 << 1) +#define VFIO_DMA_UNMAP_FLAG_VADDR (1 << 2) + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of mapping (bytes) */ + __u8 data[]; +}; + +#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) + +/* + * IOCTLs to enable/disable IOMMU container usage. + * No parameters are supported. + */ +#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) +#define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) + +/** + * VFIO_IOMMU_DIRTY_PAGES - _IOWR(VFIO_TYPE, VFIO_BASE + 17, + * struct vfio_iommu_type1_dirty_bitmap) + * IOCTL is used for dirty pages logging. + * Caller should set flag depending on which operation to perform, details as + * below: + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_START flag set, instructs + * the IOMMU driver to log pages that are dirtied or potentially dirtied by + * the device; designed to be used when a migration is in progress. Dirty pages + * are logged until logging is disabled by user application by calling the IOCTL + * with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP flag set, instructs + * the IOMMU driver to stop logging dirtied pages. + * + * Calling the IOCTL with VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP flag set + * returns the dirty pages bitmap for IOMMU container for a given IOVA range. + * The user must specify the IOVA range and the pgsize through the structure + * vfio_iommu_type1_dirty_bitmap_get in the data[] portion. This interface + * supports getting a bitmap of the smallest supported pgsize only and can be + * modified in future to get a bitmap of any specified supported pgsize. The + * user must provide a zeroed memory area for the bitmap memory and specify its + * size in bitmap.size. One bit is used to represent one page consecutively + * starting from iova offset. The user should provide page size in bitmap.pgsize + * field. A bit set in the bitmap indicates that the page at that offset from + * iova is dirty. The caller must set argsz to a value including the size of + * structure vfio_iommu_type1_dirty_bitmap_get, but excluding the size of the + * actual bitmap. If dirty pages logging is not enabled, an error will be + * returned. + * + * Only one of the flags _START, _STOP and _GET may be specified at a time. + * + */ +struct vfio_iommu_type1_dirty_bitmap { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_START (1 << 0) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP (1 << 1) +#define VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP (1 << 2) + __u8 data[]; +}; + +struct vfio_iommu_type1_dirty_bitmap_get { + __u64 iova; /* IO virtual address */ + __u64 size; /* Size of iova range */ + struct vfio_bitmap bitmap; +}; + +#define VFIO_IOMMU_DIRTY_PAGES _IO(VFIO_TYPE, VFIO_BASE + 17) + +/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ + +/* + * The SPAPR TCE DDW info struct provides the information about + * the details of Dynamic DMA window capability. + * + * @pgsizes contains a page size bitmask, 4K/64K/16M are supported. + * @max_dynamic_windows_supported tells the maximum number of windows + * which the platform can create. + * @levels tells the maximum number of levels in multi-level IOMMU tables; + * this allows splitting a table into smaller chunks which reduces + * the amount of physically contiguous memory required for the table. + */ +struct vfio_iommu_spapr_tce_ddw_info { + __u64 pgsizes; /* Bitmap of supported page sizes */ + __u32 max_dynamic_windows_supported; + __u32 levels; +}; + +/* + * The SPAPR TCE info struct provides the information about the PCI bus + * address ranges available for DMA, these values are programmed into + * the hardware so the guest has to know that information. + * + * The DMA 32 bit window start is an absolute PCI bus address. + * The IOVA address passed via map/unmap ioctls are absolute PCI bus + * addresses too so the window works as a filter rather than an offset + * for IOVA addresses. + * + * Flags supported: + * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows + * (DDW) support is present. @ddw is only supported when DDW is present. + */ +struct vfio_iommu_spapr_tce_info { + __u32 argsz; + __u32 flags; +#define VFIO_IOMMU_SPAPR_INFO_DDW (1 << 0) /* DDW supported */ + __u32 dma32_window_start; /* 32 bit window start (bytes) */ + __u32 dma32_window_size; /* 32 bit window size (bytes) */ + struct vfio_iommu_spapr_tce_ddw_info ddw; +}; + +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +/* + * EEH PE operation struct provides ways to: + * - enable/disable EEH functionality; + * - unfreeze IO/DMA for frozen PE; + * - read PE state; + * - reset PE; + * - configure PE; + * - inject EEH error. + */ +struct vfio_eeh_pe_err { + __u32 type; + __u32 func; + __u64 addr; + __u64 mask; +}; + +struct vfio_eeh_pe_op { + __u32 argsz; + __u32 flags; + __u32 op; + union { + struct vfio_eeh_pe_err err; + }; +}; + +#define VFIO_EEH_PE_DISABLE 0 /* Disable EEH functionality */ +#define VFIO_EEH_PE_ENABLE 1 /* Enable EEH functionality */ +#define VFIO_EEH_PE_UNFREEZE_IO 2 /* Enable IO for frozen PE */ +#define VFIO_EEH_PE_UNFREEZE_DMA 3 /* Enable DMA for frozen PE */ +#define VFIO_EEH_PE_GET_STATE 4 /* PE state retrieval */ +#define VFIO_EEH_PE_STATE_NORMAL 0 /* PE in functional state */ +#define VFIO_EEH_PE_STATE_RESET 1 /* PE reset in progress */ +#define VFIO_EEH_PE_STATE_STOPPED 2 /* Stopped DMA and IO */ +#define VFIO_EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */ +#define VFIO_EEH_PE_STATE_UNAVAIL 5 /* State unavailable */ +#define VFIO_EEH_PE_RESET_DEACTIVATE 5 /* Deassert PE reset */ +#define VFIO_EEH_PE_RESET_HOT 6 /* Assert hot reset */ +#define VFIO_EEH_PE_RESET_FUNDAMENTAL 7 /* Assert fundamental reset */ +#define VFIO_EEH_PE_CONFIGURE 8 /* PE configuration */ +#define VFIO_EEH_PE_INJECT_ERR 9 /* Inject EEH error */ + +#define VFIO_EEH_PE_OP _IO(VFIO_TYPE, VFIO_BASE + 21) + +/** + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory) + * + * Registers user space memory where DMA is allowed. It pins + * user pages and does the locked memory accounting so + * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls + * get faster. + */ +struct vfio_iommu_spapr_register_memory { + __u32 argsz; + __u32 flags; + __u64 vaddr; /* Process virtual address */ + __u64 size; /* Size of mapping (bytes) */ +}; +#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17) + +/** + * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory) + * + * Unregisters user space memory registered with + * VFIO_IOMMU_SPAPR_REGISTER_MEMORY. + * Uses vfio_iommu_spapr_register_memory for parameters. + */ +#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18) + +/** + * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create) + * + * Creates an additional TCE table and programs it (sets a new DMA window) + * to every IOMMU group in the container. It receives page shift, window + * size and number of levels in the TCE table being created. + * + * It allocates and returns an offset on a PCI bus of the new DMA window. + */ +struct vfio_iommu_spapr_tce_create { + __u32 argsz; + __u32 flags; + /* in */ + __u32 page_shift; + __u32 __resv1; + __u64 window_size; + __u32 levels; + __u32 __resv2; + /* out */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19) + +/** + * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove) + * + * Unprograms a TCE table from all groups in the container and destroys it. + * It receives a PCI bus offset as a window id. + */ +struct vfio_iommu_spapr_tce_remove { + __u32 argsz; + __u32 flags; + /* in */ + __u64 start_addr; +}; +#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20) + +/* ***************************************************************** */ + +#endif /* VFIO_H */ diff --git a/linux-headers/linux/vfio_ccw.h b/linux-headers/linux/vfio_ccw.h new file mode 100644 index 000000000..516496f1d --- /dev/null +++ b/linux-headers/linux/vfio_ccw.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Interfaces for vfio-ccw + * + * Copyright IBM Corp. 2017 + * + * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> + */ + +#ifndef _VFIO_CCW_H_ +#define _VFIO_CCW_H_ + +#include <linux/types.h> + +/* used for START SUBCHANNEL, always present */ +struct ccw_io_region { +#define ORB_AREA_SIZE 12 + __u8 orb_area[ORB_AREA_SIZE]; +#define SCSW_AREA_SIZE 12 + __u8 scsw_area[SCSW_AREA_SIZE]; +#define IRB_AREA_SIZE 96 + __u8 irb_area[IRB_AREA_SIZE]; + __u32 ret_code; +} __attribute__((packed)); + +/* + * used for processing commands that trigger asynchronous actions + * Note: this is controlled by a capability + */ +#define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0) +#define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1) +struct ccw_cmd_region { + __u32 command; + __u32 ret_code; +} __attribute__((packed)); + +/* + * Used for processing commands that read the subchannel-information block + * Reading this region triggers a stsch() to hardware + * Note: this is controlled by a capability + */ +struct ccw_schib_region { +#define SCHIB_AREA_SIZE 52 + __u8 schib_area[SCHIB_AREA_SIZE]; +} __attribute__((packed)); + +/* + * Used for returning a Channel Report Word to userspace. + * Note: this is controlled by a capability + */ +struct ccw_crw_region { + __u32 crw; + __u32 pad; +} __attribute__((packed)); + +#endif diff --git a/linux-headers/linux/vfio_zdev.h b/linux-headers/linux/vfio_zdev.h new file mode 100644 index 000000000..b4309397b --- /dev/null +++ b/linux-headers/linux/vfio_zdev.h @@ -0,0 +1,78 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * VFIO Region definitions for ZPCI devices + * + * Copyright IBM Corp. 2020 + * + * Author(s): Pierre Morel <pmorel@linux.ibm.com> + * Matthew Rosato <mjrosato@linux.ibm.com> + */ + +#ifndef _VFIO_ZDEV_H_ +#define _VFIO_ZDEV_H_ + +#include <linux/types.h> +#include <linux/vfio.h> + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information + * + * This capability provides a set of descriptive information about the + * associated PCI function. + */ +struct vfio_device_info_cap_zpci_base { + struct vfio_info_cap_header header; + __u64 start_dma; /* Start of available DMA addresses */ + __u64 end_dma; /* End of available DMA addresses */ + __u16 pchid; /* Physical Channel ID */ + __u16 vfn; /* Virtual function number */ + __u16 fmb_length; /* Measurement Block Length (in bytes) */ + __u8 pft; /* PCI Function Type */ + __u8 gid; /* PCI function group ID */ +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information + * + * This capability provides a set of descriptive information about the group of + * PCI functions that the associated device belongs to. + */ +struct vfio_device_info_cap_zpci_group { + struct vfio_info_cap_header header; + __u64 dasm; /* DMA Address space mask */ + __u64 msi_addr; /* MSI address */ + __u64 flags; +#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */ + __u16 mui; /* Measurement Block Update Interval */ + __u16 noi; /* Maximum number of MSIs */ + __u16 maxstbl; /* Maximum Store Block Length */ + __u8 version; /* Supported PCI Version */ +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String + * + * This capability provides the utility string for the associated device, which + * is a device identifier string made up of EBCDID characters. 'size' specifies + * the length of 'util_str'. + */ +struct vfio_device_info_cap_zpci_util { + struct vfio_info_cap_header header; + __u32 size; + __u8 util_str[]; +}; + +/** + * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path + * + * This capability provides the PCI function path string, which is an identifier + * that describes the internal hardware path of the device. 'size' specifies + * the length of 'pfip'. + */ +struct vfio_device_info_cap_zpci_pfip { + struct vfio_info_cap_header header; + __u32 size; + __u8 pfip[]; +}; + +#endif diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h new file mode 100644 index 000000000..c998860d7 --- /dev/null +++ b/linux-headers/linux/vhost.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_VHOST_H +#define _LINUX_VHOST_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include <linux/vhost_types.h> +#include <linux/types.h> +#include <linux/ioctl.h> + +#define VHOST_FILE_UNBIND -1 + +/* ioctls */ + +#define VHOST_VIRTIO 0xAF + +/* Features bitmask for forward compatibility. Transport bits are used for + * vhost specific features. */ +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) + +/* Set current process as the (exclusive) owner of this file descriptor. This + * must be called before any other vhost command. Further calls to + * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +/* Give up ownership, and reset the device to default values. + * Allows subsequent call to VHOST_OWNER_SET to succeed. */ +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) + +/* Set up/modify memory layout */ +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory) + +/* Write logging setup. */ +/* Memory writes can optionally be logged by setting bit at an offset + * (calculated from the physical address) from specified log base. + * The bit is set using an atomic 32 bit operation. */ +/* Set base address for logging. */ +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +/* Specify an eventfd file descriptor to signal on log write. */ +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + +/* Ring setup. */ +/* Set number of descriptors in ring. This parameter can not + * be modified while ring is running (bound to a device). */ +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +/* Set addresses for the ring. */ +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +/* Base value where queue looks for available descriptors */ +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Get accessor: reads index, writes value in num */ +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) + +/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN + * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL). + * The byte order cannot be changed while the device is active: trying to do so + * returns -EBUSY. + * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is + * set. + * Not all kernel configurations support this ioctl, but all configurations that + * support SET also support GET. + */ +#define VHOST_VRING_LITTLE_ENDIAN 0 +#define VHOST_VRING_BIG_ENDIAN 1 +#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state) +#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state) + +/* The following ioctls use eventfd file descriptors to signal and poll + * for events. */ + +/* Set eventfd to poll for added buffers */ +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +/* Set eventfd to signal when buffers have beed used */ +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +/* Set eventfd to signal an error */ +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) +/* Set busy loop timeout (in us) */ +#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \ + struct vhost_vring_state) +/* Get busy loop timeout (in us) */ +#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ + struct vhost_vring_state) + +/* Set or get vhost backend capability */ + +/* Use message type V2 */ +#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1 +/* IOTLB can accept batching hints */ +#define VHOST_BACKEND_F_IOTLB_BATCH 0x2 + +#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64) +#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64) + +/* VHOST_NET specific defines */ + +/* Attach virtio net ring to a raw socket, or tap device. + * The socket must be already bound to an ethernet device, this device will be + * used for transmit. Pass fd -1 to unbind from the socket and the transmit + * device. This can be used to stop the ring (e.g. for migration). */ +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +/* VHOST_SCSI specific defines */ + +#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target) +#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target) +/* Changing this breaks userspace. */ +#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int) +/* Set and get the events missed flag */ +#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32) +#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32) + +/* VHOST_VSOCK specific defines */ + +#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64) +#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int) + +/* VHOST_VDPA specific defines */ + +/* Get the device id. The device ids follow the same definition of + * the device id defined in virtio-spec. + */ +#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32) +/* Get and set the status. The status bits follow the same definition + * of the device status defined in virtio-spec. + */ +#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8) +#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8) +/* Get and set the device config. The device config follows the same + * definition of the device config defined in virtio-spec. + */ +#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \ + struct vhost_vdpa_config) +#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \ + struct vhost_vdpa_config) +/* Enable/disable the ring. */ +#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \ + struct vhost_vring_state) +/* Get the max ring size. */ +#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16) + +/* Set event fd for config interrupt*/ +#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) +#endif diff --git a/linux-headers/linux/vhost_types.h b/linux-headers/linux/vhost_types.h new file mode 100644 index 000000000..473e3c0d8 --- /dev/null +++ b/linux-headers/linux/vhost_types.h @@ -0,0 +1 @@ +#include "standard-headers/linux/vhost_types.h" diff --git a/linux-headers/linux/virtio_config.h b/linux-headers/linux/virtio_config.h new file mode 100644 index 000000000..5590f7d4a --- /dev/null +++ b/linux-headers/linux/virtio_config.h @@ -0,0 +1 @@ +#include "standard-headers/linux/virtio_config.h" diff --git a/linux-headers/linux/virtio_ring.h b/linux-headers/linux/virtio_ring.h new file mode 100644 index 000000000..c6f0fb6cd --- /dev/null +++ b/linux-headers/linux/virtio_ring.h @@ -0,0 +1 @@ +#include "standard-headers/linux/virtio_ring.h" |