Diffstat (limited to 'contrib')
-rw-r--r--  contrib/elf2dmp/addrspace.c  233
-rw-r--r--  contrib/elf2dmp/addrspace.h  44
-rw-r--r--  contrib/elf2dmp/download.c  45
-rw-r--r--  contrib/elf2dmp/download.h  13
-rw-r--r--  contrib/elf2dmp/err.h  13
-rw-r--r--  contrib/elf2dmp/kdbg.h  198
-rw-r--r--  contrib/elf2dmp/main.c  597
-rw-r--r--  contrib/elf2dmp/meson.build  5
-rw-r--r--  contrib/elf2dmp/pdb.c  315
-rw-r--r--  contrib/elf2dmp/pdb.h  241
-rw-r--r--  contrib/elf2dmp/pe.h  122
-rw-r--r--  contrib/elf2dmp/qemu_elf.c  154
-rw-r--r--  contrib/elf2dmp/qemu_elf.h  49
-rw-r--r--  contrib/gitdm/aliases  49
-rw-r--r--  contrib/gitdm/domain-map  43
-rw-r--r--  contrib/gitdm/filetypes.txt  146
-rw-r--r--  contrib/gitdm/group-map-academics  21
-rw-r--r--  contrib/gitdm/group-map-cadence  3
-rw-r--r--  contrib/gitdm/group-map-codeweavers  1
-rw-r--r--  contrib/gitdm/group-map-ibm  14
-rw-r--r--  contrib/gitdm/group-map-individuals  36
-rw-r--r--  contrib/gitdm/group-map-interns  13
-rw-r--r--  contrib/gitdm/group-map-janustech  5
-rw-r--r--  contrib/gitdm/group-map-netflix  5
-rw-r--r--  contrib/gitdm/group-map-redhat  9
-rw-r--r--  contrib/gitdm/group-map-robots  7
-rw-r--r--  contrib/gitdm/group-map-wavecomp  31
-rw-r--r--  contrib/ivshmem-client/ivshmem-client.c  445
-rw-r--r--  contrib/ivshmem-client/ivshmem-client.h  210
-rw-r--r--  contrib/ivshmem-client/main.c  240
-rw-r--r--  contrib/ivshmem-client/meson.build  4
-rw-r--r--  contrib/ivshmem-server/ivshmem-server.c  462
-rw-r--r--  contrib/ivshmem-server/ivshmem-server.h  163
-rw-r--r--  contrib/ivshmem-server/main.c  274
-rw-r--r--  contrib/ivshmem-server/meson.build  4
-rw-r--r--  contrib/plugins/Makefile  45
-rw-r--r--  contrib/plugins/cache.c  860
-rw-r--r--  contrib/plugins/execlog.c  153
-rw-r--r--  contrib/plugins/hotblocks.c  155
-rw-r--r--  contrib/plugins/hotpages.c  203
-rw-r--r--  contrib/plugins/howvec.c  372
-rw-r--r--  contrib/plugins/hwprofile.c  320
-rw-r--r--  contrib/plugins/lockstep.c  356
-rw-r--r--  contrib/rdmacm-mux/main.c  831
-rw-r--r--  contrib/rdmacm-mux/meson.build  9
-rw-r--r--  contrib/rdmacm-mux/rdmacm-mux.h  61
-rw-r--r--  contrib/systemd/qemu-guest-agent.service  11
-rw-r--r--  contrib/systemd/qemu-pr-helper.service  15
-rw-r--r--  contrib/systemd/qemu-pr-helper.socket  9
-rw-r--r--  contrib/vhost-user-blk/meson.build  5
-rw-r--r--  contrib/vhost-user-blk/vhost-user-blk.c  675
-rw-r--r--  contrib/vhost-user-gpu/50-qemu-gpu.json.in  5
-rw-r--r--  contrib/vhost-user-gpu/meson.build  12
-rw-r--r--  contrib/vhost-user-gpu/vhost-user-gpu.c  1256
-rw-r--r--  contrib/vhost-user-gpu/virgl.c  599
-rw-r--r--  contrib/vhost-user-gpu/virgl.h  26
-rw-r--r--  contrib/vhost-user-gpu/vugbm.c  325
-rw-r--r--  contrib/vhost-user-gpu/vugbm.h  66
-rw-r--r--  contrib/vhost-user-gpu/vugpu.h  183
-rw-r--r--  contrib/vhost-user-input/main.c  412
-rw-r--r--  contrib/vhost-user-input/meson.build  4
-rw-r--r--  contrib/vhost-user-scsi/meson.build  6
-rw-r--r--  contrib/vhost-user-scsi/vhost-user-scsi.c  436
63 files changed, 11624 insertions, 0 deletions
diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c
new file mode 100644
index 000000000..53ded1706
--- /dev/null
+++ b/contrib/elf2dmp/addrspace.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "addrspace.h"
+
+static struct pa_block *pa_space_find_block(struct pa_space *ps, uint64_t pa)
+{
+ size_t i;
+ for (i = 0; i < ps->block_nr; i++) {
+ if (ps->block[i].paddr <= pa &&
+ pa < ps->block[i].paddr + ps->block[i].size) {
+ return ps->block + i;
+ }
+ }
+
+ return NULL;
+}
+
+static uint8_t *pa_space_resolve(struct pa_space *ps, uint64_t pa)
+{
+ struct pa_block *block = pa_space_find_block(ps, pa);
+
+ if (!block) {
+ return NULL;
+ }
+
+ return block->addr + (pa - block->paddr);
+}
+
+int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf)
+{
+ Elf64_Half phdr_nr = elf_getphdrnum(qemu_elf->map);
+ Elf64_Phdr *phdr = elf64_getphdr(qemu_elf->map);
+ size_t block_i = 0;
+ size_t i;
+
+ ps->block_nr = 0;
+
+ for (i = 0; i < phdr_nr; i++) {
+ if (phdr[i].p_type == PT_LOAD) {
+ ps->block_nr++;
+ }
+ }
+
+ ps->block = malloc(sizeof(*ps->block) * ps->block_nr);
+ if (!ps->block) {
+ return 1;
+ }
+
+ for (i = 0; i < phdr_nr; i++) {
+ if (phdr[i].p_type == PT_LOAD) {
+ ps->block[block_i] = (struct pa_block) {
+ .addr = (uint8_t *)qemu_elf->map + phdr[i].p_offset,
+ .paddr = phdr[i].p_paddr,
+ .size = phdr[i].p_filesz,
+ };
+ block_i++;
+ }
+ }
+
+ return 0;
+}
+
+void pa_space_destroy(struct pa_space *ps)
+{
+ ps->block_nr = 0;
+ free(ps->block);
+}
+
+void va_space_set_dtb(struct va_space *vs, uint64_t dtb)
+{
+ vs->dtb = dtb & 0x00ffffffffff000;
+}
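+
+/*
+ * Note (illustration, not part of the original patch): the mask above keeps
+ * CR3 bits 12..51, i.e. the 4KiB-aligned physical address of the PML4 table,
+ * and drops the PCID/flag bits in 0..11.
+ */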
+
+void va_space_create(struct va_space *vs, struct pa_space *ps, uint64_t dtb)
+{
+ vs->ps = ps;
+ va_space_set_dtb(vs, dtb);
+}
+
+static uint64_t get_pml4e(struct va_space *vs, uint64_t va)
+{
+ uint64_t pa = (vs->dtb & 0xffffffffff000) | ((va & 0xff8000000000) >> 36);
+
+ return *(uint64_t *)pa_space_resolve(vs->ps, pa);
+}
+
+static uint64_t get_pdpi(struct va_space *vs, uint64_t va, uint64_t pml4e)
+{
+ uint64_t pdpte_paddr = (pml4e & 0xffffffffff000) |
+ ((va & 0x7FC0000000) >> 27);
+
+ return *(uint64_t *)pa_space_resolve(vs->ps, pdpte_paddr);
+}
+
+static uint64_t pde_index(uint64_t va)
+{
+ return (va >> 21) & 0x1FF;
+}
+
+static uint64_t pdba_base(uint64_t pdpe)
+{
+ return pdpe & 0xFFFFFFFFFF000;
+}
+
+static uint64_t get_pgd(struct va_space *vs, uint64_t va, uint64_t pdpe)
+{
+ uint64_t pgd_entry = pdba_base(pdpe) + pde_index(va) * 8;
+
+ return *(uint64_t *)pa_space_resolve(vs->ps, pgd_entry);
+}
+
+static uint64_t pte_index(uint64_t va)
+{
+ return (va >> 12) & 0x1FF;
+}
+
+static uint64_t ptba_base(uint64_t pde)
+{
+ return pde & 0xFFFFFFFFFF000;
+}
+
+static uint64_t get_pte(struct va_space *vs, uint64_t va, uint64_t pgd)
+{
+ uint64_t pgd_val = ptba_base(pgd) + pte_index(va) * 8;
+
+ return *(uint64_t *)pa_space_resolve(vs->ps, pgd_val);
+}
+
+static uint64_t get_paddr(uint64_t va, uint64_t pte)
+{
+ return (pte & 0xFFFFFFFFFF000) | (va & 0xFFF);
+}
+
+static bool is_present(uint64_t entry)
+{
+ return entry & 0x1;
+}
+
+static bool page_size_flag(uint64_t entry)
+{
+ return entry & (1 << 7);
+}
+
+static uint64_t get_1GB_paddr(uint64_t va, uint64_t pdpte)
+{
+ return (pdpte & 0xfffffc0000000) | (va & 0x3fffffff);
+}
+
+static uint64_t get_2MB_paddr(uint64_t va, uint64_t pgd_entry)
+{
+ return (pgd_entry & 0xfffffffe00000) | (va & 0x00000001fffff);
+}
+
+static uint64_t va_space_va2pa(struct va_space *vs, uint64_t va)
+{
+ uint64_t pml4e, pdpe, pgd, pte;
+
+ pml4e = get_pml4e(vs, va);
+ if (!is_present(pml4e)) {
+ return INVALID_PA;
+ }
+
+ pdpe = get_pdpi(vs, va, pml4e);
+ if (!is_present(pdpe)) {
+ return INVALID_PA;
+ }
+
+ if (page_size_flag(pdpe)) {
+ return get_1GB_paddr(va, pdpe);
+ }
+
+ pgd = get_pgd(vs, va, pdpe);
+ if (!is_present(pgd)) {
+ return INVALID_PA;
+ }
+
+ if (page_size_flag(pgd)) {
+ return get_2MB_paddr(va, pgd);
+ }
+
+ pte = get_pte(vs, va, pgd);
+ if (!is_present(pte)) {
+ return INVALID_PA;
+ }
+
+ return get_paddr(va, pte);
+}
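+
+/*
+ * Worked example (illustration only, not part of the original patch) of how
+ * the walk above slices a hypothetical va = 0xfffff80000123456:
+ *
+ *     PML4 index  = (va >> 39) & 0x1ff = 0x1f0
+ *     PDPT index  = (va >> 30) & 0x1ff = 0x000
+ *     PD index    = (va >> 21) & 0x1ff = 0x000
+ *     PT index    = (va >> 12) & 0x1ff = 0x123
+ *     page offset =  va & 0xfff        = 0x456
+ *
+ * Each index selects an 8-byte entry in its table, which is why get_pml4e()
+ * and get_pdpi() shift by 36 and 27 instead of 39 and 30.
+ */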
+
+void *va_space_resolve(struct va_space *vs, uint64_t va)
+{
+ uint64_t pa = va_space_va2pa(vs, va);
+
+ if (pa == INVALID_PA) {
+ return NULL;
+ }
+
+ return pa_space_resolve(vs->ps, pa);
+}
+
+int va_space_rw(struct va_space *vs, uint64_t addr,
+ void *buf, size_t size, int is_write)
+{
+ while (size) {
+ uint64_t page = addr & ELF2DMP_PFN_MASK;
+ size_t s = (page + ELF2DMP_PAGE_SIZE) - addr;
+ void *ptr;
+
+ s = (s > size) ? size : s;
+
+ ptr = va_space_resolve(vs, addr);
+ if (!ptr) {
+ return 1;
+ }
+
+ if (is_write) {
+ memcpy(ptr, buf, s);
+ } else {
+ memcpy(buf, ptr, s);
+ }
+
+ size -= s;
+ buf = (uint8_t *)buf + s;
+ addr += s;
+ }
+
+ return 0;
+}
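+
+/*
+ * Usage sketch (illustration only, not part of the original patch): read one
+ * 64-bit value from guest virtual memory once the physical and virtual
+ * address spaces are set up. The helper name is hypothetical.
+ */
+#if 0
+static int example_read_qword(struct va_space *vs, uint64_t va, uint64_t *out)
+{
+    /* is_write == 0: copy from guest memory into *out */
+    return va_space_rw(vs, va, out, sizeof(*out), 0);
+}
+#endif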
diff --git a/contrib/elf2dmp/addrspace.h b/contrib/elf2dmp/addrspace.h
new file mode 100644
index 000000000..00b44c121
--- /dev/null
+++ b/contrib/elf2dmp/addrspace.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef ADDRSPACE_H
+#define ADDRSPACE_H
+
+#include "qemu_elf.h"
+
+#define ELF2DMP_PAGE_BITS 12
+#define ELF2DMP_PAGE_SIZE (1ULL << ELF2DMP_PAGE_BITS)
+#define ELF2DMP_PFN_MASK (~(ELF2DMP_PAGE_SIZE - 1))
+
+#define INVALID_PA UINT64_MAX
+
+struct pa_block {
+ uint8_t *addr;
+ uint64_t paddr;
+ uint64_t size;
+};
+
+struct pa_space {
+ size_t block_nr;
+ struct pa_block *block;
+};
+
+struct va_space {
+ uint64_t dtb;
+ struct pa_space *ps;
+};
+
+int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf);
+void pa_space_destroy(struct pa_space *ps);
+
+void va_space_create(struct va_space *vs, struct pa_space *ps, uint64_t dtb);
+void va_space_set_dtb(struct va_space *vs, uint64_t dtb);
+void *va_space_resolve(struct va_space *vs, uint64_t va);
+int va_space_rw(struct va_space *vs, uint64_t addr,
+ void *buf, size_t size, int is_write);
+
+#endif /* ADDRSPACE_H */
diff --git a/contrib/elf2dmp/download.c b/contrib/elf2dmp/download.c
new file mode 100644
index 000000000..bd7650a7a
--- /dev/null
+++ b/contrib/elf2dmp/download.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <curl/curl.h>
+#include "download.h"
+
+int download_url(const char *name, const char *url)
+{
+ int err = 0;
+ FILE *file;
+ CURL *curl = curl_easy_init();
+
+ if (!curl) {
+ return 1;
+ }
+
+ file = fopen(name, "wb");
+ if (!file) {
+ err = 1;
+ goto out_curl;
+ }
+
+ if (curl_easy_setopt(curl, CURLOPT_URL, url) != CURLE_OK
+ || curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, NULL) != CURLE_OK
+ || curl_easy_setopt(curl, CURLOPT_WRITEDATA, file) != CURLE_OK
+ || curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1) != CURLE_OK
+ || curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 0) != CURLE_OK
+ || curl_easy_perform(curl) != CURLE_OK) {
+ unlink(name);
+ fclose(file);
+ err = 1;
+ } else {
+ err = fclose(file);
+ }
+
+out_curl:
+ curl_easy_cleanup(curl);
+
+ return err;
+}
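+
+/*
+ * Usage sketch (illustration only, not part of the original patch); the file
+ * name and URL are hypothetical:
+ *
+ *     if (download_url("ntkrnlmp.pdb",
+ *                      "https://msdl.microsoft.com/download/symbols/...")) {
+ *         fprintf(stderr, "download failed\n");
+ *     }
+ *
+ * On any curl error the partially written file is unlinked, so a zero return
+ * means curl reported success and the file was closed cleanly.
+ */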
diff --git a/contrib/elf2dmp/download.h b/contrib/elf2dmp/download.h
new file mode 100644
index 000000000..5c274925f
--- /dev/null
+++ b/contrib/elf2dmp/download.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef DOWNLOAD_H
+#define DOWNLOAD_H
+
+int download_url(const char *name, const char *url);
+
+#endif /* DOWNLOAD_H */
diff --git a/contrib/elf2dmp/err.h b/contrib/elf2dmp/err.h
new file mode 100644
index 000000000..5456bd5a3
--- /dev/null
+++ b/contrib/elf2dmp/err.h
@@ -0,0 +1,13 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef ERR_H
+#define ERR_H
+
+#define eprintf(...) fprintf(stderr, __VA_ARGS__)
+
+#endif /* ERR_H */
diff --git a/contrib/elf2dmp/kdbg.h b/contrib/elf2dmp/kdbg.h
new file mode 100644
index 000000000..002e3d0cd
--- /dev/null
+++ b/contrib/elf2dmp/kdbg.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef KDBG_H
+#define KDBG_H
+
+typedef struct DBGKD_GET_VERSION64 {
+ uint16_t MajorVersion;
+ uint16_t MinorVersion;
+ uint8_t ProtocolVersion;
+ uint8_t KdSecondaryVersion;
+ uint16_t Flags;
+ uint16_t MachineType;
+ uint8_t MaxPacketType;
+ uint8_t MaxStateChange;
+ uint8_t MaxManipulate;
+ uint8_t Simulation;
+ uint16_t Unused[1];
+ uint64_t KernBase;
+ uint64_t PsLoadedModuleList;
+ uint64_t DebuggerDataList;
+} DBGKD_GET_VERSION64;
+
+#ifndef _WIN32
+typedef struct LIST_ENTRY64 {
+ struct LIST_ENTRY64 *Flink;
+ struct LIST_ENTRY64 *Blink;
+} LIST_ENTRY64;
+#endif
+
+typedef struct DBGKD_DEBUG_DATA_HEADER64 {
+ LIST_ENTRY64 List;
+ uint32_t OwnerTag;
+ uint32_t Size;
+} DBGKD_DEBUG_DATA_HEADER64;
+
+typedef struct KDDEBUGGER_DATA64 {
+ DBGKD_DEBUG_DATA_HEADER64 Header;
+
+ uint64_t KernBase;
+ uint64_t BreakpointWithStatus;
+ uint64_t SavedContext;
+ uint16_t ThCallbackStack;
+ uint16_t NextCallback;
+ uint16_t FramePointer;
+ uint16_t PaeEnabled:1;
+ uint64_t KiCallUserMode;
+ uint64_t KeUserCallbackDispatcher;
+ uint64_t PsLoadedModuleList;
+ uint64_t PsActiveProcessHead;
+ uint64_t PspCidTable;
+ uint64_t ExpSystemResourcesList;
+ uint64_t ExpPagedPoolDescriptor;
+ uint64_t ExpNumberOfPagedPools;
+ uint64_t KeTimeIncrement;
+ uint64_t KeBugCheckCallbackListHead;
+ uint64_t KiBugcheckData;
+ uint64_t IopErrorLogListHead;
+ uint64_t ObpRootDirectoryObject;
+ uint64_t ObpTypeObjectType;
+ uint64_t MmSystemCacheStart;
+ uint64_t MmSystemCacheEnd;
+ uint64_t MmSystemCacheWs;
+ uint64_t MmPfnDatabase;
+ uint64_t MmSystemPtesStart;
+ uint64_t MmSystemPtesEnd;
+ uint64_t MmSubsectionBase;
+ uint64_t MmNumberOfPagingFiles;
+ uint64_t MmLowestPhysicalPage;
+ uint64_t MmHighestPhysicalPage;
+ uint64_t MmNumberOfPhysicalPages;
+ uint64_t MmMaximumNonPagedPoolInBytes;
+ uint64_t MmNonPagedSystemStart;
+ uint64_t MmNonPagedPoolStart;
+ uint64_t MmNonPagedPoolEnd;
+ uint64_t MmPagedPoolStart;
+ uint64_t MmPagedPoolEnd;
+ uint64_t MmPagedPoolInformation;
+ uint64_t MmPageSize;
+ uint64_t MmSizeOfPagedPoolInBytes;
+ uint64_t MmTotalCommitLimit;
+ uint64_t MmTotalCommittedPages;
+ uint64_t MmSharedCommit;
+ uint64_t MmDriverCommit;
+ uint64_t MmProcessCommit;
+ uint64_t MmPagedPoolCommit;
+ uint64_t MmExtendedCommit;
+ uint64_t MmZeroedPageListHead;
+ uint64_t MmFreePageListHead;
+ uint64_t MmStandbyPageListHead;
+ uint64_t MmModifiedPageListHead;
+ uint64_t MmModifiedNoWritePageListHead;
+ uint64_t MmAvailablePages;
+ uint64_t MmResidentAvailablePages;
+ uint64_t PoolTrackTable;
+ uint64_t NonPagedPoolDescriptor;
+ uint64_t MmHighestUserAddress;
+ uint64_t MmSystemRangeStart;
+ uint64_t MmUserProbeAddress;
+ uint64_t KdPrintCircularBuffer;
+ uint64_t KdPrintCircularBufferEnd;
+ uint64_t KdPrintWritePointer;
+ uint64_t KdPrintRolloverCount;
+ uint64_t MmLoadedUserImageList;
+
+ /* NT 5.1 Addition */
+
+ uint64_t NtBuildLab;
+ uint64_t KiNormalSystemCall;
+
+ /* NT 5.0 hotfix addition */
+
+ uint64_t KiProcessorBlock;
+ uint64_t MmUnloadedDrivers;
+ uint64_t MmLastUnloadedDriver;
+ uint64_t MmTriageActionTaken;
+ uint64_t MmSpecialPoolTag;
+ uint64_t KernelVerifier;
+ uint64_t MmVerifierData;
+ uint64_t MmAllocatedNonPagedPool;
+ uint64_t MmPeakCommitment;
+ uint64_t MmTotalCommitLimitMaximum;
+ uint64_t CmNtCSDVersion;
+
+ /* NT 5.1 Addition */
+
+ uint64_t MmPhysicalMemoryBlock;
+ uint64_t MmSessionBase;
+ uint64_t MmSessionSize;
+ uint64_t MmSystemParentTablePage;
+
+ /* Server 2003 addition */
+
+ uint64_t MmVirtualTranslationBase;
+ uint16_t OffsetKThreadNextProcessor;
+ uint16_t OffsetKThreadTeb;
+ uint16_t OffsetKThreadKernelStack;
+ uint16_t OffsetKThreadInitialStack;
+ uint16_t OffsetKThreadApcProcess;
+ uint16_t OffsetKThreadState;
+ uint16_t OffsetKThreadBStore;
+ uint16_t OffsetKThreadBStoreLimit;
+ uint16_t SizeEProcess;
+ uint16_t OffsetEprocessPeb;
+ uint16_t OffsetEprocessParentCID;
+ uint16_t OffsetEprocessDirectoryTableBase;
+ uint16_t SizePrcb;
+ uint16_t OffsetPrcbDpcRoutine;
+ uint16_t OffsetPrcbCurrentThread;
+ uint16_t OffsetPrcbMhz;
+ uint16_t OffsetPrcbCpuType;
+ uint16_t OffsetPrcbVendorString;
+ uint16_t OffsetPrcbProcStateContext;
+ uint16_t OffsetPrcbNumber;
+ uint16_t SizeEThread;
+ uint64_t KdPrintCircularBufferPtr;
+ uint64_t KdPrintBufferSize;
+ uint64_t KeLoaderBlock;
+ uint16_t SizePcr;
+ uint16_t OffsetPcrSelfPcr;
+ uint16_t OffsetPcrCurrentPrcb;
+ uint16_t OffsetPcrContainedPrcb;
+ uint16_t OffsetPcrInitialBStore;
+ uint16_t OffsetPcrBStoreLimit;
+ uint16_t OffsetPcrInitialStack;
+ uint16_t OffsetPcrStackLimit;
+ uint16_t OffsetPrcbPcrPage;
+ uint16_t OffsetPrcbProcStateSpecialReg;
+ uint16_t GdtR0Code;
+ uint16_t GdtR0Data;
+ uint16_t GdtR0Pcr;
+ uint16_t GdtR3Code;
+ uint16_t GdtR3Data;
+ uint16_t GdtR3Teb;
+ uint16_t GdtLdt;
+ uint16_t GdtTss;
+ uint16_t Gdt64R3CmCode;
+ uint16_t Gdt64R3CmTeb;
+ uint64_t IopNumTriageDumpDataBlocks;
+ uint64_t IopTriageDumpDataBlocks;
+
+ /* Longhorn addition */
+
+ uint64_t VfCrashDataBlock;
+ uint64_t MmBadPagesDetected;
+ uint64_t MmZeroedPageSingleBitErrorsDetected;
+
+ /* Windows 7 addition */
+
+ uint64_t EtwpDebuggerData;
+ uint16_t OffsetPrcbContext;
+} KDDEBUGGER_DATA64;
+
+#endif /* KDBG_H */
diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c
new file mode 100644
index 000000000..20b477d58
--- /dev/null
+++ b/contrib/elf2dmp/main.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "err.h"
+#include "addrspace.h"
+#include "pe.h"
+#include "pdb.h"
+#include "kdbg.h"
+#include "download.h"
+#include "qemu/win_dump_defs.h"
+
+#define SYM_URL_BASE "https://msdl.microsoft.com/download/symbols/"
+#define PDB_NAME "ntkrnlmp.pdb"
+
+#define INITIAL_MXCSR 0x1f80
+
+typedef struct idt_desc {
+ uint16_t offset1; /* offset bits 0..15 */
+ uint16_t selector;
+ uint8_t ist;
+ uint8_t type_attr;
+ uint16_t offset2; /* offset bits 16..31 */
+ uint32_t offset3; /* offset bits 32..63 */
+ uint32_t rsrvd;
+} __attribute__ ((packed)) idt_desc_t;
+
+static uint64_t idt_desc_addr(idt_desc_t desc)
+{
+ return (uint64_t)desc.offset1 | ((uint64_t)desc.offset2 << 16) |
+ ((uint64_t)desc.offset3 << 32);
+}
+
+static const uint64_t SharedUserData = 0xfffff78000000000;
+
+#define KUSD_OFFSET_SUITE_MASK 0x2d0
+#define KUSD_OFFSET_PRODUCT_TYPE 0x264
+
+#define SYM_RESOLVE(base, r, s) ((s = pdb_resolve(base, r, #s)),\
+ s ? printf(#s" = 0x%016"PRIx64"\n", s) :\
+ eprintf("Failed to resolve "#s"\n"), s)
+
+static uint64_t rol(uint64_t x, uint64_t y)
+{
+ return (x << y) | (x >> (64 - y));
+}
+
+/*
+ * The decoding algorithm can be found in the Volatility project.
+ */
+static void kdbg_decode(uint64_t *dst, uint64_t *src, size_t size,
+ uint64_t kwn, uint64_t kwa, uint64_t kdbe)
+{
+ size_t i;
+ assert(size % sizeof(uint64_t) == 0);
+ for (i = 0; i < size / sizeof(uint64_t); i++) {
+ uint64_t block;
+
+ block = src[i];
+ block = rol(block ^ kwn, (uint8_t)kwn);
+ block = __builtin_bswap64(block ^ kdbe) ^ kwa;
+ dst[i] = block;
+ }
+}
+
+static KDDEBUGGER_DATA64 *get_kdbg(uint64_t KernBase, struct pdb_reader *pdb,
+ struct va_space *vs, uint64_t KdDebuggerDataBlock)
+{
+ const char OwnerTag[4] = "KDBG";
+ KDDEBUGGER_DATA64 *kdbg = NULL;
+ DBGKD_DEBUG_DATA_HEADER64 kdbg_hdr;
+ bool decode = false;
+ uint64_t kwn, kwa, KdpDataBlockEncoded;
+
+ if (va_space_rw(vs,
+ KdDebuggerDataBlock + offsetof(KDDEBUGGER_DATA64, Header),
+ &kdbg_hdr, sizeof(kdbg_hdr), 0)) {
+ eprintf("Failed to extract KDBG header\n");
+ return NULL;
+ }
+
+ if (memcmp(&kdbg_hdr.OwnerTag, OwnerTag, sizeof(OwnerTag))) {
+ uint64_t KiWaitNever, KiWaitAlways;
+
+ decode = true;
+
+ if (!SYM_RESOLVE(KernBase, pdb, KiWaitNever) ||
+ !SYM_RESOLVE(KernBase, pdb, KiWaitAlways) ||
+ !SYM_RESOLVE(KernBase, pdb, KdpDataBlockEncoded)) {
+ return NULL;
+ }
+
+ if (va_space_rw(vs, KiWaitNever, &kwn, sizeof(kwn), 0) ||
+ va_space_rw(vs, KiWaitAlways, &kwa, sizeof(kwa), 0)) {
+ return NULL;
+ }
+
+ printf("[KiWaitNever] = 0x%016"PRIx64"\n", kwn);
+ printf("[KiWaitAlways] = 0x%016"PRIx64"\n", kwa);
+
+ /*
+ * If the KDBG header can be decoded, the KDBG size becomes available
+ * and then the entire KDBG can be decoded.
+ */
+ printf("Decoding KDBG header...\n");
+ kdbg_decode((uint64_t *)&kdbg_hdr, (uint64_t *)&kdbg_hdr,
+ sizeof(kdbg_hdr), kwn, kwa, KdpDataBlockEncoded);
+
+ printf("Owner tag is \'%.4s\'\n", (char *)&kdbg_hdr.OwnerTag);
+ if (memcmp(&kdbg_hdr.OwnerTag, OwnerTag, sizeof(OwnerTag))) {
+ eprintf("Failed to decode KDBG header\n");
+ return NULL;
+ }
+ }
+
+ kdbg = malloc(kdbg_hdr.Size);
+ if (!kdbg) {
+ return NULL;
+ }
+
+ if (va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 0)) {
+ eprintf("Failed to extract entire KDBG\n");
+ free(kdbg);
+ return NULL;
+ }
+
+ if (!decode) {
+ return kdbg;
+ }
+
+ printf("Decoding KdDebuggerDataBlock...\n");
+ kdbg_decode((uint64_t *)kdbg, (uint64_t *)kdbg, kdbg_hdr.Size,
+ kwn, kwa, KdpDataBlockEncoded);
+
+ va_space_rw(vs, KdDebuggerDataBlock, kdbg, kdbg_hdr.Size, 1);
+
+ return kdbg;
+}
+
+static void win_context_init_from_qemu_cpu_state(WinContext *ctx,
+ QEMUCPUState *s)
+{
+ WinContext win_ctx = (WinContext){
+ .ContextFlags = WIN_CTX_X64 | WIN_CTX_INT | WIN_CTX_SEG | WIN_CTX_CTL,
+ .MxCsr = INITIAL_MXCSR,
+
+ .SegCs = s->cs.selector,
+ .SegSs = s->ss.selector,
+ .SegDs = s->ds.selector,
+ .SegEs = s->es.selector,
+ .SegFs = s->fs.selector,
+ .SegGs = s->gs.selector,
+ .EFlags = (uint32_t)s->rflags,
+
+ .Rax = s->rax,
+ .Rbx = s->rbx,
+ .Rcx = s->rcx,
+ .Rdx = s->rdx,
+ .Rsp = s->rsp,
+ .Rbp = s->rbp,
+ .Rsi = s->rsi,
+ .Rdi = s->rdi,
+ .R8 = s->r8,
+ .R9 = s->r9,
+ .R10 = s->r10,
+ .R11 = s->r11,
+ .R12 = s->r12,
+ .R13 = s->r13,
+ .R14 = s->r14,
+ .R15 = s->r15,
+
+ .Rip = s->rip,
+ .FltSave = {
+ .MxCsr = INITIAL_MXCSR,
+ },
+ };
+
+ *ctx = win_ctx;
+}
+
+/*
+ * Finds the paging-structure hierarchy base if the previously set one
+ * doesn't give access to kernel structures.
+ */
+static int fix_dtb(struct va_space *vs, QEMU_Elf *qe)
+{
+ /*
+ * Firstly, test the previously set DTB.
+ */
+ if (va_space_resolve(vs, SharedUserData)) {
+ return 0;
+ }
+
+ /*
+ * Secondly, find a CPU which runs the system task.
+ */
+ size_t i;
+ for (i = 0; i < qe->state_nr; i++) {
+ QEMUCPUState *s = qe->state[i];
+
+ if (is_system(s)) {
+ va_space_set_dtb(vs, s->cr[3]);
+ printf("DTB 0x%016"PRIx64" has been found from CPU #%zu"
+ " as system task CR3\n", vs->dtb, i);
+ return !(va_space_resolve(vs, SharedUserData));
+ }
+ }
+
+ /*
+ * Thirdly, use KERNEL_GS_BASE from CPU #0 as the PRCB address and
+ * take CR3 from [Prcb+0x7000].
+ */
+ if (qe->has_kernel_gs_base) {
+ QEMUCPUState *s = qe->state[0];
+ uint64_t Prcb = s->kernel_gs_base;
+ uint64_t *cr3 = va_space_resolve(vs, Prcb + 0x7000);
+
+ if (!cr3) {
+ return 1;
+ }
+
+ va_space_set_dtb(vs, *cr3);
+ printf("DirectoryTableBase = 0x%016"PRIx64" has been found from CPU #0"
+ " as interrupt handling CR3\n", vs->dtb);
+ return !(va_space_resolve(vs, SharedUserData));
+ }
+
+ return 1;
+}
+
+static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps,
+ struct va_space *vs, uint64_t KdDebuggerDataBlock,
+ KDDEBUGGER_DATA64 *kdbg, uint64_t KdVersionBlock, int nr_cpus)
+{
+ uint32_t *suite_mask = va_space_resolve(vs, SharedUserData +
+ KUSD_OFFSET_SUITE_MASK);
+ int32_t *product_type = va_space_resolve(vs, SharedUserData +
+ KUSD_OFFSET_PRODUCT_TYPE);
+ DBGKD_GET_VERSION64 kvb;
+ WinDumpHeader64 h;
+ size_t i;
+
+ QEMU_BUILD_BUG_ON(KUSD_OFFSET_SUITE_MASK >= ELF2DMP_PAGE_SIZE);
+ QEMU_BUILD_BUG_ON(KUSD_OFFSET_PRODUCT_TYPE >= ELF2DMP_PAGE_SIZE);
+
+ if (!suite_mask || !product_type) {
+ return 1;
+ }
+
+ if (va_space_rw(vs, KdVersionBlock, &kvb, sizeof(kvb), 0)) {
+ eprintf("Failed to extract KdVersionBlock\n");
+ return 1;
+ }
+
+ h = (WinDumpHeader64) {
+ .Signature = "PAGE",
+ .ValidDump = "DU64",
+ .MajorVersion = kvb.MajorVersion,
+ .MinorVersion = kvb.MinorVersion,
+ .DirectoryTableBase = vs->dtb,
+ .PfnDatabase = kdbg->MmPfnDatabase,
+ .PsLoadedModuleList = kdbg->PsLoadedModuleList,
+ .PsActiveProcessHead = kdbg->PsActiveProcessHead,
+ .MachineImageType = kvb.MachineType,
+ .NumberProcessors = nr_cpus,
+ .BugcheckCode = LIVE_SYSTEM_DUMP,
+ .KdDebuggerDataBlock = KdDebuggerDataBlock,
+ .DumpType = 1,
+ .Comment = "Hello from elf2dmp!",
+ .SuiteMask = *suite_mask,
+ .ProductType = *product_type,
+ .SecondaryDataState = kvb.KdSecondaryVersion,
+ .PhysicalMemoryBlock = (WinDumpPhyMemDesc64) {
+ .NumberOfRuns = ps->block_nr,
+ },
+ .RequiredDumpSpace = sizeof(h),
+ };
+
+ for (i = 0; i < ps->block_nr; i++) {
+ h.PhysicalMemoryBlock.NumberOfPages += ps->block[i].size / ELF2DMP_PAGE_SIZE;
+ h.PhysicalMemoryBlock.Run[i] = (WinDumpPhyMemRun64) {
+ .BasePage = ps->block[i].paddr / ELF2DMP_PAGE_SIZE,
+ .PageCount = ps->block[i].size / ELF2DMP_PAGE_SIZE,
+ };
+ }
+
+ h.RequiredDumpSpace += h.PhysicalMemoryBlock.NumberOfPages << ELF2DMP_PAGE_BITS;
+
+ *hdr = h;
+
+ return 0;
+}
+
+static int fill_context(KDDEBUGGER_DATA64 *kdbg,
+ struct va_space *vs, QEMU_Elf *qe)
+{
+ int i;
+ for (i = 0; i < qe->state_nr; i++) {
+ uint64_t Prcb;
+ uint64_t Context;
+ WinContext ctx;
+ QEMUCPUState *s = qe->state[i];
+
+ if (va_space_rw(vs, kdbg->KiProcessorBlock + sizeof(Prcb) * i,
+ &Prcb, sizeof(Prcb), 0)) {
+ eprintf("Failed to read CPU #%d PRCB location\n", i);
+ return 1;
+ }
+
+ if (va_space_rw(vs, Prcb + kdbg->OffsetPrcbContext,
+ &Context, sizeof(Context), 0)) {
+ eprintf("Failed to read CPU #%d ContextFrame location\n", i);
+ return 1;
+ }
+
+ printf("Filling context for CPU #%d...\n", i);
+ win_context_init_from_qemu_cpu_state(&ctx, s);
+
+ if (va_space_rw(vs, Context, &ctx, sizeof(ctx), 1)) {
+ eprintf("Failed to fill CPU #%d context\n", i);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int write_dump(struct pa_space *ps,
+ WinDumpHeader64 *hdr, const char *name)
+{
+ FILE *dmp_file = fopen(name, "wb");
+ size_t i;
+
+ if (!dmp_file) {
+ eprintf("Failed to open output file \'%s\'\n", name);
+ return 1;
+ }
+
+ printf("Writing header to file...\n");
+
+ if (fwrite(hdr, sizeof(*hdr), 1, dmp_file) != 1) {
+ eprintf("Failed to write dump header\n");
+ fclose(dmp_file);
+ return 1;
+ }
+
+ for (i = 0; i < ps->block_nr; i++) {
+ struct pa_block *b = &ps->block[i];
+
+ printf("Writing block #%zu/%zu to file...\n", i, ps->block_nr);
+ if (fwrite(b->addr, b->size, 1, dmp_file) != 1) {
+ eprintf("Failed to write dump header\n");
+ fclose(dmp_file);
+ return 1;
+ }
+ }
+
+ return fclose(dmp_file);
+}
+
+static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr,
+ char *hash, struct va_space *vs)
+{
+ const char e_magic[2] = "MZ";
+ const char Signature[4] = "PE\0\0";
+ const char sign_rsds[4] = "RSDS";
+ IMAGE_DOS_HEADER *dos_hdr = start_addr;
+ IMAGE_NT_HEADERS64 nt_hdrs;
+ IMAGE_FILE_HEADER *file_hdr = &nt_hdrs.FileHeader;
+ IMAGE_OPTIONAL_HEADER64 *opt_hdr = &nt_hdrs.OptionalHeader;
+ IMAGE_DATA_DIRECTORY *data_dir = nt_hdrs.OptionalHeader.DataDirectory;
+ IMAGE_DEBUG_DIRECTORY debug_dir;
+ OMFSignatureRSDS rsds;
+ char *pdb_name;
+ size_t pdb_name_sz;
+ size_t i;
+
+ QEMU_BUILD_BUG_ON(sizeof(*dos_hdr) >= ELF2DMP_PAGE_SIZE);
+
+ if (memcmp(&dos_hdr->e_magic, e_magic, sizeof(e_magic))) {
+ return 1;
+ }
+
+ if (va_space_rw(vs, base + dos_hdr->e_lfanew,
+ &nt_hdrs, sizeof(nt_hdrs), 0)) {
+ return 1;
+ }
+
+ if (memcmp(&nt_hdrs.Signature, Signature, sizeof(Signature)) ||
+ file_hdr->Machine != 0x8664 || opt_hdr->Magic != 0x020b) {
+ return 1;
+ }
+
+ printf("Debug Directory RVA = 0x%08"PRIx32"\n",
+ (uint32_t)data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress);
+
+ if (va_space_rw(vs,
+ base + data_dir[IMAGE_FILE_DEBUG_DIRECTORY].VirtualAddress,
+ &debug_dir, sizeof(debug_dir), 0)) {
+ return 1;
+ }
+
+ if (debug_dir.Type != IMAGE_DEBUG_TYPE_CODEVIEW) {
+ return 1;
+ }
+
+ if (va_space_rw(vs,
+ base + debug_dir.AddressOfRawData,
+ &rsds, sizeof(rsds), 0)) {
+ return 1;
+ }
+
+ printf("CodeView signature is \'%.4s\'\n", rsds.Signature);
+
+ if (memcmp(&rsds.Signature, sign_rsds, sizeof(sign_rsds))) {
+ return 1;
+ }
+
+ pdb_name_sz = debug_dir.SizeOfData - sizeof(rsds);
+ pdb_name = malloc(pdb_name_sz);
+ if (!pdb_name) {
+ return 1;
+ }
+
+ if (va_space_rw(vs, base + debug_dir.AddressOfRawData +
+ offsetof(OMFSignatureRSDS, name), pdb_name, pdb_name_sz, 0)) {
+ free(pdb_name);
+ return 1;
+ }
+
+ printf("PDB name is \'%s\', \'%s\' expected\n", pdb_name, PDB_NAME);
+
+ if (strcmp(pdb_name, PDB_NAME)) {
+ eprintf("Unexpected PDB name, it seems the kernel isn't found\n");
+ free(pdb_name);
+ return 1;
+ }
+
+ free(pdb_name);
+
+ sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds.guid.a, rsds.guid.b,
+ rsds.guid.c, rsds.guid.d[0], rsds.guid.d[1]);
+ hash += 20;
+ for (i = 0; i < 6; i++, hash += 2) {
+ sprintf(hash, "%.02x", rsds.guid.e[i]);
+ }
+
+ sprintf(hash, "%.01x", rsds.age);
+
+ return 0;
+}
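+
+/*
+ * Illustration (not part of the original patch): for a hypothetical GUID
+ * {0x3844dbb9, 0x2017, 0x4967, {0xa2, 0xf5}, {0x2d, 0xf9, 0x0a, 0xe2, 0x43,
+ * 0xdf}} and age 2, the function above produces the symbol store hash
+ * "3844dbb920174967a2f52df90ae243df2": 32 hex digits of GUID followed by
+ * the age, matching the path component of Microsoft symbol server URLs.
+ */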
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+ QEMU_Elf qemu_elf;
+ struct pa_space ps;
+ struct va_space vs;
+ QEMUCPUState *state;
+ idt_desc_t first_idt_desc;
+ uint64_t KernBase;
+ void *nt_start_addr = NULL;
+ WinDumpHeader64 header;
+ char pdb_hash[34];
+ char pdb_url[] = SYM_URL_BASE PDB_NAME
+ "/0123456789ABCDEF0123456789ABCDEFx/" PDB_NAME;
+ struct pdb_reader pdb;
+ uint64_t KdDebuggerDataBlock;
+ KDDEBUGGER_DATA64 *kdbg;
+ uint64_t KdVersionBlock;
+
+ if (argc != 3) {
+ eprintf("usage:\n\t%s elf_file dmp_file\n", argv[0]);
+ return 1;
+ }
+
+ if (QEMU_Elf_init(&qemu_elf, argv[1])) {
+ eprintf("Failed to initialize QEMU ELF dump\n");
+ return 1;
+ }
+
+ if (pa_space_create(&ps, &qemu_elf)) {
+ eprintf("Failed to initialize physical address space\n");
+ err = 1;
+ goto out_elf;
+ }
+
+ state = qemu_elf.state[0];
+ printf("CPU #0 CR3 is 0x%016"PRIx64"\n", state->cr[3]);
+
+ va_space_create(&vs, &ps, state->cr[3]);
+ if (fix_dtb(&vs, &qemu_elf)) {
+ eprintf("Failed to find paging base\n");
+ err = 1;
+ goto out_ps;
+ }
+
+ printf("CPU #0 IDT is at 0x%016"PRIx64"\n", state->idt.base);
+
+ if (va_space_rw(&vs, state->idt.base,
+ &first_idt_desc, sizeof(first_idt_desc), 0)) {
+ eprintf("Failed to get CPU #0 IDT[0]\n");
+ err = 1;
+ goto out_ps;
+ }
+ printf("CPU #0 IDT[0] -> 0x%016"PRIx64"\n", idt_desc_addr(first_idt_desc));
+
+ KernBase = idt_desc_addr(first_idt_desc) & ~(ELF2DMP_PAGE_SIZE - 1);
+ printf("Searching kernel downwards from 0x%016"PRIx64"...\n", KernBase);
+
+ for (; KernBase >= 0xfffff78000000000; KernBase -= ELF2DMP_PAGE_SIZE) {
+ nt_start_addr = va_space_resolve(&vs, KernBase);
+ if (!nt_start_addr) {
+ continue;
+ }
+
+ if (*(uint16_t *)nt_start_addr == 0x5a4d) { /* MZ */
+ break;
+ }
+ }
+
+ if (!nt_start_addr) {
+ eprintf("Failed to find NT kernel image\n");
+ err = 1;
+ goto out_ps;
+ }
+
+ printf("KernBase = 0x%016"PRIx64", signature is \'%.2s\'\n", KernBase,
+ (char *)nt_start_addr);
+
+ if (pe_get_pdb_symstore_hash(KernBase, nt_start_addr, pdb_hash, &vs)) {
+ eprintf("Failed to get PDB symbol store hash\n");
+ err = 1;
+ goto out_ps;
+ }
+
+ sprintf(pdb_url, "%s%s/%s/%s", SYM_URL_BASE, PDB_NAME, pdb_hash, PDB_NAME);
+ printf("PDB URL is %s\n", pdb_url);
+
+ if (download_url(PDB_NAME, pdb_url)) {
+ eprintf("Failed to download PDB file\n");
+ err = 1;
+ goto out_ps;
+ }
+
+ if (pdb_init_from_file(PDB_NAME, &pdb)) {
+ eprintf("Failed to initialize PDB reader\n");
+ err = 1;
+ goto out_pdb_file;
+ }
+
+ if (!SYM_RESOLVE(KernBase, &pdb, KdDebuggerDataBlock) ||
+ !SYM_RESOLVE(KernBase, &pdb, KdVersionBlock)) {
+ err = 1;
+ goto out_pdb;
+ }
+
+ kdbg = get_kdbg(KernBase, &pdb, &vs, KdDebuggerDataBlock);
+ if (!kdbg) {
+ err = 1;
+ goto out_pdb;
+ }
+
+ if (fill_header(&header, &ps, &vs, KdDebuggerDataBlock, kdbg,
+ KdVersionBlock, qemu_elf.state_nr)) {
+ err = 1;
+ goto out_kdbg;
+ }
+
+ if (fill_context(kdbg, &vs, &qemu_elf)) {
+ err = 1;
+ goto out_kdbg;
+ }
+
+ if (write_dump(&ps, &header, argv[2])) {
+ eprintf("Failed to save dump\n");
+ err = 1;
+ goto out_kdbg;
+ }
+
+out_kdbg:
+ free(kdbg);
+out_pdb:
+ pdb_exit(&pdb);
+out_pdb_file:
+ unlink(PDB_NAME);
+out_ps:
+ pa_space_destroy(&ps);
+out_elf:
+ QEMU_Elf_exit(&qemu_elf);
+
+ return err;
+}
diff --git a/contrib/elf2dmp/meson.build b/contrib/elf2dmp/meson.build
new file mode 100644
index 000000000..4d86cb390
--- /dev/null
+++ b/contrib/elf2dmp/meson.build
@@ -0,0 +1,5 @@
+if curl.found()
+ executable('elf2dmp', files('main.c', 'addrspace.c', 'download.c', 'pdb.c', 'qemu_elf.c'),
+ dependencies: [glib, curl],
+ install: true)
+endif
diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c
new file mode 100644
index 000000000..adcfa7e15
--- /dev/null
+++ b/contrib/elf2dmp/pdb.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * Based on source of Wine project
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "qemu/osdep.h"
+
+#include "pdb.h"
+#include "err.h"
+
+static uint32_t pdb_get_file_size(const struct pdb_reader *r, unsigned idx)
+{
+ return r->ds.toc->file_size[idx];
+}
+
+static pdb_seg *get_seg_by_num(struct pdb_reader *r, size_t n)
+{
+ size_t i = 0;
+ char *ptr;
+
+ for (ptr = r->segs; (ptr < r->segs + r->segs_size); ) {
+ i++;
+ ptr += 8;
+ if (i == n) {
+ break;
+ }
+ ptr += sizeof(pdb_seg);
+ }
+
+ return (pdb_seg *)ptr;
+}
+
+uint64_t pdb_find_public_v3_symbol(struct pdb_reader *r, const char *name)
+{
+ size_t size = pdb_get_file_size(r, r->symbols->gsym_file);
+ int length;
+ const union codeview_symbol *sym;
+ const uint8_t *root = r->modimage;
+ size_t i;
+
+ for (i = 0; i < size; i += length) {
+ sym = (const void *)(root + i);
+ length = sym->generic.len + 2;
+
+ if (!sym->generic.id || length < 4) {
+ break;
+ }
+
+ if (sym->generic.id == S_PUB_V3 &&
+ !strcmp(name, sym->public_v3.name)) {
+ pdb_seg *segment = get_seg_by_num(r, sym->public_v3.segment);
+ uint32_t sect_rva = segment->dword[1];
+ uint64_t rva = sect_rva + sym->public_v3.offset;
+
+ printf("%s: 0x%016x(%d:\'%.8s\') + 0x%08x = 0x%09"PRIx64"\n", name,
+ sect_rva, sym->public_v3.segment,
+ ((char *)segment - 8), sym->public_v3.offset, rva);
+ return rva;
+ }
+ }
+
+ return 0;
+}
+
+uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name)
+{
+ uint64_t rva = pdb_find_public_v3_symbol(r, name);
+
+ if (!rva) {
+ return 0;
+ }
+
+ return img_base + rva;
+}
+
+static void pdb_reader_ds_exit(struct pdb_reader *r)
+{
+ free(r->ds.toc);
+}
+
+static void pdb_exit_symbols(struct pdb_reader *r)
+{
+ free(r->modimage);
+ free(r->symbols);
+}
+
+static void pdb_exit_segments(struct pdb_reader *r)
+{
+ free(r->segs);
+}
+
+static void *pdb_ds_read(const PDB_DS_HEADER *header,
+ const uint32_t *block_list, int size)
+{
+ int i, nBlocks;
+ uint8_t *buffer;
+
+ if (!size) {
+ return NULL;
+ }
+
+ nBlocks = (size + header->block_size - 1) / header->block_size;
+
+ buffer = malloc(nBlocks * header->block_size);
+ if (!buffer) {
+ return NULL;
+ }
+
+ for (i = 0; i < nBlocks; i++) {
+ memcpy(buffer + i * header->block_size, (const char *)header +
+ block_list[i] * header->block_size, header->block_size);
+ }
+
+ return buffer;
+}
+
+static void *pdb_ds_read_file(struct pdb_reader *r, uint32_t file_number)
+{
+ const uint32_t *block_list;
+ uint32_t block_size;
+ const uint32_t *file_size;
+ size_t i;
+
+ if (!r->ds.toc || file_number >= r->ds.toc->num_files) {
+ return NULL;
+ }
+
+ file_size = r->ds.toc->file_size;
+ r->file_used[file_number / 32] |= 1 << (file_number % 32);
+
+ if (file_size[file_number] == 0 || file_size[file_number] == 0xFFFFFFFF) {
+ return NULL;
+ }
+
+ block_list = file_size + r->ds.toc->num_files;
+ block_size = r->ds.header->block_size;
+
+ for (i = 0; i < file_number; i++) {
+ block_list += (file_size[i] + block_size - 1) / block_size;
+ }
+
+ return pdb_ds_read(r->ds.header, block_list, file_size[file_number]);
+}
+
+static int pdb_init_segments(struct pdb_reader *r)
+{
+ char *segs;
+ unsigned stream_idx = r->sidx.segments;
+
+ segs = pdb_ds_read_file(r, stream_idx);
+ if (!segs) {
+ return 1;
+ }
+
+ r->segs = segs;
+ r->segs_size = pdb_get_file_size(r, stream_idx);
+
+ return 0;
+}
+
+static int pdb_init_symbols(struct pdb_reader *r)
+{
+ int err = 0;
+ PDB_SYMBOLS *symbols;
+ PDB_STREAM_INDEXES *sidx = &r->sidx;
+
+ memset(sidx, -1, sizeof(*sidx));
+
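+ /* Stream #3 of a PDB (MSF) file is the DBI stream with the symbol info */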
+ symbols = pdb_ds_read_file(r, 3);
+ if (!symbols) {
+ return 1;
+ }
+
+ r->symbols = symbols;
+
+ if (symbols->stream_index_size != sizeof(PDB_STREAM_INDEXES)) {
+ err = 1;
+ goto out_symbols;
+ }
+
+ memcpy(sidx, (const char *)symbols + sizeof(PDB_SYMBOLS) +
+ symbols->module_size + symbols->offset_size +
+ symbols->hash_size + symbols->srcmodule_size +
+ symbols->pdbimport_size + symbols->unknown2_size, sizeof(*sidx));
+
+ /* Read global symbol table */
+ r->modimage = pdb_ds_read_file(r, symbols->gsym_file);
+ if (!r->modimage) {
+ err = 1;
+ goto out_symbols;
+ }
+
+ return 0;
+
+out_symbols:
+ free(symbols);
+
+ return err;
+}
+
+static int pdb_reader_ds_init(struct pdb_reader *r, PDB_DS_HEADER *hdr)
+{
+ if (hdr->block_size == 0) {
+ return 1;
+ }
+
+ memset(r->file_used, 0, sizeof(r->file_used));
+ r->ds.header = hdr;
+ r->ds.toc = pdb_ds_read(hdr, (uint32_t *)((uint8_t *)hdr +
+ hdr->toc_page * hdr->block_size), hdr->toc_size);
+
+ if (!r->ds.toc) {
+ return 1;
+ }
+
+ return 0;
+}
+
+static int pdb_reader_init(struct pdb_reader *r, void *data)
+{
+ int err = 0;
+ const char pdb7[] = "Microsoft C/C++ MSF 7.00";
+
+ if (memcmp(data, pdb7, sizeof(pdb7) - 1)) {
+ return 1;
+ }
+
+ if (pdb_reader_ds_init(r, data)) {
+ return 1;
+ }
+
+ r->ds.root = pdb_ds_read_file(r, 1);
+ if (!r->ds.root) {
+ err = 1;
+ goto out_ds;
+ }
+
+ if (pdb_init_symbols(r)) {
+ err = 1;
+ goto out_root;
+ }
+
+ if (pdb_init_segments(r)) {
+ err = 1;
+ goto out_sym;
+ }
+
+ return 0;
+
+out_sym:
+ pdb_exit_symbols(r);
+out_root:
+ free(r->ds.root);
+out_ds:
+ pdb_reader_ds_exit(r);
+
+ return err;
+}
+
+static void pdb_reader_exit(struct pdb_reader *r)
+{
+ pdb_exit_segments(r);
+ pdb_exit_symbols(r);
+ free(r->ds.root);
+ pdb_reader_ds_exit(r);
+}
+
+int pdb_init_from_file(const char *name, struct pdb_reader *reader)
+{
+ GError *gerr = NULL;
+ int err = 0;
+ void *map;
+
+ reader->gmf = g_mapped_file_new(name, TRUE, &gerr);
+ if (gerr) {
+ eprintf("Failed to map PDB file \'%s\'\n", name);
+ g_error_free(gerr);
+ return 1;
+ }
+
+ reader->file_size = g_mapped_file_get_length(reader->gmf);
+ map = g_mapped_file_get_contents(reader->gmf);
+ if (pdb_reader_init(reader, map)) {
+ err = 1;
+ goto out_unmap;
+ }
+
+ return 0;
+
+out_unmap:
+ g_mapped_file_unref(reader->gmf);
+
+ return err;
+}
+
+void pdb_exit(struct pdb_reader *reader)
+{
+ g_mapped_file_unref(reader->gmf);
+ pdb_reader_exit(reader);
+}
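+
+/*
+ * Usage sketch (illustration only, not part of the original patch); the file
+ * name and symbol mirror main.c, while the image base is hypothetical.
+ */
+#if 0
+static uint64_t example_resolve(void)
+{
+    struct pdb_reader pdb;
+    uint64_t addr = 0;
+
+    if (!pdb_init_from_file("ntkrnlmp.pdb", &pdb)) {
+        addr = pdb_resolve(0xfffff80000000000ULL, &pdb,
+                           "KdDebuggerDataBlock");
+        pdb_exit(&pdb);
+    }
+    return addr;
+}
+#endif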
diff --git a/contrib/elf2dmp/pdb.h b/contrib/elf2dmp/pdb.h
new file mode 100644
index 000000000..4ea8925ee
--- /dev/null
+++ b/contrib/elf2dmp/pdb.h
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef PDB_H
+#define PDB_H
+
+
+#ifndef _WIN32
+typedef struct GUID {
+ unsigned int Data1;
+ unsigned short Data2;
+ unsigned short Data3;
+ unsigned char Data4[8];
+} GUID;
+#endif
+
+struct PDB_FILE {
+ uint32_t size;
+ uint32_t unknown;
+};
+
+typedef struct PDB_DS_HEADER {
+ char signature[32];
+ uint32_t block_size;
+ uint32_t unknown1;
+ uint32_t num_pages;
+ uint32_t toc_size;
+ uint32_t unknown2;
+ uint32_t toc_page;
+} PDB_DS_HEADER;
+
+typedef struct PDB_DS_TOC {
+ uint32_t num_files;
+ uint32_t file_size[1];
+} PDB_DS_TOC;
+
+typedef struct PDB_DS_ROOT {
+ uint32_t Version;
+ uint32_t TimeDateStamp;
+ uint32_t Age;
+ GUID guid;
+ uint32_t cbNames;
+ char names[1];
+} PDB_DS_ROOT;
+
+typedef struct PDB_TYPES_OLD {
+ uint32_t version;
+ uint16_t first_index;
+ uint16_t last_index;
+ uint32_t type_size;
+ uint16_t file;
+ uint16_t pad;
+} PDB_TYPES_OLD;
+
+typedef struct PDB_TYPES {
+ uint32_t version;
+ uint32_t type_offset;
+ uint32_t first_index;
+ uint32_t last_index;
+ uint32_t type_size;
+ uint16_t file;
+ uint16_t pad;
+ uint32_t hash_size;
+ uint32_t hash_base;
+ uint32_t hash_offset;
+ uint32_t hash_len;
+ uint32_t search_offset;
+ uint32_t search_len;
+ uint32_t unknown_offset;
+ uint32_t unknown_len;
+} PDB_TYPES;
+
+typedef struct PDB_SYMBOL_RANGE {
+ uint16_t segment;
+ uint16_t pad1;
+ uint32_t offset;
+ uint32_t size;
+ uint32_t characteristics;
+ uint16_t index;
+ uint16_t pad2;
+} PDB_SYMBOL_RANGE;
+
+typedef struct PDB_SYMBOL_RANGE_EX {
+ uint16_t segment;
+ uint16_t pad1;
+ uint32_t offset;
+ uint32_t size;
+ uint32_t characteristics;
+ uint16_t index;
+ uint16_t pad2;
+ uint32_t timestamp;
+ uint32_t unknown;
+} PDB_SYMBOL_RANGE_EX;
+
+typedef struct PDB_SYMBOL_FILE {
+ uint32_t unknown1;
+ PDB_SYMBOL_RANGE range;
+ uint16_t flag;
+ uint16_t file;
+ uint32_t symbol_size;
+ uint32_t lineno_size;
+ uint32_t unknown2;
+ uint32_t nSrcFiles;
+ uint32_t attribute;
+ char filename[1];
+} PDB_SYMBOL_FILE;
+
+typedef struct PDB_SYMBOL_FILE_EX {
+ uint32_t unknown1;
+ PDB_SYMBOL_RANGE_EX range;
+ uint16_t flag;
+ uint16_t file;
+ uint32_t symbol_size;
+ uint32_t lineno_size;
+ uint32_t unknown2;
+ uint32_t nSrcFiles;
+ uint32_t attribute;
+ uint32_t reserved[2];
+ char filename[1];
+} PDB_SYMBOL_FILE_EX;
+
+typedef struct PDB_SYMBOL_SOURCE {
+ uint16_t nModules;
+ uint16_t nSrcFiles;
+ uint16_t table[1];
+} PDB_SYMBOL_SOURCE;
+
+typedef struct PDB_SYMBOL_IMPORT {
+ uint32_t unknown1;
+ uint32_t unknown2;
+ uint32_t TimeDateStamp;
+ uint32_t Age;
+ char filename[1];
+} PDB_SYMBOL_IMPORT;
+
+typedef struct PDB_SYMBOLS_OLD {
+ uint16_t hash1_file;
+ uint16_t hash2_file;
+ uint16_t gsym_file;
+ uint16_t pad;
+ uint32_t module_size;
+ uint32_t offset_size;
+ uint32_t hash_size;
+ uint32_t srcmodule_size;
+} PDB_SYMBOLS_OLD;
+
+typedef struct PDB_SYMBOLS {
+ uint32_t signature;
+ uint32_t version;
+ uint32_t unknown;
+ uint32_t hash1_file;
+ uint32_t hash2_file;
+ uint16_t gsym_file;
+ uint16_t unknown1;
+ uint32_t module_size;
+ uint32_t offset_size;
+ uint32_t hash_size;
+ uint32_t srcmodule_size;
+ uint32_t pdbimport_size;
+ uint32_t resvd0;
+ uint32_t stream_index_size;
+ uint32_t unknown2_size;
+ uint16_t resvd3;
+ uint16_t machine;
+ uint32_t resvd4;
+} PDB_SYMBOLS;
+
+typedef struct {
+ uint16_t FPO;
+ uint16_t unk0;
+ uint16_t unk1;
+ uint16_t unk2;
+ uint16_t unk3;
+ uint16_t segments;
+} PDB_STREAM_INDEXES_OLD;
+
+typedef struct {
+ uint16_t FPO;
+ uint16_t unk0;
+ uint16_t unk1;
+ uint16_t unk2;
+ uint16_t unk3;
+ uint16_t segments;
+ uint16_t unk4;
+ uint16_t unk5;
+ uint16_t unk6;
+ uint16_t FPO_EXT;
+ uint16_t unk7;
+} PDB_STREAM_INDEXES;
+
+union codeview_symbol {
+ struct {
+ int16_t len;
+ int16_t id;
+ } generic;
+
+ struct {
+ int16_t len;
+ int16_t id;
+ uint32_t symtype;
+ uint32_t offset;
+ uint16_t segment;
+ char name[1];
+ } public_v3;
+};
+
+#define S_PUB_V3 0x110E
+
+typedef struct pdb_seg {
+ uint32_t dword[8];
+} __attribute__ ((packed)) pdb_seg;
+
+#define IMAGE_FILE_MACHINE_I386 0x014c
+#define IMAGE_FILE_MACHINE_AMD64 0x8664
+
+struct pdb_reader {
+ GMappedFile *gmf;
+ size_t file_size;
+ struct {
+ PDB_DS_HEADER *header;
+ PDB_DS_TOC *toc;
+ PDB_DS_ROOT *root;
+ } ds;
+ uint32_t file_used[1024];
+ PDB_SYMBOLS *symbols;
+ PDB_STREAM_INDEXES sidx;
+ uint8_t *modimage;
+ char *segs;
+ size_t segs_size;
+};
+
+int pdb_init_from_file(const char *name, struct pdb_reader *reader);
+void pdb_exit(struct pdb_reader *reader);
+uint64_t pdb_resolve(uint64_t img_base, struct pdb_reader *r, const char *name);
+uint64_t pdb_find_public_v3_symbol(struct pdb_reader *reader, const char *name);
+
+#endif /* PDB_H */
diff --git a/contrib/elf2dmp/pe.h b/contrib/elf2dmp/pe.h
new file mode 100644
index 000000000..c2a4a6ba7
--- /dev/null
+++ b/contrib/elf2dmp/pe.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#ifndef PE_H
+#define PE_H
+
+
+#ifndef _WIN32
+typedef struct IMAGE_DOS_HEADER {
+ uint16_t e_magic; /* 0x00: MZ Header signature */
+ uint16_t e_cblp; /* 0x02: Bytes on last page of file */
+ uint16_t e_cp; /* 0x04: Pages in file */
+ uint16_t e_crlc; /* 0x06: Relocations */
+ uint16_t e_cparhdr; /* 0x08: Size of header in paragraphs */
+ uint16_t e_minalloc; /* 0x0a: Minimum extra paragraphs needed */
+ uint16_t e_maxalloc; /* 0x0c: Maximum extra paragraphs needed */
+ uint16_t e_ss; /* 0x0e: Initial (relative) SS value */
+ uint16_t e_sp; /* 0x10: Initial SP value */
+ uint16_t e_csum; /* 0x12: Checksum */
+ uint16_t e_ip; /* 0x14: Initial IP value */
+ uint16_t e_cs; /* 0x16: Initial (relative) CS value */
+ uint16_t e_lfarlc; /* 0x18: File address of relocation table */
+ uint16_t e_ovno; /* 0x1a: Overlay number */
+ uint16_t e_res[4]; /* 0x1c: Reserved words */
+ uint16_t e_oemid; /* 0x24: OEM identifier (for e_oeminfo) */
+ uint16_t e_oeminfo; /* 0x26: OEM information; e_oemid specific */
+ uint16_t e_res2[10]; /* 0x28: Reserved words */
+ uint32_t e_lfanew; /* 0x3c: Offset to extended header */
+} __attribute__ ((packed)) IMAGE_DOS_HEADER;
+
+typedef struct IMAGE_FILE_HEADER {
+ uint16_t Machine;
+ uint16_t NumberOfSections;
+ uint32_t TimeDateStamp;
+ uint32_t PointerToSymbolTable;
+ uint32_t NumberOfSymbols;
+ uint16_t SizeOfOptionalHeader;
+ uint16_t Characteristics;
+} __attribute__ ((packed)) IMAGE_FILE_HEADER;
+
+typedef struct IMAGE_DATA_DIRECTORY {
+ uint32_t VirtualAddress;
+ uint32_t Size;
+} __attribute__ ((packed)) IMAGE_DATA_DIRECTORY;
+
+#define IMAGE_NUMBEROF_DIRECTORY_ENTRIES 16
+
+typedef struct IMAGE_OPTIONAL_HEADER64 {
+ uint16_t Magic; /* 0x20b */
+ uint8_t MajorLinkerVersion;
+ uint8_t MinorLinkerVersion;
+ uint32_t SizeOfCode;
+ uint32_t SizeOfInitializedData;
+ uint32_t SizeOfUninitializedData;
+ uint32_t AddressOfEntryPoint;
+ uint32_t BaseOfCode;
+ uint64_t ImageBase;
+ uint32_t SectionAlignment;
+ uint32_t FileAlignment;
+ uint16_t MajorOperatingSystemVersion;
+ uint16_t MinorOperatingSystemVersion;
+ uint16_t MajorImageVersion;
+ uint16_t MinorImageVersion;
+ uint16_t MajorSubsystemVersion;
+ uint16_t MinorSubsystemVersion;
+ uint32_t Win32VersionValue;
+ uint32_t SizeOfImage;
+ uint32_t SizeOfHeaders;
+ uint32_t CheckSum;
+ uint16_t Subsystem;
+ uint16_t DllCharacteristics;
+ uint64_t SizeOfStackReserve;
+ uint64_t SizeOfStackCommit;
+ uint64_t SizeOfHeapReserve;
+ uint64_t SizeOfHeapCommit;
+ uint32_t LoaderFlags;
+ uint32_t NumberOfRvaAndSizes;
+ IMAGE_DATA_DIRECTORY DataDirectory[IMAGE_NUMBEROF_DIRECTORY_ENTRIES];
+} __attribute__ ((packed)) IMAGE_OPTIONAL_HEADER64;
+
+typedef struct IMAGE_NT_HEADERS64 {
+ uint32_t Signature;
+ IMAGE_FILE_HEADER FileHeader;
+ IMAGE_OPTIONAL_HEADER64 OptionalHeader;
+} __attribute__ ((packed)) IMAGE_NT_HEADERS64;
+
+typedef struct IMAGE_DEBUG_DIRECTORY {
+ uint32_t Characteristics;
+ uint32_t TimeDateStamp;
+ uint16_t MajorVersion;
+ uint16_t MinorVersion;
+ uint32_t Type;
+ uint32_t SizeOfData;
+ uint32_t AddressOfRawData;
+ uint32_t PointerToRawData;
+} __attribute__ ((packed)) IMAGE_DEBUG_DIRECTORY;
+
+#define IMAGE_DEBUG_TYPE_CODEVIEW 2
+#endif
+
+#define IMAGE_FILE_DEBUG_DIRECTORY 6
+
+typedef struct guid_t {
+ uint32_t a;
+ uint16_t b;
+ uint16_t c;
+ uint8_t d[2];
+ uint8_t e[6];
+} __attribute__ ((packed)) guid_t;
+
+typedef struct OMFSignatureRSDS {
+ char Signature[4];
+ guid_t guid;
+ uint32_t age;
+ char name[];
+} __attribute__ ((packed)) OMFSignatureRSDS;
+
+#endif /* PE_H */
diff --git a/contrib/elf2dmp/qemu_elf.c b/contrib/elf2dmp/qemu_elf.c
new file mode 100644
index 000000000..b601b6d7b
--- /dev/null
+++ b/contrib/elf2dmp/qemu_elf.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "err.h"
+#include "qemu_elf.h"
+
+#define QEMU_NOTE_NAME "QEMU"
+
+#ifndef ROUND_UP
+#define ROUND_UP(n, d) (((n) + (d) - 1) & -(0 ? (n) : (d)))
+#endif
+
+#ifndef DIV_ROUND_UP
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#endif
+
+#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size) \
+ ((DIV_ROUND_UP((hdr_size), 4) + \
+ DIV_ROUND_UP((name_size), 4) + \
+ DIV_ROUND_UP((desc_size), 4)) * 4)
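+
+/*
+ * Worked example (illustration only, not part of the original patch): for a
+ * 12-byte Elf64_Nhdr, n_namesz = 5 ("QEMU\0") and a hypothetical
+ * n_descsz = 6, ELF_NOTE_SIZE gives (3 + 2 + 2) * 4 = 28 bytes, since each
+ * of the three parts is padded to 4-byte alignment.
+ */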
+
+int is_system(QEMUCPUState *s)
+{
+ return s->gs.base >> 63;
+}
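+
+/*
+ * Note (illustration, not part of the original patch): 64-bit Windows keeps
+ * the kernel GS base in the canonical upper half of the address space, so a
+ * set bit 63 is treated here as "this CPU was executing in kernel mode".
+ */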
+
+static char *nhdr_get_name(Elf64_Nhdr *nhdr)
+{
+ return (char *)nhdr + ROUND_UP(sizeof(*nhdr), 4);
+}
+
+static void *nhdr_get_desc(Elf64_Nhdr *nhdr)
+{
+ return nhdr_get_name(nhdr) + ROUND_UP(nhdr->n_namesz, 4);
+}
+
+static Elf64_Nhdr *nhdr_get_next(Elf64_Nhdr *nhdr)
+{
+ return (void *)((uint8_t *)nhdr + ELF_NOTE_SIZE(sizeof(*nhdr),
+ nhdr->n_namesz, nhdr->n_descsz));
+}
+
+Elf64_Phdr *elf64_getphdr(void *map)
+{
+ Elf64_Ehdr *ehdr = map;
+ Elf64_Phdr *phdr = (void *)((uint8_t *)map + ehdr->e_phoff);
+
+ return phdr;
+}
+
+Elf64_Half elf_getphdrnum(void *map)
+{
+ Elf64_Ehdr *ehdr = map;
+
+ return ehdr->e_phnum;
+}
+
+static int init_states(QEMU_Elf *qe)
+{
+ Elf64_Phdr *phdr = elf64_getphdr(qe->map);
+ Elf64_Nhdr *start = (void *)((uint8_t *)qe->map + phdr[0].p_offset);
+ Elf64_Nhdr *end = (void *)((uint8_t *)start + phdr[0].p_memsz);
+ Elf64_Nhdr *nhdr;
+ size_t cpu_nr = 0;
+
+ if (phdr[0].p_type != PT_NOTE) {
+ eprintf("Failed to find PT_NOTE\n");
+ return 1;
+ }
+
+ qe->has_kernel_gs_base = 1;
+
+ for (nhdr = start; nhdr < end; nhdr = nhdr_get_next(nhdr)) {
+ if (!strcmp(nhdr_get_name(nhdr), QEMU_NOTE_NAME)) {
+ QEMUCPUState *state = nhdr_get_desc(nhdr);
+
+ if (state->size < sizeof(*state)) {
+ eprintf("CPU #%zu: QEMU CPU state size %u doesn't match\n",
+ cpu_nr, state->size);
+ /*
+ * We assume either every QEMU CPU state has KERNEL_GS_BASE or
+ * no one has.
+ */
+ qe->has_kernel_gs_base = 0;
+ }
+ cpu_nr++;
+ }
+ }
+
+ printf("%zu CPU states has been found\n", cpu_nr);
+
+ qe->state = malloc(sizeof(*qe->state) * cpu_nr);
+ if (!qe->state) {
+ return 1;
+ }
+
+ cpu_nr = 0;
+
+ for (nhdr = start; nhdr < end; nhdr = nhdr_get_next(nhdr)) {
+ if (!strcmp(nhdr_get_name(nhdr), QEMU_NOTE_NAME)) {
+ qe->state[cpu_nr] = nhdr_get_desc(nhdr);
+ cpu_nr++;
+ }
+ }
+
+ qe->state_nr = cpu_nr;
+
+ return 0;
+}
+
+static void exit_states(QEMU_Elf *qe)
+{
+ free(qe->state);
+}
+
+int QEMU_Elf_init(QEMU_Elf *qe, const char *filename)
+{
+ GError *gerr = NULL;
+ int err = 0;
+
+ qe->gmf = g_mapped_file_new(filename, TRUE, &gerr);
+ if (gerr) {
+ eprintf("Failed to map ELF dump file \'%s\'\n", filename);
+ g_error_free(gerr);
+ return 1;
+ }
+
+ qe->map = g_mapped_file_get_contents(qe->gmf);
+ qe->size = g_mapped_file_get_length(qe->gmf);
+
+ if (init_states(qe)) {
+ eprintf("Failed to extract QEMU CPU states\n");
+ err = 1;
+ goto out_unmap;
+ }
+
+ return 0;
+
+out_unmap:
+ g_mapped_file_unref(qe->gmf);
+
+ return err;
+}
+
+void QEMU_Elf_exit(QEMU_Elf *qe)
+{
+ exit_states(qe);
+ g_mapped_file_unref(qe->gmf);
+}
diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h
new file mode 100644
index 000000000..b2f0d9cbc
--- /dev/null
+++ b/contrib/elf2dmp/qemu_elf.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2018 Virtuozzo International GmbH
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ */
+
+#ifndef ELF2DMP_QEMU_ELF_H
+#define ELF2DMP_QEMU_ELF_H
+
+#include "elf.h"
+
+typedef struct QEMUCPUSegment {
+ uint32_t selector;
+ uint32_t limit;
+ uint32_t flags;
+ uint32_t pad;
+ uint64_t base;
+} QEMUCPUSegment;
+
+typedef struct QEMUCPUState {
+ uint32_t version;
+ uint32_t size;
+ uint64_t rax, rbx, rcx, rdx, rsi, rdi, rsp, rbp;
+ uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
+ uint64_t rip, rflags;
+ QEMUCPUSegment cs, ds, es, fs, gs, ss;
+ QEMUCPUSegment ldt, tr, gdt, idt;
+ uint64_t cr[5];
+ uint64_t kernel_gs_base;
+} QEMUCPUState;
+
+int is_system(QEMUCPUState *s);
+
+typedef struct QEMU_Elf {
+ GMappedFile *gmf;
+ size_t size;
+ void *map;
+ QEMUCPUState **state;
+ size_t state_nr;
+ int has_kernel_gs_base;
+} QEMU_Elf;
+
+int QEMU_Elf_init(QEMU_Elf *qe, const char *filename);
+void QEMU_Elf_exit(QEMU_Elf *qe);
+
+Elf64_Phdr *elf64_getphdr(void *map);
+Elf64_Half elf_getphdrnum(void *map);
+
+#endif /* ELF2DMP_QEMU_ELF_H */
diff --git a/contrib/gitdm/aliases b/contrib/gitdm/aliases
new file mode 100644
index 000000000..4792413ce
--- /dev/null
+++ b/contrib/gitdm/aliases
@@ -0,0 +1,49 @@
+#
+# This is the email aliases file, mapping secondary addresses onto a
+# single, canonical address. It duplicates some info from .mailmap so
+# if you are adding something here also consider if the .mailmap needs
+# updating.
+#
+# If you just want to avoid gitdm complaining about author fields
+# which are actually email addresses with the message:
+#
+# "...is an author name, probably not what you want"
+#
+# you can just apply --use-mailmap to your git-log command, e.g.:
+#
+# git log --use-mailmap --numstat --since "last 2 years" | $GITDM
+#
+# however that will have the effect of squashing multiple addresses to
+# a canonical address which will distort the stats of those who
+# contribute in both personal and professional capacities from
+# different addresses.
+#
+
+# weird commits
+balrog@c046a42c-6fe2-441c-8c8c-71466251a162 balrogg@gmail.com
+aliguori@c046a42c-6fe2-441c-8c8c-71466251a162 anthony@codemonkey.ws
+aurel32@c046a42c-6fe2-441c-8c8c-71466251a162 aurelien@aurel32.net
+blueswir1@c046a42c-6fe2-441c-8c8c-71466251a162 blauwirbel@gmail.com
+edgar_igl@c046a42c-6fe2-441c-8c8c-71466251a162 edgar.iglesias@gmail.com
+bellard@c046a42c-6fe2-441c-8c8c-71466251a162 fabrice@bellard.org
+j_mayer@c046a42c-6fe2-441c-8c8c-71466251a162 l_indien@magic.fr
+pbrook@c046a42c-6fe2-441c-8c8c-71466251a162 paul@codesourcery.com
+ths@c046a42c-6fe2-441c-8c8c-71466251a162 ths@networkno.de
+malc@c046a42c-6fe2-441c-8c8c-71466251a162 av1474@comtv.ru
+
+# canonical emails
+liq3ea@163.com liq3ea@gmail.com
+
+# some broken tags
+yuval.shaia.ml.gmail.com yuval.shaia.ml@gmail.com
+
+# There is also a:
+# (no author) <(no author)@c046a42c-6fe2-441c-8c8c-71466251a162>
+# for the cvs2svn initialization commit e63c3dc74bf.
+
+# Next, translate a few commits where mailman rewrote the From: line due
+# to strict SPF, although we prefer to avoid adding more entries like that.
+"Ed Swierk via Qemu-devel" eswierk@skyportsystems.com
+"Ian McKellar via Qemu-devel" ianloic@google.com
+"Julia Suvorova via Qemu-devel" jusual@mail.ru
+"Justin Terry (VM) via Qemu-devel" juterry@microsoft.com
diff --git a/contrib/gitdm/domain-map b/contrib/gitdm/domain-map
new file mode 100644
index 000000000..2800d9f98
--- /dev/null
+++ b/contrib/gitdm/domain-map
@@ -0,0 +1,43 @@
+#
+# QEMU gitdm domain-map
+#
+# This maps email domains to nice easy to read company names
+#
+
+amd.com AMD
+baidu.com Baidu
+bytedance.com ByteDance
+cmss.chinamobile.com China Mobile
+citrix.com Citrix
+crudebyte.com Crudebyte
+eldorado.org.br Instituto de Pesquisas Eldorado
+fujitsu.com Fujitsu
+google.com Google
+greensocs.com GreenSocs
+huawei.com Huawei
+ibm.com IBM
+igalia.com Igalia
+intel.com Intel
+linaro.org Linaro
+lwn.net LWN
+microsoft.com Microsoft
+mvista.com MontaVista
+nokia.com Nokia
+nuviainc.com NUVIA
+nvidia.com NVIDIA
+oracle.com Oracle
+proxmox.com Proxmox
+quicinc.com Qualcomm Innovation Center
+redhat.com Red Hat
+rt-rk.com RT-RK
+samsung.com Samsung
+siemens.com Siemens
+sifive.com SiFive
+suse.com SUSE
+suse.de SUSE
+virtuozzo.com Virtuozzo
+wdc.com Western Digital
+windriver.com Wind River
+xilinx.com Xilinx
+yadro.com YADRO
+yandex-team.ru Yandex
diff --git a/contrib/gitdm/filetypes.txt b/contrib/gitdm/filetypes.txt
new file mode 100644
index 000000000..d2d6f6db8
--- /dev/null
+++ b/contrib/gitdm/filetypes.txt
@@ -0,0 +1,146 @@
+# -*- coding:utf-8 -*-
+# Copyright (C) 2006 Libresoft
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Authors : Gregorio Robles <grex@gsyc.escet.urjc.es>
+# Authors : Germán Póo-Caamaño <gpoo@gnome.org>
+#
+# This QEMU version is a cut-down version of what originally shipped
+# in the gitdm sample-config directory.
+#
+# This file contains association parameters for filetypes
+# (documentation, development, multimedia, images...)
+#
+# format:
+# filetype <type> <regex> [<comment>]
+#
+# Order:
+# The list should keep an order, so filetypes can be counted properly,
+# i.e. we want ltmain.sh -> 'build' instead of 'code'.
+#
+# If there is a filetype which is not in order but has values, it will
+# be added at the end.
+#
+order build,interface,tests,code,documentation,devel-doc,blobs
+
+#
+#
+# Code files (headers and the like included,
+# most common languages first)
+#
+filetype code \.c$ # C
+filetype code \.c.inc$ # C
+filetype code \.C$ # C++
+filetype code \.cpp$ # C++
+filetype code \.c\+\+$ # C++
+filetype code \.cxx$ # C++
+filetype code \.cc$ # C++
+filetype code \.h$ # C or C++ header
+filetype code \.hh$ # C++ header
+filetype code \.hpp$ # C++ header
+filetype code \.hxx$ # C++ header
+filetype code \.sh$ # Shell
+filetype code \.pl$ # Perl
+filetype code \.py$ # Python
+filetype code \.s$ # Assembly
+filetype code \.S$ # Assembly
+filetype code \.asm$ # Assembly
+filetype code \.awk$ # awk
+filetype code ^common$ # script fragments
+filetype code ^common.*$ # script fragments
+filetype code (qom|qmp)-\w+$ # python script fragments
+
+#
+# Interface/api files
+#
+filetype interface \.json$ # json
+filetype interface \.hx$ # documented options
+
+#
+# Test related blobs (unfortunately we can't filter out test code)
+#
+filetype tests \.hex$
+filetype tests \d{2,3}$ # test data 00-999
+filetype tests ^[A-Z]{4}$ # ACPI test data
+filetype tests ^[A-Z]{4}\.*$ # ACPI test data
+filetype tests \.out$
+filetype tests \.out\.nocache$
+filetype tests \.err$
+filetype tests \.exit$ # bad-if-FOO.exit etc
+filetype tests \.decode$
+filetype tests \.yml$ # travis/shippable config
+
+#
+# Development documentation files (for hacking generally)
+#
+filetype devel-doc ^readme.*$
+filetype devel-doc ^changelog.*
+filetype devel-doc ^hacking.*$
+filetype devel-doc ^licen(s|c)e.*$
+filetype devel-doc ^copying.*$
+filetype devel-doc ^MAINTAINERS$
+filetype devel-doc ^BSD-2-Clause$
+filetype devel-doc ^BSD-3-Clause$
+filetype devel-doc ^GPL-2.0$
+filetype devel-doc \.txt$
+filetype devel-doc \.rst$
+filetype devel-doc \.texi$
+filetype devel-doc \.pod$
+
+#
+# Building, compiling, and configuration admin files
+#
+filetype build configure.*$
+filetype build Makefile$
+filetype build Makefile\.*$
+filetype build config$
+filetype build conf$
+filetype build \.cfg$
+filetype build \.mk$
+filetype build \.mak$
+filetype build \.docker$
+filetype build \.pre$
+filetype build ^.gitignore$
+filetype build ^.gitmodules$
+filetype build ^.gitpublish$
+filetype build ^.mailmap$
+filetype build ^.dir-locals.el$
+filetype build ^.editorconfig$
+filetype build ^.exrc$
+filetype build ^.gdbinit$
+filetype build \.cocci$ # Coccinelle semantic patches
+
+#
+# Misc blobs
+#
+filetype blobs \.bin$
+filetype blobs \.dtb$
+filetype blobs \.dts$
+filetype blobs \.rom$
+filetype blobs \.img$
+filetype blobs \.ndrv$
+filetype blobs \.bmp$
+filetype blobs \.svg$
+filetype blobs ^pi_10.com$
+
+
+#
+# Documentation files
+#
+filetype documentation \.html$
+filetype documentation \.txt$
+filetype documentation \.texi$
+filetype documentation \.po$ # translation files
diff --git a/contrib/gitdm/group-map-academics b/contrib/gitdm/group-map-academics
new file mode 100644
index 000000000..44745ca85
--- /dev/null
+++ b/contrib/gitdm/group-map-academics
@@ -0,0 +1,21 @@
+#
+# QEMU is quite often used for academic research purposes and we like
+# it even better when the work is upstreamed so the project can
+# benefit.
+#
+# We group our academic contributors here
+#
+
+# Institute for System Programming of Russian Academy of Science
+ispras.ru
+
+# Columbia University
+cs.columbia.edu
+cota@braap.org
+
+uni-paderborn.de
+edu
+edu.cn
+
+# Boston University
+bu.edu
diff --git a/contrib/gitdm/group-map-cadence b/contrib/gitdm/group-map-cadence
new file mode 100644
index 000000000..ab97dd2fc
--- /dev/null
+++ b/contrib/gitdm/group-map-cadence
@@ -0,0 +1,3 @@
+# Cadence Design Systems
+
+jcmvbkbc@gmail.com
diff --git a/contrib/gitdm/group-map-codeweavers b/contrib/gitdm/group-map-codeweavers
new file mode 100644
index 000000000..c4803489e
--- /dev/null
+++ b/contrib/gitdm/group-map-codeweavers
@@ -0,0 +1 @@
+sergio.g.delreal@gmail.com
diff --git a/contrib/gitdm/group-map-ibm b/contrib/gitdm/group-map-ibm
new file mode 100644
index 000000000..da62fa3f4
--- /dev/null
+++ b/contrib/gitdm/group-map-ibm
@@ -0,0 +1,14 @@
+#
+# Some IBM contributors submit via another domain
+#
+
+aik@ozlabs.ru
+andrew@aj.id.au
+benh@kernel.crashing.org
+clg@kaod.org
+danielhb413@gmail.com
+groug@kaod.org
+jcfaracco@gmail.com
+joel@jms.id.au
+sjitindarsingh@gmail.com
+tommusta@gmail.com
diff --git a/contrib/gitdm/group-map-individuals b/contrib/gitdm/group-map-individuals
new file mode 100644
index 000000000..f816aa877
--- /dev/null
+++ b/contrib/gitdm/group-map-individuals
@@ -0,0 +1,36 @@
+#
+# Individual and personal contributors
+#
+# This is simply to allow prolific developers with no company
+# affiliations (or non-company related personal work) to be grouped
+# together in the summary stats.
+#
+
+f4bug@amsat.org
+mjt@tls.msk.ru
+mark.cave-ayland@ilande.co.uk
+rth@twiddle.net
+noring@nocrew.org
+samuel.thibault@ens-lyon.org
+aurelien@aurel32.net
+balaton@eik.bme.hu
+e.emanuelegiuseppe@gmail.com
+andrew.smirnov@gmail.com
+sw@weilnetz.de
+deller@gmx.de
+fthain@telegraphics.com.au
+vr_qemu@t-online.de
+nieklinnenbank@gmail.com
+devnexen@gmail.com
+pauldzim@gmail.com
+ani@anisinha.ca
+sundeep.lkml@gmail.com
+mrolnik@gmail.com
+huth@tuxfamily.org
+jhogan@kernel.org
+atar4qemu@gmail.com
+minwoo.im.dev@gmail.com
+bmeng.cn@gmail.com
+liq3ea@gmail.com
+chetan4windows@gmail.com
+akihiko.odaki@gmail.com
diff --git a/contrib/gitdm/group-map-interns b/contrib/gitdm/group-map-interns
new file mode 100644
index 000000000..fe33a3231
--- /dev/null
+++ b/contrib/gitdm/group-map-interns
@@ -0,0 +1,13 @@
+#
+# Group together everyone working as an intern via one of the various
+# outreach programs.
+#
+
+# GSoC 2020 Virtual FIDO/U2F security key
+cesar.belley@lse.epita.fr
+
+# GSoC 2020 TCG performance
+ahmedkhaledkaraman@gmail.com
+
+# GSoC 2021 TCG plugins
+ma.mandourr@gmail.com
diff --git a/contrib/gitdm/group-map-janustech b/contrib/gitdm/group-map-janustech
new file mode 100644
index 000000000..4ae7cc24f
--- /dev/null
+++ b/contrib/gitdm/group-map-janustech
@@ -0,0 +1,5 @@
+#
+# Janus Technologies contributors using non-corporate email
+#
+
+marcel.apfelbaum@gmail.com
diff --git a/contrib/gitdm/group-map-netflix b/contrib/gitdm/group-map-netflix
new file mode 100644
index 000000000..468f95dcb
--- /dev/null
+++ b/contrib/gitdm/group-map-netflix
@@ -0,0 +1,5 @@
+#
+# Netflix contributors using their personal emails
+#
+
+imp@bsdimp.com
diff --git a/contrib/gitdm/group-map-redhat b/contrib/gitdm/group-map-redhat
new file mode 100644
index 000000000..02507b7b5
--- /dev/null
+++ b/contrib/gitdm/group-map-redhat
@@ -0,0 +1,9 @@
+#
+# Red Hat contributors using non-corporate email
+#
+
+david@gibson.dropbear.id.au
+laurent@vivier.eu
+pjp@fedoraproject.org
+armbru@pond.sub.org
+nirsof@gmail.com
diff --git a/contrib/gitdm/group-map-robots b/contrib/gitdm/group-map-robots
new file mode 100644
index 000000000..ffd956c2e
--- /dev/null
+++ b/contrib/gitdm/group-map-robots
@@ -0,0 +1,7 @@
+#
+# There are various automatic robots that occasionally scan and report
+# bugs. Let's group them together here.
+#
+
+# Euler Robot
+euler.robot@huawei.com
diff --git a/contrib/gitdm/group-map-wavecomp b/contrib/gitdm/group-map-wavecomp
new file mode 100644
index 000000000..c5c57f0ea
--- /dev/null
+++ b/contrib/gitdm/group-map-wavecomp
@@ -0,0 +1,31 @@
+#
+# Wave Computing acquired MIPS in June 2018. Also, from February 2013
+# to October 2017, MIPS was owned by Imagination Technologies.
+#
+
+aleksandar.markovic@imgtec.com
+aleksandar.markovic@mips.com
+alex.smith@imgtec.com
+andrew.bennett@imgtec.com
+amarkovic@wavecomp.com
+arikalo@wavecomp.com
+chris@mips.com
+dnikolic@wavecomp.com
+ericj@mips.com
+goran.ferenc@imgtec.com
+james.cowgill@mips.com
+james.hogan@imgtec.com
+james.hogan@mips.com
+leon.alrae@imgtec.com
+matt.redfearn@imgtec.com
+matthew.fortune@mips.com
+miodrag.dinic@imgtec.com
+paul@archlinuxmips.org
+paul.burton@imgtec.com
+petar.jovanovic@imgtec.com
+petarj@mips.com
+pburton@wavecomp.com
+smarkovic@wavecomp.com
+yongbok.kim@imgtec.com
+yongbok.kim@mips.com
+ysu@wavecomp.com
diff --git a/contrib/ivshmem-client/ivshmem-client.c b/contrib/ivshmem-client/ivshmem-client.c
new file mode 100644
index 000000000..182c79d27
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "qemu/queue.h"
+
+#include "ivshmem-client.h"
+
+/* log a message on stdout if verbose=1 */
+#define IVSHMEM_CLIENT_DEBUG(client, fmt, ...) do { \
+ if ((client)->verbose) { \
+ printf(fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
+
+/* read message from the unix socket */
+static int
+ivshmem_client_read_one_msg(IvshmemClient *client, int64_t *index, int *fd)
+{
+ int ret;
+ struct msghdr msg;
+ struct iovec iov[1];
+ union {
+ struct cmsghdr cmsg;
+ char control[CMSG_SPACE(sizeof(int))];
+ } msg_control;
+ struct cmsghdr *cmsg;
+
+ iov[0].iov_base = index;
+ iov[0].iov_len = sizeof(*index);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &msg_control;
+ msg.msg_controllen = sizeof(msg_control);
+
+    ret = recvmsg(client->sock_fd, &msg, 0);
+    if (ret == 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "lost connection to server\n");
+        return -1;
+    }
+    /* cast so that an error (ret == -1) is not hidden by the
+     * signed/unsigned comparison */
+    if (ret < (ssize_t)sizeof(*index)) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read message: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+ *index = GINT64_FROM_LE(*index);
+ *fd = -1;
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
+ cmsg->cmsg_level != SOL_SOCKET ||
+ cmsg->cmsg_type != SCM_RIGHTS) {
+ continue;
+ }
+
+ memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
+ }
+
+ return 0;
+}
+
+/* free a peer when the server advertises a disconnection or when the
+ * client is freed */
+static void
+ivshmem_client_free_peer(IvshmemClient *client, IvshmemClientPeer *peer)
+{
+ unsigned vector;
+
+ QTAILQ_REMOVE(&client->peer_list, peer, next);
+ for (vector = 0; vector < peer->vectors_count; vector++) {
+ close(peer->vectors[vector]);
+ }
+
+ g_free(peer);
+}
+
+/* handle message coming from server (new peer, new vectors) */
+static int
+ivshmem_client_handle_server_msg(IvshmemClient *client)
+{
+ IvshmemClientPeer *peer;
+ int64_t peer_id;
+ int ret, fd;
+
+ ret = ivshmem_client_read_one_msg(client, &peer_id, &fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ /* can return a peer or the local client */
+ peer = ivshmem_client_search_peer(client, peer_id);
+
+ /* delete peer */
+ if (fd == -1) {
+
+ if (peer == NULL || peer == &client->local) {
+ IVSHMEM_CLIENT_DEBUG(client, "receive delete for invalid "
+ "peer %" PRId64 "\n", peer_id);
+ return -1;
+ }
+
+ IVSHMEM_CLIENT_DEBUG(client, "delete peer id = %" PRId64 "\n", peer_id);
+ ivshmem_client_free_peer(client, peer);
+ return 0;
+ }
+
+ /* new peer */
+ if (peer == NULL) {
+ peer = g_malloc0(sizeof(*peer));
+ peer->id = peer_id;
+ peer->vectors_count = 0;
+ QTAILQ_INSERT_TAIL(&client->peer_list, peer, next);
+ IVSHMEM_CLIENT_DEBUG(client, "new peer id = %" PRId64 "\n", peer_id);
+ }
+
+ /* new vector */
+ IVSHMEM_CLIENT_DEBUG(client, " new vector %d (fd=%d) for peer id %"
+ PRId64 "\n", peer->vectors_count, fd, peer->id);
+ if (peer->vectors_count >= G_N_ELEMENTS(peer->vectors)) {
+ IVSHMEM_CLIENT_DEBUG(client, "Too many vectors received, failing");
+ return -1;
+ }
+
+ peer->vectors[peer->vectors_count] = fd;
+ peer->vectors_count++;
+
+ return 0;
+}
+
+/* init a new ivshmem client */
+int
+ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+ IvshmemClientNotifCb notif_cb, void *notif_arg,
+ bool verbose)
+{
+ int ret;
+ unsigned i;
+
+ memset(client, 0, sizeof(*client));
+
+ ret = snprintf(client->unix_sock_path, sizeof(client->unix_sock_path),
+ "%s", unix_sock_path);
+
+ if (ret < 0 || ret >= sizeof(client->unix_sock_path)) {
+ IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+ return -1;
+ }
+
+ for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
+ client->local.vectors[i] = -1;
+ }
+
+ QTAILQ_INIT(&client->peer_list);
+ client->local.id = -1;
+
+ client->notif_cb = notif_cb;
+ client->notif_arg = notif_arg;
+ client->verbose = verbose;
+ client->shm_fd = -1;
+ client->sock_fd = -1;
+
+ return 0;
+}
+
+/* create and connect to the unix socket */
+int
+ivshmem_client_connect(IvshmemClient *client)
+{
+ struct sockaddr_un s_un;
+ int fd, ret;
+ int64_t tmp;
+
+ IVSHMEM_CLIENT_DEBUG(client, "connect to client %s\n",
+ client->unix_sock_path);
+
+ client->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (client->sock_fd < 0) {
+ IVSHMEM_CLIENT_DEBUG(client, "cannot create socket: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ s_un.sun_family = AF_UNIX;
+ ret = snprintf(s_un.sun_path, sizeof(s_un.sun_path), "%s",
+ client->unix_sock_path);
+ if (ret < 0 || ret >= sizeof(s_un.sun_path)) {
+ IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+ goto err_close;
+ }
+
+ if (connect(client->sock_fd, (struct sockaddr *)&s_un, sizeof(s_un)) < 0) {
+ IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n", s_un.sun_path,
+ strerror(errno));
+ goto err_close;
+ }
+
+ /* first, we expect a protocol version */
+ if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+ (tmp != IVSHMEM_PROTOCOL_VERSION) || fd != -1) {
+ IVSHMEM_CLIENT_DEBUG(client, "cannot read from server\n");
+ goto err_close;
+ }
+
+ /* then, we expect our index + a fd == -1 */
+ if (ivshmem_client_read_one_msg(client, &client->local.id, &fd) < 0 ||
+ client->local.id < 0 || fd != -1) {
+ IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (2)\n");
+ goto err_close;
+ }
+ IVSHMEM_CLIENT_DEBUG(client, "our_id=%" PRId64 "\n", client->local.id);
+
+ /* now, we expect shared mem fd + a -1 index, note that shm fd
+ * is not used */
+ if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+ tmp != -1 || fd < 0) {
+ if (fd >= 0) {
+ close(fd);
+ }
+ IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (3)\n");
+ goto err_close;
+ }
+ client->shm_fd = fd;
+ IVSHMEM_CLIENT_DEBUG(client, "shm_fd=%d\n", fd);
+
+ return 0;
+
+err_close:
+ close(client->sock_fd);
+ client->sock_fd = -1;
+ return -1;
+}
+
+/* close connection to the server, and free all peer structures */
+void
+ivshmem_client_close(IvshmemClient *client)
+{
+ IvshmemClientPeer *peer;
+ unsigned i;
+
+ IVSHMEM_CLIENT_DEBUG(client, "close client\n");
+
+ while ((peer = QTAILQ_FIRST(&client->peer_list)) != NULL) {
+ ivshmem_client_free_peer(client, peer);
+ }
+
+ close(client->shm_fd);
+ client->shm_fd = -1;
+ close(client->sock_fd);
+ client->sock_fd = -1;
+ client->local.id = -1;
+ for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
+ close(client->local.vectors[i]);
+ client->local.vectors[i] = -1;
+ }
+ client->local.vectors_count = 0;
+}
+
+/* get the fd_set according to the unix socket and peer list */
+void
+ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds, int *maxfd)
+{
+ int fd;
+ unsigned vector;
+
+ FD_SET(client->sock_fd, fds);
+ if (client->sock_fd >= *maxfd) {
+ *maxfd = client->sock_fd + 1;
+ }
+
+ for (vector = 0; vector < client->local.vectors_count; vector++) {
+ fd = client->local.vectors[vector];
+ FD_SET(fd, fds);
+ if (fd >= *maxfd) {
+ *maxfd = fd + 1;
+ }
+ }
+}
+
+/* handle events from eventfd: just print a message on notification */
+static int
+ivshmem_client_handle_event(IvshmemClient *client, const fd_set *cur, int maxfd)
+{
+ IvshmemClientPeer *peer;
+ uint64_t kick;
+ unsigned i;
+ int ret;
+
+ peer = &client->local;
+
+ for (i = 0; i < peer->vectors_count; i++) {
+ if (peer->vectors[i] >= maxfd || !FD_ISSET(peer->vectors[i], cur)) {
+ continue;
+ }
+
+ ret = read(peer->vectors[i], &kick, sizeof(kick));
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret != sizeof(kick)) {
+ IVSHMEM_CLIENT_DEBUG(client, "invalid read size = %d\n", ret);
+ errno = EINVAL;
+ return -1;
+ }
+ IVSHMEM_CLIENT_DEBUG(client, "received event on fd %d vector %d: %"
+ PRIu64 "\n", peer->vectors[i], i, kick);
+ if (client->notif_cb != NULL) {
+ client->notif_cb(client, peer, i, client->notif_arg);
+ }
+ }
+
+ return 0;
+}
+
+/* read and handle new messages on the given fd_set */
+int
+ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd)
+{
+ if (client->sock_fd < maxfd && FD_ISSET(client->sock_fd, fds) &&
+ ivshmem_client_handle_server_msg(client) < 0 && errno != EINTR) {
+ IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_server_msg() "
+ "failed\n");
+ return -1;
+ } else if (ivshmem_client_handle_event(client, fds, maxfd) < 0 &&
+ errno != EINTR) {
+ IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_event() failed\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/* send a notification on a vector of a peer */
+int
+ivshmem_client_notify(const IvshmemClient *client,
+ const IvshmemClientPeer *peer, unsigned vector)
+{
+ uint64_t kick;
+ int fd;
+
+ if (vector >= peer->vectors_count) {
+ IVSHMEM_CLIENT_DEBUG(client, "invalid vector %u on peer %" PRId64 "\n",
+ vector, peer->id);
+ return -1;
+ }
+ fd = peer->vectors[vector];
+ IVSHMEM_CLIENT_DEBUG(client, "notify peer %" PRId64
+ " on vector %d, fd %d\n", peer->id, vector, fd);
+
+ kick = 1;
+ if (write(fd, &kick, sizeof(kick)) != sizeof(kick)) {
+ fprintf(stderr, "could not write to %d: %s\n", peer->vectors[vector],
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+}
+
+/* send a notification to all vectors of a peer */
+int
+ivshmem_client_notify_all_vects(const IvshmemClient *client,
+ const IvshmemClientPeer *peer)
+{
+ unsigned vector;
+ int ret = 0;
+
+ for (vector = 0; vector < peer->vectors_count; vector++) {
+ if (ivshmem_client_notify(client, peer, vector) < 0) {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
+
+/* send a notification to all peers */
+int
+ivshmem_client_notify_broadcast(const IvshmemClient *client)
+{
+ IvshmemClientPeer *peer;
+ int ret = 0;
+
+ QTAILQ_FOREACH(peer, &client->peer_list, next) {
+ if (ivshmem_client_notify_all_vects(client, peer) < 0) {
+ ret = -1;
+ }
+ }
+
+ return ret;
+}
+
+/* lookup peer from its id */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id)
+{
+ IvshmemClientPeer *peer;
+
+ if (peer_id == client->local.id) {
+ return &client->local;
+ }
+
+ QTAILQ_FOREACH(peer, &client->peer_list, next) {
+ if (peer->id == peer_id) {
+ return peer;
+ }
+ }
+ return NULL;
+}
+
+/* dump our info and the list of peers and their vectors on stdout */
+void
+ivshmem_client_dump(const IvshmemClient *client)
+{
+ const IvshmemClientPeer *peer;
+ unsigned vector;
+
+    /* dump local info */
+ peer = &client->local;
+ printf("our_id = %" PRId64 "\n", peer->id);
+ for (vector = 0; vector < peer->vectors_count; vector++) {
+ printf(" vector %d is enabled (fd=%d)\n", vector,
+ peer->vectors[vector]);
+ }
+
+ /* dump peers */
+ QTAILQ_FOREACH(peer, &client->peer_list, next) {
+ printf("peer_id = %" PRId64 "\n", peer->id);
+
+ for (vector = 0; vector < peer->vectors_count; vector++) {
+ printf(" vector %d is enabled (fd=%d)\n", vector,
+ peer->vectors[vector]);
+ }
+ }
+}
diff --git a/contrib/ivshmem-client/ivshmem-client.h b/contrib/ivshmem-client/ivshmem-client.h
new file mode 100644
index 000000000..fc45a3806
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef IVSHMEM_CLIENT_H
+#define IVSHMEM_CLIENT_H
+
+/**
+ * This file provides helpers to implement an ivshmem client. It is used
+ * on the host to ask QEMU to send an interrupt to an ivshmem PCI device in a
+ * guest. QEMU also implements an ivshmem client similar to this one; they
+ * both connect to an ivshmem server.
+ *
+ * A standalone ivshmem client based on this file is provided for debug/test
+ * purposes.
+ */
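+
+/*
+ * A minimal usage sketch (error handling omitted; see main.c in this
+ * directory for the complete standalone client):
+ *
+ *     IvshmemClient client;
+ *     ivshmem_client_init(&client, "/tmp/ivshmem_socket",
+ *                         NULL, NULL, true);
+ *     ivshmem_client_connect(&client);
+ *     for (;;) {
+ *         fd_set fds;
+ *         int maxfd = 0;
+ *
+ *         FD_ZERO(&fds);
+ *         ivshmem_client_get_fds(&client, &fds, &maxfd);
+ *         select(maxfd, &fds, NULL, NULL, NULL);
+ *         ivshmem_client_handle_fds(&client, &fds, maxfd);
+ *     }
+ */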
+
+#include <sys/select.h>
+
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the client
+ */
+#define IVSHMEM_CLIENT_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, it is advertised to
+ * all connected clients through the unix socket. When our ivshmem
+ * client receives a notification, it creates an IvshmemClientPeer
+ * structure to store the information about this peer.
+ *
+ * This structure is also used to store the information of our own
+ * client in (IvshmemClient)->local.
+ */
+typedef struct IvshmemClientPeer {
+ QTAILQ_ENTRY(IvshmemClientPeer) next; /**< next in list*/
+ int64_t id; /**< the id of the peer */
+ int vectors[IVSHMEM_CLIENT_MAX_VECTORS]; /**< one fd per vector */
+ unsigned vectors_count; /**< number of vectors */
+} IvshmemClientPeer;
+
+typedef struct IvshmemClient IvshmemClient;
+
+/**
+ * Typedef of callback function used when our IvshmemClient receives a
+ * notification from a peer.
+ */
+typedef void (*IvshmemClientNotifCb)(
+ const IvshmemClient *client,
+ const IvshmemClientPeer *peer,
+ unsigned vect, void *arg);
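+
+/*
+ * An example callback matching this typedef (illustrative; main.c
+ * installs a similar one that just prints the notification):
+ *
+ *     static void notif_cb(const IvshmemClient *client,
+ *                          const IvshmemClientPeer *peer,
+ *                          unsigned vect, void *arg)
+ *     {
+ *         printf("peer %" PRId64 " kicked vector %u\n", peer->id, vect);
+ *     }
+ */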
+
+/**
+ * Structure describing an ivshmem client
+ *
+ * This structure stores all information related to our client: the name
+ * of the server unix socket, the list of peers advertised by the
+ * server, our own client information, and a pointer to the notification
+ * callback function used when we receive a notification from a peer.
+ */
+struct IvshmemClient {
+ char unix_sock_path[PATH_MAX]; /**< path to unix sock */
+ int sock_fd; /**< unix sock filedesc */
+ int shm_fd; /**< shm file descriptor */
+
+ QTAILQ_HEAD(, IvshmemClientPeer) peer_list; /**< list of peers */
+    IvshmemClientPeer local;                    /**< our own info */
+
+ IvshmemClientNotifCb notif_cb; /**< notification callback */
+ void *notif_arg; /**< notification argument */
+
+ bool verbose; /**< true to enable debug */
+};
+
+/**
+ * Initialize an ivshmem client
+ *
+ * @client: A pointer to an uninitialized IvshmemClient structure
+ * @unix_sock_path: The pointer to the unix socket file name
+ * @notif_cb: If not NULL, the pointer to the function to be called when
+ * our IvshmemClient receives a notification from a peer
+ * @notif_arg: Opaque pointer given as-is to the notification callback
+ * function
+ * @verbose: True to enable debug
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+ IvshmemClientNotifCb notif_cb, void *notif_arg,
+ bool verbose);
+
+/**
+ * Connect to the server
+ *
+ * Connect to the server unix socket, and read the first initial
+ * messages sent by the server, giving the ID of the client and the file
+ * descriptor of the shared memory.
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_connect(IvshmemClient *client);
+
+/**
+ * Close connection to the server and free all peer structures
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_close(IvshmemClient *client);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function will fill a fd_set with all file descriptors
+ * that must be polled (unix server socket and peer eventfds). The
+ * function will not initialize the fd_set; it is up to the caller
+ * to do this.
+ *
+ * @client: The ivshmem client
+ * @fds: The fd_set to be updated
+ * @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
+ * updated if this function adds a greater fd in fd_set.
+ */
+void ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds,
+ int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set filled by select(), handle incoming messages from
+ * server or peers.
+ *
+ * @client: The ivshmem client
+ * @fds: The fd_set containing the file descriptors to be checked. Note
+ * that file descriptors that are not related to our client are
+ * ignored.
+ * @maxfd: The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd);
+
+/**
+ * Send a notification to a vector of a peer
+ *
+ * @client: The ivshmem client
+ * @peer: The peer to be notified
+ * @vector: The number of the vector
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_notify(const IvshmemClient *client,
+ const IvshmemClientPeer *peer, unsigned vector);
+
+/**
+ * Send a notification to all vectors of a peer
+ *
+ * @client: The ivshmem client
+ * @peer: The peer to be notified
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ * notification failed)
+ */
+int ivshmem_client_notify_all_vects(const IvshmemClient *client,
+ const IvshmemClientPeer *peer);
+
+/**
+ * Broadcast a notification to all vectors of all peers
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ * notification failed)
+ */
+int ivshmem_client_notify_broadcast(const IvshmemClient *client);
+
+/**
+ * Search for a peer by its identifier
+ *
+ * Return the peer structure from its peer_id. If the given peer_id is
+ * the local id, the function returns the local peer structure.
+ *
+ * @client: The ivshmem client
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns: The peer structure, or NULL if not found
+ */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem client on stdout
+ *
+ * Dump the id and the vectors of the given ivshmem client and the list
+ * of its peers and their vectors on stdout.
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_dump(const IvshmemClient *client);
+
+#endif /* IVSHMEM_CLIENT_H */
diff --git a/contrib/ivshmem-client/main.c b/contrib/ivshmem-client/main.c
new file mode 100644
index 000000000..21f38f3fe
--- /dev/null
+++ b/contrib/ivshmem-client/main.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "ivshmem-client.h"
+
+#define IVSHMEM_CLIENT_DEFAULT_VERBOSE 0
+#define IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+
+typedef struct IvshmemClientArgs {
+ bool verbose;
+ const char *unix_sock_path;
+} IvshmemClientArgs;
+
+/* show ivshmem_client_usage and exit with given error code */
+static void
+ivshmem_client_usage(const char *name, int code)
+{
+ fprintf(stderr, "%s [opts]\n", name);
+ fprintf(stderr, " -h: show this help\n");
+ fprintf(stderr, " -v: verbose mode\n");
+ fprintf(stderr, " -S <unix_sock_path>: path to the unix socket\n"
+ " to connect to.\n"
+ " default=%s\n", IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH);
+ exit(code);
+}
+
+/* parse the program arguments, exit on error */
+static void
+ivshmem_client_parse_args(IvshmemClientArgs *args, int argc, char *argv[])
+{
+ int c;
+
+ while ((c = getopt(argc, argv,
+ "h" /* help */
+ "v" /* verbose */
+ "S:" /* unix_sock_path */
+ )) != -1) {
+
+ switch (c) {
+ case 'h': /* help */
+ ivshmem_client_usage(argv[0], 0);
+ break;
+
+ case 'v': /* verbose */
+ args->verbose = 1;
+ break;
+
+ case 'S': /* unix_sock_path */
+ args->unix_sock_path = optarg;
+ break;
+
+ default:
+ ivshmem_client_usage(argv[0], 1);
+ break;
+ }
+ }
+}
+
+/* show command line help */
+static void
+ivshmem_client_cmdline_help(void)
+{
+ printf("dump: dump peers (including us)\n"
+ "int <peer> <vector>: notify one vector on a peer\n"
+ "int <peer> all: notify all vectors of a peer\n"
+ "int all: notify all vectors of all peers (excepting us)\n");
+}
+
+/* read stdin and handle commands */
+static int
+ivshmem_client_handle_stdin_command(IvshmemClient *client)
+{
+ IvshmemClientPeer *peer;
+ char buf[128];
+ char *s, *token;
+ int ret;
+ int peer_id, vector;
+
+ memset(buf, 0, sizeof(buf));
+ ret = read(0, buf, sizeof(buf) - 1);
+ if (ret < 0) {
+ return -1;
+ }
+
+ s = buf;
+ while ((token = strsep(&s, "\n\r;")) != NULL) {
+ if (!strcmp(token, "")) {
+ continue;
+ }
+ if (!strcmp(token, "?")) {
+ ivshmem_client_cmdline_help();
+ }
+ if (!strcmp(token, "help")) {
+ ivshmem_client_cmdline_help();
+ } else if (!strcmp(token, "dump")) {
+ ivshmem_client_dump(client);
+ } else if (!strcmp(token, "int all")) {
+ ivshmem_client_notify_broadcast(client);
+ } else if (sscanf(token, "int %d %d", &peer_id, &vector) == 2) {
+ peer = ivshmem_client_search_peer(client, peer_id);
+ if (peer == NULL) {
+ printf("cannot find peer_id = %d\n", peer_id);
+ continue;
+ }
+ ivshmem_client_notify(client, peer, vector);
+ } else if (sscanf(token, "int %d all", &peer_id) == 1) {
+ peer = ivshmem_client_search_peer(client, peer_id);
+ if (peer == NULL) {
+ printf("cannot find peer_id = %d\n", peer_id);
+ continue;
+ }
+ ivshmem_client_notify_all_vects(client, peer);
+ } else {
+ printf("invalid command, type help\n");
+ }
+ }
+
+ printf("cmd> ");
+ fflush(stdout);
+ return 0;
+}
+
+/* listen on stdin (command line), on unix socket (notifications of new
+ * and dead peers), and on eventfd (IRQ request) */
+static int
+ivshmem_client_poll_events(IvshmemClient *client)
+{
+ fd_set fds;
+ int ret, maxfd;
+
+ while (1) {
+
+ FD_ZERO(&fds);
+ FD_SET(0, &fds); /* add stdin in fd_set */
+ maxfd = 1;
+
+ ivshmem_client_get_fds(client, &fds, &maxfd);
+
+ ret = select(maxfd, &fds, NULL, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EINTR) {
+ continue;
+ }
+
+ fprintf(stderr, "select error: %s\n", strerror(errno));
+ break;
+ }
+ if (ret == 0) {
+ continue;
+ }
+
+ if (FD_ISSET(0, &fds) &&
+ ivshmem_client_handle_stdin_command(client) < 0 && errno != EINTR) {
+ fprintf(stderr, "ivshmem_client_handle_stdin_command() failed\n");
+ break;
+ }
+
+ if (ivshmem_client_handle_fds(client, &fds, maxfd) < 0) {
+ fprintf(stderr, "ivshmem_client_handle_fds() failed\n");
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* callback when we receive a notification (just display it) */
+static void
+ivshmem_client_notification_cb(const IvshmemClient *client,
+ const IvshmemClientPeer *peer,
+ unsigned vect, void *arg)
+{
+ (void)client;
+ (void)arg;
+ printf("receive notification from peer_id=%" PRId64 " vector=%u\n",
+ peer->id, vect);
+}
+
+int
+main(int argc, char *argv[])
+{
+ struct sigaction sa;
+ IvshmemClient client;
+ IvshmemClientArgs args = {
+ .verbose = IVSHMEM_CLIENT_DEFAULT_VERBOSE,
+ .unix_sock_path = IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH,
+ };
+
+ /* parse arguments, will exit on error */
+ ivshmem_client_parse_args(&args, argc, argv);
+
+ /* Ignore SIGPIPE, see this link for more info:
+ * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+ sa.sa_handler = SIG_IGN;
+ sa.sa_flags = 0;
+ if (sigemptyset(&sa.sa_mask) == -1 ||
+ sigaction(SIGPIPE, &sa, 0) == -1) {
+ perror("failed to ignore SIGPIPE; sigaction");
+ return 1;
+ }
+
+ ivshmem_client_cmdline_help();
+ printf("cmd> ");
+ fflush(stdout);
+
+ if (ivshmem_client_init(&client, args.unix_sock_path,
+ ivshmem_client_notification_cb, NULL,
+ args.verbose) < 0) {
+ fprintf(stderr, "cannot init client\n");
+ return 1;
+ }
+
+ while (1) {
+ if (ivshmem_client_connect(&client) < 0) {
+ fprintf(stderr, "cannot connect to server, retry in 1 second\n");
+ sleep(1);
+ continue;
+ }
+
+ fprintf(stdout, "listen on server socket %d\n", client.sock_fd);
+
+ if (ivshmem_client_poll_events(&client) == 0) {
+ continue;
+ }
+
+ /* disconnected from server, reset all peers */
+ fprintf(stdout, "disconnected from server\n");
+
+ ivshmem_client_close(&client);
+ }
+
+ return 0;
+}
diff --git a/contrib/ivshmem-client/meson.build b/contrib/ivshmem-client/meson.build
new file mode 100644
index 000000000..1b171efb4
--- /dev/null
+++ b/contrib/ivshmem-client/meson.build
@@ -0,0 +1,4 @@
+executable('ivshmem-client', files('ivshmem-client.c', 'main.c'),
+ dependencies: glib,
+ build_by_default: targetos == 'linux',
+ install: false)
diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
new file mode 100644
index 000000000..39a6ffdb5
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
+#include "qemu/sockets.h"
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "ivshmem-server.h"
+
+/* log a message on stdout if verbose=1 */
+#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
+ if ((server)->verbose) { \
+ printf(fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
+
+/** maximum size of a huge page, used by ivshmem_server_ftruncate() */
+#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)
+
+/** default listen backlog (number of sockets not accepted) */
+#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
+
+/* send message to a client unix socket */
+static int
+ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
+{
+ int ret;
+ struct msghdr msg;
+ struct iovec iov[1];
+ union {
+ struct cmsghdr cmsg;
+ char control[CMSG_SPACE(sizeof(int))];
+ } msg_control;
+ struct cmsghdr *cmsg;
+
+ peer_id = GINT64_TO_LE(peer_id);
+ iov[0].iov_base = &peer_id;
+ iov[0].iov_len = sizeof(peer_id);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+
+ /* if fd is specified, add it in a cmsg */
+ if (fd >= 0) {
+ memset(&msg_control, 0, sizeof(msg_control));
+ msg.msg_control = &msg_control;
+ msg.msg_controllen = sizeof(msg_control);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd));
+ }
+
+ ret = sendmsg(sock_fd, &msg, 0);
+ if (ret <= 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+/* free a peer when the server advertises a disconnection or when the
+ * server is freed */
+static void
+ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+ unsigned vector;
+ IvshmemServerPeer *other_peer;
+
+ IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id);
+ close(peer->sock_fd);
+ QTAILQ_REMOVE(&server->peer_list, peer, next);
+
+ /* advertise the deletion to other peers */
+ QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+ ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id, -1);
+ }
+
+ for (vector = 0; vector < peer->vectors_count; vector++) {
+ event_notifier_cleanup(&peer->vectors[vector]);
+ }
+
+ g_free(peer);
+}
+
+/* send the peer id and the shm_fd just after a new client connection */
+static int
+ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+ int ret;
+
+ /* send our protocol version first */
+ ret = ivshmem_server_send_one_msg(peer->sock_fd, IVSHMEM_PROTOCOL_VERSION,
+ -1);
+ if (ret < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /* send the peer id to the client */
+ ret = ivshmem_server_send_one_msg(peer->sock_fd, peer->id, -1);
+ if (ret < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ /* send the shm_fd */
+ ret = ivshmem_server_send_one_msg(peer->sock_fd, -1, server->shm_fd);
+ if (ret < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+/* handle message on listening unix socket (new client connection) */
+static int
+ivshmem_server_handle_new_conn(IvshmemServer *server)
+{
+ IvshmemServerPeer *peer, *other_peer;
+ struct sockaddr_un unaddr;
+ socklen_t unaddr_len;
+ int newfd;
+ unsigned i;
+
+ /* accept the incoming connection */
+ unaddr_len = sizeof(unaddr);
+ newfd = qemu_accept(server->sock_fd,
+ (struct sockaddr *)&unaddr, &unaddr_len);
+
+ if (newfd < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
+ return -1;
+ }
+
+ qemu_set_nonblock(newfd);
+ IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);
+
+ /* allocate new structure for this peer */
+ peer = g_malloc0(sizeof(*peer));
+ peer->sock_fd = newfd;
+
+ /* get an unused peer id */
+ /* XXX: this could use id allocation such as Linux IDA, or simply
+ * a free-list */
+ for (i = 0; i < G_MAXUINT16; i++) {
+ if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
+ break;
+ }
+ server->cur_id++;
+ }
+ if (i == G_MAXUINT16) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
+ close(newfd);
+ g_free(peer);
+ return -1;
+ }
+ peer->id = server->cur_id++;
+
+ /* create eventfd, one per vector */
+ peer->vectors_count = server->n_vectors;
+ for (i = 0; i < peer->vectors_count; i++) {
+ if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
+ goto fail;
+ }
+ }
+
+ /* send peer id and shm fd */
+ if (ivshmem_server_send_initial_info(server, peer) < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
+ goto fail;
+ }
+
+ /* advertise the new peer to others */
+ QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+ for (i = 0; i < peer->vectors_count; i++) {
+ ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
+ peer->vectors[i].wfd);
+ }
+ }
+
+ /* advertise the other peers to the new one */
+ QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+ for (i = 0; i < peer->vectors_count; i++) {
+ ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
+ other_peer->vectors[i].wfd);
+ }
+ }
+
+ /* advertise the new peer to itself */
+ for (i = 0; i < peer->vectors_count; i++) {
+ ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
+ event_notifier_get_fd(&peer->vectors[i]));
+ }
+
+ QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
+ IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n",
+ peer->id);
+ return 0;
+
+fail:
+ while (i--) {
+ event_notifier_cleanup(&peer->vectors[i]);
+ }
+ close(newfd);
+ g_free(peer);
+ return -1;
+}
+
+/* Try to ftruncate a file to the next power of 2 of shmsize.
+ * If it fails, all powers of 2 above shmsize are tested until
+ * we reach the maximum huge page size. This is useful
+ * if the shm file is in a hugetlbfs that cannot be truncated to the
+ * shm_size value. */
+static int
+ivshmem_server_ftruncate(int fd, unsigned shmsize)
+{
+ int ret;
+ struct stat mapstat;
+
+ /* align shmsize to next power of 2 */
+ shmsize = pow2ceil(shmsize);
+
+ if (fstat(fd, &mapstat) != -1 && mapstat.st_size == shmsize) {
+ return 0;
+ }
+
+ while (shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE) {
+ ret = ftruncate(fd, shmsize);
+ if (ret == 0) {
+ return ret;
+ }
+ shmsize *= 2;
+ }
+
+ return -1;
+}
+
+/* Init a new ivshmem server */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+ const char *shm_path, bool use_shm_open,
+ size_t shm_size, unsigned n_vectors,
+ bool verbose)
+{
+ int ret;
+
+ memset(server, 0, sizeof(*server));
+ server->verbose = verbose;
+
+ ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
+ "%s", unix_sock_path);
+ if (ret < 0 || ret >= sizeof(server->unix_sock_path)) {
+ IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+ return -1;
+ }
+ ret = snprintf(server->shm_path, sizeof(server->shm_path),
+ "%s", shm_path);
+ if (ret < 0 || ret >= sizeof(server->shm_path)) {
+ IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
+ return -1;
+ }
+
+ server->use_shm_open = use_shm_open;
+ server->shm_size = shm_size;
+ server->n_vectors = n_vectors;
+
+ QTAILQ_INIT(&server->peer_list);
+
+ return 0;
+}
+
+/* open shm, create and bind to the unix socket */
+int
+ivshmem_server_start(IvshmemServer *server)
+{
+ struct sockaddr_un s_un;
+ int shm_fd, sock_fd, ret;
+
+ /* open shm file */
+ if (server->use_shm_open) {
+ IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
+ server->shm_path);
+ shm_fd = shm_open(server->shm_path, O_CREAT | O_RDWR, S_IRWXU);
+ } else {
+ gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
+ IVSHMEM_SERVER_DEBUG(server, "Using file-backed shared memory: %s\n",
+ server->shm_path);
+ shm_fd = mkstemp(filename);
+ unlink(filename);
+ g_free(filename);
+ }
+
+ if (shm_fd < 0) {
+ fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
+ strerror(errno));
+ return -1;
+ }
+ if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
+ fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
+ strerror(errno));
+ goto err_close_shm;
+ }
+
+ IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
+ server->unix_sock_path);
+
+ /* create the unix listening socket */
+ sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
+ strerror(errno));
+ goto err_close_shm;
+ }
+
+ s_un.sun_family = AF_UNIX;
+ ret = snprintf(s_un.sun_path, sizeof(s_un.sun_path), "%s",
+ server->unix_sock_path);
+ if (ret < 0 || ret >= sizeof(s_un.sun_path)) {
+ IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+ goto err_close_sock;
+ }
+ if (bind(sock_fd, (struct sockaddr *)&s_un, sizeof(s_un)) < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "cannot connect to %s: %s\n", s_un.sun_path,
+ strerror(errno));
+ goto err_close_sock;
+ }
+
+ if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
+ IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
+ goto err_close_sock;
+ }
+
+ server->sock_fd = sock_fd;
+ server->shm_fd = shm_fd;
+
+ return 0;
+
+err_close_sock:
+ close(sock_fd);
+err_close_shm:
+ if (server->use_shm_open) {
+ shm_unlink(server->shm_path);
+ }
+ close(shm_fd);
+ return -1;
+}
+
+/* close connections to clients, the unix socket and the shm fd */
+void
+ivshmem_server_close(IvshmemServer *server)
+{
+ IvshmemServerPeer *peer, *npeer;
+
+ IVSHMEM_SERVER_DEBUG(server, "close server\n");
+
+ QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, npeer) {
+ ivshmem_server_free_peer(server, peer);
+ }
+
+ unlink(server->unix_sock_path);
+ if (server->use_shm_open) {
+ shm_unlink(server->shm_path);
+ }
+ close(server->sock_fd);
+ close(server->shm_fd);
+ server->sock_fd = -1;
+ server->shm_fd = -1;
+}
+
+/* get the fd_set according to the unix socket and the peer list */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
+{
+ IvshmemServerPeer *peer;
+
+ if (server->sock_fd == -1) {
+ return;
+ }
+
+ FD_SET(server->sock_fd, fds);
+ if (server->sock_fd >= *maxfd) {
+ *maxfd = server->sock_fd + 1;
+ }
+
+ QTAILQ_FOREACH(peer, &server->peer_list, next) {
+ FD_SET(peer->sock_fd, fds);
+ if (peer->sock_fd >= *maxfd) {
+ *maxfd = peer->sock_fd + 1;
+ }
+ }
+}
+
+/* process incoming messages on the sockets in fd_set */
+int
+ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
+{
+ IvshmemServerPeer *peer, *peer_next;
+
+ if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds) &&
+ ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
+ IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
+ "failed\n");
+ return -1;
+ }
+
+ QTAILQ_FOREACH_SAFE(peer, &server->peer_list, next, peer_next) {
+        /* any message from a peer socket results in a close() */
+ IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", peer->sock_fd);
+ if (peer->sock_fd < maxfd && FD_ISSET(peer->sock_fd, fds)) {
+ ivshmem_server_free_peer(server, peer);
+ }
+ }
+
+ return 0;
+}
+
+/* lookup peer from its id */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id)
+{
+ IvshmemServerPeer *peer;
+
+ QTAILQ_FOREACH(peer, &server->peer_list, next) {
+ if (peer->id == peer_id) {
+ return peer;
+ }
+ }
+ return NULL;
+}
+
+/* dump the list of peers and their vectors on stdout */
+void
+ivshmem_server_dump(const IvshmemServer *server)
+{
+ const IvshmemServerPeer *peer;
+ unsigned vector;
+
+ /* dump peers */
+ QTAILQ_FOREACH(peer, &server->peer_list, next) {
+ printf("peer_id = %" PRId64 "\n", peer->id);
+
+ for (vector = 0; vector < peer->vectors_count; vector++) {
+ printf(" vector %d is enabled (fd=%d)\n", vector,
+ event_notifier_get_fd(&peer->vectors[vector]));
+ }
+ }
+}
diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
new file mode 100644
index 000000000..d870adb6a
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef IVSHMEM_SERVER_H
+#define IVSHMEM_SERVER_H
+
+/**
+ * The ivshmem server is a daemon that creates a unix socket in listen
+ * mode. The ivshmem clients (qemu or ivshmem-client) connect to this
+ * unix socket. For each client, the server creates some eventfds
+ * (see EVENTFD(2)), one per vector. These fds are transmitted to all
+ * clients using SCM_RIGHTS cmsg messages. Therefore, each client is
+ * able to send a notification to another client without being
+ * "proxied" by the server.
+ *
+ * We use this mechanism to send interrupts between guests.
+ * QEMU is able to transform an event on an eventfd into a PCI MSI-X
+ * interrupt in the guest.
+ *
+ * The ivshmem server is also able to share the file descriptor
+ * associated with the ivshmem shared memory.
+ */
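+
+/*
+ * Wire format sketch, derived from ivshmem_server_send_one_msg() and the
+ * client side in contrib/ivshmem-client: every message is a single
+ * little-endian int64, optionally accompanied by a file descriptor in an
+ * SCM_RIGHTS control message. Right after connecting, a client receives:
+ *
+ *     IVSHMEM_PROTOCOL_VERSION  (no fd)
+ *     its own peer id           (no fd)
+ *     -1                        (with the shared memory fd)
+ *
+ * followed by one (peer id, eventfd) message per vector of every peer,
+ * including the client itself.
+ */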
+
+#include <sys/select.h>
+
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the server
+ */
+#define IVSHMEM_SERVER_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, a new
+ * IvshmemServerPeer structure is created. This peer and all its
+ * vectors are advertised to all connected clients through the connected
+ * unix sockets.
+ */
+typedef struct IvshmemServerPeer {
+ QTAILQ_ENTRY(IvshmemServerPeer) next; /**< next in list*/
+ int sock_fd; /**< connected unix sock */
+ int64_t id; /**< the id of the peer */
+ EventNotifier vectors[IVSHMEM_SERVER_MAX_VECTORS]; /**< one per vector */
+ unsigned vectors_count; /**< number of vectors */
+} IvshmemServerPeer;
+
+/**
+ * Structure describing an ivshmem server
+ *
+ * This structure stores all information related to our server: the name
+ * of the server unix socket and the list of connected peers.
+ */
+typedef struct IvshmemServer {
+ char unix_sock_path[PATH_MAX]; /**< path to unix socket */
+ int sock_fd; /**< unix sock file descriptor */
+ char shm_path[PATH_MAX]; /**< path to shm */
+ bool use_shm_open;
+ size_t shm_size; /**< size of shm */
+ int shm_fd; /**< shm file descriptor */
+ unsigned n_vectors; /**< number of vectors */
+ uint16_t cur_id; /**< id to be given to next client */
+ bool verbose; /**< true in verbose mode */
+ QTAILQ_HEAD(, IvshmemServerPeer) peer_list; /**< list of peers */
+} IvshmemServer;
+
+/**
+ * Initialize an ivshmem server
+ *
+ * @server: A pointer to an uninitialized IvshmemServer structure
+ * @unix_sock_path: The pointer to the unix socket file name
+ * @shm_path: Path to the shared memory. The path corresponds to a POSIX
+ * shm name or a hugetlbfs mount point.
+ * @shm_size: Size of shared memory
+ * @n_vectors: Number of interrupt vectors per client
+ * @verbose: True to enable verbose mode
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+ const char *shm_path, bool use_shm_open,
+ size_t shm_size, unsigned n_vectors,
+ bool verbose);
+
+/**
+ * Open the shm, then create and bind to the unix socket
+ *
+ * @server: The pointer to the initialized IvshmemServer structure
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_start(IvshmemServer *server);
+
+/**
+ * Close the server
+ *
+ * Close connections to all clients, close the unix socket and the
+ * shared memory file descriptor. The structure remains initialized, so
+ * it is possible to call ivshmem_server_start() again after a call to
+ * ivshmem_server_close().
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_close(IvshmemServer *server);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function will fill a fd_set with all file descriptors that must
+ * be polled (unix server socket and peer unix sockets). The function
+ * will not initialize the fd_set; it is up to the caller to do it.
+ *
+ * @server: The ivshmem server
+ * @fds: The fd_set to be updated
+ * @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
+ * updated if this function adds a greater fd in fd_set.
+ */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set (for instance filled by a call to select()), handle
+ * incoming messages from peers.
+ *
+ * @server: The ivshmem server
+ * @fds: The fd_set containing the file descriptors to be checked. Note that
+ * file descriptors that are not related to our server are ignored.
+ * @maxfd: The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd);
+
+/**
+ * Search for a peer by its identifier
+ *
+ * @server: The ivshmem server
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns: The peer structure, or NULL if not found
+ */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem server and its peers on stdout
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_dump(const IvshmemServer *server);
+
+#endif /* IVSHMEM_SERVER_H */
diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
new file mode 100644
index 000000000..224dbeb54
--- /dev/null
+++ b/contrib/ivshmem-server/main.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/option.h"
+#include "ivshmem-server.h"
+
+#define IVSHMEM_SERVER_DEFAULT_VERBOSE 0
+#define IVSHMEM_SERVER_DEFAULT_FOREGROUND 0
+#define IVSHMEM_SERVER_DEFAULT_PID_FILE "/var/run/ivshmem-server.pid"
+#define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+#define IVSHMEM_SERVER_DEFAULT_SHM_PATH "ivshmem"
+#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4 * 1024 * 1024)
+#define IVSHMEM_SERVER_DEFAULT_N_VECTORS 1
+
+/* used to quit on signal SIGTERM */
+static int ivshmem_server_quit;
+
+/* arguments given by the user */
+typedef struct IvshmemServerArgs {
+ bool verbose;
+ bool foreground;
+ const char *pid_file;
+ const char *unix_socket_path;
+ const char *shm_path;
+ bool use_shm_open;
+ uint64_t shm_size;
+ unsigned n_vectors;
+} IvshmemServerArgs;
+
+static void
+ivshmem_server_usage(const char *progname)
+{
+ printf("Usage: %s [OPTION]...\n"
+ " -h: show this help\n"
+ " -v: verbose mode\n"
+ " -F: foreground mode (default is to daemonize)\n"
+ " -p <pid-file>: path to the PID file (used in daemon mode only)\n"
+ " default " IVSHMEM_SERVER_DEFAULT_PID_FILE "\n"
+ " -S <unix-socket-path>: path to the unix socket to listen to\n"
+ " default " IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "\n"
+ " -M <shm-name>: POSIX shared memory object to use\n"
+ " default " IVSHMEM_SERVER_DEFAULT_SHM_PATH "\n"
+ " -m <dir-name>: where to create shared memory\n"
+ " -l <size>: size of shared memory in bytes\n"
+ " suffixes K, M and G can be used, e.g. 1K means 1024\n"
+ " default %u\n"
+ " -n <nvectors>: number of vectors\n"
+ " default %u\n",
+ progname, IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+ IVSHMEM_SERVER_DEFAULT_N_VECTORS);
+}
+
+static void
+ivshmem_server_help(const char *progname)
+{
+ fprintf(stderr, "Try '%s -h' for more information.\n", progname);
+}
+
+/* parse the program arguments, exit on error */
+static void
+ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
+{
+ int c;
+ unsigned long long v;
+ Error *err = NULL;
+
+ while ((c = getopt(argc, argv, "hvFp:S:m:M:l:n:")) != -1) {
+
+ switch (c) {
+ case 'h': /* help */
+ ivshmem_server_usage(argv[0]);
+ exit(0);
+ break;
+
+ case 'v': /* verbose */
+ args->verbose = 1;
+ break;
+
+ case 'F': /* foreground */
+ args->foreground = 1;
+ break;
+
+ case 'p': /* pid file */
+ args->pid_file = optarg;
+ break;
+
+ case 'S': /* unix socket path */
+ args->unix_socket_path = optarg;
+ break;
+
+ case 'M': /* shm name */
+ case 'm': /* dir name */
+ args->shm_path = optarg;
+ args->use_shm_open = c == 'M';
+ break;
+
+ case 'l': /* shm size */
+ if (!parse_option_size("shm_size", optarg, &args->shm_size,
+ &err)) {
+ error_report_err(err);
+ ivshmem_server_help(argv[0]);
+ exit(1);
+ }
+ break;
+
+ case 'n': /* number of vectors */
+ if (parse_uint_full(optarg, &v, 0) < 0) {
+ fprintf(stderr, "cannot parse n_vectors\n");
+ ivshmem_server_help(argv[0]);
+ exit(1);
+ }
+ args->n_vectors = v;
+ break;
+
+ default:
+ ivshmem_server_usage(argv[0]);
+ exit(1);
+ break;
+ }
+ }
+
+ if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
+ fprintf(stderr, "too many requested vectors (max is %d)\n",
+ IVSHMEM_SERVER_MAX_VECTORS);
+ ivshmem_server_help(argv[0]);
+ exit(1);
+ }
+
+ if (args->verbose == 1 && args->foreground == 0) {
+ fprintf(stderr, "cannot use verbose in daemon mode\n");
+ ivshmem_server_help(argv[0]);
+ exit(1);
+ }
+}
+
+/* wait for events on listening server unix socket and connected client
+ * sockets */
+static int
+ivshmem_server_poll_events(IvshmemServer *server)
+{
+ fd_set fds;
+ int ret = 0, maxfd;
+
+ while (!ivshmem_server_quit) {
+
+ FD_ZERO(&fds);
+ maxfd = 0;
+ ivshmem_server_get_fds(server, &fds, &maxfd);
+
+ ret = select(maxfd, &fds, NULL, NULL, NULL);
+
+ if (ret < 0) {
+ if (errno == EINTR) {
+ continue;
+ }
+
+ fprintf(stderr, "select error: %s\n", strerror(errno));
+ break;
+ }
+ if (ret == 0) {
+ continue;
+ }
+
+ if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
+ fprintf(stderr, "ivshmem_server_handle_fds() failed\n");
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void
+ivshmem_server_quit_cb(int signum)
+{
+ ivshmem_server_quit = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ IvshmemServer server;
+ struct sigaction sa, sa_quit;
+ IvshmemServerArgs args = {
+ .verbose = IVSHMEM_SERVER_DEFAULT_VERBOSE,
+ .foreground = IVSHMEM_SERVER_DEFAULT_FOREGROUND,
+ .pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
+ .unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
+ .shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
+ .use_shm_open = true,
+ .shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+ .n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
+ };
+ int ret = 1;
+
+ /*
+ * Do not remove this notice without adding proper error handling!
+ * Start with handling ivshmem_server_send_one_msg() failure.
+ */
+ printf("*** Example code, do not use in production ***\n");
+
+ /* parse arguments, will exit on error */
+ ivshmem_server_parse_args(&args, argc, argv);
+
+ /* Ignore SIGPIPE, see this link for more info:
+ * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+ sa.sa_handler = SIG_IGN;
+ sa.sa_flags = 0;
+ if (sigemptyset(&sa.sa_mask) == -1 ||
+ sigaction(SIGPIPE, &sa, 0) == -1) {
+ perror("failed to ignore SIGPIPE; sigaction");
+ goto err;
+ }
+
+ sa_quit.sa_handler = ivshmem_server_quit_cb;
+ sa_quit.sa_flags = 0;
+ if (sigemptyset(&sa_quit.sa_mask) == -1 ||
+ sigaction(SIGTERM, &sa_quit, 0) == -1 ||
+ sigaction(SIGINT, &sa_quit, 0) == -1) {
+ perror("failed to add signal handler; sigaction");
+ goto err;
+ }
+
+    /* init the ivshmem server structure */
+ if (ivshmem_server_init(&server, args.unix_socket_path,
+ args.shm_path, args.use_shm_open,
+ args.shm_size, args.n_vectors, args.verbose) < 0) {
+ fprintf(stderr, "cannot init server\n");
+ goto err;
+ }
+
+ /* start the ivshmem server (open shm & unix socket) */
+ if (ivshmem_server_start(&server) < 0) {
+ fprintf(stderr, "cannot bind\n");
+ goto err;
+ }
+
+ /* daemonize if asked to */
+ if (!args.foreground) {
+ FILE *fp;
+
+ if (qemu_daemon(1, 1) < 0) {
+ fprintf(stderr, "cannot daemonize: %s\n", strerror(errno));
+ goto err_close;
+ }
+
+ /* write pid file */
+ fp = fopen(args.pid_file, "w");
+ if (fp == NULL) {
+ fprintf(stderr, "cannot write pid file: %s\n", strerror(errno));
+ goto err_close;
+ }
+
+ fprintf(fp, "%d\n", (int) getpid());
+ fclose(fp);
+ }
+
+ ivshmem_server_poll_events(&server);
+ fprintf(stdout, "server disconnected\n");
+ ret = 0;
+
+err_close:
+ ivshmem_server_close(&server);
+err:
+ return ret;
+}
diff --git a/contrib/ivshmem-server/meson.build b/contrib/ivshmem-server/meson.build
new file mode 100644
index 000000000..3a5394220
--- /dev/null
+++ b/contrib/ivshmem-server/meson.build
@@ -0,0 +1,4 @@
+executable('ivshmem-server', files('ivshmem-server.c', 'main.c'),
+ dependencies: [qemuutil, rt],
+ build_by_default: targetos == 'linux',
+ install: false)
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
new file mode 100644
index 000000000..54ac5ccd9
--- /dev/null
+++ b/contrib/plugins/Makefile
@@ -0,0 +1,45 @@
+# -*- Mode: makefile -*-
+#
+# This Makefile example is fairly independent of the main makefile
+# so users can take and adapt it for their own builds. We only really
+# include config-host.mak so we don't have to repeat probing for
+# cflags that the main configure has already done for us.
+#
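+#
+# Typical use (illustrative): configure and build QEMU as usual, then run
+# "make" from <build>/contrib/plugins to produce the lib*.so plugins
+# listed below.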
+
+BUILD_DIR := $(CURDIR)/../..
+
+include $(BUILD_DIR)/config-host.mak
+
+VPATH += $(SRC_PATH)/contrib/plugins
+
+NAMES :=
+NAMES += execlog
+NAMES += hotblocks
+NAMES += hotpages
+NAMES += howvec
+NAMES += lockstep
+NAMES += hwprofile
+NAMES += cache
+
+SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
+
+# The main QEMU uses Glib extensively so it's perfectly fine to use it
+# in plugins (as many of these examples do).
+CFLAGS = $(GLIB_CFLAGS)
+CFLAGS += -fPIC -Wall $(filter -W%, $(QEMU_CFLAGS))
+CFLAGS += $(if $(findstring no-psabi,$(QEMU_CFLAGS)),-Wpsabi)
+CFLAGS += -I$(SRC_PATH)/include/qemu
+
+all: $(SONAMES)
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+lib%.so: %.o
+ $(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LDLIBS)
+
+clean:
+ rm -f *.o *.so *.d
+ rm -Rf .libs
+
+.PHONY: all clean
diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
new file mode 100644
index 000000000..b9226e7c4
--- /dev/null
+++ b/contrib/plugins/cache.c
@@ -0,0 +1,860 @@
+/*
+ * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10)
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+static GHashTable *miss_ht;
+
+static GMutex hashtable_lock;
+static GRand *rng;
+
+static int limit;
+static bool sys;
+
+enum EvictionPolicy {
+ LRU,
+ FIFO,
+ RAND,
+};
+
+enum EvictionPolicy policy;
+
+/*
+ * A CacheSet is a set of cache blocks. A memory block that maps to a set can be
+ * put in any of the blocks inside the set. The number of blocks per set is
+ * called the associativity (assoc).
+ *
+ * Each block contains the stored tag and a valid bit. Since this is not
+ * a functional simulator, the data itself is not stored. We only identify
+ * whether a block is in the cache or not by searching for its tag.
+ *
+ * In order to search for memory data in the cache, the set identifier and tag
+ * are extracted from the address and the set is probed to see whether a tag
+ * match occurs.
+ *
+ * An address is logically divided into three portions: The block offset,
+ * the set number, and the tag.
+ *
+ * The set number is used to identify the set in which the block may exist.
+ * The tag is compared against all the tags of a set to search for a match. If a
+ * match is found, then the access is a hit.
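+ *
+ * Illustrative decomposition (matching the default L1 geometry configured
+ * in qemu_plugin_install() below: 64-byte blocks and 32 sets, so
+ * blksize_shift = 6, set_mask = (32 - 1) << 6 = 0x7c0 and
+ * tag_mask = ~0x7ff), address 0x12345 splits into:
+ *
+ *   offset = 0x12345 & 0x3f         = 0x05
+ *   set    = (0x12345 & 0x7c0) >> 6 = 13
+ *   tag    = 0x12345 & ~0x7ff       = 0x12000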
+ *
+ * The CacheSet also contains bookkeeping information used by the eviction
+ * policy.
+ */
+
+typedef struct {
+ uint64_t tag;
+ bool valid;
+} CacheBlock;
+
+typedef struct {
+ CacheBlock *blocks;
+ uint64_t *lru_priorities;
+ uint64_t lru_gen_counter;
+ GQueue *fifo_queue;
+} CacheSet;
+
+typedef struct {
+ CacheSet *sets;
+ int num_sets;
+ int cachesize;
+ int assoc;
+ int blksize_shift;
+ uint64_t set_mask;
+ uint64_t tag_mask;
+ uint64_t accesses;
+ uint64_t misses;
+} Cache;
+
+typedef struct {
+ char *disas_str;
+ const char *symbol;
+ uint64_t addr;
+ uint64_t l1_dmisses;
+ uint64_t l1_imisses;
+ uint64_t l2_misses;
+} InsnData;
+
+void (*update_hit)(Cache *cache, int set, int blk);
+void (*update_miss)(Cache *cache, int set, int blk);
+
+void (*metadata_init)(Cache *cache);
+void (*metadata_destroy)(Cache *cache);
+
+static int cores;
+static Cache **l1_dcaches, **l1_icaches;
+
+static bool use_l2;
+static Cache **l2_ucaches;
+
+static GMutex *l1_dcache_locks;
+static GMutex *l1_icache_locks;
+static GMutex *l2_ucache_locks;
+
+static uint64_t l1_dmem_accesses;
+static uint64_t l1_imem_accesses;
+static uint64_t l1_imisses;
+static uint64_t l1_dmisses;
+
+static uint64_t l2_mem_accesses;
+static uint64_t l2_misses;
+
+static int pow_of_two(int num)
+{
+ g_assert((num & (num - 1)) == 0);
+ int ret = 0;
+ while (num /= 2) {
+ ret++;
+ }
+ return ret;
+}
+
+/*
+ * LRU eviction policy: For each set, a generation counter is maintained
+ * alongside a priority array.
+ *
+ * On each set access, the generation counter is incremented.
+ *
+ * On a cache hit: The hit-block is assigned the current generation counter,
+ * indicating that it is the most recently used block.
+ *
+ * On a cache miss: The block with the least priority is searched and replaced
+ * with the newly-cached block, of which the priority is set to the current
+ * generation number.
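+ *
+ * Illustrative trace (assuming a 2-way set): starting from
+ * lru_gen_counter == 0, a hit on block 0 records priority 0 and bumps the
+ * counter to 1; a hit on block 1 records priority 1 and bumps it to 2. A
+ * subsequent miss then evicts block 0, the block with the lowest priority.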
+ */
+
+static void lru_priorities_init(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
+ cache->sets[i].lru_gen_counter = 0;
+ }
+}
+
+static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
+{
+ CacheSet *set = &cache->sets[set_idx];
+ set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
+ set->lru_gen_counter++;
+}
+
+static int lru_get_lru_block(Cache *cache, int set_idx)
+{
+ int i, min_idx, min_priority;
+
+ min_priority = cache->sets[set_idx].lru_priorities[0];
+ min_idx = 0;
+
+ for (i = 1; i < cache->assoc; i++) {
+ if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
+ min_priority = cache->sets[set_idx].lru_priorities[i];
+ min_idx = i;
+ }
+ }
+ return min_idx;
+}
+
+static void lru_priorities_destroy(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ g_free(cache->sets[i].lru_priorities);
+ }
+}
+
+/*
+ * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
+ * stores accesses to the cache.
+ *
+ * On a compulsory miss: The block index is enqueued to the fifo_queue to
+ * indicate that it's the latest cached block.
+ *
+ * On a conflict miss: The first-in block is removed from the cache and the new
+ * block is put in its place and enqueued to the FIFO queue.
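+ *
+ * Illustrative trace (assuming a 2-way set): two compulsory misses push
+ * block indexes 0 then 1 onto the queue head; the next conflict miss pops
+ * index 0 from the tail, so the oldest resident block is the one replaced.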
+ */
+
+static void fifo_init(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ cache->sets[i].fifo_queue = g_queue_new();
+ }
+}
+
+static int fifo_get_first_block(Cache *cache, int set)
+{
+ GQueue *q = cache->sets[set].fifo_queue;
+ return GPOINTER_TO_INT(g_queue_pop_tail(q));
+}
+
+static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
+{
+ GQueue *q = cache->sets[set].fifo_queue;
+ g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
+}
+
+static void fifo_destroy(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ g_queue_free(cache->sets[i].fifo_queue);
+ }
+}
+
+static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
+{
+ return addr & cache->tag_mask;
+}
+
+static inline uint64_t extract_set(Cache *cache, uint64_t addr)
+{
+ return (addr & cache->set_mask) >> cache->blksize_shift;
+}
+
+static const char *cache_config_error(int blksize, int assoc, int cachesize)
+{
+ if (cachesize % blksize != 0) {
+ return "cache size must be divisible by block size";
+ } else if (cachesize % (blksize * assoc) != 0) {
+ return "cache size must be divisible by set size (assoc * block size)";
+ } else {
+ return NULL;
+ }
+}
+
+static bool bad_cache_params(int blksize, int assoc, int cachesize)
+{
+ return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
+}
+
+static Cache *cache_init(int blksize, int assoc, int cachesize)
+{
+ Cache *cache;
+ int i;
+ uint64_t blk_mask;
+
+ /*
+ * This function shall not be called directly, and hence expects suitable
+ * parameters.
+ */
+ g_assert(!bad_cache_params(blksize, assoc, cachesize));
+
+ cache = g_new(Cache, 1);
+ cache->assoc = assoc;
+ cache->cachesize = cachesize;
+ cache->num_sets = cachesize / (blksize * assoc);
+ cache->sets = g_new(CacheSet, cache->num_sets);
+ cache->blksize_shift = pow_of_two(blksize);
+ cache->accesses = 0;
+ cache->misses = 0;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ cache->sets[i].blocks = g_new0(CacheBlock, assoc);
+ }
+
+ blk_mask = blksize - 1;
+ cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
+ cache->tag_mask = ~(cache->set_mask | blk_mask);
+
+ if (metadata_init) {
+ metadata_init(cache);
+ }
+
+ return cache;
+}
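+
+/*
+ * For example, cache_init(64, 8, 16384) (the default L1 geometry set up
+ * in qemu_plugin_install() below) builds an 8-way, 16 KiB cache with
+ * 16384 / (64 * 8) = 32 sets.
+ */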
+
+static Cache **caches_init(int blksize, int assoc, int cachesize)
+{
+ Cache **caches;
+ int i;
+
+ if (bad_cache_params(blksize, assoc, cachesize)) {
+ return NULL;
+ }
+
+ caches = g_new(Cache *, cores);
+
+ for (i = 0; i < cores; i++) {
+ caches[i] = cache_init(blksize, assoc, cachesize);
+ }
+
+ return caches;
+}
+
+static int get_invalid_block(Cache *cache, uint64_t set)
+{
+ int i;
+
+ for (i = 0; i < cache->assoc; i++) {
+ if (!cache->sets[set].blocks[i].valid) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+static int get_replaced_block(Cache *cache, int set)
+{
+ switch (policy) {
+ case RAND:
+ return g_rand_int_range(rng, 0, cache->assoc);
+ case LRU:
+ return lru_get_lru_block(cache, set);
+ case FIFO:
+ return fifo_get_first_block(cache, set);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static int in_cache(Cache *cache, uint64_t addr)
+{
+ int i;
+ uint64_t tag, set;
+
+ tag = extract_tag(cache, addr);
+ set = extract_set(cache, addr);
+
+ for (i = 0; i < cache->assoc; i++) {
+ if (cache->sets[set].blocks[i].tag == tag &&
+ cache->sets[set].blocks[i].valid) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+/**
+ * access_cache(): Simulate a cache access
+ * @cache: The cache under simulation
+ * @addr: The address of the requested memory location
+ *
+ * Returns true if the requested data hits in the cache and false on a miss.
+ * The cache is updated on miss for the next access.
+ */
+static bool access_cache(Cache *cache, uint64_t addr)
+{
+ int hit_blk, replaced_blk;
+ uint64_t tag, set;
+
+ tag = extract_tag(cache, addr);
+ set = extract_set(cache, addr);
+
+ hit_blk = in_cache(cache, addr);
+ if (hit_blk != -1) {
+ if (update_hit) {
+ update_hit(cache, set, hit_blk);
+ }
+ return true;
+ }
+
+ replaced_blk = get_invalid_block(cache, set);
+
+ if (replaced_blk == -1) {
+ replaced_blk = get_replaced_block(cache, set);
+ }
+
+ if (update_miss) {
+ update_miss(cache, set, replaced_blk);
+ }
+
+ cache->sets[set].blocks[replaced_blk].tag = tag;
+ cache->sets[set].blocks[replaced_blk].valid = true;
+
+ return false;
+}
+
+static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
+ uint64_t vaddr, void *userdata)
+{
+ uint64_t effective_addr;
+ struct qemu_plugin_hwaddr *hwaddr;
+ int cache_idx;
+ InsnData *insn;
+ bool hit_in_l1;
+
+ hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+ if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
+ return;
+ }
+
+ effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
+ cache_idx = vcpu_index % cores;
+
+ g_mutex_lock(&l1_dcache_locks[cache_idx]);
+ hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr);
+ if (!hit_in_l1) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST);
+ l1_dcaches[cache_idx]->misses++;
+ }
+ l1_dcaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l1_dcache_locks[cache_idx]);
+
+ if (hit_in_l1 || !use_l2) {
+ /* No need to access L2 */
+ return;
+ }
+
+ g_mutex_lock(&l2_ucache_locks[cache_idx]);
+ if (!access_cache(l2_ucaches[cache_idx], effective_addr)) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
+ l2_ucaches[cache_idx]->misses++;
+ }
+ l2_ucaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l2_ucache_locks[cache_idx]);
+}
+
+static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
+{
+ uint64_t insn_addr;
+ InsnData *insn;
+ int cache_idx;
+ bool hit_in_l1;
+
+ insn_addr = ((InsnData *) userdata)->addr;
+
+ cache_idx = vcpu_index % cores;
+ g_mutex_lock(&l1_icache_locks[cache_idx]);
+ hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr);
+ if (!hit_in_l1) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST);
+ l1_icaches[cache_idx]->misses++;
+ }
+ l1_icaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l1_icache_locks[cache_idx]);
+
+ if (hit_in_l1 || !use_l2) {
+ /* No need to access L2 */
+ return;
+ }
+
+ g_mutex_lock(&l2_ucache_locks[cache_idx]);
+ if (!access_cache(l2_ucaches[cache_idx], insn_addr)) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
+ l2_ucaches[cache_idx]->misses++;
+ }
+ l2_ucaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l2_ucache_locks[cache_idx]);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n_insns;
+ size_t i;
+ InsnData *data;
+
+ n_insns = qemu_plugin_tb_n_insns(tb);
+ for (i = 0; i < n_insns; i++) {
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ uint64_t effective_addr;
+
+ if (sys) {
+ effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
+ } else {
+ effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
+ }
+
+ /*
+         * Instructions might get translated multiple times, so we do not create
+ * new entries for those instructions. Instead, we fetch the same
+ * entry from the hash table and register it for the callback again.
+ */
+ g_mutex_lock(&hashtable_lock);
+ data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
+ if (data == NULL) {
+ data = g_new0(InsnData, 1);
+ data->disas_str = qemu_plugin_insn_disas(insn);
+ data->symbol = qemu_plugin_insn_symbol(insn);
+ data->addr = effective_addr;
+ g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
+ (gpointer) data);
+ }
+ g_mutex_unlock(&hashtable_lock);
+
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
+ QEMU_PLUGIN_CB_NO_REGS,
+ rw, data);
+
+ qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
+ QEMU_PLUGIN_CB_NO_REGS, data);
+ }
+}
+
+static void insn_free(gpointer data)
+{
+ InsnData *insn = (InsnData *) data;
+ g_free(insn->disas_str);
+ g_free(insn);
+}
+
+static void cache_free(Cache *cache)
+{
+ for (int i = 0; i < cache->num_sets; i++) {
+ g_free(cache->sets[i].blocks);
+ }
+
+ if (metadata_destroy) {
+ metadata_destroy(cache);
+ }
+
+ g_free(cache->sets);
+ g_free(cache);
+}
+
+static void caches_free(Cache **caches)
+{
+ int i;
+
+ for (i = 0; i < cores; i++) {
+ cache_free(caches[i]);
+ }
+}
+
+static void append_stats_line(GString *line, uint64_t l1_daccess,
+ uint64_t l1_dmisses, uint64_t l1_iaccess,
+ uint64_t l1_imisses, uint64_t l2_access,
+ uint64_t l2_misses)
+{
+ double l1_dmiss_rate, l1_imiss_rate, l2_miss_rate;
+
+ l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0;
+ l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0;
+
+ g_string_append_printf(line, "%-14lu %-12lu %9.4lf%% %-14lu %-12lu"
+ " %9.4lf%%",
+ l1_daccess,
+ l1_dmisses,
+ l1_daccess ? l1_dmiss_rate : 0.0,
+ l1_iaccess,
+ l1_imisses,
+ l1_iaccess ? l1_imiss_rate : 0.0);
+
+ if (use_l2) {
+ l2_miss_rate = ((double) l2_misses) / (l2_access) * 100.0;
+ g_string_append_printf(line, " %-12lu %-11lu %10.4lf%%",
+ l2_access,
+ l2_misses,
+ l2_access ? l2_miss_rate : 0.0);
+ }
+
+ g_string_append(line, "\n");
+}
+
+static void sum_stats(void)
+{
+ int i;
+
+ g_assert(cores > 1);
+ for (i = 0; i < cores; i++) {
+ l1_imisses += l1_icaches[i]->misses;
+ l1_dmisses += l1_dcaches[i]->misses;
+ l1_imem_accesses += l1_icaches[i]->accesses;
+ l1_dmem_accesses += l1_dcaches[i]->accesses;
+
+ if (use_l2) {
+ l2_misses += l2_ucaches[i]->misses;
+ l2_mem_accesses += l2_ucaches[i]->accesses;
+ }
+ }
+}
+
+static int dcmp(gconstpointer a, gconstpointer b)
+{
+ InsnData *insn_a = (InsnData *) a;
+ InsnData *insn_b = (InsnData *) b;
+
+ return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1;
+}
+
+static int icmp(gconstpointer a, gconstpointer b)
+{
+ InsnData *insn_a = (InsnData *) a;
+ InsnData *insn_b = (InsnData *) b;
+
+ return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1;
+}
+
+static int l2_cmp(gconstpointer a, gconstpointer b)
+{
+ InsnData *insn_a = (InsnData *) a;
+ InsnData *insn_b = (InsnData *) b;
+
+ return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1;
+}
+
+static void log_stats(void)
+{
+ int i;
+ Cache *icache, *dcache, *l2_cache;
+
+ g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
+ " dmiss rate, insn accesses,"
+ " insn misses, imiss rate");
+
+ if (use_l2) {
+ g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate");
+ }
+
+ g_string_append(rep, "\n");
+
+ for (i = 0; i < cores; i++) {
+ g_string_append_printf(rep, "%-8d", i);
+ dcache = l1_dcaches[i];
+ icache = l1_icaches[i];
+ l2_cache = use_l2 ? l2_ucaches[i] : NULL;
+ append_stats_line(rep, dcache->accesses, dcache->misses,
+ icache->accesses, icache->misses,
+ l2_cache ? l2_cache->accesses : 0,
+ l2_cache ? l2_cache->misses : 0);
+ }
+
+ if (cores > 1) {
+ sum_stats();
+ g_string_append_printf(rep, "%-8s", "sum");
+        append_stats_line(rep, l1_dmem_accesses, l1_dmisses,
+                          l1_imem_accesses, l1_imisses,
+                          use_l2 ? l2_mem_accesses : 0,
+                          use_l2 ? l2_misses : 0);
+ }
+
+ g_string_append(rep, "\n");
+ qemu_plugin_outs(rep->str);
+}
+
+static void log_top_insns(void)
+{
+ int i;
+ GList *curr, *miss_insns;
+ InsnData *insn;
+
+ miss_insns = g_hash_table_get_values(miss_ht);
+ miss_insns = g_list_sort(miss_insns, dcmp);
+ g_autoptr(GString) rep = g_string_new("");
+ g_string_append_printf(rep, "%s", "address, data misses, instruction\n");
+
+ for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+ insn = (InsnData *) curr->data;
+ g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
+ if (insn->symbol) {
+ g_string_append_printf(rep, " (%s)", insn->symbol);
+ }
+ g_string_append_printf(rep, ", %ld, %s\n", insn->l1_dmisses,
+ insn->disas_str);
+ }
+
+ miss_insns = g_list_sort(miss_insns, icmp);
+ g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");
+
+ for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+ insn = (InsnData *) curr->data;
+ g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
+ if (insn->symbol) {
+ g_string_append_printf(rep, " (%s)", insn->symbol);
+ }
+ g_string_append_printf(rep, ", %ld, %s\n", insn->l1_imisses,
+ insn->disas_str);
+ }
+
+ if (!use_l2) {
+ goto finish;
+ }
+
+ miss_insns = g_list_sort(miss_insns, l2_cmp);
+ g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n");
+
+ for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+ insn = (InsnData *) curr->data;
+ g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
+ if (insn->symbol) {
+ g_string_append_printf(rep, " (%s)", insn->symbol);
+ }
+ g_string_append_printf(rep, ", %ld, %s\n", insn->l2_misses,
+ insn->disas_str);
+ }
+
+finish:
+ qemu_plugin_outs(rep->str);
+ g_list_free(miss_insns);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ log_stats();
+ log_top_insns();
+
+ caches_free(l1_dcaches);
+ caches_free(l1_icaches);
+
+ g_free(l1_dcache_locks);
+ g_free(l1_icache_locks);
+
+ if (use_l2) {
+ caches_free(l2_ucaches);
+ g_free(l2_ucache_locks);
+ }
+
+ g_hash_table_destroy(miss_ht);
+}
+
+static void policy_init(void)
+{
+ switch (policy) {
+ case LRU:
+ update_hit = lru_update_blk;
+ update_miss = lru_update_blk;
+ metadata_init = lru_priorities_init;
+ metadata_destroy = lru_priorities_destroy;
+ break;
+ case FIFO:
+ update_miss = fifo_update_on_miss;
+ metadata_init = fifo_init;
+ metadata_destroy = fifo_destroy;
+ break;
+ case RAND:
+ rng = g_rand_new();
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+ int l1_iassoc, l1_iblksize, l1_icachesize;
+ int l1_dassoc, l1_dblksize, l1_dcachesize;
+ int l2_assoc, l2_blksize, l2_cachesize;
+
+ limit = 32;
+ sys = info->system_emulation;
+
+ l1_dassoc = 8;
+ l1_dblksize = 64;
+ l1_dcachesize = l1_dblksize * l1_dassoc * 32;
+
+ l1_iassoc = 8;
+ l1_iblksize = 64;
+ l1_icachesize = l1_iblksize * l1_iassoc * 32;
+
+ l2_assoc = 16;
+ l2_blksize = 64;
+ l2_cachesize = l2_assoc * l2_blksize * 2048;
+
+ policy = LRU;
+
+ cores = sys ? qemu_plugin_n_vcpus() : 1;
+
+ for (i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", 2);
+
+ if (g_strcmp0(tokens[0], "iblksize") == 0) {
+ l1_iblksize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "iassoc") == 0) {
+ l1_iassoc = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "icachesize") == 0) {
+ l1_icachesize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "dblksize") == 0) {
+ l1_dblksize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "dassoc") == 0) {
+ l1_dassoc = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "dcachesize") == 0) {
+ l1_dcachesize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "limit") == 0) {
+ limit = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "cores") == 0) {
+ cores = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) {
+ use_l2 = true;
+ l2_cachesize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2blksize") == 0) {
+ use_l2 = true;
+ l2_blksize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2assoc") == 0) {
+ use_l2 = true;
+ l2_assoc = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "evict") == 0) {
+ if (g_strcmp0(tokens[1], "rand") == 0) {
+ policy = RAND;
+ } else if (g_strcmp0(tokens[1], "lru") == 0) {
+ policy = LRU;
+ } else if (g_strcmp0(tokens[1], "fifo") == 0) {
+ policy = FIFO;
+ } else {
+ fprintf(stderr, "invalid eviction policy: %s\n", opt);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
+ policy_init();
+
+ l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize);
+ if (!l1_dcaches) {
+ const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize);
+ fprintf(stderr, "dcache cannot be constructed from given parameters\n");
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+
+ l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize);
+ if (!l1_icaches) {
+ const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize);
+ fprintf(stderr, "icache cannot be constructed from given parameters\n");
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+
+ l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL;
+ if (!l2_ucaches && use_l2) {
+ const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize);
+ fprintf(stderr, "L2 cache cannot be constructed from given parameters\n");
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+
+ l1_dcache_locks = g_new0(GMutex, cores);
+ l1_icache_locks = g_new0(GMutex, cores);
+ l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL;
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+ miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);
+
+ return 0;
+}
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
new file mode 100644
index 000000000..a5275dcc1
--- /dev/null
+++ b/contrib/plugins/execlog.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
+ *
+ * Log instruction execution with memory access.
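+ *
+ * Each log line looks something like this (illustrative aarch64 values):
+ *   0, 0x4000102c, 0xf9400260, "ldr x0, [x19]", load, 0x00003fe0
+ * i.e. cpu_index, vaddr, opcode, disassembly, then any memory accesses
+ * appended by vcpu_mem().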
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <glib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* Store last executed instruction on each vCPU as a GString */
+GArray *last_exec;
+
+/**
+ * Add memory read or write information to current instruction log
+ */
+static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
+ uint64_t vaddr, void *udata)
+{
+ GString *s;
+
+ /* Find vCPU in array */
+ g_assert(cpu_index < last_exec->len);
+ s = g_array_index(last_exec, GString *, cpu_index);
+
+ /* Indicate type of memory access */
+ if (qemu_plugin_mem_is_store(info)) {
+ g_string_append(s, ", store");
+ } else {
+ g_string_append(s, ", load");
+ }
+
+ /* If full system emulation log physical address and device name */
+ struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+ if (hwaddr) {
+ uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
+ const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+ g_string_append_printf(s, ", 0x%08"PRIx64", %s", addr, name);
+ } else {
+ g_string_append_printf(s, ", 0x%08"PRIx64, vaddr);
+ }
+}
+
+/**
+ * Log instruction execution
+ */
+static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+{
+ GString *s;
+
+ /* Find or create vCPU in array */
+ while (cpu_index >= last_exec->len) {
+ s = g_string_new(NULL);
+ g_array_append_val(last_exec, s);
+ }
+ s = g_array_index(last_exec, GString *, cpu_index);
+
+ /* Print previous instruction in cache */
+ if (s->len) {
+ qemu_plugin_outs(s->str);
+ qemu_plugin_outs("\n");
+ }
+
+ /* Store new instruction in cache */
+ /* vcpu_mem will add memory access information to last_exec */
+ g_string_printf(s, "%u, ", cpu_index);
+ g_string_append(s, (char *)udata);
+}
+
+/**
+ * On new translation of a translation block
+ *
+ * QEMU translates code one translation block (TB) at a time. By hooking in
+ * here we can then register a callback on each instruction and memory access.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ struct qemu_plugin_insn *insn;
+ uint64_t insn_vaddr;
+ uint32_t insn_opcode;
+ char *insn_disas;
+
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ for (size_t i = 0; i < n; i++) {
+ /*
+ * `insn` is shared between translations in QEMU, copy needed data here.
+ * `output` is never freed as it might be used multiple times during
+ * the emulation lifetime.
+         * We only consider the first 32 bits of the instruction; this may be
+         * a limitation for CISC architectures.
+ */
+ insn = qemu_plugin_tb_get_insn(tb, i);
+ insn_vaddr = qemu_plugin_insn_vaddr(insn);
+ insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
+ insn_disas = qemu_plugin_insn_disas(insn);
+ char *output = g_strdup_printf("0x%"PRIx64", 0x%"PRIx32", \"%s\"",
+ insn_vaddr, insn_opcode, insn_disas);
+
+ /* Register callback on memory read or write */
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
+ QEMU_PLUGIN_CB_NO_REGS,
+ QEMU_PLUGIN_MEM_RW, NULL);
+
+ /* Register callback on instruction */
+ qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
+ QEMU_PLUGIN_CB_NO_REGS, output);
+ }
+}
+
+/**
+ * On plugin exit, print last instruction in cache
+ */
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ guint i;
+ GString *s;
+ for (i = 0; i < last_exec->len; i++) {
+ s = g_array_index(last_exec, GString *, i);
+ if (s->str) {
+ qemu_plugin_outs(s->str);
+ qemu_plugin_outs("\n");
+ }
+ }
+}
+
+/**
+ * Install the plugin
+ */
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+ const qemu_info_t *info, int argc,
+ char **argv)
+{
+ /*
+     * Initialize dynamic array to cache vCPU instructions. In user mode
+ * we don't know the size before emulation.
+ */
+ last_exec = g_array_new(FALSE, FALSE, sizeof(GString *));
+
+ /* Register translation block and exit callbacks */
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+ return 0;
+}
diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c
new file mode 100644
index 000000000..062200a7a
--- /dev/null
+++ b/contrib/plugins/hotblocks.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static bool do_inline;
+
+/* Plugins need to take care of their own locking */
+static GMutex lock;
+static GHashTable *hotblocks;
+static guint64 limit = 20;
+
+/*
+ * Counting Structure
+ *
+ * The internals of the TCG are not exposed to plugins so we can only
+ * get the starting PC for each block. We cheat slightly by
+ * xor'ing the number of instructions into the hash to help
+ * differentiate.
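+ *
+ * Illustratively, a block starting at pc 0x401000 with 3 instructions
+ * hashes to 0x401000 ^ 3 = 0x401003, so blocks sharing a starting pc but
+ * differing in length get distinct entries.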
+ */
+typedef struct {
+ uint64_t start_addr;
+ uint64_t exec_count;
+ int trans_count;
+ unsigned long insns;
+} ExecCount;
+
+static gint cmp_exec_count(gconstpointer a, gconstpointer b)
+{
+ ExecCount *ea = (ExecCount *) a;
+ ExecCount *eb = (ExecCount *) b;
+ return ea->exec_count > eb->exec_count ? -1 : 1;
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("collected ");
+ GList *counts, *it;
+ int i;
+
+ g_mutex_lock(&lock);
+ g_string_append_printf(report, "%d entries in the hash table\n",
+ g_hash_table_size(hotblocks));
+    counts = g_hash_table_get_values(hotblocks);
+    counts = g_list_sort(counts, cmp_exec_count);
+
+    if (counts) {
+        g_string_append_printf(report, "pc, tcount, icount, ecount\n");
+
+        for (i = 0, it = counts; i < limit && it; i++, it = it->next) {
+            ExecCount *rec = (ExecCount *) it->data;
+            g_string_append_printf(report,
+                                   "0x%016"PRIx64", %d, %lu, %"PRIu64"\n",
+                                   rec->start_addr, rec->trans_count,
+                                   rec->insns, rec->exec_count);
+        }
+
+        g_list_free(counts);
+    }
+
+    /* unlock unconditionally so an empty table can't leave the lock held */
+    g_mutex_unlock(&lock);
+
+ qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+ hotblocks = g_hash_table_new(NULL, g_direct_equal);
+}
+
+static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
+{
+ ExecCount *cnt;
+ uint64_t hash = (uint64_t) udata;
+
+ g_mutex_lock(&lock);
+ cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash);
+ /* should always succeed */
+ g_assert(cnt);
+ cnt->exec_count++;
+ g_mutex_unlock(&lock);
+}
+
+/*
+ * When do_inline we ask the plugin to increment the counter for us.
+ * Otherwise a helper is inserted which calls the vcpu_tb_exec
+ * callback.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ ExecCount *cnt;
+ uint64_t pc = qemu_plugin_tb_vaddr(tb);
+ size_t insns = qemu_plugin_tb_n_insns(tb);
+ uint64_t hash = pc ^ insns;
+
+ g_mutex_lock(&lock);
+ cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash);
+ if (cnt) {
+ cnt->trans_count++;
+ } else {
+ cnt = g_new0(ExecCount, 1);
+ cnt->start_addr = pc;
+ cnt->trans_count = 1;
+ cnt->insns = insns;
+ g_hash_table_insert(hotblocks, (gpointer) hash, (gpointer) cnt);
+ }
+
+ g_mutex_unlock(&lock);
+
+ if (do_inline) {
+ qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
+ &cnt->exec_count, 1);
+ } else {
+ qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
+ QEMU_PLUGIN_CB_NO_REGS,
+ (void *)hash);
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ for (int i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", 2);
+ if (g_strcmp0(tokens[0], "inline") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/hotpages.c b/contrib/plugins/hotpages.c
new file mode 100644
index 000000000..0d12910af
--- /dev/null
+++ b/contrib/plugins/hotpages.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * Hot Pages - show which pages saw the most memory accesses.
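+ *
+ * Example invocation (illustrative):
+ *   -plugin ./contrib/plugins/libhotpages.so,sortby=writes,pagesize=4096
+ * where sortby, io and pagesize are parsed in qemu_plugin_install().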
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+static uint64_t page_size = 4096;
+static uint64_t page_mask;
+static int limit = 50;
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+static bool track_io;
+
+enum sort_type {
+ SORT_RW = 0,
+ SORT_R,
+ SORT_W,
+ SORT_A
+};
+
+static int sort_by = SORT_RW;
+
+typedef struct {
+ uint64_t page_address;
+ int cpu_read;
+ int cpu_write;
+ uint64_t reads;
+ uint64_t writes;
+} PageCounters;
+
+static GMutex lock;
+static GHashTable *pages;
+
+static gint cmp_access_count(gconstpointer a, gconstpointer b)
+{
+ PageCounters *ea = (PageCounters *) a;
+ PageCounters *eb = (PageCounters *) b;
+ int r;
+ switch (sort_by) {
+ case SORT_RW:
+ r = (ea->reads + ea->writes) > (eb->reads + eb->writes) ? -1 : 1;
+ break;
+ case SORT_R:
+ r = ea->reads > eb->reads ? -1 : 1;
+ break;
+ case SORT_W:
+ r = ea->writes > eb->writes ? -1 : 1;
+ break;
+ case SORT_A:
+ r = ea->page_address > eb->page_address ? -1 : 1;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return r;
+}
+
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("Addr, RCPUs, Reads, WCPUs, Writes\n");
+ int i;
+ GList *counts;
+
+ counts = g_hash_table_get_values(pages);
+ if (counts && g_list_next(counts)) {
+ GList *it;
+
+ it = g_list_sort(counts, cmp_access_count);
+
+ for (i = 0; i < limit && it->next; i++, it = it->next) {
+ PageCounters *rec = (PageCounters *) it->data;
+ g_string_append_printf(report,
+ "0x%016"PRIx64", 0x%04x, %"PRId64
+ ", 0x%04x, %"PRId64"\n",
+ rec->page_address,
+ rec->cpu_read, rec->reads,
+ rec->cpu_write, rec->writes);
+ }
+ g_list_free(it);
+ }
+
+ qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+ page_mask = (page_size - 1);
+ pages = g_hash_table_new(NULL, g_direct_equal);
+}
+
+static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
+ uint64_t vaddr, void *udata)
+{
+ struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
+ uint64_t page;
+ PageCounters *count;
+
+ /* We only get a hwaddr for system emulation */
+ if (track_io) {
+ if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
+ page = vaddr;
+ } else {
+ return;
+ }
+ } else {
+ if (hwaddr && !qemu_plugin_hwaddr_is_io(hwaddr)) {
+ page = (uint64_t) qemu_plugin_hwaddr_phys_addr(hwaddr);
+ } else {
+ page = vaddr;
+ }
+ }
+ page &= ~page_mask;
+
+ g_mutex_lock(&lock);
+ count = (PageCounters *) g_hash_table_lookup(pages, GUINT_TO_POINTER(page));
+
+ if (!count) {
+ count = g_new0(PageCounters, 1);
+ count->page_address = page;
+ g_hash_table_insert(pages, GUINT_TO_POINTER(page), (gpointer) count);
+ }
+ if (qemu_plugin_mem_is_store(meminfo)) {
+ count->writes++;
+ count->cpu_write |= (1 << cpu_index);
+ } else {
+ count->reads++;
+ count->cpu_read |= (1 << cpu_index);
+ }
+
+ g_mutex_unlock(&lock);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
+ QEMU_PLUGIN_CB_NO_REGS,
+ rw, NULL);
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+
+ for (i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", -1);
+
+ if (g_strcmp0(tokens[0], "sortby") == 0) {
+ if (g_strcmp0(tokens[1], "reads") == 0) {
+ sort_by = SORT_R;
+ } else if (g_strcmp0(tokens[1], "writes") == 0) {
+ sort_by = SORT_W;
+ } else if (g_strcmp0(tokens[1], "address") == 0) {
+ sort_by = SORT_A;
+ } else {
+ fprintf(stderr, "invalid value to sortby: %s\n", tokens[1]);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "io") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &track_io)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "pagesize") == 0) {
+ page_size = g_ascii_strtoull(tokens[1], NULL, 10);
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/howvec.c b/contrib/plugins/howvec.c
new file mode 100644
index 000000000..4a5ec3d93
--- /dev/null
+++ b/contrib/plugins/howvec.c
@@ -0,0 +1,372 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * How vectorised is this code?
+ *
+ * Attempt to measure the amount of vectorisation that has been done
+ * on some code by counting classes of instruction.
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef enum {
+ COUNT_CLASS,
+ COUNT_INDIVIDUAL,
+ COUNT_NONE
+} CountType;
+
+static int limit = 50;
+static bool do_inline;
+static bool verbose;
+
+static GMutex lock;
+static GHashTable *insns;
+
+typedef struct {
+ const char *class;
+ const char *opt;
+ uint32_t mask;
+ uint32_t pattern;
+ CountType what;
+ uint64_t count;
+} InsnClassExecCount;
+
+typedef struct {
+ char *insn;
+ uint32_t opcode;
+ uint64_t count;
+ InsnClassExecCount *class;
+} InsnExecCount;
+
+/*
+ * Matchers for classes of instructions, order is important.
+ *
+ * Your most precise match must be before looser matches. If no match
+ * is found in the table we can create an individual entry.
+ *
+ * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
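+ *
+ * An opcode belongs to the first row where (opcode & mask) == pattern.
+ * For instance the aarch64 NOP encoding 0xd503201f matches the exact-mask
+ * NOP row before the looser Hints row can claim it, which is why more
+ * specific rows must come first.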
+ */
+static InsnClassExecCount aarch64_insn_classes[] = {
+ /* "Reserved"" */
+ { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE},
+ { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS},
+ { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS},
+ /* Data Processing Immediate */
+ { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS},
+ { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS},
+ { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS},
+ { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS},
+ { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS},
+ { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS},
+ { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS},
+ { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS},
+ /* Branches */
+ { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS},
+ { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS},
+ { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE},
+ { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS},
+ { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS},
+ { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS},
+ { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS},
+ { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS},
+ { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS},
+ { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS},
+ { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS},
+ { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS},
+ { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
+ /* Loads and Stores */
+ { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
+ { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS},
+ { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
+ { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS},
+ { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS},
+ { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS},
+ { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS},
+ { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS},
+ { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS},
+ { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS},
+ { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
+ { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
+ { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
+ { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS},
+ { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS},
+ /* Data Processing Register */
+ { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS},
+ /* Scalar FP */
+ { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
+ /* Unclassified */
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
+};
+
+static InsnClassExecCount sparc32_insn_classes[] = {
+ { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS},
+ { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS},
+ { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS},
+ { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS},
+ { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS},
+ { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS},
+ { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS},
+ /* Unclassified */
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+static InsnClassExecCount sparc64_insn_classes[] = {
+ { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS},
+ { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS},
+ { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS},
+ { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS},
+ /* Unclassified */
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+/* Default matcher for currently unclassified architectures */
+static InsnClassExecCount default_insn_classes[] = {
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+typedef struct {
+ const char *qemu_target;
+ InsnClassExecCount *table;
+ int table_sz;
+} ClassSelector;
+
+static ClassSelector class_tables[] = {
+ { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
+ { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
+ { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
+ { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
+};
+
+static InsnClassExecCount *class_table;
+static int class_table_sz;
+
+static gint cmp_exec_count(gconstpointer a, gconstpointer b)
+{
+ InsnExecCount *ea = (InsnExecCount *) a;
+ InsnExecCount *eb = (InsnExecCount *) b;
+ return ea->count > eb->count ? -1 : 1;
+}
+
+static void free_record(gpointer data)
+{
+ InsnExecCount *rec = (InsnExecCount *) data;
+ g_free(rec->insn);
+ g_free(rec);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
+ int i;
+ GList *counts;
+ InsnClassExecCount *class = NULL;
+
+ for (i = 0; i < class_table_sz; i++) {
+ class = &class_table[i];
+ switch (class->what) {
+ case COUNT_CLASS:
+ if (class->count || verbose) {
+ g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
+ class->class,
+ class->count);
+ }
+ break;
+ case COUNT_INDIVIDUAL:
+ g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
+ class->class);
+ break;
+ case COUNT_NONE:
+ g_string_append_printf(report, "Class: %-24s\tnot counted\n",
+ class->class);
+ break;
+ default:
+ break;
+ }
+ }
+
+ counts = g_hash_table_get_values(insns);
+ if (counts && g_list_next(counts)) {
+ g_string_append_printf(report, "Individual Instructions:\n");
+ counts = g_list_sort(counts, cmp_exec_count);
+
+ for (i = 0; i < limit && g_list_next(counts);
+ i++, counts = g_list_next(counts)) {
+ InsnExecCount *rec = (InsnExecCount *) counts->data;
+ g_string_append_printf(report,
+ "Instr: %-24s\t(%ld hits)\t(op=0x%08x/%s)\n",
+ rec->insn,
+ rec->count,
+ rec->opcode,
+ rec->class ?
+ rec->class->class : "un-categorised");
+ }
+ g_list_free(counts);
+ }
+
+ g_hash_table_destroy(insns);
+
+ qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+ insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
+}
+
+static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
+{
+ uint64_t *count = (uint64_t *) udata;
+ (*count)++;
+}
+
+static uint64_t *find_counter(struct qemu_plugin_insn *insn)
+{
+ int i;
+ uint64_t *cnt = NULL;
+ uint32_t opcode;
+ InsnClassExecCount *class = NULL;
+
+ /*
+ * We only match the first 32 bits of the instruction which is
+ * fine for most RISCs but a bit limiting for CISC architectures.
+ * They would probably benefit from a more tailored plugin.
+ * However we can fall back to individual instruction counting.
+ */
+ opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
+
+    for (i = 0; i < class_table_sz; i++) {
+ class = &class_table[i];
+ uint32_t masked_bits = opcode & class->mask;
+ if (masked_bits == class->pattern) {
+ break;
+ }
+ }
+
+ g_assert(class);
+
+ switch (class->what) {
+ case COUNT_NONE:
+ return NULL;
+ case COUNT_CLASS:
+ return &class->count;
+ case COUNT_INDIVIDUAL:
+ {
+ InsnExecCount *icount;
+
+ g_mutex_lock(&lock);
+ icount = (InsnExecCount *) g_hash_table_lookup(insns,
+ GUINT_TO_POINTER(opcode));
+
+ if (!icount) {
+ icount = g_new0(InsnExecCount, 1);
+ icount->opcode = opcode;
+ icount->insn = qemu_plugin_insn_disas(insn);
+ icount->class = class;
+
+ g_hash_table_insert(insns, GUINT_TO_POINTER(opcode),
+ (gpointer) icount);
+ }
+ g_mutex_unlock(&lock);
+
+ return &icount->count;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ return NULL;
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ uint64_t *cnt;
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ cnt = find_counter(insn);
+
+ if (cnt) {
+ if (do_inline) {
+ qemu_plugin_register_vcpu_insn_exec_inline(
+ insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1);
+ } else {
+ qemu_plugin_register_vcpu_insn_exec_cb(
+ insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
+ }
+ }
+ }
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+ const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+
+ /* Select a class table appropriate to the guest architecture */
+ for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
+ ClassSelector *entry = &class_tables[i];
+ if (!entry->qemu_target ||
+ strcmp(entry->qemu_target, info->target_name) == 0) {
+ class_table = entry->table;
+ class_table_sz = entry->table_sz;
+ break;
+ }
+ }
+
+ for (i = 0; i < argc; i++) {
+ char *p = argv[i];
+ g_autofree char **tokens = g_strsplit(p, "=", -1);
+ if (g_strcmp0(tokens[0], "inline") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", p);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "verbose") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", p);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "count") == 0) {
+ char *value = tokens[1];
+ int j;
+ CountType type = COUNT_INDIVIDUAL;
+ if (*value == '!') {
+ type = COUNT_NONE;
+ value++;
+ }
+ for (j = 0; j < class_table_sz; j++) {
+ if (strcmp(value, class_table[j].opt) == 0) {
+ class_table[j].what = type;
+ break;
+ }
+ }
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", p);
+ return -1;
+ }
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/hwprofile.c b/contrib/plugins/hwprofile.c
new file mode 100644
index 000000000..691d4edb0
--- /dev/null
+++ b/contrib/plugins/hwprofile.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) 2020, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * HW Profile - breakdown access patterns for IO to devices
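+ *
+ * Example invocation (illustrative):
+ *   -plugin ./contrib/plugins/libhwprofile.so,track=read,pattern=on
+ * where track, pattern, source and match are parsed in
+ * qemu_plugin_install() at the bottom of this file.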
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef struct {
+ uint64_t cpu_read;
+ uint64_t cpu_write;
+ uint64_t reads;
+ uint64_t writes;
+} IOCounts;
+
+typedef struct {
+ uint64_t off_or_pc;
+ IOCounts counts;
+} IOLocationCounts;
+
+typedef struct {
+ const char *name;
+ uint64_t base;
+ IOCounts totals;
+ GHashTable *detail;
+} DeviceCounts;
+
+static GMutex lock;
+static GHashTable *devices;
+
+/* track the access pattern to a piece of HW */
+static bool pattern;
+/* track the source address of access to HW */
+static bool source;
+/* track only matched regions of HW */
+static bool check_match;
+static gchar **matches;
+
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+static inline bool track_reads(void)
+{
+ return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_R;
+}
+
+static inline bool track_writes(void)
+{
+ return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_W;
+}
+
+static void plugin_init(void)
+{
+ devices = g_hash_table_new(NULL, NULL);
+}
+
+static gint sort_cmp(gconstpointer a, gconstpointer b)
+{
+ DeviceCounts *ea = (DeviceCounts *) a;
+ DeviceCounts *eb = (DeviceCounts *) b;
+ return ea->totals.reads + ea->totals.writes >
+ eb->totals.reads + eb->totals.writes ? -1 : 1;
+}
+
+static gint sort_loc(gconstpointer a, gconstpointer b)
+{
+ IOLocationCounts *ea = (IOLocationCounts *) a;
+ IOLocationCounts *eb = (IOLocationCounts *) b;
+    return ea->off_or_pc > eb->off_or_pc ? 1 : -1;
+}
+
+static void fmt_iocount_record(GString *s, IOCounts *rec)
+{
+ if (track_reads()) {
+ g_string_append_printf(s, ", %"PRIx64", %"PRId64,
+ rec->cpu_read, rec->reads);
+ }
+ if (track_writes()) {
+ g_string_append_printf(s, ", %"PRIx64", %"PRId64,
+ rec->cpu_write, rec->writes);
+ }
+}
+
+static void fmt_dev_record(GString *s, DeviceCounts *rec)
+{
+ g_string_append_printf(s, "%s, 0x%"PRIx64,
+ rec->name, rec->base);
+ fmt_iocount_record(s, &rec->totals);
+ g_string_append_c(s, '\n');
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("");
+ GList *counts;
+
+ if (!(pattern || source)) {
+ g_string_printf(report, "Device, Address");
+ if (track_reads()) {
+ g_string_append_printf(report, ", RCPUs, Reads");
+ }
+ if (track_writes()) {
+ g_string_append_printf(report, ", WCPUs, Writes");
+ }
+ g_string_append_c(report, '\n');
+ }
+
+ counts = g_hash_table_get_values(devices);
+ if (counts && g_list_next(counts)) {
+ GList *it;
+
+ it = g_list_sort(counts, sort_cmp);
+
+ while (it) {
+ DeviceCounts *rec = (DeviceCounts *) it->data;
+ if (rec->detail) {
+ GList *accesses = g_hash_table_get_values(rec->detail);
+ GList *io_it = g_list_sort(accesses, sort_loc);
+ const char *prefix = pattern ? "off" : "pc";
+ g_string_append_printf(report, "%s @ 0x%"PRIx64"\n",
+ rec->name, rec->base);
+ while (io_it) {
+ IOLocationCounts *loc = (IOLocationCounts *) io_it->data;
+ g_string_append_printf(report, " %s:%08"PRIx64,
+ prefix, loc->off_or_pc);
+ fmt_iocount_record(report, &loc->counts);
+ g_string_append_c(report, '\n');
+ io_it = io_it->next;
+ }
+ } else {
+ fmt_dev_record(report, rec);
+ }
+ it = it->next;
+        }
+ g_list_free(it);
+ }
+
+ qemu_plugin_outs(report->str);
+}
+
+static DeviceCounts *new_count(const char *name, uint64_t base)
+{
+ DeviceCounts *count = g_new0(DeviceCounts, 1);
+ count->name = name;
+ count->base = base;
+ if (pattern || source) {
+ count->detail = g_hash_table_new(NULL, NULL);
+ }
+ g_hash_table_insert(devices, (gpointer) name, count);
+ return count;
+}
+
+static IOLocationCounts *new_location(GHashTable *table, uint64_t off_or_pc)
+{
+ IOLocationCounts *loc = g_new0(IOLocationCounts, 1);
+ loc->off_or_pc = off_or_pc;
+ g_hash_table_insert(table, (gpointer) off_or_pc, loc);
+ return loc;
+}
+
+static void hwprofile_match_hit(DeviceCounts *rec, uint64_t off)
+{
+ g_autoptr(GString) report = g_string_new("hwprofile: match @ offset");
+ g_string_append_printf(report, "%"PRIx64", previous hits\n", off);
+ fmt_dev_record(report, rec);
+ qemu_plugin_outs(report->str);
+}
+
+static void inc_count(IOCounts *count, bool is_write, unsigned int cpu_index)
+{
+ if (is_write) {
+ count->writes++;
+ count->cpu_write |= (1 << cpu_index);
+ } else {
+ count->reads++;
+ count->cpu_read |= (1 << cpu_index);
+ }
+}
+
+static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
+ uint64_t vaddr, void *udata)
+{
+ struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
+
+ if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) {
+ return;
+ } else {
+ const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+ uint64_t off = qemu_plugin_hwaddr_phys_addr(hwaddr);
+ bool is_write = qemu_plugin_mem_is_store(meminfo);
+ DeviceCounts *counts;
+
+ g_mutex_lock(&lock);
+ counts = (DeviceCounts *) g_hash_table_lookup(devices, name);
+
+ if (!counts) {
+ uint64_t base = vaddr - off;
+ counts = new_count(name, base);
+ }
+
+ if (check_match) {
+ if (g_strv_contains((const char * const *)matches, counts->name)) {
+ hwprofile_match_hit(counts, off);
+ inc_count(&counts->totals, is_write, cpu_index);
+ }
+ } else {
+ inc_count(&counts->totals, is_write, cpu_index);
+ }
+
+ /* either track offsets or source of access */
+ if (source) {
+ off = (uint64_t) udata;
+ }
+
+ if (pattern || source) {
+ IOLocationCounts *io_count = g_hash_table_lookup(counts->detail,
+ (gpointer) off);
+ if (!io_count) {
+ io_count = new_location(counts->detail, off);
+ }
+ inc_count(&io_count->counts, is_write, cpu_index);
+ }
+
+ g_mutex_unlock(&lock);
+ }
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ gpointer udata = (gpointer) (source ? qemu_plugin_insn_vaddr(insn) : 0);
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
+ QEMU_PLUGIN_CB_NO_REGS,
+ rw, udata);
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+ g_autoptr(GString) matches_raw = g_string_new("");
+
+ for (i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", 2);
+
+ if (g_strcmp0(tokens[0], "track") == 0) {
+ if (g_strcmp0(tokens[1], "read") == 0) {
+ rw = QEMU_PLUGIN_MEM_R;
+ } else if (g_strcmp0(tokens[1], "write") == 0) {
+ rw = QEMU_PLUGIN_MEM_W;
+ } else {
+ fprintf(stderr, "invalid value for track: %s\n", tokens[1]);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "pattern") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &pattern)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "source") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &source)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "match") == 0) {
+ check_match = true;
+ g_string_append_printf(matches_raw, "%s,", tokens[1]);
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+ if (check_match) {
+ matches = g_strsplit(matches_raw->str, ",", -1);
+ }
+
+ if (source && pattern) {
+ fprintf(stderr, "can only currently track either source or pattern.\n");
+ return -1;
+ }
+
+ if (!info->system_emulation) {
+ fprintf(stderr, "hwprofile: plugin only useful for system emulation\n");
+ return -1;
+ }
+
+ /* Just warn about overflow */
+ if (info->system.smp_vcpus > 64 ||
+ info->system.max_vcpus > 64) {
+ fprintf(stderr, "hwprofile: can only track up to 64 CPUs\n");
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c
new file mode 100644
index 000000000..a41ffe83f
--- /dev/null
+++ b/contrib/plugins/lockstep.c
@@ -0,0 +1,356 @@
+/*
+ * Lockstep Execution Plugin
+ *
+ * Allows you to execute two QEMU instances in lockstep and report
+ * when their execution diverges. This is mainly useful for developers
+ * who want to see where a change to TCG code generation has
+ * introduced a subtle and hard to find bug.
+ *
+ * Caveats:
+ * - only single-threaded linux-user apps; non-deterministic syscalls
+ *   may still cause false divergence
+ * - no MTTCG enabled system emulation (icount may help)
+ *
+ * While icount makes things more deterministic it doesn't guarantee a
+ * particular run will execute the exact same sequence of blocks. An
+ * asynchronous event (for example an X11 graphics update) may cause a
+ * block to end early and a new partial block to start. This means
+ * serial-only test cases are a better bet. -d nochain may also help.
+ *
+ * This code is not thread safe!
+ *
+ * Copyright (c) 2020 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
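+
+/*
+ * Example usage (a sketch; the plugin path and argument syntax are
+ * illustrative and depend on your build):
+ *
+ *   qemu-system-x86_64 ... -plugin ./liblockstep.so,sockpath=/tmp/lockstep.sock
+ *
+ * Launch two instances with identical command lines; the first run
+ * creates the socket and the second connects to it.
+ */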
+
+#include <glib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* saved so we can uninstall later */
+static qemu_plugin_id_t our_id;
+
+static unsigned long bb_count;
+static unsigned long insn_count;
+
+/* Information about a translated block */
+typedef struct {
+ uint64_t pc;
+ uint64_t insns;
+} BlockInfo;
+
+/* Information about an execution state in the log */
+typedef struct {
+ BlockInfo *block;
+ unsigned long insn_count;
+ unsigned long block_count;
+} ExecInfo;
+
+/* The execution state we compare */
+typedef struct {
+ uint64_t pc;
+ unsigned long insn_count;
+} ExecState;
+
+typedef struct {
+ GSList *log_pos;
+ int distance;
+} DivergeState;
+
+/* list of translated block info */
+static GSList *blocks;
+
+/* execution log and points of divergence */
+static GSList *log, *divergence_log;
+
+static int socket_fd;
+static char *path_to_unlink;
+
+static bool verbose;
+
+static void plugin_cleanup(qemu_plugin_id_t id)
+{
+ /* Free our block data */
+ g_slist_free_full(blocks, &g_free);
+ g_slist_free_full(log, &g_free);
+ g_slist_free(divergence_log);
+
+ close(socket_fd);
+ if (path_to_unlink) {
+ unlink(path_to_unlink);
+ }
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) out = g_string_new("No divergence :-)\n");
+ g_string_append_printf(out, "Executed %ld/%d blocks\n",
+ bb_count, g_slist_length(log));
+ g_string_append_printf(out, "Executed ~%ld instructions\n", insn_count);
+ qemu_plugin_outs(out->str);
+
+ plugin_cleanup(id);
+}
+
+static void report_divergence(ExecState *us, ExecState *them)
+{
+ DivergeState divrec = { log, 0 };
+ g_autoptr(GString) out = g_string_new("");
+ bool diverged = false;
+
+    /*
+     * If we have diverged before, did we get back on track or are we
+     * totally losing it?
+     */
+ if (divergence_log) {
+ DivergeState *last = (DivergeState *) divergence_log->data;
+ GSList *entry;
+
+ for (entry = log; g_slist_next(entry); entry = g_slist_next(entry)) {
+ if (entry == last->log_pos) {
+ break;
+ }
+ divrec.distance++;
+ }
+
+ /*
+ * If the last two records are so close it is likely we will
+ * not recover synchronisation with the other end.
+ */
+ if (divrec.distance == 1 && last->distance == 1) {
+ diverged = true;
+ }
+ }
+ divergence_log = g_slist_prepend(divergence_log,
+ g_memdup(&divrec, sizeof(divrec)));
+
+ /* Output short log entry of going out of sync... */
+ if (verbose || divrec.distance == 1 || diverged) {
+ g_string_printf(out, "@ 0x%016lx vs 0x%016lx (%d/%d since last)\n",
+ us->pc, them->pc, g_slist_length(divergence_log),
+ divrec.distance);
+ qemu_plugin_outs(out->str);
+ }
+
+ if (diverged) {
+ int i;
+ GSList *entry;
+
+ g_string_printf(out, "Δ insn_count @ 0x%016lx (%ld) vs 0x%016lx (%ld)\n",
+ us->pc, us->insn_count, them->pc, them->insn_count);
+
+ for (entry = log, i = 0;
+ g_slist_next(entry) && i < 5;
+ entry = g_slist_next(entry), i++) {
+ ExecInfo *prev = (ExecInfo *) entry->data;
+ g_string_append_printf(out,
+ " previously @ 0x%016lx/%ld (%ld insns)\n",
+ prev->block->pc, prev->block->insns,
+ prev->insn_count);
+ }
+ qemu_plugin_outs(out->str);
+ qemu_plugin_outs("too much divergence... giving up.");
+ qemu_plugin_uninstall(our_id, plugin_cleanup);
+ }
+}
+
+static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
+{
+ BlockInfo *bi = (BlockInfo *) udata;
+ ExecState us, them;
+ ssize_t bytes;
+ ExecInfo *exec;
+
+ us.pc = bi->pc;
+ us.insn_count = insn_count;
+
+ /*
+ * Write our current position to the other end. If we fail the
+ * other end has probably died and we should shut down gracefully.
+ */
+ bytes = write(socket_fd, &us, sizeof(ExecState));
+    if (bytes < (ssize_t) sizeof(ExecState)) {
+ qemu_plugin_outs(bytes < 0 ?
+ "problem writing to socket" :
+ "wrote less than expected to socket");
+ qemu_plugin_uninstall(our_id, plugin_cleanup);
+ return;
+ }
+
+ /*
+ * Now read where our peer has reached. Again a failure probably
+ * indicates the other end died and we should close down cleanly.
+ */
+ bytes = read(socket_fd, &them, sizeof(ExecState));
+    if (bytes < (ssize_t) sizeof(ExecState)) {
+ qemu_plugin_outs(bytes < 0 ?
+ "problem reading from socket" :
+ "read less than expected");
+ qemu_plugin_uninstall(our_id, plugin_cleanup);
+ return;
+ }
+
+ /*
+ * Compare and report if we have diverged.
+ */
+ if (us.pc != them.pc) {
+        report_divergence(&us, &them);
+ }
+
+ /*
+ * Assume this block will execute fully and record it
+ * in the execution log.
+ */
+ insn_count += bi->insns;
+ bb_count++;
+ exec = g_new0(ExecInfo, 1);
+ exec->block = bi;
+ exec->insn_count = insn_count;
+ exec->block_count = bb_count;
+ log = g_slist_prepend(log, exec);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ BlockInfo *bi = g_new0(BlockInfo, 1);
+ bi->pc = qemu_plugin_tb_vaddr(tb);
+ bi->insns = qemu_plugin_tb_n_insns(tb);
+
+ /* save a reference so we can free later */
+ blocks = g_slist_prepend(blocks, bi);
+ qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
+ QEMU_PLUGIN_CB_NO_REGS, (void *)bi);
+}
+
+
+/*
+ * Instead of encoding master/slave status into what is essentially
+ * two peers we shall just take the simple approach of checking for
+ * the existence of the pipe and assuming if it's not there we are the
+ * first process.
+ */
+static bool setup_socket(const char *path)
+{
+ struct sockaddr_un sockaddr;
+ int fd;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("create socket");
+ return false;
+ }
+
+ sockaddr.sun_family = AF_UNIX;
+ g_strlcpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
+ if (bind(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
+ perror("bind socket");
+ close(fd);
+ return false;
+ }
+
+ /* remember to clean-up */
+ path_to_unlink = g_strdup(path);
+
+ if (listen(fd, 1) < 0) {
+ perror("listen socket");
+ close(fd);
+ return false;
+ }
+
+ socket_fd = accept(fd, NULL, NULL);
+ if (socket_fd < 0 && errno != EINTR) {
+ perror("accept socket");
+ close(fd);
+ return false;
+ }
+
+ qemu_plugin_outs("setup_socket::ready\n");
+
+ close(fd);
+ return true;
+}
+
+static bool connect_socket(const char *path)
+{
+ int fd;
+ struct sockaddr_un sockaddr;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("create socket");
+ return false;
+ }
+
+ sockaddr.sun_family = AF_UNIX;
+ g_strlcpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
+
+ if (connect(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
+ perror("failed to connect");
+ close(fd);
+ return false;
+ }
+
+ qemu_plugin_outs("connect_socket::ready\n");
+
+ socket_fd = fd;
+ return true;
+}
+
+static bool setup_unix_socket(const char *path)
+{
+ if (g_file_test(path, G_FILE_TEST_EXISTS)) {
+ return connect_socket(path);
+ } else {
+ return setup_socket(path);
+ }
+}
+
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+ const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+ g_autofree char *sock_path = NULL;
+
+ for (i = 0; i < argc; i++) {
+ char *p = argv[i];
+ g_autofree char **tokens = g_strsplit(p, "=", 2);
+
+ if (g_strcmp0(tokens[0], "verbose") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", p);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "sockpath") == 0) {
+            sock_path = g_strdup(tokens[1]);
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", p);
+ return -1;
+ }
+ }
+
+ if (sock_path == NULL) {
+ fprintf(stderr, "Need a socket path to talk to other instance.\n");
+ return -1;
+ }
+
+ if (!setup_unix_socket(sock_path)) {
+ fprintf(stderr, "Failed to setup socket for communications.\n");
+ return -1;
+ }
+
+ our_id = id;
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c
new file mode 100644
index 000000000..771ca01e0
--- /dev/null
+++ b/contrib/rdmacm-mux/main.c
@@ -0,0 +1,831 @@
+/*
+ * QEMU paravirtual RDMA - rdmacm-mux implementation
+ *
+ * Copyright (C) 2018 Oracle
+ * Copyright (C) 2018 Red Hat Inc
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ * Marcel Apfelbaum <marcel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <syslog.h>
+
+#include <infiniband/verbs.h>
+#include <infiniband/umad.h>
+#include <infiniband/umad_types.h>
+#include <infiniband/umad_sa.h>
+#include <infiniband/umad_cm.h>
+
+#include "rdmacm-mux.h"
+
+#define SCALE_US 1000
+#define COMMID_TTL 2 /* How many SCALE_US a context of MAD session is saved */
+#define SLEEP_SECS 5 /* This is used both in poll() and thread */
+#define SERVER_LISTEN_BACKLOG 10
+#define MAX_CLIENTS 4096
+#define MAD_RMPP_VERSION 0
+#define MAD_METHOD_MASK0 0x8
+
+#define IB_USER_MAD_LONGS_PER_METHOD_MASK (128 / (8 * sizeof(long)))
+
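+/* Byte offsets of the destination GID within CM REQ/SIDR_REQ MAD payloads */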
+#define CM_REQ_DGID_POS 80
+#define CM_SIDR_REQ_DGID_POS 44
+
+/* The below can be overridden by command line parameters */
+#define UNIX_SOCKET_PATH "/var/run/rdmacm-mux"
+/* The final name has the format "%s-%s-%d": <path>-<rdma-dev-name>-<port> */
+#define SOCKET_PATH_MAX (PATH_MAX - NAME_MAX - sizeof(int) - 2)
+#define RDMA_PORT_NUM 1
+
+typedef struct RdmaCmServerArgs {
+ char unix_socket_path[PATH_MAX];
+ char rdma_dev_name[NAME_MAX];
+ int rdma_port_num;
+} RdmaCMServerArgs;
+
+typedef struct CommId2FdEntry {
+ int fd;
+    int ttl; /* Starts at COMMID_TTL; decremented each timeout, deleted at 0 */
+ __be64 gid_ifid;
+} CommId2FdEntry;
+
+typedef struct RdmaCmUMadAgent {
+ int port_id;
+ int agent_id;
+ GHashTable *gid2fd; /* Used to find fd of a given gid */
+    GHashTable *commid2fd; /* Used to find fd of a given comm_id */
+} RdmaCmUMadAgent;
+
+typedef struct RdmaCmServer {
+ bool run;
+ RdmaCMServerArgs args;
+ struct pollfd fds[MAX_CLIENTS];
+ int nfds;
+ RdmaCmUMadAgent umad_agent;
+ pthread_t umad_recv_thread;
+ pthread_rwlock_t lock;
+} RdmaCMServer;
+
+static RdmaCMServer server = {0};
+
+static void usage(const char *progname)
+{
+ printf("Usage: %s [OPTION]...\n"
+ "Start a RDMA-CM multiplexer\n"
+ "\n"
+ "\t-h Show this help\n"
+ "\t-d rdma-device-name Name of RDMA device to register with\n"
+ "\t-s unix-socket-path Path to unix socket to listen on (default %s)\n"
+ "\t-p rdma-device-port Port number of RDMA device to register with (default %d)\n",
+ progname, UNIX_SOCKET_PATH, RDMA_PORT_NUM);
+}
+
+static void help(const char *progname)
+{
+ fprintf(stderr, "Try '%s -h' for more information.\n", progname);
+}
+
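+/*
+ * Example invocation (the device name is illustrative):
+ *
+ *   rdmacm-mux -d mlx5_0 -p 1
+ *
+ * listens on /var/run/rdmacm-mux-mlx5_0-1 by default.
+ */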
+static void parse_args(int argc, char *argv[])
+{
+ int c;
+ char unix_socket_path[SOCKET_PATH_MAX];
+
+ strcpy(server.args.rdma_dev_name, "");
+ strcpy(unix_socket_path, UNIX_SOCKET_PATH);
+ server.args.rdma_port_num = RDMA_PORT_NUM;
+
+ while ((c = getopt(argc, argv, "hs:d:p:")) != -1) {
+ switch (c) {
+ case 'h':
+ usage(argv[0]);
+ exit(0);
+
+ case 'd':
+ strncpy(server.args.rdma_dev_name, optarg, NAME_MAX - 1);
+ break;
+
+ case 's':
+            /* This is temporary; the final name is built below */
+ strncpy(unix_socket_path, optarg, SOCKET_PATH_MAX - 1);
+ break;
+
+ case 'p':
+ server.args.rdma_port_num = atoi(optarg);
+ break;
+
+ default:
+ help(argv[0]);
+ exit(1);
+ }
+ }
+
+ if (!strcmp(server.args.rdma_dev_name, "")) {
+ fprintf(stderr, "Missing RDMA device name\n");
+ help(argv[0]);
+ exit(1);
+ }
+
+ /* Build unique unix-socket file name */
+ snprintf(server.args.unix_socket_path, PATH_MAX, "%s-%s-%d",
+ unix_socket_path, server.args.rdma_dev_name,
+ server.args.rdma_port_num);
+
+ syslog(LOG_INFO, "unix_socket_path=%s", server.args.unix_socket_path);
+ syslog(LOG_INFO, "rdma-device-name=%s", server.args.rdma_dev_name);
+ syslog(LOG_INFO, "rdma-device-port=%d", server.args.rdma_port_num);
+}
+
+static void hash_tbl_alloc(void)
+{
+ server.umad_agent.gid2fd = g_hash_table_new_full(g_int64_hash,
+ g_int64_equal,
+ g_free, g_free);
+ server.umad_agent.commid2fd = g_hash_table_new_full(g_int_hash,
+ g_int_equal,
+ g_free, g_free);
+}
+
+static void hash_tbl_free(void)
+{
+ if (server.umad_agent.commid2fd) {
+ g_hash_table_destroy(server.umad_agent.commid2fd);
+ }
+ if (server.umad_agent.gid2fd) {
+ g_hash_table_destroy(server.umad_agent.gid2fd);
+ }
+}
+
+
+static int _hash_tbl_search_fd_by_ifid(__be64 *gid_ifid)
+{
+ int *fd;
+
+ fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid);
+ if (!fd) {
+ /* Let's try IPv4 */
+ *gid_ifid |= 0x00000000ffff0000;
+ fd = g_hash_table_lookup(server.umad_agent.gid2fd, gid_ifid);
+ }
+
+ return fd ? *fd : 0;
+}
+
+static int hash_tbl_search_fd_by_ifid(int *fd, __be64 *gid_ifid)
+{
+ pthread_rwlock_rdlock(&server.lock);
+ *fd = _hash_tbl_search_fd_by_ifid(gid_ifid);
+ pthread_rwlock_unlock(&server.lock);
+
+ if (!*fd) {
+ syslog(LOG_WARNING, "Can't find matching for ifid 0x%llx\n", *gid_ifid);
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int hash_tbl_search_fd_by_comm_id(uint32_t comm_id, int *fd,
+ __be64 *gid_idid)
+{
+ CommId2FdEntry *fde;
+
+ pthread_rwlock_rdlock(&server.lock);
+ fde = g_hash_table_lookup(server.umad_agent.commid2fd, &comm_id);
+ pthread_rwlock_unlock(&server.lock);
+
+ if (!fde) {
+ syslog(LOG_WARNING, "Can't find matching for comm_id 0x%x\n", comm_id);
+ return -ENOENT;
+ }
+
+ *fd = fde->fd;
+ *gid_idid = fde->gid_ifid;
+
+ return 0;
+}
+
+static RdmaCmMuxErrCode add_fd_ifid_pair(int fd, __be64 gid_ifid)
+{
+ int fd1;
+
+ pthread_rwlock_wrlock(&server.lock);
+
+ fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid);
+    if (fd1) { /* record already exists - an error */
+ pthread_rwlock_unlock(&server.lock);
+ return fd == fd1 ? RDMACM_MUX_ERR_CODE_EEXIST :
+ RDMACM_MUX_ERR_CODE_EACCES;
+ }
+
+ g_hash_table_insert(server.umad_agent.gid2fd, g_memdup(&gid_ifid,
+ sizeof(gid_ifid)), g_memdup(&fd, sizeof(fd)));
+
+ pthread_rwlock_unlock(&server.lock);
+
+ syslog(LOG_INFO, "0x%lx registered on socket %d",
+ be64toh((uint64_t)gid_ifid), fd);
+
+ return RDMACM_MUX_ERR_CODE_OK;
+}
+
+static RdmaCmMuxErrCode delete_fd_ifid_pair(int fd, __be64 gid_ifid)
+{
+ int fd1;
+
+ pthread_rwlock_wrlock(&server.lock);
+
+ fd1 = _hash_tbl_search_fd_by_ifid(&gid_ifid);
+    if (!fd1) { /* record does not exist - an error */
+ pthread_rwlock_unlock(&server.lock);
+ return RDMACM_MUX_ERR_CODE_ENOTFOUND;
+ }
+
+ g_hash_table_remove(server.umad_agent.gid2fd, g_memdup(&gid_ifid,
+ sizeof(gid_ifid)));
+ pthread_rwlock_unlock(&server.lock);
+
+ syslog(LOG_INFO, "0x%lx unregistered on socket %d",
+ be64toh((uint64_t)gid_ifid), fd);
+
+ return RDMACM_MUX_ERR_CODE_OK;
+}
+
+static void hash_tbl_save_fd_comm_id_pair(int fd, uint32_t comm_id,
+ uint64_t gid_ifid)
+{
+ CommId2FdEntry fde = {fd, COMMID_TTL, gid_ifid};
+
+ pthread_rwlock_wrlock(&server.lock);
+ g_hash_table_insert(server.umad_agent.commid2fd,
+ g_memdup(&comm_id, sizeof(comm_id)),
+ g_memdup(&fde, sizeof(fde)));
+ pthread_rwlock_unlock(&server.lock);
+}
+
+static gboolean remove_old_comm_ids(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ CommId2FdEntry *fde = (CommId2FdEntry *)value;
+
+ return !fde->ttl--;
+}
+
+static gboolean remove_entry_from_gid2fd(gpointer key, gpointer value,
+ gpointer user_data)
+{
+ if (*(int *)value == *(int *)user_data) {
+ syslog(LOG_INFO, "0x%lx unregistered on socket %d",
+ be64toh(*(uint64_t *)key), *(int *)value);
+ return true;
+ }
+
+ return false;
+}
+
+static void hash_tbl_remove_fd_ifid_pair(int fd)
+{
+ pthread_rwlock_wrlock(&server.lock);
+ g_hash_table_foreach_remove(server.umad_agent.gid2fd,
+ remove_entry_from_gid2fd, (gpointer)&fd);
+ pthread_rwlock_unlock(&server.lock);
+}
+
+static int get_fd(const char *mad, int umad_len, int *fd, __be64 *gid_ifid)
+{
+ struct umad_hdr *hdr = (struct umad_hdr *)mad;
+ char *data = (char *)hdr + sizeof(*hdr);
+ int32_t comm_id = 0;
+ uint16_t attr_id = be16toh(hdr->attr_id);
+ int rc = 0;
+
+ if (umad_len <= sizeof(*hdr)) {
+ rc = -EINVAL;
+ syslog(LOG_DEBUG, "Ignoring MAD packets with header only\n");
+ goto out;
+ }
+
+ switch (attr_id) {
+ case UMAD_CM_ATTR_REQ:
+ if (unlikely(umad_len < sizeof(*hdr) + CM_REQ_DGID_POS +
+ sizeof(*gid_ifid))) {
+ rc = -EINVAL;
+ syslog(LOG_WARNING,
+ "Invalid MAD packet size (%d) for attr_id 0x%x\n", umad_len,
+ attr_id);
+ goto out;
+ }
+ memcpy(gid_ifid, data + CM_REQ_DGID_POS, sizeof(*gid_ifid));
+ rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid);
+ break;
+
+ case UMAD_CM_ATTR_SIDR_REQ:
+ if (unlikely(umad_len < sizeof(*hdr) + CM_SIDR_REQ_DGID_POS +
+ sizeof(*gid_ifid))) {
+ rc = -EINVAL;
+ syslog(LOG_WARNING,
+ "Invalid MAD packet size (%d) for attr_id 0x%x\n", umad_len,
+ attr_id);
+ goto out;
+ }
+ memcpy(gid_ifid, data + CM_SIDR_REQ_DGID_POS, sizeof(*gid_ifid));
+ rc = hash_tbl_search_fd_by_ifid(fd, gid_ifid);
+ break;
+
+ case UMAD_CM_ATTR_REP:
+ /* Fall through */
+ case UMAD_CM_ATTR_REJ:
+ /* Fall through */
+ case UMAD_CM_ATTR_DREQ:
+ /* Fall through */
+ case UMAD_CM_ATTR_DREP:
+ /* Fall through */
+ case UMAD_CM_ATTR_RTU:
+ data += sizeof(comm_id);
+ /* Fall through */
+ case UMAD_CM_ATTR_SIDR_REP:
+ if (unlikely(umad_len < sizeof(*hdr) + sizeof(comm_id))) {
+ rc = -EINVAL;
+ syslog(LOG_WARNING,
+ "Invalid MAD packet size (%d) for attr_id 0x%x\n", umad_len,
+ attr_id);
+ goto out;
+ }
+ memcpy(&comm_id, data, sizeof(comm_id));
+ if (comm_id) {
+ rc = hash_tbl_search_fd_by_comm_id(comm_id, fd, gid_ifid);
+ }
+ break;
+
+ default:
+ rc = -EINVAL;
+ syslog(LOG_WARNING, "Unsupported attr_id 0x%x\n", attr_id);
+ }
+
+ syslog(LOG_DEBUG, "mad_to_vm: %d 0x%x 0x%x\n", *fd, attr_id, comm_id);
+
+out:
+ return rc;
+}
+
+static void *umad_recv_thread_func(void *args)
+{
+ int rc;
+ RdmaCmMuxMsg msg = {};
+ int fd = -2;
+
+ msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ;
+ msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD;
+
+ while (server.run) {
+ do {
+ msg.umad_len = sizeof(msg.umad.mad);
+ rc = umad_recv(server.umad_agent.port_id, &msg.umad, &msg.umad_len,
+ SLEEP_SECS * SCALE_US);
+ if ((rc == -EIO) || (rc == -EINVAL)) {
+ syslog(LOG_CRIT, "Fatal error while trying to read MAD");
+ }
+
+ if (rc == -ETIMEDOUT) {
+ g_hash_table_foreach_remove(server.umad_agent.commid2fd,
+ remove_old_comm_ids, NULL);
+ }
+ } while (rc && server.run);
+
+ if (server.run) {
+ rc = get_fd(msg.umad.mad, msg.umad_len, &fd,
+ &msg.hdr.sgid.global.interface_id);
+ if (rc) {
+ continue;
+ }
+
+ send(fd, &msg, sizeof(msg), 0);
+ }
+ }
+
+ return NULL;
+}
+
+static int read_and_process(int fd)
+{
+ int rc;
+ RdmaCmMuxMsg msg = {};
+ struct umad_hdr *hdr;
+    uint32_t *comm_id = NULL;
+ uint16_t attr_id;
+
+ rc = recv(fd, &msg, sizeof(msg), 0);
+ syslog(LOG_DEBUG, "Socket %d, recv %d\n", fd, rc);
+
+ if (rc < 0 && errno != EWOULDBLOCK) {
+ syslog(LOG_ERR, "Fail to read from socket %d\n", fd);
+ return -EIO;
+ }
+
+ if (!rc) {
+ syslog(LOG_ERR, "Fail to read from socket %d\n", fd);
+ return -EPIPE;
+ }
+
+ if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ) {
+ syslog(LOG_WARNING, "Got non-request message (%d) from socket %d\n",
+ msg.hdr.msg_type, fd);
+ return -EPERM;
+ }
+
+ switch (msg.hdr.op_code) {
+ case RDMACM_MUX_OP_CODE_REG:
+ rc = add_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id);
+ break;
+
+ case RDMACM_MUX_OP_CODE_UNREG:
+ rc = delete_fd_ifid_pair(fd, msg.hdr.sgid.global.interface_id);
+ break;
+
+ case RDMACM_MUX_OP_CODE_MAD:
+ /* If this is REQ or REP then store the pair comm_id,fd to be later
+ * used for other messages where gid is unknown */
+ hdr = (struct umad_hdr *)msg.umad.mad;
+ attr_id = be16toh(hdr->attr_id);
+ if ((attr_id == UMAD_CM_ATTR_REQ) || (attr_id == UMAD_CM_ATTR_DREQ) ||
+ (attr_id == UMAD_CM_ATTR_SIDR_REQ) ||
+ (attr_id == UMAD_CM_ATTR_REP) || (attr_id == UMAD_CM_ATTR_DREP)) {
+ comm_id = (uint32_t *)(msg.umad.mad + sizeof(*hdr));
+ hash_tbl_save_fd_comm_id_pair(fd, *comm_id,
+ msg.hdr.sgid.global.interface_id);
+ }
+
+ syslog(LOG_DEBUG, "vm_to_mad: %d 0x%x 0x%x\n", fd, attr_id,
+ comm_id ? *comm_id : 0);
+ rc = umad_send(server.umad_agent.port_id, server.umad_agent.agent_id,
+ &msg.umad, msg.umad_len, 1, 0);
+ if (rc) {
+ syslog(LOG_ERR,
+ "Fail to send MAD message (0x%x) from socket %d, err=%d",
+ attr_id, fd, rc);
+ }
+ break;
+
+ default:
+ syslog(LOG_ERR, "Got invalid op_code (%d) from socket %d",
+ msg.hdr.msg_type, fd);
+ rc = RDMACM_MUX_ERR_CODE_EINVAL;
+ }
+
+ msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_RESP;
+ msg.hdr.err_code = rc;
+ rc = send(fd, &msg, sizeof(msg), 0);
+
+ return rc == sizeof(msg) ? 0 : -EPIPE;
+}
+
+static int accept_all(void)
+{
+ int fd, rc = 0;
+
+ pthread_rwlock_wrlock(&server.lock);
+
+ do {
+ if ((server.nfds + 1) > MAX_CLIENTS) {
+ syslog(LOG_WARNING, "Too many clients (%d)", server.nfds);
+ rc = -EIO;
+ goto out;
+ }
+
+ fd = accept(server.fds[0].fd, NULL, NULL);
+ if (fd < 0) {
+ if (errno != EWOULDBLOCK) {
+ syslog(LOG_WARNING, "accept() failed");
+ rc = -EIO;
+ goto out;
+ }
+ break;
+ }
+
+ syslog(LOG_INFO, "Client connected on socket %d\n", fd);
+ server.fds[server.nfds].fd = fd;
+ server.fds[server.nfds].events = POLLIN;
+ server.nfds++;
+ } while (fd != -1);
+
+out:
+ pthread_rwlock_unlock(&server.lock);
+ return rc;
+}
+
+static void compress_fds(void)
+{
+ int i, j;
+ int closed = 0;
+
+ pthread_rwlock_wrlock(&server.lock);
+
+ for (i = 1; i < server.nfds; i++) {
+ if (!server.fds[i].fd) {
+ closed++;
+ for (j = i; j < server.nfds - 1; j++) {
+ server.fds[j] = server.fds[j + 1];
+ }
+ }
+ }
+
+ server.nfds -= closed;
+
+ pthread_rwlock_unlock(&server.lock);
+}
+
+static void close_fd(int idx)
+{
+ close(server.fds[idx].fd);
+ syslog(LOG_INFO, "Socket %d closed\n", server.fds[idx].fd);
+ hash_tbl_remove_fd_ifid_pair(server.fds[idx].fd);
+ server.fds[idx].fd = 0;
+}
+
+static void run(void)
+{
+ int rc, nfds, i;
+ bool compress = false;
+
+ syslog(LOG_INFO, "Service started");
+
+ while (server.run) {
+ rc = poll(server.fds, server.nfds, SLEEP_SECS * SCALE_US);
+ if (rc < 0) {
+ if (errno != EINTR) {
+ syslog(LOG_WARNING, "poll() failed");
+ }
+ continue;
+ }
+
+ if (rc == 0) {
+ continue;
+ }
+
+ nfds = server.nfds;
+ for (i = 0; i < nfds; i++) {
+ syslog(LOG_DEBUG, "pollfd[%d]: revents 0x%x, events 0x%x\n", i,
+ server.fds[i].revents, server.fds[i].events);
+ if (server.fds[i].revents == 0) {
+ continue;
+ }
+
+ if (server.fds[i].revents != POLLIN) {
+ if (i == 0) {
+ syslog(LOG_NOTICE, "Unexpected poll() event (0x%x)\n",
+ server.fds[i].revents);
+ } else {
+ close_fd(i);
+ compress = true;
+ }
+ continue;
+ }
+
+ if (i == 0) {
+ rc = accept_all();
+ if (rc) {
+ continue;
+ }
+ } else {
+ rc = read_and_process(server.fds[i].fd);
+ if (rc) {
+ close_fd(i);
+ compress = true;
+ }
+ }
+ }
+
+ if (compress) {
+ compress = false;
+ compress_fds();
+ }
+ }
+}
+
+static void fini_listener(void)
+{
+ int i;
+
+ if (server.fds[0].fd <= 0) {
+ return;
+ }
+
+ for (i = server.nfds - 1; i >= 0; i--) {
+ if (server.fds[i].fd) {
+ close(server.fds[i].fd);
+ }
+ }
+
+ unlink(server.args.unix_socket_path);
+}
+
+static void fini_umad(void)
+{
+ if (server.umad_agent.agent_id) {
+ umad_unregister(server.umad_agent.port_id, server.umad_agent.agent_id);
+ }
+
+ if (server.umad_agent.port_id) {
+ umad_close_port(server.umad_agent.port_id);
+ }
+
+ hash_tbl_free();
+}
+
+static void fini(void)
+{
+ if (server.umad_recv_thread) {
+ pthread_join(server.umad_recv_thread, NULL);
+ server.umad_recv_thread = 0;
+ }
+ fini_umad();
+ fini_listener();
+ pthread_rwlock_destroy(&server.lock);
+
+ syslog(LOG_INFO, "Service going down");
+}
+
+static int init_listener(void)
+{
+ struct sockaddr_un sun;
+ int rc, on = 1;
+
+ server.fds[0].fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (server.fds[0].fd < 0) {
+ syslog(LOG_ALERT, "socket() failed");
+ return -EIO;
+ }
+
+ rc = setsockopt(server.fds[0].fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
+ sizeof(on));
+ if (rc < 0) {
+ syslog(LOG_ALERT, "setsockopt() failed");
+ rc = -EIO;
+ goto err;
+ }
+
+ rc = ioctl(server.fds[0].fd, FIONBIO, (char *)&on);
+ if (rc < 0) {
+ syslog(LOG_ALERT, "ioctl() failed");
+ rc = -EIO;
+ goto err;
+ }
+
+ if (strlen(server.args.unix_socket_path) >= sizeof(sun.sun_path)) {
+        syslog(LOG_ALERT,
+               "Invalid unix_socket_path, size must be less than %zu\n",
+               sizeof(sun.sun_path));
+ rc = -EINVAL;
+ goto err;
+ }
+
+ sun.sun_family = AF_UNIX;
+ rc = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+ server.args.unix_socket_path);
+ if (rc < 0 || rc >= sizeof(sun.sun_path)) {
+ syslog(LOG_ALERT, "Could not copy unix socket path\n");
+ rc = -EINVAL;
+ goto err;
+ }
+
+ rc = bind(server.fds[0].fd, (struct sockaddr *)&sun, sizeof(sun));
+ if (rc < 0) {
+ syslog(LOG_ALERT, "bind() failed");
+ rc = -EIO;
+ goto err;
+ }
+
+ rc = listen(server.fds[0].fd, SERVER_LISTEN_BACKLOG);
+ if (rc < 0) {
+ syslog(LOG_ALERT, "listen() failed");
+ rc = -EIO;
+ goto err;
+ }
+
+ server.fds[0].events = POLLIN;
+ server.nfds = 1;
+ server.run = true;
+
+ return 0;
+
+err:
+ close(server.fds[0].fd);
+ return rc;
+}
+
+static int init_umad(void)
+{
+ long method_mask[IB_USER_MAD_LONGS_PER_METHOD_MASK];
+
+ server.umad_agent.port_id = umad_open_port(server.args.rdma_dev_name,
+ server.args.rdma_port_num);
+
+ if (server.umad_agent.port_id < 0) {
+ syslog(LOG_WARNING, "umad_open_port() failed");
+ return -EIO;
+ }
+
+ memset(&method_mask, 0, sizeof(method_mask));
+ method_mask[0] = MAD_METHOD_MASK0;
+ server.umad_agent.agent_id = umad_register(server.umad_agent.port_id,
+ UMAD_CLASS_CM,
+ UMAD_SA_CLASS_VERSION,
+ MAD_RMPP_VERSION, method_mask);
+ if (server.umad_agent.agent_id < 0) {
+ syslog(LOG_WARNING, "umad_register() failed");
+ return -EIO;
+ }
+
+ hash_tbl_alloc();
+
+ return 0;
+}
+
+static void signal_handler(int sig, siginfo_t *siginfo, void *context)
+{
+ static bool warned;
+
+ /* Prevent stop if clients are connected */
+ if (server.nfds != 1) {
+ if (!warned) {
+            syslog(LOG_WARNING,
+                   "Can't stop while active clients exist, resend SIGINT to override");
+ warned = true;
+ return;
+ }
+ }
+
+ if (sig == SIGINT) {
+ server.run = false;
+ fini();
+ }
+
+ exit(0);
+}
+
+static int init(void)
+{
+ int rc;
+ struct sigaction sig = {};
+
+ rc = init_listener();
+ if (rc) {
+ return rc;
+ }
+
+ rc = init_umad();
+ if (rc) {
+ return rc;
+ }
+
+ pthread_rwlock_init(&server.lock, 0);
+
+ rc = pthread_create(&server.umad_recv_thread, NULL, umad_recv_thread_func,
+ NULL);
+ if (rc) {
+ syslog(LOG_ERR, "Fail to create UMAD receiver thread (%d)\n", rc);
+ return rc;
+ }
+
+ sig.sa_sigaction = &signal_handler;
+ sig.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGINT, &sig, NULL);
+ if (rc < 0) {
+ syslog(LOG_ERR, "Fail to install SIGINT handler (%d)\n", errno);
+ return rc;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int rc;
+
+ memset(&server, 0, sizeof(server));
+
+ parse_args(argc, argv);
+
+ rc = init();
+ if (rc) {
+ syslog(LOG_ERR, "Fail to initialize server (%d)\n", rc);
+ rc = -EAGAIN;
+ goto out;
+ }
+
+ run();
+
+out:
+ fini();
+
+ return rc;
+}
diff --git a/contrib/rdmacm-mux/meson.build b/contrib/rdmacm-mux/meson.build
new file mode 100644
index 000000000..6cc501674
--- /dev/null
+++ b/contrib/rdmacm-mux/meson.build
@@ -0,0 +1,9 @@
+if 'CONFIG_PVRDMA' in config_host
+ # if not found, CONFIG_PVRDMA should not be set
+ # FIXME: broken on big endian architectures
+ libumad = cc.find_library('ibumad', required: true)
+ executable('rdmacm-mux', files('main.c'),
+ dependencies: [glib, libumad],
+ build_by_default: false,
+ install: false)
+endif
diff --git a/contrib/rdmacm-mux/rdmacm-mux.h b/contrib/rdmacm-mux/rdmacm-mux.h
new file mode 100644
index 000000000..07a472291
--- /dev/null
+++ b/contrib/rdmacm-mux/rdmacm-mux.h
@@ -0,0 +1,61 @@
+/*
+ * QEMU paravirtual RDMA - rdmacm-mux declarations
+ *
+ * Copyright (C) 2018 Oracle
+ * Copyright (C) 2018 Red Hat Inc
+ *
+ * Authors:
+ * Yuval Shaia <yuval.shaia@oracle.com>
+ * Marcel Apfelbaum <marcel@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef RDMACM_MUX_H
+#define RDMACM_MUX_H
+
+#include "linux/if.h"
+#include <infiniband/verbs.h>
+#include <infiniband/umad.h>
+#include <rdma/rdma_user_cm.h>
+
+typedef enum RdmaCmMuxMsgType {
+ RDMACM_MUX_MSG_TYPE_REQ = 0,
+ RDMACM_MUX_MSG_TYPE_RESP = 1,
+} RdmaCmMuxMsgType;
+
+typedef enum RdmaCmMuxOpCode {
+ RDMACM_MUX_OP_CODE_REG = 0,
+ RDMACM_MUX_OP_CODE_UNREG = 1,
+ RDMACM_MUX_OP_CODE_MAD = 2,
+} RdmaCmMuxOpCode;
+
+typedef enum RdmaCmMuxErrCode {
+ RDMACM_MUX_ERR_CODE_OK = 0,
+ RDMACM_MUX_ERR_CODE_EINVAL = 1,
+ RDMACM_MUX_ERR_CODE_EEXIST = 2,
+ RDMACM_MUX_ERR_CODE_EACCES = 3,
+ RDMACM_MUX_ERR_CODE_ENOTFOUND = 4,
+} RdmaCmMuxErrCode;
+
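+/*
+ * Wire protocol: a client sends an RdmaCmMuxMsg with msg_type
+ * RDMACM_MUX_MSG_TYPE_REQ and one of the op codes above; the server
+ * replies with the same message, msg_type set to
+ * RDMACM_MUX_MSG_TYPE_RESP and err_code filled in.
+ */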
+typedef struct RdmaCmMuxHdr {
+ RdmaCmMuxMsgType msg_type;
+ RdmaCmMuxOpCode op_code;
+ union ibv_gid sgid;
+ RdmaCmMuxErrCode err_code;
+} RdmaCmUHdr;
+
+typedef struct RdmaCmUMad {
+ struct ib_user_mad hdr;
+ char mad[RDMA_MAX_PRIVATE_DATA];
+} RdmaCmUMad;
+
+typedef struct RdmaCmMuxMsg {
+ RdmaCmUHdr hdr;
+ int umad_len;
+ RdmaCmUMad umad;
+} RdmaCmMuxMsg;
+
+#endif
diff --git a/contrib/systemd/qemu-guest-agent.service b/contrib/systemd/qemu-guest-agent.service
new file mode 100644
index 000000000..51cd7b37f
--- /dev/null
+++ b/contrib/systemd/qemu-guest-agent.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=QEMU Guest Agent
+BindsTo=dev-virtio\x2dports-org.qemu.guest_agent.0.device
+After=dev-virtio\x2dports-org.qemu.guest_agent.0.device
+
+[Service]
+ExecStart=-/usr/bin/qemu-ga
+Restart=always
+RestartSec=0
+
+[Install]
diff --git a/contrib/systemd/qemu-pr-helper.service b/contrib/systemd/qemu-pr-helper.service
new file mode 100644
index 000000000..a1d27b022
--- /dev/null
+++ b/contrib/systemd/qemu-pr-helper.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Persistent Reservation Daemon for QEMU
+
+[Service]
+WorkingDirectory=/tmp
+Type=simple
+ExecStart=/usr/bin/qemu-pr-helper
+PrivateTmp=yes
+ProtectSystem=strict
+ReadWritePaths=/var/run
+RestrictAddressFamilies=AF_UNIX
+Restart=always
+RestartSec=0
+
+[Install]
diff --git a/contrib/systemd/qemu-pr-helper.socket b/contrib/systemd/qemu-pr-helper.socket
new file mode 100644
index 000000000..9d7c3e5e2
--- /dev/null
+++ b/contrib/systemd/qemu-pr-helper.socket
@@ -0,0 +1,9 @@
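+# Systemd pairs this socket unit with qemu-pr-helper.service by name
+# for socket activation.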
+[Unit]
+Description=Persistent Reservation Daemon for QEMU
+
+[Socket]
+ListenStream=/run/qemu-pr-helper.sock
+SocketMode=0600
+
+[Install]
+WantedBy=multi-user.target
diff --git a/contrib/vhost-user-blk/meson.build b/contrib/vhost-user-blk/meson.build
new file mode 100644
index 000000000..7f47ad1b2
--- /dev/null
+++ b/contrib/vhost-user-blk/meson.build
@@ -0,0 +1,5 @@
+# FIXME: broken on 32-bit architectures
+executable('vhost-user-blk', files('vhost-user-blk.c'),
+ dependencies: [qemuutil, vhost_user],
+ build_by_default: true,
+ install: false)
diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c
new file mode 100644
index 000000000..865bb5773
--- /dev/null
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -0,0 +1,675 @@
+/*
+ * vhost-user-blk sample application
+ *
+ * Copyright (c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Author:
+ * Changpeng Liu <changpeng.liu@intel.com>
+ *
+ * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
+ * implementation by:
+ * Felipe Franciosi <felipe@nutanix.com>
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "standard-headers/linux/virtio_blk.h"
+#include "libvhost-user-glib.h"
+
+#if defined(__linux__)
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#endif
+
+enum {
+ VHOST_USER_BLK_MAX_QUEUES = 8,
+};
+
+struct virtio_blk_inhdr {
+ unsigned char status;
+};
+
+/* vhost user block device */
+typedef struct VubDev {
+ VugDev parent;
+ int blk_fd;
+ struct virtio_blk_config blkcfg;
+ bool enable_ro;
+ char *blk_name;
+ GMainLoop *loop;
+} VubDev;
+
+typedef struct VubReq {
+ VuVirtqElement *elem;
+ int64_t sector_num;
+ size_t size;
+ struct virtio_blk_inhdr *in;
+ struct virtio_blk_outhdr *out;
+ VubDev *vdev_blk;
+ struct VuVirtq *vq;
+} VubReq;
+
+/* see util/iov.c */
+static size_t vub_iov_size(const struct iovec *iov,
+ const unsigned int iov_cnt)
+{
+ size_t len;
+ unsigned int i;
+
+ len = 0;
+ for (i = 0; i < iov_cnt; i++) {
+ len += iov[i].iov_len;
+ }
+ return len;
+}
+
+static size_t vub_iov_to_buf(const struct iovec *iov,
+ const unsigned int iov_cnt, void *buf)
+{
+ size_t len;
+ unsigned int i;
+
+ len = 0;
+ for (i = 0; i < iov_cnt; i++) {
+ memcpy(buf + len, iov[i].iov_base, iov[i].iov_len);
+ len += iov[i].iov_len;
+ }
+ return len;
+}
+
+static void vub_panic_cb(VuDev *vu_dev, const char *buf)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+
+ assert(vu_dev);
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+ if (buf) {
+ g_warning("vu_panic: %s", buf);
+ }
+
+ g_main_loop_quit(vdev_blk->loop);
+}
+
+static void vub_req_complete(VubReq *req)
+{
+ VugDev *gdev = &req->vdev_blk->parent;
+ VuDev *vu_dev = &gdev->parent;
+
+ /* IO size with 1 extra status byte */
+ vu_queue_push(vu_dev, req->vq, req->elem,
+ req->size + 1);
+ vu_queue_notify(vu_dev, req->vq);
+
+ if (req->elem) {
+ free(req->elem);
+ }
+
+ g_free(req);
+}
+
+static int vub_open(const char *file_name, bool wce)
+{
+ int fd;
+ int flags = O_RDWR;
+
+ if (!wce) {
+ flags |= O_DIRECT;
+ }
+
+ fd = open(file_name, flags);
+ if (fd < 0) {
+ fprintf(stderr, "Cannot open file %s, %s\n", file_name,
+ strerror(errno));
+ return -1;
+ }
+
+ return fd;
+}
+
+static ssize_t
+vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
+{
+ VubDev *vdev_blk = req->vdev_blk;
+ ssize_t rc;
+
+ if (!iovcnt) {
+ fprintf(stderr, "Invalid Read IOV count\n");
+ return -1;
+ }
+
+ req->size = vub_iov_size(iov, iovcnt);
+ rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
+ if (rc < 0) {
+ fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
+ vdev_blk->blk_name, req->sector_num, req->size,
+ strerror(errno));
+ return -1;
+ }
+
+ return rc;
+}
+
+static ssize_t
+vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
+{
+ VubDev *vdev_blk = req->vdev_blk;
+ ssize_t rc;
+
+ if (!iovcnt) {
+ fprintf(stderr, "Invalid Write IOV count\n");
+ return -1;
+ }
+
+ req->size = vub_iov_size(iov, iovcnt);
+ rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
+ if (rc < 0) {
+ fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
+ vdev_blk->blk_name, req->sector_num, req->size,
+ strerror(errno));
+ return -1;
+ }
+
+ return rc;
+}
+
+static int
+vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
+ uint32_t type)
+{
+ struct virtio_blk_discard_write_zeroes *desc;
+ ssize_t size;
+ void *buf;
+
+ size = vub_iov_size(iov, iovcnt);
+ if (size != sizeof(*desc)) {
+ fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
+ return -1;
+ }
+ buf = g_new0(char, size);
+ vub_iov_to_buf(iov, iovcnt, buf);
+
+ #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+ VubDev *vdev_blk = req->vdev_blk;
+ desc = (struct virtio_blk_discard_write_zeroes *)buf;
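+    /* BLKDISCARD and BLKZEROOUT take a {start, length} range in bytes */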
+ uint64_t range[2] = { le64toh(desc->sector) << 9,
+ le32toh(desc->num_sectors) << 9 };
+ if (type == VIRTIO_BLK_T_DISCARD) {
+ if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
+ g_free(buf);
+ return 0;
+ }
+ } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+ if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
+ g_free(buf);
+ return 0;
+ }
+ }
+ #endif
+
+ g_free(buf);
+ return -1;
+}
+
+static void
+vub_flush(VubReq *req)
+{
+ VubDev *vdev_blk = req->vdev_blk;
+
+ fdatasync(vdev_blk->blk_fd);
+}
+
+static int vub_virtio_process_req(VubDev *vdev_blk,
+ VuVirtq *vq)
+{
+ VugDev *gdev = &vdev_blk->parent;
+ VuDev *vu_dev = &gdev->parent;
+ VuVirtqElement *elem;
+ uint32_t type;
+ unsigned in_num;
+ unsigned out_num;
+ VubReq *req;
+
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
+ if (!elem) {
+ return -1;
+ }
+
+ /* refer to hw/block/virtio_blk.c */
+ if (elem->out_num < 1 || elem->in_num < 1) {
+ fprintf(stderr, "virtio-blk request missing headers\n");
+ free(elem);
+ return -1;
+ }
+
+ req = g_new0(VubReq, 1);
+ req->vdev_blk = vdev_blk;
+ req->vq = vq;
+ req->elem = elem;
+
+ in_num = elem->in_num;
+ out_num = elem->out_num;
+
+    /* We don't support VIRTIO_F_ANY_LAYOUT; virtio 1.0 only */
+ if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
+ fprintf(stderr, "Invalid outhdr size\n");
+ goto err;
+ }
+ req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
+ out_num--;
+
+ if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+ fprintf(stderr, "Invalid inhdr size\n");
+ goto err;
+ }
+ req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
+ in_num--;
+
+ type = le32toh(req->out->type);
+ switch (type & ~VIRTIO_BLK_T_BARRIER) {
+ case VIRTIO_BLK_T_IN:
+ case VIRTIO_BLK_T_OUT: {
+ ssize_t ret = 0;
+ bool is_write = type & VIRTIO_BLK_T_OUT;
+ req->sector_num = le64toh(req->out->sector);
+ if (is_write) {
+ ret = vub_writev(req, &elem->out_sg[1], out_num);
+ } else {
+ ret = vub_readv(req, &elem->in_sg[0], in_num);
+ }
+ if (ret >= 0) {
+ req->in->status = VIRTIO_BLK_S_OK;
+ } else {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ }
+ vub_req_complete(req);
+ break;
+ }
+ case VIRTIO_BLK_T_FLUSH:
+ vub_flush(req);
+ req->in->status = VIRTIO_BLK_S_OK;
+ vub_req_complete(req);
+ break;
+ case VIRTIO_BLK_T_GET_ID: {
+ size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
+ VIRTIO_BLK_ID_BYTES);
+ snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
+ req->in->status = VIRTIO_BLK_S_OK;
+ req->size = elem->in_sg[0].iov_len;
+ vub_req_complete(req);
+ break;
+ }
+ case VIRTIO_BLK_T_DISCARD:
+ case VIRTIO_BLK_T_WRITE_ZEROES: {
+ int rc;
+ rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
+ if (rc == 0) {
+ req->in->status = VIRTIO_BLK_S_OK;
+ } else {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ }
+ vub_req_complete(req);
+ break;
+ }
+ default:
+ req->in->status = VIRTIO_BLK_S_UNSUPP;
+ vub_req_complete(req);
+ break;
+ }
+
+ return 0;
+
+err:
+ free(elem);
+ g_free(req);
+ return -1;
+}
+
+static void vub_process_vq(VuDev *vu_dev, int idx)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+ VuVirtq *vq;
+ int ret;
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+ assert(vdev_blk);
+
+ vq = vu_get_queue(vu_dev, idx);
+ assert(vq);
+
+ while (1) {
+ ret = vub_virtio_process_req(vdev_blk, vq);
+ if (ret) {
+ break;
+ }
+ }
+}
+
+static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
+{
+ VuVirtq *vq;
+
+ assert(vu_dev);
+
+ vq = vu_get_queue(vu_dev, idx);
+ vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
+}
+
+static uint64_t
+vub_get_features(VuDev *dev)
+{
+ uint64_t features;
+ VugDev *gdev;
+ VubDev *vdev_blk;
+
+ gdev = container_of(dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+
+ features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
+ 1ull << VIRTIO_BLK_F_SEG_MAX |
+ 1ull << VIRTIO_BLK_F_TOPOLOGY |
+ 1ull << VIRTIO_BLK_F_BLK_SIZE |
+ 1ull << VIRTIO_BLK_F_FLUSH |
+ #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+ 1ull << VIRTIO_BLK_F_DISCARD |
+ 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
+ #endif
+ 1ull << VIRTIO_BLK_F_CONFIG_WCE;
+
+ if (vdev_blk->enable_ro) {
+ features |= 1ull << VIRTIO_BLK_F_RO;
+ }
+
+ return features;
+}
+
+static uint64_t
+vub_get_protocol_features(VuDev *dev)
+{
+ return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
+ 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
+}
+
+static int
+vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+
+ if (len > sizeof(struct virtio_blk_config)) {
+ return -1;
+ }
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+ memcpy(config, &vdev_blk->blkcfg, len);
+
+ return 0;
+}
+
+static int
+vub_set_config(VuDev *vu_dev, const uint8_t *data,
+ uint32_t offset, uint32_t size, uint32_t flags)
+{
+ VugDev *gdev;
+ VubDev *vdev_blk;
+ uint8_t wce;
+ int fd;
+
+ /* don't support live migration */
+ if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
+ return -1;
+ }
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_blk = container_of(gdev, VubDev, parent);
+
+ if (offset != offsetof(struct virtio_blk_config, wce) ||
+ size != 1) {
+ return -1;
+ }
+
+ wce = *data;
+ if (wce == vdev_blk->blkcfg.wce) {
+        /* Do nothing if unchanged from the old configuration */
+ return 0;
+ }
+
+ vdev_blk->blkcfg.wce = wce;
+ fprintf(stdout, "Write Cache Policy Changed\n");
+ if (vdev_blk->blk_fd >= 0) {
+ close(vdev_blk->blk_fd);
+ vdev_blk->blk_fd = -1;
+ }
+
+ fd = vub_open(vdev_blk->blk_name, wce);
+ if (fd < 0) {
+ fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
+ vdev_blk->blk_fd = -1;
+ return -1;
+ }
+ vdev_blk->blk_fd = fd;
+
+ return 0;
+}
+
+static const VuDevIface vub_iface = {
+ .get_features = vub_get_features,
+ .queue_set_started = vub_queue_set_started,
+ .get_protocol_features = vub_get_protocol_features,
+ .get_config = vub_get_config,
+ .set_config = vub_set_config,
+};
+
+static int unix_sock_new(char *unix_fn)
+{
+ int sock;
+ struct sockaddr_un un;
+ size_t len;
+
+ assert(unix_fn);
+
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ perror("socket");
+ return -1;
+ }
+
+ un.sun_family = AF_UNIX;
+ (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
+ len = sizeof(un.sun_family) + strlen(un.sun_path);
+
+ (void)unlink(unix_fn);
+ if (bind(sock, (struct sockaddr *)&un, len) < 0) {
+ perror("bind");
+ goto fail;
+ }
+
+ if (listen(sock, 1) < 0) {
+ perror("listen");
+ goto fail;
+ }
+
+ return sock;
+
+fail:
+ (void)close(sock);
+
+ return -1;
+}
+
+static void vub_free(struct VubDev *vdev_blk)
+{
+ if (!vdev_blk) {
+ return;
+ }
+
+ g_main_loop_unref(vdev_blk->loop);
+ if (vdev_blk->blk_fd >= 0) {
+ close(vdev_blk->blk_fd);
+ }
+ g_free(vdev_blk);
+}
+
+static uint32_t
+vub_get_blocksize(int fd)
+{
+ uint32_t blocksize = 512;
+
+#if defined(__linux__) && defined(BLKSSZGET)
+ if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
+ return blocksize;
+ }
+#endif
+
+ return blocksize;
+}
+
+static void
+vub_initialize_config(int fd, struct virtio_blk_config *config)
+{
+ off64_t capacity;
+
+ capacity = lseek64(fd, 0, SEEK_END);
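+    /* virtio-blk capacity is expressed in 512-byte sectors */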
+ config->capacity = capacity >> 9;
+ config->blk_size = vub_get_blocksize(fd);
+ config->size_max = 65536;
+ config->seg_max = 128 - 2;
+ config->min_io_size = 1;
+ config->opt_io_size = 1;
+ config->num_queues = 1;
+ #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
+ config->max_discard_sectors = 32768;
+ config->max_discard_seg = 1;
+ config->discard_sector_alignment = config->blk_size >> 9;
+ config->max_write_zeroes_sectors = 32768;
+ config->max_write_zeroes_seg = 1;
+ #endif
+}
+
+static VubDev *
+vub_new(char *blk_file)
+{
+ VubDev *vdev_blk;
+
+ vdev_blk = g_new0(VubDev, 1);
+ vdev_blk->loop = g_main_loop_new(NULL, FALSE);
+ vdev_blk->blk_fd = vub_open(blk_file, 1);
+ if (vdev_blk->blk_fd < 0) {
+ fprintf(stderr, "Error to open block device %s\n", blk_file);
+ vub_free(vdev_blk);
+ return NULL;
+ }
+ vdev_blk->enable_ro = false;
+ vdev_blk->blkcfg.wce = 0;
+ vdev_blk->blk_name = blk_file;
+
+ /* fill virtio_blk_config with block parameters */
+ vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
+
+ return vdev_blk;
+}
+
+static int opt_fdnum = -1;
+static char *opt_socket_path;
+static char *opt_blk_file;
+static gboolean opt_print_caps;
+static gboolean opt_read_only;
+
+static GOptionEntry entries[] = {
+ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
+ "Print capabilities", NULL },
+ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
+ "Use inherited fd socket", "FDNUM" },
+ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
+ "Use UNIX socket path", "PATH" },
+ {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
+ "block device or file path", "PATH"},
+ { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
+ "Enable read-only", NULL }
+};
+
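+/*
+ * Example invocation (paths are illustrative):
+ *
+ *   vhost-user-blk -s /tmp/vhost-user-blk.sock -b /dev/sdb
+ *
+ * then point a QEMU vhost-user-blk device at the same socket.
+ */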
+int main(int argc, char **argv)
+{
+ int lsock = -1, csock = -1;
+ VubDev *vdev_blk = NULL;
+ GError *error = NULL;
+ GOptionContext *context;
+
+ context = g_option_context_new(NULL);
+ g_option_context_add_main_entries(context, entries, NULL);
+ if (!g_option_context_parse(context, &argc, &argv, &error)) {
+ g_printerr("Option parsing failed: %s\n", error->message);
+ exit(EXIT_FAILURE);
+ }
+ if (opt_print_caps) {
+ g_print("{\n");
+ g_print(" \"type\": \"block\",\n");
+ g_print(" \"features\": [\n");
+ g_print(" \"read-only\",\n");
+ g_print(" \"blk-file\"\n");
+ g_print(" ]\n");
+ g_print("}\n");
+ exit(EXIT_SUCCESS);
+ }
+
+ if (!opt_blk_file) {
+ g_print("%s\n", g_option_context_get_help(context, true, NULL));
+ exit(EXIT_FAILURE);
+ }
+
+ if (opt_socket_path) {
+ lsock = unix_sock_new(opt_socket_path);
+ if (lsock < 0) {
+ exit(EXIT_FAILURE);
+ }
+ } else if (opt_fdnum < 0) {
+ g_print("%s\n", g_option_context_get_help(context, true, NULL));
+ exit(EXIT_FAILURE);
+ } else {
+ lsock = opt_fdnum;
+ }
+
+ csock = accept(lsock, NULL, NULL);
+ if (csock < 0) {
+ g_printerr("Accept error %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ vdev_blk = vub_new(opt_blk_file);
+ if (!vdev_blk) {
+ exit(EXIT_FAILURE);
+ }
+ if (opt_read_only) {
+ vdev_blk->enable_ro = true;
+ }
+
+ if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
+ vub_panic_cb, &vub_iface)) {
+ g_printerr("Failed to initialize libvhost-user-glib\n");
+ exit(EXIT_FAILURE);
+ }
+
+ g_main_loop_run(vdev_blk->loop);
+ g_option_context_free(context);
+ vug_deinit(&vdev_blk->parent);
+ vub_free(vdev_blk);
+ if (csock >= 0) {
+ close(csock);
+ }
+ if (lsock >= 0) {
+ close(lsock);
+ }
+ g_free(opt_socket_path);
+ g_free(opt_blk_file);
+
+ return 0;
+}
diff --git a/contrib/vhost-user-gpu/50-qemu-gpu.json.in b/contrib/vhost-user-gpu/50-qemu-gpu.json.in
new file mode 100644
index 000000000..f5edd097f
--- /dev/null
+++ b/contrib/vhost-user-gpu/50-qemu-gpu.json.in
@@ -0,0 +1,5 @@
+{
+ "description": "QEMU vhost-user-gpu",
+ "type": "gpu",
+ "binary": "@libexecdir@/vhost-user-gpu"
+}
diff --git a/contrib/vhost-user-gpu/meson.build b/contrib/vhost-user-gpu/meson.build
new file mode 100644
index 000000000..92c8f3a86
--- /dev/null
+++ b/contrib/vhost-user-gpu/meson.build
@@ -0,0 +1,12 @@
+if 'CONFIG_TOOLS' in config_host and virgl.found() and gbm.found() \
+ and 'CONFIG_LINUX' in config_host and pixman.found()
+ executable('vhost-user-gpu', files('vhost-user-gpu.c', 'virgl.c', 'vugbm.c'),
+ dependencies: [qemuutil, pixman, gbm, virgl, vhost_user, opengl],
+ install: true,
+ install_dir: get_option('libexecdir'))
+
+ configure_file(input: '50-qemu-gpu.json.in',
+ output: '50-qemu-gpu.json',
+ configuration: { 'libexecdir' : get_option('prefix') / get_option('libexecdir') },
+ install_dir: qemu_datadir / 'vhost-user')
+endif
diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c
new file mode 100644
index 000000000..611360e6b
--- /dev/null
+++ b/contrib/vhost-user-gpu/vhost-user-gpu.c
@@ -0,0 +1,1256 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ * Dave Airlie <airlied@redhat.com>
+ * Gerd Hoffmann <kraxel@redhat.com>
+ * Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/drm.h"
+#include "qapi/error.h"
+#include "qemu/sockets.h"
+
+#include <pixman.h>
+#include <glib-unix.h>
+
+#include "vugpu.h"
+#include "hw/virtio/virtio-gpu-bswap.h"
+#include "hw/virtio/virtio-gpu-pixman.h"
+#include "virgl.h"
+#include "vugbm.h"
+
+enum {
+ VHOST_USER_GPU_MAX_QUEUES = 2,
+};
+
+struct virtio_gpu_simple_resource {
+ uint32_t resource_id;
+ uint32_t width;
+ uint32_t height;
+ uint32_t format;
+ struct iovec *iov;
+ unsigned int iov_cnt;
+ uint32_t scanout_bitmask;
+ pixman_image_t *image;
+ struct vugbm_buffer buffer;
+ QTAILQ_ENTRY(virtio_gpu_simple_resource) next;
+};
+
+static gboolean opt_print_caps;
+static int opt_fdnum = -1;
+static char *opt_socket_path;
+static char *opt_render_node;
+static gboolean opt_virgl;
+
+static void vg_handle_ctrl(VuDev *dev, int qidx);
+static void vg_cleanup_mapping(VuGpu *g,
+ struct virtio_gpu_simple_resource *res);
+
+static const char *
+vg_cmd_to_string(int cmd)
+{
+#define CMD(cmd) [cmd] = #cmd
+ static const char *vg_cmd_str[] = {
+ CMD(VIRTIO_GPU_UNDEFINED),
+
+ /* 2d commands */
+ CMD(VIRTIO_GPU_CMD_GET_DISPLAY_INFO),
+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D),
+ CMD(VIRTIO_GPU_CMD_RESOURCE_UNREF),
+ CMD(VIRTIO_GPU_CMD_SET_SCANOUT),
+ CMD(VIRTIO_GPU_CMD_RESOURCE_FLUSH),
+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D),
+ CMD(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING),
+ CMD(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING),
+ CMD(VIRTIO_GPU_CMD_GET_CAPSET_INFO),
+ CMD(VIRTIO_GPU_CMD_GET_CAPSET),
+
+ /* 3d commands */
+ CMD(VIRTIO_GPU_CMD_CTX_CREATE),
+ CMD(VIRTIO_GPU_CMD_CTX_DESTROY),
+ CMD(VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE),
+ CMD(VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE),
+ CMD(VIRTIO_GPU_CMD_RESOURCE_CREATE_3D),
+ CMD(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D),
+ CMD(VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D),
+ CMD(VIRTIO_GPU_CMD_SUBMIT_3D),
+
+ /* cursor commands */
+ CMD(VIRTIO_GPU_CMD_UPDATE_CURSOR),
+ CMD(VIRTIO_GPU_CMD_MOVE_CURSOR),
+ };
+#undef CMD
+
+ if (cmd >= 0 && cmd < G_N_ELEMENTS(vg_cmd_str)) {
+ return vg_cmd_str[cmd];
+ } else {
+ return "unknown";
+ }
+}
+
+static int
+vg_sock_fd_read(int sock, void *buf, ssize_t buflen)
+{
+ int ret;
+
+ do {
+ ret = read(sock, buf, buflen);
+ } while (ret < 0 && (errno == EINTR || errno == EAGAIN));
+
+ g_warn_if_fail(ret == buflen);
+ return ret;
+}
+
+static void
+vg_sock_fd_close(VuGpu *g)
+{
+ if (g->sock_fd >= 0) {
+ close(g->sock_fd);
+ g->sock_fd = -1;
+ }
+}
+
+static gboolean
+source_wait_cb(gint fd, GIOCondition condition, gpointer user_data)
+{
+ VuGpu *g = user_data;
+
+ if (!vg_recv_msg(g, VHOST_USER_GPU_DMABUF_UPDATE, 0, NULL)) {
+ return G_SOURCE_CONTINUE;
+ }
+
+ /* resume */
+ g->wait_in = 0;
+ vg_handle_ctrl(&g->dev.parent, 0);
+
+ return G_SOURCE_REMOVE;
+}
+
+void
+vg_wait_ok(VuGpu *g)
+{
+ assert(g->wait_in == 0);
+ g->wait_in = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP,
+ source_wait_cb, g);
+}
+
+static int
+vg_sock_fd_write(int sock, const void *buf, ssize_t buflen, int fd)
+{
+ ssize_t ret;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = buflen,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ union {
+ struct cmsghdr cmsghdr;
+ char control[CMSG_SPACE(sizeof(int))];
+ } cmsgu;
+ struct cmsghdr *cmsg;
+
+ if (fd != -1) {
+ msg.msg_control = cmsgu.control;
+ msg.msg_controllen = sizeof(cmsgu.control);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+
+ *((int *)CMSG_DATA(cmsg)) = fd;
+ }
+
+ do {
+ ret = sendmsg(sock, &msg, 0);
+ } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
+
+ g_warn_if_fail(ret == buflen);
+ return ret;
+}
+
+void
+vg_send_msg(VuGpu *vg, const VhostUserGpuMsg *msg, int fd)
+{
+ if (vg_sock_fd_write(vg->sock_fd, msg,
+ VHOST_USER_GPU_HDR_SIZE + msg->size, fd) < 0) {
+ vg_sock_fd_close(vg);
+ }
+}
+
+bool
+vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size,
+ gpointer payload)
+{
+ uint32_t req, flags, size;
+
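+    /*
+     * A reply starts with a (request, flags, size) header, followed by
+     * size bytes of payload.
+     */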
+ if (vg_sock_fd_read(g->sock_fd, &req, sizeof(req)) < 0 ||
+ vg_sock_fd_read(g->sock_fd, &flags, sizeof(flags)) < 0 ||
+ vg_sock_fd_read(g->sock_fd, &size, sizeof(size)) < 0) {
+ goto err;
+ }
+
+ g_return_val_if_fail(req == expect_req, false);
+ g_return_val_if_fail(flags & VHOST_USER_GPU_MSG_FLAG_REPLY, false);
+ g_return_val_if_fail(size == expect_size, false);
+
+ if (size && vg_sock_fd_read(g->sock_fd, payload, size) != size) {
+ goto err;
+ }
+
+ return true;
+
+err:
+ vg_sock_fd_close(g);
+ return false;
+}
+
+static struct virtio_gpu_simple_resource *
+virtio_gpu_find_resource(VuGpu *g, uint32_t resource_id)
+{
+ struct virtio_gpu_simple_resource *res;
+
+ QTAILQ_FOREACH(res, &g->reslist, next) {
+ if (res->resource_id == resource_id) {
+ return res;
+ }
+ }
+ return NULL;
+}
+
+void
+vg_ctrl_response(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd,
+ struct virtio_gpu_ctrl_hdr *resp,
+ size_t resp_len)
+{
+ size_t s;
+
+ if (cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE) {
+ resp->flags |= VIRTIO_GPU_FLAG_FENCE;
+ resp->fence_id = cmd->cmd_hdr.fence_id;
+ resp->ctx_id = cmd->cmd_hdr.ctx_id;
+ }
+ virtio_gpu_ctrl_hdr_bswap(resp);
+ s = iov_from_buf(cmd->elem.in_sg, cmd->elem.in_num, 0, resp, resp_len);
+ if (s != resp_len) {
+ g_critical("%s: response size incorrect %zu vs %zu",
+ __func__, s, resp_len);
+ }
+ vu_queue_push(&g->dev.parent, cmd->vq, &cmd->elem, s);
+ vu_queue_notify(&g->dev.parent, cmd->vq);
+ cmd->state = VG_CMD_STATE_FINISHED;
+}
+
+void
+vg_ctrl_response_nodata(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd,
+ enum virtio_gpu_ctrl_type type)
+{
+ struct virtio_gpu_ctrl_hdr resp = {
+ .type = type,
+ };
+
+ vg_ctrl_response(g, cmd, &resp, sizeof(resp));
+}
+
+
+static gboolean
+get_display_info_cb(gint fd, GIOCondition condition, gpointer user_data)
+{
+ struct virtio_gpu_resp_display_info dpy_info = { {} };
+ VuGpu *vg = user_data;
+ struct virtio_gpu_ctrl_command *cmd = QTAILQ_LAST(&vg->fenceq);
+
+ g_debug("disp info cb");
+ assert(cmd->cmd_hdr.type == VIRTIO_GPU_CMD_GET_DISPLAY_INFO);
+ if (!vg_recv_msg(vg, VHOST_USER_GPU_GET_DISPLAY_INFO,
+ sizeof(dpy_info), &dpy_info)) {
+ return G_SOURCE_CONTINUE;
+ }
+
+ QTAILQ_REMOVE(&vg->fenceq, cmd, next);
+ vg_ctrl_response(vg, cmd, &dpy_info.hdr, sizeof(dpy_info));
+
+ vg->wait_in = 0;
+ vg_handle_ctrl(&vg->dev.parent, 0);
+
+ return G_SOURCE_REMOVE;
+}
+
+void
+vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd)
+{
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_GET_DISPLAY_INFO,
+ .size = 0,
+ };
+
+ assert(vg->wait_in == 0);
+
+ vg_send_msg(vg, &msg, -1);
+ vg->wait_in = g_unix_fd_add(vg->sock_fd, G_IO_IN | G_IO_HUP,
+ get_display_info_cb, vg);
+ cmd->state = VG_CMD_STATE_PENDING;
+}
+
+static void
+vg_resource_create_2d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ pixman_format_code_t pformat;
+ struct virtio_gpu_simple_resource *res;
+ struct virtio_gpu_resource_create_2d c2d;
+
+ VUGPU_FILL_CMD(c2d);
+ virtio_gpu_bswap_32(&c2d, sizeof(c2d));
+
+ if (c2d.resource_id == 0) {
+ g_critical("%s: resource id 0 is not allowed", __func__);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ res = virtio_gpu_find_resource(g, c2d.resource_id);
+ if (res) {
+ g_critical("%s: resource already exists %d", __func__, c2d.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ res = g_new0(struct virtio_gpu_simple_resource, 1);
+ res->width = c2d.width;
+ res->height = c2d.height;
+ res->format = c2d.format;
+ res->resource_id = c2d.resource_id;
+
+ pformat = virtio_gpu_get_pixman_format(c2d.format);
+ if (!pformat) {
+ g_critical("%s: host couldn't handle guest format %d",
+ __func__, c2d.format);
+ g_free(res);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+ return;
+ }
+ vugbm_buffer_create(&res->buffer, &g->gdev, c2d.width, c2d.height);
+ res->image = pixman_image_create_bits(pformat,
+ c2d.width,
+ c2d.height,
+ (uint32_t *)res->buffer.mmap,
+ res->buffer.stride);
+ if (!res->image) {
+ g_critical("%s: resource creation failed %d %d %d",
+ __func__, c2d.resource_id, c2d.width, c2d.height);
+ vugbm_buffer_destroy(&res->buffer);
+ g_free(res);
+ cmd->error = VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY;
+ return;
+ }
+
+ QTAILQ_INSERT_HEAD(&g->reslist, res, next);
+}
+
+static void
+vg_disable_scanout(VuGpu *g, int scanout_id)
+{
+ struct virtio_gpu_scanout *scanout = &g->scanout[scanout_id];
+ struct virtio_gpu_simple_resource *res;
+
+ if (scanout->resource_id == 0) {
+ return;
+ }
+
+ res = virtio_gpu_find_resource(g, scanout->resource_id);
+ if (res) {
+ res->scanout_bitmask &= ~(1 << scanout_id);
+ }
+
+ scanout->width = 0;
+ scanout->height = 0;
+
+ if (g->sock_fd >= 0) {
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_SCANOUT,
+ .size = sizeof(VhostUserGpuScanout),
+ .payload.scanout.scanout_id = scanout_id,
+ };
+ vg_send_msg(g, &msg, -1);
+ }
+}
+
+static void
+vg_resource_destroy(VuGpu *g,
+ struct virtio_gpu_simple_resource *res)
+{
+ int i;
+
+ if (res->scanout_bitmask) {
+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+ if (res->scanout_bitmask & (1 << i)) {
+ vg_disable_scanout(g, i);
+ }
+ }
+ }
+
+ vugbm_buffer_destroy(&res->buffer);
+ vg_cleanup_mapping(g, res);
+ pixman_image_unref(res->image);
+ QTAILQ_REMOVE(&g->reslist, res, next);
+ g_free(res);
+}
+
+static void
+vg_resource_unref(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_simple_resource *res;
+ struct virtio_gpu_resource_unref unref;
+
+ VUGPU_FILL_CMD(unref);
+ virtio_gpu_bswap_32(&unref, sizeof(unref));
+
+ res = virtio_gpu_find_resource(g, unref.resource_id);
+ if (!res) {
+ g_critical("%s: illegal resource specified %d",
+ __func__, unref.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+ vg_resource_destroy(g, res);
+}
+
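+/*
+ * Translate the guest-physical memory entries following an
+ * attach_backing command into an array of host iovecs using
+ * vu_gpa_to_va().  The entry count is capped at 16384 to bound the
+ * allocation; on success *iov must be released with
+ * vg_cleanup_mapping_iov().
+ */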
+int
+vg_create_mapping_iov(VuGpu *g,
+ struct virtio_gpu_resource_attach_backing *ab,
+ struct virtio_gpu_ctrl_command *cmd,
+ struct iovec **iov)
+{
+ struct virtio_gpu_mem_entry *ents;
+ size_t esize, s;
+ int i;
+
+ if (ab->nr_entries > 16384) {
+ g_critical("%s: nr_entries is too big (%d > 16384)",
+ __func__, ab->nr_entries);
+ return -1;
+ }
+
+ esize = sizeof(*ents) * ab->nr_entries;
+ ents = g_malloc(esize);
+ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+ sizeof(*ab), ents, esize);
+ if (s != esize) {
+ g_critical("%s: command data size incorrect %zu vs %zu",
+ __func__, s, esize);
+ g_free(ents);
+ return -1;
+ }
+
+ *iov = g_malloc0(sizeof(struct iovec) * ab->nr_entries);
+ for (i = 0; i < ab->nr_entries; i++) {
+ uint64_t len = ents[i].length;
+ (*iov)[i].iov_len = ents[i].length;
+ (*iov)[i].iov_base = vu_gpa_to_va(&g->dev.parent, &len, ents[i].addr);
+ if (!(*iov)[i].iov_base || len != ents[i].length) {
+ g_critical("%s: resource %d element %d",
+ __func__, ab->resource_id, i);
+ g_free(*iov);
+ g_free(ents);
+ *iov = NULL;
+ return -1;
+ }
+ }
+ g_free(ents);
+ return 0;
+}
+
+static void
+vg_resource_attach_backing(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_simple_resource *res;
+ struct virtio_gpu_resource_attach_backing ab;
+ int ret;
+
+ VUGPU_FILL_CMD(ab);
+ virtio_gpu_bswap_32(&ab, sizeof(ab));
+
+ res = virtio_gpu_find_resource(g, ab.resource_id);
+ if (!res) {
+ g_critical("%s: illegal resource specified %d",
+ __func__, ab.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ if (res->iov) {
+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+ return;
+ }
+
+ ret = vg_create_mapping_iov(g, &ab, cmd, &res->iov);
+ if (ret != 0) {
+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+ return;
+ }
+
+ res->iov_cnt = ab.nr_entries;
+}
+
+/* Currently this only frees the iov array; it may do more work later. */
+void vg_cleanup_mapping_iov(VuGpu *g,
+ struct iovec *iov, uint32_t count)
+{
+ g_free(iov);
+}
+
+static void
+vg_cleanup_mapping(VuGpu *g,
+ struct virtio_gpu_simple_resource *res)
+{
+ vg_cleanup_mapping_iov(g, res->iov, res->iov_cnt);
+ res->iov = NULL;
+ res->iov_cnt = 0;
+}
+
+static void
+vg_resource_detach_backing(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_simple_resource *res;
+ struct virtio_gpu_resource_detach_backing detach;
+
+ VUGPU_FILL_CMD(detach);
+ virtio_gpu_bswap_32(&detach, sizeof(detach));
+
+ res = virtio_gpu_find_resource(g, detach.resource_id);
+ if (!res || !res->iov) {
+ g_critical("%s: illegal resource specified %d",
+ __func__, detach.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ vg_cleanup_mapping(g, res);
+}
+
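+/*
+ * Copy guest data from the resource's backing iov into the host pixman
+ * image.  A full-width transfer starting at the origin is done in one
+ * copy; anything else is copied line by line using the image stride.
+ */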
+static void
+vg_transfer_to_host_2d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_simple_resource *res;
+ int h;
+ uint32_t src_offset, dst_offset, stride;
+ int bpp;
+ pixman_format_code_t format;
+ struct virtio_gpu_transfer_to_host_2d t2d;
+
+ VUGPU_FILL_CMD(t2d);
+ virtio_gpu_t2d_bswap(&t2d);
+
+ res = virtio_gpu_find_resource(g, t2d.resource_id);
+ if (!res || !res->iov) {
+ g_critical("%s: illegal resource specified %d",
+ __func__, t2d.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ if (t2d.r.x > res->width ||
+ t2d.r.y > res->height ||
+ t2d.r.width > res->width ||
+ t2d.r.height > res->height ||
+ t2d.r.x + t2d.r.width > res->width ||
+ t2d.r.y + t2d.r.height > res->height) {
+ g_critical("%s: transfer bounds outside resource"
+ " bounds for resource %d: %d %d %d %d vs %d %d",
+ __func__, t2d.resource_id, t2d.r.x, t2d.r.y,
+ t2d.r.width, t2d.r.height, res->width, res->height);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+ return;
+ }
+
+ format = pixman_image_get_format(res->image);
+ bpp = (PIXMAN_FORMAT_BPP(format) + 7) / 8;
+ stride = pixman_image_get_stride(res->image);
+
+ if (t2d.offset || t2d.r.x || t2d.r.y ||
+ t2d.r.width != pixman_image_get_width(res->image)) {
+ void *img_data = pixman_image_get_data(res->image);
+ for (h = 0; h < t2d.r.height; h++) {
+ src_offset = t2d.offset + stride * h;
+ dst_offset = (t2d.r.y + h) * stride + (t2d.r.x * bpp);
+
+ iov_to_buf(res->iov, res->iov_cnt, src_offset,
+ img_data
+ + dst_offset, t2d.r.width * bpp);
+ }
+ } else {
+ iov_to_buf(res->iov, res->iov_cnt, 0,
+ pixman_image_get_data(res->image),
+ pixman_image_get_stride(res->image)
+ * pixman_image_get_height(res->image));
+ }
+}
+
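+/*
+ * Bind a resource to a scanout.  If the buffer can be exported as a
+ * dmabuf, its fd is passed to the frontend in a DMABUF_SCANOUT message;
+ * otherwise a plain SCANOUT message is sent and pixel data follows in
+ * UPDATE messages on each flush.
+ */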
+static void
+vg_set_scanout(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_simple_resource *res, *ores;
+ struct virtio_gpu_scanout *scanout;
+ struct virtio_gpu_set_scanout ss;
+ int fd;
+
+ VUGPU_FILL_CMD(ss);
+ virtio_gpu_bswap_32(&ss, sizeof(ss));
+
+ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) {
+ g_critical("%s: illegal scanout id specified %d",
+ __func__, ss.scanout_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+ return;
+ }
+
+ if (ss.resource_id == 0) {
+ vg_disable_scanout(g, ss.scanout_id);
+ return;
+ }
+
+ /* create a surface for this scanout */
+ res = virtio_gpu_find_resource(g, ss.resource_id);
+ if (!res) {
+ g_critical("%s: illegal resource specified %d",
+ __func__, ss.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ if (ss.r.x > res->width ||
+ ss.r.y > res->height ||
+ ss.r.width > res->width ||
+ ss.r.height > res->height ||
+ ss.r.x + ss.r.width > res->width ||
+ ss.r.y + ss.r.height > res->height) {
+ g_critical("%s: illegal scanout %d bounds for"
+ " resource %d, (%d,%d)+%d,%d vs %d %d",
+ __func__, ss.scanout_id, ss.resource_id, ss.r.x, ss.r.y,
+ ss.r.width, ss.r.height, res->width, res->height);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+ return;
+ }
+
+ scanout = &g->scanout[ss.scanout_id];
+
+ ores = virtio_gpu_find_resource(g, scanout->resource_id);
+ if (ores) {
+ ores->scanout_bitmask &= ~(1 << ss.scanout_id);
+ }
+
+ res->scanout_bitmask |= (1 << ss.scanout_id);
+ scanout->resource_id = ss.resource_id;
+ scanout->x = ss.r.x;
+ scanout->y = ss.r.y;
+ scanout->width = ss.r.width;
+ scanout->height = ss.r.height;
+
+ struct vugbm_buffer *buffer = &res->buffer;
+
+ if (vugbm_buffer_can_get_dmabuf_fd(buffer)) {
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+ .size = sizeof(VhostUserGpuDMABUFScanout),
+ .payload.dmabuf_scanout = (VhostUserGpuDMABUFScanout) {
+ .scanout_id = ss.scanout_id,
+ .x = ss.r.x,
+ .y = ss.r.y,
+ .width = ss.r.width,
+ .height = ss.r.height,
+ .fd_width = buffer->width,
+ .fd_height = buffer->height,
+ .fd_stride = buffer->stride,
+ .fd_drm_fourcc = buffer->format
+ }
+ };
+
+ if (vugbm_buffer_get_dmabuf_fd(buffer, &fd)) {
+ vg_send_msg(g, &msg, fd);
+ close(fd);
+ }
+ } else {
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_SCANOUT,
+ .size = sizeof(VhostUserGpuScanout),
+ .payload.scanout = (VhostUserGpuScanout) {
+ .scanout_id = ss.scanout_id,
+ .width = scanout->width,
+ .height = scanout->height
+ }
+ };
+ vg_send_msg(g, &msg, -1);
+ }
+}
+
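+/*
+ * Flush a resource: the damaged rectangle is intersected with every
+ * scanout the resource is bound to.  The dmabuf path only notifies the
+ * frontend of the damaged area, while the fallback path composites the
+ * damaged pixels into an UPDATE message sent over the socket.
+ */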
+static void
+vg_resource_flush(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_simple_resource *res;
+ struct virtio_gpu_resource_flush rf;
+ pixman_region16_t flush_region;
+ int i;
+
+ VUGPU_FILL_CMD(rf);
+ virtio_gpu_bswap_32(&rf, sizeof(rf));
+
+ res = virtio_gpu_find_resource(g, rf.resource_id);
+ if (!res) {
+ g_critical("%s: illegal resource specified %d\n",
+ __func__, rf.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ if (rf.r.x > res->width ||
+ rf.r.y > res->height ||
+ rf.r.width > res->width ||
+ rf.r.height > res->height ||
+ rf.r.x + rf.r.width > res->width ||
+ rf.r.y + rf.r.height > res->height) {
+ g_critical("%s: flush bounds outside resource"
+ " bounds for resource %d: %d %d %d %d vs %d %d\n",
+ __func__, rf.resource_id, rf.r.x, rf.r.y,
+ rf.r.width, rf.r.height, res->width, res->height);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+ return;
+ }
+
+ pixman_region_init_rect(&flush_region,
+ rf.r.x, rf.r.y, rf.r.width, rf.r.height);
+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+ struct virtio_gpu_scanout *scanout;
+ pixman_region16_t region, finalregion;
+ pixman_box16_t *extents;
+
+ if (!(res->scanout_bitmask & (1 << i))) {
+ continue;
+ }
+ scanout = &g->scanout[i];
+
+ pixman_region_init(&finalregion);
+ pixman_region_init_rect(&region, scanout->x, scanout->y,
+ scanout->width, scanout->height);
+
+ pixman_region_intersect(&finalregion, &flush_region, &region);
+
+ extents = pixman_region_extents(&finalregion);
+ size_t width = extents->x2 - extents->x1;
+ size_t height = extents->y2 - extents->y1;
+
+ if (vugbm_buffer_can_get_dmabuf_fd(&res->buffer)) {
+ VhostUserGpuMsg vmsg = {
+ .request = VHOST_USER_GPU_DMABUF_UPDATE,
+ .size = sizeof(VhostUserGpuUpdate),
+ .payload.update = (VhostUserGpuUpdate) {
+ .scanout_id = i,
+ .x = extents->x1,
+ .y = extents->y1,
+ .width = width,
+ .height = height,
+ }
+ };
+ vg_send_msg(g, &vmsg, -1);
+ vg_wait_ok(g);
+ } else {
+ size_t bpp =
+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) / 8;
+ size_t size = width * height * bpp;
+
+ void *p = g_malloc(VHOST_USER_GPU_HDR_SIZE +
+ sizeof(VhostUserGpuUpdate) + size);
+ VhostUserGpuMsg *msg = p;
+ msg->request = VHOST_USER_GPU_UPDATE;
+ msg->size = sizeof(VhostUserGpuUpdate) + size;
+ msg->payload.update = (VhostUserGpuUpdate) {
+ .scanout_id = i,
+ .x = extents->x1,
+ .y = extents->y1,
+ .width = width,
+ .height = height,
+ };
+            pixman_image_t *img =
+                pixman_image_create_bits(pixman_image_get_format(res->image),
+                                         msg->payload.update.width,
+                                         msg->payload.update.height,
+                                         p + offsetof(VhostUserGpuMsg,
+                                                      payload.update.data),
+                                         width * bpp);
+            pixman_image_composite(PIXMAN_OP_SRC,
+                                   res->image, NULL, img,
+                                   extents->x1, extents->y1,
+                                   0, 0, 0, 0,
+                                   width, height);
+            pixman_image_unref(img);
+ vg_send_msg(g, msg, -1);
+ g_free(msg);
+ }
+ pixman_region_fini(&region);
+ pixman_region_fini(&finalregion);
+ }
+ pixman_region_fini(&flush_region);
+}
+
+static void
+vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd)
+{
+ switch (cmd->cmd_hdr.type) {
+ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
+ vg_get_display_info(vg, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
+ vg_resource_create_2d(vg, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_UNREF:
+ vg_resource_unref(vg, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
+ vg_resource_flush(vg, cmd);
+ break;
+ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
+ vg_transfer_to_host_2d(vg, cmd);
+ break;
+ case VIRTIO_GPU_CMD_SET_SCANOUT:
+ vg_set_scanout(vg, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
+ vg_resource_attach_backing(vg, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
+ vg_resource_detach_backing(vg, cmd);
+ break;
+ /* case VIRTIO_GPU_CMD_GET_EDID: */
+ /* break */
+ default:
+ g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type);
+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+ break;
+ }
+ if (cmd->state == VG_CMD_STATE_NEW) {
+ vg_ctrl_response_nodata(vg, cmd, cmd->error ? cmd->error :
+ VIRTIO_GPU_RESP_OK_NODATA);
+ }
+}
+
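+/*
+ * Control virtqueue handler.  Processing is suspended while a reply
+ * from the frontend is outstanding (wait_in != 0).  Commands that do
+ * not finish synchronously (fenced or pending ones) are parked on
+ * fenceq until they complete.
+ */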
+static void
+vg_handle_ctrl(VuDev *dev, int qidx)
+{
+ VuGpu *vg = container_of(dev, VuGpu, dev.parent);
+ VuVirtq *vq = vu_get_queue(dev, qidx);
+ struct virtio_gpu_ctrl_command *cmd = NULL;
+ size_t len;
+
+ for (;;) {
+ if (vg->wait_in != 0) {
+ return;
+ }
+
+ cmd = vu_queue_pop(dev, vq, sizeof(struct virtio_gpu_ctrl_command));
+ if (!cmd) {
+ break;
+ }
+ cmd->vq = vq;
+ cmd->error = 0;
+ cmd->state = VG_CMD_STATE_NEW;
+
+ len = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+ 0, &cmd->cmd_hdr, sizeof(cmd->cmd_hdr));
+ if (len != sizeof(cmd->cmd_hdr)) {
+ g_warning("%s: command size incorrect %zu vs %zu\n",
+ __func__, len, sizeof(cmd->cmd_hdr));
+ }
+
+ virtio_gpu_ctrl_hdr_bswap(&cmd->cmd_hdr);
+ g_debug("%d %s\n", cmd->cmd_hdr.type,
+ vg_cmd_to_string(cmd->cmd_hdr.type));
+
+ if (vg->virgl) {
+ vg_virgl_process_cmd(vg, cmd);
+ } else {
+ vg_process_cmd(vg, cmd);
+ }
+
+ if (cmd->state != VG_CMD_STATE_FINISHED) {
+ QTAILQ_INSERT_TAIL(&vg->fenceq, cmd, next);
+ vg->inflight++;
+ } else {
+ free(cmd);
+ }
+ }
+}
+
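+/*
+ * Non-virgl cursor path: the resource must be a 64x64 image at 32 bpp,
+ * matching the fixed-size data buffer in VhostUserGpuCursorUpdate.
+ */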
+static void
+update_cursor_data_simple(VuGpu *g, uint32_t resource_id, gpointer data)
+{
+ struct virtio_gpu_simple_resource *res;
+
+ res = virtio_gpu_find_resource(g, resource_id);
+ g_return_if_fail(res != NULL);
+ g_return_if_fail(pixman_image_get_width(res->image) == 64);
+ g_return_if_fail(pixman_image_get_height(res->image) == 64);
+ g_return_if_fail(
+ PIXMAN_FORMAT_BPP(pixman_image_get_format(res->image)) == 32);
+
+ memcpy(data, pixman_image_get_data(res->image), 64 * 64 * sizeof(uint32_t));
+}
+
+static void
+vg_process_cursor_cmd(VuGpu *g, struct virtio_gpu_update_cursor *cursor)
+{
+ switch (cursor->hdr.type) {
+ case VIRTIO_GPU_CMD_MOVE_CURSOR: {
+ VhostUserGpuMsg msg = {
+ .request = cursor->resource_id ?
+ VHOST_USER_GPU_CURSOR_POS : VHOST_USER_GPU_CURSOR_POS_HIDE,
+ .size = sizeof(VhostUserGpuCursorPos),
+ .payload.cursor_pos = {
+ .scanout_id = cursor->pos.scanout_id,
+ .x = cursor->pos.x,
+ .y = cursor->pos.y,
+ }
+ };
+ g_debug("%s: move", G_STRFUNC);
+ vg_send_msg(g, &msg, -1);
+ break;
+ }
+ case VIRTIO_GPU_CMD_UPDATE_CURSOR: {
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_CURSOR_UPDATE,
+ .size = sizeof(VhostUserGpuCursorUpdate),
+ .payload.cursor_update = {
+ .pos = {
+ .scanout_id = cursor->pos.scanout_id,
+ .x = cursor->pos.x,
+ .y = cursor->pos.y,
+ },
+ .hot_x = cursor->hot_x,
+ .hot_y = cursor->hot_y,
+ }
+ };
+ g_debug("%s: update", G_STRFUNC);
+ if (g->virgl) {
+ vg_virgl_update_cursor_data(g, cursor->resource_id,
+ msg.payload.cursor_update.data);
+ } else {
+ update_cursor_data_simple(g, cursor->resource_id,
+ msg.payload.cursor_update.data);
+ }
+ vg_send_msg(g, &msg, -1);
+ break;
+ }
+ default:
+ g_debug("%s: unknown cmd %d", G_STRFUNC, cursor->hdr.type);
+ break;
+ }
+}
+
+static void
+vg_handle_cursor(VuDev *dev, int qidx)
+{
+ VuGpu *g = container_of(dev, VuGpu, dev.parent);
+ VuVirtq *vq = vu_get_queue(dev, qidx);
+ VuVirtqElement *elem;
+ size_t len;
+ struct virtio_gpu_update_cursor cursor;
+
+ for (;;) {
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
+ break;
+ }
+ g_debug("cursor out:%d in:%d\n", elem->out_num, elem->in_num);
+
+ len = iov_to_buf(elem->out_sg, elem->out_num,
+ 0, &cursor, sizeof(cursor));
+ if (len != sizeof(cursor)) {
+ g_warning("%s: cursor size incorrect %zu vs %zu\n",
+ __func__, len, sizeof(cursor));
+ } else {
+ virtio_gpu_bswap_32(&cursor, sizeof(cursor));
+ vg_process_cursor_cmd(g, &cursor);
+ }
+ vu_queue_push(dev, vq, elem, 0);
+ vu_queue_notify(dev, vq);
+ free(elem);
+ }
+}
+
+static void
+vg_panic(VuDev *dev, const char *msg)
+{
+ g_critical("%s\n", msg);
+ exit(1);
+}
+
+static void
+vg_queue_set_started(VuDev *dev, int qidx, bool started)
+{
+ VuVirtq *vq = vu_get_queue(dev, qidx);
+
+ g_debug("queue started %d:%d\n", qidx, started);
+
+ switch (qidx) {
+ case 0:
+ vu_set_queue_handler(dev, vq, started ? vg_handle_ctrl : NULL);
+ break;
+ case 1:
+ vu_set_queue_handler(dev, vq, started ? vg_handle_cursor : NULL);
+ break;
+ default:
+ break;
+ }
+}
+
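+/*
+ * Second half of the protocol-feature handshake: once the frontend's
+ * GET_PROTOCOL_FEATURES reply arrives, an empty feature set is
+ * acknowledged with SET_PROTOCOL_FEATURES and command processing
+ * resumes.
+ */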
+static gboolean
+protocol_features_cb(gint fd, GIOCondition condition, gpointer user_data)
+{
+ VuGpu *g = user_data;
+ uint64_t u64;
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES
+ };
+
+ if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) {
+ return G_SOURCE_CONTINUE;
+ }
+
+ msg = (VhostUserGpuMsg) {
+ .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+ .size = sizeof(uint64_t),
+ .payload.u64 = 0
+ };
+ vg_send_msg(g, &msg, -1);
+
+ g->wait_in = 0;
+ vg_handle_ctrl(&g->dev.parent, 0);
+
+ return G_SOURCE_REMOVE;
+}
+
+static void
+set_gpu_protocol_features(VuGpu *g)
+{
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES
+ };
+
+ vg_send_msg(g, &msg, -1);
+ assert(g->wait_in == 0);
+ g->wait_in = g_unix_fd_add(g->sock_fd, G_IO_IN | G_IO_HUP,
+ protocol_features_cb, g);
+}
+
+static int
+vg_process_msg(VuDev *dev, VhostUserMsg *msg, int *do_reply)
+{
+ VuGpu *g = container_of(dev, VuGpu, dev.parent);
+
+ switch (msg->request) {
+ case VHOST_USER_GPU_SET_SOCKET: {
+ g_return_val_if_fail(msg->fd_num == 1, 1);
+ g_return_val_if_fail(g->sock_fd == -1, 1);
+ g->sock_fd = msg->fds[0];
+ set_gpu_protocol_features(g);
+ return 1;
+ }
+ default:
+ return 0;
+ }
+
+ return 0;
+}
+
+static uint64_t
+vg_get_features(VuDev *dev)
+{
+ uint64_t features = 0;
+
+ if (opt_virgl) {
+ features |= 1 << VIRTIO_GPU_F_VIRGL;
+ }
+
+ return features;
+}
+
+static void
+vg_set_features(VuDev *dev, uint64_t features)
+{
+ VuGpu *g = container_of(dev, VuGpu, dev.parent);
+ bool virgl = features & (1 << VIRTIO_GPU_F_VIRGL);
+
+ if (virgl && !g->virgl_inited) {
+ if (!vg_virgl_init(g)) {
+ vg_panic(dev, "Failed to initialize virgl");
+ }
+ g->virgl_inited = true;
+ }
+
+ g->virgl = virgl;
+}
+
+static int
+vg_get_config(VuDev *dev, uint8_t *config, uint32_t len)
+{
+ VuGpu *g = container_of(dev, VuGpu, dev.parent);
+
+ if (len > sizeof(struct virtio_gpu_config)) {
+ return -1;
+ }
+
+ if (opt_virgl) {
+ g->virtio_config.num_capsets = vg_virgl_get_num_capsets();
+ }
+
+ memcpy(config, &g->virtio_config, len);
+
+ return 0;
+}
+
+static int
+vg_set_config(VuDev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size,
+ uint32_t flags)
+{
+ VuGpu *g = container_of(dev, VuGpu, dev.parent);
+ struct virtio_gpu_config *config = (struct virtio_gpu_config *)data;
+
+ if (config->events_clear) {
+ g->virtio_config.events_read &= ~config->events_clear;
+ }
+
+ return 0;
+}
+
+static const VuDevIface vuiface = {
+ .set_features = vg_set_features,
+ .get_features = vg_get_features,
+ .queue_set_started = vg_queue_set_started,
+ .process_msg = vg_process_msg,
+ .get_config = vg_get_config,
+ .set_config = vg_set_config,
+};
+
+static void
+vg_destroy(VuGpu *g)
+{
+ struct virtio_gpu_simple_resource *res, *tmp;
+
+ vug_deinit(&g->dev);
+
+ vg_sock_fd_close(g);
+
+ QTAILQ_FOREACH_SAFE(res, &g->reslist, next, tmp) {
+ vg_resource_destroy(g, res);
+ }
+
+ vugbm_device_destroy(&g->gdev);
+}
+
+static GOptionEntry entries[] = {
+ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
+ "Print capabilities", NULL },
+ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
+ "Use inherited fd socket", "FDNUM" },
+ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
+ "Use UNIX socket path", "PATH" },
+ { "render-node", 'r', 0, G_OPTION_ARG_FILENAME, &opt_render_node,
+ "Specify DRM render node", "PATH" },
+ { "virgl", 'v', 0, G_OPTION_ARG_NONE, &opt_virgl,
+ "Turn virgl rendering on", NULL },
+ { NULL, }
+};
+
+int
+main(int argc, char *argv[])
+{
+ GOptionContext *context;
+ GError *error = NULL;
+ GMainLoop *loop = NULL;
+ int fd;
+ VuGpu g = { .sock_fd = -1, .drm_rnode_fd = -1 };
+
+ QTAILQ_INIT(&g.reslist);
+ QTAILQ_INIT(&g.fenceq);
+
+ context = g_option_context_new("QEMU vhost-user-gpu");
+ g_option_context_add_main_entries(context, entries, NULL);
+ if (!g_option_context_parse(context, &argc, &argv, &error)) {
+ g_printerr("Option parsing failed: %s\n", error->message);
+ exit(EXIT_FAILURE);
+ }
+ g_option_context_free(context);
+
+ if (opt_print_caps) {
+ g_print("{\n");
+ g_print(" \"type\": \"gpu\",\n");
+ g_print(" \"features\": [\n");
+ g_print(" \"render-node\",\n");
+ g_print(" \"virgl\"\n");
+ g_print(" ]\n");
+ g_print("}\n");
+ exit(EXIT_SUCCESS);
+ }
+
+ g.drm_rnode_fd = qemu_drm_rendernode_open(opt_render_node);
+ if (opt_render_node && g.drm_rnode_fd == -1) {
+ g_printerr("Failed to open DRM rendernode.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ vugbm_device_init(&g.gdev, g.drm_rnode_fd);
+
+ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) {
+ g_printerr("Please specify either --fd or --socket-path\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (opt_socket_path) {
+ int lsock = unix_listen(opt_socket_path, &error_fatal);
+ if (lsock < 0) {
+ g_printerr("Failed to listen on %s.\n", opt_socket_path);
+ exit(EXIT_FAILURE);
+ }
+ fd = accept(lsock, NULL, NULL);
+ close(lsock);
+ } else {
+ fd = opt_fdnum;
+ }
+ if (fd == -1) {
+ g_printerr("Invalid vhost-user socket.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!vug_init(&g.dev, VHOST_USER_GPU_MAX_QUEUES, fd, vg_panic, &vuiface)) {
+ g_printerr("Failed to initialize libvhost-user-glib.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ loop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(loop);
+ g_main_loop_unref(loop);
+
+ vg_destroy(&g);
+ if (g.drm_rnode_fd >= 0) {
+ close(g.drm_rnode_fd);
+ }
+
+ return 0;
+}
diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c
new file mode 100644
index 000000000..3e45e1bd3
--- /dev/null
+++ b/contrib/vhost-user-gpu/virgl.c
@@ -0,0 +1,599 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ * Dave Airlie <airlied@redhat.com>
+ * Gerd Hoffmann <kraxel@redhat.com>
+ * Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <virglrenderer.h>
+#include "virgl.h"
+
+#include <epoxy/gl.h>
+
+void
+vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id,
+ gpointer data)
+{
+ uint32_t width, height;
+ uint32_t *cursor;
+
+ cursor = virgl_renderer_get_cursor_data(resource_id, &width, &height);
+ g_return_if_fail(cursor != NULL);
+ g_return_if_fail(width == 64);
+ g_return_if_fail(height == 64);
+
+ memcpy(data, cursor, 64 * 64 * sizeof(uint32_t));
+ free(cursor);
+}
+
+static void
+virgl_cmd_context_create(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_ctx_create cc;
+
+ VUGPU_FILL_CMD(cc);
+
+ virgl_renderer_context_create(cc.hdr.ctx_id, cc.nlen,
+ cc.debug_name);
+}
+
+static void
+virgl_cmd_context_destroy(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_ctx_destroy cd;
+
+ VUGPU_FILL_CMD(cd);
+
+ virgl_renderer_context_destroy(cd.hdr.ctx_id);
+}
+
+static void
+virgl_cmd_create_resource_2d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_resource_create_2d c2d;
+ struct virgl_renderer_resource_create_args args;
+
+ VUGPU_FILL_CMD(c2d);
+
+ args.handle = c2d.resource_id;
+    args.target = 2; /* PIPE_TEXTURE_2D */
+    args.format = c2d.format;
+    args.bind = (1 << 1); /* PIPE_BIND_RENDER_TARGET */
+ args.width = c2d.width;
+ args.height = c2d.height;
+ args.depth = 1;
+ args.array_size = 1;
+ args.last_level = 0;
+ args.nr_samples = 0;
+ args.flags = VIRTIO_GPU_RESOURCE_FLAG_Y_0_TOP;
+ virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void
+virgl_cmd_create_resource_3d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_resource_create_3d c3d;
+ struct virgl_renderer_resource_create_args args;
+
+ VUGPU_FILL_CMD(c3d);
+
+ args.handle = c3d.resource_id;
+ args.target = c3d.target;
+ args.format = c3d.format;
+ args.bind = c3d.bind;
+ args.width = c3d.width;
+ args.height = c3d.height;
+ args.depth = c3d.depth;
+ args.array_size = c3d.array_size;
+ args.last_level = c3d.last_level;
+ args.nr_samples = c3d.nr_samples;
+ args.flags = c3d.flags;
+ virgl_renderer_resource_create(&args, NULL, 0);
+}
+
+static void
+virgl_cmd_resource_unref(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_resource_unref unref;
+ struct iovec *res_iovs = NULL;
+ int num_iovs = 0;
+
+ VUGPU_FILL_CMD(unref);
+
+ virgl_renderer_resource_detach_iov(unref.resource_id,
+ &res_iovs,
+ &num_iovs);
+ if (res_iovs != NULL && num_iovs != 0) {
+ vg_cleanup_mapping_iov(g, res_iovs, num_iovs);
+ }
+ virgl_renderer_resource_unref(unref.resource_id);
+}
+
+/* Not yet(?) defined in standard-headers, remove when possible */
+#ifndef VIRTIO_GPU_CAPSET_VIRGL2
+#define VIRTIO_GPU_CAPSET_VIRGL2 2
+#endif
+
+static void
+virgl_cmd_get_capset_info(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_get_capset_info info;
+ struct virtio_gpu_resp_capset_info resp;
+
+ VUGPU_FILL_CMD(info);
+
+ memset(&resp, 0, sizeof(resp));
+ if (info.capset_index == 0) {
+ resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL;
+ virgl_renderer_get_cap_set(resp.capset_id,
+ &resp.capset_max_version,
+ &resp.capset_max_size);
+ } else if (info.capset_index == 1) {
+ resp.capset_id = VIRTIO_GPU_CAPSET_VIRGL2;
+ virgl_renderer_get_cap_set(resp.capset_id,
+ &resp.capset_max_version,
+ &resp.capset_max_size);
+ } else {
+ resp.capset_max_version = 0;
+ resp.capset_max_size = 0;
+ }
+ resp.hdr.type = VIRTIO_GPU_RESP_OK_CAPSET_INFO;
+ vg_ctrl_response(g, cmd, &resp.hdr, sizeof(resp));
+}
+
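+/*
+ * Report two capsets when the renderer supports VIRGL2 (probed via its
+ * maximum version), otherwise only the base VIRGL capset.
+ */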
+uint32_t
+vg_virgl_get_num_capsets(void)
+{
+ uint32_t capset2_max_ver, capset2_max_size;
+ virgl_renderer_get_cap_set(VIRTIO_GPU_CAPSET_VIRGL2,
+ &capset2_max_ver,
+ &capset2_max_size);
+
+ return capset2_max_ver ? 2 : 1;
+}
+
+static void
+virgl_cmd_get_capset(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_get_capset gc;
+ struct virtio_gpu_resp_capset *resp;
+ uint32_t max_ver, max_size;
+
+ VUGPU_FILL_CMD(gc);
+
+ virgl_renderer_get_cap_set(gc.capset_id, &max_ver,
+ &max_size);
+ if (!max_size) {
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+ return;
+ }
+ resp = g_malloc0(sizeof(*resp) + max_size);
+
+ resp->hdr.type = VIRTIO_GPU_RESP_OK_CAPSET;
+ virgl_renderer_fill_caps(gc.capset_id,
+ gc.capset_version,
+ (void *)resp->capset_data);
+ vg_ctrl_response(g, cmd, &resp->hdr, sizeof(*resp) + max_size);
+ g_free(resp);
+}
+
+static void
+virgl_cmd_submit_3d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_cmd_submit cs;
+ void *buf;
+ size_t s;
+
+ VUGPU_FILL_CMD(cs);
+
+ buf = g_malloc(cs.size);
+ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num,
+ sizeof(cs), buf, cs.size);
+ if (s != cs.size) {
+ g_critical("%s: size mismatch (%zd/%d)", __func__, s, cs.size);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER;
+ goto out;
+ }
+
+ virgl_renderer_submit_cmd(buf, cs.hdr.ctx_id, cs.size / 4);
+
+out:
+ g_free(buf);
+}
+
+static void
+virgl_cmd_transfer_to_host_2d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_transfer_to_host_2d t2d;
+ struct virtio_gpu_box box;
+
+ VUGPU_FILL_CMD(t2d);
+
+ box.x = t2d.r.x;
+ box.y = t2d.r.y;
+ box.z = 0;
+ box.w = t2d.r.width;
+ box.h = t2d.r.height;
+ box.d = 1;
+
+ virgl_renderer_transfer_write_iov(t2d.resource_id,
+ 0,
+ 0,
+ 0,
+ 0,
+ (struct virgl_box *)&box,
+ t2d.offset, NULL, 0);
+}
+
+static void
+virgl_cmd_transfer_to_host_3d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_transfer_host_3d t3d;
+
+ VUGPU_FILL_CMD(t3d);
+
+ virgl_renderer_transfer_write_iov(t3d.resource_id,
+ t3d.hdr.ctx_id,
+ t3d.level,
+ t3d.stride,
+ t3d.layer_stride,
+ (struct virgl_box *)&t3d.box,
+ t3d.offset, NULL, 0);
+}
+
+static void
+virgl_cmd_transfer_from_host_3d(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_transfer_host_3d tf3d;
+
+ VUGPU_FILL_CMD(tf3d);
+
+ virgl_renderer_transfer_read_iov(tf3d.resource_id,
+ tf3d.hdr.ctx_id,
+ tf3d.level,
+ tf3d.stride,
+ tf3d.layer_stride,
+ (struct virgl_box *)&tf3d.box,
+ tf3d.offset, NULL, 0);
+}
+
+static void
+virgl_resource_attach_backing(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_resource_attach_backing att_rb;
+ struct iovec *res_iovs;
+ int ret;
+
+ VUGPU_FILL_CMD(att_rb);
+
+ ret = vg_create_mapping_iov(g, &att_rb, cmd, &res_iovs);
+ if (ret != 0) {
+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+ return;
+ }
+
+ ret = virgl_renderer_resource_attach_iov(att_rb.resource_id,
+ res_iovs, att_rb.nr_entries);
+ if (ret != 0) {
+ vg_cleanup_mapping_iov(g, res_iovs, att_rb.nr_entries);
+ }
+}
+
+static void
+virgl_resource_detach_backing(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_resource_detach_backing detach_rb;
+ struct iovec *res_iovs = NULL;
+ int num_iovs = 0;
+
+ VUGPU_FILL_CMD(detach_rb);
+
+ virgl_renderer_resource_detach_iov(detach_rb.resource_id,
+ &res_iovs,
+ &num_iovs);
+ if (res_iovs == NULL || num_iovs == 0) {
+ return;
+ }
+ vg_cleanup_mapping_iov(g, res_iovs, num_iovs);
+}
+
+static void
+virgl_cmd_set_scanout(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_set_scanout ss;
+ struct virgl_renderer_resource_info info;
+ int ret;
+
+ VUGPU_FILL_CMD(ss);
+
+ if (ss.scanout_id >= VIRTIO_GPU_MAX_SCANOUTS) {
+ g_critical("%s: illegal scanout id specified %d",
+ __func__, ss.scanout_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID;
+ return;
+ }
+
+ memset(&info, 0, sizeof(info));
+
+ if (ss.resource_id && ss.r.width && ss.r.height) {
+ ret = virgl_renderer_resource_get_info(ss.resource_id, &info);
+ if (ret == -1) {
+ g_critical("%s: illegal resource specified %d\n",
+ __func__, ss.resource_id);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+
+ int fd = -1;
+ if (virgl_renderer_get_fd_for_texture(info.tex_id, &fd) < 0) {
+ g_critical("%s: failed to get fd for texture\n", __func__);
+ cmd->error = VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID;
+ return;
+ }
+ assert(fd >= 0);
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+ .size = sizeof(VhostUserGpuDMABUFScanout),
+ .payload.dmabuf_scanout.scanout_id = ss.scanout_id,
+ .payload.dmabuf_scanout.x = ss.r.x,
+ .payload.dmabuf_scanout.y = ss.r.y,
+ .payload.dmabuf_scanout.width = ss.r.width,
+ .payload.dmabuf_scanout.height = ss.r.height,
+ .payload.dmabuf_scanout.fd_width = info.width,
+ .payload.dmabuf_scanout.fd_height = info.height,
+ .payload.dmabuf_scanout.fd_stride = info.stride,
+ .payload.dmabuf_scanout.fd_flags = info.flags,
+ .payload.dmabuf_scanout.fd_drm_fourcc = info.drm_fourcc
+ };
+ vg_send_msg(g, &msg, fd);
+ close(fd);
+ } else {
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_DMABUF_SCANOUT,
+ .size = sizeof(VhostUserGpuDMABUFScanout),
+ .payload.dmabuf_scanout.scanout_id = ss.scanout_id,
+ };
+ g_debug("disable scanout");
+ vg_send_msg(g, &msg, -1);
+ }
+ g->scanout[ss.scanout_id].resource_id = ss.resource_id;
+}
+
+static void
+virgl_cmd_resource_flush(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_resource_flush rf;
+ int i;
+
+ VUGPU_FILL_CMD(rf);
+
+ glFlush();
+ if (!rf.resource_id) {
+ g_debug("bad resource id for flush..?");
+ return;
+ }
+ for (i = 0; i < VIRTIO_GPU_MAX_SCANOUTS; i++) {
+ if (g->scanout[i].resource_id != rf.resource_id) {
+ continue;
+ }
+ VhostUserGpuMsg msg = {
+ .request = VHOST_USER_GPU_DMABUF_UPDATE,
+ .size = sizeof(VhostUserGpuUpdate),
+ .payload.update.scanout_id = i,
+ .payload.update.x = rf.r.x,
+ .payload.update.y = rf.r.y,
+ .payload.update.width = rf.r.width,
+ .payload.update.height = rf.r.height
+ };
+ vg_send_msg(g, &msg, -1);
+ vg_wait_ok(g);
+ }
+}
+
+static void
+virgl_cmd_ctx_attach_resource(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_ctx_resource att_res;
+
+ VUGPU_FILL_CMD(att_res);
+
+ virgl_renderer_ctx_attach_resource(att_res.hdr.ctx_id, att_res.resource_id);
+}
+
+static void
+virgl_cmd_ctx_detach_resource(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd)
+{
+ struct virtio_gpu_ctx_resource det_res;
+
+ VUGPU_FILL_CMD(det_res);
+
+ virgl_renderer_ctx_detach_resource(det_res.hdr.ctx_id, det_res.resource_id);
+}
+
+void vg_virgl_process_cmd(VuGpu *g, struct virtio_gpu_ctrl_command *cmd)
+{
+ virgl_renderer_force_ctx_0();
+ switch (cmd->cmd_hdr.type) {
+ case VIRTIO_GPU_CMD_CTX_CREATE:
+ virgl_cmd_context_create(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_CTX_DESTROY:
+ virgl_cmd_context_destroy(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_CREATE_2D:
+ virgl_cmd_create_resource_2d(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_CREATE_3D:
+ virgl_cmd_create_resource_3d(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_SUBMIT_3D:
+ virgl_cmd_submit_3d(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D:
+ virgl_cmd_transfer_to_host_2d(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_TRANSFER_TO_HOST_3D:
+ virgl_cmd_transfer_to_host_3d(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_TRANSFER_FROM_HOST_3D:
+ virgl_cmd_transfer_from_host_3d(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING:
+ virgl_resource_attach_backing(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING:
+ virgl_resource_detach_backing(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_SET_SCANOUT:
+ virgl_cmd_set_scanout(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_FLUSH:
+ virgl_cmd_resource_flush(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_RESOURCE_UNREF:
+ virgl_cmd_resource_unref(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_CTX_ATTACH_RESOURCE:
+ /* TODO add security */
+ virgl_cmd_ctx_attach_resource(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_CTX_DETACH_RESOURCE:
+ /* TODO add security */
+ virgl_cmd_ctx_detach_resource(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_GET_CAPSET_INFO:
+ virgl_cmd_get_capset_info(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_GET_CAPSET:
+ virgl_cmd_get_capset(g, cmd);
+ break;
+ case VIRTIO_GPU_CMD_GET_DISPLAY_INFO:
+ vg_get_display_info(g, cmd);
+ break;
+ default:
+ g_debug("TODO handle ctrl %x\n", cmd->cmd_hdr.type);
+ cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;
+ break;
+ }
+
+ if (cmd->state != VG_CMD_STATE_NEW) {
+ return;
+ }
+
+ if (cmd->error) {
+ g_warning("%s: ctrl 0x%x, error 0x%x\n", __func__,
+ cmd->cmd_hdr.type, cmd->error);
+ vg_ctrl_response_nodata(g, cmd, cmd->error);
+ return;
+ }
+
+ if (!(cmd->cmd_hdr.flags & VIRTIO_GPU_FLAG_FENCE)) {
+ vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+ return;
+ }
+
+ g_debug("Creating fence id:%" PRId64 " type:%d",
+ cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+ virgl_renderer_create_fence(cmd->cmd_hdr.fence_id, cmd->cmd_hdr.type);
+}
+
+static void
+virgl_write_fence(void *opaque, uint32_t fence)
+{
+ VuGpu *g = opaque;
+ struct virtio_gpu_ctrl_command *cmd, *tmp;
+
+ QTAILQ_FOREACH_SAFE(cmd, &g->fenceq, next, tmp) {
+ /*
+         * The guest can end up emitting fences out of order,
+         * so we should check all fenced commands, not just the first one.
+ */
+ if (cmd->cmd_hdr.fence_id > fence) {
+ continue;
+ }
+ g_debug("FENCE %" PRIu64, cmd->cmd_hdr.fence_id);
+ vg_ctrl_response_nodata(g, cmd, VIRTIO_GPU_RESP_OK_NODATA);
+ QTAILQ_REMOVE(&g->fenceq, cmd, next);
+ free(cmd);
+ g->inflight--;
+ }
+}
+
+#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) && \
+ VIRGL_RENDERER_CALLBACKS_VERSION >= 2
+static int
+virgl_get_drm_fd(void *opaque)
+{
+ VuGpu *g = opaque;
+
+ return g->drm_rnode_fd;
+}
+#endif
+
+static struct virgl_renderer_callbacks virgl_cbs = {
+#if defined(VIRGL_RENDERER_CALLBACKS_VERSION) && \
+ VIRGL_RENDERER_CALLBACKS_VERSION >= 2
+ .get_drm_fd = virgl_get_drm_fd,
+ .version = 2,
+#else
+ .version = 1,
+#endif
+ .write_fence = virgl_write_fence,
+};
+
+static void
+vg_virgl_poll(VuDev *dev, int condition, void *data)
+{
+ virgl_renderer_poll();
+}
+
+bool
+vg_virgl_init(VuGpu *g)
+{
+ int ret;
+
+    if (g->drm_rnode_fd >= 0 && virgl_cbs.version == 1) {
+ g_warning("virgl will use the default rendernode");
+ }
+
+ ret = virgl_renderer_init(g,
+ VIRGL_RENDERER_USE_EGL |
+ VIRGL_RENDERER_THREAD_SYNC,
+ &virgl_cbs);
+ if (ret != 0) {
+ return false;
+ }
+
+ ret = virgl_renderer_get_poll_fd();
+ if (ret != -1) {
+ g->renderer_source =
+ vug_source_new(&g->dev, ret, G_IO_IN, vg_virgl_poll, g);
+ }
+
+ return true;
+}
diff --git a/contrib/vhost-user-gpu/virgl.h b/contrib/vhost-user-gpu/virgl.h
new file mode 100644
index 000000000..17078783a
--- /dev/null
+++ b/contrib/vhost-user-gpu/virgl.h
@@ -0,0 +1,26 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ * Dave Airlie <airlied@redhat.com>
+ * Gerd Hoffmann <kraxel@redhat.com>
+ * Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef VUGPU_VIRGL_H
+#define VUGPU_VIRGL_H
+
+#include "vugpu.h"
+
+bool vg_virgl_init(VuGpu *g);
+uint32_t vg_virgl_get_num_capsets(void);
+void vg_virgl_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd);
+void vg_virgl_update_cursor_data(VuGpu *g, uint32_t resource_id,
+ gpointer data);
+
+#endif
diff --git a/contrib/vhost-user-gpu/vugbm.c b/contrib/vhost-user-gpu/vugbm.c
new file mode 100644
index 000000000..fb15d0372
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugbm.c
@@ -0,0 +1,325 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * DRM helpers
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "vugbm.h"
+
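+/*
+ * Plain-memory fallback backend: buffers are malloc'ed host memory at
+ * a fixed 4 bytes per pixel.  No get_fd callback is provided, so these
+ * buffers cannot be exported as dmabufs.
+ */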
+static bool
+mem_alloc_bo(struct vugbm_buffer *buf)
+{
+ buf->mmap = g_malloc(buf->width * buf->height * 4);
+ buf->stride = buf->width * 4;
+ return true;
+}
+
+static void
+mem_free_bo(struct vugbm_buffer *buf)
+{
+ g_free(buf->mmap);
+}
+
+static bool
+mem_map_bo(struct vugbm_buffer *buf)
+{
+ return buf->mmap != NULL;
+}
+
+static void
+mem_unmap_bo(struct vugbm_buffer *buf)
+{
+}
+
+static void
+mem_device_destroy(struct vugbm_device *dev)
+{
+}
+
+#ifdef CONFIG_MEMFD
+struct udmabuf_create {
+ uint32_t memfd;
+ uint32_t flags;
+ uint64_t offset;
+ uint64_t size;
+};
+
+#define UDMABUF_CREATE _IOW('u', 0x42, struct udmabuf_create)
+
+static size_t
+udmabuf_get_size(struct vugbm_buffer *buf)
+{
+ return ROUND_UP(buf->width * buf->height * 4, qemu_real_host_page_size);
+}
+
+static bool
+udmabuf_alloc_bo(struct vugbm_buffer *buf)
+{
+ int ret;
+
+ buf->memfd = memfd_create("udmabuf-bo", MFD_ALLOW_SEALING);
+ if (buf->memfd < 0) {
+ return false;
+ }
+
+ ret = ftruncate(buf->memfd, udmabuf_get_size(buf));
+ if (ret < 0) {
+ close(buf->memfd);
+ return false;
+ }
+
+ ret = fcntl(buf->memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+ if (ret < 0) {
+ close(buf->memfd);
+ return false;
+ }
+
+ buf->stride = buf->width * 4;
+
+ return true;
+}
+
+static void
+udmabuf_free_bo(struct vugbm_buffer *buf)
+{
+ close(buf->memfd);
+}
+
+static bool
+udmabuf_map_bo(struct vugbm_buffer *buf)
+{
+ buf->mmap = mmap(NULL, udmabuf_get_size(buf),
+ PROT_READ | PROT_WRITE, MAP_SHARED, buf->memfd, 0);
+ if (buf->mmap == MAP_FAILED) {
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+udmabuf_get_fd(struct vugbm_buffer *buf, int *fd)
+{
+ struct udmabuf_create create = {
+ .memfd = buf->memfd,
+ .offset = 0,
+ .size = udmabuf_get_size(buf),
+ };
+
+ *fd = ioctl(buf->dev->fd, UDMABUF_CREATE, &create);
+
+ return *fd >= 0;
+}
+
+static void
+udmabuf_unmap_bo(struct vugbm_buffer *buf)
+{
+ munmap(buf->mmap, udmabuf_get_size(buf));
+}
+
+static void
+udmabuf_device_destroy(struct vugbm_device *dev)
+{
+ close(dev->fd);
+}
+#endif
+
+#ifdef CONFIG_GBM
+static bool
+alloc_bo(struct vugbm_buffer *buf)
+{
+ struct gbm_device *dev = buf->dev->dev;
+
+ assert(!buf->bo);
+
+ buf->bo = gbm_bo_create(dev, buf->width, buf->height,
+ buf->format,
+ GBM_BO_USE_RENDERING | GBM_BO_USE_LINEAR);
+
+ if (buf->bo) {
+ buf->stride = gbm_bo_get_stride(buf->bo);
+ return true;
+ }
+
+ return false;
+}
+
+static void
+free_bo(struct vugbm_buffer *buf)
+{
+ gbm_bo_destroy(buf->bo);
+}
+
+static bool
+map_bo(struct vugbm_buffer *buf)
+{
+ uint32_t stride;
+
+ buf->mmap = gbm_bo_map(buf->bo, 0, 0, buf->width, buf->height,
+ GBM_BO_TRANSFER_READ_WRITE, &stride,
+ &buf->mmap_data);
+
+ assert(stride == buf->stride);
+
+ return buf->mmap != NULL;
+}
+
+static void
+unmap_bo(struct vugbm_buffer *buf)
+{
+ gbm_bo_unmap(buf->bo, buf->mmap_data);
+}
+
+static bool
+get_fd(struct vugbm_buffer *buf, int *fd)
+{
+ *fd = gbm_bo_get_fd(buf->bo);
+
+ return *fd >= 0;
+}
+
+static void
+device_destroy(struct vugbm_device *dev)
+{
+ gbm_device_destroy(dev->dev);
+}
+#endif
+
+void
+vugbm_device_destroy(struct vugbm_device *dev)
+{
+ if (!dev->inited) {
+ return;
+ }
+
+ dev->device_destroy(dev);
+}
+
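+/*
+ * Pick the best available backend: a GBM device on the DRM render node
+ * when one was opened, then the experimental /dev/udmabuf interface,
+ * and finally the plain-memory fallback.
+ */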
+void
+vugbm_device_init(struct vugbm_device *dev, int fd)
+{
+ assert(!dev->inited);
+
+#ifdef CONFIG_GBM
+ if (fd >= 0) {
+ dev->dev = gbm_create_device(fd);
+ }
+ if (dev->dev != NULL) {
+ dev->fd = fd;
+ dev->alloc_bo = alloc_bo;
+ dev->free_bo = free_bo;
+ dev->get_fd = get_fd;
+ dev->map_bo = map_bo;
+ dev->unmap_bo = unmap_bo;
+ dev->device_destroy = device_destroy;
+ dev->inited = true;
+ }
+#endif
+#ifdef CONFIG_MEMFD
+ if (!dev->inited && g_file_test("/dev/udmabuf", G_FILE_TEST_EXISTS)) {
+ dev->fd = open("/dev/udmabuf", O_RDWR);
+ if (dev->fd >= 0) {
+ g_debug("Using experimental udmabuf backend");
+ dev->alloc_bo = udmabuf_alloc_bo;
+ dev->free_bo = udmabuf_free_bo;
+ dev->get_fd = udmabuf_get_fd;
+ dev->map_bo = udmabuf_map_bo;
+ dev->unmap_bo = udmabuf_unmap_bo;
+ dev->device_destroy = udmabuf_device_destroy;
+ dev->inited = true;
+ }
+ }
+#endif
+ if (!dev->inited) {
+ g_debug("Using mem fallback");
+ dev->alloc_bo = mem_alloc_bo;
+ dev->free_bo = mem_free_bo;
+ dev->map_bo = mem_map_bo;
+ dev->unmap_bo = mem_unmap_bo;
+ dev->device_destroy = mem_device_destroy;
+ dev->inited = true;
+ }
+ assert(dev->inited);
+}
+
+static bool
+vugbm_buffer_map(struct vugbm_buffer *buf)
+{
+ struct vugbm_device *dev = buf->dev;
+
+ return dev->map_bo(buf);
+}
+
+static void
+vugbm_buffer_unmap(struct vugbm_buffer *buf)
+{
+ struct vugbm_device *dev = buf->dev;
+
+ dev->unmap_bo(buf);
+}
+
+bool
+vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer)
+{
+ if (!buffer->dev->get_fd) {
+ return false;
+ }
+
+ return true;
+}
+
+bool
+vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd)
+{
+ if (!vugbm_buffer_can_get_dmabuf_fd(buffer) ||
+ !buffer->dev->get_fd(buffer, fd)) {
+ g_warning("Failed to get dmabuf");
+ return false;
+ }
+
+ if (*fd < 0) {
+ g_warning("error: dmabuf_fd < 0");
+ return false;
+ }
+
+ return true;
+}
+
+bool
+vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev,
+ uint32_t width, uint32_t height)
+{
+ buffer->dev = dev;
+ buffer->width = width;
+ buffer->height = height;
+ buffer->format = GBM_FORMAT_XRGB8888;
+ buffer->stride = 0; /* modified during alloc */
+ if (!dev->alloc_bo(buffer)) {
+ g_warning("alloc_bo failed");
+ return false;
+ }
+
+ if (!vugbm_buffer_map(buffer)) {
+ g_warning("map_bo failed");
+ goto err;
+ }
+
+ return true;
+
+err:
+ dev->free_bo(buffer);
+ return false;
+}
+
+void
+vugbm_buffer_destroy(struct vugbm_buffer *buffer)
+{
+ struct vugbm_device *dev = buffer->dev;
+
+ vugbm_buffer_unmap(buffer);
+ dev->free_bo(buffer);
+}
diff --git a/contrib/vhost-user-gpu/vugbm.h b/contrib/vhost-user-gpu/vugbm.h
new file mode 100644
index 000000000..82bc4934e
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugbm.h
@@ -0,0 +1,66 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * GBM helpers
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef VHOST_USER_GPU_VUGBM_H
+#define VHOST_USER_GPU_VUGBM_H
+
+#ifdef CONFIG_MEMFD
+#include <sys/ioctl.h>
+#endif
+
+#ifdef CONFIG_GBM
+#include <gbm.h>
+#endif
+
+struct vugbm_buffer;
+
+struct vugbm_device {
+ bool inited;
+ int fd;
+#ifdef CONFIG_GBM
+ struct gbm_device *dev;
+#endif
+
+ bool (*alloc_bo)(struct vugbm_buffer *buf);
+ void (*free_bo)(struct vugbm_buffer *buf);
+ bool (*get_fd)(struct vugbm_buffer *buf, int *fd);
+ bool (*map_bo)(struct vugbm_buffer *buf);
+ void (*unmap_bo)(struct vugbm_buffer *buf);
+ void (*device_destroy)(struct vugbm_device *dev);
+};
+
+struct vugbm_buffer {
+ struct vugbm_device *dev;
+
+#ifdef CONFIG_MEMFD
+ int memfd;
+#endif
+#ifdef CONFIG_GBM
+ struct gbm_bo *bo;
+ void *mmap_data;
+#endif
+
+ uint8_t *mmap;
+ uint32_t width;
+ uint32_t height;
+ uint32_t stride;
+ uint32_t format;
+};
+
+void vugbm_device_init(struct vugbm_device *dev, int fd);
+void vugbm_device_destroy(struct vugbm_device *dev);
+
+bool vugbm_buffer_create(struct vugbm_buffer *buffer, struct vugbm_device *dev,
+ uint32_t width, uint32_t height);
+bool vugbm_buffer_can_get_dmabuf_fd(struct vugbm_buffer *buffer);
+bool vugbm_buffer_get_dmabuf_fd(struct vugbm_buffer *buffer, int *fd);
+void vugbm_buffer_destroy(struct vugbm_buffer *buffer);
+
+#endif
diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h
new file mode 100644
index 000000000..e2864bba6
--- /dev/null
+++ b/contrib/vhost-user-gpu/vugpu.h
@@ -0,0 +1,183 @@
+/*
+ * Virtio vhost-user GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2018
+ *
+ * Authors:
+ * Dave Airlie <airlied@redhat.com>
+ * Gerd Hoffmann <kraxel@redhat.com>
+ * Marc-André Lureau <marcandre.lureau@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef VUGPU_H
+#define VUGPU_H
+
+#include "libvhost-user-glib.h"
+#include "standard-headers/linux/virtio_gpu.h"
+
+#include "qemu/queue.h"
+#include "qemu/iov.h"
+#include "qemu/bswap.h"
+#include "vugbm.h"
+
+typedef enum VhostUserGpuRequest {
+ VHOST_USER_GPU_NONE = 0,
+ VHOST_USER_GPU_GET_PROTOCOL_FEATURES,
+ VHOST_USER_GPU_SET_PROTOCOL_FEATURES,
+ VHOST_USER_GPU_GET_DISPLAY_INFO,
+ VHOST_USER_GPU_CURSOR_POS,
+ VHOST_USER_GPU_CURSOR_POS_HIDE,
+ VHOST_USER_GPU_CURSOR_UPDATE,
+ VHOST_USER_GPU_SCANOUT,
+ VHOST_USER_GPU_UPDATE,
+ VHOST_USER_GPU_DMABUF_SCANOUT,
+ VHOST_USER_GPU_DMABUF_UPDATE,
+} VhostUserGpuRequest;
+
+typedef struct VhostUserGpuDisplayInfoReply {
+ struct virtio_gpu_resp_display_info info;
+} VhostUserGpuDisplayInfoReply;
+
+typedef struct VhostUserGpuCursorPos {
+ uint32_t scanout_id;
+ uint32_t x;
+ uint32_t y;
+} QEMU_PACKED VhostUserGpuCursorPos;
+
+typedef struct VhostUserGpuCursorUpdate {
+ VhostUserGpuCursorPos pos;
+ uint32_t hot_x;
+ uint32_t hot_y;
+ uint32_t data[64 * 64];
+} QEMU_PACKED VhostUserGpuCursorUpdate;
+
+typedef struct VhostUserGpuScanout {
+ uint32_t scanout_id;
+ uint32_t width;
+ uint32_t height;
+} QEMU_PACKED VhostUserGpuScanout;
+
+typedef struct VhostUserGpuUpdate {
+ uint32_t scanout_id;
+ uint32_t x;
+ uint32_t y;
+ uint32_t width;
+ uint32_t height;
+ uint8_t data[];
+} QEMU_PACKED VhostUserGpuUpdate;
+
+typedef struct VhostUserGpuDMABUFScanout {
+ uint32_t scanout_id;
+ uint32_t x;
+ uint32_t y;
+ uint32_t width;
+ uint32_t height;
+ uint32_t fd_width;
+ uint32_t fd_height;
+ uint32_t fd_stride;
+ uint32_t fd_flags;
+ int fd_drm_fourcc;
+} QEMU_PACKED VhostUserGpuDMABUFScanout;
+
+typedef struct VhostUserGpuMsg {
+ uint32_t request; /* VhostUserGpuRequest */
+ uint32_t flags;
+ uint32_t size; /* the following payload size */
+ union {
+ VhostUserGpuCursorPos cursor_pos;
+ VhostUserGpuCursorUpdate cursor_update;
+ VhostUserGpuScanout scanout;
+ VhostUserGpuUpdate update;
+ VhostUserGpuDMABUFScanout dmabuf_scanout;
+ struct virtio_gpu_resp_display_info display_info;
+ uint64_t u64;
+ } payload;
+} QEMU_PACKED VhostUserGpuMsg;
+
+static VhostUserGpuMsg m __attribute__ ((unused));
+#define VHOST_USER_GPU_HDR_SIZE \
+ (sizeof(m.request) + sizeof(m.flags) + sizeof(m.size))
+
+#define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4
+
+struct virtio_gpu_scanout {
+ uint32_t width, height;
+ int x, y;
+ int invalidate;
+ uint32_t resource_id;
+};
+
+typedef struct VuGpu {
+ VugDev dev;
+ struct virtio_gpu_config virtio_config;
+ struct vugbm_device gdev;
+ int sock_fd;
+ int drm_rnode_fd;
+ GSource *renderer_source;
+ guint wait_in;
+
+ bool virgl;
+ bool virgl_inited;
+ uint32_t inflight;
+
+ struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
+ QTAILQ_HEAD(, virtio_gpu_simple_resource) reslist;
+ QTAILQ_HEAD(, virtio_gpu_ctrl_command) fenceq;
+} VuGpu;
+
+enum {
+ VG_CMD_STATE_NEW,
+ VG_CMD_STATE_PENDING,
+ VG_CMD_STATE_FINISHED,
+};
+
+struct virtio_gpu_ctrl_command {
+ VuVirtqElement elem;
+ VuVirtq *vq;
+ struct virtio_gpu_ctrl_hdr cmd_hdr;
+ uint32_t error;
+ int state;
+ QTAILQ_ENTRY(virtio_gpu_ctrl_command) next;
+};
+
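+/*
+ * Read a fixed-size command structure from the request's out-iov;
+ * on a short read, log the mismatch and return from the caller.
+ */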
+#define VUGPU_FILL_CMD(out) do { \
+ size_t s; \
+ s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \
+ &out, sizeof(out)); \
+ if (s != sizeof(out)) { \
+ g_critical("%s: command size incorrect %zu vs %zu", \
+ __func__, s, sizeof(out)); \
+ return; \
+ } \
+ } while (0)
+
+void vg_ctrl_response(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd,
+ struct virtio_gpu_ctrl_hdr *resp,
+ size_t resp_len);
+
+void vg_ctrl_response_nodata(VuGpu *g,
+ struct virtio_gpu_ctrl_command *cmd,
+ enum virtio_gpu_ctrl_type type);
+
+int vg_create_mapping_iov(VuGpu *g,
+ struct virtio_gpu_resource_attach_backing *ab,
+ struct virtio_gpu_ctrl_command *cmd,
+ struct iovec **iov);
+void vg_cleanup_mapping_iov(VuGpu *g, struct iovec *iov, uint32_t count);
+void vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd);
+
+void vg_wait_ok(VuGpu *g);
+
+void vg_send_msg(VuGpu *g, const VhostUserGpuMsg *msg, int fd);
+
+bool vg_recv_msg(VuGpu *g, uint32_t expect_req, uint32_t expect_size,
+ gpointer payload);
+
+#endif
diff --git a/contrib/vhost-user-input/main.c b/contrib/vhost-user-input/main.c
new file mode 100644
index 000000000..081230da5
--- /dev/null
+++ b/contrib/vhost-user-input/main.c
@@ -0,0 +1,412 @@
+/*
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include <sys/ioctl.h>
+
+#include "qemu/iov.h"
+#include "qemu/bswap.h"
+#include "qemu/sockets.h"
+#include "libvhost-user-glib.h"
+#include "standard-headers/linux/input.h"
+#include "standard-headers/linux/virtio_input.h"
+#include "qapi/error.h"
+
+enum {
+ VHOST_USER_INPUT_MAX_QUEUES = 2,
+};
+
+typedef struct virtio_input_event virtio_input_event;
+typedef struct virtio_input_config virtio_input_config;
+
+typedef struct VuInput {
+ VugDev dev;
+ GSource *evsrc;
+ int evdevfd;
+ GArray *config;
+ virtio_input_config *sel_config;
+ struct {
+ virtio_input_event event;
+ VuVirtqElement *elem;
+ } *queue;
+ uint32_t qindex, qsize;
+} VuInput;
+
+static void vi_input_send(VuInput *vi, struct virtio_input_event *event)
+{
+ VuDev *dev = &vi->dev.parent;
+ VuVirtq *vq = vu_get_queue(dev, 0);
+ VuVirtqElement *elem;
+ int i, len;
+
+ /* queue up events ... */
+ if (vi->qindex == vi->qsize) {
+ vi->qsize++;
+ vi->queue = g_realloc_n(vi->queue, vi->qsize, sizeof(vi->queue[0]));
+ }
+ vi->queue[vi->qindex++].event = *event;
+
+ /* ... until we see a report sync ... */
+ if (event->type != htole16(EV_SYN) ||
+ event->code != htole16(SYN_REPORT)) {
+ return;
+ }
+
+ /* ... then check available space ... */
+ for (i = 0; i < vi->qindex; i++) {
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
+ while (--i >= 0) {
+ vu_queue_unpop(dev, vq, vi->queue[i].elem, 0);
+ }
+ vi->qindex = 0;
+ g_warning("virtio-input queue full");
+ return;
+ }
+ vi->queue[i].elem = elem;
+ }
+
+ /* ... and finally pass them to the guest */
+ for (i = 0; i < vi->qindex; i++) {
+ elem = vi->queue[i].elem;
+ len = iov_from_buf(elem->in_sg, elem->in_num,
+ 0, &vi->queue[i].event, sizeof(virtio_input_event));
+ vu_queue_push(dev, vq, elem, len);
+ free(elem);
+ }
+
+ vu_queue_notify(&vi->dev.parent, vq);
+ vi->qindex = 0;
+}
+
+static void
+vi_evdev_watch(VuDev *dev, int condition, void *data)
+{
+ VuInput *vi = data;
+ int fd = vi->evdevfd;
+
+ g_debug("Got evdev condition %x", condition);
+
+ struct virtio_input_event virtio;
+ struct input_event evdev;
+ int rc;
+
+ for (;;) {
+ rc = read(fd, &evdev, sizeof(evdev));
+ if (rc != sizeof(evdev)) {
+ break;
+ }
+
+ g_debug("input %d %d %d", evdev.type, evdev.code, evdev.value);
+
+ virtio.type = htole16(evdev.type);
+ virtio.code = htole16(evdev.code);
+ virtio.value = htole32(evdev.value);
+ vi_input_send(vi, &virtio);
+ }
+}
+
+
+static void vi_handle_status(VuInput *vi, virtio_input_event *event)
+{
+ struct input_event evdev;
+ struct timeval tval;
+ int rc;
+
+ if (gettimeofday(&tval, NULL)) {
+ perror("vi_handle_status: gettimeofday");
+ return;
+ }
+
+ evdev.input_event_sec = tval.tv_sec;
+ evdev.input_event_usec = tval.tv_usec;
+ evdev.type = le16toh(event->type);
+ evdev.code = le16toh(event->code);
+ evdev.value = le32toh(event->value);
+
+ rc = write(vi->evdevfd, &evdev, sizeof(evdev));
+ if (rc == -1) {
+ perror("vi_host_handle_status: write");
+ }
+}
+
+static void vi_handle_sts(VuDev *dev, int qidx)
+{
+ VuInput *vi = container_of(dev, VuInput, dev.parent);
+ VuVirtq *vq = vu_get_queue(dev, qidx);
+ virtio_input_event event;
+ VuVirtqElement *elem;
+ int len;
+
+ g_debug("%s", G_STRFUNC);
+
+ for (;;) {
+ elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
+ break;
+ }
+
+ memset(&event, 0, sizeof(event));
+ len = iov_to_buf(elem->out_sg, elem->out_num,
+ 0, &event, sizeof(event));
+ vi_handle_status(vi, &event);
+ vu_queue_push(dev, vq, elem, len);
+ free(elem);
+ }
+
+ vu_queue_notify(&vi->dev.parent, vq);
+}
+
+static void
+vi_panic(VuDev *dev, const char *msg)
+{
+ g_critical("%s\n", msg);
+ exit(EXIT_FAILURE);
+}
+
+static void
+vi_queue_set_started(VuDev *dev, int qidx, bool started)
+{
+ VuInput *vi = container_of(dev, VuInput, dev.parent);
+ VuVirtq *vq = vu_get_queue(dev, qidx);
+
+ g_debug("queue started %d:%d", qidx, started);
+
+ if (qidx == 1) {
+ vu_set_queue_handler(dev, vq, started ? vi_handle_sts : NULL);
+ }
+
+ started = vu_queue_started(dev, vu_get_queue(dev, 0)) &&
+ vu_queue_started(dev, vu_get_queue(dev, 1));
+
+ if (started && !vi->evsrc) {
+ vi->evsrc = vug_source_new(&vi->dev, vi->evdevfd,
+ G_IO_IN, vi_evdev_watch, vi);
+ }
+
+ if (!started && vi->evsrc) {
+ vug_source_destroy(vi->evsrc);
+ vi->evsrc = NULL;
+ }
+}
+
+static virtio_input_config *
+vi_find_config(VuInput *vi, uint8_t select, uint8_t subsel)
+{
+ virtio_input_config *cfg;
+ int i;
+
+ for (i = 0; i < vi->config->len; i++) {
+ cfg = &g_array_index(vi->config, virtio_input_config, i);
+ if (select == cfg->select && subsel == cfg->subsel) {
+ return cfg;
+ }
+ }
+
+ return NULL;
+}
+
+static int vi_get_config(VuDev *dev, uint8_t *config, uint32_t len)
+{
+ VuInput *vi = container_of(dev, VuInput, dev.parent);
+
+ if (len > sizeof(*vi->sel_config)) {
+ return -1;
+ }
+
+ if (vi->sel_config) {
+ memcpy(config, vi->sel_config, len);
+ } else {
+ memset(config, 0, len);
+ }
+
+ return 0;
+}
+
+static int vi_set_config(VuDev *dev, const uint8_t *data,
+ uint32_t offset, uint32_t size,
+ uint32_t flags)
+{
+ VuInput *vi = container_of(dev, VuInput, dev.parent);
+ virtio_input_config *config = (virtio_input_config *)data;
+
+ vi->sel_config = vi_find_config(vi, config->select, config->subsel);
+
+ return 0;
+}
+
+static const VuDevIface vuiface = {
+ .queue_set_started = vi_queue_set_started,
+ .get_config = vi_get_config,
+ .set_config = vi_set_config,
+};
+
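+/*
+ * Query the device's supported event codes of the given type via
+ * EVIOCGBIT and append a VIRTIO_INPUT_CFG_EV_BITS entry, trimmed to the
+ * last non-zero byte of the bitmap.
+ */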
+static void
+vi_bits_config(VuInput *vi, int type, int count)
+{
+ virtio_input_config bits;
+ int rc, i, size = 0;
+
+ memset(&bits, 0, sizeof(bits));
+ rc = ioctl(vi->evdevfd, EVIOCGBIT(type, count / 8), bits.u.bitmap);
+ if (rc < 0) {
+ return;
+ }
+
+ for (i = 0; i < count / 8; i++) {
+ if (bits.u.bitmap[i]) {
+ size = i + 1;
+ }
+ }
+ if (size == 0) {
+ return;
+ }
+
+ bits.select = VIRTIO_INPUT_CFG_EV_BITS;
+ bits.subsel = type;
+ bits.size = size;
+ g_array_append_val(vi->config, bits);
+}
+
+static char *opt_evdev;
+static int opt_fdnum = -1;
+static char *opt_socket_path;
+static gboolean opt_nograb;
+static gboolean opt_print_caps;
+
+static GOptionEntry entries[] = {
+ { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
+ "Print capabilities", NULL },
+ { "no-grab", 'n', 0, G_OPTION_ARG_NONE, &opt_nograb,
+ "Don't grab device", NULL },
+ { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
+ "Use inherited fd socket", "FDNUM" },
+ { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
+ "Use UNIX socket path", "PATH" },
+ { "evdev-path", 'p', 0, G_OPTION_ARG_FILENAME, &opt_evdev,
+ "evdev input device path", "PATH" },
+ { NULL, }
+};
+
+int
+main(int argc, char *argv[])
+{
+ GMainLoop *loop = NULL;
+ VuInput vi = { 0, };
+ int rc, ver, fd;
+ virtio_input_config id;
+ struct input_id ids;
+ GError *error = NULL;
+ GOptionContext *context;
+
+ context = g_option_context_new(NULL);
+ g_option_context_add_main_entries(context, entries, NULL);
+ if (!g_option_context_parse(context, &argc, &argv, &error)) {
+ g_printerr("Option parsing failed: %s\n", error->message);
+ exit(EXIT_FAILURE);
+ }
+ if (opt_print_caps) {
+ g_print("{\n");
+ g_print(" \"type\": \"input\",\n");
+ g_print(" \"features\": [\n");
+ g_print(" \"evdev-path\",\n");
+ g_print(" \"no-grab\"\n");
+ g_print(" ]\n");
+ g_print("}\n");
+ exit(EXIT_SUCCESS);
+ }
+ if (!opt_evdev) {
+ g_printerr("Please specify an evdev path\n");
+ exit(EXIT_FAILURE);
+ }
+ if ((!!opt_socket_path + (opt_fdnum != -1)) != 1) {
+ g_printerr("Please specify either --fd or --socket-path\n");
+ exit(EXIT_FAILURE);
+ }
+
+ vi.evdevfd = open(opt_evdev, O_RDWR);
+ if (vi.evdevfd < 0) {
+ g_printerr("Failed to open evdev: %s\n", g_strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ rc = ioctl(vi.evdevfd, EVIOCGVERSION, &ver);
+ if (rc < 0) {
+ g_printerr("%s: is not an evdev device\n", argv[1]);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!opt_nograb) {
+ rc = ioctl(vi.evdevfd, EVIOCGRAB, 1);
+ if (rc < 0) {
+ g_printerr("Failed to grab device\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ vi.config = g_array_new(false, false, sizeof(virtio_input_config));
+ memset(&id, 0, sizeof(id));
+ if (ioctl(vi.evdevfd, EVIOCGNAME(sizeof(id.u.string) - 1),
+ id.u.string) < 0) {
+ g_printerr("Failed to get evdev name: %s\n", g_strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ id.select = VIRTIO_INPUT_CFG_ID_NAME;
+ id.size = strlen(id.u.string);
+ g_array_append_val(vi.config, id);
+
+ if (ioctl(vi.evdevfd, EVIOCGID, &ids) == 0) {
+ memset(&id, 0, sizeof(id));
+ id.select = VIRTIO_INPUT_CFG_ID_DEVIDS;
+ id.size = sizeof(struct virtio_input_devids);
+ id.u.ids.bustype = cpu_to_le16(ids.bustype);
+ id.u.ids.vendor = cpu_to_le16(ids.vendor);
+ id.u.ids.product = cpu_to_le16(ids.product);
+ id.u.ids.version = cpu_to_le16(ids.version);
+ g_array_append_val(vi.config, id);
+ }
+
+ vi_bits_config(&vi, EV_KEY, KEY_CNT);
+ vi_bits_config(&vi, EV_REL, REL_CNT);
+ vi_bits_config(&vi, EV_ABS, ABS_CNT);
+ vi_bits_config(&vi, EV_MSC, MSC_CNT);
+ vi_bits_config(&vi, EV_SW, SW_CNT);
+ g_debug("config length: %u", vi.config->len);
+
+ if (opt_socket_path) {
+ int lsock = unix_listen(opt_socket_path, &error_fatal);
+ if (lsock < 0) {
+ g_printerr("Failed to listen on %s.\n", opt_socket_path);
+ exit(EXIT_FAILURE);
+ }
+ fd = accept(lsock, NULL, NULL);
+ close(lsock);
+ } else {
+ fd = opt_fdnum;
+ }
+ if (fd == -1) {
+ g_printerr("Invalid vhost-user socket.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (!vug_init(&vi.dev, VHOST_USER_INPUT_MAX_QUEUES, fd, vi_panic,
+ &vuiface)) {
+ g_printerr("Failed to initialize libvhost-user-glib.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ loop = g_main_loop_new(NULL, FALSE);
+ g_main_loop_run(loop);
+ g_main_loop_unref(loop);
+
+ vug_deinit(&vi.dev);
+
+ vug_source_destroy(vi.evsrc);
+ g_array_free(vi.config, TRUE);
+ g_free(vi.queue);
+ return 0;
+}
diff --git a/contrib/vhost-user-input/meson.build b/contrib/vhost-user-input/meson.build
new file mode 100644
index 000000000..21a9ed4f1
--- /dev/null
+++ b/contrib/vhost-user-input/meson.build
@@ -0,0 +1,4 @@
+executable('vhost-user-input', files('main.c'),
+ dependencies: [qemuutil, vhost_user],
+ build_by_default: targetos == 'linux',
+ install: false)
diff --git a/contrib/vhost-user-scsi/meson.build b/contrib/vhost-user-scsi/meson.build
new file mode 100644
index 000000000..cc893f6f2
--- /dev/null
+++ b/contrib/vhost-user-scsi/meson.build
@@ -0,0 +1,6 @@
+if libiscsi.found()
+ executable('vhost-user-scsi', files('vhost-user-scsi.c'),
+ dependencies: [qemuutil, libiscsi, vhost_user],
+ build_by_default: targetos == 'linux',
+ install: false)
+endif
diff --git a/contrib/vhost-user-scsi/vhost-user-scsi.c b/contrib/vhost-user-scsi/vhost-user-scsi.c
new file mode 100644
index 000000000..4f6e3e2a2
--- /dev/null
+++ b/contrib/vhost-user-scsi/vhost-user-scsi.c
@@ -0,0 +1,436 @@
+/*
+ * vhost-user-scsi sample application
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ * Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <iscsi/iscsi.h>
+#define inline __attribute__((gnu_inline)) /* required for libiscsi v1.9.0 */
+#include <iscsi/scsi-lowlevel.h>
+#undef inline
+#include "libvhost-user-glib.h"
+#include "standard-headers/linux/virtio_scsi.h"
+
+#define VUS_ISCSI_INITIATOR "iqn.2016-11.com.nutanix:vhost-user-scsi"
+
+enum {
+ VHOST_USER_SCSI_MAX_QUEUES = 8,
+};
+
+typedef struct VusIscsiLun {
+ struct iscsi_context *iscsi_ctx;
+ int iscsi_lun;
+} VusIscsiLun;
+
+typedef struct VusDev {
+ VugDev parent;
+
+ VusIscsiLun lun;
+ GMainLoop *loop;
+} VusDev;
+
+/** libiscsi integration **/
+
+typedef struct virtio_scsi_cmd_req VirtIOSCSICmdReq;
+typedef struct virtio_scsi_cmd_resp VirtIOSCSICmdResp;
+
+static int vus_iscsi_add_lun(VusIscsiLun *lun, char *iscsi_uri)
+{
+ struct iscsi_url *iscsi_url;
+ struct iscsi_context *iscsi_ctx;
+ int ret = 0;
+
+ assert(lun);
+ assert(iscsi_uri);
+ assert(!lun->iscsi_ctx);
+
+ iscsi_ctx = iscsi_create_context(VUS_ISCSI_INITIATOR);
+ if (!iscsi_ctx) {
+ g_warning("Unable to create iSCSI context");
+ return -1;
+ }
+
+ iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri);
+ if (!iscsi_url) {
+ g_warning("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx));
+ goto fail;
+ }
+
+ iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL);
+ iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+ if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) {
+ g_warning("Unable to login to iSCSI portal: %s",
+ iscsi_get_error(iscsi_ctx));
+ goto fail;
+ }
+
+ lun->iscsi_ctx = iscsi_ctx;
+ lun->iscsi_lun = iscsi_url->lun;
+
+ g_debug("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri);
+
+out:
+ if (iscsi_url) {
+ iscsi_destroy_url(iscsi_url);
+ }
+ return ret;
+
+fail:
+ (void)iscsi_destroy_context(iscsi_ctx);
+ ret = -1;
+ goto out;
+}
+
+static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir,
+ int xfer_len)
+{
+ struct scsi_task *task;
+
+ assert(cdb_len > 0);
+ assert(cdb);
+
+ task = g_new0(struct scsi_task, 1);
+ memcpy(task->cdb, cdb, cdb_len);
+ task->cdb_size = cdb_len;
+ task->xfer_dir = dir;
+ task->expxferlen = xfer_len;
+
+ return task;
+}
+
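+/*
+ * The top three bits of the SCSI opcode encode the command's group,
+ * which determines the CDB length: group 0 is 6 bytes, groups 1 and 2
+ * are 10, group 4 is 16 and group 5 is 12. The remaining groups are
+ * reserved or vendor-specific and are rejected.
+ */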
+static int get_cdb_len(uint8_t *cdb)
+{
+ assert(cdb);
+
+ switch (cdb[0] >> 5) {
+ case 0: return 6;
+ case 1: /* fall through */
+ case 2: return 10;
+ case 4: return 16;
+ case 5: return 12;
+ }
+ g_warning("Unable to determine cdb len (0x%02hhX)", (uint8_t)(cdb[0] >> 5));
+ return -1;
+}
+
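+/*
+ * Execute one virtio-scsi request synchronously against the iSCSI LUN
+ * and fill in the virtio-scsi response, including sense data on
+ * CHECK CONDITION.
+ */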
+static int handle_cmd_sync(struct iscsi_context *ctx,
+ VirtIOSCSICmdReq *req,
+ struct iovec *out, unsigned int out_len,
+ VirtIOSCSICmdResp *rsp,
+ struct iovec *in, unsigned int in_len)
+{
+ struct scsi_task *task;
+ uint32_t dir;
+ uint32_t len;
+ int cdb_len;
+ int i;
+
+ assert(ctx);
+ assert(req);
+ assert(rsp);
+
+ if (!(!req->lun[1] && req->lun[2] == 0x40 && !req->lun[3])) {
+ /* Ignore anything different than target=0, lun=0 */
+ g_debug("Ignoring unconnected lun (0x%hhX, 0x%hhX)",
+ req->lun[1], req->lun[3]);
+ rsp->status = SCSI_STATUS_CHECK_CONDITION;
+ memset(rsp->sense, 0, sizeof(rsp->sense));
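+        /* Fixed-format sense data: ILLEGAL REQUEST, INVALID FIELD IN CDB */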
+ rsp->sense_len = 18;
+ rsp->sense[0] = 0x70;
+ rsp->sense[2] = SCSI_SENSE_ILLEGAL_REQUEST;
+ rsp->sense[7] = 10;
+ rsp->sense[12] = 0x24;
+
+ return 0;
+ }
+
+ cdb_len = get_cdb_len(req->cdb);
+ if (cdb_len == -1) {
+ return -1;
+ }
+
+ len = 0;
+ if (!out_len && !in_len) {
+ dir = SCSI_XFER_NONE;
+ } else if (out_len) {
+ dir = SCSI_XFER_WRITE;
+ for (i = 0; i < out_len; i++) {
+ len += out[i].iov_len;
+ }
+ } else {
+ dir = SCSI_XFER_READ;
+ for (i = 0; i < in_len; i++) {
+ len += in[i].iov_len;
+ }
+ }
+
+ task = scsi_task_new(cdb_len, req->cdb, dir, len);
+
+ if (dir == SCSI_XFER_WRITE) {
+ task->iovector_out.iov = (struct scsi_iovec *)out;
+ task->iovector_out.niov = out_len;
+ } else if (dir == SCSI_XFER_READ) {
+ task->iovector_in.iov = (struct scsi_iovec *)in;
+ task->iovector_in.niov = in_len;
+ }
+
+ g_debug("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)",
+ cdb_len, dir, task);
+ if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) {
+ g_warning("Error serving SCSI command");
+ g_free(task);
+ return -1;
+ }
+
+ memset(rsp, 0, sizeof(*rsp));
+
+ rsp->status = task->status;
+ rsp->resid = task->residual;
+
+ if (task->status == SCSI_STATUS_CHECK_CONDITION) {
+ rsp->response = VIRTIO_SCSI_S_FAILURE;
+ rsp->sense_len = task->datain.size - 2;
+ memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len);
+ }
+
+ g_free(task);
+
+ g_debug("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u",
+ rsp->status, rsp->resid, rsp->response, rsp->sense_len);
+
+ return 0;
+}
+
+/** libvhost-user callbacks **/
+
+static void vus_panic_cb(VuDev *vu_dev, const char *buf)
+{
+ VugDev *gdev;
+ VusDev *vdev_scsi;
+
+ assert(vu_dev);
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_scsi = container_of(gdev, VusDev, parent);
+ if (buf) {
+ g_warning("vu_panic: %s", buf);
+ }
+
+ g_main_loop_quit(vdev_scsi->loop);
+}
+
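+/*
+ * Request virtqueue kick handler: pop each element, parse the
+ * virtio-scsi request header from out_sg[0], execute the command
+ * synchronously and write the response into in_sg[0].
+ */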
+static void vus_proc_req(VuDev *vu_dev, int idx)
+{
+ VugDev *gdev;
+ VusDev *vdev_scsi;
+ VuVirtq *vq;
+ VuVirtqElement *elem = NULL;
+
+ assert(vu_dev);
+
+ gdev = container_of(vu_dev, VugDev, parent);
+ vdev_scsi = container_of(gdev, VusDev, parent);
+
+ vq = vu_get_queue(vu_dev, idx);
+ if (!vq) {
+ g_warning("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx);
+ vus_panic_cb(vu_dev, NULL);
+ return;
+ }
+
+ g_debug("Got kicked on vq[%d]@%p", idx, vq);
+
+ while (1) {
+ VirtIOSCSICmdReq *req;
+ VirtIOSCSICmdResp *rsp;
+
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
+ if (!elem) {
+ g_debug("No more elements pending on vq[%d]@%p", idx, vq);
+ break;
+ }
+ g_debug("Popped elem@%p", elem);
+
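+        /*
+         * out_sg[0] holds the request header and in_sg[0] the response
+         * header; data segments may follow in at most one direction.
+         */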
+ assert(!(elem->out_num > 1 && elem->in_num > 1));
+ assert(elem->out_num > 0 && elem->in_num > 0);
+
+ if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) {
+ g_warning("Invalid virtio-scsi req header");
+ vus_panic_cb(vu_dev, NULL);
+ break;
+ }
+ req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base;
+
+ if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) {
+ g_warning("Invalid virtio-scsi rsp header");
+ vus_panic_cb(vu_dev, NULL);
+ break;
+ }
+ rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base;
+
+ if (handle_cmd_sync(vdev_scsi->lun.iscsi_ctx,
+ req, &elem->out_sg[1], elem->out_num - 1,
+ rsp, &elem->in_sg[1], elem->in_num - 1) != 0) {
+ vus_panic_cb(vu_dev, NULL);
+ break;
+ }
+
+ vu_queue_push(vu_dev, vq, elem, 0);
+ vu_queue_notify(vu_dev, vq);
+
+ free(elem);
+ }
+ free(elem);
+}
+
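+/*
+ * virtio-scsi queue 0 is the control queue and queue 1 the event queue;
+ * this sample only implements the request queues (2 and up).
+ */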
+static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started)
+{
+ VuVirtq *vq;
+
+ assert(vu_dev);
+
+ vq = vu_get_queue(vu_dev, idx);
+
+ if (idx == 0 || idx == 1) {
+ g_debug("queue %d unimplemented", idx);
+ } else {
+ vu_set_queue_handler(vu_dev, vq, started ? vus_proc_req : NULL);
+ }
+}
+
+static const VuDevIface vus_iface = {
+ .queue_set_started = vus_queue_set_started,
+};
+
+/** misc helpers **/
+
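+/* Create a listening UNIX stream socket, removing any stale socket file. */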
+static int unix_sock_new(char *unix_fn)
+{
+ int sock;
+ struct sockaddr_un un;
+ size_t len;
+
+ assert(unix_fn);
+
+ sock = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ perror("socket");
+ return -1;
+ }
+
+ un.sun_family = AF_UNIX;
+ (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
+ len = sizeof(un.sun_family) + strlen(un.sun_path);
+
+ (void)unlink(unix_fn);
+ if (bind(sock, (struct sockaddr *)&un, len) < 0) {
+ perror("bind");
+ goto fail;
+ }
+
+ if (listen(sock, 1) < 0) {
+ perror("listen");
+ goto fail;
+ }
+
+ return sock;
+
+fail:
+ (void)close(sock);
+
+ return -1;
+}
+
+/** vhost-user-scsi **/
+
+int main(int argc, char **argv)
+{
+ VusDev *vdev_scsi = NULL;
+ char *unix_fn = NULL;
+ char *iscsi_uri = NULL;
+ int lsock = -1, csock = -1, opt, err = EXIT_SUCCESS;
+
+    while ((opt = getopt(argc, argv, "u:i:h")) != -1) {
+ switch (opt) {
+ case 'h':
+ goto help;
+ case 'u':
+ unix_fn = g_strdup(optarg);
+ break;
+ case 'i':
+ iscsi_uri = g_strdup(optarg);
+ break;
+ default:
+ goto help;
+ }
+ }
+ if (!unix_fn || !iscsi_uri) {
+ goto help;
+ }
+
+ lsock = unix_sock_new(unix_fn);
+ if (lsock < 0) {
+ goto err;
+ }
+
+ csock = accept(lsock, NULL, NULL);
+ if (csock < 0) {
+ perror("accept");
+ goto err;
+ }
+
+ vdev_scsi = g_new0(VusDev, 1);
+ vdev_scsi->loop = g_main_loop_new(NULL, FALSE);
+
+ if (vus_iscsi_add_lun(&vdev_scsi->lun, iscsi_uri) != 0) {
+ goto err;
+ }
+
+ if (!vug_init(&vdev_scsi->parent, VHOST_USER_SCSI_MAX_QUEUES, csock,
+ vus_panic_cb, &vus_iface)) {
+ g_printerr("Failed to initialize libvhost-user-glib\n");
+ goto err;
+ }
+
+ g_main_loop_run(vdev_scsi->loop);
+
+ vug_deinit(&vdev_scsi->parent);
+
+out:
+ if (vdev_scsi) {
+ g_main_loop_unref(vdev_scsi->loop);
+ g_free(vdev_scsi);
+ unlink(unix_fn);
+ }
+ if (csock >= 0) {
+ close(csock);
+ }
+ if (lsock >= 0) {
+ close(lsock);
+ }
+ g_free(unix_fn);
+ g_free(iscsi_uri);
+
+ return err;
+
+err:
+ err = EXIT_FAILURE;
+ goto out;
+
+help:
+ fprintf(stderr, "Usage: %s [ -u unix_sock_path -i iscsi_uri ] | [ -h ]\n",
+ argv[0]);
+ fprintf(stderr, " -u path to unix socket\n");
+ fprintf(stderr, " -i iscsi uri for lun 0\n");
+ fprintf(stderr, " -h print help and quit\n");
+
+ goto err;
+}