-rw-r--r-- | .gitignore | 7
-rw-r--r-- | Makefile | 62
-rw-r--r-- | README.md | 19
-rw-r--r-- | adapter.c | 157
-rw-r--r-- | event_notifier.c | 205
-rw-r--r-- | event_notifier.h | 54
-rw-r--r-- | vhost_loopback.c | 326
-rw-r--r-- | vhost_loopback.h | 34
-rw-r--r-- | vhost_user_loopback.c | 800
-rw-r--r-- | vhost_user_loopback.h | 786
-rw-r--r-- | vhost_user_rng.c | 188
-rw-r--r-- | vhost_user_rng.h | 44
-rw-r--r-- | virtio_loopback.c | 1745
-rw-r--r-- | virtio_loopback.h | 639
-rw-r--r-- | virtio_rng.c | 171
-rw-r--r-- | virtio_rng.h | 58 |
16 files changed, 5295 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..702ac5b --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +adapter +*.sh +.adapter* +make_bins.sh +*.o +*.c.* +*.h.* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..af51327 --- /dev/null +++ b/Makefile @@ -0,0 +1,62 @@ +# Copyright 2022 Virtual Open Systems SAS. +# +# Authors: +# Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + +#CFLAGS := -Wall -Wextra -Werror +#CFLAGS := -Wall -Wextra -Wno-unused-variable -Wno-unused-function +CFLAGS := -Wno-unused-variable -Wno-unused-function +CFLAGS += -DSERVER +CC = + +ifeq ($(ARCH), arm64) + # arm64 + CC = aarch64-linux-gnu-gcc +else + CC = gcc +endif + +ifeq ($(VHOST_USER_RNG), 1) + CFLAGS += -DVHOST_USER_RNG_DEV +endif + +INCL += -I . +DEPS = adapter.h vhost_user_loopback.h event_notifier.h virtio_loopback.h +SRC_C = event_notifier.c vhost_user_loopback.c virtio_loopback.c virtio_rng.c vhost_user_rng.c vhost_loopback.c adapter.c + +OBJS = $(SRC_C:.c=.o) +BINS = adapter + +ifeq ($(DEBUG), 1) + CFLAGS += -DDEBUG +endif + +all: $(BINS) + +$(BINS): $(OBJS) + @echo -e "CC\t$@" + $(CC) $(CFLAGS) $(INCL) $^ -o $@ -lpthread -static + +%.o: %.c + @echo -e "CC\t$@" + $(CC) $(CFLAGS) $(INCL) -c $< -o $@ + +clean: + rm -f *.o *~ $(BINS) + +.PHONY: all clean diff --git a/README.md b/README.md new file mode 100644 index 0000000..f4daa29 --- /dev/null +++ b/README.md @@ -0,0 +1,19 @@ +# virtio-loopback adapter repository + +This repository includes an alpha version of the "virtio_loopback_adapter" application, which is part of the Virtio Loopback Design presented in this [document](https://git.virtualopensystems.com/virtio-loopback/docs/-/blob/master/design_docs). + +As described in the design document, the adapter is only a part of a more complex architecture. If you want to see the implementation and build the other components, refer to the [virtio-loopback docs repository](https://git.virtualopensystems.com/c-022/virtio-loopback/docs). + +## Build the virtio-loopback adapter + +In order to build this project, use one of the following commands: +- `make` for x86 +- `make ARCH=arm64` for arm64 + +**NOTE**: You can also pass the parameter "DEBUG=1" in order to enable the debug messages, and "VHOST_USER_RNG=1", which tells the adapter to use an external entropy source. In our case the external entropy source is a user-space RNG driver which communicates with the adapter via the "vhost-user" protocol. If the option "VHOST_USER_RNG" is not specified in the "make" command, the adapter will use its internal mechanism and produce its own random numbers (this case is used only for testing purposes).
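+ +When the adapter is built with "VHOST_USER_RNG=1", it must also be given the path of the backend's vhost-user socket at run time; the expected invocation, taken from the usage message in adapter.c, is `./adapter -s /path_to_socket/rng.sock`.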
+ +Example of building the adapter with all the available parameters: +`make ARCH=arm64 VHOST_USER_RNG=1 DEBUG=1` + +## Current status +This repository contains the current results of the activity carried out by Virtual Open Systems in the [Automotive Grade Linux](https://www.automotivegradelinux.org) community. Both code and documentation included in this release are under active development, and are intended to familiarize users with the concept of virtio-loopback and to give developers the opportunity to test it. diff --git a/adapter.c b/adapter.c new file mode 100644 index 0000000..2aebd9f --- /dev/null +++ b/adapter.c @@ -0,0 +1,157 @@ +/* + * Copyright 2022 Virtual Open Systems SAS. + * + * Authors: + * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <pthread.h> +#include <stdbool.h> +#include <sys/param.h> +#include <assert.h> + +/* For socket */ +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +/* Project header files */ +#include "virtio_loopback.h" +#include "vhost_user_loopback.h" +#include "virtio_rng.h" +#include "vhost_user_rng.h" + +/* Global variables */ +int client_sock; +struct vhost_dev *dev; +struct adapter_dev *adev; +struct vhost_user *vudev; + + +void vhost_user_adapter_init (void) +{ + /* Init vhost-user device */ + vudev = (struct vhost_user*) malloc(sizeof(struct vhost_user)); + + /* Init vhost device */ + dev = (struct vhost_dev*) malloc(sizeof(struct vhost_dev)); + + /* Init virtio device */ + global_vdev = (VirtIODevice*) malloc(sizeof(VirtIODevice)); + + /* Init virtio bus */ + global_vbus = (VirtioBus *) malloc(sizeof(VirtioBus)); + global_vbus->vdev = global_vdev; + global_vdev->vbus = global_vbus; + + /* Store the virtio_dev reference into the vhost_dev struct */ + dev->vdev = global_vdev; + + /* Init adapter device */ + adev = (struct adapter_dev*) malloc(sizeof(struct adapter_dev)); + adev->vdev = dev; + adev->vudev = vudev; + adev->virtio_dev = global_vdev; + adev->vbus = global_vbus; +} + + +void client (char *sock_path) +{ + int rc, len; + struct sockaddr_un client_sockaddr; + + /* Initialize the struct to zero */ + memset(&client_sockaddr, 0, sizeof(struct sockaddr_un)); + + /* + * Create a UNIX socket + */ + client_sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (client_sock == -1) { + printf("SOCKET ERROR\n"); + exit(1); + } + + /* + * Set up the UNIX sockaddr structure + * by using AF_UNIX for the family and + * giving it a filepath to connect. 
+ */ + client_sockaddr.sun_family = AF_UNIX; + strcpy(client_sockaddr.sun_path, sock_path); + len = sizeof(client_sockaddr); + rc = connect(client_sock, (struct sockaddr *) &client_sockaddr, len); + if (rc == -1) { + printf("CONNECT ERROR\n"); + close(client_sock); + exit(1); + } + +} + +static void help_args (void) { + printf("Run example:\n\t./adapter -s /path_to_socket/rng.sock\n"); +} + +int main (int argc, char **argv) +{ +#ifdef VHOST_USER_RNG_DEV + /* + * Check if the user has provided a socket path. + * If not, print the help messages. + */ + if ((argc <= 2) || (strcmp(argv[1], "-s") != 0)) { + goto error_args; + } + + /* + * Create the socket and connect to the backend. + * Enabled in the vhost-user case. + */ + client(argv[2]); +#endif + + /* Initialize the adapter data structures */ + vhost_user_adapter_init(); + + /* Initialize the virtio/vhost-user device */ +#ifdef VHOST_USER_RNG_DEV + vhost_user_rng_realize(); /* <-- Enable that for vhost-user-rng */ +#else + virtio_rng_realize(); /* <-- Enable that for simple rng */ +#endif + + /* Start the MMIO transport layer and the communication with the loopback driver */ + virtio_mmio_start(); + + return 0; + +error_args: + help_args(); + return 1; +} diff --git a/event_notifier.c b/event_notifier.c new file mode 100644 index 0000000..121cc6e --- /dev/null +++ b/event_notifier.c @@ -0,0 +1,205 @@ +/* + * Based on: + * 1) file-posix.c of Qemu Project + * + * Copyright (c) 2006 Fabrice Bellard + * + * 2) event_notifier-posix.c of Qemu Project + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * 3) os-posix-lib.c of Qemu project + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Red Hat, Inc. + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <pthread.h> +#include <stdbool.h> +#include <sys/param.h> +#include <assert.h> + +/* For socket */ +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +/* Project header files */ +#include "vhost_user_loopback.h" + + +/* Sets a specific flag */ +int fcntl_setfl(int fd, int flag) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags == -1) { + return -errno; + } + + if (fcntl(fd, F_SETFL, flags | flag) == -1) { + return -errno; + } + + return 0; +} + +void qemu_set_cloexec(int fd) +{ + int f; + f = fcntl(fd, F_GETFD); + f = fcntl(fd, F_SETFD, f | FD_CLOEXEC); +} + +/* + * Creates a pipe with FD_CLOEXEC set on both file descriptors + */ +int qemu_pipe(int pipefd[2]) +{ + int ret; + +#ifdef CONFIG_PIPE2 + ret = pipe2(pipefd, O_CLOEXEC); + if (ret != -1 || errno != ENOSYS) { + return ret; + } +#endif + ret = pipe(pipefd); + if (ret == 0) { + qemu_set_cloexec(pipefd[0]); + qemu_set_cloexec(pipefd[1]); + } + + return ret; +} + +int event_notifier_get_fd(const EventNotifier *e) +{ + return e->rfd; +} + +int event_notifier_get_wfd(const EventNotifier *e) +{ + return e->wfd; +} + +int event_notifier_set(EventNotifier *e) +{ + static const uint64_t value = 1; + ssize_t ret; + + if (!e->initialized) { + return -1; + } + + do { + ret = write(e->wfd, &value, sizeof(value)); + } while (ret < 0 && errno == EINTR); + + /* EAGAIN is fine, a read must be pending. */ + if (ret < 0 && errno != EAGAIN) { + return -errno; + } + return 0; +} + +int event_notifier_init(EventNotifier *e, int active) +{ + int fds[2]; + int ret; + + ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + + if (ret >= 0) { + e->rfd = e->wfd = ret; + } else { + if (errno != ENOSYS) { + return -errno; + } + if (qemu_pipe(fds) < 0) { + return -errno; + } + ret = fcntl_setfl(fds[0], O_NONBLOCK); + if (ret < 0) { + ret = -errno; + goto fail; + } + ret = fcntl_setfl(fds[1], O_NONBLOCK); + if (ret < 0) { + ret = -errno; + goto fail; + } + e->rfd = fds[0]; + e->wfd = fds[1]; + } + e->initialized = true; + if (active) { + event_notifier_set(e); + } + return 0; + +fail: + close(fds[0]); + close(fds[1]); + return ret; +} + +bool ioeventfd_enabled(void) +{ + /* + * TODO: Delete if not needed: + * return !kvm_enabled() || kvm_eventfds_enabled(); + */ + return 1; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ + int value; + ssize_t len; + char buffer[512]; + + if (!e->initialized) { + return 0; + } + + /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */ + value = 0; + do { + len = read(e->rfd, buffer, sizeof(buffer)); + value |= (len > 0); + } while ((len == -1 && errno == EINTR) || len == sizeof(buffer)); + + return value; +} diff --git a/event_notifier.h b/event_notifier.h new file mode 100644 index 0000000..412cc4b --- /dev/null +++ b/event_notifier.h @@ -0,0 +1,54 @@ +/* + * Based on event_notifier.h of Qemu project + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + + +#ifndef EVENT_NOT_H +#define EVENT_NOT_H + +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> +#include <poll.h> +#include <pthread.h> + +typedef struct EventNotifier { + int rfd; + int wfd; + bool initialized; +} EventNotifier; + + +int fcntl_setfl(int fd, int flag); +void qemu_set_cloexec(int fd); +int qemu_pipe(int pipefd[2]); +int event_notifier_get_fd(const EventNotifier *e); +int event_notifier_get_wfd(const EventNotifier *e); +int event_notifier_set(EventNotifier *e); +int event_notifier_init(EventNotifier *e, int active); +bool ioeventfd_enabled(void); +int event_notifier_test_and_clear(EventNotifier *e); + + +#endif /* EVENT_NOT_H */ diff --git a/vhost_loopback.c b/vhost_loopback.c new file mode 100644 index 0000000..a76b3d0 --- /dev/null +++ b/vhost_loopback.c @@ -0,0 +1,326 @@ +/* + * Based on vhost.c of Qemu project + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <pthread.h> +#include <stdbool.h> +#include <sys/param.h> +#include <assert.h> + +/* For socket */ +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +/* Project header files */ +#include "virtio_loopback.h" +#include "vhost_user_loopback.h" +#include "event_notifier.h" + +/* vhost headers */ +#include "vhost_loopback.h" + +#ifdef DEBUG +#define DBG(...) printf("vhost-loopback: " __VA_ARGS__) +#else +#define DBG(...) +#endif /* DEBUG */ + +/* + * Stop processing guest IO notifications in qemu. + * Start processing them in vhost in kernel. + */ +int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) +{ + int i, r, e; + + /* + * We will pass the notifiers to the kernel, make sure that QEMU + * doesn't interfere. 
+ */ + + /* TODO: Check if this is still useful */ + r = virtio_device_grab_ioeventfd(vdev); + if (r < 0) { + DBG("binding does not support host notifiers\n"); + goto fail; + } + + for (i = 0; i < hdev->nvqs; ++i) { + r = virtio_bus_set_host_notifier(vdev->vbus, hdev->vq_index + i, + true); + if (r < 0) { + DBG("vhost VQ %d notifier binding failed: %d", i, r); + goto fail; + } + } + + return 0; + +fail: + DBG("Fail vhost_dev_enable_notifiers\n"); + return r; +} + +/* TODO: Check if this is still needed */ +static int vhost_dev_has_iommu(struct vhost_dev *dev) +{ + VirtIODevice *vdev = dev->vdev; + + /* + * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend supports + * the incremental memory mapping API via the IOTLB API. For platforms + * that do not have an IOMMU, there is no need to enable this feature, + * which may cause unnecessary IOTLB miss/update transactions. + */ + return virtio_bus_device_iommu_enabled(vdev) && + virtio_has_feature(vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); +} + +static int vhost_dev_set_features(struct vhost_dev *dev, + bool enable_log) +{ + uint64_t features = dev->acked_features; + int r; + + if (enable_log) { + features |= 0x1ULL << VHOST_F_LOG_ALL; + } + + /* TODO: check if this is needed */ + if (!vhost_dev_has_iommu(dev)) { + features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM); + } + + r = vhost_user_set_features(dev, features); + if (r < 0) { + DBG("vhost_set_features failed\n"); + goto out; + } + +out: + return r; +} + +static int vhost_virtqueue_set_addr(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + unsigned idx, bool enable_log) +{ + struct vhost_vring_addr addr; + int r; + + memset(&addr, 0, sizeof(struct vhost_vring_addr)); + + addr.desc_user_addr = (uint64_t)(unsigned long)vq->desc; + addr.avail_user_addr = (uint64_t)(unsigned long)vq->avail; + addr.used_user_addr = (uint64_t)(unsigned long)vq->used; + + addr.index = idx; + addr.log_guest_addr = vq->used_phys; + addr.flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0; + + r = vhost_user_set_vring_addr(dev, &addr); + if (r < 0) { + DBG("vhost_set_vring_addr failed\n"); + } + return r; +} + + +/* Mask/unmask events from this vq. 
*/ +void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + bool mask) +{ + struct VirtQueue *vvq = virtio_get_queue(vdev, n); + int r, index = n - hdev->vq_index; + struct vhost_vring_file file; + + if (mask) { + file.fd = event_notifier_get_wfd(&hdev->vqs[index].masked_notifier); + } else { + file.fd = event_notifier_get_wfd(virtio_queue_get_guest_notifier(vvq)); + } + + file.index = vhost_user_get_vq_index(hdev, n); + + r = vhost_user_set_vring_call(&file); + if (r < 0) { + DBG("vhost_set_vring_call failed\n"); + } +} + +static int vhost_virtqueue_start(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) +{ + VirtioBus *vbus = vdev->vbus; + uint64_t s, l, a; + int r; + + int vhost_vq_index = vhost_user_get_vq_index(dev, idx); + struct vhost_vring_file file = { + .index = vhost_vq_index + }; + struct vhost_vring_state state = { + .index = vhost_vq_index + }; + struct VirtQueue *vvq = virtio_get_queue(vdev, idx); + + a = virtio_queue_get_desc_addr(vdev, idx); + if (a == 0) { + /* Queue might not be ready for start */ + return 0; + } + + vq->num = state.num = virtio_queue_get_num(vdev, idx); + + r = vhost_user_set_vring_num(dev, &state); + if (r) { + DBG("vhost_set_vring_num failed\n"); + return r; + } + + state.num = virtio_queue_get_last_avail_idx(vdev, idx); + r = vhost_user_set_vring_base(dev, &state); + if (r) { + DBG("vhost_set_vring_base failed\n"); + return r; + } + + vq->desc_size = s = l = virtio_queue_get_desc_size(vdev, idx); + vq->desc_phys = a; + vq->desc = (void *)a; + if (!vq->desc || l != s) { + DBG("Error : vq->desc = a\n"); + r = -ENOMEM; + return r; + } + + vq->avail_size = s = l = virtio_queue_get_avail_size(vdev, idx); + vq->avail_phys = a = virtio_queue_get_avail_addr(vdev, idx); + vq->avail = (void *)a; + if (!vq->avail || l != s) { + DBG("Error : vq->avail = a\n"); + r = -ENOMEM; + return r; + } + + vq->used_size = s = l = virtio_queue_get_used_size(vdev, idx); + vq->used_phys = a = virtio_queue_get_used_addr(vdev, idx); + DBG("vdev->vq[n].vring.used: 0x%lx\n", a); + vq->used = (void *)a; + if (!vq->used || l != s) { + DBG("Error : vq->used = a\n"); + r = -ENOMEM; + return r; + } + + r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled); + if (r < 0) { + DBG("Fail vhost_virtqueue_set_addr\n"); + return r; + } + + file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); + r = vhost_user_set_vring_kick(&file); + if (r) { + DBG("vhost_set_vring_kick failed\n"); + return r; + } + + /* Clear and discard previous events if any. */ + event_notifier_test_and_clear(&vq->masked_notifier); + + /* + * Init vring in unmasked state, unless guest_notifier_mask + * will do it later. + */ + if (!vdev->use_guest_notifier_mask) { + DBG("!vdev->use_guest_notifier_mask\n"); + /* TODO: check and handle errors. */ + vhost_virtqueue_mask(dev, vdev, idx, false); + } + + return 0; +} + +/* Host notifiers must be enabled at this point. 
*/ +int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) +{ + int i, r; + + hdev->started = true; + hdev->vdev = vdev; + + r = vhost_dev_set_features(hdev, hdev->log_enabled); + if (r < 0) { + return r; + } + + /* TODO: check if this is needed */ + if (vhost_dev_has_iommu(hdev)) { + DBG("memory_listener_register?\n"); + } + + /* TODO: We might need this function in the next release */ + /* + * r = vhost_user_set_mem_table(hdev); + * if (r < 0) { + * DBG("vhost_set_mem_table failed\n"); + * return r; + * } + */ + + /* This is used to exchange the loopback_fd with the vhost-user device */ + vhost_user_share_fd(); + + for (i = 0; i < hdev->nvqs; ++i) { + r = vhost_virtqueue_start(hdev, + vdev, + hdev->vqs + i, + hdev->vq_index + i); + if (r < 0) { + DBG("Fail vhost_virtqueue_start\n"); + return r; + } + } + + return 0; +} diff --git a/vhost_loopback.h b/vhost_loopback.h new file mode 100644 index 0000000..ec9c67e --- /dev/null +++ b/vhost_loopback.h @@ -0,0 +1,34 @@ +/* + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef LOOPBACK_VHOST_H +#define LOOPBACK_VHOST_H + +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> +#include <poll.h> +#include <pthread.h> +#include "vhost_user_loopback.h" +#include "virtio_loopback.h" + +int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); +int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev); +void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, bool mask); + +#endif /* LOOPBACK_VHOST_H */ diff --git a/vhost_user_loopback.c b/vhost_user_loopback.c new file mode 100644 index 0000000..dec0186 --- /dev/null +++ b/vhost_user_loopback.c @@ -0,0 +1,800 @@ +/* + * Based on libvhost-user.c of Qemu project + * + * Copyright IBM, Corp. 2007 + * Copyright (c) 2016 Red Hat, Inc. + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Marc-André Lureau <mlureau@redhat.com> + * Victor Kaplansky <victork@redhat.com> + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <pthread.h> +#include <stdbool.h> +#include <sys/param.h> +#include <assert.h> + +/* For socket */ +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +/* Project header files */ +#include "virtio_loopback.h" +#include "vhost_user_loopback.h" +#include "event_notifier.h" + +#ifdef DEBUG +#define DBG(...) printf("vhost-user-loopback: " __VA_ARGS__) +#else +#define DBG(...) +#endif /* DEBUG */ + + +bool vhost_user_one_time_request(VhostUserRequest request) +{ + switch (request) { + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + case VHOST_USER_SET_MEM_TABLE: + case VHOST_USER_GET_QUEUE_NUM: + case VHOST_USER_NET_SET_MTU: + return true; + default: + return false; + } +} + + +void vmsg_close_fds(VhostUserMsg *vmsg) +{ + int i; + + for (i = 0; i < vmsg->fd_num; i++) { + close(vmsg->fds[i]); + } +} + + +bool vu_message_write(int conn_fd, VhostUserMsg *vmsg) +{ + int rc; + uint8_t *p = (uint8_t *)vmsg; + size_t fdsize; + char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; + struct iovec iov = { + .iov_base = (char *)vmsg, + .iov_len = VHOST_USER_HDR_SIZE, + }; + + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = control, + }; + struct cmsghdr *cmsg; + + if (vhost_user_one_time_request(vmsg->request) && dev->vq_index != 0) { + vmsg->flags &= ~VHOST_USER_NEED_REPLY_MASK; + return 0; + } + + memset(control, 0, sizeof(control)); + if (vmsg->fd_num > 0) { + fdsize = vmsg->fd_num * sizeof(int); + msg.msg_controllen = CMSG_SPACE(fdsize); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize); + } else { + msg.msg_controllen = 0; + } + + do { + rc = sendmsg(conn_fd, &msg, 0); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + + if (vmsg->size) { + do { + if (vmsg->data) { + rc = write(conn_fd, vmsg->data, vmsg->size); + } else { + rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size); + } + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + } + + if (rc <= 0) { + DBG("Error while writing\n"); + return false; + } + + return true; +} + + +bool vu_message_read(int conn_fd, VhostUserMsg *vmsg) +{ + char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; + struct iovec iov = { + .iov_base = (char *)vmsg, + .iov_len = VHOST_USER_HDR_SIZE, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = control, + .msg_controllen = sizeof(control), + }; + size_t fd_size; + struct cmsghdr *cmsg; + int rc; + + do { + rc = recvmsg(conn_fd, &msg, 0); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + + if (rc < 0) { + DBG("Error while recvmsg\n"); + return false; + } + + vmsg->fd_num = 0; + for (cmsg = CMSG_FIRSTHDR(&msg); + cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { + fd_size = cmsg->cmsg_len - CMSG_LEN(0); + vmsg->fd_num = fd_size / sizeof(int); + memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); + break; + } + } + + if (vmsg->size > sizeof(vmsg->payload)) { + DBG( + "Error: too big message request: %d, size: vmsg->size: %u, " + "while sizeof(vmsg->payload) = %zu\n", + vmsg->request, vmsg->size, sizeof(vmsg->payload)); + goto fail; + 
} + + if (vmsg->size) { + do { + rc = read(conn_fd, &vmsg->payload, vmsg->size); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + + if (rc <= 0) { + DBG("Error while reading\n"); + goto fail; + } + } + + return true; + +fail: + vmsg_close_fds(vmsg); + + return false; +} + +int vhost_user_set_owner(void) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_OWNER, + .flags = VHOST_USER_VERSION, + }; + + return vu_message_write(client_sock, &msg); +} + +int process_message_reply(const VhostUserMsg *msg) +{ + int ret; + VhostUserMsg msg_reply; + + if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + return 0; + } + + ret = vu_message_read(client_sock, &msg_reply); + if (ret < 0) { + return ret; + } + + if (msg_reply.request != msg->request) { + DBG("Received unexpected msg type. " + "Expected %d received %d\n", + msg->request, msg_reply.request); + return -EPROTO; + } + + return msg_reply.payload.u64 ? -EIO : 0; +} + +int vhost_user_get_u64(int request, uint64_t *u64) +{ + int ret; + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + }; + + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + return 0; + } + + ret = vu_message_write(client_sock, &msg); + if (ret < 0) { + return ret; + } + + ret = vu_message_read(client_sock, &msg); + if (ret < 0) { + return ret; + } + + if (msg.request != request) { + DBG("Received unexpected msg type. Expected %d received %d\n", + request, msg.request); + return -EPROTO; + } + + if (msg.size != sizeof(msg.payload.u64)) { + DBG("Received bad msg size.\n"); + return -EPROTO; + } + + *u64 = msg.payload.u64; + + return 0; +} + + +int vhost_user_get_features(uint64_t *features) +{ + if (vhost_user_get_u64(VHOST_USER_GET_FEATURES, features) < 0) { + return -EPROTO; + } + + return 0; +} + +int enforce_reply(const VhostUserMsg *msg) +{ + uint64_t dummy; + + if (msg->flags & VHOST_USER_NEED_REPLY_MASK) { + return process_message_reply(msg); + } + + /* + * We need to wait for a reply but the backend does not + * support replies for the command we just sent. + * Send VHOST_USER_GET_FEATURES which makes all backends + * send a reply. + */ + return vhost_user_get_features(&dummy); +} + +int vhost_user_set_u64(int request, uint64_t u64, bool wait_for_reply) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.u64 = u64, + .size = sizeof(msg.payload.u64), + }; + int ret; + + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + if (reply_supported) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + } + + ret = vu_message_write(client_sock, &msg); + if (ret < 0) { + return ret; + } + + if (wait_for_reply) { + return enforce_reply(&msg); + } + + return 0; +} + +int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features) +{ + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); + + (void) dev; + + /* Pass hdev as parameter! 
*/ + return vhost_user_set_u64(VHOST_USER_SET_FEATURES, features, + log_enabled); +} + +int vhost_user_set_protocol_features(uint64_t features) +{ + return vhost_user_set_u64(VHOST_USER_SET_PROTOCOL_FEATURES, features, + false); +} + +int vhost_user_get_max_memslots(uint64_t *max_memslots) +{ + uint64_t backend_max_memslots; + int err; + + err = vhost_user_get_u64(VHOST_USER_GET_MAX_MEM_SLOTS, + &backend_max_memslots); + if (err < 0) { + return err; + } + + *max_memslots = backend_max_memslots; + + return 0; +} + + + +int vhost_setup_slave_channel(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_SLAVE_REQ_FD, + .flags = VHOST_USER_VERSION, + }; + int sv[2], ret = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + return 0; + } + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + int saved_errno = errno; + DBG("socketpair() failed\n"); + return -saved_errno; + } + + memcpy(msg.fds, &sv[1], sizeof(int)); + msg.fd_num = 1; + + if (reply_supported) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + ret = vu_message_write(client_sock, &msg); + if (!ret) { + DBG("Go out\n"); + goto out; + } + + if (reply_supported) { + ret = process_message_reply(&msg); + DBG("Reply is done!\n"); + } + +out: + /* TODO: Close slave channel and fd in case of error */ + /* + * close(sv[1]); + * if (ret) { + * close_slave_channel(u); + * } + */ + + return ret; +} + + +int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) +{ + /* TODO: Add a assert to check the requested index + * + * assert(idx >= dev->vq_index && idx < dev->vq_index + (int)dev->nvqs); + */ + return idx; +} + +void vhost_user_share_fd(void) +{ + size_t fd_num = 1; + VhostUserMsg msg = { + .request = (VhostUserRequest) VHOST_USER_SHARE_LOOPBACK_FD, + .flags = VHOST_USER_VERSION, + .payload.u64 = ((uint64_t)getpid() << 32) | (uint64_t)loopback_fd, + .size = sizeof(msg.payload.u64), + }; + + msg.fd_num = 1; + memcpy(msg.fds, &loopback_fd, fd_num * sizeof(int)); + + /* TODO: Check if we need to remove the VHOST_USER_NEED_REPLY_MASK flag + * + * msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; + */ + + (void)vu_message_write(client_sock, &msg); +} + +int vhost_set_vring_file(VhostUserRequest request, + struct vhost_vring_file *file) +{ + int fds[VHOST_USER_MAX_RAM_SLOTS]; + size_t fd_num = 0; + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, + .size = sizeof(msg.payload.u64), + }; + + if (ioeventfd_enabled() && file->fd > 0) { + fds[fd_num++] = file->fd; + } else { + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + } + + /* TODO: Check if we need to remove the VHOST_USER_NEED_REPLY_MASK flag + * + * msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; + */ + + (void)fds; + (void)fd_num; + + msg.fd_num = fd_num; + memcpy(msg.fds, &fds, fd_num * sizeof(int)); + + return !vu_message_write(client_sock, &msg); +} + +int vhost_user_set_vring_kick(struct vhost_vring_file *file) +{ + return vhost_set_vring_file(VHOST_USER_SET_VRING_KICK, file); +} + +int vhost_user_set_vring_call(struct vhost_vring_file *file) +{ + return vhost_set_vring_file(VHOST_USER_SET_VRING_CALL, file); +} + +static int vhost_set_vring(struct vhost_dev *dev, + unsigned long int request, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.state = *ring, + .size = 
sizeof(msg.payload.state), + }; + + return !vu_message_write(client_sock, &msg); +} + +int vhost_user_set_vring_num(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); +} + +int vhost_user_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); +} + + +int vhost_user_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + int ret; + VhostUserMsg msg = { + .request = VHOST_USER_SET_VRING_ADDR, + .flags = VHOST_USER_VERSION, + .payload.addr = *addr, + .size = sizeof(msg.payload.addr), + }; + + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); + + if (reply_supported && wait_for_reply) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + //ret = vhost_user_write(dev, &msg, NULL, 0); + ret = vu_message_write(client_sock, &msg); + if (ret < 0) { + DBG("Fail vhost_user_set_vring_addr\n"); + return ret; + } + + if (wait_for_reply) { + return enforce_reply(&msg); + } + + return 0; +} + + +int vhost_virtqueue_init(struct vhost_dev *dev, + struct vhost_virtqueue *vq, int n) +{ + int vhost_vq_index = (int)vhost_user_get_vq_index(dev, n); + + struct vhost_vring_file file = { + .index = vhost_vq_index, + }; + + int r = event_notifier_init(&vq->masked_notifier, 0); + if (r < 0) { + return r; + } + + file.fd = event_notifier_get_wfd(&vq->masked_notifier); + + r = vhost_user_set_vring_call(&file); + if (r) { + DBG("vhost_set_vring_call failed\n"); + return r; + } + + vq->dev = dev; + + return 0; +} + + +/* -------------------- Vring functions -------------------- */ + +/* TODO: This function might be implemented in a later release */ +static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, + bool reply_supported, + bool config_mem_slots) +{ + return 0; +} + + +/* TODO: This function is not yet fully optimized because in the current release + * it is not used. It will be implemented or deleted in a later release. 
+ */ +int vhost_user_set_mem_table(struct vhost_dev *dev) +{ + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; + size_t fd_num = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + bool config_mem_slots = + virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); + int ret; + bool do_postcopy = false; + + if (do_postcopy) { + /* + * Postcopy has enough differences that it's best done in its own + * version + */ + return vhost_user_set_mem_table_postcopy(dev, reply_supported, + config_mem_slots); + } + + VhostUserMsg msg = { + .flags = VHOST_USER_VERSION, + }; + + if (reply_supported) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + return 0; +} + +/* ----------------- End of Vring functions ---------------- */ + +int vhost_user_backend_init(struct vhost_dev *vhdev) +{ + uint64_t features, protocol_features, ram_slots; + int err; + + err = vhost_user_get_features(&features); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return err; + } + + if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { + vhdev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + + err = vhost_user_get_u64(VHOST_USER_GET_PROTOCOL_FEATURES, + &protocol_features); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + + vhdev->protocol_features = + protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; + + /* TODO: Disable the config bit for the rng; this might be useful + * when new devices are added */ + vhdev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); + + err = vhost_user_set_protocol_features(vhdev->protocol_features); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + + /* query the max queues we support if the backend supports Multiple Queue */ + if (vhdev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { + err = vhost_user_get_u64(VHOST_USER_GET_QUEUE_NUM, + &vhdev->max_queues); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + } else { + vhdev->max_queues = 1; + } + + if (vhdev->num_queues && vhdev->max_queues < vhdev->num_queues) { + DBG("The maximum number of queues supported by the " + "backend is %ld\n", vhdev->max_queues); + return -EINVAL; + } + + if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && + !(virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_SLAVE_REQ) && + virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK))) { + DBG("IOMMU support requires reply-ack and " + "slave-req protocol features.\n"); + return -EINVAL; + } + + /* get max memory regions if backend supports configurable RAM slots */ + if (!virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { + vhdev->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; + } else { + err = vhost_user_get_max_memslots(&ram_slots); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + + if (ram_slots < vhdev->memory_slots) { + DBG("The backend specified a max ram slots limit " + "of %ld, when the prior validated limit was " + "%ld. 
This limit should never decrease.\n", ram_slots, + vhdev->memory_slots); + return -EINVAL; + } + + vhdev->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); + } + } + + if (vhdev->migration_blocker == NULL && + !virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { + DBG("Migration disabled: vhost-user backend lacks " + "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.\n"); + } + + if (vhdev->vq_index == 0) { + err = vhost_setup_slave_channel(vhdev); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + } + + /* TODO: We might need to set up a postcopy_notifier in a future release: + * + * u->postcopy_notifier.notify = vhost_user_postcopy_notifier; + * postcopy_add_notifier(&u->postcopy_notifier); + */ + + return 0; +} + + +void vhost_dev_init(struct vhost_dev *vhdev) { + + uint64_t features; + int r, n_initialized_vqs = 0; + unsigned int i; + + /* Vhost conf */ + vhdev->migration_blocker = NULL; + + (void)vhost_user_backend_init(vhdev); + + r = vhost_user_set_owner(); + if (r < 0) { + DBG("vhost_set_owner failed\n"); + } + + r = vhost_user_get_features(&features); + if (r < 0) { + DBG("vhost_get_features failed\n"); + } + + for (i = 0; i < vhdev->nvqs; ++i, ++n_initialized_vqs) { + r = vhost_virtqueue_init(vhdev, vhdev->vqs + i, vhdev->vq_index + i); + if (r < 0) { + DBG("Failed to initialize virtqueue %d", i); + } + } + + /* TODO: busyloop == 0 in rng case, but we might need it for new devices: + * + * if (busyloop_timeout) { + * for (i = 0; i < dev->nvqs; ++i) { + * r = vhost_virtqueue_set_busyloop_timeout(dev, dev->vq_index + i, + * busyloop_timeout); + * if (r < 0) { + * DBG("Failed to set busyloop timeout\n"); + * //goto fail_busyloop; + * } + * } + * } + */ + + vhdev->features = features; +} diff --git a/vhost_user_loopback.h b/vhost_user_loopback.h new file mode 100644 index 0000000..c41bca6 --- /dev/null +++ b/vhost_user_loopback.h @@ -0,0 +1,786 @@ +/* + * Based on libvhost-user.h of Qemu project + * + * Copyright (c) 2016 Red Hat, Inc. + * + * Authors: + * Victor Kaplansky <victork@redhat.com> + * Marc-André Lureau <mlureau@redhat.com> + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +#ifndef LOOPBACK_LIBVHOST_USER_H +#define LOOPBACK_LIBVHOST_USER_H + +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> +#include <poll.h> +#include <linux/vhost.h> +#include <pthread.h> +#include "virtio_loopback.h" + +typedef struct adapter_dev { + struct vhost_dev *vdev; + struct vhost_user *vudev; + VirtIODevice *virtio_dev; + VirtioBus *vbus; +} AdapterDev; + +struct vhost_virtqueue { + int kick; + int call; + void *desc; + void *avail; + void *used; + int num; + unsigned long long desc_phys; + unsigned desc_size; + unsigned long long avail_phys; + unsigned avail_size; + unsigned long long used_phys; + unsigned used_size; + EventNotifier masked_notifier; + struct vhost_dev *dev; +}; + +struct vhost_dev { + VirtIODevice *vdev; + struct vhost_virtqueue *vqs; + unsigned int nvqs; + /* the first virtqueue which would be used by this vhost dev */ + int vq_index; + /* one past the last vq index for the virtio device (not vhost) */ + int vq_index_end; + /* if non-zero, minimum required value for max_queues */ + uint64_t num_queues; + uint64_t features; + uint64_t acked_features; + uint64_t backend_features; + uint64_t protocol_features; + uint64_t max_queues; + uint64_t backend_cap; + bool started; + bool log_enabled; + uint64_t log_size; + void *migration_blocker; + /* Vhost-user struct */ + uint64_t memory_slots; +}; + +struct vhost_user { + struct vhost_dev *dev; + /* Length of the region_rb and region_rb_offset arrays */ + size_t region_rb_len; + /* True once we've entered postcopy_listen */ + bool postcopy_listen; + /* Our current regions */ + int num_shadow_regions; +}; + +/* Global variables */ +extern int client_sock; +extern struct vhost_dev *dev; +extern struct adapter_dev *adev; +extern struct vhost_user *vudev; + +/* Based on qemu/hw/virtio/vhost-user.c */ +#define VHOST_USER_F_PROTOCOL_FEATURES 30 +#define VHOST_LOG_PAGE 4096 +#define VIRTQUEUE_MAX_SIZE 1024 +#define VHOST_MEMORY_BASELINE_NREGIONS 8 + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION (0x1) + +/* + * Set a reasonable maximum number of ram slots, which will be supported by + * any architecture. 
+ */ +#define VHOST_USER_MAX_RAM_SLOTS 32 +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +/* + * Maximum size of virtio device config space + */ +#define VHOST_USER_MAX_CONFIG_SIZE 256 + +typedef enum VhostSetConfigType { + VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_MIGRATION = 1, +} VhostSetConfigType; + +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, + VHOST_USER_PROTOCOL_F_NET_MTU = 4, + VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, + VHOST_USER_PROTOCOL_F_CROSS_ENDIAN = 6, + VHOST_USER_PROTOCOL_F_CRYPTO_SESSION = 7, + VHOST_USER_PROTOCOL_F_PAGEFAULT = 8, + VHOST_USER_PROTOCOL_F_CONFIG = 9, + VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD = 10, + VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11, + VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12, + VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS = 14, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS = 15, + VHOST_USER_PROTOCOL_F_MAX +}; + +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_NET_SET_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, + VHOST_USER_SET_VRING_ENDIAN = 23, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, + VHOST_USER_CREATE_CRYPTO_SESSION = 26, + VHOST_USER_CLOSE_CRYPTO_SESSION = 27, + VHOST_USER_POSTCOPY_ADVISE = 28, + VHOST_USER_POSTCOPY_LISTEN = 29, + VHOST_USER_POSTCOPY_END = 30, + VHOST_USER_GET_INFLIGHT_FD = 31, + VHOST_USER_SET_INFLIGHT_FD = 32, + VHOST_USER_GPU_SET_SOCKET = 33, + VHOST_USER_VRING_KICK = 35, + VHOST_USER_GET_MAX_MEM_SLOTS = 36, + VHOST_USER_ADD_MEM_REG = 37, + VHOST_USER_REM_MEM_REG = 38, + VHOST_USER_SHARE_LOOPBACK_FD = 39, + VHOST_USER_MAX +} VhostUserRequest; + +typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_NONE = 0, + VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, + VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG = 3, + VHOST_USER_SLAVE_VRING_CALL = 4, + VHOST_USER_SLAVE_VRING_ERR = 5, + VHOST_USER_SLAVE_MAX +} VhostUserSlaveRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +#define VHOST_USER_MEM_REG_SIZE (sizeof(VhostUserMemoryRegion)) + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_BASELINE_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserMemRegMsg { + uint64_t padding; + VhostUserMemoryRegion region; +} VhostUserMemRegMsg; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserConfig { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +} VhostUserConfig; + 
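+/* Dummy instance, used only so that VHOST_USER_CONFIG_HDR_SIZE below can be + * computed as sizeof(offset) + sizeof(size) + sizeof(flags), i.e. the config + * message header without the region payload. */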
+static VhostUserConfig c __attribute__ ((unused)); +#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ + + sizeof(c.size) \ + + sizeof(c.flags)) + +typedef struct VhostUserVringArea { + uint64_t u64; + uint64_t size; + uint64_t offset; +} VhostUserVringArea; + +typedef struct VhostUserInflight { + uint64_t mmap_size; + uint64_t mmap_offset; + uint16_t num_queues; + uint16_t queue_size; +} VhostUserInflight; + +#if defined(_WIN32) && (defined(__x86_64__) || defined(__i386__)) +# define VU_PACKED __attribute__((gcc_struct, packed)) +#else +# define VU_PACKED __attribute__((packed)) +#endif + +typedef struct VhostUserMsg { + int request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1 << 2) +#define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) + uint32_t flags; + uint32_t size; /* the following payload size */ + + union { +#define VHOST_USER_VRING_IDX_MASK (0xff) +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserMemRegMsg memreg; + VhostUserLog log; + VhostUserConfig config; + VhostUserVringArea area; + VhostUserInflight inflight; + } payload; + + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; + int fd_num; + uint8_t *data; +} VU_PACKED VhostUserMsg; + +typedef struct VuDevRegion { + /* Guest Physical address. */ + uint64_t gpa; + /* Memory region size. */ + uint64_t size; + /* QEMU virtual address (userspace). */ + uint64_t qva; + /* Starting offset in our mmaped space. */ + uint64_t mmap_offset; + /* Start address of mmaped space. */ + uint64_t mmap_addr; +} VuDevRegion; + +typedef struct VuDev VuDev; +typedef uint64_t (*vu_get_features_cb) (VuDev *dev); +typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features); +typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg, + int *do_reply); +typedef bool (*vu_read_msg_cb) (VuDev *dev, int sock, VhostUserMsg *vmsg); +typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started); +typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx); +typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len); +typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags); + +typedef struct VuDevIface { + /* called by VHOST_USER_GET_FEATURES to get the features bitmask */ + vu_get_features_cb get_features; + /* enable vhost implementation features */ + vu_set_features_cb set_features; + /* get the protocol feature bitmask from the underlying vhost + * implementation */ + vu_get_features_cb get_protocol_features; + /* enable protocol features in the underlying vhost implementation. */ + vu_set_features_cb set_protocol_features; + /* process_msg is called for each vhost-user message received */ + /* skip libvhost-user processing if return value != 0 */ + vu_process_msg_cb process_msg; + /* tells when queues can be processed */ + vu_queue_set_started_cb queue_set_started; + /* + * If the queue is processed in order, in which case it will be + * resumed to vring.used->idx. This can help to support resuming + * on unmanaged exit/crash. 
+ */ + vu_queue_is_processed_in_order_cb queue_is_processed_in_order; + /* get the config space of the device */ + vu_get_config_cb get_config; + /* set the config space of the device */ + vu_set_config_cb set_config; +} VuDevIface; + +typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx); + +typedef struct VuRing { + unsigned int num; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; + uint64_t log_guest_addr; + uint32_t flags; +} VuRing; + +typedef struct VuDescStateSplit { + /* Indicate whether this descriptor is inflight or not. + * Only available for head-descriptor. */ + uint8_t inflight; + + /* Padding */ + uint8_t padding[5]; + + /* Maintain a list for the last batch of used descriptors. + * Only available when batching is used for submitting */ + uint16_t next; + + /* Used to preserve the order of fetching available descriptors. + * Only available for head-descriptor. */ + uint64_t counter; +} VuDescStateSplit; + +typedef struct VuVirtqInflight { + /* The feature flags of this region. Now it's initialized to 0. */ + uint64_t features; + + /* The version of this region. It's 1 currently. + * Zero value indicates a vm reset happened. */ + uint16_t version; + + /* The size of VuDescStateSplit array. It's equal to the virtqueue + * size. Slave could get it from queue size field of VhostUserInflight. */ + uint16_t desc_num; + + /* The head of list that track the last batch of used descriptors. */ + uint16_t last_batch_head; + + /* Storing the idx value of used ring */ + uint16_t used_idx; + + /* Used to track the state of each descriptor in descriptor table */ + VuDescStateSplit desc[]; +} VuVirtqInflight; + +typedef struct VuVirtqInflightDesc { + uint16_t index; + uint64_t counter; +} VuVirtqInflightDesc; + +typedef struct VuVirtq { + VuRing vring; + VuVirtqInflight *inflight; + VuVirtqInflightDesc *resubmit_list; + uint16_t resubmit_num; + uint64_t counter; + /* Next head to pop */ + uint16_t last_avail_idx; + /* Last avail_idx read from VQ. */ + uint16_t shadow_avail_idx; + uint16_t used_idx; + /* Last used index value we have signalled on */ + uint16_t signalled_used; + /* Last used index value we have signalled on */ + bool signalled_used_valid; + /* Notification enabled? 
*/ + bool notification; + int inuse; + vu_queue_handler_cb handler; + int call_fd; + int kick_fd; + int err_fd; + unsigned int enable; + bool started; + /* Guest addresses of our ring */ + struct vhost_vring_addr vra; +} VuVirtq; + +enum VuWatchCondtion { + VU_WATCH_IN = POLLIN, + VU_WATCH_OUT = POLLOUT, + VU_WATCH_PRI = POLLPRI, + VU_WATCH_ERR = POLLERR, + VU_WATCH_HUP = POLLHUP, +}; + +typedef void (*vu_panic_cb) (VuDev *dev, const char *err); +typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data); +typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition, + vu_watch_cb cb, void *data); +typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd); + +typedef struct VuDevInflightInfo { + int fd; + void *addr; + uint64_t size; +} VuDevInflightInfo; + +struct VuDev { + int sock; + uint32_t nregions; + VuDevRegion regions[VHOST_USER_MAX_RAM_SLOTS]; + VuVirtq *vq; + VuDevInflightInfo inflight_info; + int log_call_fd; + /* Must be held while using slave_fd */ + pthread_mutex_t slave_mutex; + int slave_fd; + uint64_t log_size; + uint8_t *log_table; + uint64_t features; + uint64_t protocol_features; + bool broken; + uint16_t max_queues; + + /* + * @read_msg: custom method to read vhost-user message + * + * Read data from vhost_user socket fd and fill up + * the passed VhostUserMsg *vmsg struct. + * + * If reading fails, it should close the received set of file + * descriptors as socket message's auxiliary data. + * + * For the details, please refer to vu_message_read in libvhost-user.c + * which will be used by default if not custom method is provided when + * calling vu_init + * + * Returns: true if vhost-user message successfully received, + * otherwise return false. + * + */ + vu_read_msg_cb read_msg; + + /* + * @set_watch: add or update the given fd to the watch set, + * call cb when condition is met. + */ + vu_set_watch_cb set_watch; + + /* @remove_watch: remove the given fd from the watch set */ + vu_remove_watch_cb remove_watch; + + /* + * @panic: encountered an unrecoverable error, you may try to re-initialize + */ + vu_panic_cb panic; + const VuDevIface *iface; + + /* Postcopy data */ + int postcopy_ufd; + bool postcopy_listening; +}; + +typedef struct VuVirtqElement { + unsigned int index; + unsigned int out_num; + unsigned int in_num; + struct iovec *in_sg; + struct iovec *out_sg; +} VuVirtqElement; + +/** + * vu_init: + * @dev: a VuDev context + * @max_queues: maximum number of virtqueues + * @socket: the socket connected to vhost-user master + * @panic: a panic callback + * @set_watch: a set_watch callback + * @remove_watch: a remove_watch callback + * @iface: a VuDevIface structure with vhost-user device callbacks + * + * Initializes a VuDev vhost-user context. + * + * Returns: true on success, false on failure. + **/ +bool vu_init(VuDev *dev, + uint16_t max_queues, + int socket, + vu_panic_cb panic, + vu_read_msg_cb read_msg, + vu_set_watch_cb set_watch, + vu_remove_watch_cb remove_watch, + const VuDevIface *iface); + + +/** + * vu_deinit: + * @dev: a VuDev context + * + * Cleans up the VuDev context + */ +void vu_deinit(VuDev *dev); + +/** + * vu_dispatch: + * @dev: a VuDev context + * + * Process one vhost-user message. + * + * Returns: TRUE on success, FALSE on failure. + */ +bool vu_dispatch(VuDev *dev); + +/** + * vu_gpa_to_va: + * @dev: a VuDev context + * @plen: guest memory size + * @guest_addr: guest address + * + * Translate a guest address to a pointer. Returns NULL on failure. 
+ */ +void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr); + +/** + * vu_get_queue: + * @dev: a VuDev context + * @qidx: queue index + * + * Returns the queue number @qidx. + */ +VuVirtq *vu_get_queue(VuDev *dev, int qidx); + +/** + * vu_set_queue_handler: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @handler: the queue handler callback + * + * Set the queue handler. This function may be called several times + * for the same queue. If called with NULL @handler, the handler is + * removed. + */ +void vu_set_queue_handler(VuDev *dev, VuVirtq *vq, + vu_queue_handler_cb handler); + +/** + * vu_set_queue_host_notifier: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @fd: a file descriptor + * @size: host page size + * @offset: notifier offset in @fd file + * + * Set queue's host notifier. This function may be called several + * times for the same queue. If called with -1 @fd, the notifier + * is removed. + */ +bool vu_set_queue_host_notifier(VuDev *dev, VuVirtq *vq, int fd, + int size, int offset); + +/** + * vu_queue_set_notification: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @enable: state + * + * Set whether the queue notifies (via event index or interrupt) + */ +void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable); + +/** + * vu_queue_enabled: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Returns: whether the queue is enabled. + */ +bool vu_queue_enabled(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_started: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Returns: whether the queue is started. + */ +bool vu_queue_started(const VuDev *dev, const VuVirtq *vq); + +/** + * vu_queue_empty: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Returns: true if the queue is empty or not ready. + */ +bool vu_queue_empty(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_notify: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Request to notify the queue via callfd (skipped if unnecessary) + */ +void vu_queue_notify(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_notify_sync: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * + * Request to notify the queue via callfd (skipped if unnecessary) + * or sync message if possible. + */ +void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq); + +/** + * vu_queue_pop: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @sz: the size of struct to return (must be >= VuVirtqElement) + * + * Returns: a VuVirtqElement filled from the queue or NULL. The + * returned element must be free()-d by the caller. + */ +void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz); + + +/** + * vu_queue_unpop: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @elem: The #VuVirtqElement + * @len: number of bytes written + * + * Pretend the most recent element wasn't popped from the virtqueue. The next + * call to vu_queue_pop() will refetch the element. + */ +void vu_queue_unpop(VuDev *dev, VuVirtq *vq, VuVirtqElement *elem, + size_t len); + +/** + * vu_queue_rewind: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @num: number of elements to push back + * + * Pretend that elements weren't popped from the virtqueue. The next + * virtqueue_pop() will refetch the oldest element. + * + * Returns: true on success, false if @num is greater than the number of in use + * elements. 
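+ *
+ * A short sketch (hypothetical error path; 'n' is the number of elements
+ * the caller popped with vu_queue_pop() but could not process):
+ *
+ *     if (!vu_queue_rewind(dev, vq, n)) {
+ *         ...n exceeded the number of in-use elements...
+ *     }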
+ */ +bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num); + +/** + * vu_queue_fill: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @elem: a VuVirtqElement + * @len: length in bytes to write + * @idx: optional offset for the used ring index (0 in general) + * + * Fill the used ring with @elem element. + */ +void vu_queue_fill(VuDev *dev, VuVirtq *vq, + const VuVirtqElement *elem, + unsigned int len, unsigned int idx); + +/** + * vu_queue_push: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @elem: a VuVirtqElement + * @len: length in bytes to write + * + * Helper that combines vu_queue_fill() with a vu_queue_flush(). + */ +void vu_queue_push(VuDev *dev, VuVirtq *vq, + const VuVirtqElement *elem, unsigned int len); + +/** + * vu_queue_flush: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @num: number of elements to flush + * + * Mark the last number of elements as done (used.idx is updated by + * num elements). +*/ +void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num); + +/** + * vu_queue_get_avail_bytes: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @in_bytes: in bytes + * @out_bytes: out bytes + * @max_in_bytes: stop counting after max_in_bytes + * @max_out_bytes: stop counting after max_out_bytes + * + * Count the number of available bytes, up to max_in_bytes/max_out_bytes. + */ +void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes, + unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes); + +/** + * vu_queue_avail_bytes: + * @dev: a VuDev context + * @vq: a VuVirtq queue + * @in_bytes: expected in bytes + * @out_bytes: expected out bytes + * + * Returns: true if in_bytes <= in_total && out_bytes <= out_total + */ +bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes, + unsigned int out_bytes); + + +bool vhost_user_one_time_request(VhostUserRequest request); +void vmsg_close_fds(VhostUserMsg *vmsg); +bool vu_message_write(int conn_fd, VhostUserMsg *vmsg); +bool vu_message_read(int conn_fd, VhostUserMsg *vmsg); +int vhost_user_set_owner(void); +int process_message_reply(const VhostUserMsg *msg); +int vhost_user_get_u64(int request, uint64_t *u64); +int vhost_user_get_features(uint64_t *features); +int enforce_reply(const VhostUserMsg *msg); +int vhost_user_set_u64(int request, uint64_t u64, bool wait_for_reply); +int vhost_user_set_protocol_features(uint64_t features); +int vhost_user_get_max_memslots(uint64_t *max_memslots); +int vhost_setup_slave_channel(struct vhost_dev *dev); +int vhost_user_get_vq_index(struct vhost_dev *dev, int idx); +int vhost_set_vring_file(VhostUserRequest request, + struct vhost_vring_file *file); +int vhost_user_set_vring_kick(struct vhost_vring_file *file); +int vhost_user_set_vring_call(struct vhost_vring_file *file); +int vhost_virtqueue_init(struct vhost_dev *dev, + struct vhost_virtqueue *vq, int n); +void vhost_dev_init(struct vhost_dev *vhdev); +int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features); +int vhost_user_set_mem_table(struct vhost_dev *dev); +int vhost_user_get_vq_index(struct vhost_dev *dev, int idx); +void vhost_user_share_fd(void); +int vhost_user_set_vring_num(struct vhost_dev *dev, struct vhost_vring_state *ring); +int vhost_user_set_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring); +int vhost_user_set_vring_addr(struct vhost_dev *dev, struct vhost_vring_addr *addr); + + +#endif /* LIBVHOST_USER_H */ diff --git a/vhost_user_rng.c b/vhost_user_rng.c new file mode 100644 index 
0000000..7dc7d99 --- /dev/null +++ b/vhost_user_rng.c @@ -0,0 +1,188 @@ +/* + * Based on vhost-user-rng of Qemu project + * + * Copyright (c) 2021 Mathieu Poirier <mathieu.poirier@linaro.org> + * + * Copyright (c) 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <stdbool.h> +#include <sys/param.h> + +/* Project header files */ +#include "vhost_loopback.h" +#include "vhost_user_rng.h" + +#ifdef DEBUG +#define DBG(...) printf("vhost-user-rng: " __VA_ARGS__) +#else +#define DBG(...) +#endif /* DEBUG */ + +static void vu_rng_start(VirtIODevice *vdev) +{ + VHostUserRNG *rng = vdev->vhrng; + VirtioBus *k = vdev->vbus; + int ret; + int i; + + /* TODO: This might be deleted in future */ + if (!k->set_guest_notifiers) { + DBG("binding does not support guest notifiers\n"); + return; + } + + ret = vhost_dev_enable_notifiers(rng->vhost_dev, vdev); + if (ret < 0) { + DBG("Error enabling host notifiers: %d\n", ret); + return; + } + + ret = k->set_guest_notifiers(vdev, rng->vhost_dev->nvqs, true); + if (ret < 0) { + DBG("Error binding guest notifier: %d\n", ret); + return; + } + + rng->vhost_dev->acked_features = vdev->guest_features; + + ret = vhost_dev_start(rng->vhost_dev, vdev); + if (ret < 0) { + DBG("Error starting vhost-user-rng: %d\n", ret); + return; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. 
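+     * In this adapter the guest notifier is backed by an eventfd; a helper
+     * thread (loopback_event_select() in virtio_loopback.c) waits on it and
+     * injects the loopback interrupt when the backend signals.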
+     */
+    for (i = 0; i < rng->vhost_dev->nvqs; i++) {
+        vhost_virtqueue_mask(rng->vhost_dev, vdev, i, false);
+    }
+}
+
+/* TODO: We need to implement this function in a future release */
+static void vu_rng_stop(VirtIODevice *vdev)
+{
+    VHostUserRNG *rng = vdev->vhrng;
+}
+
+
+static uint64_t vu_rng_get_features(VirtIODevice *vdev,
+                                    uint64_t requested_features)
+{
+    /* No feature bits used yet */
+    return requested_features;
+}
+
+/* TODO: We need to implement this function in a future release */
+static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+{
+    VHostUserRNG *rng = vdev->vhrng;
+
+    /* vhost_virtqueue_mask(&rng->vhost_dev, vdev, idx, mask); */
+}
+
+/* TODO: We need to implement this function in a future release */
+static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx)
+{
+    VHostUserRNG *rng = vdev->vhrng;
+
+    /* return vhost_virtqueue_pending(&rng->vhost_dev, idx); */
+    return true;
+}
+
+static void vu_rng_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VHostUserRNG *rng = vdev->vhrng;
+    bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
+
+    if (rng->vhost_dev->started == should_start) {
+        DBG("vhost_dev is already in the requested state\n");
+        return;
+    }
+
+    if (should_start) {
+        vu_rng_start(vdev);
+    } else {
+        DBG("vu_rng_stop(vdev)\n");
+        /* TODO: Add vu_rng_stop(vdev); when this function is implemented */
+    }
+}
+
+static void virtio_dev_class_init(VirtIODevice *vdev)
+{
+    vdev->vdev_class = (VirtioDeviceClass *)malloc(sizeof(VirtioDeviceClass));
+    vdev->vdev_class->parent = vdev;
+    vdev->vdev_class->set_status = vu_rng_set_status;
+    vdev->vdev_class->get_features = vu_rng_get_features;
+    vdev->vdev_class->guest_notifier_mask = vu_rng_guest_notifier_mask;
+    vdev->vdev_class->guest_notifier_pending = vu_rng_guest_notifier_pending;
+}
+
+
+void vhost_user_rng_init(VirtIODevice *vdev)
+{
+    VHostUserRNG *vhrng = (VHostUserRNG *)malloc(sizeof(VHostUserRNG));
+    vdev->vhrng = vhrng;
+    vhrng->parent = vdev;
+    vhrng->req_vq = vdev->vq;
+    vhrng->vhost_dev = dev;
+
+    virtio_dev_class_init(vdev);
+    virtio_mmio_bus_init(vdev->vbus);
+}
+
+static void vu_rng_handle_output(VirtIODevice *vdev, VirtQueue *vq)
+{
+    /*
+     * Not normally called; it's the daemon that handles the queue;
+     * however virtio's cleanup path can call this.
+     */
+    DBG("vu_rng_handle_output\n");
+}
+
+
+void vhost_user_rng_realize(void)
+{
+    virtio_dev_init(global_vdev, "virtio-rng", 4, 0);
+
+    /* This needs to be changed to a vhost-user-rng init */
+    vhost_user_rng_init(global_vdev);
+
+    global_vdev->vq = virtio_add_queue(global_vdev, 4, vu_rng_handle_output);
+
+    global_vdev->host_features = 0x39000000;
+
+    proxy = (VirtIOMMIOProxy *)malloc(sizeof(VirtIOMMIOProxy));
+    *proxy = (VirtIOMMIOProxy) {
+        .legacy = 1,
+    };
+
+    /* Virtqueues conf */
+    dev->nvqs = 1;
+    dev->vqs = (struct vhost_virtqueue *)malloc(dev->nvqs * sizeof(struct vhost_virtqueue));
+
+    vhost_dev_init(dev);
+}
diff --git a/vhost_user_rng.h b/vhost_user_rng.h
new file mode 100644
index 0000000..77a783c
--- /dev/null
+++ b/vhost_user_rng.h
@@ -0,0 +1,44 @@
+/*
+ * Based on vhost-user-rng of Qemu project
+ *
+ * Copyright (c) 2021 Mathieu Poirier <mathieu.poirier@linaro.org>
+ *
+ * Copyright (c) 2022 Virtual Open Systems SAS.
+ * + * Author: + * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef VHOST_USER_RNG +#define VHOST_USER_RNG + +#include "vhost_loopback.h" +#include "virtio_rng.h" +#include "vhost_user_loopback.h" +#include "virtio_loopback.h" + +typedef struct VHostUserRNG { + VirtIODevice *parent; + struct vhost_virtqueue *vhost_vq; + struct vhost_dev *vhost_dev; + VirtQueue *req_vq; + bool connected; +} VHostUserRNG; + +void vhost_user_rng_realize(void); + +#endif /* VHOST_USER_RNG */ diff --git a/virtio_loopback.c b/virtio_loopback.c new file mode 100644 index 0000000..8da13b6 --- /dev/null +++ b/virtio_loopback.c @@ -0,0 +1,1745 @@ +/* + * + * Based on: + * + * 1) virtio.c of Qemu project + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * + * 2) virtio-mmio.c of Qemu project + * + * Copyright (c) 2011 Linaro Limited + * + * Author: + * Peter Maydell <peter.maydell@linaro.org> + * + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <pthread.h> +#include <stdbool.h> +#include <sys/param.h> + +/* For socket */ +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +/* Project header files */ +#include "virtio_loopback.h" +#include "virtio_rng.h" + +#include <stddef.h> +#include <pthread.h> +#include <limits.h> + +#ifdef DEBUG +#define DBG(...) printf("virtio-loopback: " __VA_ARGS__) +#else +#define DBG(...) 
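+/* With DEBUG unset, DBG() expands to nothing and its arguments are not
+ * evaluated. */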
+#endif /* DEBUG */ + +/* Global variables */ + +int efd; /* Eventfd file descriptor */ +uint64_t eftd_ctr; +fd_set rfds; +int s; +int fd; +int loopback_fd; + +virtio_device_info_struct_t device_info; +virtio_neg_t *address = NULL; + +VirtIOMMIOProxy *proxy; + +void virtio_add_feature(uint64_t *features, unsigned int fbit) +{ + *features |= (1ULL << fbit); +} + +bool virtio_has_feature(uint64_t features, unsigned int fbit) +{ + return !!(features & (1ULL << fbit)); +} + +static int virtio_validate_features(VirtIODevice *vdev) +{ + if (virtio_has_feature(vdev->host_features, VIRTIO_F_IOMMU_PLATFORM) && + !virtio_has_feature(vdev->guest_features, VIRTIO_F_IOMMU_PLATFORM)) { + return -EFAULT; + } + + return 0; +} + + +void virtio_set_started(VirtIODevice *vdev, bool started) +{ + if (started) { + vdev->start_on_kick = false; + } + + if (vdev->use_started) { + vdev->started = started; + } +} + +int virtio_set_status(VirtIODevice *vdev, uint8_t val) +{ + VirtioDeviceClass *k = vdev->vdev_class; + + if (virtio_has_feature(vdev->guest_features, VIRTIO_F_VERSION_1)) { + if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && + val & VIRTIO_CONFIG_S_FEATURES_OK) { + int ret = virtio_validate_features(vdev); + + if (ret) { + return ret; + } + } + } + + if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != + (val & VIRTIO_CONFIG_S_DRIVER_OK)) { + virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); + } + + if (k->set_status) { + k->set_status(vdev, val); + } + + vdev->status = val; + + return 0; +} + +uint64_t vring_align(uint64_t addr, unsigned long align) +{ + return QEMU_ALIGN_UP(addr, align); +} + +uint64_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n) +{ + return sizeof(VRingDesc) * vdev->vq[n].vring.num; +} + +uint64_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.desc; +} + +uint64_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.avail; +} + +uint64_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.used; +} + + +int virtio_queue_get_num(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.num; +} + + +uint64_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n) +{ + int s; + + s = virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return offsetof(VRingAvail, ring) + + sizeof(uint16_t) * vdev->vq[n].vring.num + s; +} + +uint64_t virtio_queue_get_used_size(VirtIODevice *vdev, int n) +{ + int s; + + s = virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return offsetof(VRingUsed, ring) + + sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; +} + +/* virt queue functions */ +void virtio_queue_update_rings(VirtIODevice *vdev, int n) +{ + VRing *vring = &vdev->vq[n].vring; + + if (!vring->num || !vring->desc || !vring->align) { + /* not yet setup -> nothing to do */ + return; + } + vring->avail = vring->desc + vring->num * sizeof(VRingDesc); + vring->used = vring_align(vring->avail + + offsetof(VRingAvail, ring[vring->num]), + vring->align); +} + +static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev, + int n) +{ + return vdev->vq[n].last_avail_idx; +} + + +unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) +{ + return virtio_queue_split_get_last_avail_idx(vdev, n); +} + +void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) +{ + /* Don't allow guest to flip queue between existent and + * nonexistent states, or to set it to an invalid size. 
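+     * e.g. writing 0 to QUEUE_NUM for a configured queue, or a value
+     * above VIRTQUEUE_MAX_SIZE, is silently ignored.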
+ */ + if (!!num != !!vdev->vq[n].vring.num || + num > VIRTQUEUE_MAX_SIZE || + num < 0) { + return; + } + vdev->vq[n].vring.num = num; +} + +uint64_t virtio_queue_get_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.desc; +} + + +void virtio_queue_set_addr(VirtIODevice *vdev, int n, uint64_t addr) +{ + if (!vdev->vq[n].vring.num) { + return; + } + vdev->vq[n].vring.desc = addr; + virtio_queue_update_rings(vdev, n); +} + +int virtio_queue_ready(VirtQueue *vq) +{ + return vq->vring.avail != 0; +} + + +uint16_t vring_avail_idx(VirtQueue *vq) +{ + vq->shadow_avail_idx = ((VRingAvail *)vq->vring.avail)->idx; + + return vq->shadow_avail_idx; +} + +uint16_t vring_avail_ring(VirtQueue *vq, int i) +{ + return ((VRingAvail *)vq->vring.avail)->ring[i]; +} + +int virtio_queue_split_empty(VirtQueue *vq) +{ + bool empty; + + if (!vq->vring.avail) { + return 1; + } + + if (vq->shadow_avail_idx != vq->last_avail_idx) { + return 0; + } + + empty = vring_avail_idx(vq) == vq->last_avail_idx; + return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ + return virtio_queue_split_empty(vq); +} + +size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) +{ + size_t done; + unsigned int i; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(iov[i].iov_base + offset, buf + done, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; + } + } + return done; +} + + +size_t qemu_iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) +{ + if (__builtin_constant_p(bytes) && iov_cnt && + offset <= iov[0].iov_len && bytes <= iov[0].iov_len - offset) { + memcpy(iov[0].iov_base + offset, buf, bytes); + return bytes; + } else { + return iov_from_buf_full(iov, iov_cnt, offset, buf, bytes); + } +} + + +/* Called within rcu_read_lock(). */ +static inline uint16_t vring_avail_flags(VirtQueue *vq) +{ + return ((VRingAvail *)vq->vring.avail)->flags; +} + +/* Called within rcu_read_lock(). */ +static inline uint16_t vring_get_used_event(VirtQueue *vq) +{ + return vring_avail_ring(vq, vq->vring.num); +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other side, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); +} + +/* Called within rcu_read_lock(). 
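+ * Decides whether a used-buffer notification must be sent, honouring
+ * VIRTIO_RING_F_EVENT_IDX when the guest has negotiated it.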
*/ +static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + uint16_t old, new; + bool v; + + /* Always notify when queue is empty (when feature acknowledge) */ + if (virtio_has_feature(vdev->guest_features, VIRTIO_F_NOTIFY_ON_EMPTY) && + !vq->inuse && virtio_queue_empty(vq)) { + return true; + } + + if (!virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX)) { + return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); + } + + v = vq->signalled_used_valid; + vq->signalled_used_valid = true; + old = vq->signalled_used; + new = vq->signalled_used = vq->used_idx; + return !v || vring_need_event(vring_get_used_event(vq), new, old); +} + +/* Called within rcu_read_lock(). */ +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + return virtio_split_should_notify(vdev, vq); +} + + +void virtio_set_isr(VirtIODevice *vdev, int value) +{ + uint8_t old = vdev->isr; + + /* Do not write ISR if it does not change, so that its cacheline remains + * shared in the common case where the guest does not read it. + */ + if ((old & value) != value) { + vdev->isr |= value; + } +} + +static void virtio_irq(VirtQueue *vq) +{ + virtio_set_isr(vq->vdev, 0x1); + virtio_notify_vector(vq->vdev); +} + +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + if (!virtio_should_notify(vdev, vq)) { + DBG("Do not notify!\n"); + return; + } + + virtio_irq(vq); +} + +static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, int i) +{ + VRingUsed *used = (VRingUsed *)vq->vring.used; + + used->ring[i] = *uelem; +} + +void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) +{ + VRingUsedElem uelem; + + if (!vq->vring.used) { + return; + } + + idx = (idx + vq->used_idx) % vq->vring.num; + + uelem.id = elem->index; + uelem.len = len; + vring_used_write(vq, &uelem, idx); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) +{ + virtqueue_split_fill(vq, elem, len, idx); +} + +static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) +{ + ((VRingUsed *)vq->vring.used)->idx = val; + vq->used_idx = val; +} + +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) +{ + uint16_t old, new; + + if (!vq->vring.used) { + return; + } + + old = vq->used_idx; + new = old + count; + vring_used_idx_set(vq, new); + vq->inuse -= count; + if ((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)) + vq->signalled_used_valid = false; +} + +void virtqueue_flush(VirtQueue *vq, unsigned int count) +{ + virtqueue_split_flush(vq, count); +} + +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + virtqueue_fill(vq, elem, len, 0); + virtqueue_flush(vq, 1); +} + + +void vring_set_avail_event(VirtQueue *vq, uint16_t val) +{ + uint16_t *avail; + + avail = (uint16_t *)&((VRingUsed *)vq->vring.used)->ring[vq->vring.num]; + *avail = val; +} + +static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg, + uint64_t *addr, struct iovec *iov, + unsigned int max_num_sg, bool is_write, + uint64_t pa, size_t sz) +{ + unsigned num_sg = *p_num_sg; + bool ok = false; + + if (!sz) { + DBG("virtio: zero sized buffers are not allowed\n"); + goto out; + } + + while (sz) { + uint64_t len = sz; + + if (num_sg == max_num_sg) { + DBG("virtio: too many write descriptors in \n" + "indirect table"); + goto out; + } + + ioctl(fd, SHARE_BUF, &pa); + + iov[num_sg].iov_base = mmap (NULL, 8192, PROT_READ | PROT_WRITE, 
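+                                     /* 8192 = two pages; a sub-page offset
+                                      * (pa & 0xfff) is applied below */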
MAP_SHARED, fd, 0);
+
+        if (iov[num_sg].iov_base == MAP_FAILED) {
+            DBG("virtio: bogus descriptor or out of resources\n");
+            goto out;
+        }
+
+        /* Apply the sub-page offset only after the mapping is known good */
+        iov[num_sg].iov_base += pa & 0xfff;
+
+        iov[num_sg].iov_len = len;
+        addr[num_sg] = pa;
+
+        sz -= len;
+        pa += len;
+        num_sg++;
+    }
+    ok = true;
+
+out:
+    *p_num_sg = num_sg;
+    return ok;
+}
+
+static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
+{
+    VirtQueueElement *elem;
+    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
+    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
+    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
+    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
+    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
+    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
+
+    /* TODO: Add check for requested size
+     *
+     * assert(sz >= sizeof(VirtQueueElement));
+     */
+    elem = malloc(out_sg_end);
+    elem->out_num = out_num;
+    elem->in_num = in_num;
+    elem->in_addr = (void *)elem + in_addr_ofs;
+    elem->out_addr = (void *)elem + out_addr_ofs;
+    elem->in_sg = (void *)elem + in_sg_ofs;
+    elem->out_sg = (void *)elem + out_sg_ofs;
+    return elem;
+}
+
+void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
+{
+    unsigned int i, head, max;
+    int64_t len;
+    VirtIODevice *vdev = vq->vdev;
+    VirtQueueElement *elem = NULL;
+    unsigned out_num, in_num, elem_entries;
+    uint64_t addr[VIRTQUEUE_MAX_SIZE];
+    struct iovec iov[VIRTQUEUE_MAX_SIZE];
+    VRingDesc *desc;
+    int rc;
+
+    if (virtio_queue_split_empty(vq)) {
+        goto done;
+    }
+
+    /* When we start there are neither input nor output buffers. */
+    out_num = in_num = elem_entries = 0;
+
+    max = vq->vring.num;
+
+    if (vq->inuse >= vq->vring.num) {
+        DBG("Virtqueue size exceeded\n");
+        goto done;
+    }
+
+    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
+        goto done;
+    }
+
+    if (virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX)) {
+        vring_set_avail_event(vq, vq->last_avail_idx);
+    }
+
+    i = head;
+
+    desc = (VRingDesc *)vq->vring.desc + i;
+
+    /* Collect all the descriptors */
+    do {
+        bool map_ok;
+
+        if (desc->flags & VRING_DESC_F_WRITE) {
+            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
+                                        iov + out_num,
+                                        VIRTQUEUE_MAX_SIZE - out_num, true,
+                                        desc->addr, desc->len);
+        } else {
+            if (in_num) {
+                DBG("Incorrect order for descriptors\n");
+                goto err_undo_map;
+            }
+            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
+                                        VIRTQUEUE_MAX_SIZE, false,
+                                        desc->addr, desc->len);
+        }
+        if (!map_ok) {
+            goto err_undo_map;
+        }
+
+        /* If we've got too many, that implies a descriptor loop.
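+         * elem_entries counts every descriptor walked so far.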
+         */
+        if (++elem_entries > max) {
+            goto err_undo_map;
+        }
+
+        rc = virtqueue_split_read_next_desc(vdev, desc, max, &i);
+        if (rc == VIRTQUEUE_READ_DESC_MORE) {
+            /* 'next' indexes the descriptor table from its base, so
+             * re-point desc rather than advancing the current pointer. */
+            desc = (VRingDesc *)vq->vring.desc + i;
+        }
+    } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+        goto err_undo_map;
+    }
+
+    /* Now copy what we have collected and mapped */
+    elem = virtqueue_alloc_element(sz, out_num, in_num);
+    elem->index = head;
+    elem->ndescs = 1;
+    for (i = 0; i < out_num; i++) {
+        elem->out_addr[i] = addr[i];
+        elem->out_sg[i] = iov[i];
+    }
+    for (i = 0; i < in_num; i++) {
+        elem->in_addr[i] = addr[out_num + i];
+        elem->in_sg[i] = iov[out_num + i];
+    }
+
+    vq->inuse++;
+
+done:
+    return elem;
+
+err_undo_map:
+    goto done;
+}
+
+void *virtqueue_pop(VirtQueue *vq, size_t sz)
+{
+    return virtqueue_split_pop(vq, sz);
+}
+
+bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
+                        unsigned int *head)
+{
+    /* Grab the next descriptor number they're advertising, and increment
+     * the index we've seen. */
+    *head = vring_avail_ring(vq, idx % vq->vring.num);
+
+    /* If their number is silly, that's a fatal mistake. */
+    if (*head >= vq->vring.num) {
+        DBG("Guest says index %u is available\n", *head);
+        return false;
+    }
+
+    return true;
+}
+
+int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
+{
+    uint16_t num_heads = vring_avail_idx(vq) - idx;
+
+    /* Check it isn't doing very strange things with descriptor numbers. */
+    if (num_heads > vq->vring.num) {
+        DBG("Guest moved used index from %u to %u\n",
+            idx, vq->shadow_avail_idx);
+        return -EINVAL;
+    }
+
+    return num_heads;
+}
+
+int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
+                                   unsigned int max, unsigned int *next)
+{
+    /* If this descriptor says it doesn't chain, we're done. */
+    if (!(desc->flags & VRING_DESC_F_NEXT)) {
+        return VIRTQUEUE_READ_DESC_DONE;
+    }
+
+    /* Check they're not leading us off the end of the descriptor table. */
+    *next = desc->next;
+
+    if (*next >= max) {
+        DBG("Desc next is %u\n", *next);
+        return VIRTQUEUE_READ_DESC_ERROR;
+    }
+
+    /* The caller re-points desc at the table base plus *next. */
+    return VIRTQUEUE_READ_DESC_MORE;
+}
+
+
+static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
+                        unsigned int *in_bytes, unsigned int *out_bytes,
+                        unsigned max_in_bytes, unsigned max_out_bytes)
+{
+    VirtIODevice *vdev = vq->vdev;
+    unsigned int max, idx;
+    unsigned int total_bufs, in_total, out_total;
+    int64_t len = 0;
+    int rc;
+
+    idx = vq->last_avail_idx;
+    total_bufs = in_total = out_total = 0;
+
+    max = vq->vring.num;
+
+    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
+        unsigned int num_bufs;
+        VRingDesc *desc;
+        unsigned int i;
+
+        num_bufs = total_bufs;
+
+        if (!virtqueue_get_head(vq, idx++, &i)) {
+            goto err;
+        }
+
+        /* there is no need to copy anything from the cache struct */
+        desc = (VRingDesc *)vq->vring.desc + i;
+
+        if (desc->flags & VRING_DESC_F_INDIRECT) {
+            if (!desc->len || (desc->len % sizeof(VRingDesc))) {
+                DBG("Invalid size for indirect buffer table\n");
+                goto err;
+            }
+
+            /* If we've got too many, that implies a descriptor loop. */
+            if (num_bufs >= max) {
+                goto err;
+            }
+        }
+
+        do {
+            /* If we've got too many, that implies a descriptor loop.
+             */
+            if (++num_bufs > max) {
+                goto err;
+            }
+
+            if (desc->flags & VRING_DESC_F_WRITE) {
+                in_total += desc->len;
+            } else {
+                out_total += desc->len;
+            }
+            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
+                goto done;
+            }
+
+            rc = virtqueue_split_read_next_desc(vdev, desc, max, &i);
+            if (rc == VIRTQUEUE_READ_DESC_MORE) {
+                /* As above: 'next' indexes the table from its base. */
+                desc = (VRingDesc *)vq->vring.desc + i;
+            }
+        } while (rc == VIRTQUEUE_READ_DESC_MORE);
+
+        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
+            goto err;
+        }
+
+        total_bufs = num_bufs;
+    }
+
+    if (rc < 0) {
+        goto err;
+    }
+
+done:
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+    return;
+
+err:
+    in_total = out_total = 0;
+    goto done;
+}
+
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes,
+                               unsigned max_in_bytes, unsigned max_out_bytes)
+{
+    if (!vq->vring.desc) {
+        goto err;
+    }
+
+    virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes,
+                                    max_in_bytes, max_out_bytes);
+
+    return;
+err:
+    if (in_bytes) {
+        *in_bytes = 0;
+    }
+    if (out_bytes) {
+        *out_bytes = 0;
+    }
+}
+
+void print_neg_flag(uint64_t neg_flag, bool read)
+{
+    if (read)
+        DBG("Read:\n\t");
+    else
+        DBG("Write:\n\t");
+
+    switch (neg_flag) {
+    case VIRTIO_MMIO_MAGIC_VALUE: //0x000
+        DBG("VIRTIO_MMIO_MAGIC_VALUE\n");
+        break;
+    case VIRTIO_MMIO_VERSION: //0x004
+        DBG("VIRTIO_MMIO_VERSION\n");
+        break;
+    case VIRTIO_MMIO_DEVICE_ID: //0x008
+        DBG("VIRTIO_MMIO_DEVICE_ID\n");
+        break;
+    case VIRTIO_MMIO_VENDOR_ID: //0x00c
+        DBG("VIRTIO_MMIO_VENDOR_ID\n");
+        break;
+    case VIRTIO_MMIO_DEVICE_FEATURES: //0x010
+        DBG("VIRTIO_MMIO_DEVICE_FEATURES\n");
+        break;
+    case VIRTIO_MMIO_DEVICE_FEATURES_SEL: //0x014
+        DBG("VIRTIO_MMIO_DEVICE_FEATURES_SEL\n");
+        break;
+    case VIRTIO_MMIO_DRIVER_FEATURES: //0x020
+        DBG("VIRTIO_MMIO_DRIVER_FEATURES\n");
+        break;
+    case VIRTIO_MMIO_DRIVER_FEATURES_SEL: //0x024
+        DBG("VIRTIO_MMIO_DRIVER_FEATURES_SEL\n");
+        break;
+    case VIRTIO_MMIO_GUEST_PAGE_SIZE: //0x028
+        DBG("VIRTIO_MMIO_GUEST_PAGE_SIZE\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_SEL: //0x030
+        DBG("VIRTIO_MMIO_QUEUE_SEL\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_NUM_MAX: //0x034
+        DBG("VIRTIO_MMIO_QUEUE_NUM_MAX\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_NUM: //0x038
+        DBG("VIRTIO_MMIO_QUEUE_NUM\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_ALIGN: //0x03c
+        DBG("VIRTIO_MMIO_QUEUE_ALIGN\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_PFN: //0x040
+        DBG("VIRTIO_MMIO_QUEUE_PFN\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_READY: //0x044
+        DBG("VIRTIO_MMIO_QUEUE_READY\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_NOTIFY: //0x050
+        DBG("VIRTIO_MMIO_QUEUE_NOTIFY\n");
+        break;
+    case VIRTIO_MMIO_INTERRUPT_STATUS: //0x060
+        DBG("VIRTIO_MMIO_INTERRUPT_STATUS\n");
+        break;
+    case VIRTIO_MMIO_INTERRUPT_ACK: //0x064
+        DBG("VIRTIO_MMIO_INTERRUPT_ACK\n");
+        break;
+    case VIRTIO_MMIO_STATUS: //0x070
+        DBG("VIRTIO_MMIO_STATUS\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_DESC_LOW: //0x080
+        DBG("VIRTIO_MMIO_QUEUE_DESC_LOW\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_DESC_HIGH: //0x084
+        DBG("VIRTIO_MMIO_QUEUE_DESC_HIGH\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_LOW: //0x090
+        DBG("VIRTIO_MMIO_QUEUE_AVAIL_LOW\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: //0x094
+        DBG("VIRTIO_MMIO_QUEUE_AVAIL_HIGH\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_LOW: //0x0a0
+        DBG("VIRTIO_MMIO_QUEUE_USED_LOW\n");
+        break;
+    case VIRTIO_MMIO_QUEUE_USED_HIGH: //0x0a4
+        DBG("VIRTIO_MMIO_QUEUE_USED_HIGH\n");
+        break;
+    case VIRTIO_MMIO_SHM_SEL: //0x0ac
+        DBG("VIRTIO_MMIO_SHM_SEL\n");
+        break;
+    case VIRTIO_MMIO_SHM_LEN_LOW: //0x0b0
+        DBG("VIRTIO_MMIO_SHM_LEN_LOW\n");
+        break;
+    case
VIRTIO_MMIO_SHM_LEN_HIGH: //0x0b4 + DBG("VIRTIO_MMIO_SHM_LEN_HIGH\n"); + break; + case VIRTIO_MMIO_SHM_BASE_LOW: //0x0b8 + DBG("VIRTIO_MMIO_SHM_BASE_LOW\n"); + break; + case VIRTIO_MMIO_SHM_BASE_HIGH: //0x0bc + DBG("VIRTIO_MMIO_SHM_BASE_HIGH\n"); + break; + case VIRTIO_MMIO_CONFIG_GENERATION: //0x0fc + DBG("VIRTIO_MMIO_CONFIG_GENERATION\n"); + break; + case VIRTIO_MMIO_CONFIG: //0x100 + DBG("VIRTIO_MMIO_CONFIG\n"); + break; + default: + DBG("Negotiation flag Unknown: %ld\n", neg_flag); + return; + } + +} + +int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) +{ + bool bad = (val & ~(vdev->host_features)) != 0; + + val &= vdev->host_features; + + vdev->guest_features = val; + return bad ? -1 : 0; +} + +int virtio_set_features(VirtIODevice *vdev, uint64_t val) +{ + int ret; + /* + * The driver must not attempt to set features after feature negotiation + * has finished. + */ + if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) { + return -EINVAL; + } + ret = virtio_set_features_nocheck(vdev, val); + return ret; +} + + +/* TODO: MMIO notifiers -- This might not be needed anymore */ +static void virtio_queue_guest_notifier_read(EventNotifier *n) +{ +} + +int vhost_user_loopback_eventfd = 0; + +void *loopback_event_select(void *data) { + + int retval; + uint64_t eftd_ctr; + fd_set rfds; + int s; + + (void) data; + + DBG("\nWaiting event from vhost-user-device\n"); + fflush(stdout); + + FD_ZERO(&rfds); + FD_SET(vhost_user_loopback_eventfd, &rfds); + + while(1) { + + retval = select(vhost_user_loopback_eventfd+1, &rfds, NULL, NULL, NULL); + + if (retval == -1){ + DBG("\nselect() error. Exiting..."); + exit(EXIT_FAILURE); + } else if (retval > 0) { + + s = read(vhost_user_loopback_eventfd, &eftd_ctr, sizeof(uint64_t)); + if (s != sizeof(uint64_t)){ + DBG("\neventfd read error. Exiting..."); + exit(1); + } else { + virtio_irq(global_vdev->vq); + } + + } else if (retval == 0) { + DBG("\nselect() says that no data was available"); + } + } +} + + +void event_notifier_set_handler(EventNotifier *e, + void *handler) +{ + int ret; + pthread_t thread_id; + + vhost_user_loopback_eventfd = e->wfd; + + if (vhost_user_loopback_eventfd > 0) { + ret = pthread_create(&thread_id, NULL, loopback_event_select, NULL); + if (ret != 0) exit(1); + } +} + +void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + bool with_irqfd) +{ + if (assign && !with_irqfd) { + event_notifier_set_handler(&vq->guest_notifier, + virtio_queue_guest_notifier_read); + } else { + event_notifier_set_handler(&vq->guest_notifier, NULL); + } + if (!assign) { + /* Test and clear notifier before closing it, + * in case poll callback didn't have time to run. 
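+     * (virtio_queue_guest_notifier_read() is currently a no-op stub in
+     * this adapter, kept for parity with the QEMU code.)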
*/ + virtio_queue_guest_notifier_read(&vq->guest_notifier); + } +} + +EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) +{ + return &vq->guest_notifier; +} + +int virtio_mmio_set_guest_notifier(VirtIODevice *vdev, int n, bool assign, + bool with_irqfd) +{ + VirtioDeviceClass *vdc = vdev->vdev_class; + VirtQueue *vq = virtio_get_queue(vdev, n); + EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); + + if (assign) { + int r = event_notifier_init(notifier, 0); + if (r < 0) { + return r; + } + virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); + } else { + virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); + } + + return 0; +} + +int virtio_mmio_set_guest_notifiers(VirtIODevice *vdev, int nvqs, + bool assign) +{ + bool with_irqfd = false; + int r, n; + + nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX); + + for (n = 0; n < nvqs; n++) { + if (!virtio_queue_get_num(vdev, n)) { + break; + } + + r = virtio_mmio_set_guest_notifier(vdev, n, assign, with_irqfd); + if (r < 0) { + goto assign_error; + } + } + + return 0; + +assign_error: + DBG("Error return virtio_mmio_set_guest_notifiers\n"); + return r; +} + +EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) +{ + return &vq->host_notifier; +} + +void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) +{ + vq->host_notifier_enabled = enabled; +} + +int virtio_bus_set_host_notifier(VirtioBus *vbus, int n, bool assign) +{ + VirtIODevice *vdev = vbus->vdev; + VirtQueue *vq = virtio_get_queue(vdev, n); + + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + int r = 0; + + + if (!vbus->ioeventfd_assign) { + return -ENOSYS; + } + + if (assign) { + r = event_notifier_init(notifier, 1); + if (r < 0) { + DBG("unable to init event notifier: %d", r); + return r; + } + r = vbus->ioeventfd_assign(proxy, notifier, n, true); + if (r < 0) { + DBG("unable to assign ioeventfd: %d", r); + } + } else { + vbus->ioeventfd_assign(proxy, notifier, n, false); + } + + if (r == 0) { + virtio_queue_set_host_notifier_enabled(vq, assign); + } + + return r; +} + + + +/* On success, ioeventfd ownership belongs to the caller. */ +int virtio_bus_grab_ioeventfd(VirtioBus *bus) +{ + /* vhost can be used even if ioeventfd=off in the proxy device, + * so do not check k->ioeventfd_enabled. + */ + if (!bus->ioeventfd_assign) { + return -ENOSYS; + } + + if (bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) { + /* Remember that we need to restart ioeventfd + * when ioeventfd_grabbed becomes zero. 
+ */ + bus->ioeventfd_started = true; + } + bus->ioeventfd_grabbed++; + return 0; +} + +int virtio_device_grab_ioeventfd(VirtIODevice *vdev) +{ + return virtio_bus_grab_ioeventfd(vdev->vbus); +} + +bool virtio_device_disabled(VirtIODevice *vdev) +{ + return vdev->disabled || vdev->broken; +} + +int prev_level = 0; + +void virtio_mmio_update_irq(VirtIODevice *vdev) +{ + int level, irq_num = 44; + pthread_t my_thread_id; + + if (!vdev) { + return; + } + + level = (vdev->isr != 0); + + if (!((level == 1) && (prev_level == 0))) { + prev_level = level; + return; + } + prev_level = level; + + DBG("Trigger interrupt (ioctl)\n"); + ioctl(fd, IRQ, &irq_num); +} + + +/* virtio device */ +void virtio_notify_vector(VirtIODevice *vdev) +{ + + /* TODO: Check if this is still needed */ + if (virtio_device_disabled(vdev)) { + DBG("Device is disabled\n"); + return; + } + + virtio_mmio_update_irq(vdev); + + /* TODO: substitue the previous line with the + * following when it's implemented + * + * if (k->notify) { + * k->notify(qbus->parent, vector); + * } + */ +} + +void virtio_update_irq(VirtIODevice *vdev) +{ + virtio_notify_vector(vdev); +} + +void virtio_queue_notify(VirtIODevice *vdev, int n) +{ + VirtQueue *vq = &vdev->vq[n]; + + if (!vq->vring.desc || vdev->broken) { + return; + } + + if (vq->host_notifier_enabled) { + event_notifier_set(&vq->host_notifier); + } else if (vq->handle_output) { + vq->handle_output(vdev, vq); + + if (vdev->start_on_kick) { + virtio_set_started(vdev, true); + } + } +} + + +static uint64_t virtio_mmio_read(VirtIODevice *vdev, uint64_t offset, unsigned size) +{ + + print_neg_flag (offset, 1); + + if (!vdev) { + /* If no backend is present, we treat most registers as + * read-as-zero, except for the magic number, version and + * vendor ID. This is not strictly sanctioned by the virtio + * spec, but it allows us to provide transports with no backend + * plugged in which don't confuse Linux's virtio code: the + * probe won't complain about the bad magic number, but the + * device ID of zero means no backend will claim it. 
+ */ + switch (offset) { + case VIRTIO_MMIO_MAGIC_VALUE: + return VIRT_MAGIC; + case VIRTIO_MMIO_VERSION: + if (proxy->legacy) { + return VIRT_VERSION_LEGACY; + } else { + return VIRT_VERSION; + } + case VIRTIO_MMIO_VENDOR_ID: + return VIRT_VENDOR; + default: + return 0; + } + } + + if (offset >= VIRTIO_MMIO_CONFIG) { + offset -= VIRTIO_MMIO_CONFIG; + + /* TODO: To be implemented */ + + return 4; + } + + if (size != 4) { + DBG("wrong size access to register!\n"); + return 0; + } + + switch (offset) { + case VIRTIO_MMIO_MAGIC_VALUE: + return VIRT_MAGIC; + case VIRTIO_MMIO_VERSION: + if (proxy->legacy) { + return VIRT_VERSION_LEGACY; + } else { + return VIRT_VERSION; + } + case VIRTIO_MMIO_DEVICE_ID: + return vdev->device_id; + case VIRTIO_MMIO_VENDOR_ID: + return VIRT_VENDOR; + case VIRTIO_MMIO_DEVICE_FEATURES: + if (proxy->legacy) { + if (proxy->host_features_sel) { + return 0; + } else { + return vdev->host_features; + } + } else { + /* TODO: To be implemented */ + } + case VIRTIO_MMIO_QUEUE_NUM_MAX: + /* TODO: To be implemented */ + return VIRTQUEUE_MAX_SIZE; + case VIRTIO_MMIO_QUEUE_PFN: + if (!proxy->legacy) { + DBG("VIRTIO_MMIO_QUEUE_PFN: read from legacy register (0x%lx) in non-legacy mode\n", offset); + return 0; + } + return virtio_queue_get_addr(vdev, vdev->queue_sel) >> proxy->guest_page_shift; + + case VIRTIO_MMIO_QUEUE_READY: + if (proxy->legacy) { + DBG("VIRTIO_MMIO_QUEUE_READY: read from legacy register (0x%lx) in non-legacy mode\n", offset); + return 0; + } + /* TODO: To be implemented */ + case VIRTIO_MMIO_INTERRUPT_STATUS: + return vdev->isr; + case VIRTIO_MMIO_STATUS: + return vdev->status; + case VIRTIO_MMIO_CONFIG_GENERATION: + if (proxy->legacy) { + DBG("VIRTIO_MMIO_CONFIG_GENERATION: read from legacy register (0x%lx) in non-legacy mode\n", offset); + return 0; + } + return vdev->generation; + case VIRTIO_MMIO_SHM_LEN_LOW: + case VIRTIO_MMIO_SHM_LEN_HIGH: + /* + * VIRTIO_MMIO_SHM_SEL is unimplemented + * according to the linux driver, if region length is -1 + * the shared memory doesn't exist + */ + return -1; + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: + case VIRTIO_MMIO_DRIVER_FEATURES: + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: + case VIRTIO_MMIO_GUEST_PAGE_SIZE: + case VIRTIO_MMIO_QUEUE_SEL: + case VIRTIO_MMIO_QUEUE_NUM: + case VIRTIO_MMIO_QUEUE_ALIGN: + case VIRTIO_MMIO_QUEUE_NOTIFY: + case VIRTIO_MMIO_INTERRUPT_ACK: + case VIRTIO_MMIO_QUEUE_DESC_LOW: + case VIRTIO_MMIO_QUEUE_DESC_HIGH: + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: + case VIRTIO_MMIO_QUEUE_USED_LOW: + case VIRTIO_MMIO_QUEUE_USED_HIGH: + DBG("VIRTIO_MMIO_QUEUE_USED_HIGH: read of write-only register (0x%lx)\n", offset); + return 0; + default: + DBG("read: bad register offset (0x%lx)\n", offset); + return 0; + } + return 0; +} + + +void virtio_mmio_write(VirtIODevice *vdev, uint64_t offset, uint64_t value, + unsigned size) +{ + + print_neg_flag (offset, 0); + + if (!vdev) { + /* If no backend is present, we just make all registers + * write-ignored. This allows us to provide transports with + * no backend plugged in. 
+ */ + return; + } + + if (offset >= VIRTIO_MMIO_CONFIG) { + offset -= VIRTIO_MMIO_CONFIG; + /* TODO: To be implemented */ + return; + } + if (size != 4) { + DBG("write: wrong size access to register!\n"); + return; + } + switch (offset) { + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: + if (value) { + proxy->host_features_sel = 1; + } else { + proxy->host_features_sel = 0; + } + break; + case VIRTIO_MMIO_DRIVER_FEATURES: + if (proxy->legacy) { + if (proxy->guest_features_sel) { + DBG("attempt to write guest features with " + "guest_features_sel > 0 in legacy mode\n"); + } else { + virtio_set_features(vdev, value); + } + } else { + /* TODO: To be implemented */ + } + break; + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: + if (value) { + proxy->guest_features_sel = 1; + } else { + proxy->guest_features_sel = 0; + } + break; + case VIRTIO_MMIO_GUEST_PAGE_SIZE: + if (!proxy->legacy) { + DBG("write to legacy register (0x%lx" + ") in non-legacy mode\n", offset); + return; + } + if (proxy->guest_page_shift > 31) { + proxy->guest_page_shift = 0; + } + break; + case VIRTIO_MMIO_QUEUE_SEL: + if (value < VIRTIO_QUEUE_MAX) { + vdev->queue_sel = value; + } + break; + case VIRTIO_MMIO_QUEUE_NUM: + + virtio_queue_set_num(vdev, vdev->queue_sel, value); + + if (proxy->legacy) { + virtio_queue_update_rings(vdev, vdev->queue_sel); + } else { + /* TODO: To be implemented */ + exit(1); + } + break; + case VIRTIO_MMIO_QUEUE_ALIGN: + if (!proxy->legacy) { + DBG("write to legacy register (0x%lx) in non-legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_QUEUE_PFN: + if (!proxy->legacy) { + DBG("write to legacy register (0x%lx) in non-legacy mode\n", offset); + return; + } + if (value == 0) { + /* TODO: To be implemented */ + } else { + (void)value; + uint64_t desc_addr; + desc_addr = (uint64_t)mmap (NULL, 16*PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + virtio_queue_set_addr(vdev, vdev->queue_sel, + desc_addr); + } + break; + case VIRTIO_MMIO_QUEUE_READY: + if (proxy->legacy) { + DBG("write to non-legacy register (0x%lx) in legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_QUEUE_NOTIFY: + if (value < VIRTIO_QUEUE_MAX) { + virtio_queue_notify(vdev, value); + } + break; + case VIRTIO_MMIO_INTERRUPT_ACK: + vdev->isr = vdev->isr & ~value; + virtio_update_irq(vdev); + break; + case VIRTIO_MMIO_STATUS: + + /* TODO: Add it in a future release later + * + * if (!(value & VIRTIO_CONFIG_S_DRIVER_OK)) { + * virtio_mmio_stop_ioeventfd(proxy); + * } + */ + + if (!proxy->legacy && (value & VIRTIO_CONFIG_S_FEATURES_OK)) { + virtio_set_features(vdev, + ((uint64_t)proxy->guest_features[1]) << 32 | + proxy->guest_features[0]); + } + + virtio_set_status(vdev, value & 0xff); + + /* TODO: Check if this is still needed + * + * if (vdev->status == 0) { + * virtio_reset(vdev); + * virtio_mmio_soft_reset(proxy); + * } + */ + + break; + case VIRTIO_MMIO_QUEUE_DESC_LOW: + if (proxy->legacy) { + DBG("write to non-legacy register (0x%lx) in legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_QUEUE_DESC_HIGH: + if (proxy->legacy) { + DBG("write to non-legacy register (0x%lx) in legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: + if (proxy->legacy) { + DBG("write to non-legacy register (0x%lx) in legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: + if (proxy->legacy) { + 
DBG("write to non-legacy register (0x%lx) in legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_QUEUE_USED_LOW: + if (proxy->legacy) { + DBG("write to non-legacy register (0x%lx) in legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_QUEUE_USED_HIGH: + if (proxy->legacy) { + DBG("write to non-legacy register (0x%lx) in legacy mode\n", offset); + return; + } + /* TODO: To be implemented */ + break; + case VIRTIO_MMIO_MAGIC_VALUE: + case VIRTIO_MMIO_VERSION: + case VIRTIO_MMIO_DEVICE_ID: + case VIRTIO_MMIO_VENDOR_ID: + case VIRTIO_MMIO_DEVICE_FEATURES: + case VIRTIO_MMIO_QUEUE_NUM_MAX: + case VIRTIO_MMIO_INTERRUPT_STATUS: + case VIRTIO_MMIO_CONFIG_GENERATION: + /* TODO: To be implemented */ + break; + default: + DBG("bad register offset (0x%lx)\n", offset); + } +} + +VirtIODevice *global_vdev; +VirtioBus *global_vbus; + +void adapter_read_write_cb (void) { + + /* If you want to print all the incoming events enable the next line + * + * print_neg_flag (address->notification, address->read); + */ + + if (address->read) { + address->data = virtio_mmio_read(global_vdev, address->notification, address->size); + } else { + virtio_mmio_write(global_vdev, address->notification, address->data, address->size); + } + (void)ioctl(fd, WAKEUP); + +} + + +void *my_select(void *data) { + + int retval; + (void) data; + + DBG("\nWaiting for loopback read/write events\n"); + fflush(stdout); + + FD_ZERO(&rfds); + FD_SET(efd, &rfds); + + while(1) { + + retval = select(efd+1, &rfds, NULL, NULL, NULL); + + if (retval == -1){ + DBG("\nselect() error. Exiting..."); + exit(EXIT_FAILURE); + } else if (retval > 0) { + + s = read(efd, &eftd_ctr, sizeof(uint64_t)); + if (s != sizeof(uint64_t)){ + DBG("\neventfd read error. 
Exiting..."); + exit(1); + } else { + adapter_read_write_cb (); + } + + } else if (retval == 0) { + DBG("\nselect() says that no data was available"); + } + } + +} + +void create_rng_struct (void) { + + device_info.magic = 0x74726976; + device_info.version = 0x1; + device_info.device_id = 0x4; + device_info.vendor = 0x554d4551; +} + +VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n) +{ + return vdev->vq + n; +} + +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output) +{ + int i; + + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + if (vdev->vq[i].vring.num == 0) + break; + } + + if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) { + exit(1); + } + + vdev->vq[i].vring.num = queue_size; + vdev->vq[i].vring.num_default = queue_size; + vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN; + vdev->vq[i].handle_output = handle_output; + vdev->vq[i].used_elems = (VirtQueueElement *) malloc (sizeof(VirtQueueElement) * queue_size); + + return &vdev->vq[i]; +} + +void virtio_dev_init(VirtIODevice *vdev, const char *name, + uint16_t device_id, size_t config_size) +{ + int i; + + vdev->start_on_kick = false; + vdev->started = false; + vdev->device_id = device_id; + vdev->status = 0; + vdev->queue_sel = 0; + vdev->config_vector = VIRTIO_NO_VECTOR; + vdev->vq = (VirtQueue *) malloc(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX); + vdev->vm_running = false; + vdev->broken = false; + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + vdev->vq[i].vector = VIRTIO_NO_VECTOR; + vdev->vq[i].vdev = vdev; + vdev->vq[i].queue_index = i; + vdev->vq[i].host_notifier_enabled = false; + } + + vdev->name = name; + vdev->config_len = config_size; + if (vdev->config_len) { + vdev->config = (void *) malloc(config_size); + } else { + vdev->config = NULL; + } + + vdev->use_guest_notifier_mask = true; +} + +static bool virtio_mmio_ioeventfd_enabled(VirtIODevice *d) +{ + return (proxy->flags & VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD) != 0; +} + +/* TODO: This function might not be needed anymore */ +static int virtio_mmio_ioeventfd_assign(VirtIOMMIOProxy *d, + EventNotifier *notifier, + int n, bool assign) +{ + return 0; +} + +bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev) +{ + VirtioBus *k = vdev->vbus; + + if (!k->iommu_enabled) { + return false; + } + + return k->iommu_enabled(vdev); +} + +void virtio_mmio_bus_init(VirtioBus *k) +{ + k->set_guest_notifiers = virtio_mmio_set_guest_notifiers; + k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; + k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; +} + + +int virtio_mmio_start(void) { + + efd_data_t info; + pthread_t thread_id; + int ret = -1; + int flags; + + (void)info; + + fd = open("/dev/loopback", O_RDWR); + if (fd < 0) + { + perror ("Open call failed"); + return -1; + } + loopback_fd = fd; + + /* Create eventfd */ + efd = eventfd(0,0); + if (efd == -1) { + DBG("\nUnable to create eventfd! 
Exiting...\n"); + exit(EXIT_FAILURE); + } + + info.pid = getpid(); + info.efd = efd; + + (void)ioctl(fd, EFD_INIT, &info); + + /* Map notification mechanism */ + /* Multiple mmaps: /dev/loopback-0/vqs, /dev/loopback-0/ctlr */ + address = mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (address == MAP_FAILED) + { + perror ("mmap operation failed"); + return -1; + } + + /* Wait the eventfd */ + ret = pthread_create(&thread_id, NULL, my_select, NULL); + if (ret != 0) exit(1); + + /* Fille the device info */ + create_rng_struct(); + + /* Start loopback transport */ + (void)ioctl(fd, START_LOOPBACK, &device_info); + + + ret = pthread_join(thread_id, NULL); + if (ret != 0) exit(1); + + DBG("\nClosing eventfd. Exiting...\n"); + close(efd); + + exit(EXIT_SUCCESS); +} diff --git a/virtio_loopback.h b/virtio_loopback.h new file mode 100644 index 0000000..5400cd7 --- /dev/null +++ b/virtio_loopback.h @@ -0,0 +1,639 @@ +/* + * Based on: + * 1) virtio.h of Qemu project + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * 2) virtio-mmio.h of Qemu project + * + * Copyright (c) 2011 Linaro Limited + * + * Author: + * Peter Maydell <peter.maydell@linaro.org> + * + * 3) vhost.h of Qemu project + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +/* + * Control registers + */ +#ifndef VIRTIO_LOOPBACK +#define VIRTIO_LOOPBACK + +#include "event_notifier.h" + +/* Magic value ("virt" string) - Read Only */ +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 + +/* Virtio device version - Read Only */ +#define VIRTIO_MMIO_VERSION 0x004 + +/* Virtio device ID - Read Only */ +#define VIRTIO_MMIO_DEVICE_ID 0x008 + +/* Virtio vendor ID - Read Only */ +#define VIRTIO_MMIO_VENDOR_ID 0x00c + +/* Bitmask of the features supported by the device (host) + * (32 bits per set) - Read Only */ +#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 + +/* Device (host) features set selector - Write Only */ +#define VIRTIO_MMIO_DEVICE_FEATURES_SEL 0x014 + +/* Bitmask of features activated by the driver (guest) + * (32 bits per set) - Write Only */ +#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 + +/* Activated features set selector - Write Only */ +#define VIRTIO_MMIO_DRIVER_FEATURES_SEL 0x024 + +/* Guest's memory page size in bytes - Write Only */ +#define VIRTIO_MMIO_GUEST_PAGE_SIZE 0x028 + +/* Queue selector - Write Only */ +#define VIRTIO_MMIO_QUEUE_SEL 0x030 + +/* Maximum size of the currently selected queue - Read Only */ +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 + +/* Queue size for the currently selected queue - Write Only */ +#define VIRTIO_MMIO_QUEUE_NUM 0x038 + + +/* Used Ring alignment for the currently selected queue - Write Only */ +#define VIRTIO_MMIO_QUEUE_ALIGN 0x03c + +/* Guest's PFN for the currently selected queue - Read Write */ +#define VIRTIO_MMIO_QUEUE_PFN 0x040 + +/* Ready bit for the currently selected queue - Read Write */ +#define VIRTIO_MMIO_QUEUE_READY 0x044 + +/* Queue notifier - Write Only */ +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 + +/* Interrupt status - Read Only */ +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 + +/* Interrupt acknowledge - Write Only */ +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 + +/* Device status register - Read Write */ +#define VIRTIO_MMIO_STATUS 0x070 + +/* Selected queue's Descriptor Table address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 +#define VIRTIO_MMIO_QUEUE_DESC_HIGH 
0x084 + +/* Selected queue's Available Ring address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_AVAIL_LOW 0x090 +#define VIRTIO_MMIO_QUEUE_AVAIL_HIGH 0x094 + +/* Selected queue's Used Ring address, 64 bits in two halves */ +#define VIRTIO_MMIO_QUEUE_USED_LOW 0x0a0 +#define VIRTIO_MMIO_QUEUE_USED_HIGH 0x0a4 + +/* Shared memory region id */ +#define VIRTIO_MMIO_SHM_SEL 0x0ac + +/* Shared memory region length, 64 bits in two halves */ +#define VIRTIO_MMIO_SHM_LEN_LOW 0x0b0 +#define VIRTIO_MMIO_SHM_LEN_HIGH 0x0b4 + +/* Shared memory region base address, 64 bits in two halves */ +#define VIRTIO_MMIO_SHM_BASE_LOW 0x0b8 +#define VIRTIO_MMIO_SHM_BASE_HIGH 0x0bc + +/* Configuration atomicity value */ +#define VIRTIO_MMIO_CONFIG_GENERATION 0x0fc + +/* The config space is defined by each driver as + * the per-driver configuration space - Read Write */ +#define VIRTIO_MMIO_CONFIG 0x100 + +/* + * Interrupt flags (re: interrupt status & acknowledge registers) + */ +#define VIRTIO_MMIO_INT_VRING (1 << 0) +#define VIRTIO_MMIO_INT_CONFIG (1 << 1) + +#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT 1 +#define VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD \ + (1 << VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT) + + +/* Virtio loopback driver related */ + +/* Qemu defines */ +#define VIRT_MAGIC 0x74726976 /* 'virt' */ +#define VIRT_VERSION 2 +#define VIRT_VERSION_LEGACY 1 +#define VIRT_VENDOR 0x554D4551 /* 'QEMU' */ + +#define VIRTQUEUE_MAX_SIZE 1024 +#define VIRTIO_QUEUE_MAX 1024 +#define VIRTIO_NO_VECTOR 0xffff +#define TYPE_VIRTIO_DEVICE "virtio-device" + +/* Loopback negotiation code */ + +#define PAGE_SIZE 4096 +#define EFD_INIT _IOC(_IOC_WRITE, 'k', 1, sizeof(efd_data_t)) +#define WAKEUP _IOC(_IOC_WRITE, 'k', 2, 0) +#define START_LOOPBACK _IOC(_IOC_WRITE, 'k', 3, sizeof(virtio_device_info_struct_t)) +#define IRQ _IOC(_IOC_WRITE, 'k', 4, sizeof(int)) +#define SHARE_VQS _IOC(_IOC_WRITE, 'k', 5, 0) +#define SHARE_BUF _IOC(_IOC_WRITE, 'k', 6, sizeof(uint64_t)) +#define USED_INFO _IOC(_IOC_WRITE, 'k', 7, 0) +#define DATA_INFO _IOC(_IOC_WRITE, 'k', 8, 0) + +#define VIRTIO_PCI_VRING_ALIGN 4096 + +typedef struct VirtIOMMIOProxy { + /* Generic */ + bool legacy; + uint32_t flags; + /* Guest accessible state needing migration and reset */ + uint32_t host_features_sel; + uint32_t guest_features_sel; + uint32_t guest_page_shift; + /* virtio-bus */ + bool format_transport_address; + /* Fields only used for non-legacy (v2) devices */ + uint32_t guest_features[2]; +} VirtIOMMIOProxy; + + +/* Vring specific */ +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* + * Mark a descriptor as available or used in packed ring. + * Notice: they are defined as shifts instead of shifted values. + */ +#define VRING_PACKED_DESC_F_AVAIL 7 +#define VRING_PACKED_DESC_F_USED 15 + +/* The Host uses this in used->flags to advise the Guest: don't kick me when + * you add a buffer. It's unreliable, so it's simply an optimization. Guest + * will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't interrupt me + * when you consume a buffer. It's unreliable, so it's simply an + * optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* Enable events in packed ring. 
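+ * Written by the driver into the flags field of its event suppression
+ * structure to request a notification for every used descriptor. A
+ * hypothetical driver-side store (drv_evt is not defined in this header):
+ *
+ *     drv_evt->flags = VRING_PACKED_EVENT_FLAG_ENABLE;
+ *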
+ */
+#define VRING_PACKED_EVENT_FLAG_ENABLE 0x0
+/* Disable events in packed ring. */
+#define VRING_PACKED_EVENT_FLAG_DISABLE 0x1
+/*
+ * Enable events for a specific descriptor in packed ring.
+ * (as specified by Descriptor Ring Change Event Offset/Wrap Counter).
+ * Only valid if VIRTIO_RING_F_EVENT_IDX has been negotiated.
+ */
+#define VRING_PACKED_EVENT_FLAG_DESC 0x2
+
+/*
+ * Wrap counter bit shift in event suppression structure
+ * of packed ring.
+ */
+#define VRING_PACKED_EVENT_F_WRAP_CTR 15
+
+/* We support indirect buffer descriptors */
+#define VIRTIO_RING_F_INDIRECT_DESC 28
+
+/* The Guest publishes the used index for which it expects an interrupt
+ * at the end of the avail ring. Host should ignore the avail->flags field. */
+/* The Host publishes the avail index for which it expects a kick
+ * at the end of the used ring. Guest should ignore the used->flags field. */
+#define VIRTIO_RING_F_EVENT_IDX 29
+
+/* Alignment requirements for vring elements.
+ * When using pre-virtio 1.0 layout, these fall out naturally.
+ */
+#define VRING_AVAIL_ALIGN_SIZE 2
+#define VRING_USED_ALIGN_SIZE 4
+#define VRING_DESC_ALIGN_SIZE 16
+/******************/
+
+typedef struct VRing
+{
+    unsigned int num;
+    unsigned int num_default;
+    unsigned int align;
+    uint64_t desc;
+    uint64_t avail;
+    uint64_t used;
+} VRing;
+
+typedef struct VRingDesc
+{
+    uint64_t addr;
+    uint32_t len;
+    uint16_t flags;
+    uint16_t next;
+} VRingDesc;
+
+typedef struct VRingPackedDesc {
+    uint64_t addr;
+    uint32_t len;
+    uint16_t id;
+    uint16_t flags;
+} VRingPackedDesc;
+
+typedef struct VRingAvail
+{
+    uint16_t flags;
+    uint16_t idx;
+    uint16_t ring[];
+} VRingAvail;
+
+typedef struct VRingUsedElem
+{
+    uint32_t id;
+    uint32_t len;
+} VRingUsedElem;
+
+typedef struct VRingUsed
+{
+    uint16_t flags;
+    uint16_t idx;
+    VRingUsedElem ring[];
+} VRingUsed;
+
+typedef struct VirtQueueElement
+{
+    unsigned int index;
+    unsigned int len;
+    unsigned int ndescs;
+    unsigned int out_num;
+    unsigned int in_num;
+    uint64_t *in_addr;
+    uint64_t *out_addr;
+    struct iovec *in_sg;
+    struct iovec *out_sg;
+} VirtQueueElement;
+
+typedef struct VirtIODevice VirtIODevice;
+typedef struct VirtQueue VirtQueue;
+typedef void (*VirtIOHandleOutput)(VirtIODevice *, VirtQueue *);
+
+typedef struct VirtQueue
+{
+    VRing vring;
+    VirtQueueElement *used_elems;
+
+    /* Next head to pop */
+    uint16_t last_avail_idx;
+    bool last_avail_wrap_counter;
+
+    /* Last avail_idx read from VQ. */
+    uint16_t shadow_avail_idx;
+    bool shadow_avail_wrap_counter;
+
+    uint16_t used_idx;
+    bool used_wrap_counter;
+
+    /* Last used index value we have signalled on */
+    uint16_t signalled_used;
+
+    /* Whether the signalled_used value above is valid */
+    bool signalled_used_valid;
+
+    /* Notification enabled? 
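+     * I.e. whether the device currently wants to be kicked by the
+     * driver when new avail buffers are posted; cf. the
+     * VRING_USED_F_NO_NOTIFY hint above.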
*/ + bool notification; + + uint16_t queue_index; + + unsigned int inuse; + + uint16_t vector; + VirtIOHandleOutput handle_output; + VirtIODevice *vdev; + + EventNotifier guest_notifier; + EventNotifier host_notifier; + bool host_notifier_enabled; + //QLIST_ENTRY(VirtQueue) node; +} VirtQueue; + +typedef struct VirtIORNG VirtIORNG; +typedef struct VHostUserRNG VHostUserRNG; +typedef struct VirtioDeviceClass VirtioDeviceClass; +typedef struct VirtioBus VirtioBus; + +typedef struct VirtIODevice +{ + //DeviceState parent_obj; + VirtioBus *vbus; + VirtioDeviceClass *vdev_class; + const char *name; + uint8_t status; + uint8_t isr; + uint16_t queue_sel; + uint64_t guest_features; + uint64_t host_features; + uint64_t backend_features; + size_t config_len; + void *config; + uint16_t config_vector; + uint32_t generation; + int nvectors; + VirtQueue *vq; + //MemoryListener listener; + uint16_t device_id; + bool vm_running; + bool broken; /* device in invalid state, needs reset */ + bool use_disabled_flag; /* allow use of 'disable' flag when needed */ + bool disabled; /* device in temporarily disabled state */ + bool use_started; + bool started; + bool start_on_kick; /* when virtio 1.0 feature has not been negotiated */ + bool disable_legacy_check; + //VMChangeStateEntry *vmstate; + char *bus_name; + uint8_t device_endian; + bool use_guest_notifier_mask; + VirtIORNG *vrng; + VHostUserRNG *vhrng; + //AddressSpace *dma_as; + //QLIST_HEAD(, VirtQueue) *vector_queues; +} VirtIODevice; + +typedef struct efd_data { + int efd; + int pid; +} efd_data_t; + +typedef struct virtio_device_info_struct { + unsigned long magic; + unsigned long version; + unsigned long device_id; + unsigned long vendor; + +} virtio_device_info_struct_t; + +/* proto */ +typedef struct virtio_neg { + uint64_t notification; + uint64_t data; + uint64_t size; + bool read; + bool done; + bool request_op; +} virtio_neg_t; + + +/* This is left here as a reference, might be useful in the future */ +/* + * static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) + * { + * BusClass *bus_class = BUS_CLASS(klass); + * VirtioBusClass *k = VIRTIO_BUS_CLASS(klass); + * + * k->notify = virtio_mmio_update_irq; + * k->save_config = virtio_mmio_save_config; + * k->load_config = virtio_mmio_load_config; + * k->save_extra_state = virtio_mmio_save_extra_state; + * k->load_extra_state = virtio_mmio_load_extra_state; + * k->has_extra_state = virtio_mmio_has_extra_state; + * k->set_guest_notifiers = virtio_mmio_set_guest_notifiers; + * k->ioeventfd_enabled = virtio_mmio_ioeventfd_enabled; + * k->ioeventfd_assign = virtio_mmio_ioeventfd_assign; + * k->pre_plugged = virtio_mmio_pre_plugged; + * k->vmstate_change = virtio_mmio_vmstate_change; + * k->has_variable_vring_alignment = true; + * bus_class->max_dev = 1; + * bus_class->get_dev_path = virtio_mmio_bus_get_dev_path; + * } + * + */ + + +typedef struct VirtioBus { + + VirtIODevice *vdev; + void (*notify)(VirtIODevice *d, uint16_t vector); + bool (*has_extra_state)(VirtIODevice *d); + bool (*query_guest_notifiers)(VirtIODevice *d); + int (*set_guest_notifiers)(VirtIODevice *d, int nvqs, bool assign); + void (*vmstate_change)(VirtIODevice *d, bool running); + void (*pre_plugged)(VirtIODevice *d); + void (*device_plugged)(VirtIODevice *d); + /* + * transport independent exit function. + * This is called by virtio-bus just before the device is unplugged. 
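+     * Transports typically use it to tear down per-device state such
+     * as assigned ioeventfds and guest notifiers.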
+ */ + void (*device_unplugged)(VirtIODevice *d); + int (*query_nvectors)(VirtIODevice *d); + /* + * ioeventfd handling: if the transport implements ioeventfd_assign, + * it must implement ioeventfd_enabled as well. + */ + /* Returns true if the ioeventfd is enabled for the device. */ + bool (*ioeventfd_enabled)(VirtIODevice *d); + /* + * Assigns/deassigns the ioeventfd backing for the transport on + * the device for queue number n. Returns an error value on + * failure. + */ + int (*ioeventfd_assign)(VirtIOMMIOProxy *d, EventNotifier *notifier, + int n, bool assign); + /* + * Whether queue number n is enabled. + */ + bool (*queue_enabled)(VirtIODevice *d, int n); + /* + * Does the transport have variable vring alignment? + * (ie can it ever call virtio_queue_set_align()?) + * Note that changing this will break migration for this transport. + */ + bool has_variable_vring_alignment; + bool (*iommu_enabled)(VirtIODevice *d); + + /* + * Set if ioeventfd has been started. + */ + bool ioeventfd_started; + + /* + * Set if ioeventfd has been grabbed by vhost. When ioeventfd + * is grabbed by vhost, we track its started/stopped state (which + * depends in turn on the virtio status register), but do not + * register a handler for the ioeventfd. When ioeventfd is + * released, if ioeventfd_started is true we finally register + * the handler so that QEMU's device model can use ioeventfd. + */ + int ioeventfd_grabbed; +} VirtioBus; + + +typedef struct VirtioDeviceClass { + /*< private >*/ + VirtIODevice *parent; + /*< public >*/ + /* This is what a VirtioDevice must implement */ + uint64_t (*get_features)(VirtIODevice *vdev, + uint64_t requested_features); + uint64_t (*bad_features)(VirtIODevice *vdev); + void (*set_features)(VirtIODevice *vdev, uint64_t val); + int (*validate_features)(VirtIODevice *vdev); + void (*get_config)(VirtIODevice *vdev, uint8_t *config); + void (*set_config)(VirtIODevice *vdev, const uint8_t *config); + void (*reset)(VirtIODevice *vdev); + void (*set_status)(VirtIODevice *vdev, uint8_t val); + /* For transitional devices, this is a bitmap of features + * that are only exposed on the legacy interface but not + * the modern one. + */ + uint64_t legacy_features; + /* Test and clear event pending status. + * Should be called after unmask to avoid losing events. + * If backend does not support masking, + * must check in frontend instead. + */ + bool (*guest_notifier_pending)(VirtIODevice *vdev, int n); + /* Mask/unmask events from this vq. Any events reported + * while masked will become pending. + * If backend does not support masking, + * must mask in frontend instead. + */ + void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask); + int (*start_ioeventfd)(VirtIODevice *vdev); + void (*stop_ioeventfd)(VirtIODevice *vdev); + /* Saving and loading of a device; trying to deprecate save/load + * use vmsd for new devices. + */ + /* Post load hook in vmsd is called early while device is processed, and + * when VirtIODevice isn't fully initialized. Devices should use this instead, + * unless they specifically want to verify the migration stream as it's + * processed, e.g. for bounds checking. 
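+     * (This adapter wires up no vmstate/migration handling, so these
+     * hooks appear to be unused here.)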
+ */ + int (*post_load)(VirtIODevice *vdev); + bool (*primary_unplug_pending)(void *opaque); +} VirtioDeviceClass; + +/* Global variables */ +extern int fd; +extern int loopback_fd; + +void handle_input(VirtIODevice *vdev, VirtQueue *vq); +void *my_select(void *data); +void *wait_read_write(void *data); +void *my_notify(void *data); +void create_rng_struct (void); +void print_neg_flag(uint64_t neg_flag, bool read); +void adapter_read_write_cb (void); +int virtio_mmio_start(void); + +int virtio_queue_ready(VirtQueue *vq); +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes); +void virtio_add_feature(uint64_t *features, unsigned int fbit); +bool virtio_has_feature(uint64_t features, unsigned int fbit); + +int virtio_queue_empty(VirtQueue *vq); +void *virtqueue_pop(VirtQueue *vq, size_t sz); +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len); +size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes); +bool virtqueue_get_head(VirtQueue *vq, unsigned int idx, + unsigned int *head); +void virtio_notify_vector(VirtIODevice *vdev); + +enum { + VIRTQUEUE_READ_DESC_ERROR = -1, + VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ + VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */ +}; + +size_t qemu_iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes); +VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size, + VirtIOHandleOutput handle_output); +VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n); +void virtio_dev_init(VirtIODevice *vdev, const char *name, + uint16_t device_id, size_t config_size); +void virtio_mmio_bus_init(VirtioBus *k); +int virtio_bus_set_host_notifier(VirtioBus *vbus, int n, bool assign); +EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq); +EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq); +uint64_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n); +uint64_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n); +uint64_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n); +int virtio_queue_get_num(VirtIODevice *vdev, int n); +unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n); +uint64_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n); +uint64_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n); +uint64_t virtio_queue_get_used_size(VirtIODevice *vdev, int n); +void virtio_set_isr(VirtIODevice *vdev, int value); +int virtio_device_grab_ioeventfd(VirtIODevice *vdev); +bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev); +size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes); +void event_notifier_set_handler(EventNotifier *e, + void *handler); +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq); +int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, + unsigned int max, unsigned int *next); + +/* Do we get callbacks when the ring is completely used, even if we've + * suppressed them? */ +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 +#define VIRTIO_CONFIG_S_FEATURES_OK 8 +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +#define VIRTIO_F_VERSION_1 32 +#define VIRTIO_F_ACCESS_PLATFORM 33 +/* Legacy name for VIRTIO_F_ACCESS_PLATFORM (for compatibility with old userspace) */ +#define VIRTIO_F_IOMMU_PLATFORM VIRTIO_F_ACCESS_PLATFORM + +/* Qemu Aligned functions */ +/* + * Round number down to multiple. 
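+ * E.g. QEMU_ALIGN_DOWN(17, 8) == 16 and QEMU_ALIGN_UP(17, 8) == 24.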
+ * Safe when m is not a power of 2 (see
+ * ROUND_DOWN for a faster version when a power of 2 is guaranteed).
+ */
+#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m))
+
+/*
+ * Round number up to multiple. Safe when m is not a power of 2 (see
+ * ROUND_UP for a faster version when a power of 2 is guaranteed).
+ */
+#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m))
+
+/* Check if n is a multiple of m */
+#define QEMU_IS_ALIGNED(n, m) (((n) % (m)) == 0)
+
+/* n-byte align pointer down */
+#define QEMU_ALIGN_PTR_DOWN(p, n) \
+    ((typeof(p))QEMU_ALIGN_DOWN((uintptr_t)(p), (n)))
+
+/* n-byte align pointer up */
+#define QEMU_ALIGN_PTR_UP(p, n) \
+    ((typeof(p))QEMU_ALIGN_UP((uintptr_t)(p), (n)))
+
+/* Check if pointer p is n-bytes aligned */
+#define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n))
+
+extern VirtIODevice *global_vdev;
+extern VirtIOMMIOProxy *proxy;
+extern VirtioBus *global_vbus;
+
+#endif /* VIRTIO_LOOPBACK */
+
diff --git a/virtio_rng.c b/virtio_rng.c
new file mode 100644
index 0000000..7fd7000
--- /dev/null
+++ b/virtio_rng.c
@@ -0,0 +1,171 @@
+/*
+ * A virtio device implementing a hardware random number generator.
+ *
+ * Based on virtio-rng.c of Qemu project
+ * Copyright 2012 Red Hat, Inc.
+ * Copyright 2012 Amit Shah <amit.shah@redhat.com>
+ *
+ * Copyright 2022 Virtual Open Systems SAS.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>     /* definition of uint64_t */
+#include <string.h>
+#include <stdbool.h>
+#include <sys/param.h>
+
+/* Project header files */
+#include "virtio_loopback.h"
+#include "virtio_rng.h"
+
+#ifdef DEBUG
+#define DBG(...) printf("virtio-rng: " __VA_ARGS__)
+#else
+#define DBG(...)
+#endif /* DEBUG */
+
+bool is_guest_ready(VirtIORNG *vrng)
+{
+    VirtIODevice *vdev = vrng->parent_obj;
+
+    if (virtio_queue_ready(vrng->vq)
+        && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
+        return true;
+    }
+    return false;
+}
+
+size_t get_request_size(VirtQueue *vq, unsigned quota)
+{
+    unsigned int in, out;
+
+    virtqueue_get_avail_bytes(vq, &in, &out, quota, 0);
+    return in;
+}
+
+void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status)
+{
+    VirtIORNG *vrng = vdev->vrng;
+
+    vdev->status = status;
+
+    /* Something changed, try to process buffers */
+    virtio_rng_process(vrng);
+}
+
+/* Send data from a char device over to the guest */
+void chr_read(VirtIORNG *vrng, const void *buf, size_t size)
+{
+    VirtIODevice *vdev = vrng->parent_obj;
+    VirtQueueElement *elem;
+    size_t len;
+    size_t offset;
+
+    if (!is_guest_ready(vrng)) {
+        return;
+    }
+
+    vrng->quota_remaining -= size;
+
+    offset = 0;
+    while (offset < size) {
+        elem = virtqueue_pop(vrng->vq, sizeof(VirtQueueElement));
+        if (!elem) {
+            break;
+        }
+        len = qemu_iov_from_buf(elem->in_sg, elem->in_num,
+                                0, (const char *)buf + offset, size - offset);
+        offset += len;
+
+        virtqueue_push(vrng->vq, elem, len);
+
+        /* TODO: We need to free the elem
+         *
+         * g_free(elem);
+         */
+    }
+    virtio_notify(vdev, vrng->vq);
+
+    if (!virtio_queue_empty(vrng->vq)) {
+        /* If we didn't drain the queue, call virtio_rng_process
+         * to take care of asking for more data as appropriate.
+         */
+        virtio_rng_process(vrng);
+    }
+}
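+
+/*
+ * qemu_iov_from_buf() above scatters a flat buffer across the element's
+ * in_sg vector. A minimal, self-contained sketch of that scatter step
+ * (illustrative only and guarded out; `scatter' is not part of this file):
+ */
+#if 0
+#include <string.h>
+#include <sys/uio.h>
+
+static size_t scatter(const struct iovec *iov, unsigned int iov_cnt,
+                      const void *buf, size_t bytes)
+{
+    size_t done = 0;
+    unsigned int i;
+
+    for (i = 0; i < iov_cnt && done < bytes; i++) {
+        size_t n = iov[i].iov_len < bytes - done ? iov[i].iov_len
+                                                 : bytes - done;
+        memcpy(iov[i].iov_base, (const char *)buf + done, n);
+        done += n;
+    }
+    return done;    /* may be short if the iovec cannot hold `bytes' */
+}
+#endif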
+ */ + virtio_rng_process(vrng); + } +} + +const char test_str[64] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; + +void virtio_rng_process(VirtIORNG *vrng) +{ + size_t size; + unsigned quota; + + if (!is_guest_ready(vrng)) { + return; + } + + if (vrng->quota_remaining < 0) { + quota = 0; + } else { + quota = MIN((uint64_t)vrng->quota_remaining, (uint64_t)UINT32_MAX); + } + size = get_request_size(vrng->vq, quota); + size = MIN(vrng->quota_remaining, size); + + if (size) { + chr_read(vrng, &test_str, size); + } +} + +void handle_input(VirtIODevice *vdev, VirtQueue *vq) +{ + virtio_rng_process(vdev->vrng); +} + +static void virtio_dev_class_init (VirtIODevice *vdev) { + + vdev->vdev_class = (VirtioDeviceClass *) malloc(sizeof(VirtioDeviceClass)); + vdev->vdev_class->parent = vdev; + vdev->vdev_class->set_status = virtio_rng_set_status; +} + +void virtio_rng_init(VirtIODevice *vdev) { + + VirtIORNG *vrng = (VirtIORNG*) malloc (sizeof(VirtIORNG)); + vdev->vrng = vrng; + vrng->parent_obj = vdev; + vrng->vq = vdev->vq; + vrng->quota_remaining = LONG_MAX; + + /* Prepare dev_class */ + virtio_dev_class_init (vdev); +} + + +void virtio_rng_realize(void) { + + /* prepare procy and virtio dev*/ + proxy = (VirtIOMMIOProxy*) malloc (sizeof(VirtIOMMIOProxy)); + + virtio_dev_init(global_vdev, "virtio-rng", 4, 0); + + virtio_rng_init(global_vdev); + + global_vdev->vq = virtio_add_queue(global_vdev, 8, handle_input); + + global_vdev->host_features = 0x39000000; + + *proxy = (VirtIOMMIOProxy) { + .legacy = 1, + }; +} + diff --git a/virtio_rng.h b/virtio_rng.h new file mode 100644 index 0000000..042f0fd --- /dev/null +++ b/virtio_rng.h @@ -0,0 +1,58 @@ +/* + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#ifndef VIRTIO_RNG +#define VIRTIO_RNG + +#include "virtio_loopback.h" + +extern const char test_str[64]; + +typedef void RngBackend; + +typedef struct VirtIORNGConf { + RngBackend *rng; + uint64_t max_bytes; + uint32_t period_ms; +} VirtIORNGConf; + +typedef struct VirtIORNG { + VirtIODevice *parent_obj; + + /* Only one vq - guest puts buffer(s) on it when it needs entropy */ + VirtQueue *vq; + VirtIORNGConf conf; + RngBackend *rng; + + /* We purposefully don't migrate this state. The quota will reset on the + * destination as a result. Rate limiting is host state, not guest state. 
+ */ + int64_t quota_remaining; + bool activate_timer; + +} VirtIORNG; + +bool is_guest_ready(VirtIORNG *vrng); +size_t get_request_size(VirtQueue *vq, unsigned quota); +void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status); +void virtio_rng_process(VirtIORNG *vrng); +void chr_read(VirtIORNG *vrng, const void *buf, size_t size); +void virtio_rng_realize(void); +void virtio_rng_init(VirtIODevice *vdev); + +#endif /* VIRTIO_RNG */ |