diff options
author | 2024-12-03 10:27:24 +0200 | |
---|---|---|
committer | 2024-12-03 10:48:12 +0200 | |
commit | 7c56b4ab66bab75f629900a1d59d8cae7745525e (patch) | |
tree | aad61bfcdf60d61658b20ec7e68e6bceeeab5fd9 | |
parent | 266db2473316eb02c6c9d12c14ae2b917fda5d2c (diff) |
Update virtio-loopback adapter - notification / interrupt mechanisms
Updates:
- Notification mechanism update:
- The adapter passes the vqs' eventfds to the driver.
In this way, the driver will bypass the adapter when
delivering notifications to the vhost-user device
during the data exchange process.
- Interrupt mechanism update:
- Simplify the conditions for when an interrupt should
be delivered to the driver.
Change-Id: Ice0dbe0165b275f70bd390bc1e1e22184b3fa94e
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | include/adapter.h | 2 | ||||
-rw-r--r-- | include/event_notifier.h | 2 | ||||
-rw-r--r-- | include/vhost_loopback.h | 2 | ||||
-rw-r--r-- | include/vhost_user_blk.h | 2 | ||||
-rw-r--r-- | include/vhost_user_can.h | 2 | ||||
-rw-r--r-- | include/vhost_user_console.h | 2 | ||||
-rw-r--r-- | include/vhost_user_loopback.h | 2 | ||||
-rw-r--r-- | include/vhost_user_rng.h | 2 | ||||
-rw-r--r-- | include/virtio_loopback.h | 167 | ||||
-rw-r--r-- | src/adapter/adapter.c | 2 | ||||
-rw-r--r-- | src/common/event_notifier.c | 2 | ||||
-rw-r--r-- | src/devices/vhost_user_blk.c | 77 | ||||
-rw-r--r-- | src/devices/vhost_user_can.c | 2 | ||||
-rw-r--r-- | src/devices/vhost_user_console.c | 2 | ||||
-rw-r--r-- | src/devices/vhost_user_gpio.c | 2 | ||||
-rw-r--r-- | src/devices/vhost_user_rng.c | 2 | ||||
-rw-r--r-- | src/devices/vhost_user_sound.c | 2 | ||||
-rw-r--r-- | src/lib/vhost_loopback.c | 24 | ||||
-rw-r--r-- | src/lib/virtio_loopback.c | 1089 | ||||
-rw-r--r-- | src/lib/virtio_vring.c | 792 |
21 files changed, 1054 insertions, 1127 deletions
@@ -1,4 +1,4 @@ -# Copyright 2022-2023 Virtual Open Systems SAS +# Copyright 2022-2024 Virtual Open Systems SAS # # Authors: # Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> diff --git a/include/adapter.h b/include/adapter.h index 601ed23..590a0ab 100644 --- a/include/adapter.h +++ b/include/adapter.h @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2022-2023 Virtual Open Systems SAS. + * Copyright (c) 2022-2024 Virtual Open Systems SAS. * * Author: * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> diff --git a/include/event_notifier.h b/include/event_notifier.h index 718f784..61324a9 100644 --- a/include/event_notifier.h +++ b/include/event_notifier.h @@ -6,7 +6,7 @@ * Authors: * Michael S. Tsirkin <mst@redhat.com> * - * Copyright 2023 Virtual Open Systems SAS. + * Copyright 2022-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/vhost_loopback.h b/include/vhost_loopback.h index 198a5af..d752569 100644 --- a/include/vhost_loopback.h +++ b/include/vhost_loopback.h @@ -1,7 +1,7 @@ /* * Based on vhost.h of QEMU project * - * Copyright 2022-2023 Virtual Open Systems SAS. + * Copyright 2022-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/vhost_user_blk.h b/include/vhost_user_blk.h index 3b25dfe..dc793f5 100644 --- a/include/vhost_user_blk.h +++ b/include/vhost_user_blk.h @@ -7,7 +7,7 @@ * Changpeng Liu <changpeng.liu@intel.com> * * - * Copyright (c) 2022-2023 Virtual Open Systems SAS. + * Copyright (c) 2022-2024 Virtual Open Systems SAS. 
* * * This program is free software; you can redistribute it and/or modify diff --git a/include/vhost_user_can.h b/include/vhost_user_can.h index 4768499..ce64f6d 100644 --- a/include/vhost_user_can.h +++ b/include/vhost_user_can.h @@ -5,7 +5,7 @@ * https://github.com/OpenSynergy/qemu/tree/virtio-can-spec-rfc-v3 * * Copyright (C) 2021-2023 OpenSynergy GmbH - * Copyright (c) 2023 Virtual Open Systems SAS. + * Copyright (c) 2023-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/vhost_user_console.h b/include/vhost_user_console.h index 9f0eed8..51df071 100644 --- a/include/vhost_user_console.h +++ b/include/vhost_user_console.h @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2023 Virtual Open Systems SAS. + * Copyright (c) 2023-2024 Virtual Open Systems SAS. * * This work is licensed under the terms of the GNU GPL, version 2 or * (at your option) any later version. See the COPYING file in the diff --git a/include/vhost_user_loopback.h b/include/vhost_user_loopback.h index 3aa8e8c..8b0294f 100644 --- a/include/vhost_user_loopback.h +++ b/include/vhost_user_loopback.h @@ -7,7 +7,7 @@ * Victor Kaplansky <victork@redhat.com> * Marc-André Lureau <mlureau@redhat.com> * - * Copyright 2022-2023 Virtual Open Systems SAS. + * Copyright 2022-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/include/vhost_user_rng.h b/include/vhost_user_rng.h index b6f58da..ff92f2b 100644 --- a/include/vhost_user_rng.h +++ b/include/vhost_user_rng.h @@ -3,7 +3,7 @@ * * Copyright (c) 2021 Mathieu Poirier <mathieu.poirier@linaro.org> * - * Copyright (c) 2022-2023 Virtual Open Systems SAS. + * Copyright (c) 2022-2024 Virtual Open Systems SAS. 
* * Author: * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> diff --git a/include/virtio_loopback.h b/include/virtio_loopback.h index ac23299..e3f4a78 100644 --- a/include/virtio_loopback.h +++ b/include/virtio_loopback.h @@ -16,7 +16,7 @@ * * 3) vhost.h of QEMU project * - * Copyright 2022-2023 Virtual Open Systems SAS. + * Copyright 2022-2024 Virtual Open Systems SAS. * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -165,25 +165,11 @@ sizeof(virtio_device_info_struct_t)) #define IRQ _IOC(_IOC_WRITE, 'k', 4, sizeof(int)) #define SHARE_VQS _IOC(_IOC_WRITE, 'k', 5, sizeof(uint32_t)) -#define SHARE_COM_STRUCT _IOC(_IOC_WRITE, 'k', 7, 0) +#define SHARE_COM_STRUCT _IOC(_IOC_WRITE, 'k', 6, 0) +#define SHARE_VQS_NOTIF _IOC(_IOC_WRITE, 'k', 7, sizeof(struct vq_notifier)) #define VIRTIO_PCI_VRING_ALIGN 4096 -typedef struct VirtIOMMIOProxy { - /* Generic */ - bool legacy; - uint32_t flags; - /* Guest accessible state needing migration and reset */ - uint32_t host_features_sel; - uint32_t guest_features_sel; - uint32_t guest_page_shift; - /* virtio-bus */ - bool format_transport_address; - /* Fields only used for non-legacy (v2) devices */ - uint32_t guest_features[2]; -} VirtIOMMIOProxy; - - /* Vring specific */ /* This marks a buffer as continuing via the next field. */ #define VRING_DESC_F_NEXT 1 @@ -252,12 +238,86 @@ typedef struct VirtIOMMIOProxy { #define VRING_AVAIL_ALIGN_SIZE 2 #define VRING_USED_ALIGN_SIZE 4 #define VRING_DESC_ALIGN_SIZE 16 + +/* + * Do we get callbacks when the ring is completely used, even if we've + * suppressed them? 
+ */ +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 +#define VIRTIO_CONFIG_S_FEATURES_OK 8 +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +#define VIRTIO_F_VERSION_1 32 +#define VIRTIO_F_ACCESS_PLATFORM 33 +#define VIRTIO_F_IN_ORDER 35 +#define VIRTIO_F_NOTIFICATION_DATA 38 + +/* + * Legacy name for VIRTIO_F_ACCESS_PLATFORM + * (for compatibility with old userspace) + */ +#ifndef VIRTIO_F_IOMMU_PLATFORM +#define VIRTIO_F_IOMMU_PLATFORM 33 +#endif + +/* QEMU Aligned functions */ +/* + * Round number down to multiple. Safe when m is not a power of 2 (see + * ROUND_DOWN for a faster version when a power of 2 is guaranteed). + */ +#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) + +/* + * Round number up to multiple. Safe when m is not a power of 2 (see + * ROUND_UP for a faster version when a power of 2 is guaranteed). + */ +#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) + +/* Check if n is a multiple of m */ +#define QEMU_IS_ALIGNED(n, m) (((n) % (m)) == 0) + +/* n-byte align pointer down */ +#define QEMU_ALIGN_PTR_DOWN(p, n) \ + ((typeof(p))QEMU_ALIGN_DOWN((uintptr_t)(p), (n))) + +/* n-byte align pointer up */ +#define QEMU_ALIGN_PTR_UP(p, n) \ + ((typeof(p))QEMU_ALIGN_UP((uintptr_t)(p), (n))) + +/* Check if pointer p is n-bytes aligned */ +#define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) + +/* + * Define 1 GB offset in order to request big enough + * memory blocks from the kernel: + * 0x40000000 = 1024 * 1024 * 1024 = 64 * 4096 * 4096 = 1G + */ +#define OFFSET_1GB (64ULL * PAGE_SIZE * PAGE_SIZE) + +/* + * Define starting physical address of host memory address space + */ +#define INIT_PA 0 + /******************/ #define container_of(ptr, type, member) ({ \ const typeof(((type *) 0)->member) *__mptr = (ptr); \ (type *) ((char *) __mptr - offsetof(type, member));}) +typedef struct VirtIOMMIOProxy { + /* Generic */ + bool legacy; + uint32_t flags; + /* Guest accessible state needing migration and reset */ + uint32_t host_features_sel; + uint32_t 
guest_features_sel; + uint32_t guest_page_shift; + /* virtio-bus */ + bool format_transport_address; + /* Fields only used for non-legacy (v2) devices */ + uint32_t guest_features[2]; +} VirtIOMMIOProxy; + typedef struct VRing { unsigned int num; unsigned int num_default; @@ -384,6 +444,7 @@ typedef struct VirtIODevice { int notify_cnt; bool enable_virtio_interrupt; pthread_mutex_t interrupt_lock; + pthread_mutex_t isr_lock; int nvectors; VirtQueue *vq; VirtQueue **vqs; @@ -416,6 +477,12 @@ typedef struct VirtIODevice { const int *user_feature_bits; } VirtIODevice; +struct vq_notifier { + uint32_t vq_index; + int notifier_fd; + int pid; +}; + typedef struct efd_data { int efd[2]; int pid; @@ -652,64 +719,12 @@ int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, unsigned int max, unsigned int *next); void print_config(uint8_t *config); uint32_t get_vqs_max_size(VirtIODevice *vdev); - -/* - * Do we get callbacks when the ring is completely used, even if we've - * suppressed them? - */ -#define VIRTIO_F_NOTIFY_ON_EMPTY 24 -#define VIRTIO_CONFIG_S_FEATURES_OK 8 -#define VIRTIO_CONFIG_S_DRIVER_OK 4 -#define VIRTIO_F_VERSION_1 32 -#define VIRTIO_F_ACCESS_PLATFORM 33 -#define VIRTIO_F_IN_ORDER 35 -#define VIRTIO_F_NOTIFICATION_DATA 38 - -/* - * Legacy name for VIRTIO_F_ACCESS_PLATFORM - * (for compatibility with old userspace) - */ -#ifndef VIRTIO_F_IOMMU_PLATFORM -#define VIRTIO_F_IOMMU_PLATFORM 33 -#endif - -/* QEMU Aligned functions */ -/* - * Round number down to multiple. Safe when m is not a power of 2 (see - * ROUND_DOWN for a faster version when a power of 2 is guaranteed). - */ -#define QEMU_ALIGN_DOWN(n, m) ((n) / (m) * (m)) - -/* - * Round number up to multiple. Safe when m is not a power of 2 (see - * ROUND_UP for a faster version when a power of 2 is guaranteed). 
- */ -#define QEMU_ALIGN_UP(n, m) QEMU_ALIGN_DOWN((n) + (m) - 1, (m)) - -/* Check if n is a multiple of m */ -#define QEMU_IS_ALIGNED(n, m) (((n) % (m)) == 0) - -/* n-byte align pointer down */ -#define QEMU_ALIGN_PTR_DOWN(p, n) \ - ((typeof(p))QEMU_ALIGN_DOWN((uintptr_t)(p), (n))) - -/* n-byte align pointer up */ -#define QEMU_ALIGN_PTR_UP(p, n) \ - ((typeof(p))QEMU_ALIGN_UP((uintptr_t)(p), (n))) - -/* Check if pointer p is n-bytes aligned */ -#define QEMU_PTR_IS_ALIGNED(p, n) QEMU_IS_ALIGNED((uintptr_t)(p), (n)) - -/* - * Define 1 GB offset in order to request big enough - * memory blocks from the kernel: - * 0x40000000 = 1024 * 1024 * 1024 = 64 * 4096 * 4096 = 1G - */ -#define OFFSET_1GB (64ULL * PAGE_SIZE * PAGE_SIZE) - -/* - * Define starting physical address of host memory address space - */ -#define INIT_PA 0 +int virtio_set_status(VirtIODevice *vdev, uint8_t val); +void virtio_queue_update_rings(VirtIODevice *vdev, int n); +void virtio_queue_set_num(VirtIODevice *vdev, int n, int num); +bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq); +uint64_t virtio_queue_get_addr(VirtIODevice *vdev, int n); +void virtio_queue_set_addr(VirtIODevice *vdev, int n, uint64_t addr); +void virtio_set_started(VirtIODevice *vdev, bool started); #endif /* VIRTIO_LOOPBACK */ diff --git a/src/adapter/adapter.c b/src/adapter/adapter.c index 8360ffb..55de251 100644 --- a/src/adapter/adapter.c +++ b/src/adapter/adapter.c @@ -1,5 +1,5 @@ /* - * Copyright 2022-2023 Virtual Open Systems SAS + * Copyright 2022-2024 Virtual Open Systems SAS * * Authors: * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> diff --git a/src/common/event_notifier.c b/src/common/event_notifier.c index d4b08f7..a74b86d 100644 --- a/src/common/event_notifier.c +++ b/src/common/event_notifier.c @@ -16,7 +16,7 @@ * Copyright (c) 2003-2008 Fabrice Bellard * Copyright (c) 2010 Red Hat, Inc. * - * Copyright 2023 Virtual Open Systems SAS. + * Copyright 2023-2024 Virtual Open Systems SAS. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/devices/vhost_user_blk.c b/src/devices/vhost_user_blk.c index bba954b..201d81f 100644 --- a/src/devices/vhost_user_blk.c +++ b/src/devices/vhost_user_blk.c @@ -11,7 +11,7 @@ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> * Nicholas Bellinger <nab@risingtidesystems.com> * - * Copyright (c) 2022-2023 Virtual Open Systems SAS. + * Copyright (c) 2022-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -64,80 +64,6 @@ static const int user_feature_bits[] = { VIRTIO_BLK_F_MQ, }; -static void vhost_user_blk_start(VirtIODevice *vdev) -{ - VHostUserBlk *s = vdev->vhublk; - VirtioBus *k = vdev->vbus; - unsigned int i; - int ret; - - DBG("vhost_user_blk_start\n"); - - if (!k->set_guest_notifiers) { - DBG("binding does not support guest notifiers\n"); - goto error; - } - - ret = vhost_dev_enable_notifiers(s->vhost_dev, vdev); - if (ret < 0) { - DBG("Error enabling host notifiers\n"); - goto error; - } - - ret = k->set_guest_notifiers(k->vdev, s->vhost_dev->nvqs, true); - if (ret < 0) { - DBG("Error enabling host notifier\n"); - goto error; - } - - s->vhost_dev->acked_features = vdev->guest_features; - DBG("acked_features: 0x%lx\n", vdev->guest_features); - - /* FIXME: We might do not need that */ - ret = vhost_dev_prepare_inflight(s->vhost_dev, vdev); - if (ret < 0) { - DBG("Error setting inflight format\n"); - goto error; - } - - if (!s->inflight->addr) { - ret = vhost_dev_get_inflight(s->vhost_dev, s->queue_size, s->inflight); - if (ret < 0) { - DBG("Error getting inflight\n"); - goto error; - } - } - - ret = vhost_dev_set_inflight(s->vhost_dev, s->inflight); - if (ret < 0) { - DBG("Error setting inflight\n"); - goto error; - } - - ret = vhost_dev_start(s->vhost_dev, vdev, true); - if (ret < 
0) { - DBG("Error starting vhost\n"); - goto error; - } - - s->started_vu = true; - - /* - * guest_notifier_mask/pending not used yet, so just unmask - * everything here. virtio-pci will do the right thing by - * enabling/disabling irqfd. - */ - for (i = 0; i < s->vhost_dev->nvqs; i++) { - vhost_virtqueue_mask(s->vhost_dev, vdev, i, false); - } - - DBG("vhost_user_blk_start returns successfully\n"); - return; - -error: - -} - static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) { int ret; @@ -319,7 +245,6 @@ static void vhost_user_dev_class_init_extra(VirtIODevice *vdev) DBG("virtio_dev_class_init\n"); vdev->vdev_class->realize = vhost_user_blk_realize; - vdev->vdev_class->start = vhost_user_blk_start; vdev->vdev_class->get_config = vhost_user_blk_update_config; vdev->vdev_class->set_config = vhost_user_blk_set_config; vdev->vdev_class->print_config = print_config_blk; diff --git a/src/devices/vhost_user_can.c b/src/devices/vhost_user_can.c index 091637c..5dff77c 100644 --- a/src/devices/vhost_user_can.c +++ b/src/devices/vhost_user_can.c @@ -5,7 +5,7 @@ * https://github.com/OpenSynergy/qemu/tree/virtio-can-spec-rfc-v3 * * Copyright (C) 2021-2023 OpenSynergy GmbH - * Copyright (c) 2023 Virtual Open Systems SAS. + * Copyright (c) 2023-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/devices/vhost_user_console.c b/src/devices/vhost_user_console.c index d640b22..500f87c 100644 --- a/src/devices/vhost_user_console.c +++ b/src/devices/vhost_user_console.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Virtual Open Systems SAS. + * Copyright (c) 2023-2024 Virtual Open Systems SAS. * * This work is licensed under the terms of the GNU GPL, version 2 or * (at your option) any later version. 
See the COPYING file in the diff --git a/src/devices/vhost_user_gpio.c b/src/devices/vhost_user_gpio.c index 0858020..937f702 100644 --- a/src/devices/vhost_user_gpio.c +++ b/src/devices/vhost_user_gpio.c @@ -3,7 +3,7 @@ * * Copyright (c) 2022 Viresh Kumar <viresh.kumar@linaro.org> * - * Copyright (c) 2023 Virtual Open Systems SAS. + * Copyright (c) 2023-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/devices/vhost_user_rng.c b/src/devices/vhost_user_rng.c index b58fa59..0b68550 100644 --- a/src/devices/vhost_user_rng.c +++ b/src/devices/vhost_user_rng.c @@ -3,7 +3,7 @@ * * Copyright (c) 2021 Mathieu Poirier <mathieu.poirier@linaro.org> * - * Copyright (c) 2022-2023 Virtual Open Systems SAS. + * Copyright (c) 2022-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/src/devices/vhost_user_sound.c b/src/devices/vhost_user_sound.c index cd3881f..7a5498a 100644 --- a/src/devices/vhost_user_sound.c +++ b/src/devices/vhost_user_sound.c @@ -3,7 +3,7 @@ * * Copyright 2020 Red Hat, Inc. * - * Copyright (c) 2023 Virtual Open Systems SAS. + * Copyright (c) 2023-2024 Virtual Open Systems SAS. * * This work is licensed under the terms of the GNU GPL, version 2 or * (at your option) any later version. See the COPYING file in the diff --git a/src/lib/vhost_loopback.c b/src/lib/vhost_loopback.c index 3ea18b7..393a1fa 100644 --- a/src/lib/vhost_loopback.c +++ b/src/lib/vhost_loopback.c @@ -11,7 +11,7 @@ * Authors: * Michael S. Tsirkin <mst@redhat.com> * - * Copyright 2022-2023 Virtual Open Systems SAS. + * Copyright 2022-2024 Virtual Open Systems SAS. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -68,6 +68,9 @@ */ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) { + struct adapter_dev *adev = to_adapter_device(hdev, vdev); + VirtQueue *vq; + struct vq_notifier vq_notifier; unsigned int i; int r; @@ -76,13 +79,6 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) * doesn't interfere. */ - /* TODO: Check if this is still useful */ - r = virtio_device_grab_ioeventfd(vdev); - if (r < 0) { - DBG("binding does not support host notifiers\n"); - goto fail; - } - for (i = 0; i < hdev->nvqs; ++i) { r = virtio_bus_set_host_notifier(vdev->vbus, hdev->vq_index + i, @@ -91,6 +87,18 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) DBG("vhost VQ %d notifier binding failed: %d", i, r); goto fail; } + + /* + * Provide those notifier to the virtio-loopback driver: + * + * The driver will kick those notifiers without passing + * through the adapter anymore. + */ + VirtQueue *vq = virtio_get_queue(vdev, i); + vq_notifier.pid = getpid(); + vq_notifier.vq_index = i; + vq_notifier.notifier_fd = virtio_queue_get_host_notifier(vq)->wfd; + ioctl(adev->loopback_fd, SHARE_VQS_NOTIF, &vq_notifier); } return 0; diff --git a/src/lib/virtio_loopback.c b/src/lib/virtio_loopback.c index 1b57794..6f84bc3 100644 --- a/src/lib/virtio_loopback.c +++ b/src/lib/virtio_loopback.c @@ -18,7 +18,7 @@ * Peter Maydell <peter.maydell@linaro.org> * * - * Copyright 2022-2023 Virtual Open Systems SAS. + * Copyright 2022-2024 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License; either version 2 @@ -67,784 +67,6 @@ #define DBG(...) 
#endif /* DEBUG */ -void virtio_add_feature(uint64_t *features, unsigned int fbit) -{ - *features |= (1ULL << fbit); -} - -bool virtio_has_feature(uint64_t features, unsigned int fbit) -{ - return !!(features & (1ULL << fbit)); -} - -static int virtio_validate_features(VirtIODevice *vdev) -{ - if (virtio_has_feature(vdev->host_features, VIRTIO_F_IOMMU_PLATFORM) && - !virtio_has_feature(vdev->guest_features, VIRTIO_F_IOMMU_PLATFORM)) { - return -EFAULT; - } - - return 0; -} - -bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status) -{ - if (!vdev->vm_running) { - return false; - } - - return virtio_device_started(vdev, status); -} - -bool virtio_device_started(VirtIODevice *vdev, uint8_t status) -{ - - DBG("virtio_device_started: %d\n", status & VIRTIO_CONFIG_S_DRIVER_OK); - DBG("status: %d\n", status); - - (void)vdev; - - return status & VIRTIO_CONFIG_S_DRIVER_OK; -} -void virtio_set_started(VirtIODevice *vdev, bool started) -{ - if (started) { - vdev->start_on_kick = false; - } - - if (vdev->use_started) { - vdev->started = started; - } -} - -int virtio_set_status(VirtIODevice *vdev, uint8_t val) -{ - VirtioDeviceClass *k = vdev->vdev_class; - - DBG("virtio_set_status(...)\n"); - - if (virtio_has_feature(vdev->guest_features, VIRTIO_F_VERSION_1)) { - if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && - val & VIRTIO_CONFIG_S_FEATURES_OK) { - int ret = virtio_validate_features(vdev); - - if (ret) { - return ret; - } - } - } - - if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != - (val & VIRTIO_CONFIG_S_DRIVER_OK)) { - virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); - } - - DBG("set vdev->status:%u\n", vdev->status); - - if (k->set_status) { - k->set_status(vdev, val); - } - - vdev->status = val; - - return 0; -} - -uint64_t vring_align(uint64_t addr, unsigned long align) -{ - return QEMU_ALIGN_UP(addr, align); -} - -uint64_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n) -{ - return sizeof(VRingDesc) * vdev->vq[n].vring.num; -} - 
-uint64_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) -{ - return vdev->vq[n].vring.desc; -} - -uint64_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) -{ - return vdev->vq[n].vring.avail; -} - -uint64_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n) -{ - return vdev->vq[n].vring.used; -} - -int virtio_queue_get_num(VirtIODevice *vdev, int n) -{ - return vdev->vq[n].vring.num; -} - -uint64_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n) -{ - int s; - - s = virtio_has_feature(vdev->guest_features, - VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return offsetof(VRingAvail, ring) + - sizeof(uint16_t) * vdev->vq[n].vring.num + s; -} - -uint64_t virtio_queue_get_used_size(VirtIODevice *vdev, int n) -{ - int s; - - s = virtio_has_feature(vdev->guest_features, - VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; - return offsetof(VRingUsed, ring) + - sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; -} - -/* virt queue functions */ -void virtio_queue_update_rings(VirtIODevice *vdev, int n) -{ - VRing *vring = &vdev->vq[n].vring; - - if (!vring->num || !vring->desc || !vring->align) { - /* not yet setup -> nothing to do */ - return; - } - vring->avail = vring->desc + vring->num * sizeof(VRingDesc); - vring->used = vring_align(vring->avail + - offsetof(VRingAvail, ring[vring->num]), - vring->align); -} - -static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev, - int n) -{ - return vdev->vq[n].last_avail_idx; -} - -unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) -{ - return virtio_queue_split_get_last_avail_idx(vdev, n); -} - -void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) -{ - /* - * Don't allow guest to flip queue between existent and - * nonexistent states, or to set it to an invalid size. 
- */ - if (!!num != !!vdev->vq[n].vring.num || - num > VIRTQUEUE_MAX_SIZE || - num < 0) { - return; - } - vdev->vq[n].vring.num = num; -} - -uint64_t virtio_queue_get_addr(VirtIODevice *vdev, int n) -{ - return vdev->vq[n].vring.desc; -} - -void virtio_queue_set_addr(VirtIODevice *vdev, int n, uint64_t addr) -{ - if (!vdev->vq[n].vring.num) { - return; - } - vdev->vq[n].vring.desc = addr; - virtio_queue_update_rings(vdev, n); -} - -int virtio_queue_ready(VirtQueue *vq) -{ - return vq->vring.avail != 0; -} - -uint16_t vring_avail_idx(VirtQueue *vq) -{ - vq->shadow_avail_idx = ((VRingAvail *)vq->vring.avail)->idx; - - return vq->shadow_avail_idx; -} - -uint16_t vring_avail_ring(VirtQueue *vq, int i) -{ - return ((VRingAvail *)vq->vring.avail)->ring[i]; -} - -int virtio_queue_split_empty(VirtQueue *vq) -{ - bool empty; - - if (!vq->vring.avail) { - return 1; - } - - if (vq->shadow_avail_idx != vq->last_avail_idx) { - return 0; - } - - empty = vring_avail_idx(vq) == vq->last_avail_idx; - return empty; -} - -int virtio_queue_empty(VirtQueue *vq) -{ - return virtio_queue_split_empty(vq); -} - -size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt, - size_t offset, const void *buf, size_t bytes) -{ - size_t done; - unsigned int i; - for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { - if (offset < iov[i].iov_len) { - size_t len = MIN(iov[i].iov_len - offset, bytes - done); - memcpy(iov[i].iov_base + offset, buf + done, len); - done += len; - offset = 0; - } else { - offset -= iov[i].iov_len; - } - } - return done; -} - -size_t qemu_iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, - size_t offset, const void *buf, size_t bytes) -{ - if (__builtin_constant_p(bytes) && iov_cnt && - offset <= iov[0].iov_len && bytes <= iov[0].iov_len - offset) { - memcpy(iov[0].iov_base + offset, buf, bytes); - return bytes; - } else { - return iov_from_buf_full(iov, iov_cnt, offset, buf, bytes); - } -} - -/* Called within rcu_read_lock(). 
*/ -static inline uint16_t vring_avail_flags(VirtQueue *vq) -{ - return ((VRingAvail *)vq->vring.avail)->flags; -} - -/* Called within rcu_read_lock(). */ -static inline uint16_t vring_get_used_event(VirtQueue *vq) -{ - return vring_avail_ring(vq, vq->vring.num); -} - -/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ -/* - * Assuming a given event_idx value from the other side, if - * we have just incremented index from old to new_idx, - * should we trigger an event? - */ -//static inline int vring_need_event(uint16_t event_idx, -//static int vring_need_event(uint16_t event_idx, -// uint16_t new_idx, uint16_t old) -//{ -// /* -// * Note: Xen has similar logic for notification hold-off -// * in include/xen/interface/io/ring.h with req_event and req_prod -// * corresponding to event_idx + 1 and new_idx respectively. -// * Note also that req_event and req_prod in Xen start at 1, -// * event indexes in virtio start at 0. -// */ -// return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); -//} - -/* Called within rcu_read_lock(). */ -static bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) -{ - uint16_t old, new; - bool v; - - /* Always notify when queue is empty (when feature acknowledge) */ - if (virtio_has_feature(vdev->guest_features, VIRTIO_F_NOTIFY_ON_EMPTY) && - !vq->inuse && virtio_queue_empty(vq)) { - return true; - } - - if (!virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX)) { - return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); - } - - v = vq->signalled_used_valid; - vq->signalled_used_valid = true; - old = vq->signalled_used; - new = vq->signalled_used = vq->used_idx; - return !v || vring_need_event(vring_get_used_event(vq), new, old); -} - -/* Called within rcu_read_lock(). 
*/ -static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) -{ - return virtio_split_should_notify(vdev, vq); -} - -void virtio_set_isr(VirtIODevice *vdev, int value) -{ - uint8_t old = vdev->isr; - - /* - * Do not write ISR if it does not change, so that its cacheline remains - * shared in the common case where the guest does not read it. - */ - if ((old & value) != value) { - vdev->isr |= value; - } - - DBG("Update isr: %d\n", vdev->isr); -} - -static void virtio_irq(VirtQueue *vq) -{ - virtio_set_isr(vq->vdev, 0x1); - virtio_notify_vector(vq->vdev); -} - -void virtio_notify_config(VirtIODevice *vdev) -{ - - DBG("virtio_notify_config\n"); - - if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { - return; - } - - virtio_set_isr(vdev, 0x3); - vdev->generation++; - /* - * MMIO does not use vector parameter: - * virtio_notify_vector(vdev, vdev->config_vector); - */ - virtio_notify_vector(vdev); -} - -void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) -{ - if (!virtio_should_notify(vdev, vq)) { - DBG("Do not notify!\n"); - return; - } - - virtio_irq(vq); -} - -static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, int i) -{ - VRingUsed *used = (VRingUsed *)vq->vring.used; - - used->ring[i] = *uelem; -} - -void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len, unsigned int idx) -{ - VRingUsedElem uelem; - - if (!vq->vring.used) { - return; - } - - idx = (idx + vq->used_idx) % vq->vring.num; - - uelem.id = elem->index; - uelem.len = len; - vring_used_write(vq, &uelem, idx); -} - -void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len, unsigned int idx) -{ - virtqueue_split_fill(vq, elem, len, idx); -} - -static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) -{ - ((VRingUsed *)vq->vring.used)->idx = val; - vq->used_idx = val; -} - -static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) -{ - uint16_t old, new; - - if (!vq->vring.used) { - return; - } 
- - old = vq->used_idx; - new = old + count; - vring_used_idx_set(vq, new); - vq->inuse -= count; - if ((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)) { - vq->signalled_used_valid = false; - } -} - -void virtqueue_flush(VirtQueue *vq, unsigned int count) -{ - virtqueue_split_flush(vq, count); -} - -void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, - unsigned int len) -{ - virtqueue_fill(vq, elem, len, 0); - virtqueue_flush(vq, 1); -} - -void vring_set_avail_event(VirtQueue *vq, uint16_t val) -{ - uint16_t *avail; - - avail = (uint16_t *)&((VRingUsed *)vq->vring.used)->ring[vq->vring.num]; - *avail = val; -} - -static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg, - uint64_t *addr, struct iovec *iov, - unsigned int max_num_sg, bool is_write, - uint64_t pa, size_t sz) -{ - (void)vdev; - (void)p_num_sg; - (void)addr; - (void)iov; - (void)max_num_sg; - (void)is_write; - (void)pa; - (void)sz; - - DBG("virtqueue_map_desc: Not implemented\n"); - - return true; -} - -static void *virtqueue_alloc_element(size_t sz, unsigned out_num, - unsigned in_num) -{ - VirtQueueElement *elem; - size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); - size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); - size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); - size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); - size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); - size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); - - /* - * TODO: Add check for requested size - * - * assert(sz >= sizeof(VirtQueueElement)); - */ - elem = malloc(out_sg_end); - elem->out_num = out_num; - elem->in_num = in_num; - elem->in_addr = (void *)elem + in_addr_ofs; - elem->out_addr = (void *)elem + out_addr_ofs; - elem->in_sg = (void *)elem + in_sg_ofs; - elem->out_sg = (void *)elem + out_sg_ofs; - return elem; -} - -void *virtqueue_split_pop(VirtQueue *vq, 
size_t sz) -{ - unsigned int i, head, max; - VirtIODevice *vdev = vq->vdev; - VirtQueueElement *elem = NULL; - unsigned out_num, in_num, elem_entries; - uint64_t addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; - VRingDesc *desc; - int rc; - - if (virtio_queue_split_empty(vq)) { - goto done; - } - - /* When we start there are none of either input nor output. */ - out_num = in_num = elem_entries = 0; - - max = vq->vring.num; - - if (vq->inuse >= vq->vring.num) { - DBG("Virtqueue size exceeded\n"); - goto done; - } - - if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) { - goto done; - } - - if (virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX)) { - vring_set_avail_event(vq, vq->last_avail_idx); - } - - i = head; - - desc = (VRingDesc *)vq->vring.desc + i; - - /* Collect all the descriptors */ - do { - bool map_ok; - - if (desc->flags & VRING_DESC_F_WRITE) { - map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, - iov + out_num, - VIRTQUEUE_MAX_SIZE - out_num, true, - desc->addr, desc->len); - } else { - if (in_num) { - DBG("Incorrect order for descriptors\n"); - goto err_undo_map; - } - map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, - VIRTQUEUE_MAX_SIZE, false, - desc->addr, desc->len); - } - if (!map_ok) { - goto err_undo_map; - } - - /* If we've got too many, that implies a descriptor loop. 
*/ - if (++elem_entries > max) { - goto err_undo_map; - } - - rc = virtqueue_split_read_next_desc(vdev, desc, max, &i); - } while (rc == VIRTQUEUE_READ_DESC_MORE); - - if (rc == VIRTQUEUE_READ_DESC_ERROR) { - goto err_undo_map; - } - - /* Now copy what we have collected and mapped */ - elem = virtqueue_alloc_element(sz, out_num, in_num); - elem->index = head; - elem->ndescs = 1; - for (i = 0; i < out_num; i++) { - elem->out_addr[i] = addr[i]; - elem->out_sg[i] = iov[i]; - } - for (i = 0; i < in_num; i++) { - elem->in_addr[i] = addr[out_num + i]; - elem->in_sg[i] = iov[out_num + i]; - } - - vq->inuse++; - -done: - return elem; - -err_undo_map: - goto done; -} - -void *virtqueue_pop(VirtQueue *vq, size_t sz) -{ - return virtqueue_split_pop(vq, sz); -} - -bool virtqueue_get_head(VirtQueue *vq, unsigned int idx, - unsigned int *head) -{ - - /* - * Grab the next descriptor number they're advertising, and increment - * the index we've seen. - */ - *head = vring_avail_ring(vq, idx % vq->vring.num); - - /* If their number is silly, that's a fatal mistake. 
*/ - if (*head >= vq->vring.num) { - DBG("Guest says index %u is available", *head); - return false; - } - - return true; -} - -uint32_t get_vqs_max_size(VirtIODevice *vdev) -{ - uint32_t total_size, temp_size, total_p2 = 1; - int log_res = 0; - - (void)vdev; - - total_size = VIRTQUEUE_MAX_SIZE * sizeof(VRingDesc); - total_size += offsetof(VRingAvail, ring) + - VIRTQUEUE_MAX_SIZE * sizeof(uint16_t); - total_size += offsetof(VRingUsed, ring) + - VIRTQUEUE_MAX_SIZE * sizeof(uint16_t); - - temp_size = total_size; - - /* Compute log2 of total_size (Needs to be power of 2) */ - while ((temp_size /= 2) > 0) { - log_res++; - total_p2 *= 2; - } - - /* if total_size is not a power of 2: (total_size > 8) -> 16 */ - if (total_size > total_p2) { - total_size = 2 * total_p2; - } - - /* - * Align to page size: This needed only in case total_size - * is less than 4096 (PAGE_SIZE) - */ - if (total_size % PAGE_SIZE > 0) { - total_size = (total_size / PAGE_SIZE) * PAGE_SIZE + PAGE_SIZE; - } - - DBG("Total vqs size to mmap is: %u\n", total_size); - - return total_size; -} - -int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) -{ - uint16_t num_heads = vring_avail_idx(vq) - idx; - - /* Check it isn't doing very strange things with descriptor numbers. */ - if (num_heads > vq->vring.num) { - DBG("Guest moved used index from %u to %u", - idx, vq->shadow_avail_idx); - return -EINVAL; - } - - return num_heads; -} - -int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, - unsigned int max, unsigned int *next) -{ - (void)vdev; - - /* If this descriptor says it doesn't chain, we're done. */ - if (!(desc->flags & VRING_DESC_F_NEXT)) { - return VIRTQUEUE_READ_DESC_DONE; - } - - /* Check they're not leading us off end of descriptors. 
*/ - *next = desc->next; - - if (*next >= max) { - DBG("Desc next is %u", *next); - return VIRTQUEUE_READ_DESC_ERROR; - } - - desc = (VRingDesc *)desc + *next; - return VIRTQUEUE_READ_DESC_MORE; -} - -static void virtqueue_split_get_avail_bytes(VirtQueue *vq, - unsigned int *in_bytes, unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) -{ - VirtIODevice *vdev = vq->vdev; - unsigned int max, idx; - unsigned int total_bufs, in_total, out_total; - int rc; - - idx = vq->last_avail_idx; - total_bufs = in_total = out_total = 0; - - max = vq->vring.num; - - while ((rc = virtqueue_num_heads(vq, idx)) > 0) { - unsigned int num_bufs; - VRingDesc *desc; - unsigned int i; - - num_bufs = total_bufs; - - if (!virtqueue_get_head(vq, idx++, &i)) { - goto err; - } - - /* there is no need to copy anything form the cache struct */ - desc = (VRingDesc *)vq->vring.desc + i; - - if (desc->flags & VRING_DESC_F_INDIRECT) { - if (!desc->len || (desc->len % sizeof(VRingDesc))) { - DBG("Invalid size for indirect buffer table\n"); - goto err; - } - - /* If we've got too many, that implies a descriptor loop. */ - if (num_bufs >= max) { - goto err; - } - } - - do { - /* If we've got too many, that implies a descriptor loop. 
*/ - if (++num_bufs > max) { - goto err; - } - - if (desc->flags & VRING_DESC_F_WRITE) { - in_total += desc->len; - } else { - out_total += desc->len; - } - if (in_total >= max_in_bytes && out_total >= max_out_bytes) { - goto done; - } - - rc = virtqueue_split_read_next_desc(vdev, desc, max, &i); - } while (rc == VIRTQUEUE_READ_DESC_MORE); - - if (rc == VIRTQUEUE_READ_DESC_ERROR) { - goto err; - } - - total_bufs = num_bufs; - } - - if (rc < 0) { - goto err; - } - -done: - if (in_bytes) { - *in_bytes = in_total; - } - if (out_bytes) { - *out_bytes = out_total; - } - return; - -err: - in_total = out_total = 0; - goto done; -} - -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) -{ - if (!vq->vring.desc) { - goto err; - } - - virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, - max_in_bytes, max_out_bytes); - - return; -err: - if (in_bytes) { - *in_bytes = 0; - } - if (out_bytes) { - *out_bytes = 0; - } -} - void print_neg_flag(uint64_t neg_flag, bool read) { if (read) { @@ -957,6 +179,112 @@ void print_neg_flag(uint64_t neg_flag, bool read) } } +/* Called within rcu_read_lock(). */ +static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + return virtio_split_should_notify(vdev, vq); +} + +void virtio_set_isr(VirtIODevice *vdev, int value) +{ + uint8_t old = vdev->isr; + + /* + * Do not write ISR if it does not change, so that its cacheline remains + * shared in the common case where the guest does not read it. 
+ */ + if ((old & value) != value) { + vdev->isr |= value; + } + + DBG("Update isr: %d\n", vdev->isr); +} + +static void virtio_irq(VirtQueue *vq) +{ + pthread_mutex_lock(&vq->vdev->isr_lock); + virtio_set_isr(vq->vdev, 0x1); + pthread_mutex_unlock(&vq->vdev->isr_lock); + virtio_notify_vector(vq->vdev); +} + +void virtio_notify_config(VirtIODevice *vdev) +{ + + DBG("virtio_notify_config\n"); + + if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { + return; + } + + pthread_mutex_lock(&vdev->isr_lock); + virtio_set_isr(vdev, 0x3); + pthread_mutex_unlock(&vdev->isr_lock); + vdev->generation++; + /* + * MMIO does not use vector parameter: + * virtio_notify_vector(vdev, vdev->config_vector); + */ + virtio_notify_vector(vdev); +} + +void virtio_loopback_update_irq(VirtIODevice *vdev) +{ + int level, irq_num = 44; + struct adapter_dev *adev = to_adapter_device(vdev, virtio_dev); + + if (!vdev) { + return; + } + + level = (vdev->isr != 0); + + if (level == 1) { + DBG("Trigger interrupt (ioctl)\n"); + DBG("Interrupt counter: %d\n", vdev->int_count++); + (void) ioctl(adev->loopback_fd, IRQ, &irq_num); + } else { + DBG("No interrupt\n"); + } +} + +bool virtio_device_disabled(VirtIODevice *vdev) +{ + return vdev->disabled || vdev->broken; +} + +/* virtio device */ +void virtio_notify_vector(VirtIODevice *vdev) +{ + + /* TODO: Check if this is still needed */ + if (virtio_device_disabled(vdev)) { + DBG("Device is disabled\n"); + return; + } + + virtio_loopback_update_irq(vdev); + + /* + * TODO: substitue the previous line with the + * following when it's implemented + * + * if (k->notify) { + * k->notify(qbus->parent, vector); + * } + */ +} + +void virtio_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + if (!virtio_should_notify(vdev, vq)) { + DBG("Do not notify!\n"); + return; + } + + virtio_irq(vq); +} + int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) { bool bad = (val & ~(vdev->host_features)) != 0; @@ -983,7 +311,6 @@ int virtio_set_features(VirtIODevice 
*vdev, uint64_t val) return ret; } -/* TODO: MMIO notifiers -- This might not be needed anymore */ static void virtio_queue_guest_notifier_read(EventNotifier *n) { VirtQueue *vq = container_of(n, VirtQueue, guest_notifier); @@ -1000,6 +327,7 @@ void *loopback_event_select(void *_e) int rfd = e->rfd; VirtQueue *vq = container_of(e, VirtQueue, guest_notifier); VirtIODevice *vdev = vq->vdev; + struct adapter_dev *adev = to_adapter_device(vdev, virtio_dev); DBG("\nWaiting event from vhost-user-device\n"); @@ -1022,90 +350,48 @@ void *loopback_event_select(void *_e) "(eventfd: %d) -> event_count: %d (select value: %d)\n\n", rfd, vdev->eventfd_count, retval); - if (event_notifier_test_and_clear(e)) { - vdev->eventfd_count++; - virtio_irq(vq); - } + vdev->eventfd_count++; + virtio_queue_guest_notifier_read(e); pthread_mutex_unlock(&vdev->interrupt_lock); } } } -void event_notifier_set_handler(EventNotifier *e, - void *handler) -{ - int ret; - pthread_t thread_id; - (void)handler; - - if (e->wfd > 0) { - ret = pthread_create(&thread_id, NULL, loopback_event_select, - (void *)e); - if (ret != 0) { - exit(1); - } - } -} - -void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, - bool with_irqfd) -{ - if (assign && !with_irqfd) { - event_notifier_set_handler(&vq->guest_notifier, - virtio_queue_guest_notifier_read); - } else { - event_notifier_set_handler(&vq->guest_notifier, NULL); - } - if (!assign) { - /* - * Test and clear notifier before closing it, - * in case poll callback didn't have time to run. 
- */ - virtio_queue_guest_notifier_read(&vq->guest_notifier); - } -} - EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) { return &vq->guest_notifier; } -int virtio_loopback_set_guest_notifier(VirtIODevice *vdev, int n, bool assign, - bool with_irqfd) -{ - VirtQueue *vq = virtio_get_queue(vdev, n); - EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); - - if (assign) { - int r = event_notifier_init(notifier, 0); - if (r < 0) { - return r; - } - virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); - } else { - virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); - } - - return 0; -} - int virtio_loopback_set_guest_notifiers(VirtIODevice *vdev, int nvqs, bool assign) { bool with_irqfd = false; + pthread_t thread_id; int r, n; + (void)assign; nvqs = MIN(nvqs, VIRTIO_QUEUE_MAX); for (n = 0; n < nvqs; n++) { + if (!virtio_queue_get_num(vdev, n)) { break; } - r = virtio_loopback_set_guest_notifier(vdev, n, assign, with_irqfd); + VirtQueue *vq = virtio_get_queue(vdev, n); + EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); + int r = event_notifier_init(notifier, 0); if (r < 0) { goto assign_error; } + + if (vq->guest_notifier.wfd > 0) { + r = pthread_create(&thread_id, NULL, loopback_event_select, + (void *)&vq->guest_notifier); + if (r != 0) { + goto assign_error; + } + } } return 0; @@ -1130,118 +416,21 @@ int virtio_bus_set_host_notifier(VirtioBus *vbus, int n, bool assign) VirtIODevice *vdev = vbus->vdev; struct adapter_dev *adev = to_adapter_device(vdev, virtio_dev); VirtQueue *vq = virtio_get_queue(vdev, n); - EventNotifier *notifier = virtio_queue_get_host_notifier(vq); int r = 0; - if (!vbus->ioeventfd_assign) { - return -ENOSYS; - } - if (assign) { r = event_notifier_init(notifier, 1); if (r < 0) { DBG("unable to init event notifier: %d", r); return r; } - r = vbus->ioeventfd_assign(&adev->proxy, notifier, n, true); - if (r < 0) { - DBG("unable to assign ioeventfd: %d", r); - } - } else { - 
vbus->ioeventfd_assign(&adev->proxy, notifier, n, false); - } - - if (r == 0) { - virtio_queue_set_host_notifier_enabled(vq, assign); } + virtio_queue_set_host_notifier_enabled(vq, assign); return r; } -/* On success, ioeventfd ownership belongs to the caller. */ -int virtio_bus_grab_ioeventfd(VirtioBus *bus) -{ - /* - * vhost can be used even if ioeventfd=off in the proxy device, - * so do not check k->ioeventfd_enabled. - */ - if (!bus->ioeventfd_assign) { - return -ENOSYS; - } - - if (bus->ioeventfd_grabbed == 0 && bus->ioeventfd_started) { - /* - * Remember that we need to restart ioeventfd - * when ioeventfd_grabbed becomes zero. - */ - bus->ioeventfd_started = true; - } - bus->ioeventfd_grabbed++; - return 0; -} - -int virtio_device_grab_ioeventfd(VirtIODevice *vdev) -{ - return virtio_bus_grab_ioeventfd(vdev->vbus); -} - -bool virtio_device_disabled(VirtIODevice *vdev) -{ - return vdev->disabled || vdev->broken; -} - -void virtio_loopback_update_irq(VirtIODevice *vdev) -{ - int level, irq_num = 44; - struct adapter_dev *adev = to_adapter_device(vdev, virtio_dev); - - if (!vdev) { - return; - } - - level = (vdev->isr != 0); - - if (!((level == 1) && (vdev->prev_level == 0))) { - DBG("No interrupt\n"); - vdev->prev_level = level; - return; - } - vdev->prev_level = level; - - DBG("Trigger interrupt (ioctl)\n"); - DBG("Interrupt counter: %d\n", vdev->int_count++); - - (void) ioctl(adev->loopback_fd, IRQ, &irq_num); -} - -/* virtio device */ -void virtio_notify_vector(VirtIODevice *vdev) -{ - - /* TODO: Check if this is still needed */ - if (virtio_device_disabled(vdev)) { - DBG("Device is disabled\n"); - return; - } - - virtio_loopback_update_irq(vdev); - - /* - * TODO: substitue the previous line with the - * following when it's implemented - * - * if (k->notify) { - * k->notify(qbus->parent, vector); - * } - */ -} - -void virtio_update_irq(VirtIODevice *vdev) -{ - virtio_notify_vector(vdev); -} - void virtio_queue_notify(VirtIODevice *vdev, int n) { VirtQueue 
*vq = &vdev->vq[n]; @@ -1471,7 +660,11 @@ static uint64_t virtio_loopback_read(VirtIODevice *vdev, uint64_t offset, DBG("VIRTIO_MMIO_QUEUE_READY: Not implemented case\n"); return 0; case VIRTIO_MMIO_INTERRUPT_STATUS: - return vdev->isr; + int temp; + pthread_mutex_lock(&vdev->isr_lock); + temp = vdev->isr; + pthread_mutex_unlock(&vdev->isr_lock); + return temp; case VIRTIO_MMIO_STATUS: DBG("Read VIRTIO_MMIO_STATUS: %d\n", vdev->status); return vdev->status; @@ -1663,8 +856,10 @@ void virtio_loopback_write(VirtIODevice *vdev, uint64_t offset, } break; case VIRTIO_MMIO_INTERRUPT_ACK: + pthread_mutex_lock(&vdev->isr_lock); vdev->isr = vdev->isr & ~value; - virtio_update_irq(vdev); + pthread_mutex_unlock(&vdev->isr_lock); + virtio_notify_vector(vdev); break; case VIRTIO_MMIO_STATUS: @@ -1876,6 +1071,11 @@ void virtio_dev_init(VirtIODevice *vdev, const char *name, exit(1); } + if (pthread_mutex_init(&vdev->isr_lock, NULL) != 0) { + printf("[ERROR] mutex init has failed\n"); + exit(1); + } + vdev->start_on_kick = false; vdev->started = false; vdev->device_id = device_id; @@ -1918,18 +1118,6 @@ static bool virtio_loopback_ioeventfd_enabled(VirtIODevice *d) return (adev->proxy.flags & VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD) != 0; } -/* TODO: This function might not be needed anymore */ -static int virtio_loopback_ioeventfd_assign(VirtIOMMIOProxy *d, - EventNotifier *notifier, - int n, bool assign) -{ - (void)d; - (void)notifier; - (void)n; - (void)assign; - return 0; -} - bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev) { VirtioBus *k = vdev->vbus; @@ -1946,7 +1134,6 @@ void virtio_loopback_bus_init(VirtioBus *k) DBG("virtio_loopback_bus_init(...)\n"); k->set_guest_notifiers = virtio_loopback_set_guest_notifiers; k->ioeventfd_enabled = virtio_loopback_ioeventfd_enabled; - k->ioeventfd_assign = virtio_loopback_ioeventfd_assign; } int virtio_loopback_start(struct adapter_dev *adev, pthread_t *thread_id) diff --git a/src/lib/virtio_vring.c b/src/lib/virtio_vring.c new file 
mode 100644 index 0000000..f8f965b --- /dev/null +++ b/src/lib/virtio_vring.c @@ -0,0 +1,792 @@ +/* + * + * Based on: + * + * 1) virtio.c of QEMU project + * + * Copyright IBM, Corp. 2007 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * + * 2) virtio-mmio.c of QEMU project + * + * Copyright (c) 2011 Linaro Limited + * + * Author: + * Peter Maydell <peter.maydell@linaro.org> + * + * + * Copyright 2022-2024 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <pthread.h> +#include <stdbool.h> +#include <sys/param.h> + +/* For socket */ +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +/* Project header files */ +#include "virtio_loopback.h" +#include "vhost_user_loopback.h" +#include "virtio_rng.h" + +#include <stddef.h> +#include <pthread.h> +#include <limits.h> + +#ifdef DEBUG +#define DBG(...) printf("virtio-loopback: " __VA_ARGS__) +#else +#define DBG(...) 
+#endif /* DEBUG */ + +void virtio_add_feature(uint64_t *features, unsigned int fbit) +{ + *features |= (1ULL << fbit); +} + +bool virtio_has_feature(uint64_t features, unsigned int fbit) +{ + return !!(features & (1ULL << fbit)); +} + +static int virtio_validate_features(VirtIODevice *vdev) +{ + if (virtio_has_feature(vdev->host_features, VIRTIO_F_IOMMU_PLATFORM) && + !virtio_has_feature(vdev->guest_features, VIRTIO_F_IOMMU_PLATFORM)) { + return -EFAULT; + } + + return 0; +} + +bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status) +{ + if (!vdev->vm_running) { + return false; + } + + return virtio_device_started(vdev, status); +} + +bool virtio_device_started(VirtIODevice *vdev, uint8_t status) +{ + + DBG("virtio_device_started: %d\n", status & VIRTIO_CONFIG_S_DRIVER_OK); + DBG("status: %d\n", status); + + (void)vdev; + + return status & VIRTIO_CONFIG_S_DRIVER_OK; +} + +void virtio_set_started(VirtIODevice *vdev, bool started) +{ + if (started) { + vdev->start_on_kick = false; + } + + if (vdev->use_started) { + vdev->started = started; + } +} + +int virtio_set_status(VirtIODevice *vdev, uint8_t val) +{ + VirtioDeviceClass *k = vdev->vdev_class; + + DBG("virtio_set_status(...)\n"); + + if (virtio_has_feature(vdev->guest_features, VIRTIO_F_VERSION_1)) { + if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && + val & VIRTIO_CONFIG_S_FEATURES_OK) { + int ret = virtio_validate_features(vdev); + + if (ret) { + return ret; + } + } + } + + if ((vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != + (val & VIRTIO_CONFIG_S_DRIVER_OK)) { + virtio_set_started(vdev, val & VIRTIO_CONFIG_S_DRIVER_OK); + } + + DBG("set vdev->status:%u\n", vdev->status); + + if (k->set_status) { + k->set_status(vdev, val); + } + + vdev->status = val; + + return 0; +} + +uint64_t vring_align(uint64_t addr, unsigned long align) +{ + return QEMU_ALIGN_UP(addr, align); +} + +uint64_t virtio_queue_get_desc_size(VirtIODevice *vdev, int n) +{ + return sizeof(VRingDesc) * vdev->vq[n].vring.num; +} + 
+uint64_t virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.desc; +} + +uint64_t virtio_queue_get_avail_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.avail; +} + +uint64_t virtio_queue_get_used_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.used; +} + +int virtio_queue_get_num(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.num; +} + +uint64_t virtio_queue_get_avail_size(VirtIODevice *vdev, int n) +{ + int s; + + s = virtio_has_feature(vdev->guest_features, + VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return offsetof(VRingAvail, ring) + + sizeof(uint16_t) * vdev->vq[n].vring.num + s; +} + +uint64_t virtio_queue_get_used_size(VirtIODevice *vdev, int n) +{ + int s; + + s = virtio_has_feature(vdev->guest_features, + VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; + return offsetof(VRingUsed, ring) + + sizeof(VRingUsedElem) * vdev->vq[n].vring.num + s; +} + +/* virt queue functions */ +void virtio_queue_update_rings(VirtIODevice *vdev, int n) +{ + VRing *vring = &vdev->vq[n].vring; + + if (!vring->num || !vring->desc || !vring->align) { + /* not yet setup -> nothing to do */ + return; + } + vring->avail = vring->desc + vring->num * sizeof(VRingDesc); + vring->used = vring_align(vring->avail + + offsetof(VRingAvail, ring[vring->num]), + vring->align); +} + +static uint16_t virtio_queue_split_get_last_avail_idx(VirtIODevice *vdev, + int n) +{ + return vdev->vq[n].last_avail_idx; +} + +unsigned int virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n) +{ + return virtio_queue_split_get_last_avail_idx(vdev, n); +} + +void virtio_queue_set_num(VirtIODevice *vdev, int n, int num) +{ + /* + * Don't allow guest to flip queue between existent and + * nonexistent states, or to set it to an invalid size. 
+ */ + if (!!num != !!vdev->vq[n].vring.num || + num > VIRTQUEUE_MAX_SIZE || + num < 0) { + return; + } + vdev->vq[n].vring.num = num; +} + +uint64_t virtio_queue_get_addr(VirtIODevice *vdev, int n) +{ + return vdev->vq[n].vring.desc; +} + +void virtio_queue_set_addr(VirtIODevice *vdev, int n, uint64_t addr) +{ + if (!vdev->vq[n].vring.num) { + return; + } + vdev->vq[n].vring.desc = addr; + virtio_queue_update_rings(vdev, n); +} + +int virtio_queue_ready(VirtQueue *vq) +{ + return vq->vring.avail != 0; +} + +uint16_t vring_avail_idx(VirtQueue *vq) +{ + vq->shadow_avail_idx = ((VRingAvail *)vq->vring.avail)->idx; + + return vq->shadow_avail_idx; +} + +uint16_t vring_avail_ring(VirtQueue *vq, int i) +{ + return ((VRingAvail *)vq->vring.avail)->ring[i]; +} + +int virtio_queue_split_empty(VirtQueue *vq) +{ + bool empty; + + if (!vq->vring.avail) { + return 1; + } + + if (vq->shadow_avail_idx != vq->last_avail_idx) { + return 0; + } + + empty = vring_avail_idx(vq) == vq->last_avail_idx; + return empty; +} + +int virtio_queue_empty(VirtQueue *vq) +{ + return virtio_queue_split_empty(vq); +} + +size_t iov_from_buf_full(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) +{ + size_t done; + unsigned int i; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(iov[i].iov_base + offset, buf + done, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; + } + } + return done; +} + +size_t qemu_iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) +{ + if (__builtin_constant_p(bytes) && iov_cnt && + offset <= iov[0].iov_len && bytes <= iov[0].iov_len - offset) { + memcpy(iov[0].iov_base + offset, buf, bytes); + return bytes; + } else { + return iov_from_buf_full(iov, iov_cnt, offset, buf, bytes); + } +} + +/* Called within rcu_read_lock(). 
*/ +static inline uint16_t vring_avail_flags(VirtQueue *vq) +{ + return ((VRingAvail *)vq->vring.avail)->flags; +} + +/* Called within rcu_read_lock(). */ +static inline uint16_t vring_get_used_event(VirtQueue *vq) +{ + return vring_avail_ring(vq, vq->vring.num); +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* + * Assuming a given event_idx value from the other side, if + * we have just incremented index from old to new_idx, + * should we trigger an event? + * + * static inline int vring_need_event(uint16_t event_idx, + * uint16_t new_idx, uint16_t old) + * { + * /\* + * * Note: Xen has similar logic for notification hold-off + * * in include/xen/interface/io/ring.h with req_event and req_prod + * * corresponding to event_idx + 1 and new_idx respectively. + * * Note also that req_event and req_prod in Xen start at 1, + * * event indexes in virtio start at 0. + * *\/ + * return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); + * } + */ + +/* Called within rcu_read_lock(). 
*/ +bool virtio_split_should_notify(VirtIODevice *vdev, VirtQueue *vq) +{ + uint16_t old, new; + bool v; + + /* Always notify when queue is empty (when feature acknowledge) */ + if (virtio_has_feature(vdev->guest_features, VIRTIO_F_NOTIFY_ON_EMPTY) && + !vq->inuse && virtio_queue_empty(vq)) { + return true; + } + + if (!virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX)) { + return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT); + } + + v = vq->signalled_used_valid; + vq->signalled_used_valid = true; + old = vq->signalled_used; + new = vq->signalled_used = vq->used_idx; + return !v || vring_need_event(vring_get_used_event(vq), new, old); +} + +static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem, int i) +{ + VRingUsed *used = (VRingUsed *)vq->vring.used; + + used->ring[i] = *uelem; +} + +void virtqueue_split_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) +{ + VRingUsedElem uelem; + + if (!vq->vring.used) { + return; + } + + idx = (idx + vq->used_idx) % vq->vring.num; + + uelem.id = elem->index; + uelem.len = len; + vring_used_write(vq, &uelem, idx); +} + +void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len, unsigned int idx) +{ + virtqueue_split_fill(vq, elem, len, idx); +} + +static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val) +{ + ((VRingUsed *)vq->vring.used)->idx = val; + vq->used_idx = val; +} + +static void virtqueue_split_flush(VirtQueue *vq, unsigned int count) +{ + uint16_t old, new; + + if (!vq->vring.used) { + return; + } + + old = vq->used_idx; + new = old + count; + vring_used_idx_set(vq, new); + vq->inuse -= count; + if ((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)) { + vq->signalled_used_valid = false; + } +} + +void virtqueue_flush(VirtQueue *vq, unsigned int count) +{ + virtqueue_split_flush(vq, count); +} + +void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + 
virtqueue_fill(vq, elem, len, 0); + virtqueue_flush(vq, 1); +} + +void vring_set_avail_event(VirtQueue *vq, uint16_t val) +{ + uint16_t *avail; + + avail = (uint16_t *)&((VRingUsed *)vq->vring.used)->ring[vq->vring.num]; + *avail = val; +} + +static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg, + uint64_t *addr, struct iovec *iov, + unsigned int max_num_sg, bool is_write, + uint64_t pa, size_t sz) +{ + (void)vdev; + (void)p_num_sg; + (void)addr; + (void)iov; + (void)max_num_sg; + (void)is_write; + (void)pa; + (void)sz; + + DBG("virtqueue_map_desc: Not implemented\n"); + + return true; +} + +static void *virtqueue_alloc_element(size_t sz, unsigned out_num, + unsigned in_num) +{ + VirtQueueElement *elem; + size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0])); + size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]); + size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]); + size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0])); + size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]); + size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]); + + /* + * TODO: Add check for requested size + * + * assert(sz >= sizeof(VirtQueueElement)); + */ + elem = malloc(out_sg_end); + elem->out_num = out_num; + elem->in_num = in_num; + elem->in_addr = (void *)elem + in_addr_ofs; + elem->out_addr = (void *)elem + out_addr_ofs; + elem->in_sg = (void *)elem + in_sg_ofs; + elem->out_sg = (void *)elem + out_sg_ofs; + return elem; +} + +void *virtqueue_split_pop(VirtQueue *vq, size_t sz) +{ + unsigned int i, head, max; + VirtIODevice *vdev = vq->vdev; + VirtQueueElement *elem = NULL; + unsigned out_num, in_num, elem_entries; + uint64_t addr[VIRTQUEUE_MAX_SIZE]; + struct iovec iov[VIRTQUEUE_MAX_SIZE]; + VRingDesc *desc; + int rc; + + if (virtio_queue_split_empty(vq)) { + goto done; + } + + /* When we start there are none of either input nor output. 
*/ + out_num = in_num = elem_entries = 0; + + max = vq->vring.num; + + if (vq->inuse >= vq->vring.num) { + DBG("Virtqueue size exceeded\n"); + goto done; + } + + if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) { + goto done; + } + + if (virtio_has_feature(vdev->guest_features, VIRTIO_RING_F_EVENT_IDX)) { + vring_set_avail_event(vq, vq->last_avail_idx); + } + + i = head; + + desc = (VRingDesc *)vq->vring.desc + i; + + /* Collect all the descriptors */ + do { + bool map_ok; + + if (desc->flags & VRING_DESC_F_WRITE) { + map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num, + iov + out_num, + VIRTQUEUE_MAX_SIZE - out_num, true, + desc->addr, desc->len); + } else { + if (in_num) { + DBG("Incorrect order for descriptors\n"); + goto err_undo_map; + } + map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov, + VIRTQUEUE_MAX_SIZE, false, + desc->addr, desc->len); + } + if (!map_ok) { + goto err_undo_map; + } + + /* If we've got too many, that implies a descriptor loop. */ + if (++elem_entries > max) { + goto err_undo_map; + } + + rc = virtqueue_split_read_next_desc(vdev, desc, max, &i); + } while (rc == VIRTQUEUE_READ_DESC_MORE); + + if (rc == VIRTQUEUE_READ_DESC_ERROR) { + goto err_undo_map; + } + + /* Now copy what we have collected and mapped */ + elem = virtqueue_alloc_element(sz, out_num, in_num); + elem->index = head; + elem->ndescs = 1; + for (i = 0; i < out_num; i++) { + elem->out_addr[i] = addr[i]; + elem->out_sg[i] = iov[i]; + } + for (i = 0; i < in_num; i++) { + elem->in_addr[i] = addr[out_num + i]; + elem->in_sg[i] = iov[out_num + i]; + } + + vq->inuse++; + +done: + return elem; + +err_undo_map: + goto done; +} + +void *virtqueue_pop(VirtQueue *vq, size_t sz) +{ + return virtqueue_split_pop(vq, sz); +} + +bool virtqueue_get_head(VirtQueue *vq, unsigned int idx, + unsigned int *head) +{ + + /* + * Grab the next descriptor number they're advertising, and increment + * the index we've seen. 
+ */ + *head = vring_avail_ring(vq, idx % vq->vring.num); + + /* If their number is silly, that's a fatal mistake. */ + if (*head >= vq->vring.num) { + DBG("Guest says index %u is available", *head); + return false; + } + + return true; +} + +uint32_t get_vqs_max_size(VirtIODevice *vdev) +{ + uint32_t total_size, temp_size, total_p2 = 1; + int log_res = 0; + + (void)vdev; + + total_size = VIRTQUEUE_MAX_SIZE * sizeof(VRingDesc); + total_size += offsetof(VRingAvail, ring) + + VIRTQUEUE_MAX_SIZE * sizeof(uint16_t); + total_size += offsetof(VRingUsed, ring) + + VIRTQUEUE_MAX_SIZE * sizeof(uint16_t); + + temp_size = total_size; + + /* Compute log2 of total_size (Needs to be power of 2) */ + while ((temp_size /= 2) > 0) { + log_res++; + total_p2 *= 2; + } + + /* if total_size is not a power of 2: (total_size > 8) -> 16 */ + if (total_size > total_p2) { + total_size = 2 * total_p2; + } + + /* + * Align to page size: This needed only in case total_size + * is less than 4096 (PAGE_SIZE) + */ + if (total_size % PAGE_SIZE > 0) { + total_size = (total_size / PAGE_SIZE) * PAGE_SIZE + PAGE_SIZE; + } + + DBG("Total vqs size to mmap is: %u\n", total_size); + + return total_size; +} + +int virtqueue_num_heads(VirtQueue *vq, unsigned int idx) +{ + uint16_t num_heads = vring_avail_idx(vq) - idx; + + /* Check it isn't doing very strange things with descriptor numbers. */ + if (num_heads > vq->vring.num) { + DBG("Guest moved used index from %u to %u", + idx, vq->shadow_avail_idx); + return -EINVAL; + } + + return num_heads; +} + +int virtqueue_split_read_next_desc(VirtIODevice *vdev, VRingDesc *desc, + unsigned int max, unsigned int *next) +{ + (void)vdev; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc->flags & VRING_DESC_F_NEXT)) { + return VIRTQUEUE_READ_DESC_DONE; + } + + /* Check they're not leading us off end of descriptors. 
*/ + *next = desc->next; + + if (*next >= max) { + DBG("Desc next is %u", *next); + return VIRTQUEUE_READ_DESC_ERROR; + } + + desc = (VRingDesc *)desc + *next; + return VIRTQUEUE_READ_DESC_MORE; +} + +static void virtqueue_split_get_avail_bytes(VirtQueue *vq, + unsigned int *in_bytes, unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes) +{ + VirtIODevice *vdev = vq->vdev; + unsigned int max, idx; + unsigned int total_bufs, in_total, out_total; + int rc; + + idx = vq->last_avail_idx; + total_bufs = in_total = out_total = 0; + + max = vq->vring.num; + + while ((rc = virtqueue_num_heads(vq, idx)) > 0) { + unsigned int num_bufs; + VRingDesc *desc; + unsigned int i; + + num_bufs = total_bufs; + + if (!virtqueue_get_head(vq, idx++, &i)) { + goto err; + } + + /* there is no need to copy anything form the cache struct */ + desc = (VRingDesc *)vq->vring.desc + i; + + if (desc->flags & VRING_DESC_F_INDIRECT) { + if (!desc->len || (desc->len % sizeof(VRingDesc))) { + DBG("Invalid size for indirect buffer table\n"); + goto err; + } + + /* If we've got too many, that implies a descriptor loop. */ + if (num_bufs >= max) { + goto err; + } + } + + do { + /* If we've got too many, that implies a descriptor loop. 
*/ + if (++num_bufs > max) { + goto err; + } + + if (desc->flags & VRING_DESC_F_WRITE) { + in_total += desc->len; + } else { + out_total += desc->len; + } + if (in_total >= max_in_bytes && out_total >= max_out_bytes) { + goto done; + } + + rc = virtqueue_split_read_next_desc(vdev, desc, max, &i); + } while (rc == VIRTQUEUE_READ_DESC_MORE); + + if (rc == VIRTQUEUE_READ_DESC_ERROR) { + goto err; + } + + total_bufs = num_bufs; + } + + if (rc < 0) { + goto err; + } + +done: + if (in_bytes) { + *in_bytes = in_total; + } + if (out_bytes) { + *out_bytes = out_total; + } + return; + +err: + in_total = out_total = 0; + goto done; +} + +void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, + unsigned max_in_bytes, unsigned max_out_bytes) +{ + if (!vq->vring.desc) { + goto err; + } + + virtqueue_split_get_avail_bytes(vq, in_bytes, out_bytes, + max_in_bytes, max_out_bytes); + + return; +err: + if (in_bytes) { + *in_bytes = 0; + } + if (out_bytes) { + *out_bytes = 0; + } +} |