author     2024-10-14 15:44:29 +0300
committer  2024-12-03 11:56:03 +0200
commit     061ee070b409025af7e775c5ba9199673efed0bf (patch)
tree       d2aaa729ff690d03c2e932fa199a9ecbd4d65d83
parent     8948c9808eded80772de98cd4e8dd0cc71fdbe17 (diff)
Update virtio-loopback driver - notification mechanism
Updates:
- README file
- Notification mechanism:
- Notifications triggered by virtio devices bypass the
  adapter application and are delivered directly to
  the vhost-user devices
- Fix coding style
Change-Id: I4b03f1c19dd527ad08bee80b5d515dcbd3b1c485
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
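
The heart of the new mechanism is in the `vl_notify()` hunk of `virtio_loopback_device.c` below: when user space has registered a per-virtqueue eventfd through the new `SHARE_VQS_NOTIF` ioctl, the driver signals that eventfd directly instead of queuing work for the adapter. A condensed sketch of that fast path follows; the helper name and factoring are illustrative, since the actual change is made inline in `vl_notify()`:

```c
#include <linux/eventfd.h>
#include <linux/version.h>
#include <linux/virtio.h>
#include "virtio_loopback_driver.h"	/* driver-internal types (struct virtio_loopback_device) */

/*
 * Condensed sketch of the fast path introduced by this commit. When user
 * space has shared an eventfd for this virtqueue (SHARE_VQS_NOTIF ioctl),
 * signal it directly and skip the adapter's notify workqueue.
 */
static bool vl_notify_fast_path(struct virtio_loopback_device *vl_dev,
				struct virtqueue *vq)
{
	struct eventfd_ctx **notifiers = vl_dev->data->vq_data.vq_notifiers;

	if (!vl_dev->data->vq_data.vq_notifiers_enabled ||
	    !notifiers[vq->index])
		return false;	/* caller falls back to the workqueue path */

#if LINUX_VERSION_CODE > KERNEL_VERSION(6, 7, 12)
	eventfd_signal(notifiers[vq->index]);
#else
	eventfd_signal(notifiers[vq->index], 1);
#endif
	return true;
}
```

The existing slow path (allocate a `notify_data` node and queue it on the now per-device `notify_workqueue`) is kept as a fallback for virtqueues without a shared eventfd.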
-rw-r--r--  README.md                 |   4
-rw-r--r--  virtio_loopback_device.c  | 313
-rw-r--r--  virtio_loopback_driver.c  | 465
-rw-r--r--  virtio_loopback_driver.h  |  44
4 files changed, 540 insertions, 286 deletions
@@ -1,8 +1,8 @@ # virtio-loopback transport repository -This repository includes the latest version of the "virtio_loopback_transport" driver which is part of the Virtio Loopback Design presented in this [document](https://git.virtualopensystems.com/virtio-loopback/docs/-/blob/master/design_docs). This work carried on by Virtual Open Systems in the [Automotive Grade Linux](https://www.automotivegradelinux.org) community. +This repository includes the "virtio-loopback" driver which is part of the Virtio-loopback design. If you want to learn more about how to set up and test the whole virtio-loopback architecture, refer to the [virtio-loopback testing guide](https://gerrit.automotivelinux.org/gerrit/gitweb?p=src/virtio/virtio-loopback-adapter.git;a=blob;f=Documentation/testing_virtio_loopback_design.md;hb=HEAD). -As described in the design document, the transport is only a part of a more complex architecture. If you want to see the implementation and build the other components, refer to the [virtio-loopback docs repository](https://git.virtualopensystems.com/virtio-loopback/docs/-/tree/epsilon-release). +This work carried on by Virtual Open Systems in the [Automotive Grade Linux](https://www.automotivegradelinux.org) community. ## Build the virtio-loopback transport diff --git a/virtio_loopback_device.c b/virtio_loopback_device.c index e0b19a6..0c1c326 100644 --- a/virtio_loopback_device.c +++ b/virtio_loopback_device.c @@ -1,10 +1,27 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* + * Virtio loopback transport driver + * * Based on virtio_mmio.c * Copyright 2011-2014, ARM Ltd. * * Copyright 2022-2024 Virtual Open Systems SAS * + * Authors: + * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> + * Anna Panagopoulou <anna@virtualopensystems.com> + * Alvise Rigo <a.rigo@virtualopensystems.com> + * + * This module allows virtio devices to be used in a non-virtualized + * environment, coupled with vhost-user device (user-space drivers). + * + * It is set as a transport driver by the virtio-loopback device + * driver for a group of virtio drivers and reroutes all read/write + * operations to the userspace. In user-space, virtio-loopback adapter + * (the user-space component of the design) handles the read/write ops + * translates them into the corresponding vhost-user messages and + * forwards them to the corresponding vhost-user device. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -14,10 +31,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ #define pr_fmt(fmt) "virtio-loopback-transport: " fmt @@ -33,104 +46,105 @@ static void print_neg_flag(uint64_t neg_flag, bool read) pr_debug("Write:\n"); switch (neg_flag) { - case VIRTIO_MMIO_MAGIC_VALUE: //0x000 + case VIRTIO_MMIO_MAGIC_VALUE: pr_debug("\tVIRTIO_MMIO_MAGIC_VALUE\n"); break; - case VIRTIO_MMIO_VERSION: //0x004 + case VIRTIO_MMIO_VERSION: pr_debug("\tVIRTIO_MMIO_VERSION\n"); break; - case VIRTIO_MMIO_DEVICE_ID: //0x008 + case VIRTIO_MMIO_DEVICE_ID: pr_debug("\tVIRTIO_MMIO_DEVICE_ID\n"); break; - case VIRTIO_MMIO_VENDOR_ID: //0x00c + case VIRTIO_MMIO_VENDOR_ID: pr_debug("\tVIRTIO_MMIO_VENDOR_ID\n"); break; - case VIRTIO_MMIO_DEVICE_FEATURES: //0x010 + case VIRTIO_MMIO_DEVICE_FEATURES: pr_debug("\tVIRTIO_MMIO_DEVICE_FEATURES\n"); break; - case VIRTIO_MMIO_DEVICE_FEATURES_SEL: //0x014 + case VIRTIO_MMIO_DEVICE_FEATURES_SEL: pr_debug("\tVIRTIO_MMIO_DEVICE_FEATURES_SEL\n"); break; - case VIRTIO_MMIO_DRIVER_FEATURES: //0x020 + case VIRTIO_MMIO_DRIVER_FEATURES: pr_debug("\tVIRTIO_MMIO_DRIVER_FEATURES\n"); break; - case VIRTIO_MMIO_DRIVER_FEATURES_SEL: //0x024 + case VIRTIO_MMIO_DRIVER_FEATURES_SEL: pr_debug("\tVIRTIO_MMIO_DRIVER_FEATURES_SEL\n"); break; - case VIRTIO_MMIO_GUEST_PAGE_SIZE: //0x028 + case VIRTIO_MMIO_GUEST_PAGE_SIZE: pr_debug("\tVIRTIO_MMIO_GUEST_PAGE_SIZE\n"); break; - case VIRTIO_MMIO_QUEUE_SEL: //0x030 + case VIRTIO_MMIO_QUEUE_SEL: pr_debug("\tVIRTIO_MMIO_QUEUE_SEL\n"); break; - case VIRTIO_MMIO_QUEUE_NUM_MAX: //0x034 + case VIRTIO_MMIO_QUEUE_NUM_MAX: pr_debug("\tVIRTIO_MMIO_QUEUE_NUM_MAX\n"); break; - case VIRTIO_MMIO_QUEUE_NUM: //0x038 + case VIRTIO_MMIO_QUEUE_NUM: pr_debug("\tVIRTIO_MMIO_QUEUE_NUM\n"); break; - case VIRTIO_MMIO_QUEUE_ALIGN: //0x03c + case VIRTIO_MMIO_QUEUE_ALIGN: pr_debug("\tVIRTIO_MMIO_QUEUE_ALIGN\n"); break; - case VIRTIO_MMIO_QUEUE_PFN: //0x040 + case VIRTIO_MMIO_QUEUE_PFN: pr_debug("\tVIRTIO_MMIO_QUEUE_PFN\n"); break; - case VIRTIO_MMIO_QUEUE_READY: //0x044 + case VIRTIO_MMIO_QUEUE_READY: pr_debug("\tVIRTIO_MMIO_QUEUE_READY\n"); break; - case VIRTIO_MMIO_QUEUE_NOTIFY: //0x050 + case VIRTIO_MMIO_QUEUE_NOTIFY: pr_debug("\tVIRTIO_MMIO_QUEUE_NOTIFY\n"); break; - case VIRTIO_MMIO_INTERRUPT_STATUS: //0x060 + case VIRTIO_MMIO_INTERRUPT_STATUS: pr_debug("\tVIRTIO_MMIO_INTERRUPT_STATUS\n"); break; - case VIRTIO_MMIO_INTERRUPT_ACK: //0x064 + case VIRTIO_MMIO_INTERRUPT_ACK: pr_debug("\tVIRTIO_MMIO_INTERRUPT_ACK\n"); break; - case VIRTIO_MMIO_STATUS: //0x070 + case VIRTIO_MMIO_STATUS: pr_debug("\tVIRTIO_MMIO_STATUS\n"); break; - case VIRTIO_MMIO_QUEUE_DESC_LOW: //0x080 + case VIRTIO_MMIO_QUEUE_DESC_LOW: pr_debug("\tVIRTIO_MMIO_QUEUE_DESC_LOW\n"); break; - case VIRTIO_MMIO_QUEUE_DESC_HIGH: //0x084 + case VIRTIO_MMIO_QUEUE_DESC_HIGH: pr_debug("\tVIRTIO_MMIO_QUEUE_DESC_HIGH\n"); break; - case VIRTIO_MMIO_QUEUE_AVAIL_LOW: //0x090 + case VIRTIO_MMIO_QUEUE_AVAIL_LOW: pr_debug("\tVIRTIO_MMIO_QUEUE_AVAIL_LOW\n"); break; - case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: //0x094 + case VIRTIO_MMIO_QUEUE_AVAIL_HIGH: pr_debug("\tVIRTIO_MMIO_QUEUE_AVAIL_HIGH\n"); break; - case VIRTIO_MMIO_QUEUE_USED_LOW: //0x0a0 + case VIRTIO_MMIO_QUEUE_USED_LOW: pr_debug("\tVIRTIO_MMIO_QUEUE_USED_LOW\n"); break; - case VIRTIO_MMIO_QUEUE_USED_HIGH: //0x0a4 + case VIRTIO_MMIO_QUEUE_USED_HIGH: pr_debug("\tVIRTIO_MMIO_QUEUE_USED_HIGH\n"); break; - case VIRTIO_MMIO_SHM_SEL: //0x0ac + case VIRTIO_MMIO_SHM_SEL: pr_debug("\tVIRTIO_MMIO_SHM_SEL\n"); break; - case VIRTIO_MMIO_SHM_LEN_LOW: //0x0b0 + case VIRTIO_MMIO_SHM_LEN_LOW: pr_debug("\tVIRTIO_MMIO_SHM_LEN_LOW\n"); break; - case 
VIRTIO_MMIO_SHM_LEN_HIGH: //0x0b4 + case VIRTIO_MMIO_SHM_LEN_HIGH: pr_debug("\tVIRTIO_MMIO_SHM_LEN_HIGH\n"); break; - case VIRTIO_MMIO_SHM_BASE_LOW: //0x0b8 + case VIRTIO_MMIO_SHM_BASE_LOW: pr_debug("\tVIRTIO_MMIO_SHM_BASE_LOW\n"); break; - case VIRTIO_MMIO_SHM_BASE_HIGH: //0x0bc + case VIRTIO_MMIO_SHM_BASE_HIGH: pr_debug("\tVIRTIO_MMIO_SHM_BASE_HIGH\n"); break; - case VIRTIO_MMIO_CONFIG_GENERATION: //0x0fc + case VIRTIO_MMIO_CONFIG_GENERATION: pr_debug("\tVIRTIO_MMIO_CONFIG_GENERATION\n"); break; default: if (neg_flag >= VIRTIO_MMIO_CONFIG) pr_debug("\tVIRTIO_MMIO_CONFIG\n"); else - pr_debug("\tNegotiation flag Unknown: %lld\n", neg_flag); + pr_debug("\tNegotiation flag Unknown: %lld\n", + neg_flag); return; } } @@ -171,13 +185,15 @@ static void print_neg_flag(uint64_t neg_flag, bool read) */ /* function declaration */ -static uint64_t read_adapter(uint64_t fn_id, uint64_t size, struct device_data *dev_data); -static void write_adapter(uint64_t data, uint64_t fn_id, uint64_t size, struct device_data *dev_data); +static uint64_t read_adapter(uint64_t fn_id, uint64_t size, + struct device_data *dev_data); +static void write_adapter(uint64_t data, uint64_t fn_id, uint64_t size, + struct device_data *dev_data); /* Configuration interface */ static u64 vl_get_features(struct virtio_device *vdev) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; u64 features; @@ -195,7 +211,7 @@ static u64 vl_get_features(struct virtio_device *vdev) static int vl_finalize_features(struct virtio_device *vdev) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; /* Give virtio_ring a chance to accept features. 
*/ @@ -203,17 +219,19 @@ static int vl_finalize_features(struct virtio_device *vdev) /* Make sure there are no mixed devices */ if (vl_dev->version == 2 && - !__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) { - dev_err(&vdev->dev, "New virtio-loopback devices (version 2) " - "must provide VIRTIO_F_VERSION_1 feature!\n"); + !__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) { + dev_err(&vdev->dev, + "New virtio-mmio devices (version 2) must provide VIRTIO_F_VERSION_1 feature!\n"); return -EINVAL; } write_adapter(1, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 4, data); - write_adapter((u32)(vdev->features >> 32), VIRTIO_MMIO_DRIVER_FEATURES, 4, data); + write_adapter((u32)(vdev->features >> 32), VIRTIO_MMIO_DRIVER_FEATURES, + 4, data); write_adapter(0, VIRTIO_MMIO_DRIVER_FEATURES_SEL, 4, data); - write_adapter((u32)vdev->features, VIRTIO_MMIO_DRIVER_FEATURES, 4, data); + write_adapter((u32)vdev->features, VIRTIO_MMIO_DRIVER_FEATURES, + 4, data); return 0; } @@ -221,7 +239,7 @@ static int vl_finalize_features(struct virtio_device *vdev) static void vl_get(struct virtio_device *vdev, unsigned int offset, void *buf, unsigned int len) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; u8 b; @@ -233,7 +251,8 @@ static void vl_get(struct virtio_device *vdev, unsigned int offset, int i; for (i = 0; i < len; i++) - ptr[i] = read_adapter(VIRTIO_MMIO_CONFIG + offset + i, 1, data); + ptr[i] = read_adapter(VIRTIO_MMIO_CONFIG + offset + i, + 1, data); return; } @@ -243,17 +262,22 @@ static void vl_get(struct virtio_device *vdev, unsigned int offset, memcpy(buf, &b, sizeof(b)); break; case 2: - w = cpu_to_le16(read_adapter(VIRTIO_MMIO_CONFIG + offset, 2, data)); + w = cpu_to_le16(read_adapter(VIRTIO_MMIO_CONFIG + offset, + 2, data)); memcpy(buf, &w, sizeof(w)); break; case 4: - l = cpu_to_le32(read_adapter(VIRTIO_MMIO_CONFIG + offset, 4, data)); + l = cpu_to_le32(read_adapter(VIRTIO_MMIO_CONFIG + offset, + 4, data)); memcpy(buf, &l, sizeof(l)); break; case 8: - l = cpu_to_le32(read_adapter(VIRTIO_MMIO_CONFIG + offset, 4, data)); + l = cpu_to_le32(read_adapter(VIRTIO_MMIO_CONFIG + offset, + 4, data)); memcpy(buf, &l, sizeof(l)); - l = cpu_to_le32(read_adapter(VIRTIO_MMIO_CONFIG + offset + sizeof(l), 4, data)); + l = cpu_to_le32(read_adapter( + VIRTIO_MMIO_CONFIG + offset + sizeof(l), + 4, data)); memcpy(buf + sizeof(l), &l, sizeof(l)); break; default: @@ -264,7 +288,7 @@ static void vl_get(struct virtio_device *vdev, unsigned int offset, static void vl_set(struct virtio_device *vdev, unsigned int offset, const void *buf, unsigned int len) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; u8 b; @@ -276,7 +300,8 @@ static void vl_set(struct virtio_device *vdev, unsigned int offset, int i; for (i = 0; i < len; i++) - write_adapter(ptr[i], VIRTIO_MMIO_CONFIG + offset + i, 1, data); + write_adapter(ptr[i], VIRTIO_MMIO_CONFIG + offset + i, + 1, data); return; } @@ -288,17 +313,22 @@ static void vl_set(struct virtio_device *vdev, unsigned int offset, break; case 2: memcpy(&w, buf, sizeof(w)); - write_adapter(le16_to_cpu(w), VIRTIO_MMIO_CONFIG + offset, 2, data); + write_adapter(le16_to_cpu(w), VIRTIO_MMIO_CONFIG + offset, + 2, data); break; case 4: memcpy(&l, buf, sizeof(l)); - write_adapter(le32_to_cpu(l), VIRTIO_MMIO_CONFIG + offset, 4, 
data); + write_adapter(le32_to_cpu(l), VIRTIO_MMIO_CONFIG + offset, + 4, data); break; case 8: memcpy(&l, buf, sizeof(l)); - write_adapter(le32_to_cpu(l), VIRTIO_MMIO_CONFIG + offset, 4, data); + write_adapter(le32_to_cpu(l), VIRTIO_MMIO_CONFIG + offset, + 4, data); memcpy(&l, buf + sizeof(l), sizeof(l)); - write_adapter(le32_to_cpu(l), VIRTIO_MMIO_CONFIG + offset + sizeof(l), 4, data); + write_adapter(le32_to_cpu(l), + VIRTIO_MMIO_CONFIG + offset + sizeof(l), + 4, data); break; default: BUG(); @@ -307,7 +337,7 @@ static void vl_set(struct virtio_device *vdev, unsigned int offset, static u32 vl_generation(struct virtio_device *vdev) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; if (vl_dev->version == 1) @@ -318,7 +348,7 @@ static u32 vl_generation(struct virtio_device *vdev) static u8 vl_get_status(struct virtio_device *vdev) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; return read_adapter(VIRTIO_MMIO_STATUS, 4, data) & 0xff; @@ -326,7 +356,7 @@ static u8 vl_get_status(struct virtio_device *vdev) static void vl_set_status(struct virtio_device *vdev, u8 status) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; write_adapter(status, VIRTIO_MMIO_STATUS, 4, data); @@ -334,7 +364,7 @@ static void vl_set_status(struct virtio_device *vdev, u8 status) static void vl_reset(struct virtio_device *vdev) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; /* 0 status means a reset. 
*/ @@ -344,7 +374,8 @@ static void vl_reset(struct virtio_device *vdev) /* Notify work handling function */ static void notify_work_handler(struct work_struct *work) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(work, notify_work); + struct virtio_loopback_device *vl_dev = + container_of(work, struct virtio_loopback_device, notify_work); struct device_data *dev_data = vl_dev->data; struct notify_data *entry, *tmp; uint32_t index; @@ -365,30 +396,46 @@ static void notify_work_handler(struct work_struct *work) /* The notify function used when creating a virtqueue */ static bool vl_notify(struct virtqueue *vq) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vq->vdev, vdev); + struct virtio_loopback_device *vl_dev = + to_virtio_loopback_device(vq->vdev); + struct eventfd_ctx **vq_notifiers = vl_dev->data->vq_data.vq_notifiers; + bool vq_notifiers_enabled = vl_dev->data->vq_data.vq_notifiers_enabled; struct notify_data *data; int ret = 1; - /* Create the new node */ - data = kmalloc(sizeof(struct notify_data), GFP_ATOMIC); - if (!data) - return false; - - data->index = vq->index; - INIT_LIST_HEAD(&data->list); + if (vq_notifiers_enabled && (vq_notifiers[vq->index])) { + /* Notify directly vhost-user-device bypassing the adapter */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(6, 7, 12) + eventfd_signal(vq_notifiers[vq->index]); +#else + eventfd_signal(vq_notifiers[vq->index], 1); +#endif + } else { + /* Create the new node */ + data = kmalloc(sizeof(struct notify_data), GFP_ATOMIC); + if (!data) + return false; - /* Add in the notify_list, which should be protected! */ - spin_lock(&vl_dev->notify_q_lock); - list_add_tail(&data->list, &vl_dev->notify_list); - spin_unlock(&vl_dev->notify_q_lock); + data->index = vq->index; + INIT_LIST_HEAD(&data->list); - /* Schedule the element */ - while (ret) { - /* Force scheduling if queue_work fails and list is not empty */ - ret = !queue_work(loopback_data.notify_workqueue, &vl_dev->notify_work); + /* Add in the notify_list, which should be protected! */ spin_lock(&vl_dev->notify_q_lock); - ret &= !list_empty(&vl_dev->notify_list); + list_add_tail(&data->list, &vl_dev->notify_list); spin_unlock(&vl_dev->notify_q_lock); + + /* Schedule the element */ + while (ret) { + /* + * Force scheduling if queue_work fails and + * list is not empty + */ + ret = !queue_work(vl_dev->notify_workqueue, + &vl_dev->notify_work); + spin_lock(&vl_dev->notify_q_lock); + ret &= !list_empty(&vl_dev->notify_list); + spin_unlock(&vl_dev->notify_q_lock); + } } return true; @@ -401,9 +448,12 @@ bool vl_interrupt(struct virtio_loopback_device *vl_dev, int irq) struct virtio_loopback_vq_info *info; unsigned long status; - pr_debug("Received interrupt!\n"); - /* STATUS and ACK should be done without any intermediate status change */ - /* Read and acknowledge interrupts */ + /* + * Read and acknowledge interrupts + * + * Those two operations should be executed without any + * intermediate status change. 
+ */ status = read_adapter(VIRTIO_MMIO_INTERRUPT_STATUS, 4, data); write_adapter(status, VIRTIO_MMIO_INTERRUPT_ACK, 4, data); @@ -423,7 +473,8 @@ bool vl_interrupt(struct virtio_loopback_device *vl_dev, int irq) static void vl_del_vq(struct virtqueue *vq) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vq->vdev, vdev); + struct virtio_loopback_device *vl_dev = + to_virtio_loopback_device(vq->vdev); struct device_data *data = vl_dev->data; struct virtio_loopback_vq_info *info = vq->priv; @@ -456,11 +507,12 @@ static void vl_del_vqs(struct virtio_device *vdev) vl_del_vq(vq); } -static struct virtqueue *vl_setup_vq(struct virtio_device *vdev, unsigned int index, - void (*callback)(struct virtqueue *vq), - const char *name, bool ctx) +static struct virtqueue *vl_setup_vq(struct virtio_device *vdev, + unsigned int index, + void (*callback)(struct virtqueue *vq), + const char *name, bool ctx) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; struct virtio_loopback_vq_info *info; struct virtqueue *vq; @@ -476,7 +528,8 @@ static struct virtqueue *vl_setup_vq(struct virtio_device *vdev, unsigned int in /* Queue shouldn't already be set up. */ if (read_adapter((vl_dev->version == 1 ? - VIRTIO_MMIO_QUEUE_PFN : VIRTIO_MMIO_QUEUE_READY), 4, data)) { + VIRTIO_MMIO_QUEUE_PFN : VIRTIO_MMIO_QUEUE_READY), + 4, data)) { err = -ENOENT; goto error_available; } @@ -496,7 +549,7 @@ static struct virtqueue *vl_setup_vq(struct virtio_device *vdev, unsigned int in /* Create the vring */ vq = vring_create_virtqueue(index, num, VIRTIO_MMIO_VRING_ALIGN, vdev, - true, true, ctx, vl_notify, callback, name); + true, true, ctx, vl_notify, callback, name); if (!vq) { err = -ENOMEM; goto error_new_virtqueue; @@ -507,7 +560,8 @@ static struct virtqueue *vl_setup_vq(struct virtio_device *vdev, unsigned int in #endif /* Activate the queue */ - write_adapter(virtqueue_get_vring_size(vq), VIRTIO_MMIO_QUEUE_NUM, 4, data); + write_adapter(virtqueue_get_vring_size(vq), VIRTIO_MMIO_QUEUE_NUM, 4, + data); if (vl_dev->version == 1) { u64 q_pfn = virtqueue_get_desc_addr(vq); @@ -518,14 +572,13 @@ static struct virtqueue *vl_setup_vq(struct virtio_device *vdev, unsigned int in data->vq_data.vq_pfns[data->vq_data.vq_index++] = q_pfn; /* - * virtio-loopback v1 uses a 32bit QUEUE PFN. If we have something - * that doesn't fit in 32bit, fail the setup rather than - * pretending to be successful. + * virtio-loopback v1 uses a 32bit QUEUE PFN. If we have + * something that doesn't fit in 32bit, fail the setup rather + * than pretending to be successful. 
*/ if (q_pfn >> 32) { dev_err(&vdev->dev, - "platform bug: legacy virtio-loopback must not " - "be used with RAM above 0x%llxGB\n", + "platform bug: legacy virtio-loopback must not be used with RAM above 0x%llxGB\n", 0x1ULL << (32 + PAGE_SHIFT - 30)); err = -E2BIG; goto error_bad_pfn; @@ -538,15 +591,18 @@ static struct virtqueue *vl_setup_vq(struct virtio_device *vdev, unsigned int in addr = virtqueue_get_desc_addr(vq); write_adapter((u32)addr, VIRTIO_MMIO_QUEUE_DESC_LOW, 4, data); - write_adapter((u32)(addr >> 32), VIRTIO_MMIO_QUEUE_DESC_HIGH, 4, data); + write_adapter((u32)(addr >> 32), VIRTIO_MMIO_QUEUE_DESC_HIGH, + 4, data); addr = virtqueue_get_avail_addr(vq); write_adapter((u32)addr, VIRTIO_MMIO_QUEUE_AVAIL_LOW, 4, data); - write_adapter((u32)(addr >> 32), VIRTIO_MMIO_QUEUE_AVAIL_HIGH, 4, data); + write_adapter((u32)(addr >> 32), VIRTIO_MMIO_QUEUE_AVAIL_HIGH, + 4, data); addr = virtqueue_get_used_addr(vq); write_adapter((u32)addr, VIRTIO_MMIO_QUEUE_USED_LOW, 4, data); - write_adapter((u32)(addr >> 32), VIRTIO_MMIO_QUEUE_USED_HIGH, 4, data); + write_adapter((u32)(addr >> 32), VIRTIO_MMIO_QUEUE_USED_HIGH, + 4, data); write_adapter(1, VIRTIO_MMIO_QUEUE_READY, 4, data); } @@ -618,7 +674,7 @@ static int vl_find_vqs(struct virtio_device *vdev, unsigned int nvqs, } vqs[i] = vl_setup_vq(vdev, queue_idx++, vqi->callback, - vqi->name, vqi->ctx); + vqi->name, vqi->ctx); if (IS_ERR(vqs[i])) { vl_del_vqs(vdev); return PTR_ERR(vqs[i]); @@ -631,15 +687,15 @@ static int vl_find_vqs(struct virtio_device *vdev, unsigned int nvqs, static const char *vl_bus_name(struct virtio_device *vdev) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); return vl_dev->pdev->name; } static bool vl_get_shm_region(struct virtio_device *vdev, - struct virtio_shm_region *region, u8 id) + struct virtio_shm_region *region, u8 id) { - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct device_data *data = vl_dev->data; u64 len, addr; @@ -668,24 +724,25 @@ static bool vl_get_shm_region(struct virtio_device *vdev, } static const struct virtio_config_ops virtio_loopback_config_ops = { - .get = vl_get, - .set = vl_set, - .generation = vl_generation, - .get_status = vl_get_status, - .set_status = vl_set_status, - .reset = vl_reset, - .find_vqs = vl_find_vqs, - .del_vqs = vl_del_vqs, + .get = vl_get, + .set = vl_set, + .generation = vl_generation, + .get_status = vl_get_status, + .set_status = vl_set_status, + .reset = vl_reset, + .find_vqs = vl_find_vqs, + .del_vqs = vl_del_vqs, .get_features = vl_get_features, .finalize_features = vl_finalize_features, - .bus_name = vl_bus_name, + .bus_name = vl_bus_name, .get_shm_region = vl_get_shm_region, }; static void virtio_loopback_release_dev(struct device *_d) { - struct virtio_device *vdev = container_of(_d, struct virtio_device, dev); - struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev, vdev); + struct virtio_device *vdev = + container_of(_d, struct virtio_device, dev); + struct virtio_loopback_device *vl_dev = to_virtio_loopback_device(vdev); struct platform_device *pdev = vl_dev->pdev; devm_kfree(&pdev->dev, vl_dev); @@ -745,8 +802,8 @@ int loopback_register_virtio_dev(struct virtio_loopback_device *vl_dev) if (rc) rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) - dev_warn(&pdev->dev, "Failed to enable 64-bit or 32-bit DMA." 
- "Trying to continue, but this might not work.\n"); + dev_warn(&pdev->dev, + "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); /* Register the virtio device in the system */ rc = register_virtio_device(&vl_dev->vdev); @@ -775,6 +832,8 @@ static int virtio_loopback_probe(struct platform_device *pdev) INIT_LIST_HEAD(&vl_dev->virtqueues); spin_lock_init(&vl_dev->lock); /* Initialize the workqueue */ + vl_dev->notify_workqueue = + create_singlethread_workqueue("notify_workqueue"); INIT_WORK(&vl_dev->notify_work, notify_work_handler); INIT_LIST_HEAD(&vl_dev->notify_list); spin_lock_init(&vl_dev->notify_q_lock); @@ -796,6 +855,10 @@ int virtio_loopback_remove(struct platform_device *pdev) { struct virtio_loopback_device *vl_dev = platform_get_drvdata(pdev); + /* Destroy the notify workqueue */ + flush_workqueue(vl_dev->notify_workqueue); + destroy_workqueue(vl_dev->notify_workqueue); + if (vl_dev->data) { unregister_virtio_device(&vl_dev->vdev); pr_info("unregister_virtio_device!\n"); @@ -817,7 +880,8 @@ struct platform_driver virtio_loopback_driver = { }, }; -static uint64_t read_adapter(uint64_t fn_id, uint64_t size, struct device_data *dev_data) +static uint64_t read_adapter(uint64_t fn_id, uint64_t size, + struct device_data *dev_data) { uint64_t result; @@ -852,8 +916,11 @@ static uint64_t read_adapter(uint64_t fn_id, uint64_t size, struct device_data * * "wake_up" kick, check the updated "done" value and return. */ - while (dev_data->valid_eventfd && atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) != 1) - wait_event_timeout(dev_data->wq, atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) == 1, 1 * HZ); + while (dev_data->valid_eventfd && + atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) != 1) + wait_event_timeout(dev_data->wq, + atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) == 1, + 1 * HZ); result = ((struct virtio_neg *)(dev_data->info->data))->data; @@ -862,7 +929,8 @@ static uint64_t read_adapter(uint64_t fn_id, uint64_t size, struct device_data * return result; } -static void write_adapter(uint64_t data, uint64_t fn_id, uint64_t size, struct device_data *dev_data) +static void write_adapter(uint64_t data, uint64_t fn_id, uint64_t size, + struct device_data *dev_data) { mutex_lock(&(dev_data)->read_write_lock); @@ -895,8 +963,11 @@ static void write_adapter(uint64_t data, uint64_t fn_id, uint64_t size, struct d * way, virtio-loopback driver will wake up even if has missed the * "wake_up" kick, check the updated "done" value and return. 
*/ - while (dev_data->valid_eventfd && atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) != 1) - wait_event_timeout(dev_data->wq, atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) == 1, 1 * HZ); + while (dev_data->valid_eventfd && + atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) != 1) + wait_event_timeout(dev_data->wq, + atomic_read(&((struct virtio_neg *)(dev_data->info->data))->done) == 1, + 1 * HZ); mutex_unlock(&(dev_data)->read_write_lock); } diff --git a/virtio_loopback_driver.c b/virtio_loopback_driver.c index d822a3e..4284f0a 100644 --- a/virtio_loopback_driver.c +++ b/virtio_loopback_driver.c @@ -1,7 +1,22 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* + * Virtio loopback device driver + * * Copyright 2022-2024 Virtual Open Systems SAS * + * Authors: + * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> + * Anna Panagopoulou <anna@virtualopensystems.com> + * Alvise Rigo <a.rigo@virtualopensystems.com> + * + * This module allows virtio devices to be used in a non-virtualized + * environment, coupled with vhost-user device (user-space drivers). + * + * This module is responsible to assign the virtio-loopback transport driver + * to a group of virtio drivers in order to be able to share notifications and + * the vrings (without copies) with the corresponding vhost-user devices in + * the user-space. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -11,10 +26,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #define pr_fmt(fmt) "virtio-loopback: " fmt @@ -23,46 +34,43 @@ #include "virtio_loopback_driver.h" /* Features */ -MODULE_LICENSE("GPL v2"); +MODULE_LICENSE("GPL"); /* The global data for the loopback */ -struct loopback_device_data loopback_data; -struct loopback_devices_array loopback_devices; +static struct loopback_device_data loopback_data; +static struct loopback_devices_array loopback_devices; /* - * This functions registers all mmap calls done by the user-space into an array + * This function registers all mmap calls done by the user-space into an array */ -static void add_share_mmap(struct file *filp, uint64_t pfn, uint64_t vm_start, uint64_t size) +static void add_share_mmap(struct file *filp, uint64_t pfn, + uint64_t vm_start, uint64_t size) { - struct file_priv_data *file_data = (struct file_priv_data *)(filp->private_data); + struct file_priv_data *file_data = + (struct file_priv_data *)(filp->private_data); struct mmap_data *mm_data = (struct mmap_data *)file_data->mm_data; - pr_debug("Add new mmaping! 
index: %d\n", mm_data->mmap_index); - pr_debug("pfn: 0x%llx", pfn); - pr_debug("vm_start: 0x%llx", vm_start); - pr_debug("size: 0x%llx", size); - mm_data->share_mmap_list[mm_data->mmap_index].pfn = pfn; mm_data->share_mmap_list[mm_data->mmap_index].vm_start = vm_start; mm_data->share_mmap_list[mm_data->mmap_index].size = size; - mm_data->share_mmap_list[mm_data->mmap_index].uid = task_pid_nr(current); + mm_data->share_mmap_list[mm_data->mmap_index].uid = + task_pid_nr(current); mm_data->mmap_index++; } /* - * This functions removes a record from mmap array + * This function removes a record from mmap array */ static void share_mmap_rem(struct vm_area_struct *vma) { struct file *file = vma->vm_file; - struct file_priv_data *file_data = (struct file_priv_data *)(file->private_data); + struct file_priv_data *file_data = + (struct file_priv_data *)(file->private_data); struct mmap_data *mm_data = (struct mmap_data *)file_data->mm_data; int i; for (i = 0; i < MMAP_LIMIT; i++) { if (mm_data->share_mmap_list[i].vm_start == vma->vm_start) { - pr_debug("share_mmap with pa: 0x%llx and size: %x is deleted from the list\n", - mm_data->share_mmap_list[i].pfn, mm_data->share_mmap_list[i].size); mm_data->share_mmap_list[i].uid = 0; mm_data->share_mmap_list[i].pfn = 0; mm_data->share_mmap_list[i].vm_start = 0; @@ -73,87 +81,130 @@ static void share_mmap_rem(struct vm_area_struct *vma) static void print_mmap_idx(struct mmap_data *mm_data, int i) { - pr_debug("share_mmap_list[%d].uid %x\n", i, mm_data->share_mmap_list[i].uid); - pr_debug("share_mmap_list[%d].pfn %llx\n", i, mm_data->share_mmap_list[i].pfn); - pr_debug("share_mmap_list[%d].vm_start %llx\n", i, mm_data->share_mmap_list[i].vm_start); - pr_debug("share_mmap_list[%d].size %x\n", i, mm_data->share_mmap_list[i].size); + pr_debug("share_mmap_list[%d].uid %x\n", i, + mm_data->share_mmap_list[i].uid); + pr_debug("share_mmap_list[%d].pfn %llx\n", i, + mm_data->share_mmap_list[i].pfn); + pr_debug("share_mmap_list[%d].vm_start %llx\n", i, + mm_data->share_mmap_list[i].vm_start); + pr_debug("share_mmap_list[%d].size %x\n", i, + mm_data->share_mmap_list[i].size); } +/** + * print_mmaps - Debug function to print details of all active mmap entries + * @mm_data: Pointer to the mmap_data structure containing mmap details + * + * This function iterates through the `share_mmap_list` array in the given + * `mm_data` structure and logs the details of each active mmap entry by + * calling `print_mmap_idx`. The number of entries printed is determined as: + * - `MMAP_LIMIT` if `mmap_index` is `0`. + * - The value of `mmap_index` otherwise. + * + * Note: + * - The function uses `pr_debug` for logging, so enable debugging to see + * the output. + * - Ensure that `mm_data` is properly initialized before calling this + * function to avoid accessing invalid memory. + */ + static void print_mmaps(struct mmap_data *mm_data) { - int i, limit = mm_data->mmap_index == 0 ? MMAP_LIMIT : mm_data->mmap_index; + int i, limit = + mm_data->mmap_index == 0 ? MMAP_LIMIT : mm_data->mmap_index; for (i = 0; i < limit; i++) print_mmap_idx(mm_data, i); } -/* - * This function return the corresponding user-space address of a pfn - * based on the mapping done during the initialization +/** + * share_mmap_exist_vma_return_correct_pfn - Calculate corrected PFN for a + * given address. 
+ * @mm_data: Pointer to struct containing memory mapping data + * @addr: Address for which to calculate the corrected PFN + * + * This function iterates through the list of shared memory mappings in + * `mm_data` and checks if the given `addr` lies within any of the mappings. + * If it does, it computes the corrected PFN based on the mapping's start + * address, size, and PFN. + * + * Returns: + * - The corrected PFN if the address falls within a mapping. + * - 0 if the address does not match any mapping. */ -static uint64_t share_mmap_exist_vma_return_correct_pfn(struct mmap_data *mm_data, uint64_t addr) +static uint64_t share_mmap_exist_vma_return_correct_pfn( + struct mmap_data *mm_data, + uint64_t addr) { int i; uint64_t corrected_pfn; for (i = 0; i < MMAP_LIMIT; i++) { if ((mm_data->share_mmap_list[i].vm_start <= addr) && - (addr < mm_data->share_mmap_list[i].vm_start + mm_data->share_mmap_list[i].size)) { - pr_debug("addr (0x%llx) exist in: 0x%llx - 0x%llx\n", addr, mm_data->share_mmap_list[i].vm_start, - mm_data->share_mmap_list[i].vm_start + mm_data->share_mmap_list[i].size); - pr_debug("((addr - share_mmap_list[i].vm_start) / PAGE_SIZE): 0x%llx\n", - ((addr - mm_data->share_mmap_list[i].vm_start) / PAGE_SIZE)); - pr_debug("share_mmap_list[i].pfn: 0x%llx\n", mm_data->share_mmap_list[i].pfn); - corrected_pfn = ((addr - mm_data->share_mmap_list[i].vm_start) / PAGE_SIZE) + mm_data->share_mmap_list[i].pfn; + (addr < mm_data->share_mmap_list[i].vm_start + + mm_data->share_mmap_list[i].size)) { + corrected_pfn = ((addr - + mm_data->share_mmap_list[i].vm_start) + / PAGE_SIZE) + + mm_data->share_mmap_list[i].pfn; return corrected_pfn; } } return 0; } -static void pf_mmap_close(struct vm_area_struct *vma) -{ - pr_debug("unmap\t-> vma->vm_start: 0x%lx\n", vma->vm_start); - pr_debug("unmap\t-> size: %lu\n", vma->vm_end - vma->vm_start); - share_mmap_rem(vma); -} - +/** + * pf_mmap_fault - Handle page faults for the device mmap area + * @vmf: Pointer to the `vm_fault` structure containing fault information + * + * This function is called during a page fault to find and insert the correct + * page for the faulting address. It calculates the corrected PFN using the + * provided mmap data of the device and updates the faulting page. + * + * Returns: + * - 0 if successful. + * - `VM_FAULT_SIGBUS` on failure. 
+ */ static vm_fault_t pf_mmap_fault(struct vm_fault *vmf) { uint64_t corrected_pfn; pfn_t corr_pfn_struct; struct page *page; - int ret = 0; struct file *file = vmf->vma->vm_file; - struct file_priv_data *file_data = (struct file_priv_data *)(file->private_data); - struct mmap_data *mm_data = (struct mmap_data *)file_data->mm_data; + struct file_priv_data *file_data = + (struct file_priv_data *)(file->private_data); + struct mmap_data *mm_data = + (struct mmap_data *)file_data->mm_data; - pr_debug("----- Page fault: %lld -----\n", mm_data->sum_pgfaults); + /* Count the total number of page_faults for debugging purpose */ mm_data->sum_pgfaults++; /* Find the corrected pfn */ - corrected_pfn = share_mmap_exist_vma_return_correct_pfn(mm_data, vmf->address); + corrected_pfn = share_mmap_exist_vma_return_correct_pfn(mm_data, + vmf->address); corr_pfn_struct.val = corrected_pfn; - /* Some debug prints */ - pr_debug("vma->vm_start: 0x%lx\n", vmf->vma->vm_start); - pr_debug("vma->vm_pgoff: 0x%lx\n", vmf->vma->vm_pgoff); - pr_debug("vmf->address: 0x%lx\n", vmf->address); - pr_debug("corrected_pfn: 0x%llx\n", corrected_pfn); - pr_debug("pfn_valid(corrected_pfn): 0x%x\n", pfn_valid(corrected_pfn)); - - BUG_ON(!pfn_valid(corrected_pfn)); + /* Ensure the PFN is valid */ + if (unlikely(!pfn_valid(corrected_pfn))) { + pr_err("Invalid PFN: %llu\n", corrected_pfn); + return VM_FAULT_SIGBUS; + } /* After finding the page, correct the vmf->page */ page = pfn_to_page(corrected_pfn); - BUG_ON(!virt_addr_valid(page_address(page))); + if (unlikely(!virt_addr_valid(page_address(page)))) { + pr_err("Invalid page address for PFN: %llu\n", corrected_pfn); + return VM_FAULT_SIGBUS; + } /* Insert the correct page */ - ret = vmf_insert_pfn(vmf->vma, vmf->address, corrected_pfn); - pr_debug("vmf_insert_pfn -> ret: %d\n", ret); + return vmf_insert_pfn(vmf->vma, vmf->address, corrected_pfn); +} - return ret; +static void pf_mmap_close(struct vm_area_struct *vma) +{ + share_mmap_rem(vma); } const struct vm_operations_struct pf_mmap_ops = { @@ -161,12 +212,26 @@ const struct vm_operations_struct pf_mmap_ops = { .fault = pf_mmap_fault, }; +/** + * pf_mmap_vm_page - Set up memory mapping for a file + * @filp: Pointer to the file structure for the mapping + * @vma: Pointer to the VM area structure representing the memory mapping + * + * This function sets up a user-space area by associating a physical frame + * number (PFN) with the virtual address range. It updates internal data + * structures to track the mapping and sets appropriate VM flags. + * + * Returns: + * - 0 on success. + * - Negative error code on failure. 
+ */ static int pf_mmap_vm_page(struct file *filp, struct vm_area_struct *vma) { uint64_t size = (unsigned long)(vma->vm_end - vma->vm_start); - struct file_priv_data *file_data = (struct file_priv_data *)(filp->private_data); + struct file_priv_data *file_data = + (struct file_priv_data *)(filp->private_data); struct mmap_data *mm_data = (struct mmap_data *)file_data->mm_data; - uint64_t pfn = ((mm_data->cur_ram_idx++) * 0x40000); + uint64_t pfn = ((mm_data->cur_ram_idx++) * (size >> PAGE_SHIFT)); #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 3, 0) vma->vm_flags |= VM_PFNMAP; @@ -177,15 +242,32 @@ static int pf_mmap_vm_page(struct file *filp, struct vm_area_struct *vma) return 0; } +/** + * mmap_vqs_com_struct - Map virtqueue or communication structure to user space + * @filp: Pointer to the file structure associated with the mapping + * @vma: Pointer to the VM area structure describing the memory region + * + * This function maps either the virtqueue data or the communication structure + * to the user space using `remap_pfn_range`. The choice of what to map depends + * on the `share_communication_struct` flag in the mmap data structure. + * + * Returns: + * - 0 on success. + * - Negative error code on failure. + */ static int mmap_vqs_com_struct(struct file *filp, struct vm_area_struct *vma) { int ret = 0; unsigned long size = (unsigned long)(vma->vm_end - vma->vm_start); - struct file_priv_data *file_data = (struct file_priv_data *)(filp->private_data); - struct device_data *dev_data = (struct device_data *)file_data->dev_data; + struct file_priv_data *file_data = + (struct file_priv_data *)(filp->private_data); + struct device_data *dev_data = + (struct device_data *)file_data->dev_data; struct mmap_data *mmap_data = (struct mmap_data *)file_data->mm_data; - struct mmap_info *com_mmap_virt = (struct mmap_info *)(file_data->dev_data->info)->data; - uint64_t com_mmap_pfn = ((uint64_t)virt_to_phys(com_mmap_virt)) >> PAGE_SHIFT; + struct mmap_info *com_mmap_virt = + (struct mmap_info *)(file_data->dev_data->info)->data; + uint64_t com_mmap_pfn = + ((uint64_t)virt_to_phys(com_mmap_virt)) >> PAGE_SHIFT; uint64_t starting_pfn; if (mmap_data->share_communication_struct) { @@ -201,68 +283,79 @@ static int mmap_vqs_com_struct(struct file *filp, struct vm_area_struct *vma) starting_pfn = dev_data->vq_data.vq_pfn; } - ret = remap_pfn_range(vma, vma->vm_start, starting_pfn, size, vma->vm_page_prot); + ret = remap_pfn_range(vma, vma->vm_start, starting_pfn, size, + vma->vm_page_prot); if (ret != 0) { pr_err("Mmap error\n"); print_mmaps(mmap_data); - goto out; + } else { + add_share_mmap(filp, starting_pfn, vma->vm_start, size); } - add_share_mmap(filp, starting_pfn, vma->vm_start, size); - -out: return ret; } +/** + * op_mmap - Map vring buffers, virtqueue or communication structure + * to user space. + * @filp: Pointer to the file structure associated with the mapping + * @vma: Pointer to the VM area structure describing the memory region + * + * This function checks if the incoming mmap sys_call is related to a) vrings + * or b) virtqueues / communication structure data (depending on + * `share_communication_struct` and `share_vqs` variables. Then calls + * `mmap_vqs_com_struct` and `pf_mmap_vm_page` correspondingly in order + * to apply a different mapping logic. + * + * Returns: + * - 0 on success. + * - Negative error code on failure. 
+ */ static int op_mmap(struct file *filp, struct vm_area_struct *vma) { - struct file_priv_data *file_data = (struct file_priv_data *)(filp->private_data); + struct file_priv_data *file_data = + (struct file_priv_data *)(filp->private_data); struct mmap_data *mmap_data = (struct mmap_data *)file_data->mm_data; int ret = 0; - pr_debug("MMAP SYS_CALL -> vma->vm_pgoff: 0x%lx", vma->vm_pgoff); vma->vm_ops = &pf_mmap_ops; - if (mmap_data->share_communication_struct || mmap_data->share_vqs) { + if (mmap_data->share_communication_struct || mmap_data->share_vqs) ret = mmap_vqs_com_struct(filp, vma); - goto out; - } - - ret = pf_mmap_vm_page(filp, vma); + else + ret = pf_mmap_vm_page(filp, vma); -out: return ret; } -/* Defined for future work */ static ssize_t loopback_write(struct file *file, - const char __user *user_buffer, - size_t size, - loff_t *offset) + const char __user *user_buffer, + size_t size, + loff_t *offset) { ssize_t len = sizeof(int); - pr_debug("loopback write function\n"); if (len <= 0) return 0; return len; } -/* Defined for future work */ static ssize_t loopback_read(struct file *file, - char __user *user_buffer, - size_t size, loff_t *offset) + char __user *user_buffer, + size_t size, loff_t *offset) { - pr_debug("loopback read function\n"); return 0; } +/* + * The lseek sys_call is needed only by the vhost-user device + * located in vhost-device crate. + */ static loff_t loopback_seek(struct file *file, loff_t offset, int whence) { loff_t new_pos; - pr_debug("loopback seek function!\n"); switch (whence) { case SEEK_SET: new_pos = offset; @@ -288,13 +381,14 @@ static int register_virtio_loopback_dev(uint32_t device_id) struct platform_device *pdev; int err = 0; - pr_info("Received request to register a new virtio-loopback-dev\n"); + pr_info("Received request to register a new loopback transport\n"); /* Register a new loopback-transport device */ - pdev = platform_device_register_simple("loopback-transport", device_id, NULL, 0); + pdev = platform_device_register_simple("loopback-transport", + device_id, NULL, 0); if (IS_ERR(pdev)) { err = PTR_ERR(pdev); - pr_err("failed to register loopback-transport device: %d\n", err); + pr_err("Failed to register transport device: %d\n", err); } return err; @@ -304,15 +398,14 @@ static int register_virtio_loopback_dev(uint32_t device_id) int insert_entry_data(struct virtio_loopback_device *vl_dev, int id) { int err = 0; - /* Read and that value atomically */ + /* Read that value atomically */ uint32_t max_used_dev_idx = atomic_read(&loopback_devices.device_num); /* Store the new vl_dev */ - if ((id <= MAX_PDEV) && (max_used_dev_idx < MAX_PDEV)) { + if ((id <= MAX_PDEV) && (max_used_dev_idx < MAX_PDEV)) loopback_devices.devices[id] = vl_dev; - } else { + else err = -ENOMEM; - } /* Mark the request as completed and free registration */ complete(&loopback_devices.reg_vl_dev_completion[id]); @@ -320,7 +413,8 @@ int insert_entry_data(struct virtio_loopback_device *vl_dev, int id) } /* Helper function to mark an entry as active */ -static struct virtio_loopback_device *activate_entry_data(struct device_data *data, uint32_t curr_dev_id) +static struct virtio_loopback_device * +activate_entry_data(struct device_data *data, uint32_t curr_dev_id) { struct virtio_loopback_device *vl_dev = NULL; @@ -334,36 +428,83 @@ static struct virtio_loopback_device *activate_entry_data(struct device_data *da return vl_dev; } -static int start_loopback(struct file_priv_data *file_data, uint32_t curr_dev_id) +static int start_loopback(struct file_priv_data 
*file_data, + uint32_t curr_dev_id) { struct virtio_loopback_device *vl_dev; - int rc; + int ret; /* Activate the entry */ vl_dev = activate_entry_data(file_data->dev_data, curr_dev_id); if (vl_dev) { file_data->vl_dev_irq = vl_dev; /* Register the activated vl_dev in the system */ - rc = loopback_register_virtio_dev(vl_dev); + ret = loopback_register_virtio_dev(vl_dev); } else { pr_debug("No available entry found!\n"); file_data->vl_dev_irq = NULL; - rc = -EFAULT; + ret = -EFAULT; } - return rc; + return ret; } -static long loopback_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +/** + * loopback_ioctl - Handle various ioctl commands for loopback device + * @file: Pointer to the file structure associated with the device + * @cmd: The ioctl command code + * @arg: User-space argument associated with the command + * + * This function processes various ioctl commands to configure and control the + * loopback device. The supported commands include: + * + * - `EFD_INIT`: The user-space adapter component shares an eventfd with the + * loopback device. This eventfd is triggered by the device each time a + * read / write operation is requested via the communication data structure. + * + * - `WAKEUP`: Sets a flag in the device's internal structure and wakes up any + * read / write process waiting on the communication wait queue. + * + * - `START_LOOPBACK`: Registers and starts a new loopback device, assigning a + * unique device ID and waiting for its probe function to complete before + * returning to user space. + * + * - `IRQ`: Handles an interrupt request by triggering the device's interrupt + * logic with the provided IRQ number. + * + * - `SHARE_VQS`: Shares a specified virtqueue (selected via a queue index) + * between the user-space application and the loopback device. + * + * - `SHARE_COM_STRUCT`: Notifies the loopback-device that the next mmap call + * will request the communication structure to be as shared between + * user-space and the loopback device. + * + * - `SHARE_VQS_NOTIF`: The user-space uses this command to share the eventfd + * associated with a specific virtqueue. This eventfd will be triggered each + * time the virtio device calls the `notify` function. In this way the + * by-pass the user-space adapter component and delivered directly to the + * vhost-user devices in user-space. + * + * If an unknown `cmd` is provided, the function logs an error and returns + * `-ENOTTY` to indicate an unsupported ioctl command. + * + * Returns: + * - `0` on success. + * - Negative error codes (`-EFAULT`, `-ENOTTY`, or others) on failure. 
+ */ +static long loopback_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) { struct efd_data efd_data; int irq, err; uint32_t queue_sel; - struct file_priv_data *file_data = (struct file_priv_data *)(file->private_data); + struct file_priv_data *file_data = + (struct file_priv_data *)(file->private_data); struct mmap_data *mm_data = (struct mmap_data *)file_data->mm_data; - struct device_data *dev_data = (struct device_data *)file_data->dev_data; + struct device_data *dev_data = + (struct device_data *)file_data->dev_data; uint32_t curr_avail_dev_id; + struct vq_notifier vq_notifier; switch (cmd) { case EFD_INIT: { @@ -378,12 +519,12 @@ static long loopback_ioctl(struct file *file, rcu_read_lock(); #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 11, 220) - efd_file = fcheck_files(userspace_task->files, efd_data.efd[0]); + efd_file = fcheck_files(userspace_task->files, efd_data.efd[0]); #else #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) - efd_file = files_lookup_fd_rcu(userspace_task->files, efd_data.efd[0]); + efd_file = files_lookup_fd_rcu(userspace_task->files, efd_data.efd[0]); #else - efd_file = files_lookup_fd_raw(userspace_task->files, efd_data.efd[0]); + efd_file = files_lookup_fd_raw(userspace_task->files, efd_data.efd[0]); #endif #endif rcu_read_unlock(); @@ -395,25 +536,32 @@ static long loopback_ioctl(struct file *file, break; } case WAKEUP: { - atomic_set(&((struct virtio_neg *)(dev_data->info->data))->done, 1); + atomic_set(&((struct virtio_neg *)(dev_data->info->data))->done, + 1); wake_up(&(dev_data)->wq); break; } case START_LOOPBACK: { - if (copy_from_user(&(file_data)->device_info, (struct virtio_device_info_struct *) arg, - sizeof(struct virtio_device_info_struct))) + if (copy_from_user(&(file_data)->device_info, + (struct virtio_device_info_struct *) arg, + sizeof(struct virtio_device_info_struct))) return -EFAULT; /* Read and increase that value atomically */ - curr_avail_dev_id = atomic_add_return(1, &loopback_devices.device_num) - 1; + curr_avail_dev_id = + atomic_add_return(1, &loopback_devices.device_num) - 1; /* Register a new loopback device */ err = register_virtio_loopback_dev(curr_avail_dev_id); if (err) return -EFAULT; - /* Wait for probe function to be called before return control to user-space app */ - wait_for_completion(&loopback_devices.reg_vl_dev_completion[curr_avail_dev_id]); + /* + * Wait for probe function to be called before return control + * to user-space app + */ + wait_for_completion( + &loopback_devices.reg_vl_dev_completion[curr_avail_dev_id]); /* Start the loopback */ err = start_loopback(file_data, curr_avail_dev_id); @@ -425,27 +573,55 @@ static long loopback_ioctl(struct file *file, case IRQ: if (copy_from_user(&irq, (int *) arg, sizeof(int))) return -EFAULT; - pr_debug("\nIRQ\n"); /* * Both of the interrupt ways work but a) is more stable * and b) has better performance: * a) vl_interrupt(NULL); * b) queue_work(interrupt_workqueue, &async_interrupt); */ - /* Call the function */ vl_interrupt(file_data->vl_dev_irq, irq); break; case SHARE_VQS: - if (copy_from_user(&queue_sel, (uint32_t *) arg, sizeof(uint32_t))) + if (copy_from_user(&queue_sel, (uint32_t *) arg, + sizeof(uint32_t))) return -EFAULT; - pr_debug("\n\nSHARE_VQS: %u\n\n", queue_sel); dev_data->vq_data.vq_pfn = dev_data->vq_data.vq_pfns[queue_sel]; - pr_debug("Selected pfn is: 0x%llx", dev_data->vq_data.vq_pfn); mm_data->share_vqs = true; break; case SHARE_COM_STRUCT: mm_data->share_communication_struct = true; break; + case SHARE_VQS_NOTIF: + + struct 
task_struct *userspace_task; + struct file *efd_file; + + if (copy_from_user(&vq_notifier, (struct vq_notifier *) arg, + sizeof(struct vq_notifier))) + return -EFAULT; + + userspace_task = + pid_task(find_vpid(vq_notifier.pid), PIDTYPE_PID); + + rcu_read_lock(); +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 11, 220) + efd_file = fcheck_files(userspace_task->files, vq_notifier.notifier_fd); +#else +#if LINUX_VERSION_CODE < KERNEL_VERSION(6, 7, 0) + efd_file = files_lookup_fd_rcu(userspace_task->files, vq_notifier.notifier_fd); +#else + efd_file = files_lookup_fd_raw(userspace_task->files, vq_notifier.notifier_fd); +#endif +#endif + rcu_read_unlock(); + + dev_data->vq_data.vq_notifiers[vq_notifier.vq_index] = + eventfd_ctx_fileget(efd_file); + if (!dev_data->vq_data.vq_notifiers[vq_notifier.vq_index]) + return -1; + /* Mark device notifiers as enabled */ + dev_data->vq_data.vq_notifiers_enabled = true; + break; default: pr_err("Unknown loopback ioctl: %u\n", cmd); return -ENOTTY; @@ -456,13 +632,15 @@ static long loopback_ioctl(struct file *file, static int loopback_open(struct inode *inode, struct file *file) { - uint32_t val_1gb = 1024 * 1024 * 1024; // 1GB + uint32_t val_1gb = 1024 * 1024 * 1024; struct virtio_neg device_neg = {.done = ATOMIC_INIT(0)}; /* Allocate file private data */ - struct file_priv_data *file_data = kmalloc(sizeof(struct file_priv_data), - GFP_KERNEL); - struct device_data *dev_data = kmalloc(sizeof(struct device_data), GFP_KERNEL); - struct mmap_data *mm_data = kmalloc(sizeof(struct mmap_data), GFP_KERNEL); + struct file_priv_data *file_data = + kmalloc(sizeof(struct file_priv_data), GFP_KERNEL); + struct device_data *dev_data = + kmalloc(sizeof(struct device_data), GFP_KERNEL); + struct mmap_data *mm_data = + kmalloc(sizeof(struct mmap_data), GFP_KERNEL); if (!file_data || !dev_data || !mm_data) goto error_kmalloc; @@ -486,6 +664,7 @@ static int loopback_open(struct inode *inode, struct file *file) /* Init vq_data */ dev_data->vq_data.vq_index = 0; dev_data->valid_eventfd = true; + dev_data->vq_data.vq_notifiers_enabled = false; file_data->dev_data = dev_data; /* Init file mmap_data */ @@ -510,11 +689,13 @@ error_kmalloc: static int loopback_release(struct inode *inode, struct file *file) { - struct file_priv_data *file_data = (struct file_priv_data *)(file->private_data); - struct device_data *dev_data = (struct device_data *)file_data->dev_data; + struct file_priv_data *file_data = + (struct file_priv_data *)(file->private_data); + struct device_data *dev_data = + (struct device_data *)file_data->dev_data; struct mmap_data *mm_data = (struct mmap_data *)file_data->mm_data; - pr_info("Release the device\n"); + pr_info("Releasing the device\n"); /* * This makes the read/write do not wait * for the virtio-loopback-adapter if @@ -527,7 +708,7 @@ static int loopback_release(struct inode *inode, struct file *file) /* Cancel any pending work */ cancel_work_sync(&file_data->vl_dev_irq->notify_work); /* Continue with the vl_dev unregister */ - virtio_loopback_remove(file_data->vl_dev_irq->pdev); + virtio_loopback_driver.remove(file_data->vl_dev_irq->pdev); file_data->vl_dev_irq = NULL; } /* Subsequently free the dev_data */ @@ -552,7 +733,7 @@ static const struct file_operations fops = { .read = loopback_read, .write = loopback_write, .open = loopback_open, - .unlocked_ioctl = loopback_ioctl, + .unlocked_ioctl = loopback_ioctl, .mmap = op_mmap, .llseek = loopback_seek, .release = loopback_release @@ -568,9 +749,9 @@ static int __init loopback_init(void) /* Set-up the 
loopback_data */ loopback_data.dev_major = MAJOR(dev); #if LINUX_VERSION_CODE < KERNEL_VERSION(6, 4, 0) - loopback_data.class = class_create(THIS_MODULE, "loopback"); + loopback_data.class = class_create(THIS_MODULE, "loopback"); #else - loopback_data.class = class_create("loopback"); + loopback_data.class = class_create("loopback"); #endif if (IS_ERR(loopback_data.class)) { pr_err("Failed to create class\n"); @@ -579,10 +760,8 @@ static int __init loopback_init(void) cdev_init(&loopback_data.cdev, &fops); loopback_data.cdev.owner = THIS_MODULE; cdev_add(&loopback_data.cdev, MKDEV(loopback_data.dev_major, 0), 1); - device_create(loopback_data.class, NULL, MKDEV(loopback_data.dev_major, 0), - NULL, "loopback"); - /* Create the workqueues of the loopback driver */ - loopback_data.notify_workqueue = create_singlethread_workqueue("notify_workqueue"); + device_create(loopback_data.class, NULL, + MKDEV(loopback_data.dev_major, 0), NULL, "loopback"); /* Register virtio_loopback_transport */ (void)platform_driver_register(&virtio_loopback_driver); @@ -600,14 +779,16 @@ static int __init loopback_init(void) static void __exit loopback_exit(void) { int i; - uint32_t max_used_device_num = atomic_read(&loopback_devices.device_num); + uint32_t max_used_device_num = + atomic_read(&loopback_devices.device_num); - pr_info("Exit driver!\n"); + pr_info("Exit virtio_loopback driver!\n"); - /* Unregister loopback device */ + /* Unregister loopback devices */ for (i = 0; i < max_used_device_num; i++) if (loopback_devices.devices[i]) - platform_device_unregister(loopback_devices.devices[i]->pdev); + platform_device_unregister( + loopback_devices.devices[i]->pdev); /* Unregister virtio_loopback_transport */ platform_driver_unregister(&virtio_loopback_driver); @@ -619,10 +800,6 @@ static void __exit loopback_exit(void) pr_debug("device_destroy!\n"); class_destroy(loopback_data.class); pr_debug("class_destroy!\n"); - - /* Destroy the notify workqueue */ - flush_workqueue(loopback_data.notify_workqueue); - destroy_workqueue(loopback_data.notify_workqueue); } module_init(loopback_init); diff --git a/virtio_loopback_driver.h b/virtio_loopback_driver.h index d0a744b..57e2ce5 100644 --- a/virtio_loopback_driver.h +++ b/virtio_loopback_driver.h @@ -1,7 +1,14 @@ -// SPDX-License-Identifier: GPL-2.0-or-later +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* + * Virtio loopback device driver + * * Copyright 2022-2024 Virtual Open Systems SAS. * + * Authors: + * Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> + * Anna Panagopoulou <anna@virtualopensystems.com> + * Alvise Rigo <a.rigo@virtualopensystems.com> + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -11,11 +18,8 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ + #ifndef __LOOPBACK_H__ #define __LOOPBACK_H__ @@ -45,7 +49,7 @@ #include <linux/spinlock.h> #include <linux/virtio.h> #include <linux/virtio_config.h> -#include <uapi/linux/virtio_mmio.h> +#include <linux/virtio_mmio.h> #include <linux/virtio_ring.h> #include <linux/kernel.h> @@ -55,7 +59,6 @@ #include <linux/kthread.h> /* mmap includes */ -#include <linux/kernel.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/mutex.h> @@ -77,8 +80,8 @@ */ #define VIRTIO_MMIO_VRING_ALIGN PAGE_SIZE -#define to_virtio_loopback_device(ptr, field) \ - container_of(ptr, struct virtio_loopback_device, field) +#define to_virtio_loopback_device(ptr) \ + container_of(ptr, struct virtio_loopback_device, vdev) /* mmap functionality */ #ifndef VM_RESERVED @@ -91,7 +94,8 @@ #define START_LOOPBACK _IOC(_IOC_WRITE, 'k', 3, sizeof(struct virtio_device_info_struct)) #define IRQ _IOC(_IOC_WRITE, 'k', 4, sizeof(int)) #define SHARE_VQS _IOC(_IOC_WRITE, 'k', 5, sizeof(uint32_t)) -#define SHARE_COM_STRUCT _IOC(_IOC_WRITE, 'k', 7, 0) +#define SHARE_COM_STRUCT _IOC(_IOC_WRITE, 'k', 6, 0) +#define SHARE_VQS_NOTIF _IOC(_IOC_WRITE, 'k', 7, sizeof(struct vq_notifier)) /* Data structures */ struct virtio_device_info_struct { @@ -126,11 +130,19 @@ struct mmap_data { uint64_t sum_pgfaults; }; +struct vq_notifier { + uint32_t vq_index; + int notifier_fd; + int pid; +}; + /* vq related data */ struct vq_data { uint32_t vq_index; uint64_t vq_pfns[16]; uint64_t vq_pfn; + struct eventfd_ctx *vq_notifiers[16]; + bool vq_notifiers_enabled; }; /* Data describing each device private status */ @@ -190,6 +202,9 @@ struct virtio_loopback_device { spinlock_t lock; struct list_head virtqueues; + /* Define workqueue for notifications */ + struct workqueue_struct *notify_workqueue; + /* Notify list and work struct */ spinlock_t notify_q_lock; struct list_head notify_list; @@ -231,23 +246,14 @@ struct loopback_device_data { /* sysfs class structure */ struct class *class; struct cdev cdev; - /* Define workqueue for notifications */ - struct workqueue_struct *notify_workqueue; }; /* Global variables */ -extern struct loopback_device_data loopback_data; -extern struct loopback_devices_array loopback_devices; extern struct platform_driver virtio_loopback_driver; /* Global functions */ int insert_entry_data(struct virtio_loopback_device *vl_dev, int id); int loopback_register_virtio_dev(struct virtio_loopback_device *vl_dev); bool vl_interrupt(struct virtio_loopback_device *vl_dev, int irq); -#if LINUX_VERSION_CODE > KERNEL_VERSION(6, 10, 8) -void virtio_loopback_remove(struct platform_device *pdev); -#else -int virtio_loopback_remove(struct platform_device *pdev); -#endif #endif /* __LOOPBACK_H__ */ |
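
For completeness, a minimal user-space sketch of how the new `SHARE_VQS_NOTIF` ioctl could be exercised, assuming the `struct vq_notifier` layout and ioctl number are mirrored from `virtio_loopback_driver.h` and that the character device is `/dev/loopback` (created by `loopback_init()`). Only the notifier-registration step is shown; in the real flow it follows `EFD_INIT`, `START_LOOPBACK` and the virtqueue-sharing ioctls:

```c
#include <fcntl.h>
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <linux/ioctl.h>

/* Mirrors of the driver definitions; in a real build these would come
 * from a shared UAPI header rather than being redefined here. */
struct vq_notifier {
	uint32_t vq_index;
	int notifier_fd;
	int pid;
};
#define SHARE_VQS_NOTIF _IOC(_IOC_WRITE, 'k', 7, sizeof(struct vq_notifier))

int main(void)
{
	/* "/dev/loopback" matches device_create(..., "loopback") in loopback_init(). */
	int loopback_fd = open("/dev/loopback", O_RDWR);
	int efd = eventfd(0, 0);
	struct vq_notifier vqn = {
		.vq_index = 0,       /* virtqueue whose kicks should bypass the adapter */
		.notifier_fd = efd,  /* eventfd the driver signals in vl_notify() */
		.pid = getpid(),     /* lets the driver resolve the fd in this task */
	};
	uint64_t kicks;

	if (loopback_fd < 0 || efd < 0)
		return 1;

	if (ioctl(loopback_fd, SHARE_VQS_NOTIF, &vqn) < 0) {
		perror("SHARE_VQS_NOTIF");
		return 1;
	}

	/* From now on, virtqueue 0 notifications arrive as eventfd reads. */
	if (read(efd, &kicks, sizeof(kicks)) == (ssize_t)sizeof(kicks))
		printf("received %llu notification(s)\n", (unsigned long long)kicks);

	close(efd);
	close(loopback_fd);
	return 0;
}
```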