diff options
Diffstat (limited to 'vhost_user_loopback.c')
-rw-r--r-- | vhost_user_loopback.c | 800 |
1 files changed, 800 insertions, 0 deletions
diff --git a/vhost_user_loopback.c b/vhost_user_loopback.c new file mode 100644 index 0000000..dec0186 --- /dev/null +++ b/vhost_user_loopback.c @@ -0,0 +1,800 @@ +/* + * Based on libvhost-user.c of Qemu project + * + * Copyright IBM, Corp. 2007 + * Copyright (c) 2016 Red Hat, Inc. + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Marc-André Lureau <mlureau@redhat.com> + * Victor Kaplansky <victork@redhat.com> + * + * Copyright 2022 Virtual Open Systems SAS. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <sys/eventfd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <pthread.h> +#include <stdbool.h> +#include <sys/param.h> +#include <assert.h> + +/* For socket */ +#include <errno.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/un.h> + +/* Project header files */ +#include "virtio_loopback.h" +#include "vhost_user_loopback.h" +#include "event_notifier.h" + +#ifdef DEBUG +#define DBG(...) printf("vhost-user-loopback: " __VA_ARGS__) +#else +#define DBG(...) +#endif /* DEBUG */ + + +bool vhost_user_one_time_request(VhostUserRequest request) +{ + switch (request) { + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + case VHOST_USER_SET_MEM_TABLE: + case VHOST_USER_GET_QUEUE_NUM: + case VHOST_USER_NET_SET_MTU: + return true; + default: + return false; + } +} + + +void vmsg_close_fds(VhostUserMsg *vmsg) +{ + int i; + + for (i = 0; i < vmsg->fd_num; i++) { + close(vmsg->fds[i]); + } +} + + +bool vu_message_write(int conn_fd, VhostUserMsg *vmsg) +{ + int rc; + uint8_t *p = (uint8_t *)vmsg; + size_t fdsize; + char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; + struct iovec iov = { + .iov_base = (char *)vmsg, + .iov_len = VHOST_USER_HDR_SIZE, + }; + + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = control, + }; + struct cmsghdr *cmsg; + + if (vhost_user_one_time_request(vmsg->request) && dev->vq_index != 0) { + vmsg->flags &= ~VHOST_USER_NEED_REPLY_MASK; + return 0; + } + + memset(control, 0, sizeof(control)); + if (vmsg->fd_num > 0) { + fdsize = vmsg->fd_num * sizeof(int); + msg.msg_controllen = CMSG_SPACE(fdsize); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize); + } else { + msg.msg_controllen = 0; + } + + do { + rc = sendmsg(conn_fd, &msg, 0); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + + if (vmsg->size) { + do { + if (vmsg->data) { + rc = write(conn_fd, vmsg->data, vmsg->size); + } else { + rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size); + } + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + } + + if (rc <= 0) { + DBG("Error while writing\n"); + return false; + } + + return true; +} + + +bool vu_message_read(int conn_fd, VhostUserMsg *vmsg) +{ + char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; + struct iovec iov = { + .iov_base = (char *)vmsg, + .iov_len = VHOST_USER_HDR_SIZE, + }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = control, + .msg_controllen = sizeof(control), + }; + size_t fd_size; + struct cmsghdr *cmsg; + int rc; + + do { + rc = recvmsg(conn_fd, &msg, 0); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + + if (rc < 0) { + DBG("Error while recvmsg\n"); + return false; + } + + vmsg->fd_num = 0; + for (cmsg = CMSG_FIRSTHDR(&msg); + cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) + { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { + fd_size = cmsg->cmsg_len - CMSG_LEN(0); + vmsg->fd_num = fd_size / sizeof(int); + memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); + break; + } + } + + if (vmsg->size > sizeof(vmsg->payload)) { + DBG( + "Error: too big message request: %d, size: vmsg->size: %u, " + "while sizeof(vmsg->payload) = %zu\n", + vmsg->request, vmsg->size, sizeof(vmsg->payload)); + goto fail; + } + + if (vmsg->size) { + do { + rc = read(conn_fd, &vmsg->payload, vmsg->size); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + + if (rc <= 0) { + DBG("Error while reading\n"); + goto fail; + } + } + + return true; + +fail: + vmsg_close_fds(vmsg); + + return false; +} + +int vhost_user_set_owner(void) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_OWNER, + .flags = VHOST_USER_VERSION, + }; + + return vu_message_write(client_sock, &msg); +} + +int process_message_reply(const VhostUserMsg *msg) +{ + int ret; + VhostUserMsg msg_reply; + + if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + return 0; + } + + ret = vu_message_read(client_sock, &msg_reply); + if (ret < 0) { + return ret; + } + + if (msg_reply.request != msg->request) { + DBG("Received unexpected msg type. " + "Expected %d received %d\n", + msg->request, msg_reply.request); + return -EPROTO; + } + + return msg_reply.payload.u64 ? -EIO : 0; +} + +int vhost_user_get_u64(int request, uint64_t *u64) +{ + int ret; + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + }; + + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + return 0; + } + + ret = vu_message_write(client_sock, &msg); + if (ret < 0) { + return ret; + } + + ret = vu_message_read(client_sock, &msg); + if (ret < 0) { + return ret; + } + + if (msg.request != request) { + DBG("Received unexpected msg type. Expected %d received %d\n", + request, msg.request); + return -EPROTO; + } + + if (msg.size != sizeof(msg.payload.u64)) { + DBG("Received bad msg size.\n"); + return -EPROTO; + } + + *u64 = msg.payload.u64; + + return 0; +} + + +int vhost_user_get_features(uint64_t *features) +{ + if (vhost_user_get_u64(VHOST_USER_GET_FEATURES, features) < 0) { + return -EPROTO; + } + + return 0; +} + +int enforce_reply(const VhostUserMsg *msg) +{ + uint64_t dummy; + + if (msg->flags & VHOST_USER_NEED_REPLY_MASK) { + return process_message_reply(msg); + } + + /* + * We need to wait for a reply but the backend does not + * support replies for the command we just sent. + * Send VHOST_USER_GET_FEATURES which makes all backends + * send a reply. + */ + return vhost_user_get_features(&dummy); +} + +int vhost_user_set_u64(int request, uint64_t u64, bool wait_for_reply) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.u64 = u64, + .size = sizeof(msg.payload.u64), + }; + int ret; + + if (wait_for_reply) { + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + if (reply_supported) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + } + + ret = vu_message_write(client_sock, &msg); + if (ret < 0) { + return ret; + } + + if (wait_for_reply) { + return enforce_reply(&msg); + } + + return 0; +} + +int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features) +{ + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); + + (void) dev; + + /* Pass hdev as parameter! */ + return vhost_user_set_u64(VHOST_USER_SET_FEATURES, features, + log_enabled); +} + +int vhost_user_set_protocol_features(uint64_t features) +{ + return vhost_user_set_u64(VHOST_USER_SET_PROTOCOL_FEATURES, features, + false); +} + +int vhost_user_get_max_memslots(uint64_t *max_memslots) +{ + uint64_t backend_max_memslots; + int err; + + err = vhost_user_get_u64(VHOST_USER_GET_MAX_MEM_SLOTS, + &backend_max_memslots); + if (err < 0) { + return err; + } + + *max_memslots = backend_max_memslots; + + return 0; +} + + + +int vhost_setup_slave_channel(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_SLAVE_REQ_FD, + .flags = VHOST_USER_VERSION, + }; + int sv[2], ret = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + return 0; + } + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + int saved_errno = errno; + DBG("socketpair() failed\n"); + return -saved_errno; + } + + memcpy(msg.fds, &sv[1], sizeof(int)); + msg.fd_num = 1; + + if (reply_supported) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + ret = vu_message_write(client_sock, &msg); + if (!ret) { + DBG("Go out\n"); + goto out; + } + + if (reply_supported) { + ret = process_message_reply(&msg); + DBG("Reply is done!\n"); + } + +out: + /* TODO: Close slave channel and fd in case of error */ + /* + * close(sv[1]); + * if (ret) { + * close_slave_channel(u); + * } + */ + + return ret; +} + + +int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) +{ + /* TODO: Add a assert to check the requested index + * + * assert(idx >= dev->vq_index && idx < dev->vq_index + (int)dev->nvqs); + */ + return idx; +} + +void vhost_user_share_fd(void) +{ + size_t fd_num = 1; + VhostUserMsg msg = { + .request = (VhostUserRequest) VHOST_USER_SHARE_LOOPBACK_FD, + .flags = VHOST_USER_VERSION, + .payload.u64 = ((uint64_t)getpid() << 32) | (uint64_t)loopback_fd, + .size = sizeof(msg.payload.u64), + }; + + msg.fd_num = 1; + memcpy(msg.fds, &loopback_fd, fd_num * sizeof(int)); + + /* TODO: Check if we need to remove the VHOST_USER_NEED_REPLY_MASK flag + * + * msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; + */ + + (void)vu_message_write(client_sock, &msg); +} + +int vhost_set_vring_file(VhostUserRequest request, + struct vhost_vring_file *file) +{ + int fds[VHOST_USER_MAX_RAM_SLOTS]; + size_t fd_num = 0; + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, + .size = sizeof(msg.payload.u64), + }; + + if (ioeventfd_enabled() && file->fd > 0) { + fds[fd_num++] = file->fd; + } else { + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + } + + /* TODO: Check if we need to remove the VHOST_USER_NEED_REPLY_MASK flag + * + * msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; + */ + + (void)fds; + (void)fd_num; + + msg.fd_num = fd_num; + memcpy(msg.fds, &fds, fd_num * sizeof(int)); + + return !vu_message_write(client_sock, &msg); +} + +int vhost_user_set_vring_kick(struct vhost_vring_file *file) +{ + return vhost_set_vring_file(VHOST_USER_SET_VRING_KICK, file); +} + +int vhost_user_set_vring_call(struct vhost_vring_file *file) +{ + return vhost_set_vring_file(VHOST_USER_SET_VRING_CALL, file); +} + +static int vhost_set_vring(struct vhost_dev *dev, + unsigned long int request, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.state = *ring, + .size = sizeof(msg.payload.state), + }; + + return !vu_message_write(client_sock, &msg); +} + +int vhost_user_set_vring_num(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); +} + +int vhost_user_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); +} + + +int vhost_user_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + int ret; + VhostUserMsg msg = { + .request = VHOST_USER_SET_VRING_ADDR, + .flags = VHOST_USER_VERSION, + .payload.addr = *addr, + .size = sizeof(msg.payload.addr), + }; + + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + /* + * wait for a reply if logging is enabled to make sure + * backend is actually logging changes + */ + bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); + + if (reply_supported && wait_for_reply) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + //ret = vhost_user_write(dev, &msg, NULL, 0); + ret = vu_message_write(client_sock, &msg); + if (ret < 0) { + DBG("Fail vhost_user_set_vring_addr\n"); + return ret; + } + + if (wait_for_reply) { + return enforce_reply(&msg); + } + + return 0; +} + + +int vhost_virtqueue_init(struct vhost_dev *dev, + struct vhost_virtqueue *vq, int n) +{ + int vhost_vq_index = (int)vhost_user_get_vq_index(dev, n); + + struct vhost_vring_file file = { + .index = vhost_vq_index, + }; + + int r = event_notifier_init(&vq->masked_notifier, 0); + if (r < 0) { + return r; + } + + file.fd = event_notifier_get_wfd(&vq->masked_notifier); + + r = vhost_user_set_vring_call(&file); + if (r) { + DBG("vhost_set_vring_call failed\n"); + return r; + } + + vq->dev = dev; + + return 0; +} + + +/* -------------------- Vring functions -------------------- */ + +/* TODO: This funciton might be implemented in a later release */ +static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, + bool reply_supported, + bool config_mem_slots) +{ + return 0; +} + + +/* TODO: This function is not yet fully optimized because in the current release + * it is not used. t will be implemented or deleted in a later release. + */ +int vhost_user_set_mem_table(struct vhost_dev *dev) +{ + int fds[VHOST_MEMORY_BASELINE_NREGIONS]; + size_t fd_num = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + bool config_mem_slots = + virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); + int ret; + bool do_postcopy = false; + + if (do_postcopy) { + /* + * Postcopy has enough differences that it's best done in it's own + * version + */ + return vhost_user_set_mem_table_postcopy(dev, reply_supported, + config_mem_slots); + } + + VhostUserMsg msg = { + .flags = VHOST_USER_VERSION, + }; + + if (reply_supported) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + return 0; +} + +/* ----------------- End of Vring functions ---------------- */ + +int vhost_user_backend_init(struct vhost_dev *vhdev) +{ + uint64_t features, protocol_features, ram_slots; + int err; + + err = vhost_user_get_features(&features); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return err; + } + + if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { + vhdev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + + err = vhost_user_get_u64(VHOST_USER_GET_PROTOCOL_FEATURES, + &protocol_features); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + + vhdev->protocol_features = + protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; + + /* TODO: Disable config bit for the rng, this might be usefull + * when new devices are added*/ + vhdev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); + + err = vhost_user_set_protocol_features(vhdev->protocol_features); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + + /* query the max queues we support if backend supports Multiple Queue */ + if (vhdev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { + err = vhost_user_get_u64(VHOST_USER_GET_QUEUE_NUM, + &vhdev->max_queues); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + } else { + vhdev->max_queues = 1; + } + + if (vhdev->num_queues && vhdev->max_queues < vhdev->num_queues) { + DBG("The maximum number of queues supported by the " + "backend is %ld\n", vhdev->max_queues); + return -EINVAL; + } + + if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && + !(virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_SLAVE_REQ) && + virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK))) { + DBG("IOMMU support requires reply-ack and " + "slave-req protocol features.\n"); + return -EINVAL; + } + + /* get max memory regions if backend supports configurable RAM slots */ + if (!virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { + vhdev->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; + } else { + err = vhost_user_get_max_memslots(&ram_slots); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + + if (ram_slots < vhdev->memory_slots) { + DBG("The backend specified a max ram slots limit " + "of %ld, when the prior validated limit was " + "%ld. This limit should never decrease.\n", ram_slots, + vhdev->memory_slots); + return -EINVAL; + } + + vhdev->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); + } + } + + if (vhdev->migration_blocker == NULL && + !virtio_has_feature(vhdev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { + DBG("Migration disabled: vhost-user backend lacks " + "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.\n"); + } + + if (vhdev->vq_index == 0) { + err = vhost_setup_slave_channel(vhdev); + if (err < 0) { + DBG("vhost_backend_init failed\n"); + return -EPROTO; + } + } + + /* TODO: We might need to set up a postcopy_notifier in a future release: + * + * u->postcopy_notifier.notify = vhost_user_postcopy_notifier; + * postcopy_add_notifier(&u->postcopy_notifier); + */ + + return 0; +} + + +void vhost_dev_init(struct vhost_dev *vhdev) { + + uint64_t features; + int r, n_initialized_vqs = 0; + unsigned int i; + + /* Vhost conf */ + vhdev->migration_blocker = NULL; + + (void)vhost_user_backend_init(vhdev); + + r = vhost_user_set_owner(); + if (r < 0) { + DBG("vhost_set_owner failed\n"); + } + + r = vhost_user_get_features(&features); + if (r < 0) { + DBG("vhost_get_features failed\n"); + } + + for (i = 0; i < vhdev->nvqs; ++i, ++n_initialized_vqs) { + r = vhost_virtqueue_init(vhdev, vhdev->vqs + i, vhdev->vq_index + i); + if (r < 0) { + DBG("Failed to initialize virtqueue %d", i); + } + } + + /* TODO: busyloop == 0 in rng case, but we might need it for new devices: + * + * if (busyloop_timeout) { + * for (i = 0; i < dev->nvqs; ++i) { + * r = vhost_virtqueue_set_busyloop_timeout(dev, dev->vq_index + i, + * busyloop_timeout); + * if (r < 0) { + * DBG("Failed to set busyloop timeout\n"); + * //goto fail_busyloop; + * } + * } + * } + */ + + vhdev->features = features; +} |