/* * Based on libvhost-user.c of QEMU project * * Copyright IBM, Corp. 2007 * Copyright (c) 2016 Red Hat, Inc. * * Authors: * Anthony Liguori * Marc-André Lureau * Victor Kaplansky * * Copyright 2022 Virtual Open Systems SAS. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* For socket */ #include #include #include #include /* Project header files */ #include "virtio_loopback.h" #include "vhost_user_loopback.h" #include "event_notifier.h" #ifdef DEBUG #define DBG(...) printf("vhost-user-loopback: " __VA_ARGS__) #else #define DBG(...) #endif /* DEBUG */ bool vhost_user_one_time_request(VhostUserRequest request) { switch (request) { case VHOST_USER_SET_OWNER: case VHOST_USER_RESET_OWNER: case VHOST_USER_SET_MEM_TABLE: case VHOST_USER_GET_QUEUE_NUM: case VHOST_USER_NET_SET_MTU: return true; default: return false; } } void vmsg_close_fds(VhostUserMsg *vmsg) { int i; for (i = 0; i < vmsg->fd_num; i++) { close(vmsg->fds[i]); } } bool vu_message_write(int conn_fd, VhostUserMsg *vmsg) { int rc; uint8_t *p = (uint8_t *)vmsg; size_t fdsize; char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; struct iovec iov = { .iov_base = (char *)vmsg, .iov_len = VHOST_USER_HDR_SIZE, }; struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1, .msg_control = control, }; struct cmsghdr *cmsg; if (vhost_user_one_time_request(vmsg->request) && dev->vq_index != 0) { vmsg->flags &= ~VHOST_USER_NEED_REPLY_MASK; return 0; } memset(control, 0, sizeof(control)); if (vmsg->fd_num > 0) { fdsize = vmsg->fd_num * sizeof(int); msg.msg_controllen = CMSG_SPACE(fdsize); cmsg = CMSG_FIRSTHDR(&msg); cmsg->cmsg_len = CMSG_LEN(fdsize); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; memcpy(CMSG_DATA(cmsg), vmsg->fds, fdsize); } else { msg.msg_controllen = 0; } do { rc = sendmsg(conn_fd, &msg, 0); } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); if (vmsg->size) { do { if (vmsg->data) { rc = write(conn_fd, vmsg->data, vmsg->size); } else { rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size); } } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); } if (rc <= 0) { DBG("Error while writing\n"); return false; } return true; } bool vu_message_read(int conn_fd, VhostUserMsg *vmsg) { char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {}; struct iovec iov = { .iov_base = (char *)vmsg, .iov_len = VHOST_USER_HDR_SIZE, }; struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1, .msg_control = control, .msg_controllen = sizeof(control), }; size_t fd_size; struct cmsghdr *cmsg; int rc; do { rc = recvmsg(conn_fd, &msg, 0); } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); if (rc < 0) { DBG("Error while recvmsg\n"); return false; } vmsg->fd_num = 0; for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) { fd_size = cmsg->cmsg_len - CMSG_LEN(0); vmsg->fd_num = fd_size / sizeof(int); memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size); break; } } if (vmsg->size > sizeof(vmsg->payload)) { DBG( "Error: too big message request: %d, size: vmsg->size: %u, " "while sizeof(vmsg->payload) = %zu\n", vmsg->request, vmsg->size, sizeof(vmsg->payload)); goto fail; } if (vmsg->size) { do { rc = read(conn_fd, &vmsg->payload, vmsg->size); } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); if (rc <= 0) { DBG("Error while reading\n"); goto fail; } } return true; fail: vmsg_close_fds(vmsg); return false; } int vhost_user_set_owner(void) { VhostUserMsg msg = { .request = VHOST_USER_SET_OWNER, .flags = VHOST_USER_VERSION, }; return vu_message_write(client_sock, &msg); } int process_message_reply(const VhostUserMsg *msg) { int ret; VhostUserMsg msg_reply; if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { return 0; } ret = vu_message_read(client_sock, &msg_reply); if (ret < 0) { return ret; } if (msg_reply.request != msg->request) { DBG("Received unexpected msg type. " "Expected %d received %d\n", msg->request, msg_reply.request); return -EPROTO; } return msg_reply.payload.u64 ? -EIO : 0; } int vhost_user_get_u64(int request, uint64_t *u64) { int ret; VhostUserMsg msg = { .request = request, .flags = VHOST_USER_VERSION, }; if (vhost_user_one_time_request(request) && dev->vq_index != 0) { return 0; } ret = vu_message_write(client_sock, &msg); if (ret < 0) { return ret; } ret = vu_message_read(client_sock, &msg); if (ret < 0) { return ret; } if (msg.request != request) { DBG("Received unexpected msg type. Expected %d received %d\n", request, msg.request); return -EPROTO; } if (msg.size != sizeof(msg.payload.u64)) { DBG("Received bad msg size.\n"); return -EPROTO; } *u64 = msg.payload.u64; return 0; } int vhost_user_get_features(uint64_t *features) { if (vhost_user_get_u64(VHOST_USER_GET_FEATURES, features) < 0) { return -EPROTO; } return 0; } int enforce_reply(const VhostUserMsg *msg) { uint64_t dummy; if (msg->flags & VHOST_USER_NEED_REPLY_MASK) { return process_message_reply(msg); } /* * We need to wait for a reply but the backend does not * support replies for the command we just sent. * Send VHOST_USER_GET_FEATURES which makes all backends * send a reply. */ return vhost_user_get_features(&dummy); } int vhost_user_set_u64(int request, uint64_t u64, bool wait_for_reply) { VhostUserMsg msg = { .request = request, .flags = VHOST_USER_VERSION, .payload.u64 = u64, .size = sizeof(msg.payload.u64), }; int ret; if (wait_for_reply) { bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); if (reply_supported) { msg.flags |= VHOST_USER_NEED_REPLY_MASK; } } ret = vu_message_write(client_sock, &msg); if (ret < 0) { return ret; } if (wait_for_reply) { return enforce_reply(&msg); } return 0; } int vhost_user_set_features(struct vhost_dev *dev, uint64_t features) { /* * wait for a reply if logging is enabled to make sure * backend is actually logging changes */ bool log_enabled = features & (0x1ULL << VHOST_F_LOG_ALL); (void) dev; /* Pass hdev as parameter! */ return vhost_user_set_u64(VHOST_USER_SET_FEATURES, features, log_enabled); } int vhost_user_set_protocol_features(uint64_t features) { return vhost_user_set_u64(VHOST_USER_SET_PROTOCOL_FEATURES, features, false); } int vhost_user_get_max_memslots(uint64_t *max_memslots) { uint64_t backend_max_memslots; int err; err = vhost_user_get_u64(VHOST_USER_GET_MAX_MEM_SLOTS, &backend_max_memslots); if (err < 0) { return err; } *max_memslots = backend_max_memslots; return 0; } int vhost_setup_slave_channel(struct vhost_dev *dev) { VhostUserMsg msg = { .request = VHOST_USER_SET_SLAVE_REQ_FD, .flags = VHOST_USER_VERSION, }; int sv[2], ret = 0; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); if (!virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { return 0; } if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { int saved_errno = errno; DBG("socketpair() failed\n"); return -saved_errno; } memcpy(msg.fds, &sv[1], sizeof(int)); msg.fd_num = 1; if (reply_supported) { msg.flags |= VHOST_USER_NEED_REPLY_MASK; } ret = vu_message_write(client_sock, &msg); if (!ret) { DBG("Go out\n"); goto out; } if (reply_supported) { ret = process_message_reply(&msg); DBG("Reply is done!\n"); } out: /* TODO: Close slave channel and fd in case of error */ /* * close(sv[1]); * if (ret) { * close_slave_channel(u); * } */ return ret; } int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) { /* * TODO: Add a assert to check the requested index * * assert(idx >= dev->vq_index && idx < dev->vq_index + (int)dev->nvqs); */ return idx; } void vhost_user_share_fd(void) { size_t fd_num = 1; VhostUserMsg msg = { .request = (VhostUserRequest) VHOST_USER_SHARE_LOOPBACK_FD, .flags = VHOST_USER_VERSION, .payload.u64 = ((uint64_t)getpid() << 32) | (uint64_t)loopback_fd, .size = sizeof(msg.payload.u64), }; msg.fd_num = 1; memcpy(msg.fds, &loopback_fd, fd_num * sizeof(int)); /* * TODO: Check if we need to remove the VHOST_USER_NEED_REPLY_MASK flag * * msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; */ (void)vu_message_write(client_sock, &msg); } int vhost_set_vring_file(VhostUserRequest request, struct vhost_vring_file *file) { int fds[VHOST_USER_MAX_RAM_SLOTS]; size_t fd_num = 0; VhostUserMsg msg = { .request = request, .flags = VHOST_USER_VERSION, .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, .size = sizeof(msg.payload.u64), }; if (ioeventfd_enabled() && file->fd > 0) { fds[fd_num++] = file->fd; } else { msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; } /* * TODO: Check if we need to remove the VHOST_USER_NEED_REPLY_MASK flag * * msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; */ (void)fds; (void)fd_num; msg.fd_num = fd_num; memcpy(msg.fds, &fds, fd_num * sizeof(int)); return !vu_message_write(client_sock, &msg); } int vhost_user_set_vring_kick(struct vhost_vring_file *file) { return vhost_set_vring_file(VHOST_USER_SET_VRING_KICK, file); } int vhost_user_set_vring_call(struct vhost_vring_file *file) { return vhost_set_vring_file(VHOST_USER_SET_VRING_CALL, file); } static int vhost_set_vring(struct vhost_dev *dev, unsigned long int request, struct vhost_vring_state *ring) { VhostUserMsg msg = { .request = request, .flags = VHOST_USER_VERSION, .payload.state = *ring, .size = sizeof(msg.payload.state), }; return !vu_message_write(client_sock, &msg); } int vhost_user_set_vring_num(struct vhost_dev *dev, struct vhost_vring_state *ring) { return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); } int vhost_user_set_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); } int vhost_user_set_vring_addr(struct vhost_dev *dev, struct vhost_vring_addr *addr) { int ret; VhostUserMsg msg = { .request = VHOST_USER_SET_VRING_ADDR, .flags = VHOST_USER_VERSION, .payload.addr = *addr, .size = sizeof(msg.payload.addr), }; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); /* * wait for a reply if logging is enabled to make sure * backend is actually logging changes */ bool wait_for_reply = addr->flags & (1 << VHOST_VRING_F_LOG); if (reply_supported && wait_for_reply) { msg.flags |= VHOST_USER_NEED_REPLY_MASK; } ret = vu_message_write(client_sock, &msg); if (ret < 0) { DBG("Fail vhost_user_set_vring_addr\n"); return ret; } if (wait_for_reply) { return enforce_reply(&msg); } return 0; } int vhost_virtqueue_init(struct vhost_dev *dev, struct vhost_virtqueue *vq, int n) { int vhost_vq_index = (int)vhost_user_get_vq_index(dev, n); struct vhost_vring_file file = { .index = vhost_vq_index, }; int r = event_notifier_init(&vq->masked_notifier, 0); if (r < 0) { return r; } file.fd = event_notifier_get_wfd(&vq->masked_notifier); r = vhost_user_set_vring_call(&file); if (r) { DBG("vhost_set_vring_call failed\n"); return r; } vq->dev = dev; return 0; } /* -------------------- Vring functions -------------------- */ /* TODO: This funciton might be implemented in a later release */ static int vhost_user_set_mem_table_postcopy(struct vhost_dev *dev, bool reply_supported, bool config_mem_slots) { return 0; } /* * TODO: This function is not yet fully optimized because in the current release * it is not used. t will be implemented or deleted in a later release. */ int vhost_user_set_mem_table(struct vhost_dev *dev) { int fds[VHOST_MEMORY_BASELINE_NREGIONS]; size_t fd_num = 0; bool reply_supported = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK); bool config_mem_slots = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS); int ret; bool do_postcopy = false; if (do_postcopy) { /* * Postcopy has enough differences that it's best done in it's own * version */ return vhost_user_set_mem_table_postcopy(dev, reply_supported, config_mem_slots); } VhostUserMsg msg = { .flags = VHOST_USER_VERSION, }; if (reply_supported) { msg.flags |= VHOST_USER_NEED_REPLY_MASK; } return 0; } /* ----------------- End of Vring functions ---------------- */ int vhost_user_backend_init(struct vhost_dev *vhdev) { uint64_t features, protocol_features, ram_slots; int err; err = vhost_user_get_features(&features); if (err < 0) { DBG("vhost_backend_init failed\n"); return err; } if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { vhdev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; err = vhost_user_get_u64(VHOST_USER_GET_PROTOCOL_FEATURES, &protocol_features); if (err < 0) { DBG("vhost_backend_init failed\n"); return -EPROTO; } vhdev->protocol_features = protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; /* * TODO: Disable config bit for the rng, this might be usefull * when new devices are added */ vhdev->protocol_features &= ~(1ULL << VHOST_USER_PROTOCOL_F_CONFIG); err = vhost_user_set_protocol_features(vhdev->protocol_features); if (err < 0) { DBG("vhost_backend_init failed\n"); return -EPROTO; } /* query the max queues we support if backend supports Multiple Queue */ if (vhdev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { err = vhost_user_get_u64(VHOST_USER_GET_QUEUE_NUM, &vhdev->max_queues); if (err < 0) { DBG("vhost_backend_init failed\n"); return -EPROTO; } } else { vhdev->max_queues = 1; } if (vhdev->num_queues && vhdev->max_queues < vhdev->num_queues) { DBG("The maximum number of queues supported by the " "backend is %ld\n", vhdev->max_queues); return -EINVAL; } if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && !(virtio_has_feature(vhdev->protocol_features, VHOST_USER_PROTOCOL_F_SLAVE_REQ) && virtio_has_feature(vhdev->protocol_features, VHOST_USER_PROTOCOL_F_REPLY_ACK))) { DBG("IOMMU support requires reply-ack and " "slave-req protocol features.\n"); return -EINVAL; } /* get max memory regions if backend supports configurable RAM slots */ if (!virtio_has_feature(vhdev->protocol_features, VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS)) { vhdev->memory_slots = VHOST_MEMORY_BASELINE_NREGIONS; } else { err = vhost_user_get_max_memslots(&ram_slots); if (err < 0) { DBG("vhost_backend_init failed\n"); return -EPROTO; } if (ram_slots < vhdev->memory_slots) { DBG("The backend specified a max ram slots limit " "of %ld, when the prior validated limit was " "%ld. This limit should never decrease.\n", ram_slots, vhdev->memory_slots); return -EINVAL; } vhdev->memory_slots = MIN(ram_slots, VHOST_USER_MAX_RAM_SLOTS); } } if (vhdev->migration_blocker == NULL && !virtio_has_feature(vhdev->protocol_features, VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { DBG("Migration disabled: vhost-user backend lacks " "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.\n"); } if (vhdev->vq_index == 0) { err = vhost_setup_slave_channel(vhdev); if (err < 0) { DBG("vhost_backend_init failed\n"); return -EPROTO; } } /* * TODO: We might need to set up a postcopy_notifier in a future release: * * u->postcopy_notifier.notify = vhost_user_postcopy_notifier; * postcopy_add_notifier(&u->postcopy_notifier); */ return 0; } void vhost_dev_init(struct vhost_dev *vhdev) { uint64_t features; int r, n_initialized_vqs = 0; unsigned int i; /* Vhost conf */ vhdev->migration_blocker = NULL; (void)vhost_user_backend_init(vhdev); r = vhost_user_set_owner(); if (r < 0) { DBG("vhost_set_owner failed\n"); } r = vhost_user_get_features(&features); if (r < 0) { DBG("vhost_get_features failed\n"); } for (i = 0; i < vhdev->nvqs; ++i, ++n_initialized_vqs) { r = vhost_virtqueue_init(vhdev, vhdev->vqs + i, vhdev->vq_index + i); if (r < 0) { DBG("Failed to initialize virtqueue %d", i); } } /* * TODO: busyloop == 0 in rng case, but we might need it for new devices: * * if (busyloop_timeout) { * for (i = 0; i < dev->nvqs; ++i) { * r = vhost_virtqueue_set_busyloop_timeout(dev, dev->vq_index + i, * busyloop_timeout); * if (r < 0) { * DBG("Failed to set busyloop timeout\n"); * //goto fail_busyloop; * } * } * } */ vhdev->features = features; }