Diffstat (limited to 'block/export')
-rw-r--r--  block/export/export.c                 364
-rw-r--r--  block/export/fuse.c                   812
-rw-r--r--  block/export/meson.build                7
-rw-r--r--  block/export/vhost-user-blk-server.c  533
-rw-r--r--  block/export/vhost-user-blk-server.h   19
5 files changed, 1735 insertions, 0 deletions
diff --git a/block/export/export.c b/block/export/export.c
new file mode 100644
index 000000000..6d3b9964c
--- /dev/null
+++ b/block/export/export.c
@@ -0,0 +1,364 @@
+/*
+ * Common block export infrastructure
+ *
+ * Copyright (c) 2012, 2020 Red Hat, Inc.
+ *
+ * Authors:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/iothread.h"
+#include "block/export.h"
+#include "block/fuse.h"
+#include "block/nbd.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-block-export.h"
+#include "qapi/qapi-events-block-export.h"
+#include "qemu/id.h"
+#ifdef CONFIG_VHOST_USER_BLK_SERVER
+#include "vhost-user-blk-server.h"
+#endif
+
+static const BlockExportDriver *blk_exp_drivers[] = {
+ &blk_exp_nbd,
+#ifdef CONFIG_VHOST_USER_BLK_SERVER
+ &blk_exp_vhost_user_blk,
+#endif
+#ifdef CONFIG_FUSE
+ &blk_exp_fuse,
+#endif
+};
+
+/* Only accessed from the main thread */
+static QLIST_HEAD(, BlockExport) block_exports =
+ QLIST_HEAD_INITIALIZER(block_exports);
+
+BlockExport *blk_exp_find(const char *id)
+{
+ BlockExport *exp;
+
+ QLIST_FOREACH(exp, &block_exports, next) {
+ if (strcmp(id, exp->id) == 0) {
+ return exp;
+ }
+ }
+
+ return NULL;
+}
+
+static const BlockExportDriver *blk_exp_find_driver(BlockExportType type)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(blk_exp_drivers); i++) {
+ if (blk_exp_drivers[i]->type == type) {
+ return blk_exp_drivers[i];
+ }
+ }
+ return NULL;
+}
+
+BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
+{
+ bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread;
+ const BlockExportDriver *drv;
+ BlockExport *exp = NULL;
+ BlockDriverState *bs;
+ BlockBackend *blk = NULL;
+ AioContext *ctx;
+ uint64_t perm;
+ int ret;
+
+ if (!id_wellformed(export->id)) {
+ error_setg(errp, "Invalid block export id");
+ return NULL;
+ }
+ if (blk_exp_find(export->id)) {
+ error_setg(errp, "Block export id '%s' is already in use", export->id);
+ return NULL;
+ }
+
+ drv = blk_exp_find_driver(export->type);
+ if (!drv) {
+ error_setg(errp, "No driver found for the requested export type");
+ return NULL;
+ }
+
+ bs = bdrv_lookup_bs(NULL, export->node_name, errp);
+ if (!bs) {
+ return NULL;
+ }
+
+ if (!export->has_writable) {
+ export->writable = false;
+ }
+ if (bdrv_is_read_only(bs) && export->writable) {
+ error_setg(errp, "Cannot export read-only node as writable");
+ return NULL;
+ }
+
+ ctx = bdrv_get_aio_context(bs);
+ aio_context_acquire(ctx);
+
+ if (export->has_iothread) {
+ IOThread *iothread;
+ AioContext *new_ctx;
+ Error **set_context_errp;
+
+ iothread = iothread_by_id(export->iothread);
+ if (!iothread) {
+ error_setg(errp, "iothread \"%s\" not found", export->iothread);
+ goto fail;
+ }
+
+ new_ctx = iothread_get_aio_context(iothread);
+
+ /* Ignore errors with fixed-iothread=false */
+ set_context_errp = fixed_iothread ? errp : NULL;
+ ret = bdrv_try_set_aio_context(bs, new_ctx, set_context_errp);
+ if (ret == 0) {
+ aio_context_release(ctx);
+ aio_context_acquire(new_ctx);
+ ctx = new_ctx;
+ } else if (fixed_iothread) {
+ goto fail;
+ }
+ }
+
+ /*
+ * Block exports are used for non-shared storage migration. Make sure
+ * that BDRV_O_INACTIVE is cleared and the image is ready for write
+ * access since the export could be available before migration handover.
+ * ctx was acquired in the caller.
+ */
+ bdrv_invalidate_cache(bs, NULL);
+
+ perm = BLK_PERM_CONSISTENT_READ;
+ if (export->writable) {
+ perm |= BLK_PERM_WRITE;
+ }
+
+ blk = blk_new(ctx, perm, BLK_PERM_ALL);
+
+ if (!fixed_iothread) {
+ blk_set_allow_aio_context_change(blk, true);
+ }
+
+ ret = blk_insert_bs(blk, bs, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ if (!export->has_writethrough) {
+ export->writethrough = false;
+ }
+ blk_set_enable_write_cache(blk, !export->writethrough);
+
+ assert(drv->instance_size >= sizeof(BlockExport));
+ exp = g_malloc0(drv->instance_size);
+ *exp = (BlockExport) {
+ .drv = drv,
+ .refcount = 1,
+ .user_owned = true,
+ .id = g_strdup(export->id),
+ .ctx = ctx,
+ .blk = blk,
+ };
+
+ ret = drv->create(exp, export, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ assert(exp->blk != NULL);
+
+ QLIST_INSERT_HEAD(&block_exports, exp, next);
+
+ aio_context_release(ctx);
+ return exp;
+
+fail:
+ blk_unref(blk);
+ aio_context_release(ctx);
+ if (exp) {
+ g_free(exp->id);
+ g_free(exp);
+ }
+ return NULL;
+}
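+
+/*
+ * Illustrative only (hypothetical id and node-name): exports reach this
+ * function through the QMP command block-export-add, handled by
+ * qmp_block_export_add() below, e.g.:
+ *
+ *   -> { "execute": "block-export-add",
+ *        "arguments": { "type": "nbd", "id": "exp0",
+ *                       "node-name": "disk0", "writable": true } }
+ *   <- { "return": {} }
+ */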
+
+/* Callers must hold exp->ctx lock */
+void blk_exp_ref(BlockExport *exp)
+{
+ assert(exp->refcount > 0);
+ exp->refcount++;
+}
+
+/* Runs in the main thread */
+static void blk_exp_delete_bh(void *opaque)
+{
+ BlockExport *exp = opaque;
+ AioContext *aio_context = exp->ctx;
+
+ aio_context_acquire(aio_context);
+
+ assert(exp->refcount == 0);
+ QLIST_REMOVE(exp, next);
+ exp->drv->delete(exp);
+ blk_unref(exp->blk);
+ qapi_event_send_block_export_deleted(exp->id);
+ g_free(exp->id);
+ g_free(exp);
+
+ aio_context_release(aio_context);
+}
+
+/* Callers must hold exp->ctx lock */
+void blk_exp_unref(BlockExport *exp)
+{
+ assert(exp->refcount > 0);
+ if (--exp->refcount == 0) {
+ /* Touch the block_exports list only in the main thread */
+ aio_bh_schedule_oneshot(qemu_get_aio_context(), blk_exp_delete_bh,
+ exp);
+ }
+}
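+
+/*
+ * A sketch of the intended reference pattern (not itself part of this
+ * patch): export drivers hold a reference while they are busy with a
+ * request, e.g.
+ *
+ *   blk_exp_ref(exp);     // before processing a client request
+ *   ...serve the request...
+ *   blk_exp_unref(exp);   // may schedule blk_exp_delete_bh()
+ *
+ * as read_from_fuse_export() in fuse.c does around request processing.
+ */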
+
+/*
+ * Drops the user reference to the export and requests that all client
+ * connections and other internally held references start to shut down. When
+ * the function returns, there may still be active references while the export
+ * is in the process of shutting down.
+ *
+ * Acquires exp->ctx internally. Callers must *not* hold the lock.
+ */
+void blk_exp_request_shutdown(BlockExport *exp)
+{
+ AioContext *aio_context = exp->ctx;
+
+ aio_context_acquire(aio_context);
+
+ /*
+ * If the user doesn't own the export any more, it is already shutting
+ * down. We must not call .request_shutdown and decrease the refcount a
+ * second time.
+ */
+ if (!exp->user_owned) {
+ goto out;
+ }
+
+ exp->drv->request_shutdown(exp);
+
+ assert(exp->user_owned);
+ exp->user_owned = false;
+ blk_exp_unref(exp);
+
+out:
+ aio_context_release(aio_context);
+}
+
+/*
+ * Returns whether a block export of the given type exists.
+ * type == BLOCK_EXPORT_TYPE__MAX checks for an export of any type.
+ */
+static bool blk_exp_has_type(BlockExportType type)
+{
+ BlockExport *exp;
+
+ if (type == BLOCK_EXPORT_TYPE__MAX) {
+ return !QLIST_EMPTY(&block_exports);
+ }
+
+ QLIST_FOREACH(exp, &block_exports, next) {
+ if (exp->drv->type == type) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* type == BLOCK_EXPORT_TYPE__MAX for all types */
+void blk_exp_close_all_type(BlockExportType type)
+{
+ BlockExport *exp, *next;
+
+ assert(in_aio_context_home_thread(qemu_get_aio_context()));
+
+ QLIST_FOREACH_SAFE(exp, &block_exports, next, next) {
+ if (type != BLOCK_EXPORT_TYPE__MAX && exp->drv->type != type) {
+ continue;
+ }
+ blk_exp_request_shutdown(exp);
+ }
+
+ AIO_WAIT_WHILE(NULL, blk_exp_has_type(type));
+}
+
+void blk_exp_close_all(void)
+{
+ blk_exp_close_all_type(BLOCK_EXPORT_TYPE__MAX);
+}
+
+void qmp_block_export_add(BlockExportOptions *export, Error **errp)
+{
+ blk_exp_add(export, errp);
+}
+
+void qmp_block_export_del(const char *id,
+ bool has_mode, BlockExportRemoveMode mode,
+ Error **errp)
+{
+ ERRP_GUARD();
+ BlockExport *exp;
+
+ exp = blk_exp_find(id);
+ if (exp == NULL) {
+ error_setg(errp, "Export '%s' is not found", id);
+ return;
+ }
+ if (!exp->user_owned) {
+ error_setg(errp, "Export '%s' is already shutting down", id);
+ return;
+ }
+
+ if (!has_mode) {
+ mode = BLOCK_EXPORT_REMOVE_MODE_SAFE;
+ }
+ if (mode == BLOCK_EXPORT_REMOVE_MODE_SAFE && exp->refcount > 1) {
+ error_setg(errp, "export '%s' still in use", exp->id);
+ error_append_hint(errp, "Use mode='hard' to force client "
+ "disconnect\n");
+ return;
+ }
+
+ blk_exp_request_shutdown(exp);
+}
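+
+/*
+ * Example (hypothetical id): with mode='safe' (the default), removal
+ * fails while clients still hold references; mode='hard' forces it:
+ *
+ *   -> { "execute": "block-export-del",
+ *        "arguments": { "id": "exp0", "mode": "hard" } }
+ *   <- { "return": {} }
+ */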
+
+BlockExportInfoList *qmp_query_block_exports(Error **errp)
+{
+ BlockExportInfoList *head = NULL, **tail = &head;
+ BlockExport *exp;
+
+ QLIST_FOREACH(exp, &block_exports, next) {
+ BlockExportInfo *info = g_new(BlockExportInfo, 1);
+ *info = (BlockExportInfo) {
+ .id = g_strdup(exp->id),
+ .type = exp->drv->type,
+ .node_name = g_strdup(bdrv_get_node_name(blk_bs(exp->blk))),
+ .shutting_down = !exp->user_owned,
+ };
+
+ QAPI_LIST_APPEND(tail, info);
+ }
+
+ return head;
+}
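+
+/*
+ * Example output (illustrative values only):
+ *
+ *   -> { "execute": "query-block-exports" }
+ *   <- { "return": [ { "id": "exp0", "type": "nbd",
+ *                      "node-name": "disk0",
+ *                      "shutting-down": false } ] }
+ */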
diff --git a/block/export/fuse.c b/block/export/fuse.c
new file mode 100644
index 000000000..823c126d2
--- /dev/null
+++ b/block/export/fuse.c
@@ -0,0 +1,812 @@
+/*
+ * Present a block device as a raw image through FUSE
+ *
+ * Copyright (c) 2020 Max Reitz <mreitz@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 or later of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define FUSE_USE_VERSION 31
+
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "block/block.h"
+#include "block/export.h"
+#include "block/fuse.h"
+#include "block/qapi.h"
+#include "qapi/error.h"
+#include "qapi/qapi-commands-block.h"
+#include "sysemu/block-backend.h"
+
+#include <fuse.h>
+#include <fuse_lowlevel.h>
+
+#if defined(CONFIG_FALLOCATE_ZERO_RANGE)
+#include <linux/falloc.h>
+#endif
+
+#ifdef __linux__
+#include <linux/fs.h>
+#endif
+
+/* Prevent overly long bounce buffer allocations */
+#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
+
+
+typedef struct FuseExport {
+ BlockExport common;
+
+ struct fuse_session *fuse_session;
+ struct fuse_buf fuse_buf;
+ bool mounted, fd_handler_set_up;
+
+ char *mountpoint;
+ bool writable;
+ bool growable;
+ /* Whether allow_other was used as a mount option or not */
+ bool allow_other;
+
+ mode_t st_mode;
+ uid_t st_uid;
+ gid_t st_gid;
+} FuseExport;
+
+static GHashTable *exports;
+static const struct fuse_lowlevel_ops fuse_ops;
+
+static void fuse_export_shutdown(BlockExport *exp);
+static void fuse_export_delete(BlockExport *exp);
+
+static void init_exports_table(void);
+
+static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
+ bool allow_other, Error **errp);
+static void read_from_fuse_export(void *opaque);
+
+static bool is_regular_file(const char *path, Error **errp);
+
+
+static int fuse_export_create(BlockExport *blk_exp,
+ BlockExportOptions *blk_exp_args,
+ Error **errp)
+{
+ FuseExport *exp = container_of(blk_exp, FuseExport, common);
+ BlockExportOptionsFuse *args = &blk_exp_args->u.fuse;
+ int ret;
+
+ assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
+
+ /* For growable exports, take the RESIZE permission */
+ if (args->growable) {
+ uint64_t blk_perm, blk_shared_perm;
+
+ blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
+
+ ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
+ blk_shared_perm, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ init_exports_table();
+
+ /*
+ * It is important to do this check before calling is_regular_file() --
+ * that function will do a stat(), which we would have to handle if we
+ * already exported something on @mountpoint. But we cannot, because
+ * we are currently caught up here.
+ * (Note that ideally we would want to resolve relative paths here,
+ * but bdrv_make_absolute_filename() might do the wrong thing for
+ * paths that contain colons, and realpath() would resolve symlinks,
+ * which we do not want: The mount point is not going to be the
+ * symlink's destination, but the link itself.)
+ * So this will not catch all potential clashes, but hopefully at
+ * least the most common one of specifying exactly the same path
+ * string twice.
+ */
+ if (g_hash_table_contains(exports, args->mountpoint)) {
+ error_setg(errp, "There already is a FUSE export on '%s'",
+ args->mountpoint);
+ ret = -EEXIST;
+ goto fail;
+ }
+
+ if (!is_regular_file(args->mountpoint, errp)) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ exp->mountpoint = g_strdup(args->mountpoint);
+ exp->writable = blk_exp_args->writable;
+ exp->growable = args->growable;
+
+ /* set default */
+ if (!args->has_allow_other) {
+ args->allow_other = FUSE_EXPORT_ALLOW_OTHER_AUTO;
+ }
+
+ exp->st_mode = S_IFREG | S_IRUSR;
+ if (exp->writable) {
+ exp->st_mode |= S_IWUSR;
+ }
+ exp->st_uid = getuid();
+ exp->st_gid = getgid();
+
+ if (args->allow_other == FUSE_EXPORT_ALLOW_OTHER_AUTO) {
+ /* Ignore errors on our first attempt */
+ ret = setup_fuse_export(exp, args->mountpoint, true, NULL);
+ exp->allow_other = ret == 0;
+ if (ret < 0) {
+ ret = setup_fuse_export(exp, args->mountpoint, false, errp);
+ }
+ } else {
+ exp->allow_other = args->allow_other == FUSE_EXPORT_ALLOW_OTHER_ON;
+ ret = setup_fuse_export(exp, args->mountpoint, exp->allow_other, errp);
+ }
+ if (ret < 0) {
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ fuse_export_delete(blk_exp);
+ return ret;
+}
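+
+/*
+ * Example (hypothetical paths and ids): the mount point must be an
+ * existing regular file; the export then appears there as a raw image:
+ *
+ *   $ touch /tmp/disk.raw
+ *   -> { "execute": "block-export-add",
+ *        "arguments": { "type": "fuse", "id": "exp0",
+ *                       "node-name": "disk0",
+ *                       "mountpoint": "/tmp/disk.raw",
+ *                       "writable": true } }
+ */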
+
+/**
+ * Allocates the global @exports hash table.
+ */
+static void init_exports_table(void)
+{
+ if (exports) {
+ return;
+ }
+
+ exports = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, NULL);
+}
+
+/**
+ * Create exp->fuse_session and mount it.
+ */
+static int setup_fuse_export(FuseExport *exp, const char *mountpoint,
+ bool allow_other, Error **errp)
+{
+ const char *fuse_argv[4];
+ char *mount_opts;
+ struct fuse_args fuse_args;
+ int ret;
+
+ /*
+ * max_read needs to match what fuse_init() sets.
+ * max_write need not be supplied.
+ */
+ mount_opts = g_strdup_printf("max_read=%zu,default_permissions%s",
+ FUSE_MAX_BOUNCE_BYTES,
+ allow_other ? ",allow_other" : "");
+
+ fuse_argv[0] = ""; /* Dummy program name */
+ fuse_argv[1] = "-o";
+ fuse_argv[2] = mount_opts;
+ fuse_argv[3] = NULL;
+ fuse_args = (struct fuse_args)FUSE_ARGS_INIT(3, (char **)fuse_argv);
+
+ exp->fuse_session = fuse_session_new(&fuse_args, &fuse_ops,
+ sizeof(fuse_ops), exp);
+ g_free(mount_opts);
+ if (!exp->fuse_session) {
+ error_setg(errp, "Failed to set up FUSE session");
+ ret = -EIO;
+ goto fail;
+ }
+
+ ret = fuse_session_mount(exp->fuse_session, mountpoint);
+ if (ret < 0) {
+ error_setg(errp, "Failed to mount FUSE session to export");
+ ret = -EIO;
+ goto fail;
+ }
+ exp->mounted = true;
+
+ g_hash_table_insert(exports, g_strdup(mountpoint), NULL);
+
+ aio_set_fd_handler(exp->common.ctx,
+ fuse_session_fd(exp->fuse_session), true,
+ read_from_fuse_export, NULL, NULL, exp);
+ exp->fd_handler_set_up = true;
+
+ return 0;
+
+fail:
+ fuse_export_shutdown(&exp->common);
+ return ret;
+}
+
+/**
+ * Callback to be invoked when the FUSE session FD can be read from.
+ * (This is basically the FUSE event loop.)
+ */
+static void read_from_fuse_export(void *opaque)
+{
+ FuseExport *exp = opaque;
+ int ret;
+
+ blk_exp_ref(&exp->common);
+
+ do {
+ ret = fuse_session_receive_buf(exp->fuse_session, &exp->fuse_buf);
+ } while (ret == -EINTR);
+ if (ret < 0) {
+ goto out;
+ }
+
+ fuse_session_process_buf(exp->fuse_session, &exp->fuse_buf);
+
+out:
+ blk_exp_unref(&exp->common);
+}
+
+static void fuse_export_shutdown(BlockExport *blk_exp)
+{
+ FuseExport *exp = container_of(blk_exp, FuseExport, common);
+
+ if (exp->fuse_session) {
+ fuse_session_exit(exp->fuse_session);
+
+ if (exp->fd_handler_set_up) {
+ aio_set_fd_handler(exp->common.ctx,
+ fuse_session_fd(exp->fuse_session), true,
+ NULL, NULL, NULL, NULL);
+ exp->fd_handler_set_up = false;
+ }
+ }
+
+ if (exp->mountpoint) {
+ /*
+ * Safe to drop now, because we will not handle any requests
+ * for this export anymore anyway.
+ */
+ g_hash_table_remove(exports, exp->mountpoint);
+ }
+}
+
+static void fuse_export_delete(BlockExport *blk_exp)
+{
+ FuseExport *exp = container_of(blk_exp, FuseExport, common);
+
+ if (exp->fuse_session) {
+ if (exp->mounted) {
+ fuse_session_unmount(exp->fuse_session);
+ }
+
+ fuse_session_destroy(exp->fuse_session);
+ }
+
+ free(exp->fuse_buf.mem);
+ g_free(exp->mountpoint);
+}
+
+/**
+ * Check whether @path points to a regular file. If not, put an
+ * appropriate message into *errp.
+ */
+static bool is_regular_file(const char *path, Error **errp)
+{
+ struct stat statbuf;
+ int ret;
+
+ ret = stat(path, &statbuf);
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "Failed to stat '%s'", path);
+ return false;
+ }
+
+ if (!S_ISREG(statbuf.st_mode)) {
+ error_setg(errp, "'%s' is not a regular file", path);
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * A chance to change some parameters supplied to FUSE_INIT.
+ */
+static void fuse_init(void *userdata, struct fuse_conn_info *conn)
+{
+ /*
+ * MIN_NON_ZERO() would not be wrong here, but what we set here
+ * must equal what has been passed to fuse_session_new().
+ * Therefore, as long as max_read must be passed as a mount option
+ * (which libfuse claims will be changed at some point), we have
+ * to set max_read to a fixed value here.
+ */
+ conn->max_read = FUSE_MAX_BOUNCE_BYTES;
+
+ conn->max_write = MIN_NON_ZERO(BDRV_REQUEST_MAX_BYTES, conn->max_write);
+}
+
+/**
+ * Let clients look up files. Always return ENOENT because we only
+ * care about the mountpoint itself.
+ */
+static void fuse_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
+{
+ fuse_reply_err(req, ENOENT);
+}
+
+/**
+ * Let clients get file attributes (i.e., stat() the file).
+ */
+static void fuse_getattr(fuse_req_t req, fuse_ino_t inode,
+ struct fuse_file_info *fi)
+{
+ struct stat statbuf;
+ int64_t length, allocated_blocks;
+ time_t now = time(NULL);
+ FuseExport *exp = fuse_req_userdata(req);
+
+ length = blk_getlength(exp->common.blk);
+ if (length < 0) {
+ fuse_reply_err(req, -length);
+ return;
+ }
+
+ allocated_blocks = bdrv_get_allocated_file_size(blk_bs(exp->common.blk));
+ if (allocated_blocks <= 0) {
+ allocated_blocks = DIV_ROUND_UP(length, 512);
+ } else {
+ allocated_blocks = DIV_ROUND_UP(allocated_blocks, 512);
+ }
+
+ statbuf = (struct stat) {
+ .st_ino = inode,
+ .st_mode = exp->st_mode,
+ .st_nlink = 1,
+ .st_uid = exp->st_uid,
+ .st_gid = exp->st_gid,
+ .st_size = length,
+ .st_blksize = blk_bs(exp->common.blk)->bl.request_alignment,
+ .st_blocks = allocated_blocks,
+ .st_atime = now,
+ .st_mtime = now,
+ .st_ctime = now,
+ };
+
+ fuse_reply_attr(req, &statbuf, 1.);
+}
+
+static int fuse_do_truncate(const FuseExport *exp, int64_t size,
+ bool req_zero_write, PreallocMode prealloc)
+{
+ uint64_t blk_perm, blk_shared_perm;
+ BdrvRequestFlags truncate_flags = 0;
+ int ret;
+
+ if (req_zero_write) {
+ truncate_flags |= BDRV_REQ_ZERO_WRITE;
+ }
+
+ /* Growable exports have a permanent RESIZE permission */
+ if (!exp->growable) {
+ blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
+
+ ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
+ blk_shared_perm, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ ret = blk_truncate(exp->common.blk, size, true, prealloc,
+ truncate_flags, NULL);
+
+ if (!exp->growable) {
+ /* Must succeed, because we are only giving up the RESIZE permission */
+ blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
+ }
+
+ return ret;
+}
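+
+/*
+ * For instance (hypothetical path), a client-side
+ * "truncate -s 2G /tmp/disk.raw" arrives via fuse_setattr() with
+ * FUSE_SET_ATTR_SIZE and ends up here (writable exports only).
+ */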
+
+/**
+ * Let clients set file attributes. Only resizing and changing
+ * permissions (st_mode, st_uid, st_gid) are allowed.
+ * Changing permissions is only allowed as far as it will actually
+ * permit access: read-only exports cannot be given +w, and exports
+ * without allow_other can neither be given a different UID or GID
+ * nor non-owner access.
+ */
+static void fuse_setattr(fuse_req_t req, fuse_ino_t inode, struct stat *statbuf,
+ int to_set, struct fuse_file_info *fi)
+{
+ FuseExport *exp = fuse_req_userdata(req);
+ int supported_attrs;
+ int ret;
+
+ supported_attrs = FUSE_SET_ATTR_SIZE | FUSE_SET_ATTR_MODE;
+ if (exp->allow_other) {
+ supported_attrs |= FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID;
+ }
+
+ if (to_set & ~supported_attrs) {
+ fuse_reply_err(req, ENOTSUP);
+ return;
+ }
+
+ /* Do some argument checks first before committing to anything */
+ if (to_set & FUSE_SET_ATTR_MODE) {
+ /*
+ * Without allow_other, non-owners can never access the export, so do
+ * not allow setting permissions for them
+ */
+ if (!exp->allow_other &&
+ (statbuf->st_mode & (S_IRWXG | S_IRWXO)) != 0)
+ {
+ fuse_reply_err(req, EPERM);
+ return;
+ }
+
+ /* +w for read-only exports makes no sense, disallow it */
+ if (!exp->writable &&
+ (statbuf->st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
+ {
+ fuse_reply_err(req, EROFS);
+ return;
+ }
+ }
+
+ if (to_set & FUSE_SET_ATTR_SIZE) {
+ if (!exp->writable) {
+ fuse_reply_err(req, EACCES);
+ return;
+ }
+
+ ret = fuse_do_truncate(exp, statbuf->st_size, true, PREALLOC_MODE_OFF);
+ if (ret < 0) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+ }
+
+ if (to_set & FUSE_SET_ATTR_MODE) {
+ /* Ignore FUSE-supplied file type, only change the mode */
+ exp->st_mode = (statbuf->st_mode & 07777) | S_IFREG;
+ }
+
+ if (to_set & FUSE_SET_ATTR_UID) {
+ exp->st_uid = statbuf->st_uid;
+ }
+
+ if (to_set & FUSE_SET_ATTR_GID) {
+ exp->st_gid = statbuf->st_gid;
+ }
+
+ fuse_getattr(req, inode, fi);
+}
+
+/**
+ * Let clients open a file (i.e., the exported image).
+ */
+static void fuse_open(fuse_req_t req, fuse_ino_t inode,
+ struct fuse_file_info *fi)
+{
+ fuse_reply_open(req, fi);
+}
+
+/**
+ * Handle client reads from the exported image.
+ */
+static void fuse_read(fuse_req_t req, fuse_ino_t inode,
+ size_t size, off_t offset, struct fuse_file_info *fi)
+{
+ FuseExport *exp = fuse_req_userdata(req);
+ int64_t length;
+ void *buf;
+ int ret;
+
+ /* Limited by max_read, should not happen */
+ if (size > FUSE_MAX_BOUNCE_BYTES) {
+ fuse_reply_err(req, EINVAL);
+ return;
+ }
+
+ /**
+ * Clients will expect short reads at EOF, so we have to limit
+ * offset+size to the image length.
+ */
+ length = blk_getlength(exp->common.blk);
+ if (length < 0) {
+ fuse_reply_err(req, -length);
+ return;
+ }
+
+ if (offset + size > length) {
+ size = length - offset;
+ }
+
+ buf = qemu_try_blockalign(blk_bs(exp->common.blk), size);
+ if (!buf) {
+ fuse_reply_err(req, ENOMEM);
+ return;
+ }
+
+ ret = blk_pread(exp->common.blk, offset, buf, size);
+ if (ret >= 0) {
+ fuse_reply_buf(req, buf, size);
+ } else {
+ fuse_reply_err(req, -ret);
+ }
+
+ qemu_vfree(buf);
+}
+
+/**
+ * Handle client writes to the exported image.
+ */
+static void fuse_write(fuse_req_t req, fuse_ino_t inode, const char *buf,
+ size_t size, off_t offset, struct fuse_file_info *fi)
+{
+ FuseExport *exp = fuse_req_userdata(req);
+ int64_t length;
+ int ret;
+
+ /* Limited by max_write, should not happen */
+ if (size > BDRV_REQUEST_MAX_BYTES) {
+ fuse_reply_err(req, EINVAL);
+ return;
+ }
+
+ if (!exp->writable) {
+ fuse_reply_err(req, EACCES);
+ return;
+ }
+
+ /**
+ * Clients will expect short writes at EOF, so we have to limit
+ * offset+size to the image length.
+ */
+ length = blk_getlength(exp->common.blk);
+ if (length < 0) {
+ fuse_reply_err(req, -length);
+ return;
+ }
+
+ if (offset + size > length) {
+ if (exp->growable) {
+ ret = fuse_do_truncate(exp, offset + size, true, PREALLOC_MODE_OFF);
+ if (ret < 0) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+ } else {
+ size = length - offset;
+ }
+ }
+
+ ret = blk_pwrite(exp->common.blk, offset, buf, size, 0);
+ if (ret >= 0) {
+ fuse_reply_write(req, size);
+ } else {
+ fuse_reply_err(req, -ret);
+ }
+}
+
+/**
+ * Let clients perform various fallocate() operations.
+ */
+static void fuse_fallocate(fuse_req_t req, fuse_ino_t inode, int mode,
+ off_t offset, off_t length,
+ struct fuse_file_info *fi)
+{
+ FuseExport *exp = fuse_req_userdata(req);
+ int64_t blk_len;
+ int ret;
+
+ if (!exp->writable) {
+ fuse_reply_err(req, EACCES);
+ return;
+ }
+
+ blk_len = blk_getlength(exp->common.blk);
+ if (blk_len < 0) {
+ fuse_reply_err(req, -blk_len);
+ return;
+ }
+
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ length = MIN(length, blk_len - offset);
+ }
+
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ if (!(mode & FALLOC_FL_KEEP_SIZE)) {
+ fuse_reply_err(req, EINVAL);
+ return;
+ }
+
+ do {
+ int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
+
+ ret = blk_pdiscard(exp->common.blk, offset, size);
+ offset += size;
+ length -= size;
+ } while (ret == 0 && length > 0);
+ }
+#ifdef CONFIG_FALLOCATE_ZERO_RANGE
+ else if (mode & FALLOC_FL_ZERO_RANGE) {
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + length > blk_len) {
+ /* No need for zeroes, we are going to write them ourselves */
+ ret = fuse_do_truncate(exp, offset + length, false,
+ PREALLOC_MODE_OFF);
+ if (ret < 0) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+ }
+
+ do {
+ int size = MIN(length, BDRV_REQUEST_MAX_BYTES);
+
+ ret = blk_pwrite_zeroes(exp->common.blk,
+ offset, size, 0);
+ offset += size;
+ length -= size;
+ } while (ret == 0 && length > 0);
+ }
+#endif /* CONFIG_FALLOCATE_ZERO_RANGE */
+ else if (!mode) {
+ /* We can only fallocate at the EOF with a truncate */
+ if (offset < blk_len) {
+ fuse_reply_err(req, EOPNOTSUPP);
+ return;
+ }
+
+ if (offset > blk_len) {
+ /* No preallocation needed here */
+ ret = fuse_do_truncate(exp, offset, true, PREALLOC_MODE_OFF);
+ if (ret < 0) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+ }
+
+ ret = fuse_do_truncate(exp, offset + length, true,
+ PREALLOC_MODE_FALLOC);
+ } else {
+ ret = -EOPNOTSUPP;
+ }
+
+ fuse_reply_err(req, ret < 0 ? -ret : 0);
+}
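+
+/*
+ * Example (hypothetical path): punching a hole from a client maps to
+ * the blk_pdiscard() loop above:
+ *
+ *   $ fallocate --punch-hole --offset 0 --length 1M /tmp/disk.raw
+ *
+ * (fallocate(1) implies FALLOC_FL_KEEP_SIZE when punching holes.)
+ */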
+
+/**
+ * Let clients fsync the exported image.
+ */
+static void fuse_fsync(fuse_req_t req, fuse_ino_t inode, int datasync,
+ struct fuse_file_info *fi)
+{
+ FuseExport *exp = fuse_req_userdata(req);
+ int ret;
+
+ ret = blk_flush(exp->common.blk);
+ fuse_reply_err(req, ret < 0 ? -ret : 0);
+}
+
+/**
+ * Called before an FD to the exported image is closed. (libfuse
+ * notes this to be a way to return last-minute errors.)
+ */
+static void fuse_flush(fuse_req_t req, fuse_ino_t inode,
+ struct fuse_file_info *fi)
+{
+ fuse_fsync(req, inode, 1, fi);
+}
+
+#ifdef CONFIG_FUSE_LSEEK
+/**
+ * Let clients inquire allocation status.
+ */
+static void fuse_lseek(fuse_req_t req, fuse_ino_t inode, off_t offset,
+ int whence, struct fuse_file_info *fi)
+{
+ FuseExport *exp = fuse_req_userdata(req);
+
+ if (whence != SEEK_HOLE && whence != SEEK_DATA) {
+ fuse_reply_err(req, EINVAL);
+ return;
+ }
+
+ while (true) {
+ int64_t pnum;
+ int ret;
+
+ ret = bdrv_block_status_above(blk_bs(exp->common.blk), NULL,
+ offset, INT64_MAX, &pnum, NULL, NULL);
+ if (ret < 0) {
+ fuse_reply_err(req, -ret);
+ return;
+ }
+
+ if (!pnum && (ret & BDRV_BLOCK_EOF)) {
+ int64_t blk_len;
+
+ /*
+ * If blk_getlength() rounds (e.g. by sectors), then the
+ * export length will be rounded, too. However,
+ * bdrv_block_status_above() may return EOF at unaligned
+ * offsets. We must not let this become visible and thus
+ * always simulate a hole between @offset (the real EOF)
+ * and @blk_len (the client-visible EOF).
+ */
+
+ blk_len = blk_getlength(exp->common.blk);
+ if (blk_len < 0) {
+ fuse_reply_err(req, -blk_len);
+ return;
+ }
+
+ if (offset > blk_len || whence == SEEK_DATA) {
+ fuse_reply_err(req, ENXIO);
+ } else {
+ fuse_reply_lseek(req, offset);
+ }
+ return;
+ }
+
+ if (ret & BDRV_BLOCK_DATA) {
+ if (whence == SEEK_DATA) {
+ fuse_reply_lseek(req, offset);
+ return;
+ }
+ } else {
+ if (whence == SEEK_HOLE) {
+ fuse_reply_lseek(req, offset);
+ return;
+ }
+ }
+
+ /* Safety check against infinite loops */
+ if (!pnum) {
+ fuse_reply_err(req, ENXIO);
+ return;
+ }
+
+ offset += pnum;
+ }
+}
+#endif
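+
+/*
+ * Example (hypothetical path): clients can then map allocation with
+ * SEEK_DATA/SEEK_HOLE, e.g. "xfs_io -c 'seek -a 0' /tmp/disk.raw",
+ * each step translating to a bdrv_block_status_above() query above.
+ */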
+
+static const struct fuse_lowlevel_ops fuse_ops = {
+ .init = fuse_init,
+ .lookup = fuse_lookup,
+ .getattr = fuse_getattr,
+ .setattr = fuse_setattr,
+ .open = fuse_open,
+ .read = fuse_read,
+ .write = fuse_write,
+ .fallocate = fuse_fallocate,
+ .flush = fuse_flush,
+ .fsync = fuse_fsync,
+#ifdef CONFIG_FUSE_LSEEK
+ .lseek = fuse_lseek,
+#endif
+};
+
+const BlockExportDriver blk_exp_fuse = {
+ .type = BLOCK_EXPORT_TYPE_FUSE,
+ .instance_size = sizeof(FuseExport),
+ .create = fuse_export_create,
+ .delete = fuse_export_delete,
+ .request_shutdown = fuse_export_shutdown,
+};
diff --git a/block/export/meson.build b/block/export/meson.build
new file mode 100644
index 000000000..0a08e384c
--- /dev/null
+++ b/block/export/meson.build
@@ -0,0 +1,7 @@
+blockdev_ss.add(files('export.c'))
+
+if have_vhost_user_blk_server
+ blockdev_ss.add(files('vhost-user-blk-server.c'))
+endif
+
+blockdev_ss.add(when: fuse, if_true: files('fuse.c'))
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
new file mode 100644
index 000000000..186256333
--- /dev/null
+++ b/block/export/vhost-user-blk-server.c
@@ -0,0 +1,533 @@
+/*
+ * Sharing QEMU block devices via the vhost-user protocol
+ *
+ * Parts of the code are based on nbd/server.c.
+ *
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */
+#include "standard-headers/linux/virtio_blk.h"
+#include "qemu/vhost-user-server.h"
+#include "vhost-user-blk-server.h"
+#include "qapi/error.h"
+#include "qom/object_interfaces.h"
+#include "sysemu/block-backend.h"
+#include "util/block-helpers.h"
+
+/*
+ * Sector units are 512 bytes regardless of the
+ * virtio_blk_config->blk_size value.
+ */
+#define VIRTIO_BLK_SECTOR_BITS 9
+#define VIRTIO_BLK_SECTOR_SIZE (1ull << VIRTIO_BLK_SECTOR_BITS)
+
+enum {
+ VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
+ VHOST_USER_BLK_MAX_DISCARD_SECTORS = 32768,
+ VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS = 32768,
+};
+struct virtio_blk_inhdr {
+ unsigned char status;
+};
+
+typedef struct VuBlkReq {
+ VuVirtqElement elem;
+ int64_t sector_num;
+ size_t size;
+ struct virtio_blk_inhdr *in;
+ struct virtio_blk_outhdr out;
+ VuServer *server;
+ struct VuVirtq *vq;
+} VuBlkReq;
+
+/* vhost user block device */
+typedef struct {
+ BlockExport export;
+ VuServer vu_server;
+ uint32_t blk_size;
+ QIOChannelSocket *sioc;
+ struct virtio_blk_config blkcfg;
+ bool writable;
+} VuBlkExport;
+
+static void vu_blk_req_complete(VuBlkReq *req)
+{
+ VuDev *vu_dev = &req->server->vu_dev;
+
+ /* IO size with 1 extra status byte */
+ vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
+ vu_queue_notify(vu_dev, req->vq);
+
+ free(req);
+}
+
+static bool vu_blk_sect_range_ok(VuBlkExport *vexp, uint64_t sector,
+ size_t size)
+{
+ uint64_t nb_sectors;
+ uint64_t total_sectors;
+
+ if (size % VIRTIO_BLK_SECTOR_SIZE) {
+ return false;
+ }
+
+ nb_sectors = size >> VIRTIO_BLK_SECTOR_BITS;
+
+ QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != VIRTIO_BLK_SECTOR_SIZE);
+ if (nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return false;
+ }
+ if ((sector << VIRTIO_BLK_SECTOR_BITS) % vexp->blk_size) {
+ return false;
+ }
+ blk_get_geometry(vexp->export.blk, &total_sectors);
+ if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+ return false;
+ }
+ return true;
+}
+
+static int coroutine_fn
+vu_blk_discard_write_zeroes(VuBlkExport *vexp, struct iovec *iov,
+ uint32_t iovcnt, uint32_t type)
+{
+ BlockBackend *blk = vexp->export.blk;
+ struct virtio_blk_discard_write_zeroes desc;
+ ssize_t size;
+ uint64_t sector;
+ uint32_t num_sectors;
+ uint32_t max_sectors;
+ uint32_t flags;
+ int bytes;
+
+ /* Only one desc is currently supported */
+ if (unlikely(iov_size(iov, iovcnt) > sizeof(desc))) {
+ return VIRTIO_BLK_S_UNSUPP;
+ }
+
+ size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
+ if (unlikely(size != sizeof(desc))) {
+ error_report("Invalid size %zd, expected %zu", size, sizeof(desc));
+ return VIRTIO_BLK_S_IOERR;
+ }
+
+ sector = le64_to_cpu(desc.sector);
+ num_sectors = le32_to_cpu(desc.num_sectors);
+ flags = le32_to_cpu(desc.flags);
+ max_sectors = (type == VIRTIO_BLK_T_WRITE_ZEROES) ?
+ VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS :
+ VHOST_USER_BLK_MAX_DISCARD_SECTORS;
+
+ /* This check ensures that 'bytes' fits in an int */
+ if (unlikely(num_sectors > max_sectors)) {
+ return VIRTIO_BLK_S_IOERR;
+ }
+
+ bytes = num_sectors << VIRTIO_BLK_SECTOR_BITS;
+
+ if (unlikely(!vu_blk_sect_range_ok(vexp, sector, bytes))) {
+ return VIRTIO_BLK_S_IOERR;
+ }
+
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for discard
+ * and write zeroes commands if any unknown flag is set.
+ */
+ if (unlikely(flags & ~VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ return VIRTIO_BLK_S_UNSUPP;
+ }
+
+ if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
+ int blk_flags = 0;
+
+ if (flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
+ blk_flags |= BDRV_REQ_MAY_UNMAP;
+ }
+
+ if (blk_co_pwrite_zeroes(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+ bytes, blk_flags) == 0) {
+ return VIRTIO_BLK_S_OK;
+ }
+ } else if (type == VIRTIO_BLK_T_DISCARD) {
+ /*
+ * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP for
+ * discard commands if the unmap flag is set.
+ */
+ if (unlikely(flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP)) {
+ return VIRTIO_BLK_S_UNSUPP;
+ }
+
+ if (blk_co_pdiscard(blk, sector << VIRTIO_BLK_SECTOR_BITS,
+ bytes) == 0) {
+ return VIRTIO_BLK_S_OK;
+ }
+ }
+
+ return VIRTIO_BLK_S_IOERR;
+}
+
+static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
+{
+ VuBlkReq *req = opaque;
+ VuServer *server = req->server;
+ VuVirtqElement *elem = &req->elem;
+ uint32_t type;
+
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
+ BlockBackend *blk = vexp->export.blk;
+
+ struct iovec *in_iov = elem->in_sg;
+ struct iovec *out_iov = elem->out_sg;
+ unsigned in_num = elem->in_num;
+ unsigned out_num = elem->out_num;
+
+ /* refer to hw/block/virtio-blk.c */
+ if (elem->out_num < 1 || elem->in_num < 1) {
+ error_report("virtio-blk request missing headers");
+ goto err;
+ }
+
+ if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
+ sizeof(req->out)) != sizeof(req->out))) {
+ error_report("virtio-blk request outhdr too short");
+ goto err;
+ }
+
+ iov_discard_front(&out_iov, &out_num, sizeof(req->out));
+
+ if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
+ error_report("virtio-blk request inhdr too short");
+ goto err;
+ }
+
+ /* We always touch the last byte, so just see how big in_iov is. */
+ req->in = (void *)in_iov[in_num - 1].iov_base
+ + in_iov[in_num - 1].iov_len
+ - sizeof(struct virtio_blk_inhdr);
+ iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
+
+ type = le32_to_cpu(req->out.type);
+ switch (type & ~VIRTIO_BLK_T_BARRIER) {
+ case VIRTIO_BLK_T_IN:
+ case VIRTIO_BLK_T_OUT: {
+ QEMUIOVector qiov;
+ int64_t offset;
+ ssize_t ret = 0;
+ bool is_write = type & VIRTIO_BLK_T_OUT;
+ req->sector_num = le64_to_cpu(req->out.sector);
+
+ if (is_write && !vexp->writable) {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ if (is_write) {
+ qemu_iovec_init_external(&qiov, out_iov, out_num);
+ } else {
+ qemu_iovec_init_external(&qiov, in_iov, in_num);
+ }
+
+ if (unlikely(!vu_blk_sect_range_ok(vexp,
+ req->sector_num,
+ qiov.size))) {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ offset = req->sector_num << VIRTIO_BLK_SECTOR_BITS;
+
+ if (is_write) {
+ ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
+ } else {
+ ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
+ }
+ if (ret >= 0) {
+ req->in->status = VIRTIO_BLK_S_OK;
+ } else {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ }
+ break;
+ }
+ case VIRTIO_BLK_T_FLUSH:
+ if (blk_co_flush(blk) == 0) {
+ req->in->status = VIRTIO_BLK_S_OK;
+ } else {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ }
+ break;
+ case VIRTIO_BLK_T_GET_ID: {
+ size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
+ VIRTIO_BLK_ID_BYTES);
+ snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
+ req->in->status = VIRTIO_BLK_S_OK;
+ req->size = elem->in_sg[0].iov_len;
+ break;
+ }
+ case VIRTIO_BLK_T_DISCARD:
+ case VIRTIO_BLK_T_WRITE_ZEROES: {
+ if (!vexp->writable) {
+ req->in->status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ req->in->status = vu_blk_discard_write_zeroes(vexp, out_iov, out_num,
+ type);
+ break;
+ }
+ default:
+ req->in->status = VIRTIO_BLK_S_UNSUPP;
+ break;
+ }
+
+ vu_blk_req_complete(req);
+ return;
+
+err:
+ free(req);
+}
+
+static void vu_blk_process_vq(VuDev *vu_dev, int idx)
+{
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
+ VuVirtq *vq = vu_get_queue(vu_dev, idx);
+
+ while (1) {
+ VuBlkReq *req;
+
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
+ if (!req) {
+ break;
+ }
+
+ req->server = server;
+ req->vq = vq;
+
+ Coroutine *co =
+ qemu_coroutine_create(vu_blk_virtio_process_req, req);
+ qemu_coroutine_enter(co);
+ }
+}
+
+static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
+{
+ VuVirtq *vq;
+
+ assert(vu_dev);
+
+ vq = vu_get_queue(vu_dev, idx);
+ vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
+}
+
+static uint64_t vu_blk_get_features(VuDev *dev)
+{
+ uint64_t features;
+ VuServer *server = container_of(dev, VuServer, vu_dev);
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
+ features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
+ 1ull << VIRTIO_BLK_F_SEG_MAX |
+ 1ull << VIRTIO_BLK_F_TOPOLOGY |
+ 1ull << VIRTIO_BLK_F_BLK_SIZE |
+ 1ull << VIRTIO_BLK_F_FLUSH |
+ 1ull << VIRTIO_BLK_F_DISCARD |
+ 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
+ 1ull << VIRTIO_BLK_F_CONFIG_WCE |
+ 1ull << VIRTIO_BLK_F_MQ |
+ 1ull << VIRTIO_F_VERSION_1 |
+ 1ull << VIRTIO_RING_F_INDIRECT_DESC |
+ 1ull << VIRTIO_RING_F_EVENT_IDX |
+ 1ull << VHOST_USER_F_PROTOCOL_FEATURES;
+
+ if (!vexp->writable) {
+ features |= 1ull << VIRTIO_BLK_F_RO;
+ }
+
+ return features;
+}
+
+static uint64_t vu_blk_get_protocol_features(VuDev *dev)
+{
+ return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
+}
+
+static int
+vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
+{
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
+
+ if (len > sizeof(struct virtio_blk_config)) {
+ return -1;
+ }
+
+ memcpy(config, &vexp->blkcfg, len);
+ return 0;
+}
+
+static int
+vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
+ uint32_t offset, uint32_t size, uint32_t flags)
+{
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
+ uint8_t wce;
+
+ /* don't support live migration */
+ if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
+ return -EINVAL;
+ }
+
+ if (offset != offsetof(struct virtio_blk_config, wce) ||
+ size != 1) {
+ return -EINVAL;
+ }
+
+ wce = *data;
+ vexp->blkcfg.wce = wce;
+ blk_set_enable_write_cache(vexp->export.blk, wce);
+ return 0;
+}
+
+/*
+ * When the client disconnects, it sends a VHOST_USER_NONE request,
+ * and vu_process_message() would simply call exit(), causing the VM
+ * to exit abruptly.
+ * To avoid this, handle the VHOST_USER_NONE request here, before it
+ * reaches vu_process_message().
+ */
+static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
+{
+ if (vmsg->request == VHOST_USER_NONE) {
+ dev->panic(dev, "disconnect");
+ return true;
+ }
+ return false;
+}
+
+static const VuDevIface vu_blk_iface = {
+ .get_features = vu_blk_get_features,
+ .queue_set_started = vu_blk_queue_set_started,
+ .get_protocol_features = vu_blk_get_protocol_features,
+ .get_config = vu_blk_get_config,
+ .set_config = vu_blk_set_config,
+ .process_msg = vu_blk_process_msg,
+};
+
+static void blk_aio_attached(AioContext *ctx, void *opaque)
+{
+ VuBlkExport *vexp = opaque;
+
+ vexp->export.ctx = ctx;
+ vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
+}
+
+static void blk_aio_detach(void *opaque)
+{
+ VuBlkExport *vexp = opaque;
+
+ vhost_user_server_detach_aio_context(&vexp->vu_server);
+ vexp->export.ctx = NULL;
+}
+
+static void
+vu_blk_initialize_config(BlockDriverState *bs,
+ struct virtio_blk_config *config,
+ uint32_t blk_size,
+ uint16_t num_queues)
+{
+ config->capacity =
+ cpu_to_le64(bdrv_getlength(bs) >> VIRTIO_BLK_SECTOR_BITS);
+ config->blk_size = cpu_to_le32(blk_size);
+ config->size_max = cpu_to_le32(0);
+ config->seg_max = cpu_to_le32(128 - 2);
+ config->min_io_size = cpu_to_le16(1);
+ config->opt_io_size = cpu_to_le32(1);
+ config->num_queues = cpu_to_le16(num_queues);
+ config->max_discard_sectors =
+ cpu_to_le32(VHOST_USER_BLK_MAX_DISCARD_SECTORS);
+ config->max_discard_seg = cpu_to_le32(1);
+ config->discard_sector_alignment =
+ cpu_to_le32(blk_size >> VIRTIO_BLK_SECTOR_BITS);
+ config->max_write_zeroes_sectors
+ = cpu_to_le32(VHOST_USER_BLK_MAX_WRITE_ZEROES_SECTORS);
+ config->max_write_zeroes_seg = cpu_to_le32(1);
+}
+
+static void vu_blk_exp_request_shutdown(BlockExport *exp)
+{
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
+
+ vhost_user_server_stop(&vexp->vu_server);
+}
+
+static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
+ Error **errp)
+{
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
+ BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
+ Error *local_err = NULL;
+ uint64_t logical_block_size;
+ uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;
+
+ vexp->writable = opts->writable;
+ vexp->blkcfg.wce = 0;
+
+ if (vu_opts->has_logical_block_size) {
+ logical_block_size = vu_opts->logical_block_size;
+ } else {
+ logical_block_size = VIRTIO_BLK_SECTOR_SIZE;
+ }
+ check_block_size(exp->id, "logical-block-size", logical_block_size,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
+ }
+ vexp->blk_size = logical_block_size;
+ blk_set_guest_block_size(exp->blk, logical_block_size);
+
+ if (vu_opts->has_num_queues) {
+ num_queues = vu_opts->num_queues;
+ }
+ if (num_queues == 0) {
+ error_setg(errp, "num-queues must be greater than 0");
+ return -EINVAL;
+ }
+
+ vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
+ logical_block_size, num_queues);
+
+ blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
+ vexp);
+
+ if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
+ num_queues, &vu_blk_iface, errp)) {
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
+ blk_aio_detach, vexp);
+ return -EADDRNOTAVAIL;
+ }
+
+ return 0;
+}
+
+static void vu_blk_exp_delete(BlockExport *exp)
+{
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
+
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
+ vexp);
+}
+
+const BlockExportDriver blk_exp_vhost_user_blk = {
+ .type = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
+ .instance_size = sizeof(VuBlkExport),
+ .create = vu_blk_exp_create,
+ .delete = vu_blk_exp_delete,
+ .request_shutdown = vu_blk_exp_request_shutdown,
+};
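+
+/*
+ * Example (hypothetical socket path): exporting a node to an external
+ * vhost-user-blk frontend over a UNIX socket:
+ *
+ *   -> { "execute": "block-export-add",
+ *        "arguments": { "type": "vhost-user-blk", "id": "exp0",
+ *                       "node-name": "disk0",
+ *                       "addr": { "type": "unix",
+ *                                 "path": "/tmp/vu-blk.sock" } } }
+ */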
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
new file mode 100644
index 000000000..fcf46fc8a
--- /dev/null
+++ b/block/export/vhost-user-blk-server.h
@@ -0,0 +1,19 @@
+/*
+ * Sharing QEMU block devices via the vhost-user protocol
+ *
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
+ * Copyright (c) 2020 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#ifndef VHOST_USER_BLK_SERVER_H
+#define VHOST_USER_BLK_SERVER_H
+
+#include "block/export.h"
+
+/* For block/export/export.c */
+extern const BlockExportDriver blk_exp_vhost_user_blk;
+
+#endif /* VHOST_USER_BLK_SERVER_H */