diff options
Diffstat (limited to 'block/copy-on-read.c')
-rw-r--r-- | block/copy-on-read.c | 288 |
1 files changed, 288 insertions, 0 deletions
diff --git a/block/copy-on-read.c b/block/copy-on-read.c new file mode 100644 index 000000000..1fc7fb333 --- /dev/null +++ b/block/copy-on-read.c @@ -0,0 +1,288 @@ +/* + * Copy-on-read filter block driver + * + * Copyright (c) 2018 Red Hat, Inc. + * + * Author: + * Max Reitz <mreitz@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "block/block_int.h" +#include "qemu/module.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "block/copy-on-read.h" + + +typedef struct BDRVStateCOR { + BlockDriverState *bottom_bs; + bool chain_frozen; +} BDRVStateCOR; + + +static int cor_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BlockDriverState *bottom_bs = NULL; + BDRVStateCOR *state = bs->opaque; + /* Find a bottom node name, if any */ + const char *bottom_node = qdict_get_try_str(options, "bottom"); + + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_of_bds, + BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY, + false, errp); + if (!bs->file) { + return -EINVAL; + } + + bs->supported_read_flags = BDRV_REQ_PREFETCH; + + bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); + + bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); + + if (bottom_node) { + bottom_bs = bdrv_find_node(bottom_node); + if (!bottom_bs) { + error_setg(errp, "Bottom node '%s' not found", bottom_node); + qdict_del(options, "bottom"); + return -EINVAL; + } + qdict_del(options, "bottom"); + + if (!bottom_bs->drv) { + error_setg(errp, "Bottom node '%s' not opened", bottom_node); + return -EINVAL; + } + + if (bottom_bs->drv->is_filter) { + error_setg(errp, "Bottom node '%s' is a filter", bottom_node); + return -EINVAL; + } + + if (bdrv_freeze_backing_chain(bs, bottom_bs, errp) < 0) { + return -EINVAL; + } + state->chain_frozen = true; + + /* + * We do freeze the chain, so it shouldn't be removed. Still, storing a + * pointer worth bdrv_ref(). + */ + bdrv_ref(bottom_bs); + } + state->bottom_bs = bottom_bs; + + /* + * We don't need to call bdrv_child_refresh_perms() now as the permissions + * will be updated later when the filter node gets its parent. + */ + + return 0; +} + + +#define PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ + | BLK_PERM_WRITE \ + | BLK_PERM_RESIZE) +#define PERM_UNCHANGED (BLK_PERM_ALL & ~PERM_PASSTHROUGH) + +static void cor_child_perm(BlockDriverState *bs, BdrvChild *c, + BdrvChildRole role, + BlockReopenQueue *reopen_queue, + uint64_t perm, uint64_t shared, + uint64_t *nperm, uint64_t *nshared) +{ + *nperm = perm & PERM_PASSTHROUGH; + *nshared = (shared & PERM_PASSTHROUGH) | PERM_UNCHANGED; + + /* We must not request write permissions for an inactive node, the child + * cannot provide it. */ + if (!(bs->open_flags & BDRV_O_INACTIVE)) { + *nperm |= BLK_PERM_WRITE_UNCHANGED; + } +} + + +static int64_t cor_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + + +static int coroutine_fn cor_co_preadv_part(BlockDriverState *bs, + int64_t offset, int64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags) +{ + int64_t n; + int local_flags; + int ret; + BDRVStateCOR *state = bs->opaque; + + if (!state->bottom_bs) { + return bdrv_co_preadv_part(bs->file, offset, bytes, qiov, qiov_offset, + flags | BDRV_REQ_COPY_ON_READ); + } + + while (bytes) { + local_flags = flags; + + /* In case of failure, try to copy-on-read anyway */ + ret = bdrv_is_allocated(bs->file->bs, offset, bytes, &n); + if (ret <= 0) { + ret = bdrv_is_allocated_above(bdrv_backing_chain_next(bs->file->bs), + state->bottom_bs, true, offset, + n, &n); + if (ret > 0 || ret < 0) { + local_flags |= BDRV_REQ_COPY_ON_READ; + } + /* Finish earlier if the end of a backing file has been reached */ + if (n == 0) { + break; + } + } + + /* Skip if neither read nor write are needed */ + if ((local_flags & (BDRV_REQ_PREFETCH | BDRV_REQ_COPY_ON_READ)) != + BDRV_REQ_PREFETCH) { + ret = bdrv_co_preadv_part(bs->file, offset, n, qiov, qiov_offset, + local_flags); + if (ret < 0) { + return ret; + } + } + + offset += n; + qiov_offset += n; + bytes -= n; + } + + return 0; +} + + +static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs, + int64_t offset, + int64_t bytes, + QEMUIOVector *qiov, + size_t qiov_offset, + BdrvRequestFlags flags) +{ + return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset, + flags); +} + + +static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, + BdrvRequestFlags flags) +{ + return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags); +} + + +static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs, + int64_t offset, int64_t bytes) +{ + return bdrv_co_pdiscard(bs->file, offset, bytes); +} + + +static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs, + int64_t offset, + int64_t bytes, + QEMUIOVector *qiov) +{ + return bdrv_co_pwritev(bs->file, offset, bytes, qiov, + BDRV_REQ_WRITE_COMPRESSED); +} + + +static void cor_eject(BlockDriverState *bs, bool eject_flag) +{ + bdrv_eject(bs->file->bs, eject_flag); +} + + +static void cor_lock_medium(BlockDriverState *bs, bool locked) +{ + bdrv_lock_medium(bs->file->bs, locked); +} + + +static void cor_close(BlockDriverState *bs) +{ + BDRVStateCOR *s = bs->opaque; + + if (s->chain_frozen) { + s->chain_frozen = false; + bdrv_unfreeze_backing_chain(bs, s->bottom_bs); + } + + bdrv_unref(s->bottom_bs); +} + + +static BlockDriver bdrv_copy_on_read = { + .format_name = "copy-on-read", + .instance_size = sizeof(BDRVStateCOR), + + .bdrv_open = cor_open, + .bdrv_close = cor_close, + .bdrv_child_perm = cor_child_perm, + + .bdrv_getlength = cor_getlength, + + .bdrv_co_preadv_part = cor_co_preadv_part, + .bdrv_co_pwritev_part = cor_co_pwritev_part, + .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes, + .bdrv_co_pdiscard = cor_co_pdiscard, + .bdrv_co_pwritev_compressed = cor_co_pwritev_compressed, + + .bdrv_eject = cor_eject, + .bdrv_lock_medium = cor_lock_medium, + + .has_variable_length = true, + .is_filter = true, +}; + + +void bdrv_cor_filter_drop(BlockDriverState *cor_filter_bs) +{ + BDRVStateCOR *s = cor_filter_bs->opaque; + + /* unfreeze, as otherwise bdrv_replace_node() will fail */ + if (s->chain_frozen) { + s->chain_frozen = false; + bdrv_unfreeze_backing_chain(cor_filter_bs, s->bottom_bs); + } + bdrv_drop_filter(cor_filter_bs, &error_abort); + bdrv_unref(cor_filter_bs); +} + + +static void bdrv_copy_on_read_init(void) +{ + bdrv_register(&bdrv_copy_on_read); +} + +block_init(bdrv_copy_on_read_init); |