diff options
Diffstat (limited to 'hw/xen')
-rw-r--r-- | hw/xen/meson.build | 20 | ||||
-rw-r--r-- | hw/xen/trace-events | 43 | ||||
-rw-r--r-- | hw/xen/trace.h | 1 | ||||
-rw-r--r-- | hw/xen/xen-backend.c | 175 | ||||
-rw-r--r-- | hw/xen/xen-bus-helper.c | 182 | ||||
-rw-r--r-- | hw/xen/xen-bus.c | 1405 | ||||
-rw-r--r-- | hw/xen/xen-host-pci-device.c | 402 | ||||
-rw-r--r-- | hw/xen/xen-host-pci-device.h | 58 | ||||
-rw-r--r-- | hw/xen/xen-legacy-backend.c | 843 | ||||
-rw-r--r-- | hw/xen/xen_devconfig.c | 129 | ||||
-rw-r--r-- | hw/xen/xen_pt.c | 1005 | ||||
-rw-r--r-- | hw/xen/xen_pt.h | 336 | ||||
-rw-r--r-- | hw/xen/xen_pt_config_init.c | 2110 | ||||
-rw-r--r-- | hw/xen/xen_pt_graphics.c | 291 | ||||
-rw-r--r-- | hw/xen/xen_pt_load_rom.c | 92 | ||||
-rw-r--r-- | hw/xen/xen_pt_msi.c | 650 | ||||
-rw-r--r-- | hw/xen/xen_pt_stub.c | 22 | ||||
-rw-r--r-- | hw/xen/xen_pvdev.c | 319 |
18 files changed, 8083 insertions, 0 deletions
diff --git a/hw/xen/meson.build b/hw/xen/meson.build new file mode 100644 index 000000000..076954b89 --- /dev/null +++ b/hw/xen/meson.build @@ -0,0 +1,20 @@ +softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files( + 'xen-backend.c', + 'xen-bus-helper.c', + 'xen-bus.c', + 'xen-legacy-backend.c', + 'xen_devconfig.c', + 'xen_pvdev.c', +)) + +xen_specific_ss = ss.source_set() +xen_specific_ss.add(when: 'CONFIG_XEN_PCI_PASSTHROUGH', if_true: files( + 'xen-host-pci-device.c', + 'xen_pt.c', + 'xen_pt_config_init.c', + 'xen_pt_graphics.c', + 'xen_pt_load_rom.c', + 'xen_pt_msi.c', +), if_false: files('xen_pt_stub.c')) + +specific_ss.add_all(when: ['CONFIG_XEN', xen], if_true: xen_specific_ss) diff --git a/hw/xen/trace-events b/hw/xen/trace-events new file mode 100644 index 000000000..3da3fd834 --- /dev/null +++ b/hw/xen/trace-events @@ -0,0 +1,43 @@ +# See docs/devel/tracing.rst for syntax documentation. + +# ../../include/hw/xen/xen_common.h +xen_default_ioreq_server(void) "" +xen_ioreq_server_create(uint32_t id) "id: %u" +xen_ioreq_server_destroy(uint32_t id) "id: %u" +xen_ioreq_server_state(uint32_t id, bool enable) "id: %u: enable: %i" +xen_map_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: 0x%"PRIx64" end: 0x%"PRIx64 +xen_unmap_mmio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: 0x%"PRIx64" end: 0x%"PRIx64 +xen_map_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: 0x%"PRIx64" end: 0x%"PRIx64 +xen_unmap_portio_range(uint32_t id, uint64_t start_addr, uint64_t end_addr) "id: %u start: 0x%"PRIx64" end: 0x%"PRIx64 +xen_map_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" +xen_unmap_pcidev(uint32_t id, uint8_t bus, uint8_t dev, uint8_t func) "id: %u bdf: %02x.%02x.%02x" +xen_domid_restrict(int err) "err: %u" + +# xen-bus.c +xen_bus_realize(void) "" +xen_bus_unrealize(void) "" +xen_bus_enumerate(void) "" +xen_bus_cleanup(void) "" +xen_bus_type_enumerate(const char *type) "type: %s" +xen_bus_backend_create(const char *type, const char *path) "type: %s path: %s" +xen_bus_device_cleanup(const char *type, char *name) "type: %s name: %s" +xen_bus_add_watch(const char *node, const char *key) "node: %s key: %s" +xen_bus_remove_watch(const char *node, const char *key) "node: %s key: %s" +xen_device_realize(const char *type, char *name) "type: %s name: %s" +xen_device_unrealize(const char *type, char *name) "type: %s name: %s" +xen_device_backend_state(const char *type, char *name, const char *state) "type: %s name: %s -> %s" +xen_device_backend_online(const char *type, char *name, bool online) "type: %s name: %s -> %u" +xen_device_backend_changed(const char *type, char *name) "type: %s name: %s" +xen_device_frontend_state(const char *type, char *name, const char *state) "type: %s name: %s -> %s" +xen_device_frontend_changed(const char *type, char *name) "type: %s name: %s" +xen_device_unplug(const char *type, char *name) "type: %s name: %s" +xen_device_add_watch(const char *type, char *name, const char *node, const char *key) "type: %s name: %s node: %s key: %s" +xen_device_remove_watch(const char *type, char *name, const char *node, const char *key) "type: %s name: %s node: %s key: %s" + +# xen-bus-helper.c +xs_node_create(const char *node) "%s" +xs_node_destroy(const char *node) "%s" +xs_node_vprintf(char *path, char *value) "%s %s" +xs_node_vscanf(char *path, char *value) "%s %s" +xs_node_watch(char *path) "%s" +xs_node_unwatch(char *path) "%s" diff --git a/hw/xen/trace.h b/hw/xen/trace.h new file mode 100644 index 000000000..adba31a13 --- /dev/null +++ b/hw/xen/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_xen.h" diff --git a/hw/xen/xen-backend.c b/hw/xen/xen-backend.c new file mode 100644 index 000000000..5b0fb76ea --- /dev/null +++ b/hw/xen/xen-backend.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2018 Citrix Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "hw/xen/xen-backend.h" +#include "hw/xen/xen-bus.h" + +typedef struct XenBackendImpl { + const char *type; + XenBackendDeviceCreate create; + XenBackendDeviceDestroy destroy; +} XenBackendImpl; + +struct XenBackendInstance { + QLIST_ENTRY(XenBackendInstance) entry; + const XenBackendImpl *impl; + XenBus *xenbus; + char *name; + XenDevice *xendev; +}; + +static GHashTable *xen_backend_table_get(void) +{ + static GHashTable *table; + + if (table == NULL) { + table = g_hash_table_new(g_str_hash, g_str_equal); + } + + return table; +} + +static void xen_backend_table_add(XenBackendImpl *impl) +{ + g_hash_table_insert(xen_backend_table_get(), (void *)impl->type, impl); +} + +static const char **xen_backend_table_keys(unsigned int *count) +{ + return (const char **)g_hash_table_get_keys_as_array( + xen_backend_table_get(), count); +} + +static const XenBackendImpl *xen_backend_table_lookup(const char *type) +{ + return g_hash_table_lookup(xen_backend_table_get(), type); +} + +void xen_backend_register(const XenBackendInfo *info) +{ + XenBackendImpl *impl = g_new0(XenBackendImpl, 1); + + g_assert(info->type); + + if (xen_backend_table_lookup(info->type)) { + error_report("attempt to register duplicate Xen backend type '%s'", + info->type); + abort(); + } + + if (!info->create) { + error_report("backend type '%s' has no creator", info->type); + abort(); + } + + impl->type = info->type; + impl->create = info->create; + impl->destroy = info->destroy; + + xen_backend_table_add(impl); +} + +const char **xen_backend_get_types(unsigned int *count) +{ + return xen_backend_table_keys(count); +} + +static QLIST_HEAD(, XenBackendInstance) backend_list; + +static void xen_backend_list_add(XenBackendInstance *backend) +{ + QLIST_INSERT_HEAD(&backend_list, backend, entry); +} + +static XenBackendInstance *xen_backend_list_find(XenDevice *xendev) +{ + XenBackendInstance *backend; + + QLIST_FOREACH(backend, &backend_list, entry) { + if (backend->xendev == xendev) { + return backend; + } + } + + return NULL; +} + +static void xen_backend_list_remove(XenBackendInstance *backend) +{ + QLIST_REMOVE(backend, entry); +} + +void xen_backend_device_create(XenBus *xenbus, const char *type, + const char *name, QDict *opts, Error **errp) +{ + ERRP_GUARD(); + const XenBackendImpl *impl = xen_backend_table_lookup(type); + XenBackendInstance *backend; + + if (!impl) { + return; + } + + backend = g_new0(XenBackendInstance, 1); + backend->xenbus = xenbus; + backend->name = g_strdup(name); + + impl->create(backend, opts, errp); + if (*errp) { + g_free(backend->name); + g_free(backend); + return; + } + + backend->impl = impl; + xen_backend_list_add(backend); +} + +XenBus *xen_backend_get_bus(XenBackendInstance *backend) +{ + return backend->xenbus; +} + +const char *xen_backend_get_name(XenBackendInstance *backend) +{ + return backend->name; +} + +void xen_backend_set_device(XenBackendInstance *backend, + XenDevice *xendev) +{ + g_assert(!backend->xendev); + backend->xendev = xendev; +} + +XenDevice *xen_backend_get_device(XenBackendInstance *backend) +{ + return backend->xendev; +} + + +bool xen_backend_try_device_destroy(XenDevice *xendev, Error **errp) +{ + XenBackendInstance *backend = xen_backend_list_find(xendev); + const XenBackendImpl *impl; + + if (!backend) { + return false; + } + + impl = backend->impl; + impl->destroy(backend, errp); + + xen_backend_list_remove(backend); + g_free(backend->name); + g_free(backend); + + return true; +} diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c new file mode 100644 index 000000000..5a1e12b37 --- /dev/null +++ b/hw/xen/xen-bus-helper.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2018 Citrix Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/xen/xen.h" +#include "hw/xen/xen-bus.h" +#include "hw/xen/xen-bus-helper.h" +#include "qapi/error.h" + +#include <glib/gprintf.h> + +struct xs_state { + enum xenbus_state statenum; + const char *statestr; +}; +#define XS_STATE(state) { state, #state } + +static struct xs_state xs_state[] = { + XS_STATE(XenbusStateUnknown), + XS_STATE(XenbusStateInitialising), + XS_STATE(XenbusStateInitWait), + XS_STATE(XenbusStateInitialised), + XS_STATE(XenbusStateConnected), + XS_STATE(XenbusStateClosing), + XS_STATE(XenbusStateClosed), + XS_STATE(XenbusStateReconfiguring), + XS_STATE(XenbusStateReconfigured), +}; + +#undef XS_STATE + +const char *xs_strstate(enum xenbus_state state) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(xs_state); i++) { + if (xs_state[i].statenum == state) { + return xs_state[i].statestr; + } + } + + return "INVALID"; +} + +void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, struct xs_permissions perms[], + unsigned int nr_perms, Error **errp) +{ + trace_xs_node_create(node); + + if (!xs_write(xsh, tid, node, "", 0)) { + error_setg_errno(errp, errno, "failed to create node '%s'", node); + return; + } + + if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) { + error_setg_errno(errp, errno, "failed to set node '%s' permissions", + node); + } +} + +void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, Error **errp) +{ + trace_xs_node_destroy(node); + + if (!xs_rm(xsh, tid, node)) { + error_setg_errno(errp, errno, "failed to destroy node '%s'", node); + } +} + +void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, va_list ap) +{ + char *path, *value; + int len; + + path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : + g_strdup(key); + len = g_vasprintf(&value, fmt, ap); + + trace_xs_node_vprintf(path, value); + + if (!xs_write(xsh, tid, path, value, len)) { + error_setg_errno(errp, errno, "failed to write '%s' to '%s'", + value, path); + } + + g_free(value); + g_free(path); +} + +void xs_node_printf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + xs_node_vprintf(xsh, tid, node, key, errp, fmt, ap); + va_end(ap); +} + +int xs_node_vscanf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, va_list ap) +{ + char *path, *value; + int rc; + + path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : + g_strdup(key); + value = xs_read(xsh, tid, path, NULL); + + trace_xs_node_vscanf(path, value); + + if (value) { + rc = vsscanf(value, fmt, ap); + } else { + error_setg_errno(errp, errno, "failed to read from '%s'", + path); + rc = EOF; + } + + free(value); + g_free(path); + + return rc; +} + +int xs_node_scanf(struct xs_handle *xsh, xs_transaction_t tid, + const char *node, const char *key, Error **errp, + const char *fmt, ...) +{ + va_list ap; + int rc; + + va_start(ap, fmt); + rc = xs_node_vscanf(xsh, tid, node, key, errp, fmt, ap); + va_end(ap); + + return rc; +} + +void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key, + char *token, Error **errp) +{ + char *path; + + path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : + g_strdup(key); + + trace_xs_node_watch(path); + + if (!xs_watch(xsh, path, token)) { + error_setg_errno(errp, errno, "failed to watch node '%s'", path); + } + + g_free(path); +} + +void xs_node_unwatch(struct xs_handle *xsh, const char *node, + const char *key, const char *token, Error **errp) +{ + char *path; + + path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) : + g_strdup(key); + + trace_xs_node_unwatch(path); + + if (!xs_unwatch(xsh, path, token)) { + error_setg_errno(errp, errno, "failed to unwatch node '%s'", path); + } + + g_free(path); +} diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c new file mode 100644 index 000000000..416583f13 --- /dev/null +++ b/hw/xen/xen-bus.c @@ -0,0 +1,1405 @@ +/* + * Copyright (c) 2018 Citrix Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "qemu/module.h" +#include "qemu/uuid.h" +#include "hw/qdev-properties.h" +#include "hw/sysbus.h" +#include "hw/xen/xen.h" +#include "hw/xen/xen-backend.h" +#include "hw/xen/xen-bus.h" +#include "hw/xen/xen-bus-helper.h" +#include "monitor/monitor.h" +#include "qapi/error.h" +#include "qapi/qmp/qdict.h" +#include "sysemu/sysemu.h" +#include "trace.h" + +static char *xen_device_get_backend_path(XenDevice *xendev) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); + const char *type = object_get_typename(OBJECT(xendev)); + const char *backend = xendev_class->backend; + + if (!backend) { + backend = type; + } + + return g_strdup_printf("/local/domain/%u/backend/%s/%u/%s", + xenbus->backend_id, backend, xendev->frontend_id, + xendev->name); +} + +static char *xen_device_get_frontend_path(XenDevice *xendev) +{ + XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); + const char *type = object_get_typename(OBJECT(xendev)); + const char *device = xendev_class->device; + + if (!device) { + device = type; + } + + return g_strdup_printf("/local/domain/%u/device/%s/%s", + xendev->frontend_id, device, xendev->name); +} + +static void xen_device_unplug(XenDevice *xendev, Error **errp) +{ + ERRP_GUARD(); + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + const char *type = object_get_typename(OBJECT(xendev)); + xs_transaction_t tid; + + trace_xen_device_unplug(type, xendev->name); + + /* Mimic the way the Xen toolstack does an unplug */ +again: + tid = xs_transaction_start(xenbus->xsh); + if (tid == XBT_NULL) { + error_setg_errno(errp, errno, "failed xs_transaction_start"); + return; + } + + xs_node_printf(xenbus->xsh, tid, xendev->backend_path, "online", + errp, "%u", 0); + if (*errp) { + goto abort; + } + + xs_node_printf(xenbus->xsh, tid, xendev->backend_path, "state", + errp, "%u", XenbusStateClosing); + if (*errp) { + goto abort; + } + + if (!xs_transaction_end(xenbus->xsh, tid, false)) { + if (errno == EAGAIN) { + goto again; + } + + error_setg_errno(errp, errno, "failed xs_transaction_end"); + } + + return; + +abort: + /* + * We only abort if there is already a failure so ignore any error + * from ending the transaction. + */ + xs_transaction_end(xenbus->xsh, tid, true); +} + +static void xen_bus_print_dev(Monitor *mon, DeviceState *dev, int indent) +{ + XenDevice *xendev = XEN_DEVICE(dev); + + monitor_printf(mon, "%*sname = '%s' frontend_id = %u\n", + indent, "", xendev->name, xendev->frontend_id); +} + +static char *xen_bus_get_dev_path(DeviceState *dev) +{ + return xen_device_get_backend_path(XEN_DEVICE(dev)); +} + +struct XenWatch { + char *node, *key; + char *token; + XenWatchHandler handler; + void *opaque; + Notifier notifier; +}; + +static void watch_notify(Notifier *n, void *data) +{ + XenWatch *watch = container_of(n, XenWatch, notifier); + const char *token = data; + + if (!strcmp(watch->token, token)) { + watch->handler(watch->opaque); + } +} + +static XenWatch *new_watch(const char *node, const char *key, + XenWatchHandler handler, void *opaque) +{ + XenWatch *watch = g_new0(XenWatch, 1); + QemuUUID uuid; + + qemu_uuid_generate(&uuid); + + watch->token = qemu_uuid_unparse_strdup(&uuid); + watch->node = g_strdup(node); + watch->key = g_strdup(key); + watch->handler = handler; + watch->opaque = opaque; + watch->notifier.notify = watch_notify; + + return watch; +} + +static void free_watch(XenWatch *watch) +{ + g_free(watch->token); + g_free(watch->key); + g_free(watch->node); + + g_free(watch); +} + +struct XenWatchList { + struct xs_handle *xsh; + NotifierList notifiers; +}; + +static void watch_list_event(void *opaque) +{ + XenWatchList *watch_list = opaque; + char **v; + const char *token; + + v = xs_check_watch(watch_list->xsh); + if (!v) { + return; + } + + token = v[XS_WATCH_TOKEN]; + + notifier_list_notify(&watch_list->notifiers, (void *)token); + + free(v); +} + +static XenWatchList *watch_list_create(struct xs_handle *xsh) +{ + XenWatchList *watch_list = g_new0(XenWatchList, 1); + + g_assert(xsh); + + watch_list->xsh = xsh; + notifier_list_init(&watch_list->notifiers); + qemu_set_fd_handler(xs_fileno(watch_list->xsh), watch_list_event, NULL, + watch_list); + + return watch_list; +} + +static void watch_list_destroy(XenWatchList *watch_list) +{ + g_assert(notifier_list_empty(&watch_list->notifiers)); + qemu_set_fd_handler(xs_fileno(watch_list->xsh), NULL, NULL, NULL); + g_free(watch_list); +} + +static XenWatch *watch_list_add(XenWatchList *watch_list, const char *node, + const char *key, XenWatchHandler handler, + void *opaque, Error **errp) +{ + ERRP_GUARD(); + XenWatch *watch = new_watch(node, key, handler, opaque); + + notifier_list_add(&watch_list->notifiers, &watch->notifier); + + xs_node_watch(watch_list->xsh, node, key, watch->token, errp); + if (*errp) { + notifier_remove(&watch->notifier); + free_watch(watch); + + return NULL; + } + + return watch; +} + +static void watch_list_remove(XenWatchList *watch_list, XenWatch *watch, + Error **errp) +{ + xs_node_unwatch(watch_list->xsh, watch->node, watch->key, watch->token, + errp); + + notifier_remove(&watch->notifier); + free_watch(watch); +} + +static XenWatch *xen_bus_add_watch(XenBus *xenbus, const char *node, + const char *key, XenWatchHandler handler, + Error **errp) +{ + trace_xen_bus_add_watch(node, key); + + return watch_list_add(xenbus->watch_list, node, key, handler, xenbus, + errp); +} + +static void xen_bus_remove_watch(XenBus *xenbus, XenWatch *watch, + Error **errp) +{ + trace_xen_bus_remove_watch(watch->node, watch->key); + + watch_list_remove(xenbus->watch_list, watch, errp); +} + +static void xen_bus_backend_create(XenBus *xenbus, const char *type, + const char *name, char *path, + Error **errp) +{ + ERRP_GUARD(); + xs_transaction_t tid; + char **key; + QDict *opts; + unsigned int i, n; + + trace_xen_bus_backend_create(type, path); + +again: + tid = xs_transaction_start(xenbus->xsh); + if (tid == XBT_NULL) { + error_setg(errp, "failed xs_transaction_start"); + return; + } + + key = xs_directory(xenbus->xsh, tid, path, &n); + if (!key) { + if (!xs_transaction_end(xenbus->xsh, tid, true)) { + error_setg_errno(errp, errno, "failed xs_transaction_end"); + } + return; + } + + opts = qdict_new(); + for (i = 0; i < n; i++) { + char *val; + + /* + * Assume anything found in the xenstore backend area, other than + * the keys created for a generic XenDevice, are parameters + * to be used to configure the backend. + */ + if (!strcmp(key[i], "state") || + !strcmp(key[i], "online") || + !strcmp(key[i], "frontend") || + !strcmp(key[i], "frontend-id") || + !strcmp(key[i], "hotplug-status")) + continue; + + if (xs_node_scanf(xenbus->xsh, tid, path, key[i], NULL, "%ms", + &val) == 1) { + qdict_put_str(opts, key[i], val); + free(val); + } + } + + free(key); + + if (!xs_transaction_end(xenbus->xsh, tid, false)) { + qobject_unref(opts); + + if (errno == EAGAIN) { + goto again; + } + + error_setg_errno(errp, errno, "failed xs_transaction_end"); + return; + } + + xen_backend_device_create(xenbus, type, name, opts, errp); + qobject_unref(opts); + + if (*errp) { + error_prepend(errp, "failed to create '%s' device '%s': ", type, name); + } +} + +static void xen_bus_type_enumerate(XenBus *xenbus, const char *type) +{ + char *domain_path = g_strdup_printf("backend/%s/%u", type, xen_domid); + char **backend; + unsigned int i, n; + + trace_xen_bus_type_enumerate(type); + + backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n); + if (!backend) { + goto out; + } + + for (i = 0; i < n; i++) { + char *backend_path = g_strdup_printf("%s/%s", domain_path, + backend[i]); + enum xenbus_state state; + unsigned int online; + + if (xs_node_scanf(xenbus->xsh, XBT_NULL, backend_path, "state", + NULL, "%u", &state) != 1) + state = XenbusStateUnknown; + + if (xs_node_scanf(xenbus->xsh, XBT_NULL, backend_path, "online", + NULL, "%u", &online) != 1) + online = 0; + + if (online && state == XenbusStateInitialising) { + Error *local_err = NULL; + + xen_bus_backend_create(xenbus, type, backend[i], backend_path, + &local_err); + if (local_err) { + error_report_err(local_err); + } + } + + g_free(backend_path); + } + + free(backend); + +out: + g_free(domain_path); +} + +static void xen_bus_enumerate(XenBus *xenbus) +{ + char **type; + unsigned int i, n; + + trace_xen_bus_enumerate(); + + type = xs_directory(xenbus->xsh, XBT_NULL, "backend", &n); + if (!type) { + return; + } + + for (i = 0; i < n; i++) { + xen_bus_type_enumerate(xenbus, type[i]); + } + + free(type); +} + +static void xen_bus_device_cleanup(XenDevice *xendev) +{ + const char *type = object_get_typename(OBJECT(xendev)); + Error *local_err = NULL; + + trace_xen_bus_device_cleanup(type, xendev->name); + + g_assert(!xendev->backend_online); + + if (!xen_backend_try_device_destroy(xendev, &local_err)) { + object_unparent(OBJECT(xendev)); + } + + if (local_err) { + error_report_err(local_err); + } +} + +static void xen_bus_cleanup(XenBus *xenbus) +{ + XenDevice *xendev, *next; + + trace_xen_bus_cleanup(); + + QLIST_FOREACH_SAFE(xendev, &xenbus->inactive_devices, list, next) { + g_assert(xendev->inactive); + QLIST_REMOVE(xendev, list); + xen_bus_device_cleanup(xendev); + } +} + +static void xen_bus_backend_changed(void *opaque) +{ + XenBus *xenbus = opaque; + + xen_bus_enumerate(xenbus); + xen_bus_cleanup(xenbus); +} + +static void xen_bus_unrealize(BusState *bus) +{ + XenBus *xenbus = XEN_BUS(bus); + + trace_xen_bus_unrealize(); + + if (xenbus->backend_watch) { + unsigned int i; + + for (i = 0; i < xenbus->backend_types; i++) { + if (xenbus->backend_watch[i]) { + xen_bus_remove_watch(xenbus, xenbus->backend_watch[i], NULL); + } + } + + g_free(xenbus->backend_watch); + xenbus->backend_watch = NULL; + } + + if (xenbus->watch_list) { + watch_list_destroy(xenbus->watch_list); + xenbus->watch_list = NULL; + } + + if (xenbus->xsh) { + xs_close(xenbus->xsh); + } +} + +static void xen_bus_realize(BusState *bus, Error **errp) +{ + char *key = g_strdup_printf("%u", xen_domid); + XenBus *xenbus = XEN_BUS(bus); + unsigned int domid; + const char **type; + unsigned int i; + Error *local_err = NULL; + + trace_xen_bus_realize(); + + xenbus->xsh = xs_open(0); + if (!xenbus->xsh) { + error_setg_errno(errp, errno, "failed xs_open"); + goto fail; + } + + if (xs_node_scanf(xenbus->xsh, XBT_NULL, "", /* domain root node */ + "domid", NULL, "%u", &domid) == 1) { + xenbus->backend_id = domid; + } else { + xenbus->backend_id = 0; /* Assume lack of node means dom0 */ + } + + xenbus->watch_list = watch_list_create(xenbus->xsh); + + module_call_init(MODULE_INIT_XEN_BACKEND); + + type = xen_backend_get_types(&xenbus->backend_types); + xenbus->backend_watch = g_new(XenWatch *, xenbus->backend_types); + + for (i = 0; i < xenbus->backend_types; i++) { + char *node = g_strdup_printf("backend/%s", type[i]); + + xenbus->backend_watch[i] = + xen_bus_add_watch(xenbus, node, key, xen_bus_backend_changed, + &local_err); + if (local_err) { + /* This need not be treated as a hard error so don't propagate */ + error_reportf_err(local_err, + "failed to set up '%s' enumeration watch: ", + type[i]); + } + + g_free(node); + } + + g_free(type); + g_free(key); + return; + +fail: + xen_bus_unrealize(bus); + g_free(key); +} + +static void xen_bus_unplug_request(HotplugHandler *hotplug, + DeviceState *dev, + Error **errp) +{ + XenDevice *xendev = XEN_DEVICE(dev); + + xen_device_unplug(xendev, errp); +} + +static void xen_bus_class_init(ObjectClass *class, void *data) +{ + BusClass *bus_class = BUS_CLASS(class); + HotplugHandlerClass *hotplug_class = HOTPLUG_HANDLER_CLASS(class); + + bus_class->print_dev = xen_bus_print_dev; + bus_class->get_dev_path = xen_bus_get_dev_path; + bus_class->realize = xen_bus_realize; + bus_class->unrealize = xen_bus_unrealize; + + hotplug_class->unplug_request = xen_bus_unplug_request; +} + +static const TypeInfo xen_bus_type_info = { + .name = TYPE_XEN_BUS, + .parent = TYPE_BUS, + .instance_size = sizeof(XenBus), + .class_size = sizeof(XenBusClass), + .class_init = xen_bus_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + }, +}; + +void xen_device_backend_printf(XenDevice *xendev, const char *key, + const char *fmt, ...) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + Error *local_err = NULL; + va_list ap; + + g_assert(xenbus->xsh); + + va_start(ap, fmt); + xs_node_vprintf(xenbus->xsh, XBT_NULL, xendev->backend_path, key, + &local_err, fmt, ap); + va_end(ap); + + if (local_err) { + error_report_err(local_err); + } +} + +static int xen_device_backend_scanf(XenDevice *xendev, const char *key, + const char *fmt, ...) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + va_list ap; + int rc; + + g_assert(xenbus->xsh); + + va_start(ap, fmt); + rc = xs_node_vscanf(xenbus->xsh, XBT_NULL, xendev->backend_path, key, + NULL, fmt, ap); + va_end(ap); + + return rc; +} + +void xen_device_backend_set_state(XenDevice *xendev, + enum xenbus_state state) +{ + const char *type = object_get_typename(OBJECT(xendev)); + + if (xendev->backend_state == state) { + return; + } + + trace_xen_device_backend_state(type, xendev->name, + xs_strstate(state)); + + xendev->backend_state = state; + xen_device_backend_printf(xendev, "state", "%u", state); +} + +enum xenbus_state xen_device_backend_get_state(XenDevice *xendev) +{ + return xendev->backend_state; +} + +static void xen_device_backend_set_online(XenDevice *xendev, bool online) +{ + const char *type = object_get_typename(OBJECT(xendev)); + + if (xendev->backend_online == online) { + return; + } + + trace_xen_device_backend_online(type, xendev->name, online); + + xendev->backend_online = online; + xen_device_backend_printf(xendev, "online", "%u", online); +} + +/* + * Tell from the state whether the frontend is likely alive, + * i.e. it will react to a change of state of the backend. + */ +static bool xen_device_frontend_is_active(XenDevice *xendev) +{ + switch (xendev->frontend_state) { + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateConnected: + case XenbusStateClosing: + return true; + default: + return false; + } +} + +static void xen_device_backend_changed(void *opaque) +{ + XenDevice *xendev = opaque; + const char *type = object_get_typename(OBJECT(xendev)); + enum xenbus_state state; + unsigned int online; + + trace_xen_device_backend_changed(type, xendev->name); + + if (xen_device_backend_scanf(xendev, "state", "%u", &state) != 1) { + state = XenbusStateUnknown; + } + + xen_device_backend_set_state(xendev, state); + + if (xen_device_backend_scanf(xendev, "online", "%u", &online) != 1) { + online = 0; + } + + xen_device_backend_set_online(xendev, !!online); + + /* + * If the toolstack (or unplug request callback) has set the backend + * state to Closing, but there is no active frontend then set the + * backend state to Closed. + */ + if (state == XenbusStateClosing && + !xen_device_frontend_is_active(xendev)) { + xen_device_backend_set_state(xendev, XenbusStateClosed); + } + + /* + * If a backend is still 'online' then we should leave it alone but, + * if a backend is not 'online', then the device is a candidate + * for destruction. Hence add it to the 'inactive' list to be cleaned + * by xen_bus_cleanup(). + */ + if (!online && + (state == XenbusStateClosed || state == XenbusStateInitialising || + state == XenbusStateInitWait || state == XenbusStateUnknown) && + !xendev->inactive) { + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + + xendev->inactive = true; + QLIST_INSERT_HEAD(&xenbus->inactive_devices, xendev, list); + + /* + * Re-write the state to cause a XenBus backend_watch notification, + * resulting in a call to xen_bus_cleanup(). + */ + xen_device_backend_printf(xendev, "state", "%u", state); + } +} + +static XenWatch *xen_device_add_watch(XenDevice *xendev, const char *node, + const char *key, + XenWatchHandler handler, + Error **errp) +{ + const char *type = object_get_typename(OBJECT(xendev)); + + trace_xen_device_add_watch(type, xendev->name, node, key); + + return watch_list_add(xendev->watch_list, node, key, handler, xendev, + errp); +} + +static void xen_device_remove_watch(XenDevice *xendev, XenWatch *watch, + Error **errp) +{ + const char *type = object_get_typename(OBJECT(xendev)); + + trace_xen_device_remove_watch(type, xendev->name, watch->node, + watch->key); + + watch_list_remove(xendev->watch_list, watch, errp); +} + + +static void xen_device_backend_create(XenDevice *xendev, Error **errp) +{ + ERRP_GUARD(); + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + struct xs_permissions perms[2]; + + xendev->backend_path = xen_device_get_backend_path(xendev); + + perms[0].id = xenbus->backend_id; + perms[0].perms = XS_PERM_NONE; + perms[1].id = xendev->frontend_id; + perms[1].perms = XS_PERM_READ; + + g_assert(xenbus->xsh); + + xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, perms, + ARRAY_SIZE(perms), errp); + if (*errp) { + error_prepend(errp, "failed to create backend: "); + return; + } + + xendev->backend_state_watch = + xen_device_add_watch(xendev, xendev->backend_path, + "state", xen_device_backend_changed, + errp); + if (*errp) { + error_prepend(errp, "failed to watch backend state: "); + return; + } + + xendev->backend_online_watch = + xen_device_add_watch(xendev, xendev->backend_path, + "online", xen_device_backend_changed, + errp); + if (*errp) { + error_prepend(errp, "failed to watch backend online: "); + return; + } +} + +static void xen_device_backend_destroy(XenDevice *xendev) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + Error *local_err = NULL; + + if (xendev->backend_online_watch) { + xen_device_remove_watch(xendev, xendev->backend_online_watch, NULL); + xendev->backend_online_watch = NULL; + } + + if (xendev->backend_state_watch) { + xen_device_remove_watch(xendev, xendev->backend_state_watch, NULL); + xendev->backend_state_watch = NULL; + } + + if (!xendev->backend_path) { + return; + } + + g_assert(xenbus->xsh); + + xs_node_destroy(xenbus->xsh, XBT_NULL, xendev->backend_path, + &local_err); + g_free(xendev->backend_path); + xendev->backend_path = NULL; + + if (local_err) { + error_report_err(local_err); + } +} + +void xen_device_frontend_printf(XenDevice *xendev, const char *key, + const char *fmt, ...) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + Error *local_err = NULL; + va_list ap; + + g_assert(xenbus->xsh); + + va_start(ap, fmt); + xs_node_vprintf(xenbus->xsh, XBT_NULL, xendev->frontend_path, key, + &local_err, fmt, ap); + va_end(ap); + + if (local_err) { + error_report_err(local_err); + } +} + +int xen_device_frontend_scanf(XenDevice *xendev, const char *key, + const char *fmt, ...) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + va_list ap; + int rc; + + g_assert(xenbus->xsh); + + va_start(ap, fmt); + rc = xs_node_vscanf(xenbus->xsh, XBT_NULL, xendev->frontend_path, key, + NULL, fmt, ap); + va_end(ap); + + return rc; +} + +static void xen_device_frontend_set_state(XenDevice *xendev, + enum xenbus_state state, + bool publish) +{ + const char *type = object_get_typename(OBJECT(xendev)); + + if (xendev->frontend_state == state) { + return; + } + + trace_xen_device_frontend_state(type, xendev->name, + xs_strstate(state)); + + xendev->frontend_state = state; + if (publish) { + xen_device_frontend_printf(xendev, "state", "%u", state); + } +} + +static void xen_device_frontend_changed(void *opaque) +{ + XenDevice *xendev = opaque; + XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); + const char *type = object_get_typename(OBJECT(xendev)); + enum xenbus_state state; + + trace_xen_device_frontend_changed(type, xendev->name); + + if (xen_device_frontend_scanf(xendev, "state", "%u", &state) != 1) { + state = XenbusStateUnknown; + } + + xen_device_frontend_set_state(xendev, state, false); + + if (state == XenbusStateInitialising && + xendev->backend_state == XenbusStateClosed && + xendev->backend_online) { + /* + * The frontend is re-initializing so switch back to + * InitWait. + */ + xen_device_backend_set_state(xendev, XenbusStateInitWait); + return; + } + + if (xendev_class->frontend_changed) { + Error *local_err = NULL; + + xendev_class->frontend_changed(xendev, state, &local_err); + + if (local_err) { + error_reportf_err(local_err, "frontend change error: "); + } + } +} + +static bool xen_device_frontend_exists(XenDevice *xendev) +{ + enum xenbus_state state; + + return (xen_device_frontend_scanf(xendev, "state", "%u", &state) == 1); +} + +static void xen_device_frontend_create(XenDevice *xendev, Error **errp) +{ + ERRP_GUARD(); + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + struct xs_permissions perms[2]; + + xendev->frontend_path = xen_device_get_frontend_path(xendev); + + /* + * The frontend area may have already been created by a legacy + * toolstack. + */ + if (!xen_device_frontend_exists(xendev)) { + perms[0].id = xendev->frontend_id; + perms[0].perms = XS_PERM_NONE; + perms[1].id = xenbus->backend_id; + perms[1].perms = XS_PERM_READ | XS_PERM_WRITE; + + g_assert(xenbus->xsh); + + xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, perms, + ARRAY_SIZE(perms), errp); + if (*errp) { + error_prepend(errp, "failed to create frontend: "); + return; + } + } + + xendev->frontend_state_watch = + xen_device_add_watch(xendev, xendev->frontend_path, "state", + xen_device_frontend_changed, errp); + if (*errp) { + error_prepend(errp, "failed to watch frontend state: "); + } +} + +static void xen_device_frontend_destroy(XenDevice *xendev) +{ + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + Error *local_err = NULL; + + if (xendev->frontend_state_watch) { + xen_device_remove_watch(xendev, xendev->frontend_state_watch, + NULL); + xendev->frontend_state_watch = NULL; + } + + if (!xendev->frontend_path) { + return; + } + + g_assert(xenbus->xsh); + + xs_node_destroy(xenbus->xsh, XBT_NULL, xendev->frontend_path, + &local_err); + g_free(xendev->frontend_path); + xendev->frontend_path = NULL; + + if (local_err) { + error_report_err(local_err); + } +} + +void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs, + Error **errp) +{ + if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) { + error_setg_errno(errp, errno, "xengnttab_set_max_grants failed"); + } +} + +void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs, + unsigned int nr_refs, int prot, + Error **errp) +{ + void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs, + xendev->frontend_id, refs, + prot); + + if (!map) { + error_setg_errno(errp, errno, + "xengnttab_map_domain_grant_refs failed"); + } + + return map; +} + +void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, + unsigned int nr_refs, Error **errp) +{ + if (xengnttab_unmap(xendev->xgth, map, nr_refs)) { + error_setg_errno(errp, errno, "xengnttab_unmap failed"); + } +} + +static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain, + XenDeviceGrantCopySegment segs[], + unsigned int nr_segs, Error **errp) +{ + uint32_t *refs = g_new(uint32_t, nr_segs); + int prot = to_domain ? PROT_WRITE : PROT_READ; + void *map; + unsigned int i; + + for (i = 0; i < nr_segs; i++) { + XenDeviceGrantCopySegment *seg = &segs[i]; + + refs[i] = to_domain ? seg->dest.foreign.ref : + seg->source.foreign.ref; + } + + map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs, + xendev->frontend_id, refs, + prot); + if (!map) { + error_setg_errno(errp, errno, + "xengnttab_map_domain_grant_refs failed"); + goto done; + } + + for (i = 0; i < nr_segs; i++) { + XenDeviceGrantCopySegment *seg = &segs[i]; + void *page = map + (i * XC_PAGE_SIZE); + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + } + + if (xengnttab_unmap(xendev->xgth, map, nr_segs)) { + error_setg_errno(errp, errno, "xengnttab_unmap failed"); + } + +done: + g_free(refs); +} + +void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain, + XenDeviceGrantCopySegment segs[], + unsigned int nr_segs, Error **errp) +{ + xengnttab_grant_copy_segment_t *xengnttab_segs; + unsigned int i; + + if (!xendev->feature_grant_copy) { + compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp); + return; + } + + xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); + + for (i = 0; i < nr_segs; i++) { + XenDeviceGrantCopySegment *seg = &segs[i]; + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (to_domain) { + xengnttab_seg->flags = GNTCOPY_dest_gref; + xengnttab_seg->dest.foreign.domid = xendev->frontend_id; + xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; + xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; + xengnttab_seg->source.virt = seg->source.virt; + } else { + xengnttab_seg->flags = GNTCOPY_source_gref; + xengnttab_seg->source.foreign.domid = xendev->frontend_id; + xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; + xengnttab_seg->source.foreign.offset = + seg->source.foreign.offset; + xengnttab_seg->dest.virt = seg->dest.virt; + } + + xengnttab_seg->len = seg->len; + } + + if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) { + error_setg_errno(errp, errno, "xengnttab_grant_copy failed"); + goto done; + } + + for (i = 0; i < nr_segs; i++) { + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (xengnttab_seg->status != GNTST_okay) { + error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i); + break; + } + } + +done: + g_free(xengnttab_segs); +} + +struct XenEventChannel { + QLIST_ENTRY(XenEventChannel) list; + AioContext *ctx; + xenevtchn_handle *xeh; + evtchn_port_t local_port; + XenEventHandler handler; + void *opaque; +}; + +static bool xen_device_poll(void *opaque) +{ + XenEventChannel *channel = opaque; + + return channel->handler(channel->opaque); +} + +static void xen_device_event(void *opaque) +{ + XenEventChannel *channel = opaque; + unsigned long port = xenevtchn_pending(channel->xeh); + + if (port == channel->local_port) { + xen_device_poll(channel); + + xenevtchn_unmask(channel->xeh, port); + } +} + +void xen_device_set_event_channel_context(XenDevice *xendev, + XenEventChannel *channel, + AioContext *ctx, + Error **errp) +{ + if (!channel) { + error_setg(errp, "bad channel"); + return; + } + + if (channel->ctx) + aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + NULL, NULL, NULL, NULL); + + channel->ctx = ctx; + aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + xen_device_event, NULL, xen_device_poll, channel); +} + +XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev, + unsigned int port, + XenEventHandler handler, + void *opaque, Error **errp) +{ + XenEventChannel *channel = g_new0(XenEventChannel, 1); + xenevtchn_port_or_error_t local_port; + + channel->xeh = xenevtchn_open(NULL, 0); + if (!channel->xeh) { + error_setg_errno(errp, errno, "failed xenevtchn_open"); + goto fail; + } + + local_port = xenevtchn_bind_interdomain(channel->xeh, + xendev->frontend_id, + port); + if (local_port < 0) { + error_setg_errno(errp, errno, "xenevtchn_bind_interdomain failed"); + goto fail; + } + + channel->local_port = local_port; + channel->handler = handler; + channel->opaque = opaque; + + /* Only reason for failure is a NULL channel */ + xen_device_set_event_channel_context(xendev, channel, + qemu_get_aio_context(), + &error_abort); + + QLIST_INSERT_HEAD(&xendev->event_channels, channel, list); + + return channel; + +fail: + if (channel->xeh) { + xenevtchn_close(channel->xeh); + } + + g_free(channel); + + return NULL; +} + +void xen_device_notify_event_channel(XenDevice *xendev, + XenEventChannel *channel, + Error **errp) +{ + if (!channel) { + error_setg(errp, "bad channel"); + return; + } + + if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) { + error_setg_errno(errp, errno, "xenevtchn_notify failed"); + } +} + +void xen_device_unbind_event_channel(XenDevice *xendev, + XenEventChannel *channel, + Error **errp) +{ + if (!channel) { + error_setg(errp, "bad channel"); + return; + } + + QLIST_REMOVE(channel, list); + + aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true, + NULL, NULL, NULL, NULL); + + if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) { + error_setg_errno(errp, errno, "xenevtchn_unbind failed"); + } + + xenevtchn_close(channel->xeh); + g_free(channel); +} + +static void xen_device_unrealize(DeviceState *dev) +{ + XenDevice *xendev = XEN_DEVICE(dev); + XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); + const char *type = object_get_typename(OBJECT(xendev)); + XenEventChannel *channel, *next; + + if (!xendev->name) { + return; + } + + trace_xen_device_unrealize(type, xendev->name); + + if (xendev->exit.notify) { + qemu_remove_exit_notifier(&xendev->exit); + xendev->exit.notify = NULL; + } + + if (xendev_class->unrealize) { + xendev_class->unrealize(xendev); + } + + /* Make sure all event channels are cleaned up */ + QLIST_FOREACH_SAFE(channel, &xendev->event_channels, list, next) { + xen_device_unbind_event_channel(xendev, channel, NULL); + } + + xen_device_frontend_destroy(xendev); + xen_device_backend_destroy(xendev); + + if (xendev->xgth) { + xengnttab_close(xendev->xgth); + xendev->xgth = NULL; + } + + if (xendev->watch_list) { + watch_list_destroy(xendev->watch_list); + xendev->watch_list = NULL; + } + + if (xendev->xsh) { + xs_close(xendev->xsh); + xendev->xsh = NULL; + } + + g_free(xendev->name); + xendev->name = NULL; +} + +static void xen_device_exit(Notifier *n, void *data) +{ + XenDevice *xendev = container_of(n, XenDevice, exit); + + xen_device_unrealize(DEVICE(xendev)); +} + +static void xen_device_realize(DeviceState *dev, Error **errp) +{ + ERRP_GUARD(); + XenDevice *xendev = XEN_DEVICE(dev); + XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev); + XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev))); + const char *type = object_get_typename(OBJECT(xendev)); + + if (xendev->frontend_id == DOMID_INVALID) { + xendev->frontend_id = xen_domid; + } + + if (xendev->frontend_id >= DOMID_FIRST_RESERVED) { + error_setg(errp, "invalid frontend-id"); + goto unrealize; + } + + if (!xendev_class->get_name) { + error_setg(errp, "get_name method not implemented"); + goto unrealize; + } + + xendev->name = xendev_class->get_name(xendev, errp); + if (*errp) { + error_prepend(errp, "failed to get device name: "); + goto unrealize; + } + + trace_xen_device_realize(type, xendev->name); + + xendev->xsh = xs_open(0); + if (!xendev->xsh) { + error_setg_errno(errp, errno, "failed xs_open"); + goto unrealize; + } + + xendev->watch_list = watch_list_create(xendev->xsh); + + xendev->xgth = xengnttab_open(NULL, 0); + if (!xendev->xgth) { + error_setg_errno(errp, errno, "failed xengnttab_open"); + goto unrealize; + } + + xendev->feature_grant_copy = + (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0); + + xen_device_backend_create(xendev, errp); + if (*errp) { + goto unrealize; + } + + xen_device_frontend_create(xendev, errp); + if (*errp) { + goto unrealize; + } + + if (xendev_class->realize) { + xendev_class->realize(xendev, errp); + if (*errp) { + goto unrealize; + } + } + + xen_device_backend_printf(xendev, "frontend", "%s", + xendev->frontend_path); + xen_device_backend_printf(xendev, "frontend-id", "%u", + xendev->frontend_id); + xen_device_backend_printf(xendev, "hotplug-status", "connected"); + + xen_device_backend_set_online(xendev, true); + xen_device_backend_set_state(xendev, XenbusStateInitWait); + + if (!xen_device_frontend_exists(xendev)) { + xen_device_frontend_printf(xendev, "backend", "%s", + xendev->backend_path); + xen_device_frontend_printf(xendev, "backend-id", "%u", + xenbus->backend_id); + + xen_device_frontend_set_state(xendev, XenbusStateInitialising, true); + } + + xendev->exit.notify = xen_device_exit; + qemu_add_exit_notifier(&xendev->exit); + return; + +unrealize: + xen_device_unrealize(dev); +} + +static Property xen_device_props[] = { + DEFINE_PROP_UINT16("frontend-id", XenDevice, frontend_id, + DOMID_INVALID), + DEFINE_PROP_END_OF_LIST() +}; + +static void xen_device_class_init(ObjectClass *class, void *data) +{ + DeviceClass *dev_class = DEVICE_CLASS(class); + + dev_class->realize = xen_device_realize; + dev_class->unrealize = xen_device_unrealize; + device_class_set_props(dev_class, xen_device_props); + dev_class->bus_type = TYPE_XEN_BUS; +} + +static const TypeInfo xen_device_type_info = { + .name = TYPE_XEN_DEVICE, + .parent = TYPE_DEVICE, + .instance_size = sizeof(XenDevice), + .abstract = true, + .class_size = sizeof(XenDeviceClass), + .class_init = xen_device_class_init, +}; + +typedef struct XenBridge { + SysBusDevice busdev; +} XenBridge; + +#define TYPE_XEN_BRIDGE "xen-bridge" + +static const TypeInfo xen_bridge_type_info = { + .name = TYPE_XEN_BRIDGE, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(XenBridge), +}; + +static void xen_register_types(void) +{ + type_register_static(&xen_bridge_type_info); + type_register_static(&xen_bus_type_info); + type_register_static(&xen_device_type_info); +} + +type_init(xen_register_types) + +void xen_bus_init(void) +{ + DeviceState *dev = qdev_new(TYPE_XEN_BRIDGE); + BusState *bus = qbus_new(TYPE_XEN_BUS, dev, NULL); + + sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); + qbus_set_bus_hotplug_handler(bus); +} diff --git a/hw/xen/xen-host-pci-device.c b/hw/xen/xen-host-pci-device.c new file mode 100644 index 000000000..8c6e9a171 --- /dev/null +++ b/hw/xen/xen-host-pci-device.c @@ -0,0 +1,402 @@ +/* + * Copyright (C) 2011 Citrix Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" +#include "xen-host-pci-device.h" + +#define XEN_HOST_PCI_MAX_EXT_CAP \ + ((PCIE_CONFIG_SPACE_SIZE - PCI_CONFIG_SPACE_SIZE) / (PCI_CAP_SIZEOF + 4)) + +#ifdef XEN_HOST_PCI_DEVICE_DEBUG +# define XEN_HOST_PCI_LOG(f, a...) fprintf(stderr, "%s: " f, __func__, ##a) +#else +# define XEN_HOST_PCI_LOG(f, a...) (void)0 +#endif + +/* + * from linux/ioport.h + * IO resources have these defined flags. + */ +#define IORESOURCE_BITS 0x000000ff /* Bus-specific bits */ + +#define IORESOURCE_TYPE_BITS 0x00000f00 /* Resource type */ +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ +#define IORESOURCE_MEM_64 0x00100000 + +static void xen_host_pci_sysfs_path(const XenHostPCIDevice *d, + const char *name, char *buf, ssize_t size) +{ + int rc; + + rc = snprintf(buf, size, "/sys/bus/pci/devices/%04x:%02x:%02x.%d/%s", + d->domain, d->bus, d->dev, d->func, name); + assert(rc >= 0 && rc < size); +} + + +/* This size should be enough to read the first 7 lines of a resource file */ +#define XEN_HOST_PCI_RESOURCE_BUFFER_SIZE 400 +static void xen_host_pci_get_resource(XenHostPCIDevice *d, Error **errp) +{ + int i, rc, fd; + char path[PATH_MAX]; + char buf[XEN_HOST_PCI_RESOURCE_BUFFER_SIZE]; + unsigned long long start, end, flags, size; + char *endptr, *s; + uint8_t type; + + xen_host_pci_sysfs_path(d, "resource", path, sizeof(path)); + + fd = open(path, O_RDONLY); + if (fd == -1) { + error_setg_file_open(errp, errno, path); + return; + } + + do { + rc = read(fd, &buf, sizeof(buf) - 1); + if (rc < 0 && errno != EINTR) { + error_setg_errno(errp, errno, "read err"); + goto out; + } + } while (rc < 0); + buf[rc] = 0; + + s = buf; + for (i = 0; i < PCI_NUM_REGIONS; i++) { + type = 0; + + start = strtoll(s, &endptr, 16); + if (*endptr != ' ' || s == endptr) { + break; + } + s = endptr + 1; + end = strtoll(s, &endptr, 16); + if (*endptr != ' ' || s == endptr) { + break; + } + s = endptr + 1; + flags = strtoll(s, &endptr, 16); + if (*endptr != '\n' || s == endptr) { + break; + } + s = endptr + 1; + + if (start) { + size = end - start + 1; + } else { + size = 0; + } + + if (flags & IORESOURCE_IO) { + type |= XEN_HOST_PCI_REGION_TYPE_IO; + } + if (flags & IORESOURCE_MEM) { + type |= XEN_HOST_PCI_REGION_TYPE_MEM; + } + if (flags & IORESOURCE_PREFETCH) { + type |= XEN_HOST_PCI_REGION_TYPE_PREFETCH; + } + if (flags & IORESOURCE_MEM_64) { + type |= XEN_HOST_PCI_REGION_TYPE_MEM_64; + } + + if (i < PCI_ROM_SLOT) { + d->io_regions[i].base_addr = start; + d->io_regions[i].size = size; + d->io_regions[i].type = type; + d->io_regions[i].bus_flags = flags & IORESOURCE_BITS; + } else { + d->rom.base_addr = start; + d->rom.size = size; + d->rom.type = type; + d->rom.bus_flags = flags & IORESOURCE_BITS; + } + } + + if (i != PCI_NUM_REGIONS) { + error_setg(errp, "Invalid format or input too short: %s", buf); + } + +out: + close(fd); +} + +/* This size should be enough to read a long from a file */ +#define XEN_HOST_PCI_GET_VALUE_BUFFER_SIZE 22 +static void xen_host_pci_get_value(XenHostPCIDevice *d, const char *name, + unsigned int *pvalue, int base, Error **errp) +{ + char path[PATH_MAX]; + char buf[XEN_HOST_PCI_GET_VALUE_BUFFER_SIZE]; + int fd, rc; + unsigned long value; + const char *endptr; + + xen_host_pci_sysfs_path(d, name, path, sizeof(path)); + + fd = open(path, O_RDONLY); + if (fd == -1) { + error_setg_file_open(errp, errno, path); + return; + } + + do { + rc = read(fd, &buf, sizeof(buf) - 1); + if (rc < 0 && errno != EINTR) { + error_setg_errno(errp, errno, "read err"); + goto out; + } + } while (rc < 0); + + buf[rc] = 0; + rc = qemu_strtoul(buf, &endptr, base, &value); + if (!rc) { + assert(value <= UINT_MAX); + *pvalue = value; + } else { + error_setg_errno(errp, -rc, "failed to parse value '%s'", buf); + } + +out: + close(fd); +} + +static inline void xen_host_pci_get_hex_value(XenHostPCIDevice *d, + const char *name, + unsigned int *pvalue, + Error **errp) +{ + xen_host_pci_get_value(d, name, pvalue, 16, errp); +} + +static inline void xen_host_pci_get_dec_value(XenHostPCIDevice *d, + const char *name, + unsigned int *pvalue, + Error **errp) +{ + xen_host_pci_get_value(d, name, pvalue, 10, errp); +} + +static bool xen_host_pci_dev_is_virtfn(XenHostPCIDevice *d) +{ + char path[PATH_MAX]; + struct stat buf; + + xen_host_pci_sysfs_path(d, "physfn", path, sizeof(path)); + + return !stat(path, &buf); +} + +static void xen_host_pci_config_open(XenHostPCIDevice *d, Error **errp) +{ + char path[PATH_MAX]; + + xen_host_pci_sysfs_path(d, "config", path, sizeof(path)); + + d->config_fd = open(path, O_RDWR); + if (d->config_fd == -1) { + error_setg_file_open(errp, errno, path); + } +} + +static int xen_host_pci_config_read(XenHostPCIDevice *d, + int pos, void *buf, int len) +{ + int rc; + + do { + rc = pread(d->config_fd, buf, len, pos); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + if (rc != len) { + return -errno; + } + return 0; +} + +static int xen_host_pci_config_write(XenHostPCIDevice *d, + int pos, const void *buf, int len) +{ + int rc; + + do { + rc = pwrite(d->config_fd, buf, len, pos); + } while (rc < 0 && (errno == EINTR || errno == EAGAIN)); + if (rc != len) { + return -errno; + } + return 0; +} + + +int xen_host_pci_get_byte(XenHostPCIDevice *d, int pos, uint8_t *p) +{ + uint8_t buf; + int rc = xen_host_pci_config_read(d, pos, &buf, 1); + if (!rc) { + *p = buf; + } + return rc; +} + +int xen_host_pci_get_word(XenHostPCIDevice *d, int pos, uint16_t *p) +{ + uint16_t buf; + int rc = xen_host_pci_config_read(d, pos, &buf, 2); + if (!rc) { + *p = le16_to_cpu(buf); + } + return rc; +} + +int xen_host_pci_get_long(XenHostPCIDevice *d, int pos, uint32_t *p) +{ + uint32_t buf; + int rc = xen_host_pci_config_read(d, pos, &buf, 4); + if (!rc) { + *p = le32_to_cpu(buf); + } + return rc; +} + +int xen_host_pci_get_block(XenHostPCIDevice *d, int pos, uint8_t *buf, int len) +{ + return xen_host_pci_config_read(d, pos, buf, len); +} + +int xen_host_pci_set_byte(XenHostPCIDevice *d, int pos, uint8_t data) +{ + return xen_host_pci_config_write(d, pos, &data, 1); +} + +int xen_host_pci_set_word(XenHostPCIDevice *d, int pos, uint16_t data) +{ + data = cpu_to_le16(data); + return xen_host_pci_config_write(d, pos, &data, 2); +} + +int xen_host_pci_set_long(XenHostPCIDevice *d, int pos, uint32_t data) +{ + data = cpu_to_le32(data); + return xen_host_pci_config_write(d, pos, &data, 4); +} + +int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, uint8_t *buf, int len) +{ + return xen_host_pci_config_write(d, pos, buf, len); +} + +int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *d, uint32_t cap) +{ + uint32_t header = 0; + int max_cap = XEN_HOST_PCI_MAX_EXT_CAP; + int pos = PCI_CONFIG_SPACE_SIZE; + + do { + if (xen_host_pci_get_long(d, pos, &header)) { + break; + } + /* + * If we have no capabilities, this is indicated by cap ID, + * cap version and next pointer all being 0. + */ + if (header == 0) { + break; + } + + if (PCI_EXT_CAP_ID(header) == cap) { + return pos; + } + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < PCI_CONFIG_SPACE_SIZE) { + break; + } + + max_cap--; + } while (max_cap > 0); + + return -1; +} + +void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, + uint8_t bus, uint8_t dev, uint8_t func, + Error **errp) +{ + ERRP_GUARD(); + unsigned int v; + + d->config_fd = -1; + d->domain = domain; + d->bus = bus; + d->dev = dev; + d->func = func; + + xen_host_pci_config_open(d, errp); + if (*errp) { + goto error; + } + + xen_host_pci_get_resource(d, errp); + if (*errp) { + goto error; + } + + xen_host_pci_get_hex_value(d, "vendor", &v, errp); + if (*errp) { + goto error; + } + d->vendor_id = v; + + xen_host_pci_get_hex_value(d, "device", &v, errp); + if (*errp) { + goto error; + } + d->device_id = v; + + xen_host_pci_get_dec_value(d, "irq", &v, errp); + if (*errp) { + goto error; + } + d->irq = v; + + xen_host_pci_get_hex_value(d, "class", &v, errp); + if (*errp) { + goto error; + } + d->class_code = v; + + d->is_virtfn = xen_host_pci_dev_is_virtfn(d); + + return; + +error: + + if (d->config_fd >= 0) { + close(d->config_fd); + d->config_fd = -1; + } +} + +bool xen_host_pci_device_closed(XenHostPCIDevice *d) +{ + return d->config_fd == -1; +} + +void xen_host_pci_device_put(XenHostPCIDevice *d) +{ + if (d->config_fd >= 0) { + close(d->config_fd); + d->config_fd = -1; + } +} diff --git a/hw/xen/xen-host-pci-device.h b/hw/xen/xen-host-pci-device.h new file mode 100644 index 000000000..4d8d34ecb --- /dev/null +++ b/hw/xen/xen-host-pci-device.h @@ -0,0 +1,58 @@ +#ifndef XEN_HOST_PCI_DEVICE_H +#define XEN_HOST_PCI_DEVICE_H + +#include "hw/pci/pci.h" + +enum { + XEN_HOST_PCI_REGION_TYPE_IO = 1 << 1, + XEN_HOST_PCI_REGION_TYPE_MEM = 1 << 2, + XEN_HOST_PCI_REGION_TYPE_PREFETCH = 1 << 3, + XEN_HOST_PCI_REGION_TYPE_MEM_64 = 1 << 4, +}; + +typedef struct XenHostPCIIORegion { + pcibus_t base_addr; + pcibus_t size; + uint8_t type; + uint8_t bus_flags; /* Bus-specific bits */ +} XenHostPCIIORegion; + +typedef struct XenHostPCIDevice { + uint16_t domain; + uint8_t bus; + uint8_t dev; + uint8_t func; + + uint16_t vendor_id; + uint16_t device_id; + uint32_t class_code; + int irq; + + XenHostPCIIORegion io_regions[PCI_NUM_REGIONS - 1]; + XenHostPCIIORegion rom; + + bool is_virtfn; + + int config_fd; +} XenHostPCIDevice; + +void xen_host_pci_device_get(XenHostPCIDevice *d, uint16_t domain, + uint8_t bus, uint8_t dev, uint8_t func, + Error **errp); +void xen_host_pci_device_put(XenHostPCIDevice *pci_dev); +bool xen_host_pci_device_closed(XenHostPCIDevice *d); + +int xen_host_pci_get_byte(XenHostPCIDevice *d, int pos, uint8_t *p); +int xen_host_pci_get_word(XenHostPCIDevice *d, int pos, uint16_t *p); +int xen_host_pci_get_long(XenHostPCIDevice *d, int pos, uint32_t *p); +int xen_host_pci_get_block(XenHostPCIDevice *d, int pos, uint8_t *buf, + int len); +int xen_host_pci_set_byte(XenHostPCIDevice *d, int pos, uint8_t data); +int xen_host_pci_set_word(XenHostPCIDevice *d, int pos, uint16_t data); +int xen_host_pci_set_long(XenHostPCIDevice *d, int pos, uint32_t data); +int xen_host_pci_set_block(XenHostPCIDevice *d, int pos, uint8_t *buf, + int len); + +int xen_host_pci_find_ext_cap_offset(XenHostPCIDevice *s, uint32_t cap); + +#endif /* XEN_HOST_PCI_DEVICE_H */ diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c new file mode 100644 index 000000000..085fd31ef --- /dev/null +++ b/hw/xen/xen-legacy-backend.c @@ -0,0 +1,843 @@ +/* + * xen backend driver infrastructure + * (c) 2008 Gerd Hoffmann <kraxel@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +/* + * TODO: add some xenbus / xenstore concepts overview here. + */ + +#include "qemu/osdep.h" + +#include "hw/sysbus.h" +#include "hw/boards.h" +#include "hw/qdev-properties.h" +#include "qemu/main-loop.h" +#include "qapi/error.h" +#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_pvdev.h" +#include "monitor/qdev.h" + +DeviceState *xen_sysdev; +BusState *xen_sysbus; + +/* ------------------------------------------------------------- */ + +/* public */ +struct xs_handle *xenstore; +const char *xen_protocol; + +/* private */ +static bool xen_feature_grant_copy; +static int debug; + +int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node, + const char *val) +{ + return xenstore_write_str(xendev->be, node, val); +} + +int xenstore_write_be_int(struct XenLegacyDevice *xendev, const char *node, + int ival) +{ + return xenstore_write_int(xendev->be, node, ival); +} + +int xenstore_write_be_int64(struct XenLegacyDevice *xendev, const char *node, + int64_t ival) +{ + return xenstore_write_int64(xendev->be, node, ival); +} + +char *xenstore_read_be_str(struct XenLegacyDevice *xendev, const char *node) +{ + return xenstore_read_str(xendev->be, node); +} + +int xenstore_read_be_int(struct XenLegacyDevice *xendev, const char *node, + int *ival) +{ + return xenstore_read_int(xendev->be, node, ival); +} + +char *xenstore_read_fe_str(struct XenLegacyDevice *xendev, const char *node) +{ + return xenstore_read_str(xendev->fe, node); +} + +int xenstore_read_fe_int(struct XenLegacyDevice *xendev, const char *node, + int *ival) +{ + return xenstore_read_int(xendev->fe, node, ival); +} + +int xenstore_read_fe_uint64(struct XenLegacyDevice *xendev, const char *node, + uint64_t *uval) +{ + return xenstore_read_uint64(xendev->fe, node, uval); +} + +/* ------------------------------------------------------------- */ + +int xen_be_set_state(struct XenLegacyDevice *xendev, enum xenbus_state state) +{ + int rc; + + rc = xenstore_write_be_int(xendev, "state", state); + if (rc < 0) { + return rc; + } + xen_pv_printf(xendev, 1, "backend state: %s -> %s\n", + xenbus_strstate(xendev->be_state), xenbus_strstate(state)); + xendev->be_state = state; + return 0; +} + +void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev, + unsigned int nr_refs) +{ + assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); + + if (xengnttab_set_max_grants(xendev->gnttabdev, nr_refs)) { + xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n", + strerror(errno)); + } +} + +void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs, + unsigned int nr_refs, int prot) +{ + void *ptr; + + assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); + + ptr = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_refs, + xen_domid, refs, prot); + if (!ptr) { + xen_pv_printf(xendev, 0, + "xengnttab_map_domain_grant_refs failed: %s\n", + strerror(errno)); + } + + return ptr; +} + +void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr, + unsigned int nr_refs) +{ + assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); + + if (xengnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) { + xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", + strerror(errno)); + } +} + +static int compat_copy_grant_refs(struct XenLegacyDevice *xendev, + bool to_domain, + XenGrantCopySegment segs[], + unsigned int nr_segs) +{ + uint32_t *refs = g_new(uint32_t, nr_segs); + int prot = to_domain ? PROT_WRITE : PROT_READ; + void *pages; + unsigned int i; + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + + refs[i] = to_domain ? + seg->dest.foreign.ref : seg->source.foreign.ref; + } + + pages = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_segs, + xen_domid, refs, prot); + if (!pages) { + xen_pv_printf(xendev, 0, + "xengnttab_map_domain_grant_refs failed: %s\n", + strerror(errno)); + g_free(refs); + return -1; + } + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + void *page = pages + (i * XC_PAGE_SIZE); + + if (to_domain) { + memcpy(page + seg->dest.foreign.offset, seg->source.virt, + seg->len); + } else { + memcpy(seg->dest.virt, page + seg->source.foreign.offset, + seg->len); + } + } + + if (xengnttab_unmap(xendev->gnttabdev, pages, nr_segs)) { + xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n", + strerror(errno)); + } + + g_free(refs); + return 0; +} + +int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev, + bool to_domain, + XenGrantCopySegment segs[], + unsigned int nr_segs) +{ + xengnttab_grant_copy_segment_t *xengnttab_segs; + unsigned int i; + int rc; + + assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV); + + if (!xen_feature_grant_copy) { + return compat_copy_grant_refs(xendev, to_domain, segs, nr_segs); + } + + xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs); + + for (i = 0; i < nr_segs; i++) { + XenGrantCopySegment *seg = &segs[i]; + xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i]; + + if (to_domain) { + xengnttab_seg->flags = GNTCOPY_dest_gref; + xengnttab_seg->dest.foreign.domid = xen_domid; + xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref; + xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset; + xengnttab_seg->source.virt = seg->source.virt; + } else { + xengnttab_seg->flags = GNTCOPY_source_gref; + xengnttab_seg->source.foreign.domid = xen_domid; + xengnttab_seg->source.foreign.ref = seg->source.foreign.ref; + xengnttab_seg->source.foreign.offset = + seg->source.foreign.offset; + xengnttab_seg->dest.virt = seg->dest.virt; + } + + xengnttab_seg->len = seg->len; + } + + rc = xengnttab_grant_copy(xendev->gnttabdev, nr_segs, xengnttab_segs); + + if (rc) { + xen_pv_printf(xendev, 0, "xengnttab_copy failed: %s\n", + strerror(errno)); + } + + for (i = 0; i < nr_segs; i++) { + xengnttab_grant_copy_segment_t *xengnttab_seg = + &xengnttab_segs[i]; + + if (xengnttab_seg->status != GNTST_okay) { + xen_pv_printf(xendev, 0, "segment[%u] status: %d\n", i, + xengnttab_seg->status); + rc = -1; + } + } + + g_free(xengnttab_segs); + return rc; +} + +/* + * get xen backend device, allocate a new one if it doesn't exist. + */ +static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom, + int dev, + struct XenDevOps *ops) +{ + struct XenLegacyDevice *xendev; + + xendev = xen_pv_find_xendev(type, dom, dev); + if (xendev) { + return xendev; + } + + /* init new xendev */ + xendev = g_malloc0(ops->size); + object_initialize(&xendev->qdev, ops->size, TYPE_XENBACKEND); + OBJECT(xendev)->free = g_free; + qdev_set_id(DEVICE(xendev), g_strdup_printf("xen-%s-%d", type, dev), + &error_fatal); + qdev_realize(DEVICE(xendev), xen_sysbus, &error_fatal); + object_unref(OBJECT(xendev)); + + xendev->type = type; + xendev->dom = dom; + xendev->dev = dev; + xendev->ops = ops; + + snprintf(xendev->be, sizeof(xendev->be), "backend/%s/%d/%d", + xendev->type, xendev->dom, xendev->dev); + snprintf(xendev->name, sizeof(xendev->name), "%s-%d", + xendev->type, xendev->dev); + + xendev->debug = debug; + xendev->local_port = -1; + + xendev->evtchndev = xenevtchn_open(NULL, 0); + if (xendev->evtchndev == NULL) { + xen_pv_printf(NULL, 0, "can't open evtchn device\n"); + qdev_unplug(DEVICE(xendev), NULL); + return NULL; + } + qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev)); + + xen_pv_insert_xendev(xendev); + + if (xendev->ops->alloc) { + xendev->ops->alloc(xendev); + } + + return xendev; +} + + +/* + * Sync internal data structures on xenstore updates. + * Node specifies the changed field. node = NULL means + * update all fields (used for initialization). + */ +static void xen_be_backend_changed(struct XenLegacyDevice *xendev, + const char *node) +{ + if (node == NULL || strcmp(node, "online") == 0) { + if (xenstore_read_be_int(xendev, "online", &xendev->online) == -1) { + xendev->online = 0; + } + } + + if (node) { + xen_pv_printf(xendev, 2, "backend update: %s\n", node); + if (xendev->ops->backend_changed) { + xendev->ops->backend_changed(xendev, node); + } + } +} + +static void xen_be_frontend_changed(struct XenLegacyDevice *xendev, + const char *node) +{ + int fe_state; + + if (node == NULL || strcmp(node, "state") == 0) { + if (xenstore_read_fe_int(xendev, "state", &fe_state) == -1) { + fe_state = XenbusStateUnknown; + } + if (xendev->fe_state != fe_state) { + xen_pv_printf(xendev, 1, "frontend state: %s -> %s\n", + xenbus_strstate(xendev->fe_state), + xenbus_strstate(fe_state)); + } + xendev->fe_state = fe_state; + } + if (node == NULL || strcmp(node, "protocol") == 0) { + g_free(xendev->protocol); + xendev->protocol = xenstore_read_fe_str(xendev, "protocol"); + if (xendev->protocol) { + xen_pv_printf(xendev, 1, "frontend protocol: %s\n", + xendev->protocol); + } + } + + if (node) { + xen_pv_printf(xendev, 2, "frontend update: %s\n", node); + if (xendev->ops->frontend_changed) { + xendev->ops->frontend_changed(xendev, node); + } + } +} + +/* ------------------------------------------------------------- */ +/* Check for possible state transitions and perform them. */ + +/* + * Initial xendev setup. Read frontend path, register watch for it. + * Should succeed once xend finished setting up the backend device. + * + * Also sets initial state (-> Initializing) when done. Which + * only affects the xendev->be_state variable as xenbus should + * already be put into that state by xend. + */ +static int xen_be_try_setup(struct XenLegacyDevice *xendev) +{ + char token[XEN_BUFSIZE]; + int be_state; + + if (xenstore_read_be_int(xendev, "state", &be_state) == -1) { + xen_pv_printf(xendev, 0, "reading backend state failed\n"); + return -1; + } + + if (be_state != XenbusStateInitialising) { + xen_pv_printf(xendev, 0, "initial backend state is wrong (%s)\n", + xenbus_strstate(be_state)); + return -1; + } + + xendev->fe = xenstore_read_be_str(xendev, "frontend"); + if (xendev->fe == NULL) { + xen_pv_printf(xendev, 0, "reading frontend path failed\n"); + return -1; + } + + /* setup frontend watch */ + snprintf(token, sizeof(token), "fe:%p", xendev); + if (!xs_watch(xenstore, xendev->fe, token)) { + xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n", + xendev->fe); + return -1; + } + xen_be_set_state(xendev, XenbusStateInitialising); + + xen_be_backend_changed(xendev, NULL); + xen_be_frontend_changed(xendev, NULL); + return 0; +} + +/* + * Try initialize xendev. Prepare everything the backend can do + * without synchronizing with the frontend. Fakes hotplug-status. No + * hotplug involved here because this is about userspace drivers, thus + * there are kernel backend devices which could invoke hotplug. + * + * Goes to InitWait on success. + */ +static int xen_be_try_init(struct XenLegacyDevice *xendev) +{ + int rc = 0; + + if (!xendev->online) { + xen_pv_printf(xendev, 1, "not online\n"); + return -1; + } + + if (xendev->ops->init) { + rc = xendev->ops->init(xendev); + } + if (rc != 0) { + xen_pv_printf(xendev, 1, "init() failed\n"); + return rc; + } + + xenstore_write_be_str(xendev, "hotplug-status", "connected"); + xen_be_set_state(xendev, XenbusStateInitWait); + return 0; +} + +/* + * Try to initialise xendev. Depends on the frontend being ready + * for it (shared ring and evtchn info in xenstore, state being + * Initialised or Connected). + * + * Goes to Connected on success. + */ +static int xen_be_try_initialise(struct XenLegacyDevice *xendev) +{ + int rc = 0; + + if (xendev->fe_state != XenbusStateInitialised && + xendev->fe_state != XenbusStateConnected) { + if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) { + xen_pv_printf(xendev, 2, "frontend not ready, ignoring\n"); + } else { + xen_pv_printf(xendev, 2, "frontend not ready (yet)\n"); + return -1; + } + } + + if (xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV) { + xendev->gnttabdev = xengnttab_open(NULL, 0); + if (xendev->gnttabdev == NULL) { + xen_pv_printf(NULL, 0, "can't open gnttab device\n"); + return -1; + } + } else { + xendev->gnttabdev = NULL; + } + + if (xendev->ops->initialise) { + rc = xendev->ops->initialise(xendev); + } + if (rc != 0) { + xen_pv_printf(xendev, 0, "initialise() failed\n"); + return rc; + } + + xen_be_set_state(xendev, XenbusStateConnected); + return 0; +} + +/* + * Try to let xendev know that it is connected. Depends on the + * frontend being Connected. Note that this may be called more + * than once since the backend state is not modified. + */ +static void xen_be_try_connected(struct XenLegacyDevice *xendev) +{ + if (!xendev->ops->connected) { + return; + } + + if (xendev->fe_state != XenbusStateConnected) { + if (xendev->ops->flags & DEVOPS_FLAG_IGNORE_STATE) { + xen_pv_printf(xendev, 2, "frontend not ready, ignoring\n"); + } else { + xen_pv_printf(xendev, 2, "frontend not ready (yet)\n"); + return; + } + } + + xendev->ops->connected(xendev); +} + +/* + * Teardown connection. + * + * Goes to Closed when done. + */ +static void xen_be_disconnect(struct XenLegacyDevice *xendev, + enum xenbus_state state) +{ + if (xendev->be_state != XenbusStateClosing && + xendev->be_state != XenbusStateClosed && + xendev->ops->disconnect) { + xendev->ops->disconnect(xendev); + } + if (xendev->gnttabdev) { + xengnttab_close(xendev->gnttabdev); + xendev->gnttabdev = NULL; + } + if (xendev->be_state != state) { + xen_be_set_state(xendev, state); + } +} + +/* + * Try to reset xendev, for reconnection by another frontend instance. + */ +static int xen_be_try_reset(struct XenLegacyDevice *xendev) +{ + if (xendev->fe_state != XenbusStateInitialising) { + return -1; + } + + xen_pv_printf(xendev, 1, "device reset (for re-connect)\n"); + xen_be_set_state(xendev, XenbusStateInitialising); + return 0; +} + +/* + * state change dispatcher function + */ +void xen_be_check_state(struct XenLegacyDevice *xendev) +{ + int rc = 0; + + /* frontend may request shutdown from almost anywhere */ + if (xendev->fe_state == XenbusStateClosing || + xendev->fe_state == XenbusStateClosed) { + xen_be_disconnect(xendev, xendev->fe_state); + return; + } + + /* check for possible backend state transitions */ + for (;;) { + switch (xendev->be_state) { + case XenbusStateUnknown: + rc = xen_be_try_setup(xendev); + break; + case XenbusStateInitialising: + rc = xen_be_try_init(xendev); + break; + case XenbusStateInitWait: + rc = xen_be_try_initialise(xendev); + break; + case XenbusStateConnected: + /* xendev->be_state doesn't change */ + xen_be_try_connected(xendev); + rc = -1; + break; + case XenbusStateClosed: + rc = xen_be_try_reset(xendev); + break; + default: + rc = -1; + } + if (rc != 0) { + break; + } + } +} + +/* ------------------------------------------------------------- */ + +static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops) +{ + struct XenLegacyDevice *xendev; + char path[XEN_BUFSIZE], token[XEN_BUFSIZE]; + char **dev = NULL; + unsigned int cdev, j; + + /* setup watch */ + snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops); + snprintf(path, sizeof(path), "backend/%s/%d", type, dom); + if (!xs_watch(xenstore, path, token)) { + xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n", + path); + return -1; + } + + /* look for backends */ + dev = xs_directory(xenstore, 0, path, &cdev); + if (!dev) { + return 0; + } + for (j = 0; j < cdev; j++) { + xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops); + if (xendev == NULL) { + continue; + } + xen_be_check_state(xendev); + } + free(dev); + return 0; +} + +void xenstore_update_be(char *watch, char *type, int dom, + struct XenDevOps *ops) +{ + struct XenLegacyDevice *xendev; + char path[XEN_BUFSIZE], *bepath; + unsigned int len, dev; + + len = snprintf(path, sizeof(path), "backend/%s/%d", type, dom); + if (strncmp(path, watch, len) != 0) { + return; + } + if (sscanf(watch + len, "/%u/%255s", &dev, path) != 2) { + strcpy(path, ""); + if (sscanf(watch + len, "/%u", &dev) != 1) { + dev = -1; + } + } + if (dev == -1) { + return; + } + + xendev = xen_be_get_xendev(type, dom, dev, ops); + if (xendev != NULL) { + bepath = xs_read(xenstore, 0, xendev->be, &len); + if (bepath == NULL) { + xen_pv_del_xendev(xendev); + } else { + free(bepath); + xen_be_backend_changed(xendev, path); + xen_be_check_state(xendev); + } + } +} + +void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev) +{ + char *node; + unsigned int len; + + len = strlen(xendev->fe); + if (strncmp(xendev->fe, watch, len) != 0) { + return; + } + if (watch[len] != '/') { + return; + } + node = watch + len + 1; + + xen_be_frontend_changed(xendev, node); + xen_be_check_state(xendev); +} +/* -------------------------------------------------------------------- */ + +int xen_be_init(void) +{ + xengnttab_handle *gnttabdev; + + xenstore = xs_daemon_open(); + if (!xenstore) { + xen_pv_printf(NULL, 0, "can't connect to xenstored\n"); + return -1; + } + + qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL); + + if (xen_xc == NULL || xen_fmem == NULL) { + /* Check if xen_init() have been called */ + goto err; + } + + gnttabdev = xengnttab_open(NULL, 0); + if (gnttabdev != NULL) { + if (xengnttab_grant_copy(gnttabdev, 0, NULL) == 0) { + xen_feature_grant_copy = true; + } + xengnttab_close(gnttabdev); + } + + xen_sysdev = qdev_new(TYPE_XENSYSDEV); + sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal); + xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus"); + qbus_set_bus_hotplug_handler(xen_sysbus); + + return 0; + +err: + qemu_set_fd_handler(xs_fileno(xenstore), NULL, NULL, NULL); + xs_daemon_close(xenstore); + xenstore = NULL; + + return -1; +} + +static void xen_set_dynamic_sysbus(void) +{ + Object *machine = qdev_get_machine(); + ObjectClass *oc = object_get_class(machine); + MachineClass *mc = MACHINE_CLASS(oc); + + machine_class_allow_dynamic_sysbus_dev(mc, TYPE_XENSYSDEV); +} + +int xen_be_register(const char *type, struct XenDevOps *ops) +{ + char path[50]; + int rc; + + if (ops->backend_register) { + rc = ops->backend_register(); + if (rc) { + return rc; + } + } + + snprintf(path, sizeof(path), "device-model/%u/backends/%s", xen_domid, + type); + xenstore_mkdir(path, XS_PERM_NONE); + + return xenstore_scan(type, xen_domid, ops); +} + +void xen_be_register_common(void) +{ + xen_set_dynamic_sysbus(); + + xen_be_register("console", &xen_console_ops); + xen_be_register("vkbd", &xen_kbdmouse_ops); +#ifdef CONFIG_VIRTFS + xen_be_register("9pfs", &xen_9pfs_ops); +#endif +#ifdef CONFIG_USB_LIBUSB + xen_be_register("qusb", &xen_usb_ops); +#endif +} + +int xen_be_bind_evtchn(struct XenLegacyDevice *xendev) +{ + if (xendev->local_port != -1) { + return 0; + } + xendev->local_port = xenevtchn_bind_interdomain + (xendev->evtchndev, xendev->dom, xendev->remote_port); + if (xendev->local_port == -1) { + xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n"); + return -1; + } + xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port); + qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), + xen_pv_evtchn_event, NULL, xendev); + return 0; +} + + +static Property xendev_properties[] = { + DEFINE_PROP_END_OF_LIST(), +}; + +static void xendev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, xendev_properties); + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + /* xen-backend devices can be plugged/unplugged dynamically */ + dc->user_creatable = true; + dc->bus_type = TYPE_XENSYSBUS; +} + +static const TypeInfo xendev_type_info = { + .name = TYPE_XENBACKEND, + .parent = TYPE_DEVICE, + .class_init = xendev_class_init, + .instance_size = sizeof(struct XenLegacyDevice), +}; + +static void xen_sysbus_class_init(ObjectClass *klass, void *data) +{ + HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); + + hc->unplug = qdev_simple_device_unplug_cb; +} + +static const TypeInfo xensysbus_info = { + .name = TYPE_XENSYSBUS, + .parent = TYPE_BUS, + .class_init = xen_sysbus_class_init, + .interfaces = (InterfaceInfo[]) { + { TYPE_HOTPLUG_HANDLER }, + { } + } +}; + +static Property xen_sysdev_properties[] = { + {/* end of property list */}, +}; + +static void xen_sysdev_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + device_class_set_props(dc, xen_sysdev_properties); +} + +static const TypeInfo xensysdev_info = { + .name = TYPE_XENSYSDEV, + .parent = TYPE_SYS_BUS_DEVICE, + .instance_size = sizeof(SysBusDevice), + .class_init = xen_sysdev_class_init, +}; + +static void xenbe_register_types(void) +{ + type_register_static(&xensysbus_info); + type_register_static(&xensysdev_info); + type_register_static(&xendev_type_info); +} + +type_init(xenbe_register_types) diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c new file mode 100644 index 000000000..46ee4a7f0 --- /dev/null +++ b/hw/xen/xen_devconfig.c @@ -0,0 +1,129 @@ +#include "qemu/osdep.h" +#include "hw/xen/xen-legacy-backend.h" +#include "qemu/option.h" +#include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" + +/* ------------------------------------------------------------- */ + +static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev, + char *fe, char *be, int len) +{ + char *dom; + + dom = xs_get_domain_path(xenstore, xen_domid); + snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev); + free(dom); + + dom = xs_get_domain_path(xenstore, 0); + snprintf(be, len, "%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev); + free(dom); + + xenstore_mkdir(fe, XS_PERM_READ | XS_PERM_WRITE); + xenstore_mkdir(be, XS_PERM_READ); + return 0; +} + +static int xen_config_dev_all(char *fe, char *be) +{ + /* frontend */ + if (xen_protocol) + xenstore_write_str(fe, "protocol", xen_protocol); + + xenstore_write_int(fe, "state", XenbusStateInitialising); + xenstore_write_int(fe, "backend-id", 0); + xenstore_write_str(fe, "backend", be); + + /* backend */ + xenstore_write_str(be, "domain", qemu_name ? qemu_name : "no-name"); + xenstore_write_int(be, "online", 1); + xenstore_write_int(be, "state", XenbusStateInitialising); + xenstore_write_int(be, "frontend-id", xen_domid); + xenstore_write_str(be, "frontend", fe); + + return 0; +} + +/* ------------------------------------------------------------- */ + +int xen_config_dev_blk(DriveInfo *disk) +{ + char fe[256], be[256], device_name[32]; + int vdev = 202 * 256 + 16 * disk->unit; + int cdrom = disk->media_cd; + const char *devtype = cdrom ? "cdrom" : "disk"; + const char *mode = cdrom ? "r" : "w"; + const char *filename = qemu_opt_get(disk->opts, "file"); + + snprintf(device_name, sizeof(device_name), "xvd%c", 'a' + disk->unit); + xen_pv_printf(NULL, 1, "config disk %d [%s]: %s\n", + disk->unit, device_name, filename); + xen_config_dev_dirs("vbd", "qdisk", vdev, fe, be, sizeof(fe)); + + /* frontend */ + xenstore_write_int(fe, "virtual-device", vdev); + xenstore_write_str(fe, "device-type", devtype); + + /* backend */ + xenstore_write_str(be, "dev", device_name); + xenstore_write_str(be, "type", "file"); + xenstore_write_str(be, "params", filename); + xenstore_write_str(be, "mode", mode); + + /* common stuff */ + return xen_config_dev_all(fe, be); +} + +int xen_config_dev_nic(NICInfo *nic) +{ + char fe[256], be[256]; + char mac[20]; + int vlan_id = -1; + + net_hub_id_for_client(nic->netdev, &vlan_id); + snprintf(mac, sizeof(mac), "%02x:%02x:%02x:%02x:%02x:%02x", + nic->macaddr.a[0], nic->macaddr.a[1], nic->macaddr.a[2], + nic->macaddr.a[3], nic->macaddr.a[4], nic->macaddr.a[5]); + xen_pv_printf(NULL, 1, "config nic %d: mac=\"%s\"\n", vlan_id, mac); + xen_config_dev_dirs("vif", "qnic", vlan_id, fe, be, sizeof(fe)); + + /* frontend */ + xenstore_write_int(fe, "handle", vlan_id); + xenstore_write_str(fe, "mac", mac); + + /* backend */ + xenstore_write_int(be, "handle", vlan_id); + xenstore_write_str(be, "mac", mac); + + /* common stuff */ + return xen_config_dev_all(fe, be); +} + +int xen_config_dev_vfb(int vdev, const char *type) +{ + char fe[256], be[256]; + + xen_config_dev_dirs("vfb", "vfb", vdev, fe, be, sizeof(fe)); + + /* backend */ + xenstore_write_str(be, "type", type); + + /* common stuff */ + return xen_config_dev_all(fe, be); +} + +int xen_config_dev_vkbd(int vdev) +{ + char fe[256], be[256]; + + xen_config_dev_dirs("vkbd", "vkbd", vdev, fe, be, sizeof(fe)); + return xen_config_dev_all(fe, be); +} + +int xen_config_dev_console(int vdev) +{ + char fe[256], be[256]; + + xen_config_dev_dirs("console", "console", vdev, fe, be, sizeof(fe)); + return xen_config_dev_all(fe, be); +} diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c new file mode 100644 index 000000000..027190fa4 --- /dev/null +++ b/hw/xen/xen_pt.c @@ -0,0 +1,1005 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * Copyright (c) 2007, Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Alex Novik <alex@neocleus.com> + * Allen Kay <allen.m.kay@intel.com> + * Guy Zana <guy@neocleus.com> + * + * This file implements direct PCI assignment to a HVM guest + */ + +/* + * Interrupt Disable policy: + * + * INTx interrupt: + * Initialize(register_real_device) + * Map INTx(xc_physdev_map_pirq): + * <fail> + * - Set real Interrupt Disable bit to '1'. + * - Set machine_irq and assigned_device->machine_irq to '0'. + * * Don't bind INTx. + * + * Bind INTx(xc_domain_bind_pt_pci_irq): + * <fail> + * - Set real Interrupt Disable bit to '1'. + * - Unmap INTx. + * - Decrement xen_pt_mapped_machine_irq[machine_irq] + * - Set assigned_device->machine_irq to '0'. + * + * Write to Interrupt Disable bit by guest software(xen_pt_cmd_reg_write) + * Write '0' + * - Set real bit to '0' if assigned_device->machine_irq isn't '0'. + * + * Write '1' + * - Set real bit to '1'. + * + * MSI interrupt: + * Initialize MSI register(xen_pt_msi_setup, xen_pt_msi_update) + * Bind MSI(xc_domain_update_msi_irq) + * <fail> + * - Unmap MSI. + * - Set dev->msi->pirq to '-1'. + * + * MSI-X interrupt: + * Initialize MSI-X register(xen_pt_msix_update_one) + * Bind MSI-X(xc_domain_update_msi_irq) + * <fail> + * - Unmap MSI-X. + * - Set entry->pirq to '-1'. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include <sys/ioctl.h> + +#include "hw/pci/pci.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "hw/xen/xen.h" +#include "hw/i386/pc.h" +#include "hw/xen/xen-legacy-backend.h" +#include "xen_pt.h" +#include "qemu/range.h" + +static bool has_igd_gfx_passthru; + +bool xen_igd_gfx_pt_enabled(void) +{ + return has_igd_gfx_passthru; +} + +void xen_igd_gfx_pt_set(bool value, Error **errp) +{ + has_igd_gfx_passthru = value; +} + +#define XEN_PT_NR_IRQS (256) +static uint8_t xen_pt_mapped_machine_irq[XEN_PT_NR_IRQS] = {0}; + +void xen_pt_log(const PCIDevice *d, const char *f, ...) +{ + va_list ap; + + va_start(ap, f); + if (d) { + fprintf(stderr, "[%02x:%02x.%d] ", pci_dev_bus_num(d), + PCI_SLOT(d->devfn), PCI_FUNC(d->devfn)); + } + vfprintf(stderr, f, ap); + va_end(ap); +} + +/* Config Space */ + +static int xen_pt_pci_config_access_check(PCIDevice *d, uint32_t addr, int len) +{ + /* check offset range */ + if (addr > 0xFF) { + XEN_PT_ERR(d, "Failed to access register with offset exceeding 0xFF. " + "(addr: 0x%02x, len: %d)\n", addr, len); + return -1; + } + + /* check read size */ + if ((len != 1) && (len != 2) && (len != 4)) { + XEN_PT_ERR(d, "Failed to access register with invalid access length. " + "(addr: 0x%02x, len: %d)\n", addr, len); + return -1; + } + + /* check offset alignment */ + if (addr & (len - 1)) { + XEN_PT_ERR(d, "Failed to access register with invalid access size " + "alignment. (addr: 0x%02x, len: %d)\n", addr, len); + return -1; + } + + return 0; +} + +int xen_pt_bar_offset_to_index(uint32_t offset) +{ + int index = 0; + + /* check Exp ROM BAR */ + if (offset == PCI_ROM_ADDRESS) { + return PCI_ROM_SLOT; + } + + /* calculate BAR index */ + index = (offset - PCI_BASE_ADDRESS_0) >> 2; + if (index >= PCI_NUM_REGIONS) { + return -1; + } + + return index; +} + +static uint32_t xen_pt_pci_read_config(PCIDevice *d, uint32_t addr, int len) +{ + XenPCIPassthroughState *s = XEN_PT_DEVICE(d); + uint32_t val = 0; + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + int rc = 0; + int emul_len = 0; + uint32_t find_addr = addr; + + if (xen_pt_pci_config_access_check(d, addr, len)) { + goto exit; + } + + /* find register group entry */ + reg_grp_entry = xen_pt_find_reg_grp(s, addr); + if (reg_grp_entry) { + /* check 0-Hardwired register group */ + if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) { + /* no need to emulate, just return 0 */ + val = 0; + goto exit; + } + } + + /* read I/O device register value */ + rc = xen_host_pci_get_block(&s->real_device, addr, (uint8_t *)&val, len); + if (rc < 0) { + XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc); + memset(&val, 0xff, len); + } + + /* just return the I/O device register value for + * passthrough type register group */ + if (reg_grp_entry == NULL) { + goto exit; + } + + /* adjust the read value to appropriate CFC-CFF window */ + val <<= (addr & 3) << 3; + emul_len = len; + + /* loop around the guest requested size */ + while (emul_len > 0) { + /* find register entry to be emulated */ + reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr); + if (reg_entry) { + XenPTRegInfo *reg = reg_entry->reg; + uint32_t real_offset = reg_grp_entry->base_offset + reg->offset; + uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3); + uint8_t *ptr_val = NULL; + + valid_mask <<= (find_addr - real_offset) << 3; + ptr_val = (uint8_t *)&val + (real_offset & 3); + + /* do emulation based on register size */ + switch (reg->size) { + case 1: + if (reg->u.b.read) { + rc = reg->u.b.read(s, reg_entry, ptr_val, valid_mask); + } + break; + case 2: + if (reg->u.w.read) { + rc = reg->u.w.read(s, reg_entry, + (uint16_t *)ptr_val, valid_mask); + } + break; + case 4: + if (reg->u.dw.read) { + rc = reg->u.dw.read(s, reg_entry, + (uint32_t *)ptr_val, valid_mask); + } + break; + } + + if (rc < 0) { + xen_shutdown_fatal_error("Internal error: Invalid read " + "emulation. (%s, rc: %d)\n", + __func__, rc); + return 0; + } + + /* calculate next address to find */ + emul_len -= reg->size; + if (emul_len > 0) { + find_addr = real_offset + reg->size; + } + } else { + /* nothing to do with passthrough type register, + * continue to find next byte */ + emul_len--; + find_addr++; + } + } + + /* need to shift back before returning them to pci bus emulator */ + val >>= ((addr & 3) << 3); + +exit: + XEN_PT_LOG_CONFIG(d, addr, val, len); + return val; +} + +static void xen_pt_pci_write_config(PCIDevice *d, uint32_t addr, + uint32_t val, int len) +{ + XenPCIPassthroughState *s = XEN_PT_DEVICE(d); + int index = 0; + XenPTRegGroup *reg_grp_entry = NULL; + int rc = 0; + uint32_t read_val = 0, wb_mask; + int emul_len = 0; + XenPTReg *reg_entry = NULL; + uint32_t find_addr = addr; + XenPTRegInfo *reg = NULL; + bool wp_flag = false; + + if (xen_pt_pci_config_access_check(d, addr, len)) { + return; + } + + XEN_PT_LOG_CONFIG(d, addr, val, len); + + /* check unused BAR register */ + index = xen_pt_bar_offset_to_index(addr); + if ((index >= 0) && (val != 0)) { + uint32_t chk = val; + + if (index == PCI_ROM_SLOT) + chk |= (uint32_t)~PCI_ROM_ADDRESS_MASK; + + if ((chk != XEN_PT_BAR_ALLF) && + (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED)) { + XEN_PT_WARN(d, "Guest attempt to set address to unused " + "Base Address Register. (addr: 0x%02x, len: %d)\n", + addr, len); + } + } + + /* find register group entry */ + reg_grp_entry = xen_pt_find_reg_grp(s, addr); + if (reg_grp_entry) { + /* check 0-Hardwired register group */ + if (reg_grp_entry->reg_grp->grp_type == XEN_PT_GRP_TYPE_HARDWIRED) { + /* ignore silently */ + XEN_PT_WARN(d, "Access to 0-Hardwired register. " + "(addr: 0x%02x, len: %d)\n", addr, len); + return; + } + } + + rc = xen_host_pci_get_block(&s->real_device, addr, + (uint8_t *)&read_val, len); + if (rc < 0) { + XEN_PT_ERR(d, "pci_read_block failed. return value: %d.\n", rc); + memset(&read_val, 0xff, len); + wb_mask = 0; + } else { + wb_mask = 0xFFFFFFFF >> ((4 - len) << 3); + } + + /* pass directly to the real device for passthrough type register group */ + if (reg_grp_entry == NULL) { + if (!s->permissive) { + wb_mask = 0; + wp_flag = true; + } + goto out; + } + + memory_region_transaction_begin(); + pci_default_write_config(d, addr, val, len); + + /* adjust the read and write value to appropriate CFC-CFF window */ + read_val <<= (addr & 3) << 3; + val <<= (addr & 3) << 3; + emul_len = len; + + /* loop around the guest requested size */ + while (emul_len > 0) { + /* find register entry to be emulated */ + reg_entry = xen_pt_find_reg(reg_grp_entry, find_addr); + if (reg_entry) { + reg = reg_entry->reg; + uint32_t real_offset = reg_grp_entry->base_offset + reg->offset; + uint32_t valid_mask = 0xFFFFFFFF >> ((4 - emul_len) << 3); + uint8_t *ptr_val = NULL; + uint32_t wp_mask = reg->emu_mask | reg->ro_mask; + + valid_mask <<= (find_addr - real_offset) << 3; + ptr_val = (uint8_t *)&val + (real_offset & 3); + if (!s->permissive) { + wp_mask |= reg->res_mask; + } + if (wp_mask == (0xFFFFFFFF >> ((4 - reg->size) << 3))) { + wb_mask &= ~((wp_mask >> ((find_addr - real_offset) << 3)) + << ((len - emul_len) << 3)); + } + + /* do emulation based on register size */ + switch (reg->size) { + case 1: + if (reg->u.b.write) { + rc = reg->u.b.write(s, reg_entry, ptr_val, + read_val >> ((real_offset & 3) << 3), + valid_mask); + } + break; + case 2: + if (reg->u.w.write) { + rc = reg->u.w.write(s, reg_entry, (uint16_t *)ptr_val, + (read_val >> ((real_offset & 3) << 3)), + valid_mask); + } + break; + case 4: + if (reg->u.dw.write) { + rc = reg->u.dw.write(s, reg_entry, (uint32_t *)ptr_val, + (read_val >> ((real_offset & 3) << 3)), + valid_mask); + } + break; + } + + if (rc < 0) { + xen_shutdown_fatal_error("Internal error: Invalid write" + " emulation. (%s, rc: %d)\n", + __func__, rc); + return; + } + + /* calculate next address to find */ + emul_len -= reg->size; + if (emul_len > 0) { + find_addr = real_offset + reg->size; + } + } else { + /* nothing to do with passthrough type register, + * continue to find next byte */ + if (!s->permissive) { + wb_mask &= ~(0xff << ((len - emul_len) << 3)); + /* Unused BARs will make it here, but we don't want to issue + * warnings for writes to them (bogus writes get dealt with + * above). + */ + if (index < 0) { + wp_flag = true; + } + } + emul_len--; + find_addr++; + } + } + + /* need to shift back before passing them to xen_host_pci_set_block. */ + val >>= (addr & 3) << 3; + + memory_region_transaction_commit(); + +out: + if (wp_flag && !s->permissive_warned) { + s->permissive_warned = true; + xen_pt_log(d, "Write-back to unknown field 0x%02x (partially) inhibited (0x%0*x)\n", + addr, len * 2, wb_mask); + xen_pt_log(d, "If the device doesn't work, try enabling permissive mode\n"); + xen_pt_log(d, "(unsafe) and if it helps report the problem to xen-devel\n"); + } + for (index = 0; wb_mask; index += len) { + /* unknown regs are passed through */ + while (!(wb_mask & 0xff)) { + index++; + wb_mask >>= 8; + } + len = 0; + do { + len++; + wb_mask >>= 8; + } while (wb_mask & 0xff); + rc = xen_host_pci_set_block(&s->real_device, addr + index, + (uint8_t *)&val + index, len); + + if (rc < 0) { + XEN_PT_ERR(d, "xen_host_pci_set_block failed. return value: %d.\n", rc); + } + } +} + +/* register regions */ + +static uint64_t xen_pt_bar_read(void *o, hwaddr addr, + unsigned size) +{ + PCIDevice *d = o; + /* if this function is called, that probably means that there is a + * misconfiguration of the IOMMU. */ + XEN_PT_ERR(d, "Should not read BAR through QEMU. @0x"TARGET_FMT_plx"\n", + addr); + return 0; +} +static void xen_pt_bar_write(void *o, hwaddr addr, uint64_t val, + unsigned size) +{ + PCIDevice *d = o; + /* Same comment as xen_pt_bar_read function */ + XEN_PT_ERR(d, "Should not write BAR through QEMU. @0x"TARGET_FMT_plx"\n", + addr); +} + +static const MemoryRegionOps ops = { + .endianness = DEVICE_NATIVE_ENDIAN, + .read = xen_pt_bar_read, + .write = xen_pt_bar_write, +}; + +static int xen_pt_register_regions(XenPCIPassthroughState *s, uint16_t *cmd) +{ + int i = 0; + XenHostPCIDevice *d = &s->real_device; + + /* Register PIO/MMIO BARs */ + for (i = 0; i < PCI_ROM_SLOT; i++) { + XenHostPCIIORegion *r = &d->io_regions[i]; + uint8_t type; + + if (r->base_addr == 0 || r->size == 0) { + continue; + } + + s->bases[i].access.u = r->base_addr; + + if (r->type & XEN_HOST_PCI_REGION_TYPE_IO) { + type = PCI_BASE_ADDRESS_SPACE_IO; + *cmd |= PCI_COMMAND_IO; + } else { + type = PCI_BASE_ADDRESS_SPACE_MEMORY; + if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) { + type |= PCI_BASE_ADDRESS_MEM_PREFETCH; + } + if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) { + type |= PCI_BASE_ADDRESS_MEM_TYPE_64; + } + *cmd |= PCI_COMMAND_MEMORY; + } + + memory_region_init_io(&s->bar[i], OBJECT(s), &ops, &s->dev, + "xen-pci-pt-bar", r->size); + pci_register_bar(&s->dev, i, type, &s->bar[i]); + + XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64 + " base_addr=0x%08"PRIx64" type: 0x%x)\n", + i, r->size, r->base_addr, type); + } + + /* Register expansion ROM address */ + if (d->rom.base_addr && d->rom.size) { + uint32_t bar_data = 0; + + /* Re-set BAR reported by OS, otherwise ROM can't be read. */ + if (xen_host_pci_get_long(d, PCI_ROM_ADDRESS, &bar_data)) { + return 0; + } + if ((bar_data & PCI_ROM_ADDRESS_MASK) == 0) { + bar_data |= d->rom.base_addr & PCI_ROM_ADDRESS_MASK; + xen_host_pci_set_long(d, PCI_ROM_ADDRESS, bar_data); + } + + s->bases[PCI_ROM_SLOT].access.maddr = d->rom.base_addr; + + memory_region_init_io(&s->rom, OBJECT(s), &ops, &s->dev, + "xen-pci-pt-rom", d->rom.size); + pci_register_bar(&s->dev, PCI_ROM_SLOT, PCI_BASE_ADDRESS_MEM_PREFETCH, + &s->rom); + + XEN_PT_LOG(&s->dev, "Expansion ROM registered (size=0x%08"PRIx64 + " base_addr=0x%08"PRIx64")\n", + d->rom.size, d->rom.base_addr); + } + + xen_pt_register_vga_regions(d); + return 0; +} + +/* region mapping */ + +static int xen_pt_bar_from_region(XenPCIPassthroughState *s, MemoryRegion *mr) +{ + int i = 0; + + for (i = 0; i < PCI_NUM_REGIONS - 1; i++) { + if (mr == &s->bar[i]) { + return i; + } + } + if (mr == &s->rom) { + return PCI_ROM_SLOT; + } + return -1; +} + +/* + * This function checks if an io_region overlaps an io_region from another + * device. The io_region to check is provided with (addr, size and type) + * A callback can be provided and will be called for every region that is + * overlapped. + * The return value indicates if the region is overlappsed */ +struct CheckBarArgs { + XenPCIPassthroughState *s; + pcibus_t addr; + pcibus_t size; + uint8_t type; + bool rc; +}; +static void xen_pt_check_bar_overlap(PCIBus *bus, PCIDevice *d, void *opaque) +{ + struct CheckBarArgs *arg = opaque; + XenPCIPassthroughState *s = arg->s; + uint8_t type = arg->type; + int i; + + if (d->devfn == s->dev.devfn) { + return; + } + + /* xxx: This ignores bridges. */ + for (i = 0; i < PCI_NUM_REGIONS; i++) { + const PCIIORegion *r = &d->io_regions[i]; + + if (!r->size) { + continue; + } + if ((type & PCI_BASE_ADDRESS_SPACE_IO) + != (r->type & PCI_BASE_ADDRESS_SPACE_IO)) { + continue; + } + + if (ranges_overlap(arg->addr, arg->size, r->addr, r->size)) { + XEN_PT_WARN(&s->dev, + "Overlapped to device [%02x:%02x.%d] Region: %i" + " (addr: 0x%"FMT_PCIBUS", len: 0x%"FMT_PCIBUS")\n", + pci_bus_num(bus), PCI_SLOT(d->devfn), + PCI_FUNC(d->devfn), i, r->addr, r->size); + arg->rc = true; + } + } +} + +static void xen_pt_region_update(XenPCIPassthroughState *s, + MemoryRegionSection *sec, bool adding) +{ + PCIDevice *d = &s->dev; + MemoryRegion *mr = sec->mr; + int bar = -1; + int rc; + int op = adding ? DPCI_ADD_MAPPING : DPCI_REMOVE_MAPPING; + struct CheckBarArgs args = { + .s = s, + .addr = sec->offset_within_address_space, + .size = int128_get64(sec->size), + .rc = false, + }; + + bar = xen_pt_bar_from_region(s, mr); + if (bar == -1 && (!s->msix || &s->msix->mmio != mr)) { + return; + } + + if (s->msix && &s->msix->mmio == mr) { + if (adding) { + s->msix->mmio_base_addr = sec->offset_within_address_space; + rc = xen_pt_msix_update_remap(s, s->msix->bar_index); + } + return; + } + + args.type = d->io_regions[bar].type; + pci_for_each_device_under_bus(pci_get_bus(d), + xen_pt_check_bar_overlap, &args); + if (args.rc) { + XEN_PT_WARN(d, "Region: %d (addr: 0x%"FMT_PCIBUS + ", len: 0x%"FMT_PCIBUS") is overlapped.\n", + bar, sec->offset_within_address_space, + int128_get64(sec->size)); + } + + if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) { + uint32_t guest_port = sec->offset_within_address_space; + uint32_t machine_port = s->bases[bar].access.pio_base; + uint32_t size = int128_get64(sec->size); + rc = xc_domain_ioport_mapping(xen_xc, xen_domid, + guest_port, machine_port, size, + op); + if (rc) { + XEN_PT_ERR(d, "%s ioport mapping failed! (err: %i)\n", + adding ? "create new" : "remove old", errno); + } + } else { + pcibus_t guest_addr = sec->offset_within_address_space; + pcibus_t machine_addr = s->bases[bar].access.maddr + + sec->offset_within_region; + pcibus_t size = int128_get64(sec->size); + rc = xc_domain_memory_mapping(xen_xc, xen_domid, + XEN_PFN(guest_addr + XC_PAGE_SIZE - 1), + XEN_PFN(machine_addr + XC_PAGE_SIZE - 1), + XEN_PFN(size + XC_PAGE_SIZE - 1), + op); + if (rc) { + XEN_PT_ERR(d, "%s mem mapping failed! (err: %i)\n", + adding ? "create new" : "remove old", errno); + } + } +} + +static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec) +{ + XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, + memory_listener); + + memory_region_ref(sec->mr); + xen_pt_region_update(s, sec, true); +} + +static void xen_pt_region_del(MemoryListener *l, MemoryRegionSection *sec) +{ + XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, + memory_listener); + + xen_pt_region_update(s, sec, false); + memory_region_unref(sec->mr); +} + +static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec) +{ + XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, + io_listener); + + memory_region_ref(sec->mr); + xen_pt_region_update(s, sec, true); +} + +static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec) +{ + XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState, + io_listener); + + xen_pt_region_update(s, sec, false); + memory_region_unref(sec->mr); +} + +static const MemoryListener xen_pt_memory_listener = { + .name = "xen-pt-mem", + .region_add = xen_pt_region_add, + .region_del = xen_pt_region_del, + .priority = 10, +}; + +static const MemoryListener xen_pt_io_listener = { + .name = "xen-pt-io", + .region_add = xen_pt_io_region_add, + .region_del = xen_pt_io_region_del, + .priority = 10, +}; + +static void +xen_igd_passthrough_isa_bridge_create(XenPCIPassthroughState *s, + XenHostPCIDevice *dev) +{ + uint16_t gpu_dev_id; + PCIDevice *d = &s->dev; + + gpu_dev_id = dev->device_id; + igd_passthrough_isa_bridge_create(pci_get_bus(d), gpu_dev_id); +} + +/* destroy. */ +static void xen_pt_destroy(PCIDevice *d) { + + XenPCIPassthroughState *s = XEN_PT_DEVICE(d); + XenHostPCIDevice *host_dev = &s->real_device; + uint8_t machine_irq = s->machine_irq; + uint8_t intx; + int rc; + + if (machine_irq && !xen_host_pci_device_closed(&s->real_device)) { + intx = xen_pt_pci_intx(s); + rc = xc_domain_unbind_pt_irq(xen_xc, xen_domid, machine_irq, + PT_IRQ_TYPE_PCI, + pci_dev_bus_num(d), + PCI_SLOT(s->dev.devfn), + intx, + 0 /* isa_irq */); + if (rc < 0) { + XEN_PT_ERR(d, "unbinding of interrupt INT%c failed." + " (machine irq: %i, err: %d)" + " But bravely continuing on..\n", + 'a' + intx, machine_irq, errno); + } + } + + /* N.B. xen_pt_config_delete takes care of freeing them. */ + if (s->msi) { + xen_pt_msi_disable(s); + } + if (s->msix) { + xen_pt_msix_disable(s); + } + + if (machine_irq) { + xen_pt_mapped_machine_irq[machine_irq]--; + + if (xen_pt_mapped_machine_irq[machine_irq] == 0) { + rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq); + + if (rc < 0) { + XEN_PT_ERR(d, "unmapping of interrupt %i failed. (err: %d)" + " But bravely continuing on..\n", + machine_irq, errno); + } + } + s->machine_irq = 0; + } + + /* delete all emulated config registers */ + xen_pt_config_delete(s); + + xen_pt_unregister_vga_regions(host_dev); + + if (s->listener_set) { + memory_listener_unregister(&s->memory_listener); + memory_listener_unregister(&s->io_listener); + s->listener_set = false; + } + if (!xen_host_pci_device_closed(&s->real_device)) { + xen_host_pci_device_put(&s->real_device); + } +} +/* init */ + +static void xen_pt_realize(PCIDevice *d, Error **errp) +{ + ERRP_GUARD(); + XenPCIPassthroughState *s = XEN_PT_DEVICE(d); + int i, rc = 0; + uint8_t machine_irq = 0, scratch; + uint16_t cmd = 0; + int pirq = XEN_PT_UNASSIGNED_PIRQ; + + /* register real device */ + XEN_PT_LOG(d, "Assigning real physical device %02x:%02x.%d" + " to devfn 0x%x\n", + s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function, + s->dev.devfn); + + xen_host_pci_device_get(&s->real_device, + s->hostaddr.domain, s->hostaddr.bus, + s->hostaddr.slot, s->hostaddr.function, + errp); + if (*errp) { + error_append_hint(errp, "Failed to \"open\" the real pci device"); + return; + } + + s->is_virtfn = s->real_device.is_virtfn; + if (s->is_virtfn) { + XEN_PT_LOG(d, "%04x:%02x:%02x.%d is a SR-IOV Virtual Function\n", + s->real_device.domain, s->real_device.bus, + s->real_device.dev, s->real_device.func); + } + + /* Initialize virtualized PCI configuration (Extended 256 Bytes) */ + memset(d->config, 0, PCI_CONFIG_SPACE_SIZE); + + s->memory_listener = xen_pt_memory_listener; + s->io_listener = xen_pt_io_listener; + + /* Setup VGA bios for passthrough GFX */ + if ((s->real_device.domain == 0) && (s->real_device.bus == 0) && + (s->real_device.dev == 2) && (s->real_device.func == 0)) { + if (!is_igd_vga_passthrough(&s->real_device)) { + error_setg(errp, "Need to enable igd-passthru if you're trying" + " to passthrough IGD GFX"); + xen_host_pci_device_put(&s->real_device); + return; + } + + xen_pt_setup_vga(s, &s->real_device, errp); + if (*errp) { + error_append_hint(errp, "Setup VGA BIOS of passthrough" + " GFX failed"); + xen_host_pci_device_put(&s->real_device); + return; + } + + /* Register ISA bridge for passthrough GFX. */ + xen_igd_passthrough_isa_bridge_create(s, &s->real_device); + } + + /* Handle real device's MMIO/PIO BARs */ + xen_pt_register_regions(s, &cmd); + + /* reinitialize each config register to be emulated */ + xen_pt_config_init(s, errp); + if (*errp) { + error_append_hint(errp, "PCI Config space initialisation failed"); + rc = -1; + goto err_out; + } + + /* Bind interrupt */ + rc = xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &scratch); + if (rc) { + error_setg_errno(errp, errno, "Failed to read PCI_INTERRUPT_PIN"); + goto err_out; + } + if (!scratch) { + XEN_PT_LOG(d, "no pin interrupt\n"); + goto out; + } + + machine_irq = s->real_device.irq; + if (machine_irq == 0) { + XEN_PT_LOG(d, "machine irq is 0\n"); + cmd |= PCI_COMMAND_INTX_DISABLE; + goto out; + } + + rc = xc_physdev_map_pirq(xen_xc, xen_domid, machine_irq, &pirq); + if (rc < 0) { + XEN_PT_ERR(d, "Mapping machine irq %u to pirq %i failed, (err: %d)\n", + machine_irq, pirq, errno); + + /* Disable PCI intx assertion (turn on bit10 of devctl) */ + cmd |= PCI_COMMAND_INTX_DISABLE; + machine_irq = 0; + s->machine_irq = 0; + } else { + machine_irq = pirq; + s->machine_irq = pirq; + xen_pt_mapped_machine_irq[machine_irq]++; + } + + /* bind machine_irq to device */ + if (machine_irq != 0) { + uint8_t e_intx = xen_pt_pci_intx(s); + + rc = xc_domain_bind_pt_pci_irq(xen_xc, xen_domid, machine_irq, + pci_dev_bus_num(d), + PCI_SLOT(d->devfn), + e_intx); + if (rc < 0) { + XEN_PT_ERR(d, "Binding of interrupt %i failed! (err: %d)\n", + e_intx, errno); + + /* Disable PCI intx assertion (turn on bit10 of devctl) */ + cmd |= PCI_COMMAND_INTX_DISABLE; + xen_pt_mapped_machine_irq[machine_irq]--; + + if (xen_pt_mapped_machine_irq[machine_irq] == 0) { + if (xc_physdev_unmap_pirq(xen_xc, xen_domid, machine_irq)) { + XEN_PT_ERR(d, "Unmapping of machine interrupt %i failed!" + " (err: %d)\n", machine_irq, errno); + } + } + s->machine_irq = 0; + } + } + +out: + if (cmd) { + uint16_t val; + + rc = xen_host_pci_get_word(&s->real_device, PCI_COMMAND, &val); + if (rc) { + error_setg_errno(errp, errno, "Failed to read PCI_COMMAND"); + goto err_out; + } else { + val |= cmd; + rc = xen_host_pci_set_word(&s->real_device, PCI_COMMAND, val); + if (rc) { + error_setg_errno(errp, errno, "Failed to write PCI_COMMAND" + " val = 0x%x", val); + goto err_out; + } + } + } + + memory_listener_register(&s->memory_listener, &address_space_memory); + memory_listener_register(&s->io_listener, &address_space_io); + s->listener_set = true; + XEN_PT_LOG(d, + "Real physical device %02x:%02x.%d registered successfully\n", + s->hostaddr.bus, s->hostaddr.slot, s->hostaddr.function); + + return; + +err_out: + for (i = 0; i < PCI_ROM_SLOT; i++) { + object_unparent(OBJECT(&s->bar[i])); + } + object_unparent(OBJECT(&s->rom)); + + xen_pt_destroy(d); + assert(rc); +} + +static void xen_pt_unregister_device(PCIDevice *d) +{ + xen_pt_destroy(d); +} + +static Property xen_pci_passthrough_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("hostaddr", XenPCIPassthroughState, hostaddr), + DEFINE_PROP_BOOL("permissive", XenPCIPassthroughState, permissive, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void xen_pci_passthrough_instance_init(Object *obj) +{ + /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command + * line, therefore, no need to wait to realize like other devices */ + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; +} + +static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = xen_pt_realize; + k->exit = xen_pt_unregister_device; + k->config_read = xen_pt_pci_read_config; + k->config_write = xen_pt_pci_write_config; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + dc->desc = "Assign an host PCI device with Xen"; + device_class_set_props(dc, xen_pci_passthrough_properties); +}; + +static void xen_pci_passthrough_finalize(Object *obj) +{ + XenPCIPassthroughState *s = XEN_PT_DEVICE(obj); + + xen_pt_msix_delete(s); +} + +static const TypeInfo xen_pci_passthrough_info = { + .name = TYPE_XEN_PT_DEVICE, + .parent = TYPE_PCI_DEVICE, + .instance_size = sizeof(XenPCIPassthroughState), + .instance_finalize = xen_pci_passthrough_finalize, + .class_init = xen_pci_passthrough_class_init, + .instance_init = xen_pci_passthrough_instance_init, + .interfaces = (InterfaceInfo[]) { + { INTERFACE_CONVENTIONAL_PCI_DEVICE }, + { INTERFACE_PCIE_DEVICE }, + { }, + }, +}; + +static void xen_pci_passthrough_register_types(void) +{ + type_register_static(&xen_pci_passthrough_info); +} + +type_init(xen_pci_passthrough_register_types) diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h new file mode 100644 index 000000000..c74c4678f --- /dev/null +++ b/hw/xen/xen_pt.h @@ -0,0 +1,336 @@ +#ifndef XEN_PT_H +#define XEN_PT_H + +#include "hw/xen/xen_common.h" +#include "hw/pci/pci.h" +#include "xen-host-pci-device.h" +#include "qom/object.h" + +bool xen_igd_gfx_pt_enabled(void); +void xen_igd_gfx_pt_set(bool value, Error **errp); + +void xen_pt_log(const PCIDevice *d, const char *f, ...) GCC_FMT_ATTR(2, 3); + +#define XEN_PT_ERR(d, _f, _a...) xen_pt_log(d, "%s: Error: "_f, __func__, ##_a) + +#ifdef XEN_PT_LOGGING_ENABLED +# define XEN_PT_LOG(d, _f, _a...) xen_pt_log(d, "%s: " _f, __func__, ##_a) +# define XEN_PT_WARN(d, _f, _a...) \ + xen_pt_log(d, "%s: Warning: "_f, __func__, ##_a) +#else +# define XEN_PT_LOG(d, _f, _a...) +# define XEN_PT_WARN(d, _f, _a...) +#endif + +#ifdef XEN_PT_DEBUG_PCI_CONFIG_ACCESS +# define XEN_PT_LOG_CONFIG(d, addr, val, len) \ + xen_pt_log(d, "%s: address=0x%04x val=0x%08x len=%d\n", \ + __func__, addr, val, len) +#else +# define XEN_PT_LOG_CONFIG(d, addr, val, len) +#endif + + +/* Helper */ +#define XEN_PFN(x) ((x) >> XC_PAGE_SHIFT) + +typedef const struct XenPTRegInfo XenPTRegInfo; +typedef struct XenPTReg XenPTReg; + + +#define TYPE_XEN_PT_DEVICE "xen-pci-passthrough" +OBJECT_DECLARE_SIMPLE_TYPE(XenPCIPassthroughState, XEN_PT_DEVICE) + +uint32_t igd_read_opregion(XenPCIPassthroughState *s); +void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val); + +/* function type for config reg */ +typedef int (*xen_pt_conf_reg_init) + (XenPCIPassthroughState *, XenPTRegInfo *, uint32_t real_offset, + uint32_t *data); +typedef int (*xen_pt_conf_dword_write) + (XenPCIPassthroughState *, XenPTReg *cfg_entry, + uint32_t *val, uint32_t dev_value, uint32_t valid_mask); +typedef int (*xen_pt_conf_word_write) + (XenPCIPassthroughState *, XenPTReg *cfg_entry, + uint16_t *val, uint16_t dev_value, uint16_t valid_mask); +typedef int (*xen_pt_conf_byte_write) + (XenPCIPassthroughState *, XenPTReg *cfg_entry, + uint8_t *val, uint8_t dev_value, uint8_t valid_mask); +typedef int (*xen_pt_conf_dword_read) + (XenPCIPassthroughState *, XenPTReg *cfg_entry, + uint32_t *val, uint32_t valid_mask); +typedef int (*xen_pt_conf_word_read) + (XenPCIPassthroughState *, XenPTReg *cfg_entry, + uint16_t *val, uint16_t valid_mask); +typedef int (*xen_pt_conf_byte_read) + (XenPCIPassthroughState *, XenPTReg *cfg_entry, + uint8_t *val, uint8_t valid_mask); + +#define XEN_PT_BAR_ALLF 0xFFFFFFFF +#define XEN_PT_BAR_UNMAPPED (-1) + +#define XEN_PCI_CAP_MAX 48 + +#define XEN_PCI_INTEL_OPREGION 0xfc + +typedef enum { + XEN_PT_GRP_TYPE_HARDWIRED = 0, /* 0 Hardwired reg group */ + XEN_PT_GRP_TYPE_EMU, /* emul reg group */ +} XenPTRegisterGroupType; + +typedef enum { + XEN_PT_BAR_FLAG_MEM = 0, /* Memory type BAR */ + XEN_PT_BAR_FLAG_IO, /* I/O type BAR */ + XEN_PT_BAR_FLAG_UPPER, /* upper 64bit BAR */ + XEN_PT_BAR_FLAG_UNUSED, /* unused BAR */ +} XenPTBarFlag; + + +typedef struct XenPTRegion { + /* BAR flag */ + XenPTBarFlag bar_flag; + /* Translation of the emulated address */ + union { + uint64_t maddr; + uint64_t pio_base; + uint64_t u; + } access; +} XenPTRegion; + +/* XenPTRegInfo declaration + * - only for emulated register (either a part or whole bit). + * - for passthrough register that need special behavior (like interacting with + * other component), set emu_mask to all 0 and specify r/w func properly. + * - do NOT use ALL F for init_val, otherwise the tbl will not be registered. + */ + +/* emulated register information */ +struct XenPTRegInfo { + uint32_t offset; + uint32_t size; + uint32_t init_val; + /* reg reserved field mask (ON:reserved, OFF:defined) */ + uint32_t res_mask; + /* reg read only field mask (ON:RO/ROS, OFF:other) */ + uint32_t ro_mask; + /* reg read/write-1-clear field mask (ON:RW1C/RW1CS, OFF:other) */ + uint32_t rw1c_mask; + /* reg emulate field mask (ON:emu, OFF:passthrough) */ + uint32_t emu_mask; + xen_pt_conf_reg_init init; + /* read/write function pointer + * for double_word/word/byte size */ + union { + struct { + xen_pt_conf_dword_write write; + xen_pt_conf_dword_read read; + } dw; + struct { + xen_pt_conf_word_write write; + xen_pt_conf_word_read read; + } w; + struct { + xen_pt_conf_byte_write write; + xen_pt_conf_byte_read read; + } b; + } u; +}; + +/* emulated register management */ +struct XenPTReg { + QLIST_ENTRY(XenPTReg) entries; + XenPTRegInfo *reg; + union { + uint8_t *byte; + uint16_t *half_word; + uint32_t *word; + } ptr; /* pointer to dev.config. */ +}; + +typedef const struct XenPTRegGroupInfo XenPTRegGroupInfo; + +/* emul reg group size initialize method */ +typedef int (*xen_pt_reg_size_init_fn) + (XenPCIPassthroughState *, XenPTRegGroupInfo *, + uint32_t base_offset, uint8_t *size); + +/* emulated register group information */ +struct XenPTRegGroupInfo { + uint8_t grp_id; + XenPTRegisterGroupType grp_type; + uint8_t grp_size; + xen_pt_reg_size_init_fn size_init; + XenPTRegInfo *emu_regs; +}; + +/* emul register group management table */ +typedef struct XenPTRegGroup { + QLIST_ENTRY(XenPTRegGroup) entries; + XenPTRegGroupInfo *reg_grp; + uint32_t base_offset; + uint8_t size; + QLIST_HEAD(, XenPTReg) reg_tbl_list; +} XenPTRegGroup; + + +#define XEN_PT_UNASSIGNED_PIRQ (-1) +typedef struct XenPTMSI { + uint16_t flags; + uint32_t addr_lo; /* guest message address */ + uint32_t addr_hi; /* guest message upper address */ + uint16_t data; /* guest message data */ + uint32_t ctrl_offset; /* saved control offset */ + uint32_t mask; /* guest mask bits */ + int pirq; /* guest pirq corresponding */ + bool initialized; /* when guest MSI is initialized */ + bool mapped; /* when pirq is mapped */ +} XenPTMSI; + +typedef struct XenPTMSIXEntry { + int pirq; + uint64_t addr; + uint32_t data; + uint32_t latch[4]; + bool updated; /* indicate whether MSI ADDR or DATA is updated */ +} XenPTMSIXEntry; +typedef struct XenPTMSIX { + uint32_t ctrl_offset; + bool enabled; + bool maskall; + int total_entries; + int bar_index; + uint64_t table_base; + uint32_t table_offset_adjust; /* page align mmap */ + uint64_t mmio_base_addr; + MemoryRegion mmio; + void *phys_iomem_base; + XenPTMSIXEntry msix_entry[]; +} XenPTMSIX; + +struct XenPCIPassthroughState { + PCIDevice dev; + + PCIHostDeviceAddress hostaddr; + bool is_virtfn; + bool permissive; + bool permissive_warned; + XenHostPCIDevice real_device; + XenPTRegion bases[PCI_NUM_REGIONS]; /* Access regions */ + QLIST_HEAD(, XenPTRegGroup) reg_grps; + + uint32_t machine_irq; + + XenPTMSI *msi; + XenPTMSIX *msix; + + MemoryRegion bar[PCI_NUM_REGIONS - 1]; + MemoryRegion rom; + + MemoryListener memory_listener; + MemoryListener io_listener; + bool listener_set; +}; + +void xen_pt_config_init(XenPCIPassthroughState *s, Error **errp); +void xen_pt_config_delete(XenPCIPassthroughState *s); +XenPTRegGroup *xen_pt_find_reg_grp(XenPCIPassthroughState *s, uint32_t address); +XenPTReg *xen_pt_find_reg(XenPTRegGroup *reg_grp, uint32_t address); +int xen_pt_bar_offset_to_index(uint32_t offset); + +static inline pcibus_t xen_pt_get_emul_size(XenPTBarFlag flag, pcibus_t r_size) +{ + /* align resource size (memory type only) */ + if (flag == XEN_PT_BAR_FLAG_MEM) { + return (r_size + XC_PAGE_SIZE - 1) & XC_PAGE_MASK; + } else { + return r_size; + } +} + +/* INTx */ +/* The PCI Local Bus Specification, Rev. 3.0, + * Section 6.2.4 Miscellaneous Registers, pp 223 + * outlines 5 valid values for the interrupt pin (intx). + * 0: For devices (or device functions) that don't use an interrupt in + * 1: INTA# + * 2: INTB# + * 3: INTC# + * 4: INTD# + * + * Xen uses the following 4 values for intx + * 0: INTA# + * 1: INTB# + * 2: INTC# + * 3: INTD# + * + * Observing that these list of values are not the same, xen_pt_pci_read_intx() + * uses the following mapping from hw to xen values. + * This seems to reflect the current usage within Xen. + * + * PCI hardware | Xen | Notes + * ----------------+-----+---------------------------------------------------- + * 0 | 0 | No interrupt + * 1 | 0 | INTA# + * 2 | 1 | INTB# + * 3 | 2 | INTC# + * 4 | 3 | INTD# + * any other value | 0 | This should never happen, log error message + */ + +static inline uint8_t xen_pt_pci_read_intx(XenPCIPassthroughState *s) +{ + uint8_t v = 0; + xen_host_pci_get_byte(&s->real_device, PCI_INTERRUPT_PIN, &v); + return v; +} + +static inline uint8_t xen_pt_pci_intx(XenPCIPassthroughState *s) +{ + uint8_t r_val = xen_pt_pci_read_intx(s); + + XEN_PT_LOG(&s->dev, "intx=%i\n", r_val); + if (r_val < 1 || r_val > 4) { + XEN_PT_LOG(&s->dev, "Interrupt pin read from hardware is out of range:" + " value=%i, acceptable range is 1 - 4\n", r_val); + r_val = 0; + } else { + /* Note that if s.real_device.config_fd is closed we make 0xff. */ + r_val -= 1; + } + + return r_val; +} + +/* MSI/MSI-X */ +int xen_pt_msi_setup(XenPCIPassthroughState *s); +int xen_pt_msi_update(XenPCIPassthroughState *d); +void xen_pt_msi_disable(XenPCIPassthroughState *s); + +int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base); +void xen_pt_msix_delete(XenPCIPassthroughState *s); +void xen_pt_msix_unmap(XenPCIPassthroughState *s); +int xen_pt_msix_update(XenPCIPassthroughState *s); +int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index); +void xen_pt_msix_disable(XenPCIPassthroughState *s); + +static inline bool xen_pt_has_msix_mapping(XenPCIPassthroughState *s, int bar) +{ + return s->msix && s->msix->bar_index == bar; +} + +extern void *pci_assign_dev_load_option_rom(PCIDevice *dev, + int *size, + unsigned int domain, + unsigned int bus, unsigned int slot, + unsigned int function); +static inline bool is_igd_vga_passthrough(XenHostPCIDevice *dev) +{ + return (xen_igd_gfx_pt_enabled() + && ((dev->class_code >> 0x8) == PCI_CLASS_DISPLAY_VGA)); +} +int xen_pt_register_vga_regions(XenHostPCIDevice *dev); +int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev); +void xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev, + Error **errp); +#endif /* XEN_PT_H */ diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c new file mode 100644 index 000000000..c5c4e943a --- /dev/null +++ b/hw/xen/xen_pt_config_init.c @@ -0,0 +1,2110 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * Copyright (c) 2007, Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Alex Novik <alex@neocleus.com> + * Allen Kay <allen.m.kay@intel.com> + * Guy Zana <guy@neocleus.com> + * + * This file implements direct PCI assignment to a HVM guest + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/timer.h" +#include "hw/xen/xen-legacy-backend.h" +#include "xen_pt.h" + +#define XEN_PT_MERGE_VALUE(value, data, val_mask) \ + (((value) & (val_mask)) | ((data) & ~(val_mask))) + +#define XEN_PT_INVALID_REG 0xFFFFFFFF /* invalid register value */ + +/* prototype */ + +static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, + uint32_t real_offset, uint32_t *data); + + +/* helper */ + +/* A return value of 1 means the capability should NOT be exposed to guest. */ +static int xen_pt_hide_dev_cap(const XenHostPCIDevice *d, uint8_t grp_id) +{ + switch (grp_id) { + case PCI_CAP_ID_EXP: + /* The PCI Express Capability Structure of the VF of Intel 82599 10GbE + * Controller looks trivial, e.g., the PCI Express Capabilities + * Register is 0. We should not try to expose it to guest. + * + * The datasheet is available at + * http://download.intel.com/design/network/datashts/82599_datasheet.pdf + * + * See 'Table 9.7. VF PCIe Configuration Space' of the datasheet, the + * PCI Express Capability Structure of the VF of Intel 82599 10GbE + * Controller looks trivial, e.g., the PCI Express Capabilities + * Register is 0, so the Capability Version is 0 and + * xen_pt_pcie_size_init() would fail. + */ + if (d->vendor_id == PCI_VENDOR_ID_INTEL && + d->device_id == PCI_DEVICE_ID_INTEL_82599_SFP_VF) { + return 1; + } + break; + } + return 0; +} + +/* find emulate register group entry */ +XenPTRegGroup *xen_pt_find_reg_grp(XenPCIPassthroughState *s, uint32_t address) +{ + XenPTRegGroup *entry = NULL; + + /* find register group entry */ + QLIST_FOREACH(entry, &s->reg_grps, entries) { + /* check address */ + if ((entry->base_offset <= address) + && ((entry->base_offset + entry->size) > address)) { + return entry; + } + } + + /* group entry not found */ + return NULL; +} + +/* find emulate register entry */ +XenPTReg *xen_pt_find_reg(XenPTRegGroup *reg_grp, uint32_t address) +{ + XenPTReg *reg_entry = NULL; + XenPTRegInfo *reg = NULL; + uint32_t real_offset = 0; + + /* find register entry */ + QLIST_FOREACH(reg_entry, ®_grp->reg_tbl_list, entries) { + reg = reg_entry->reg; + real_offset = reg_grp->base_offset + reg->offset; + /* check address */ + if ((real_offset <= address) + && ((real_offset + reg->size) > address)) { + return reg_entry; + } + } + + return NULL; +} + +static uint32_t get_throughable_mask(const XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t valid_mask) +{ + uint32_t throughable_mask = ~(reg->emu_mask | reg->ro_mask); + + if (!s->permissive) { + throughable_mask &= ~reg->res_mask; + } + + return throughable_mask & valid_mask; +} + +/**************** + * general register functions + */ + +/* register initialization function */ + +static int xen_pt_common_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + *data = reg->init_val; + return 0; +} + +/* Read register functions */ + +static int xen_pt_byte_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint8_t *value, uint8_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint8_t valid_emu_mask = 0; + uint8_t *data = cfg_entry->ptr.byte; + + /* emulate byte register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); + + return 0; +} +static int xen_pt_word_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t valid_emu_mask = 0; + uint16_t *data = cfg_entry->ptr.half_word; + + /* emulate word register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); + + return 0; +} +static int xen_pt_long_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t valid_emu_mask = 0; + uint32_t *data = cfg_entry->ptr.word; + + /* emulate long register */ + valid_emu_mask = reg->emu_mask & valid_mask; + *value = XEN_PT_MERGE_VALUE(*value, *data, ~valid_emu_mask); + + return 0; +} + +/* Write register functions */ + +static int xen_pt_byte_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint8_t *val, uint8_t dev_value, + uint8_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint8_t writable_mask = 0; + uint8_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint8_t *data = cfg_entry->ptr.byte; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~reg->rw1c_mask, + throughable_mask); + + return 0; +} +static int xen_pt_word_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *val, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~reg->rw1c_mask, + throughable_mask); + + return 0; +} +static int xen_pt_long_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *val, uint32_t dev_value, + uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t writable_mask = 0; + uint32_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint32_t *data = cfg_entry->ptr.word; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value & ~reg->rw1c_mask, + throughable_mask); + + return 0; +} + + +/* XenPTRegInfo declaration + * - only for emulated register (either a part or whole bit). + * - for passthrough register that need special behavior (like interacting with + * other component), set emu_mask to all 0 and specify r/w func properly. + * - do NOT use ALL F for init_val, otherwise the tbl will not be registered. + */ + +/******************** + * Header Type0 + */ + +static int xen_pt_vendor_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + *data = s->real_device.vendor_id; + return 0; +} +static int xen_pt_device_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + *data = s->real_device.device_id; + return 0; +} +static int xen_pt_status_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + XenPTRegGroup *reg_grp_entry = NULL; + XenPTReg *reg_entry = NULL; + uint32_t reg_field = 0; + + /* find Header register group */ + reg_grp_entry = xen_pt_find_reg_grp(s, PCI_CAPABILITY_LIST); + if (reg_grp_entry) { + /* find Capabilities Pointer register */ + reg_entry = xen_pt_find_reg(reg_grp_entry, PCI_CAPABILITY_LIST); + if (reg_entry) { + /* check Capabilities Pointer register */ + if (*reg_entry->ptr.half_word) { + reg_field |= PCI_STATUS_CAP_LIST; + } else { + reg_field &= ~PCI_STATUS_CAP_LIST; + } + } else { + xen_shutdown_fatal_error("Internal error: Couldn't find XenPTReg*" + " for Capabilities Pointer register." + " (%s)\n", __func__); + return -1; + } + } else { + xen_shutdown_fatal_error("Internal error: Couldn't find XenPTRegGroup" + " for Header. (%s)\n", __func__); + return -1; + } + + *data = reg_field; + return 0; +} +static int xen_pt_header_type_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + /* read PCI_HEADER_TYPE */ + *data = reg->init_val | 0x80; + return 0; +} + +/* initialize Interrupt Pin register */ +static int xen_pt_irqpin_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + if (s->real_device.irq) { + *data = xen_pt_pci_read_intx(s); + } + return 0; +} + +/* Command register */ +static int xen_pt_cmd_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint16_t *val, uint16_t dev_value, + uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; + + /* modify emulate register */ + writable_mask = ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + + /* create value for writing to I/O device register */ + if (*val & PCI_COMMAND_INTX_DISABLE) { + throughable_mask |= PCI_COMMAND_INTX_DISABLE; + } else { + if (s->machine_irq) { + throughable_mask |= PCI_COMMAND_INTX_DISABLE; + } + } + + *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); + + return 0; +} + +/* BAR */ +#define XEN_PT_BAR_MEM_RO_MASK 0x0000000F /* BAR ReadOnly mask(Memory) */ +#define XEN_PT_BAR_MEM_EMU_MASK 0xFFFFFFF0 /* BAR emul mask(Memory) */ +#define XEN_PT_BAR_IO_RO_MASK 0x00000003 /* BAR ReadOnly mask(I/O) */ +#define XEN_PT_BAR_IO_EMU_MASK 0xFFFFFFFC /* BAR emul mask(I/O) */ + +static bool is_64bit_bar(PCIIORegion *r) +{ + return !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64); +} + +static uint64_t xen_pt_get_bar_size(PCIIORegion *r) +{ + if (is_64bit_bar(r)) { + uint64_t size64; + size64 = (r + 1)->size; + size64 <<= 32; + size64 += r->size; + return size64; + } + return r->size; +} + +static XenPTBarFlag xen_pt_bar_reg_parse(XenPCIPassthroughState *s, + int index) +{ + PCIDevice *d = PCI_DEVICE(s); + XenPTRegion *region = NULL; + PCIIORegion *r; + + /* check 64bit BAR */ + if ((0 < index) && (index < PCI_ROM_SLOT)) { + int type = s->real_device.io_regions[index - 1].type; + + if ((type & XEN_HOST_PCI_REGION_TYPE_MEM) + && (type & XEN_HOST_PCI_REGION_TYPE_MEM_64)) { + region = &s->bases[index - 1]; + if (region->bar_flag != XEN_PT_BAR_FLAG_UPPER) { + return XEN_PT_BAR_FLAG_UPPER; + } + } + } + + /* check unused BAR */ + r = &d->io_regions[index]; + if (!xen_pt_get_bar_size(r)) { + return XEN_PT_BAR_FLAG_UNUSED; + } + + /* for ExpROM BAR */ + if (index == PCI_ROM_SLOT) { + return XEN_PT_BAR_FLAG_MEM; + } + + /* check BAR I/O indicator */ + if (s->real_device.io_regions[index].type & XEN_HOST_PCI_REGION_TYPE_IO) { + return XEN_PT_BAR_FLAG_IO; + } else { + return XEN_PT_BAR_FLAG_MEM; + } +} + +static inline uint32_t base_address_with_flags(XenHostPCIIORegion *hr) +{ + if (hr->type & XEN_HOST_PCI_REGION_TYPE_IO) { + return hr->base_addr | (hr->bus_flags & ~PCI_BASE_ADDRESS_IO_MASK); + } else { + return hr->base_addr | (hr->bus_flags & ~PCI_BASE_ADDRESS_MEM_MASK); + } +} + +static int xen_pt_bar_reg_init(XenPCIPassthroughState *s, XenPTRegInfo *reg, + uint32_t real_offset, uint32_t *data) +{ + uint32_t reg_field = 0; + int index; + + index = xen_pt_bar_offset_to_index(reg->offset); + if (index < 0 || index >= PCI_NUM_REGIONS) { + XEN_PT_ERR(&s->dev, "Internal error: Invalid BAR index [%d].\n", index); + return -1; + } + + /* set BAR flag */ + s->bases[index].bar_flag = xen_pt_bar_reg_parse(s, index); + if (s->bases[index].bar_flag == XEN_PT_BAR_FLAG_UNUSED) { + reg_field = XEN_PT_INVALID_REG; + } + + *data = reg_field; + return 0; +} +static int xen_pt_bar_reg_read(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t valid_emu_mask = 0; + uint32_t bar_emu_mask = 0; + int index; + + /* get BAR index */ + index = xen_pt_bar_offset_to_index(reg->offset); + if (index < 0 || index >= PCI_NUM_REGIONS - 1) { + XEN_PT_ERR(&s->dev, "Internal error: Invalid BAR index [%d].\n", index); + return -1; + } + + /* use fixed-up value from kernel sysfs */ + *value = base_address_with_flags(&s->real_device.io_regions[index]); + + /* set emulate mask depend on BAR flag */ + switch (s->bases[index].bar_flag) { + case XEN_PT_BAR_FLAG_MEM: + bar_emu_mask = XEN_PT_BAR_MEM_EMU_MASK; + break; + case XEN_PT_BAR_FLAG_IO: + bar_emu_mask = XEN_PT_BAR_IO_EMU_MASK; + break; + case XEN_PT_BAR_FLAG_UPPER: + bar_emu_mask = XEN_PT_BAR_ALLF; + break; + default: + break; + } + + /* emulate BAR */ + valid_emu_mask = bar_emu_mask & valid_mask; + *value = XEN_PT_MERGE_VALUE(*value, *cfg_entry->ptr.word, ~valid_emu_mask); + + return 0; +} +static int xen_pt_bar_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *val, uint32_t dev_value, + uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTRegion *base = NULL; + PCIDevice *d = PCI_DEVICE(s); + const PCIIORegion *r; + uint32_t writable_mask = 0; + uint32_t bar_emu_mask = 0; + uint32_t bar_ro_mask = 0; + uint32_t r_size = 0; + int index = 0; + uint32_t *data = cfg_entry->ptr.word; + + index = xen_pt_bar_offset_to_index(reg->offset); + if (index < 0 || index >= PCI_NUM_REGIONS) { + XEN_PT_ERR(d, "Internal error: Invalid BAR index [%d].\n", index); + return -1; + } + + r = &d->io_regions[index]; + base = &s->bases[index]; + r_size = xen_pt_get_emul_size(base->bar_flag, r->size); + + /* set emulate mask and read-only mask values depend on the BAR flag */ + switch (s->bases[index].bar_flag) { + case XEN_PT_BAR_FLAG_MEM: + bar_emu_mask = XEN_PT_BAR_MEM_EMU_MASK; + if (!r_size) { + /* low 32 bits mask for 64 bit bars */ + bar_ro_mask = XEN_PT_BAR_ALLF; + } else { + bar_ro_mask = XEN_PT_BAR_MEM_RO_MASK | (r_size - 1); + } + break; + case XEN_PT_BAR_FLAG_IO: + bar_emu_mask = XEN_PT_BAR_IO_EMU_MASK; + bar_ro_mask = XEN_PT_BAR_IO_RO_MASK | (r_size - 1); + break; + case XEN_PT_BAR_FLAG_UPPER: + assert(index > 0); + r_size = d->io_regions[index - 1].size >> 32; + bar_emu_mask = XEN_PT_BAR_ALLF; + bar_ro_mask = r_size ? r_size - 1 : 0; + break; + default: + break; + } + + /* modify emulate register */ + writable_mask = bar_emu_mask & ~bar_ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + + /* check whether we need to update the virtual region address or not */ + switch (s->bases[index].bar_flag) { + case XEN_PT_BAR_FLAG_UPPER: + case XEN_PT_BAR_FLAG_MEM: + /* nothing to do */ + break; + case XEN_PT_BAR_FLAG_IO: + /* nothing to do */ + break; + default: + break; + } + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); + + return 0; +} + +/* write Exp ROM BAR */ +static int xen_pt_exp_rom_bar_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *val, + uint32_t dev_value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTRegion *base = NULL; + PCIDevice *d = PCI_DEVICE(s); + uint32_t writable_mask = 0; + uint32_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + pcibus_t r_size = 0; + uint32_t bar_ro_mask = 0; + uint32_t *data = cfg_entry->ptr.word; + + r_size = d->io_regions[PCI_ROM_SLOT].size; + base = &s->bases[PCI_ROM_SLOT]; + /* align memory type resource size */ + r_size = xen_pt_get_emul_size(base->bar_flag, r_size); + + /* set emulate mask and read-only mask */ + bar_ro_mask = (reg->ro_mask | (r_size - 1)) & ~PCI_ROM_ADDRESS_ENABLE; + + /* modify emulate register */ + writable_mask = ~bar_ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); + + return 0; +} + +static int xen_pt_intel_opregion_read(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, + uint32_t *value, uint32_t valid_mask) +{ + *value = igd_read_opregion(s); + return 0; +} + +static int xen_pt_intel_opregion_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *value, + uint32_t dev_value, uint32_t valid_mask) +{ + igd_write_opregion(s, *value); + return 0; +} + +/* Header Type0 reg static information table */ +static XenPTRegInfo xen_pt_emu_reg_header0[] = { + /* Vendor ID reg */ + { + .offset = PCI_VENDOR_ID, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xFFFF, + .init = xen_pt_vendor_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Device ID reg */ + { + .offset = PCI_DEVICE_ID, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xFFFF, + .init = xen_pt_device_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Command reg */ + { + .offset = PCI_COMMAND, + .size = 2, + .init_val = 0x0000, + .res_mask = 0xF880, + .emu_mask = 0x0743, + .init = xen_pt_common_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_cmd_reg_write, + }, + /* Capabilities Pointer reg */ + { + .offset = PCI_CAPABILITY_LIST, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_ptr_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Status reg */ + /* use emulated Cap Ptr value to initialize, + * so need to be declared after Cap Ptr reg + */ + { + .offset = PCI_STATUS, + .size = 2, + .init_val = 0x0000, + .res_mask = 0x0007, + .ro_mask = 0x06F8, + .rw1c_mask = 0xF900, + .emu_mask = 0x0010, + .init = xen_pt_status_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Cache Line Size reg */ + { + .offset = PCI_CACHE_LINE_SIZE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = xen_pt_common_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Latency Timer reg */ + { + .offset = PCI_LATENCY_TIMER, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = xen_pt_common_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Header Type reg */ + { + .offset = PCI_HEADER_TYPE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0x00, + .init = xen_pt_header_type_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Interrupt Line reg */ + { + .offset = PCI_INTERRUPT_LINE, + .size = 1, + .init_val = 0x00, + .ro_mask = 0x00, + .emu_mask = 0xFF, + .init = xen_pt_common_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Interrupt Pin reg */ + { + .offset = PCI_INTERRUPT_PIN, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_irqpin_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* BAR 0 reg */ + /* mask of BAR need to be decided later, depends on IO/MEM type */ + { + .offset = PCI_BASE_ADDRESS_0, + .size = 4, + .init_val = 0x00000000, + .init = xen_pt_bar_reg_init, + .u.dw.read = xen_pt_bar_reg_read, + .u.dw.write = xen_pt_bar_reg_write, + }, + /* BAR 1 reg */ + { + .offset = PCI_BASE_ADDRESS_1, + .size = 4, + .init_val = 0x00000000, + .init = xen_pt_bar_reg_init, + .u.dw.read = xen_pt_bar_reg_read, + .u.dw.write = xen_pt_bar_reg_write, + }, + /* BAR 2 reg */ + { + .offset = PCI_BASE_ADDRESS_2, + .size = 4, + .init_val = 0x00000000, + .init = xen_pt_bar_reg_init, + .u.dw.read = xen_pt_bar_reg_read, + .u.dw.write = xen_pt_bar_reg_write, + }, + /* BAR 3 reg */ + { + .offset = PCI_BASE_ADDRESS_3, + .size = 4, + .init_val = 0x00000000, + .init = xen_pt_bar_reg_init, + .u.dw.read = xen_pt_bar_reg_read, + .u.dw.write = xen_pt_bar_reg_write, + }, + /* BAR 4 reg */ + { + .offset = PCI_BASE_ADDRESS_4, + .size = 4, + .init_val = 0x00000000, + .init = xen_pt_bar_reg_init, + .u.dw.read = xen_pt_bar_reg_read, + .u.dw.write = xen_pt_bar_reg_write, + }, + /* BAR 5 reg */ + { + .offset = PCI_BASE_ADDRESS_5, + .size = 4, + .init_val = 0x00000000, + .init = xen_pt_bar_reg_init, + .u.dw.read = xen_pt_bar_reg_read, + .u.dw.write = xen_pt_bar_reg_write, + }, + /* Expansion ROM BAR reg */ + { + .offset = PCI_ROM_ADDRESS, + .size = 4, + .init_val = 0x00000000, + .ro_mask = ~PCI_ROM_ADDRESS_MASK & ~PCI_ROM_ADDRESS_ENABLE, + .emu_mask = (uint32_t)PCI_ROM_ADDRESS_MASK, + .init = xen_pt_bar_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_exp_rom_bar_reg_write, + }, + { + .size = 0, + }, +}; + + +/********************************* + * Vital Product Data Capability + */ + +/* Vital Product Data Capability Structure reg static information table */ +static XenPTRegInfo xen_pt_emu_reg_vpd[] = { + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_ptr_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + { + .offset = PCI_VPD_ADDR, + .size = 2, + .ro_mask = 0x0003, + .emu_mask = 0x0003, + .init = xen_pt_common_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + { + .size = 0, + }, +}; + + +/************************************** + * Vendor Specific Capability + */ + +/* Vendor Specific Capability Structure reg static information table */ +static XenPTRegInfo xen_pt_emu_reg_vendor[] = { + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_ptr_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + { + .size = 0, + }, +}; + + +/***************************** + * PCI Express Capability + */ + +static inline uint8_t get_capability_version(XenPCIPassthroughState *s, + uint32_t offset) +{ + uint8_t flag; + if (xen_host_pci_get_byte(&s->real_device, offset + PCI_EXP_FLAGS, &flag)) { + return 0; + } + return flag & PCI_EXP_FLAGS_VERS; +} + +static inline uint8_t get_device_type(XenPCIPassthroughState *s, + uint32_t offset) +{ + uint8_t flag; + if (xen_host_pci_get_byte(&s->real_device, offset + PCI_EXP_FLAGS, &flag)) { + return 0; + } + return (flag & PCI_EXP_FLAGS_TYPE) >> 4; +} + +/* initialize Link Control register */ +static int xen_pt_linkctrl_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset); + uint8_t dev_type = get_device_type(s, real_offset - reg->offset); + + /* no need to initialize in case of Root Complex Integrated Endpoint + * with cap_ver 1.x + */ + if ((dev_type == PCI_EXP_TYPE_RC_END) && (cap_ver == 1)) { + *data = XEN_PT_INVALID_REG; + } + + *data = reg->init_val; + return 0; +} +/* initialize Device Control 2 register */ +static int xen_pt_devctrl2_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset); + + /* no need to initialize in case of cap_ver 1.x */ + if (cap_ver == 1) { + *data = XEN_PT_INVALID_REG; + } + + *data = reg->init_val; + return 0; +} +/* initialize Link Control 2 register */ +static int xen_pt_linkctrl2_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint8_t cap_ver = get_capability_version(s, real_offset - reg->offset); + uint32_t reg_field = 0; + + /* no need to initialize in case of cap_ver 1.x */ + if (cap_ver == 1) { + reg_field = XEN_PT_INVALID_REG; + } else { + /* set Supported Link Speed */ + uint8_t lnkcap; + int rc; + rc = xen_host_pci_get_byte(&s->real_device, + real_offset - reg->offset + PCI_EXP_LNKCAP, + &lnkcap); + if (rc) { + return rc; + } + reg_field |= PCI_EXP_LNKCAP_SLS & lnkcap; + } + + *data = reg_field; + return 0; +} + +/* PCI Express Capability Structure reg static information table */ +static XenPTRegInfo xen_pt_emu_reg_pcie[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_ptr_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Device Capabilities reg */ + { + .offset = PCI_EXP_DEVCAP, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0xFFFFFFFF, + .emu_mask = 0x10000000, + .init = xen_pt_common_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_long_reg_write, + }, + /* Device Control reg */ + { + .offset = PCI_EXP_DEVCTL, + .size = 2, + .init_val = 0x2810, + .ro_mask = 0x8400, + .emu_mask = 0xFFFF, + .init = xen_pt_common_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Device Status reg */ + { + .offset = PCI_EXP_DEVSTA, + .size = 2, + .res_mask = 0xFFC0, + .ro_mask = 0x0030, + .rw1c_mask = 0x000F, + .init = xen_pt_common_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Link Control reg */ + { + .offset = PCI_EXP_LNKCTL, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFC34, + .emu_mask = 0xFFFF, + .init = xen_pt_linkctrl_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Link Status reg */ + { + .offset = PCI_EXP_LNKSTA, + .size = 2, + .ro_mask = 0x3FFF, + .rw1c_mask = 0xC000, + .init = xen_pt_common_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Device Control 2 reg */ + { + .offset = 0x28, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFE0, + .emu_mask = 0xFFFF, + .init = xen_pt_devctrl2_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* Link Control 2 reg */ + { + .offset = 0x30, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xE040, + .emu_mask = 0xFFFF, + .init = xen_pt_linkctrl2_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + { + .size = 0, + }, +}; + + +/********************************* + * Power Management Capability + */ + +/* Power Management Capability reg static information table */ +static XenPTRegInfo xen_pt_emu_reg_pm[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_ptr_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Power Management Capabilities reg */ + { + .offset = PCI_CAP_FLAGS, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0xFFFF, + .emu_mask = 0xF9C8, + .init = xen_pt_common_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + /* PCI Power Management Control/Status reg */ + { + .offset = PCI_PM_CTRL, + .size = 2, + .init_val = 0x0008, + .res_mask = 0x00F0, + .ro_mask = 0x610C, + .rw1c_mask = 0x8000, + .emu_mask = 0x810B, + .init = xen_pt_common_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_word_reg_write, + }, + { + .size = 0, + }, +}; + + +/******************************** + * MSI Capability + */ + +/* Helper */ +#define xen_pt_msi_check_type(offset, flags, what) \ + ((offset) == ((flags) & PCI_MSI_FLAGS_64BIT ? \ + PCI_MSI_##what##_64 : PCI_MSI_##what##_32)) + +/* Message Control register */ +static int xen_pt_msgctrl_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + XenPTMSI *msi = s->msi; + uint16_t reg_field; + int rc; + + /* use I/O device register's value as initial value */ + rc = xen_host_pci_get_word(&s->real_device, real_offset, ®_field); + if (rc) { + return rc; + } + if (reg_field & PCI_MSI_FLAGS_ENABLE) { + XEN_PT_LOG(&s->dev, "MSI already enabled, disabling it first\n"); + xen_host_pci_set_word(&s->real_device, real_offset, + reg_field & ~PCI_MSI_FLAGS_ENABLE); + } + msi->flags |= reg_field; + msi->ctrl_offset = real_offset; + msi->initialized = false; + msi->mapped = false; + + *data = reg->init_val; + return 0; +} +static int xen_pt_msgctrl_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint16_t *val, + uint16_t dev_value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTMSI *msi = s->msi; + uint16_t writable_mask = 0; + uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + uint16_t *data = cfg_entry->ptr.half_word; + + /* Currently no support for multi-vector */ + if (*val & PCI_MSI_FLAGS_QSIZE) { + XEN_PT_WARN(&s->dev, "Tries to set more than 1 vector ctrl %x\n", *val); + } + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + msi->flags |= *data & ~PCI_MSI_FLAGS_ENABLE; + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); + + /* update MSI */ + if (*val & PCI_MSI_FLAGS_ENABLE) { + /* setup MSI pirq for the first time */ + if (!msi->initialized) { + /* Init physical one */ + XEN_PT_LOG(&s->dev, "setup MSI (register: %x).\n", *val); + if (xen_pt_msi_setup(s)) { + /* We do not broadcast the error to the framework code, so + * that MSI errors are contained in MSI emulation code and + * QEMU can go on running. + * Guest MSI would be actually not working. + */ + *val &= ~PCI_MSI_FLAGS_ENABLE; + XEN_PT_WARN(&s->dev, "Can not map MSI (register: %x)!\n", *val); + return 0; + } + if (xen_pt_msi_update(s)) { + *val &= ~PCI_MSI_FLAGS_ENABLE; + XEN_PT_WARN(&s->dev, "Can not bind MSI (register: %x)!\n", *val); + return 0; + } + msi->initialized = true; + msi->mapped = true; + } + msi->flags |= PCI_MSI_FLAGS_ENABLE; + } else if (msi->mapped) { + xen_pt_msi_disable(s); + } + + return 0; +} + +/* initialize Message Upper Address register */ +static int xen_pt_msgaddr64_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + /* no need to initialize in case of 32 bit type */ + if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) { + *data = XEN_PT_INVALID_REG; + } else { + *data = reg->init_val; + } + + return 0; +} +/* this function will be called twice (for 32 bit and 64 bit type) */ +/* initialize Message Data register */ +static int xen_pt_msgdata_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint32_t flags = s->msi->flags; + uint32_t offset = reg->offset; + + /* check the offset whether matches the type or not */ + if (xen_pt_msi_check_type(offset, flags, DATA)) { + *data = reg->init_val; + } else { + *data = XEN_PT_INVALID_REG; + } + return 0; +} + +/* this function will be called twice (for 32 bit and 64 bit type) */ +/* initialize Mask register */ +static int xen_pt_mask_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint32_t flags = s->msi->flags; + + /* check the offset whether matches the type or not */ + if (!(flags & PCI_MSI_FLAGS_MASKBIT)) { + *data = XEN_PT_INVALID_REG; + } else if (xen_pt_msi_check_type(reg->offset, flags, MASK)) { + *data = reg->init_val; + } else { + *data = XEN_PT_INVALID_REG; + } + return 0; +} + +/* this function will be called twice (for 32 bit and 64 bit type) */ +/* initialize Pending register */ +static int xen_pt_pending_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint32_t flags = s->msi->flags; + + /* check the offset whether matches the type or not */ + if (!(flags & PCI_MSI_FLAGS_MASKBIT)) { + *data = XEN_PT_INVALID_REG; + } else if (xen_pt_msi_check_type(reg->offset, flags, PENDING)) { + *data = reg->init_val; + } else { + *data = XEN_PT_INVALID_REG; + } + return 0; +} + +/* write Message Address register */ +static int xen_pt_msgaddr32_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *val, + uint32_t dev_value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t writable_mask = 0; + uint32_t old_addr = *cfg_entry->ptr.word; + uint32_t *data = cfg_entry->ptr.word; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + s->msi->addr_lo = *data; + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); + + /* update MSI */ + if (*data != old_addr) { + if (s->msi->mapped) { + xen_pt_msi_update(s); + } + } + + return 0; +} +/* write Message Upper Address register */ +static int xen_pt_msgaddr64_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint32_t *val, + uint32_t dev_value, uint32_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint32_t writable_mask = 0; + uint32_t old_addr = *cfg_entry->ptr.word; + uint32_t *data = cfg_entry->ptr.word; + + /* check whether the type is 64 bit or not */ + if (!(s->msi->flags & PCI_MSI_FLAGS_64BIT)) { + XEN_PT_ERR(&s->dev, + "Can't write to the upper address without 64 bit support\n"); + return -1; + } + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + /* update the msi_info too */ + s->msi->addr_hi = *data; + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); + + /* update MSI */ + if (*data != old_addr) { + if (s->msi->mapped) { + xen_pt_msi_update(s); + } + } + + return 0; +} + + +/* this function will be called twice (for 32 bit and 64 bit type) */ +/* write Message Data register */ +static int xen_pt_msgdata_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint16_t *val, + uint16_t dev_value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + XenPTMSI *msi = s->msi; + uint16_t writable_mask = 0; + uint16_t old_data = *cfg_entry->ptr.half_word; + uint32_t offset = reg->offset; + uint16_t *data = cfg_entry->ptr.half_word; + + /* check the offset whether matches the type or not */ + if (!xen_pt_msi_check_type(offset, msi->flags, DATA)) { + /* exit I/O emulator */ + XEN_PT_ERR(&s->dev, "the offset does not match the 32/64 bit type!\n"); + return -1; + } + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + /* update the msi_info too */ + msi->data = *data; + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value, 0); + + /* update MSI */ + if (*data != old_data) { + if (msi->mapped) { + xen_pt_msi_update(s); + } + } + + return 0; +} + +static int xen_pt_mask_reg_write(XenPCIPassthroughState *s, XenPTReg *cfg_entry, + uint32_t *val, uint32_t dev_value, + uint32_t valid_mask) +{ + int rc; + + rc = xen_pt_long_reg_write(s, cfg_entry, val, dev_value, valid_mask); + if (rc) { + return rc; + } + + s->msi->mask = *val; + + return 0; +} + +/* MSI Capability Structure reg static information table */ +static XenPTRegInfo xen_pt_emu_reg_msi[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_ptr_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Message Control reg */ + { + .offset = PCI_MSI_FLAGS, + .size = 2, + .init_val = 0x0000, + .res_mask = 0xFE00, + .ro_mask = 0x018E, + .emu_mask = 0x017E, + .init = xen_pt_msgctrl_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_msgctrl_reg_write, + }, + /* Message Address reg */ + { + .offset = PCI_MSI_ADDRESS_LO, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x00000003, + .emu_mask = 0xFFFFFFFF, + .init = xen_pt_common_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_msgaddr32_reg_write, + }, + /* Message Upper Address reg (if PCI_MSI_FLAGS_64BIT set) */ + { + .offset = PCI_MSI_ADDRESS_HI, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0x00000000, + .emu_mask = 0xFFFFFFFF, + .init = xen_pt_msgaddr64_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_msgaddr64_reg_write, + }, + /* Message Data reg (16 bits of data for 32-bit devices) */ + { + .offset = PCI_MSI_DATA_32, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0x0000, + .emu_mask = 0xFFFF, + .init = xen_pt_msgdata_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_msgdata_reg_write, + }, + /* Message Data reg (16 bits of data for 64-bit devices) */ + { + .offset = PCI_MSI_DATA_64, + .size = 2, + .init_val = 0x0000, + .ro_mask = 0x0000, + .emu_mask = 0xFFFF, + .init = xen_pt_msgdata_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_msgdata_reg_write, + }, + /* Mask reg (if PCI_MSI_FLAGS_MASKBIT set, for 32-bit devices) */ + { + .offset = PCI_MSI_MASK_32, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0xFFFFFFFF, + .emu_mask = 0xFFFFFFFF, + .init = xen_pt_mask_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_mask_reg_write, + }, + /* Mask reg (if PCI_MSI_FLAGS_MASKBIT set, for 64-bit devices) */ + { + .offset = PCI_MSI_MASK_64, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0xFFFFFFFF, + .emu_mask = 0xFFFFFFFF, + .init = xen_pt_mask_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_mask_reg_write, + }, + /* Pending reg (if PCI_MSI_FLAGS_MASKBIT set, for 32-bit devices) */ + { + .offset = PCI_MSI_MASK_32 + 4, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0xFFFFFFFF, + .emu_mask = 0x00000000, + .init = xen_pt_pending_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_long_reg_write, + }, + /* Pending reg (if PCI_MSI_FLAGS_MASKBIT set, for 64-bit devices) */ + { + .offset = PCI_MSI_MASK_64 + 4, + .size = 4, + .init_val = 0x00000000, + .ro_mask = 0xFFFFFFFF, + .emu_mask = 0x00000000, + .init = xen_pt_pending_reg_init, + .u.dw.read = xen_pt_long_reg_read, + .u.dw.write = xen_pt_long_reg_write, + }, + { + .size = 0, + }, +}; + + +/************************************** + * MSI-X Capability + */ + +/* Message Control register for MSI-X */ +static int xen_pt_msixctrl_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + uint16_t reg_field; + int rc; + + /* use I/O device register's value as initial value */ + rc = xen_host_pci_get_word(&s->real_device, real_offset, ®_field); + if (rc) { + return rc; + } + if (reg_field & PCI_MSIX_FLAGS_ENABLE) { + XEN_PT_LOG(&s->dev, "MSIX already enabled, disabling it first\n"); + xen_host_pci_set_word(&s->real_device, real_offset, + reg_field & ~PCI_MSIX_FLAGS_ENABLE); + } + + s->msix->ctrl_offset = real_offset; + + *data = reg->init_val; + return 0; +} +static int xen_pt_msixctrl_reg_write(XenPCIPassthroughState *s, + XenPTReg *cfg_entry, uint16_t *val, + uint16_t dev_value, uint16_t valid_mask) +{ + XenPTRegInfo *reg = cfg_entry->reg; + uint16_t writable_mask = 0; + uint16_t throughable_mask = get_throughable_mask(s, reg, valid_mask); + int debug_msix_enabled_old; + uint16_t *data = cfg_entry->ptr.half_word; + + /* modify emulate register */ + writable_mask = reg->emu_mask & ~reg->ro_mask & valid_mask; + *data = XEN_PT_MERGE_VALUE(*val, *data, writable_mask); + + /* create value for writing to I/O device register */ + *val = XEN_PT_MERGE_VALUE(*val, dev_value, throughable_mask); + + /* update MSI-X */ + if ((*val & PCI_MSIX_FLAGS_ENABLE) + && !(*val & PCI_MSIX_FLAGS_MASKALL)) { + xen_pt_msix_update(s); + } else if (!(*val & PCI_MSIX_FLAGS_ENABLE) && s->msix->enabled) { + xen_pt_msix_disable(s); + } + + s->msix->maskall = *val & PCI_MSIX_FLAGS_MASKALL; + + debug_msix_enabled_old = s->msix->enabled; + s->msix->enabled = !!(*val & PCI_MSIX_FLAGS_ENABLE); + if (s->msix->enabled != debug_msix_enabled_old) { + XEN_PT_LOG(&s->dev, "%s MSI-X\n", + s->msix->enabled ? "enable" : "disable"); + } + + return 0; +} + +/* MSI-X Capability Structure reg static information table */ +static XenPTRegInfo xen_pt_emu_reg_msix[] = { + /* Next Pointer reg */ + { + .offset = PCI_CAP_LIST_NEXT, + .size = 1, + .init_val = 0x00, + .ro_mask = 0xFF, + .emu_mask = 0xFF, + .init = xen_pt_ptr_reg_init, + .u.b.read = xen_pt_byte_reg_read, + .u.b.write = xen_pt_byte_reg_write, + }, + /* Message Control reg */ + { + .offset = PCI_MSI_FLAGS, + .size = 2, + .init_val = 0x0000, + .res_mask = 0x3800, + .ro_mask = 0x07FF, + .emu_mask = 0x0000, + .init = xen_pt_msixctrl_reg_init, + .u.w.read = xen_pt_word_reg_read, + .u.w.write = xen_pt_msixctrl_reg_write, + }, + { + .size = 0, + }, +}; + +static XenPTRegInfo xen_pt_emu_reg_igd_opregion[] = { + /* Intel IGFX OpRegion reg */ + { + .offset = 0x0, + .size = 4, + .init_val = 0, + .emu_mask = 0xFFFFFFFF, + .u.dw.read = xen_pt_intel_opregion_read, + .u.dw.write = xen_pt_intel_opregion_write, + }, + { + .size = 0, + }, +}; + +/**************************** + * Capabilities + */ + +/* capability structure register group size functions */ + +static int xen_pt_reg_grp_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + *size = grp_reg->grp_size; + return 0; +} +/* get Vendor Specific Capability Structure register group size */ +static int xen_pt_vendor_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + return xen_host_pci_get_byte(&s->real_device, base_offset + 0x02, size); +} +/* get PCI Express Capability Structure register group size */ +static int xen_pt_pcie_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + PCIDevice *d = PCI_DEVICE(s); + uint8_t version = get_capability_version(s, base_offset); + uint8_t type = get_device_type(s, base_offset); + uint8_t pcie_size = 0; + + + /* calculate size depend on capability version and device/port type */ + /* in case of PCI Express Base Specification Rev 1.x */ + if (version == 1) { + /* The PCI Express Capabilities, Device Capabilities, and Device + * Status/Control registers are required for all PCI Express devices. + * The Link Capabilities and Link Status/Control are required for all + * Endpoints that are not Root Complex Integrated Endpoints. Endpoints + * are not required to implement registers other than those listed + * above and terminate the capability structure. + */ + switch (type) { + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + pcie_size = 0x14; + break; + case PCI_EXP_TYPE_RC_END: + /* has no link */ + pcie_size = 0x0C; + break; + /* only EndPoint passthrough is supported */ + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_PCI_BRIDGE: + case PCI_EXP_TYPE_PCIE_BRIDGE: + case PCI_EXP_TYPE_RC_EC: + default: + XEN_PT_ERR(d, "Unsupported device/port type 0x%x.\n", type); + return -1; + } + } + /* in case of PCI Express Base Specification Rev 2.0 */ + else if (version == 2) { + switch (type) { + case PCI_EXP_TYPE_ENDPOINT: + case PCI_EXP_TYPE_LEG_END: + case PCI_EXP_TYPE_RC_END: + /* For Functions that do not implement the registers, + * these spaces must be hardwired to 0b. + */ + pcie_size = 0x3C; + break; + /* only EndPoint passthrough is supported */ + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_UPSTREAM: + case PCI_EXP_TYPE_DOWNSTREAM: + case PCI_EXP_TYPE_PCI_BRIDGE: + case PCI_EXP_TYPE_PCIE_BRIDGE: + case PCI_EXP_TYPE_RC_EC: + default: + XEN_PT_ERR(d, "Unsupported device/port type 0x%x.\n", type); + return -1; + } + } else { + XEN_PT_ERR(d, "Unsupported capability version 0x%x.\n", version); + return -1; + } + + *size = pcie_size; + return 0; +} +/* get MSI Capability Structure register group size */ +static int xen_pt_msi_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + uint16_t msg_ctrl = 0; + uint8_t msi_size = 0xa; + int rc; + + rc = xen_host_pci_get_word(&s->real_device, base_offset + PCI_MSI_FLAGS, + &msg_ctrl); + if (rc) { + return rc; + } + /* check if 64-bit address is capable of per-vector masking */ + if (msg_ctrl & PCI_MSI_FLAGS_64BIT) { + msi_size += 4; + } + if (msg_ctrl & PCI_MSI_FLAGS_MASKBIT) { + msi_size += 10; + } + + s->msi = g_new0(XenPTMSI, 1); + s->msi->pirq = XEN_PT_UNASSIGNED_PIRQ; + + *size = msi_size; + return 0; +} +/* get MSI-X Capability Structure register group size */ +static int xen_pt_msix_size_init(XenPCIPassthroughState *s, + const XenPTRegGroupInfo *grp_reg, + uint32_t base_offset, uint8_t *size) +{ + int rc = 0; + + rc = xen_pt_msix_init(s, base_offset); + + if (rc < 0) { + XEN_PT_ERR(&s->dev, "Internal error: Invalid xen_pt_msix_init.\n"); + return rc; + } + + *size = grp_reg->grp_size; + return 0; +} + + +static const XenPTRegGroupInfo xen_pt_emu_reg_grps[] = { + /* Header Type0 reg group */ + { + .grp_id = 0xFF, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0x40, + .size_init = xen_pt_reg_grp_size_init, + .emu_regs = xen_pt_emu_reg_header0, + }, + /* PCI PowerManagement Capability reg group */ + { + .grp_id = PCI_CAP_ID_PM, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = PCI_PM_SIZEOF, + .size_init = xen_pt_reg_grp_size_init, + .emu_regs = xen_pt_emu_reg_pm, + }, + /* AGP Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_AGP, + .grp_type = XEN_PT_GRP_TYPE_HARDWIRED, + .grp_size = 0x30, + .size_init = xen_pt_reg_grp_size_init, + }, + /* Vital Product Data Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_VPD, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0x08, + .size_init = xen_pt_reg_grp_size_init, + .emu_regs = xen_pt_emu_reg_vpd, + }, + /* Slot Identification reg group */ + { + .grp_id = PCI_CAP_ID_SLOTID, + .grp_type = XEN_PT_GRP_TYPE_HARDWIRED, + .grp_size = 0x04, + .size_init = xen_pt_reg_grp_size_init, + }, + /* MSI Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_MSI, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0xFF, + .size_init = xen_pt_msi_size_init, + .emu_regs = xen_pt_emu_reg_msi, + }, + /* PCI-X Capabilities List Item reg group */ + { + .grp_id = PCI_CAP_ID_PCIX, + .grp_type = XEN_PT_GRP_TYPE_HARDWIRED, + .grp_size = 0x18, + .size_init = xen_pt_reg_grp_size_init, + }, + /* Vendor Specific Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_VNDR, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0xFF, + .size_init = xen_pt_vendor_size_init, + .emu_regs = xen_pt_emu_reg_vendor, + }, + /* SHPC Capability List Item reg group */ + { + .grp_id = PCI_CAP_ID_SHPC, + .grp_type = XEN_PT_GRP_TYPE_HARDWIRED, + .grp_size = 0x08, + .size_init = xen_pt_reg_grp_size_init, + }, + /* Subsystem ID and Subsystem Vendor ID Capability List Item reg group */ + { + .grp_id = PCI_CAP_ID_SSVID, + .grp_type = XEN_PT_GRP_TYPE_HARDWIRED, + .grp_size = 0x08, + .size_init = xen_pt_reg_grp_size_init, + }, + /* AGP 8x Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_AGP3, + .grp_type = XEN_PT_GRP_TYPE_HARDWIRED, + .grp_size = 0x30, + .size_init = xen_pt_reg_grp_size_init, + }, + /* PCI Express Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_EXP, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0xFF, + .size_init = xen_pt_pcie_size_init, + .emu_regs = xen_pt_emu_reg_pcie, + }, + /* MSI-X Capability Structure reg group */ + { + .grp_id = PCI_CAP_ID_MSIX, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0x0C, + .size_init = xen_pt_msix_size_init, + .emu_regs = xen_pt_emu_reg_msix, + }, + /* Intel IGD Opregion group */ + { + .grp_id = XEN_PCI_INTEL_OPREGION, + .grp_type = XEN_PT_GRP_TYPE_EMU, + .grp_size = 0x4, + .size_init = xen_pt_reg_grp_size_init, + .emu_regs = xen_pt_emu_reg_igd_opregion, + }, + { + .grp_size = 0, + }, +}; + +/* initialize Capabilities Pointer or Next Pointer register */ +static int xen_pt_ptr_reg_init(XenPCIPassthroughState *s, + XenPTRegInfo *reg, uint32_t real_offset, + uint32_t *data) +{ + int i, rc; + uint8_t reg_field; + uint8_t cap_id = 0; + + rc = xen_host_pci_get_byte(&s->real_device, real_offset, ®_field); + if (rc) { + return rc; + } + /* find capability offset */ + while (reg_field) { + for (i = 0; xen_pt_emu_reg_grps[i].grp_size != 0; i++) { + if (xen_pt_hide_dev_cap(&s->real_device, + xen_pt_emu_reg_grps[i].grp_id)) { + continue; + } + + rc = xen_host_pci_get_byte(&s->real_device, + reg_field + PCI_CAP_LIST_ID, &cap_id); + if (rc) { + XEN_PT_ERR(&s->dev, "Failed to read capability @0x%x (rc:%d)\n", + reg_field + PCI_CAP_LIST_ID, rc); + return rc; + } + if (xen_pt_emu_reg_grps[i].grp_id == cap_id) { + if (xen_pt_emu_reg_grps[i].grp_type == XEN_PT_GRP_TYPE_EMU) { + goto out; + } + /* ignore the 0 hardwired capability, find next one */ + break; + } + } + + /* next capability */ + rc = xen_host_pci_get_byte(&s->real_device, + reg_field + PCI_CAP_LIST_NEXT, ®_field); + if (rc) { + return rc; + } + } + +out: + *data = reg_field; + return 0; +} + + +/************* + * Main + */ + +static uint8_t find_cap_offset(XenPCIPassthroughState *s, uint8_t cap) +{ + uint8_t id; + unsigned max_cap = XEN_PCI_CAP_MAX; + uint8_t pos = PCI_CAPABILITY_LIST; + uint8_t status = 0; + + if (xen_host_pci_get_byte(&s->real_device, PCI_STATUS, &status)) { + return 0; + } + if ((status & PCI_STATUS_CAP_LIST) == 0) { + return 0; + } + + while (max_cap--) { + if (xen_host_pci_get_byte(&s->real_device, pos, &pos)) { + break; + } + if (pos < PCI_CONFIG_HEADER_SIZE) { + break; + } + + pos &= ~3; + if (xen_host_pci_get_byte(&s->real_device, + pos + PCI_CAP_LIST_ID, &id)) { + break; + } + + if (id == 0xff) { + break; + } + if (id == cap) { + return pos; + } + + pos += PCI_CAP_LIST_NEXT; + } + return 0; +} + +static void xen_pt_config_reg_init(XenPCIPassthroughState *s, + XenPTRegGroup *reg_grp, XenPTRegInfo *reg, + Error **errp) +{ + XenPTReg *reg_entry; + uint32_t data = 0; + int rc = 0; + + reg_entry = g_new0(XenPTReg, 1); + reg_entry->reg = reg; + + if (reg->init) { + uint32_t host_mask, size_mask; + unsigned int offset; + uint32_t val; + + /* initialize emulate register */ + rc = reg->init(s, reg_entry->reg, + reg_grp->base_offset + reg->offset, &data); + if (rc < 0) { + g_free(reg_entry); + error_setg(errp, "Init emulate register fail"); + return; + } + if (data == XEN_PT_INVALID_REG) { + /* free unused BAR register entry */ + g_free(reg_entry); + return; + } + /* Sync up the data to dev.config */ + offset = reg_grp->base_offset + reg->offset; + size_mask = 0xFFFFFFFF >> ((4 - reg->size) << 3); + + switch (reg->size) { + case 1: rc = xen_host_pci_get_byte(&s->real_device, offset, (uint8_t *)&val); + break; + case 2: rc = xen_host_pci_get_word(&s->real_device, offset, (uint16_t *)&val); + break; + case 4: rc = xen_host_pci_get_long(&s->real_device, offset, &val); + break; + default: abort(); + } + if (rc) { + /* Serious issues when we cannot read the host values! */ + g_free(reg_entry); + error_setg(errp, "Cannot read host values"); + return; + } + /* Set bits in emu_mask are the ones we emulate. The dev.config shall + * contain the emulated view of the guest - therefore we flip the mask + * to mask out the host values (which dev.config initially has) . */ + host_mask = size_mask & ~reg->emu_mask; + + if ((data & host_mask) != (val & host_mask)) { + uint32_t new_val; + + /* Mask out host (including past size). */ + new_val = val & host_mask; + /* Merge emulated ones (excluding the non-emulated ones). */ + new_val |= data & host_mask; + /* Leave intact host and emulated values past the size - even though + * we do not care as we write per reg->size granularity, but for the + * logging below lets have the proper value. */ + new_val |= ((val | data)) & ~size_mask; + XEN_PT_LOG(&s->dev,"Offset 0x%04x mismatch! Emulated=0x%04x, host=0x%04x, syncing to 0x%04x.\n", + offset, data, val, new_val); + val = new_val; + } else + val = data; + + if (val & ~size_mask) { + error_setg(errp, "Offset 0x%04x:0x%04x expands past" + " register size (%d)", offset, val, reg->size); + g_free(reg_entry); + return; + } + /* This could be just pci_set_long as we don't modify the bits + * past reg->size, but in case this routine is run in parallel or the + * init value is larger, we do not want to over-write registers. */ + switch (reg->size) { + case 1: pci_set_byte(s->dev.config + offset, (uint8_t)val); + break; + case 2: pci_set_word(s->dev.config + offset, (uint16_t)val); + break; + case 4: pci_set_long(s->dev.config + offset, val); + break; + default: abort(); + } + /* set register value pointer to the data. */ + reg_entry->ptr.byte = s->dev.config + offset; + + } + /* list add register entry */ + QLIST_INSERT_HEAD(®_grp->reg_tbl_list, reg_entry, entries); +} + +void xen_pt_config_init(XenPCIPassthroughState *s, Error **errp) +{ + ERRP_GUARD(); + int i, rc; + + QLIST_INIT(&s->reg_grps); + + for (i = 0; xen_pt_emu_reg_grps[i].grp_size != 0; i++) { + uint32_t reg_grp_offset = 0; + XenPTRegGroup *reg_grp_entry = NULL; + + if (xen_pt_emu_reg_grps[i].grp_id != 0xFF + && xen_pt_emu_reg_grps[i].grp_id != XEN_PCI_INTEL_OPREGION) { + if (xen_pt_hide_dev_cap(&s->real_device, + xen_pt_emu_reg_grps[i].grp_id)) { + continue; + } + + reg_grp_offset = find_cap_offset(s, xen_pt_emu_reg_grps[i].grp_id); + + if (!reg_grp_offset) { + continue; + } + } + + /* + * By default we will trap up to 0x40 in the cfg space. + * If an intel device is pass through we need to trap 0xfc, + * therefore the size should be 0xff. + */ + if (xen_pt_emu_reg_grps[i].grp_id == XEN_PCI_INTEL_OPREGION) { + reg_grp_offset = XEN_PCI_INTEL_OPREGION; + } + + reg_grp_entry = g_new0(XenPTRegGroup, 1); + QLIST_INIT(®_grp_entry->reg_tbl_list); + QLIST_INSERT_HEAD(&s->reg_grps, reg_grp_entry, entries); + + reg_grp_entry->base_offset = reg_grp_offset; + reg_grp_entry->reg_grp = xen_pt_emu_reg_grps + i; + if (xen_pt_emu_reg_grps[i].size_init) { + /* get register group size */ + rc = xen_pt_emu_reg_grps[i].size_init(s, reg_grp_entry->reg_grp, + reg_grp_offset, + ®_grp_entry->size); + if (rc < 0) { + error_setg(errp, "Failed to initialize %d/%zu, type = 0x%x," + " rc: %d", i, ARRAY_SIZE(xen_pt_emu_reg_grps), + xen_pt_emu_reg_grps[i].grp_type, rc); + xen_pt_config_delete(s); + return; + } + } + + if (xen_pt_emu_reg_grps[i].grp_type == XEN_PT_GRP_TYPE_EMU) { + if (xen_pt_emu_reg_grps[i].emu_regs) { + int j = 0; + XenPTRegInfo *regs = xen_pt_emu_reg_grps[i].emu_regs; + + /* initialize capability register */ + for (j = 0; regs->size != 0; j++, regs++) { + xen_pt_config_reg_init(s, reg_grp_entry, regs, errp); + if (*errp) { + error_append_hint(errp, "Failed to init register %d" + " offsets 0x%x in grp_type = 0x%x (%d/%zu)", + j, + regs->offset, + xen_pt_emu_reg_grps[i].grp_type, + i, ARRAY_SIZE(xen_pt_emu_reg_grps)); + xen_pt_config_delete(s); + return; + } + } + } + } + } +} + +/* delete all emulate register */ +void xen_pt_config_delete(XenPCIPassthroughState *s) +{ + struct XenPTRegGroup *reg_group, *next_grp; + struct XenPTReg *reg, *next_reg; + + /* free MSI/MSI-X info table */ + if (s->msix) { + xen_pt_msix_unmap(s); + } + g_free(s->msi); + + /* free all register group entry */ + QLIST_FOREACH_SAFE(reg_group, &s->reg_grps, entries, next_grp) { + /* free all register entry */ + QLIST_FOREACH_SAFE(reg, ®_group->reg_tbl_list, entries, next_reg) { + QLIST_REMOVE(reg, entries); + g_free(reg); + } + + QLIST_REMOVE(reg_group, entries); + g_free(reg_group); + } +} diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c new file mode 100644 index 000000000..a3bc7e392 --- /dev/null +++ b/hw/xen/xen_pt_graphics.c @@ -0,0 +1,291 @@ +/* + * graphics passthrough + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "xen_pt.h" +#include "xen-host-pci-device.h" +#include "hw/xen/xen-legacy-backend.h" + +static unsigned long igd_guest_opregion; +static unsigned long igd_host_opregion; + +#define XEN_PCI_INTEL_OPREGION_MASK 0xfff + +typedef struct VGARegion { + int type; /* Memory or port I/O */ + uint64_t guest_base_addr; + uint64_t machine_base_addr; + uint64_t size; /* size of the region */ + int rc; +} VGARegion; + +#define IORESOURCE_IO 0x00000100 +#define IORESOURCE_MEM 0x00000200 + +static struct VGARegion vga_args[] = { + { + .type = IORESOURCE_IO, + .guest_base_addr = 0x3B0, + .machine_base_addr = 0x3B0, + .size = 0xC, + .rc = -1, + }, + { + .type = IORESOURCE_IO, + .guest_base_addr = 0x3C0, + .machine_base_addr = 0x3C0, + .size = 0x20, + .rc = -1, + }, + { + .type = IORESOURCE_MEM, + .guest_base_addr = 0xa0000 >> XC_PAGE_SHIFT, + .machine_base_addr = 0xa0000 >> XC_PAGE_SHIFT, + .size = 0x20, + .rc = -1, + }, +}; + +/* + * register VGA resources for the domain with assigned gfx + */ +int xen_pt_register_vga_regions(XenHostPCIDevice *dev) +{ + int i = 0; + + if (!is_igd_vga_passthrough(dev)) { + return 0; + } + + for (i = 0 ; i < ARRAY_SIZE(vga_args); i++) { + if (vga_args[i].type == IORESOURCE_IO) { + vga_args[i].rc = xc_domain_ioport_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_ADD_MAPPING); + } else { + vga_args[i].rc = xc_domain_memory_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_ADD_MAPPING); + } + + if (vga_args[i].rc) { + XEN_PT_ERR(NULL, "VGA %s mapping failed! (rc: %i)\n", + vga_args[i].type == IORESOURCE_IO ? "ioport" : "memory", + vga_args[i].rc); + return vga_args[i].rc; + } + } + + return 0; +} + +/* + * unregister VGA resources for the domain with assigned gfx + */ +int xen_pt_unregister_vga_regions(XenHostPCIDevice *dev) +{ + int i = 0; + int ret = 0; + + if (!is_igd_vga_passthrough(dev)) { + return 0; + } + + for (i = 0 ; i < ARRAY_SIZE(vga_args); i++) { + if (vga_args[i].type == IORESOURCE_IO) { + vga_args[i].rc = xc_domain_ioport_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_REMOVE_MAPPING); + } else { + vga_args[i].rc = xc_domain_memory_mapping(xen_xc, xen_domid, + vga_args[i].guest_base_addr, + vga_args[i].machine_base_addr, + vga_args[i].size, DPCI_REMOVE_MAPPING); + } + + if (vga_args[i].rc) { + XEN_PT_ERR(NULL, "VGA %s unmapping failed! (rc: %i)\n", + vga_args[i].type == IORESOURCE_IO ? "ioport" : "memory", + vga_args[i].rc); + return vga_args[i].rc; + } + } + + if (igd_guest_opregion) { + ret = xc_domain_memory_mapping(xen_xc, xen_domid, + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + 3, + DPCI_REMOVE_MAPPING); + if (ret) { + return ret; + } + } + + return 0; +} + +static void *get_vgabios(XenPCIPassthroughState *s, int *size, + XenHostPCIDevice *dev) +{ + return pci_assign_dev_load_option_rom(&s->dev, size, + dev->domain, dev->bus, + dev->dev, dev->func); +} + +/* Refer to Seabios. */ +struct rom_header { + uint16_t signature; + uint8_t size; + uint8_t initVector[4]; + uint8_t reserved[17]; + uint16_t pcioffset; + uint16_t pnpoffset; +} __attribute__((packed)); + +struct pci_data { + uint32_t signature; + uint16_t vendor; + uint16_t device; + uint16_t vitaldata; + uint16_t dlen; + uint8_t drevision; + uint8_t class_lo; + uint16_t class_hi; + uint16_t ilen; + uint16_t irevision; + uint8_t type; + uint8_t indicator; + uint16_t reserved; +} __attribute__((packed)); + +void xen_pt_setup_vga(XenPCIPassthroughState *s, XenHostPCIDevice *dev, + Error **errp) +{ + unsigned char *bios = NULL; + struct rom_header *rom; + int bios_size; + char *c = NULL; + char checksum = 0; + uint32_t len = 0; + struct pci_data *pd = NULL; + + if (!is_igd_vga_passthrough(dev)) { + error_setg(errp, "Need to enable igd-passthrough"); + return; + } + + bios = get_vgabios(s, &bios_size, dev); + if (!bios) { + error_setg(errp, "VGA: Can't get VBIOS"); + return; + } + + if (bios_size < sizeof(struct rom_header)) { + error_setg(errp, "VGA: VBIOS image corrupt (too small)"); + return; + } + + /* Currently we fixed this address as a primary. */ + rom = (struct rom_header *)bios; + + if (rom->pcioffset + sizeof(struct pci_data) > bios_size) { + error_setg(errp, "VGA: VBIOS image corrupt (bad pcioffset field)"); + return; + } + + pd = (void *)(bios + (unsigned char)rom->pcioffset); + + /* We may need to fixup Device Identification. */ + if (pd->device != s->real_device.device_id) { + pd->device = s->real_device.device_id; + + len = rom->size * 512; + if (len > bios_size) { + error_setg(errp, "VGA: VBIOS image corrupt (bad size field)"); + return; + } + + /* Then adjust the bios checksum */ + for (c = (char *)bios; c < ((char *)bios + len); c++) { + checksum += *c; + } + if (checksum) { + bios[len - 1] -= checksum; + XEN_PT_LOG(&s->dev, "vga bios checksum is adjusted %x!\n", + checksum); + } + } + + /* Currently we fixed this address as a primary for legacy BIOS. */ + cpu_physical_memory_write(0xc0000, bios, bios_size); +} + +uint32_t igd_read_opregion(XenPCIPassthroughState *s) +{ + uint32_t val = 0; + + if (!igd_guest_opregion) { + return val; + } + + val = igd_guest_opregion; + + XEN_PT_LOG(&s->dev, "Read opregion val=%x\n", val); + return val; +} + +#define XEN_PCI_INTEL_OPREGION_PAGES 0x3 +#define XEN_PCI_INTEL_OPREGION_ENABLE_ACCESSED 0x1 +void igd_write_opregion(XenPCIPassthroughState *s, uint32_t val) +{ + int ret; + + if (igd_guest_opregion) { + XEN_PT_LOG(&s->dev, "opregion register already been set, ignoring %x\n", + val); + return; + } + + /* We just work with LE. */ + xen_host_pci_get_block(&s->real_device, XEN_PCI_INTEL_OPREGION, + (uint8_t *)&igd_host_opregion, 4); + igd_guest_opregion = (unsigned long)(val & ~XEN_PCI_INTEL_OPREGION_MASK) + | (igd_host_opregion & XEN_PCI_INTEL_OPREGION_MASK); + + ret = xc_domain_iomem_permission(xen_xc, xen_domid, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + XEN_PCI_INTEL_OPREGION_PAGES, + XEN_PCI_INTEL_OPREGION_ENABLE_ACCESSED); + + if (ret) { + XEN_PT_ERR(&s->dev, "[%d]:Can't enable to access IGD host opregion:" + " 0x%lx.\n", ret, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT)), + igd_guest_opregion = 0; + return; + } + + ret = xc_domain_memory_mapping(xen_xc, xen_domid, + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + XEN_PCI_INTEL_OPREGION_PAGES, + DPCI_ADD_MAPPING); + + if (ret) { + XEN_PT_ERR(&s->dev, "[%d]:Can't map IGD host opregion:0x%lx to" + " guest opregion:0x%lx.\n", ret, + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT)); + igd_guest_opregion = 0; + return; + } + + XEN_PT_LOG(&s->dev, "Map OpRegion: 0x%lx -> 0x%lx\n", + (unsigned long)(igd_host_opregion >> XC_PAGE_SHIFT), + (unsigned long)(igd_guest_opregion >> XC_PAGE_SHIFT)); +} diff --git a/hw/xen/xen_pt_load_rom.c b/hw/xen/xen_pt_load_rom.c new file mode 100644 index 000000000..03422a8a7 --- /dev/null +++ b/hw/xen/xen_pt_load_rom.c @@ -0,0 +1,92 @@ +/* + * This is splited from hw/i386/kvm/pci-assign.c + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/loader.h" +#include "hw/pci/pci.h" +#include "xen_pt.h" + +/* + * Scan the assigned devices for the devices that have an option ROM, and then + * load the corresponding ROM data to RAM. If an error occurs while loading an + * option ROM, we just ignore that option ROM and continue with the next one. + */ +void *pci_assign_dev_load_option_rom(PCIDevice *dev, + int *size, unsigned int domain, + unsigned int bus, unsigned int slot, + unsigned int function) +{ + char name[32], rom_file[64]; + FILE *fp; + uint8_t val; + struct stat st; + void *ptr = NULL; + Object *owner = OBJECT(dev); + + /* If loading ROM from file, pci handles it */ + if (dev->romfile || !dev->rom_bar) { + return NULL; + } + + snprintf(rom_file, sizeof(rom_file), + "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/rom", + domain, bus, slot, function); + + /* Write "1" to the ROM file to enable it */ + fp = fopen(rom_file, "r+"); + if (fp == NULL) { + if (errno != ENOENT) { + error_report("pci-assign: Cannot open %s: %s", rom_file, strerror(errno)); + } + return NULL; + } + if (fstat(fileno(fp), &st) == -1) { + error_report("pci-assign: Cannot stat %s: %s", rom_file, strerror(errno)); + goto close_rom; + } + + val = 1; + if (fwrite(&val, 1, 1, fp) != 1) { + goto close_rom; + } + fseek(fp, 0, SEEK_SET); + + if (dev->romsize != -1) { + if (st.st_size > dev->romsize) { + error_report("ROM BAR \"%s\" (%ld bytes) is too large for ROM size %u", + rom_file, (long) st.st_size, dev->romsize); + goto close_rom; + } + } else { + dev->romsize = st.st_size; + } + + snprintf(name, sizeof(name), "%s.rom", object_get_typename(owner)); + memory_region_init_ram(&dev->rom, owner, name, dev->romsize, &error_abort); + ptr = memory_region_get_ram_ptr(&dev->rom); + memset(ptr, 0xff, dev->romsize); + + if (!fread(ptr, 1, st.st_size, fp)) { + error_report("pci-assign: Cannot read from host %s", rom_file); + error_printf("Device option ROM contents are probably invalid " + "(check dmesg).\nSkip option ROM probe with rombar=0, " + "or load from file with romfile=\n"); + goto close_rom; + } + + pci_register_bar(dev, PCI_ROM_SLOT, 0, &dev->rom); + dev->has_rom = true; + *size = st.st_size; +close_rom: + /* Write "0" to disable ROM */ + fseek(fp, 0, SEEK_SET); + val = 0; + if (!fwrite(&val, 1, 1, fp)) { + XEN_PT_WARN(dev, "%s\n", "Failed to disable pci-sysfs rom file"); + } + fclose(fp); + + return ptr; +} diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c new file mode 100644 index 000000000..b71563f98 --- /dev/null +++ b/hw/xen/xen_pt_msi.c @@ -0,0 +1,650 @@ +/* + * Copyright (c) 2007, Intel Corporation. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Jiang Yunhong <yunhong.jiang@intel.com> + * + * This file implements direct PCI assignment to a HVM guest + */ + +#include "qemu/osdep.h" + +#include "hw/xen/xen-legacy-backend.h" +#include "xen_pt.h" +#include "hw/i386/apic-msidef.h" + + +#define XEN_PT_AUTO_ASSIGN -1 + +/* shift count for gflags */ +#define XEN_PT_GFLAGS_SHIFT_DEST_ID 0 +#define XEN_PT_GFLAGS_SHIFT_RH 8 +#define XEN_PT_GFLAGS_SHIFT_DM 9 +#define XEN_PT_GFLAGSSHIFT_DELIV_MODE 12 +#define XEN_PT_GFLAGSSHIFT_TRG_MODE 15 +#define XEN_PT_GFLAGSSHIFT_UNMASKED 16 + +#define latch(fld) latch[PCI_MSIX_ENTRY_##fld / sizeof(uint32_t)] + +/* + * Helpers + */ + +static inline uint8_t msi_vector(uint32_t data) +{ + return (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; +} + +static inline uint8_t msi_dest_id(uint32_t addr) +{ + return (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; +} + +static inline uint32_t msi_ext_dest_id(uint32_t addr_hi) +{ + return addr_hi & 0xffffff00; +} + +static uint32_t msi_gflags(uint32_t data, uint64_t addr) +{ + uint32_t result = 0; + int rh, dm, dest_id, deliv_mode, trig_mode; + + rh = (addr >> MSI_ADDR_REDIRECTION_SHIFT) & 0x1; + dm = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + dest_id = msi_dest_id(addr); + deliv_mode = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; + trig_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; + + result = dest_id | (rh << XEN_PT_GFLAGS_SHIFT_RH) + | (dm << XEN_PT_GFLAGS_SHIFT_DM) + | (deliv_mode << XEN_PT_GFLAGSSHIFT_DELIV_MODE) + | (trig_mode << XEN_PT_GFLAGSSHIFT_TRG_MODE); + + return result; +} + +static inline uint64_t msi_addr64(XenPTMSI *msi) +{ + return (uint64_t)msi->addr_hi << 32 | msi->addr_lo; +} + +static int msi_msix_enable(XenPCIPassthroughState *s, + uint32_t address, + uint16_t flag, + bool enable) +{ + uint16_t val = 0; + int rc; + + if (!address) { + return -1; + } + + rc = xen_host_pci_get_word(&s->real_device, address, &val); + if (rc) { + XEN_PT_ERR(&s->dev, "Failed to read MSI/MSI-X register (0x%x), rc:%d\n", + address, rc); + return rc; + } + if (enable) { + val |= flag; + } else { + val &= ~flag; + } + rc = xen_host_pci_set_word(&s->real_device, address, val); + if (rc) { + XEN_PT_ERR(&s->dev, "Failed to write MSI/MSI-X register (0x%x), rc:%d\n", + address, rc); + } + return rc; +} + +static int msi_msix_setup(XenPCIPassthroughState *s, + uint64_t addr, + uint32_t data, + int *ppirq, + bool is_msix, + int msix_entry, + bool is_not_mapped) +{ + uint8_t gvec = msi_vector(data); + int rc = 0; + + assert((!is_msix && msix_entry == 0) || is_msix); + + if (xen_is_pirq_msi(data)) { + *ppirq = msi_ext_dest_id(addr >> 32) | msi_dest_id(addr); + if (!*ppirq) { + /* this probably identifies an misconfiguration of the guest, + * try the emulated path */ + *ppirq = XEN_PT_UNASSIGNED_PIRQ; + } else { + XEN_PT_LOG(&s->dev, "requested pirq %d for MSI%s" + " (vec: 0x%x, entry: 0x%x)\n", + *ppirq, is_msix ? "-X" : "", gvec, msix_entry); + } + } + + if (is_not_mapped) { + uint64_t table_base = 0; + + if (is_msix) { + table_base = s->msix->table_base; + } + + rc = xc_physdev_map_pirq_msi(xen_xc, xen_domid, XEN_PT_AUTO_ASSIGN, + ppirq, PCI_DEVFN(s->real_device.dev, + s->real_device.func), + s->real_device.bus, + msix_entry, table_base); + if (rc) { + XEN_PT_ERR(&s->dev, + "Mapping of MSI%s (err: %i, vec: 0x%x, entry 0x%x)\n", + is_msix ? "-X" : "", errno, gvec, msix_entry); + return rc; + } + } + + return 0; +} +static int msi_msix_update(XenPCIPassthroughState *s, + uint64_t addr, + uint32_t data, + int pirq, + bool is_msix, + int msix_entry, + int *old_pirq, + bool masked) +{ + PCIDevice *d = &s->dev; + uint8_t gvec = msi_vector(data); + uint32_t gflags = msi_gflags(data, addr); + int rc = 0; + uint64_t table_addr = 0; + + XEN_PT_LOG(d, "Updating MSI%s with pirq %d gvec 0x%x gflags 0x%x" + " (entry: 0x%x)\n", + is_msix ? "-X" : "", pirq, gvec, gflags, msix_entry); + + if (is_msix) { + table_addr = s->msix->mmio_base_addr; + } + + gflags |= masked ? 0 : (1u << XEN_PT_GFLAGSSHIFT_UNMASKED); + + rc = xc_domain_update_msi_irq(xen_xc, xen_domid, gvec, + pirq, gflags, table_addr); + + if (rc) { + XEN_PT_ERR(d, "Updating of MSI%s failed. (err: %d)\n", + is_msix ? "-X" : "", errno); + + if (xc_physdev_unmap_pirq(xen_xc, xen_domid, *old_pirq)) { + XEN_PT_ERR(d, "Unmapping of MSI%s pirq %d failed. (err: %d)\n", + is_msix ? "-X" : "", *old_pirq, errno); + } + *old_pirq = XEN_PT_UNASSIGNED_PIRQ; + } + return rc; +} + +static int msi_msix_disable(XenPCIPassthroughState *s, + uint64_t addr, + uint32_t data, + int pirq, + bool is_msix, + bool is_binded) +{ + PCIDevice *d = &s->dev; + uint8_t gvec = msi_vector(data); + uint32_t gflags = msi_gflags(data, addr); + int rc = 0; + + if (pirq == XEN_PT_UNASSIGNED_PIRQ) { + return 0; + } + + if (is_binded) { + XEN_PT_LOG(d, "Unbind MSI%s with pirq %d, gvec 0x%x\n", + is_msix ? "-X" : "", pirq, gvec); + rc = xc_domain_unbind_msi_irq(xen_xc, xen_domid, gvec, pirq, gflags); + if (rc) { + XEN_PT_ERR(d, "Unbinding of MSI%s failed. (err: %d, pirq: %d, gvec: 0x%x)\n", + is_msix ? "-X" : "", errno, pirq, gvec); + return rc; + } + } + + XEN_PT_LOG(d, "Unmap MSI%s pirq %d\n", is_msix ? "-X" : "", pirq); + rc = xc_physdev_unmap_pirq(xen_xc, xen_domid, pirq); + if (rc) { + XEN_PT_ERR(d, "Unmapping of MSI%s pirq %d failed. (err: %i)\n", + is_msix ? "-X" : "", pirq, errno); + return rc; + } + + return 0; +} + +/* + * MSI virtualization functions + */ + +static int xen_pt_msi_set_enable(XenPCIPassthroughState *s, bool enable) +{ + XEN_PT_LOG(&s->dev, "%s MSI.\n", enable ? "enabling" : "disabling"); + + if (!s->msi) { + return -1; + } + + return msi_msix_enable(s, s->msi->ctrl_offset, PCI_MSI_FLAGS_ENABLE, + enable); +} + +/* setup physical msi, but don't enable it */ +int xen_pt_msi_setup(XenPCIPassthroughState *s) +{ + int pirq = XEN_PT_UNASSIGNED_PIRQ; + int rc = 0; + XenPTMSI *msi = s->msi; + + if (msi->initialized) { + XEN_PT_ERR(&s->dev, + "Setup physical MSI when it has been properly initialized.\n"); + return -1; + } + + rc = msi_msix_setup(s, msi_addr64(msi), msi->data, &pirq, false, 0, true); + if (rc) { + return rc; + } + + if (pirq < 0) { + XEN_PT_ERR(&s->dev, "Invalid pirq number: %d.\n", pirq); + return -1; + } + + msi->pirq = pirq; + XEN_PT_LOG(&s->dev, "MSI mapped with pirq %d.\n", pirq); + + return 0; +} + +int xen_pt_msi_update(XenPCIPassthroughState *s) +{ + XenPTMSI *msi = s->msi; + + /* Current MSI emulation in QEMU only supports 1 vector */ + return msi_msix_update(s, msi_addr64(msi), msi->data, msi->pirq, + false, 0, &msi->pirq, msi->mask & 1); +} + +void xen_pt_msi_disable(XenPCIPassthroughState *s) +{ + XenPTMSI *msi = s->msi; + + if (!msi) { + return; + } + + (void)xen_pt_msi_set_enable(s, false); + + msi_msix_disable(s, msi_addr64(msi), msi->data, msi->pirq, false, + msi->initialized); + + /* clear msi info */ + msi->flags &= ~PCI_MSI_FLAGS_ENABLE; + msi->initialized = false; + msi->mapped = false; + msi->pirq = XEN_PT_UNASSIGNED_PIRQ; +} + +/* + * MSI-X virtualization functions + */ + +static int msix_set_enable(XenPCIPassthroughState *s, bool enabled) +{ + XEN_PT_LOG(&s->dev, "%s MSI-X.\n", enabled ? "enabling" : "disabling"); + + if (!s->msix) { + return -1; + } + + return msi_msix_enable(s, s->msix->ctrl_offset, PCI_MSIX_FLAGS_ENABLE, + enabled); +} + +static int xen_pt_msix_update_one(XenPCIPassthroughState *s, int entry_nr, + uint32_t vec_ctrl) +{ + XenPTMSIXEntry *entry = NULL; + int pirq; + int rc; + + if (entry_nr < 0 || entry_nr >= s->msix->total_entries) { + return -EINVAL; + } + + entry = &s->msix->msix_entry[entry_nr]; + + if (!entry->updated) { + return 0; + } + + pirq = entry->pirq; + + /* + * Update the entry addr and data to the latest values only when the + * entry is masked or they are all masked, as required by the spec. + * Addr and data changes while the MSI-X entry is unmasked get deferred + * until the next masked -> unmasked transition. + */ + if (pirq == XEN_PT_UNASSIGNED_PIRQ || s->msix->maskall || + (vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT)) { + entry->addr = entry->latch(LOWER_ADDR) | + ((uint64_t)entry->latch(UPPER_ADDR) << 32); + entry->data = entry->latch(DATA); + } + + rc = msi_msix_setup(s, entry->addr, entry->data, &pirq, true, entry_nr, + entry->pirq == XEN_PT_UNASSIGNED_PIRQ); + if (rc) { + return rc; + } + if (entry->pirq == XEN_PT_UNASSIGNED_PIRQ) { + entry->pirq = pirq; + } + + rc = msi_msix_update(s, entry->addr, entry->data, pirq, true, + entry_nr, &entry->pirq, + vec_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT); + + if (!rc) { + entry->updated = false; + } + + return rc; +} + +int xen_pt_msix_update(XenPCIPassthroughState *s) +{ + XenPTMSIX *msix = s->msix; + int i; + + for (i = 0; i < msix->total_entries; i++) { + xen_pt_msix_update_one(s, i, msix->msix_entry[i].latch(VECTOR_CTRL)); + } + + return 0; +} + +void xen_pt_msix_disable(XenPCIPassthroughState *s) +{ + int i = 0; + + msix_set_enable(s, false); + + for (i = 0; i < s->msix->total_entries; i++) { + XenPTMSIXEntry *entry = &s->msix->msix_entry[i]; + + msi_msix_disable(s, entry->addr, entry->data, entry->pirq, true, true); + + /* clear MSI-X info */ + entry->pirq = XEN_PT_UNASSIGNED_PIRQ; + entry->updated = false; + } +} + +int xen_pt_msix_update_remap(XenPCIPassthroughState *s, int bar_index) +{ + XenPTMSIXEntry *entry; + int i, ret; + + if (!(s->msix && s->msix->bar_index == bar_index)) { + return 0; + } + + for (i = 0; i < s->msix->total_entries; i++) { + entry = &s->msix->msix_entry[i]; + if (entry->pirq != XEN_PT_UNASSIGNED_PIRQ) { + ret = xc_domain_unbind_pt_irq(xen_xc, xen_domid, entry->pirq, + PT_IRQ_TYPE_MSI, 0, 0, 0, 0); + if (ret) { + XEN_PT_ERR(&s->dev, "unbind MSI-X entry %d failed (err: %d)\n", + entry->pirq, errno); + } + entry->updated = true; + } + } + return xen_pt_msix_update(s); +} + +static uint32_t get_entry_value(XenPTMSIXEntry *e, int offset) +{ + assert(!(offset % sizeof(*e->latch))); + return e->latch[offset / sizeof(*e->latch)]; +} + +static void set_entry_value(XenPTMSIXEntry *e, int offset, uint32_t val) +{ + assert(!(offset % sizeof(*e->latch))); + e->latch[offset / sizeof(*e->latch)] = val; +} + +static void pci_msix_write(void *opaque, hwaddr addr, + uint64_t val, unsigned size) +{ + XenPCIPassthroughState *s = opaque; + XenPTMSIX *msix = s->msix; + XenPTMSIXEntry *entry; + unsigned int entry_nr, offset; + + entry_nr = addr / PCI_MSIX_ENTRY_SIZE; + if (entry_nr >= msix->total_entries) { + return; + } + entry = &msix->msix_entry[entry_nr]; + offset = addr % PCI_MSIX_ENTRY_SIZE; + + if (offset != PCI_MSIX_ENTRY_VECTOR_CTRL) { + if (get_entry_value(entry, offset) == val + && entry->pirq != XEN_PT_UNASSIGNED_PIRQ) { + return; + } + + entry->updated = true; + } else if (msix->enabled && entry->updated && + !(val & PCI_MSIX_ENTRY_CTRL_MASKBIT)) { + const volatile uint32_t *vec_ctrl; + + /* + * If Xen intercepts the mask bit access, entry->vec_ctrl may not be + * up-to-date. Read from hardware directly. + */ + vec_ctrl = s->msix->phys_iomem_base + entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + xen_pt_msix_update_one(s, entry_nr, *vec_ctrl); + } + + set_entry_value(entry, offset, val); +} + +static uint64_t pci_msix_read(void *opaque, hwaddr addr, + unsigned size) +{ + XenPCIPassthroughState *s = opaque; + XenPTMSIX *msix = s->msix; + int entry_nr, offset; + + entry_nr = addr / PCI_MSIX_ENTRY_SIZE; + if (entry_nr < 0) { + XEN_PT_ERR(&s->dev, "asked MSI-X entry '%i' invalid!\n", entry_nr); + return 0; + } + + offset = addr % PCI_MSIX_ENTRY_SIZE; + + if (addr < msix->total_entries * PCI_MSIX_ENTRY_SIZE) { + return get_entry_value(&msix->msix_entry[entry_nr], offset); + } else { + /* Pending Bit Array (PBA) */ + return *(uint32_t *)(msix->phys_iomem_base + addr); + } +} + +static bool pci_msix_accepts(void *opaque, hwaddr addr, + unsigned size, bool is_write, + MemTxAttrs attrs) +{ + return !(addr & (size - 1)); +} + +static const MemoryRegionOps pci_msix_ops = { + .read = pci_msix_read, + .write = pci_msix_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false, + .accepts = pci_msix_accepts + }, + .impl = { + .min_access_size = 4, + .max_access_size = 4, + .unaligned = false + } +}; + +int xen_pt_msix_init(XenPCIPassthroughState *s, uint32_t base) +{ + uint8_t id = 0; + uint16_t control = 0; + uint32_t table_off = 0; + int i, total_entries, bar_index; + XenHostPCIDevice *hd = &s->real_device; + PCIDevice *d = &s->dev; + int fd = -1; + XenPTMSIX *msix = NULL; + int rc = 0; + + rc = xen_host_pci_get_byte(hd, base + PCI_CAP_LIST_ID, &id); + if (rc) { + return rc; + } + + if (id != PCI_CAP_ID_MSIX) { + XEN_PT_ERR(d, "Invalid id 0x%x base 0x%x\n", id, base); + return -1; + } + + rc = xen_host_pci_get_word(hd, base + PCI_MSIX_FLAGS, &control); + if (rc) { + XEN_PT_ERR(d, "Failed to read PCI_MSIX_FLAGS field\n"); + return rc; + } + total_entries = control & PCI_MSIX_FLAGS_QSIZE; + total_entries += 1; + + s->msix = g_malloc0(sizeof (XenPTMSIX) + + total_entries * sizeof (XenPTMSIXEntry)); + msix = s->msix; + + msix->total_entries = total_entries; + for (i = 0; i < total_entries; i++) { + msix->msix_entry[i].pirq = XEN_PT_UNASSIGNED_PIRQ; + } + + memory_region_init_io(&msix->mmio, OBJECT(s), &pci_msix_ops, + s, "xen-pci-pt-msix", + (total_entries * PCI_MSIX_ENTRY_SIZE + + XC_PAGE_SIZE - 1) + & XC_PAGE_MASK); + + rc = xen_host_pci_get_long(hd, base + PCI_MSIX_TABLE, &table_off); + if (rc) { + XEN_PT_ERR(d, "Failed to read PCI_MSIX_TABLE field\n"); + goto error_out; + } + bar_index = msix->bar_index = table_off & PCI_MSIX_FLAGS_BIRMASK; + table_off = table_off & ~PCI_MSIX_FLAGS_BIRMASK; + msix->table_base = s->real_device.io_regions[bar_index].base_addr; + XEN_PT_LOG(d, "get MSI-X table BAR base 0x%"PRIx64"\n", msix->table_base); + + fd = open("/dev/mem", O_RDWR); + if (fd == -1) { + rc = -errno; + XEN_PT_ERR(d, "Can't open /dev/mem: %s\n", strerror(errno)); + goto error_out; + } + XEN_PT_LOG(d, "table_off = 0x%x, total_entries = %d\n", + table_off, total_entries); + msix->table_offset_adjust = table_off & 0x0fff; + msix->phys_iomem_base = + mmap(NULL, + total_entries * PCI_MSIX_ENTRY_SIZE + msix->table_offset_adjust, + PROT_READ, + MAP_SHARED | MAP_LOCKED, + fd, + msix->table_base + table_off - msix->table_offset_adjust); + close(fd); + if (msix->phys_iomem_base == MAP_FAILED) { + rc = -errno; + XEN_PT_ERR(d, "Can't map physical MSI-X table: %s\n", strerror(errno)); + goto error_out; + } + msix->phys_iomem_base = (char *)msix->phys_iomem_base + + msix->table_offset_adjust; + + XEN_PT_LOG(d, "mapping physical MSI-X table to %p\n", + msix->phys_iomem_base); + + memory_region_add_subregion_overlap(&s->bar[bar_index], table_off, + &msix->mmio, + 2); /* Priority: pci default + 1 */ + + return 0; + +error_out: + g_free(s->msix); + s->msix = NULL; + return rc; +} + +void xen_pt_msix_unmap(XenPCIPassthroughState *s) +{ + XenPTMSIX *msix = s->msix; + + if (!msix) { + return; + } + + /* unmap the MSI-X memory mapped register area */ + if (msix->phys_iomem_base) { + XEN_PT_LOG(&s->dev, "unmapping physical MSI-X table from %p\n", + msix->phys_iomem_base); + munmap(msix->phys_iomem_base, msix->total_entries * PCI_MSIX_ENTRY_SIZE + + msix->table_offset_adjust); + } + + memory_region_del_subregion(&s->bar[msix->bar_index], &msix->mmio); +} + +void xen_pt_msix_delete(XenPCIPassthroughState *s) +{ + XenPTMSIX *msix = s->msix; + + if (!msix) { + return; + } + + object_unparent(OBJECT(&msix->mmio)); + + g_free(s->msix); + s->msix = NULL; +} diff --git a/hw/xen/xen_pt_stub.c b/hw/xen/xen_pt_stub.c new file mode 100644 index 000000000..2d8cac8d5 --- /dev/null +++ b/hw/xen/xen_pt_stub.c @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2020 Citrix Systems UK Ltd. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hw/xen/xen_pt.h" +#include "qapi/error.h" + +bool xen_igd_gfx_pt_enabled(void) +{ + return false; +} + +void xen_igd_gfx_pt_set(bool value, Error **errp) +{ + if (value) { + error_setg(errp, "Xen PCI passthrough support not built in"); + } +} diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c new file mode 100644 index 000000000..8ab458922 --- /dev/null +++ b/hw/xen/xen_pvdev.c @@ -0,0 +1,319 @@ +/* + * Xen para-virtualization device + * + * (c) 2008 Gerd Hoffmann <kraxel@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/> + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "hw/qdev-core.h" +#include "hw/xen/xen-legacy-backend.h" +#include "hw/xen/xen_pvdev.h" + +/* private */ +static int debug; + +struct xs_dirs { + char *xs_dir; + QTAILQ_ENTRY(xs_dirs) list; +}; + +static QTAILQ_HEAD(, xs_dirs) xs_cleanup = + QTAILQ_HEAD_INITIALIZER(xs_cleanup); + +static QTAILQ_HEAD(, XenLegacyDevice) xendevs = + QTAILQ_HEAD_INITIALIZER(xendevs); + +/* ------------------------------------------------------------- */ + +static void xenstore_cleanup_dir(char *dir) +{ + struct xs_dirs *d; + + d = g_malloc(sizeof(*d)); + d->xs_dir = dir; + QTAILQ_INSERT_TAIL(&xs_cleanup, d, list); +} + +void xen_config_cleanup(void) +{ + struct xs_dirs *d; + + QTAILQ_FOREACH(d, &xs_cleanup, list) { + xs_rm(xenstore, 0, d->xs_dir); + } +} + +int xenstore_mkdir(char *path, int p) +{ + struct xs_permissions perms[2] = { + { + .id = 0, /* set owner: dom0 */ + }, { + .id = xen_domid, + .perms = p, + } + }; + + if (!xs_mkdir(xenstore, 0, path)) { + xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path); + return -1; + } + xenstore_cleanup_dir(g_strdup(path)); + + if (!xs_set_permissions(xenstore, 0, path, perms, 2)) { + xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path); + return -1; + } + return 0; +} + +int xenstore_write_str(const char *base, const char *node, const char *val) +{ + char abspath[XEN_BUFSIZE]; + + snprintf(abspath, sizeof(abspath), "%s/%s", base, node); + if (!xs_write(xenstore, 0, abspath, val, strlen(val))) { + return -1; + } + return 0; +} + +char *xenstore_read_str(const char *base, const char *node) +{ + char abspath[XEN_BUFSIZE]; + unsigned int len; + char *str, *ret = NULL; + + snprintf(abspath, sizeof(abspath), "%s/%s", base, node); + str = xs_read(xenstore, 0, abspath, &len); + if (str != NULL) { + /* move to qemu-allocated memory to make sure + * callers can savely g_free() stuff. */ + ret = g_strdup(str); + free(str); + } + return ret; +} + +int xenstore_write_int(const char *base, const char *node, int ival) +{ + char val[12]; + + snprintf(val, sizeof(val), "%d", ival); + return xenstore_write_str(base, node, val); +} + +int xenstore_write_int64(const char *base, const char *node, int64_t ival) +{ + char val[21]; + + snprintf(val, sizeof(val), "%"PRId64, ival); + return xenstore_write_str(base, node, val); +} + +int xenstore_read_int(const char *base, const char *node, int *ival) +{ + char *val; + int rc = -1; + + val = xenstore_read_str(base, node); + if (val && 1 == sscanf(val, "%d", ival)) { + rc = 0; + } + g_free(val); + return rc; +} + +int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval) +{ + char *val; + int rc = -1; + + val = xenstore_read_str(base, node); + if (val && 1 == sscanf(val, "%"SCNu64, uval)) { + rc = 0; + } + g_free(val); + return rc; +} + +void xenstore_update(void *unused) +{ + char **vec = NULL; + intptr_t type, ops, ptr; + unsigned int dom, count; + + vec = xs_read_watch(xenstore, &count); + if (vec == NULL) { + goto cleanup; + } + + if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR, + &type, &dom, &ops) == 3) { + xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops); + } + if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) { + xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr); + } + +cleanup: + free(vec); +} + +const char *xenbus_strstate(enum xenbus_state state) +{ + static const char *const name[] = { + [XenbusStateUnknown] = "Unknown", + [XenbusStateInitialising] = "Initialising", + [XenbusStateInitWait] = "InitWait", + [XenbusStateInitialised] = "Initialised", + [XenbusStateConnected] = "Connected", + [XenbusStateClosing] = "Closing", + [XenbusStateClosed] = "Closed", + }; + return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; +} + +/* + * msg_level: + * 0 == errors (stderr + logfile). + * 1 == informative debug messages (logfile only). + * 2 == noisy debug messages (logfile only). + * 3 == will flood your log (logfile only). + */ +void xen_pv_printf(struct XenLegacyDevice *xendev, int msg_level, + const char *fmt, ...) +{ + va_list args; + + if (xendev) { + if (msg_level > xendev->debug) { + return; + } + qemu_log("xen be: %s: ", xendev->name); + if (msg_level == 0) { + fprintf(stderr, "xen be: %s: ", xendev->name); + } + } else { + if (msg_level > debug) { + return; + } + qemu_log("xen be core: "); + if (msg_level == 0) { + fprintf(stderr, "xen be core: "); + } + } + va_start(args, fmt); + qemu_log_vprintf(fmt, args); + va_end(args); + if (msg_level == 0) { + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } + qemu_log_flush(); +} + +void xen_pv_evtchn_event(void *opaque) +{ + struct XenLegacyDevice *xendev = opaque; + evtchn_port_t port; + + port = xenevtchn_pending(xendev->evtchndev); + if (port != xendev->local_port) { + xen_pv_printf(xendev, 0, + "xenevtchn_pending returned %d (expected %d)\n", + port, xendev->local_port); + return; + } + xenevtchn_unmask(xendev->evtchndev, port); + + if (xendev->ops->event) { + xendev->ops->event(xendev); + } +} + +void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev) +{ + if (xendev->local_port == -1) { + return; + } + qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL); + xenevtchn_unbind(xendev->evtchndev, xendev->local_port); + xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port); + xendev->local_port = -1; +} + +int xen_pv_send_notify(struct XenLegacyDevice *xendev) +{ + return xenevtchn_notify(xendev->evtchndev, xendev->local_port); +} + +/* ------------------------------------------------------------- */ + +struct XenLegacyDevice *xen_pv_find_xendev(const char *type, int dom, int dev) +{ + struct XenLegacyDevice *xendev; + + QTAILQ_FOREACH(xendev, &xendevs, next) { + if (xendev->dom != dom) { + continue; + } + if (xendev->dev != dev) { + continue; + } + if (strcmp(xendev->type, type) != 0) { + continue; + } + return xendev; + } + return NULL; +} + +/* + * release xen backend device. + */ +void xen_pv_del_xendev(struct XenLegacyDevice *xendev) +{ + if (xendev->ops->free) { + xendev->ops->free(xendev); + } + + if (xendev->fe) { + char token[XEN_BUFSIZE]; + snprintf(token, sizeof(token), "fe:%p", xendev); + xs_unwatch(xenstore, xendev->fe, token); + g_free(xendev->fe); + } + + if (xendev->evtchndev != NULL) { + xenevtchn_close(xendev->evtchndev); + } + if (xendev->gnttabdev != NULL) { + xengnttab_close(xendev->gnttabdev); + } + + QTAILQ_REMOVE(&xendevs, xendev, next); + + qdev_unplug(&xendev->qdev, NULL); +} + +void xen_pv_insert_xendev(struct XenLegacyDevice *xendev) +{ + QTAILQ_INSERT_TAIL(&xendevs, xendev, next); +} |