author    Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> 2023-10-10 11:40:56 +0000
committer Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com> 2023-10-10 11:40:56 +0000
commit    e02cda008591317b1625707ff8e115a4841aa889 (patch)
tree      aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /contrib/plugins
parent    cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff)
Introduce Virtio-loopback epsilon release:

The epsilon release introduces a new compatibility layer which makes the
virtio-loopback design work with QEMU and rust-vmm vhost-user backends
without requiring any changes.

Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'contrib/plugins')
-rw-r--r--  contrib/plugins/Makefile    |  45
-rw-r--r--  contrib/plugins/cache.c     | 860
-rw-r--r--  contrib/plugins/execlog.c   | 153
-rw-r--r--  contrib/plugins/hotblocks.c | 155
-rw-r--r--  contrib/plugins/hotpages.c  | 203
-rw-r--r--  contrib/plugins/howvec.c    | 372
-rw-r--r--  contrib/plugins/hwprofile.c | 320
-rw-r--r--  contrib/plugins/lockstep.c  | 356
8 files changed, 2464 insertions, 0 deletions
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
new file mode 100644
index 000000000..54ac5ccd9
--- /dev/null
+++ b/contrib/plugins/Makefile
@@ -0,0 +1,45 @@
+# -*- Mode: makefile -*-
+#
+# This Makefile example is fairly independent of the main makefile
+# so users can take and adapt it for their build. We only really
+# include config-host.mak so we don't have to repeat probing for
+# cflags that the main configure has already done for us.
+#
+
+BUILD_DIR := $(CURDIR)/../..
+
+include $(BUILD_DIR)/config-host.mak
+
+VPATH += $(SRC_PATH)/contrib/plugins
+
+NAMES :=
+NAMES += execlog
+NAMES += hotblocks
+NAMES += hotpages
+NAMES += howvec
+NAMES += lockstep
+NAMES += hwprofile
+NAMES += cache
+
+SONAMES := $(addsuffix .so,$(addprefix lib,$(NAMES)))
+
+# The main QEMU uses Glib extensively so it's perfectly fine to use it
+# in plugins (which many examples do).
+CFLAGS = $(GLIB_CFLAGS)
+CFLAGS += -fPIC -Wall $(filter -W%, $(QEMU_CFLAGS))
+CFLAGS += $(if $(findstring no-psabi,$(QEMU_CFLAGS)),-Wpsabi)
+CFLAGS += -I$(SRC_PATH)/include/qemu
+
+all: $(SONAMES)
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c -o $@ $<
+
+lib%.so: %.o
+ $(CC) -shared -Wl,-soname,$@ -o $@ $^ $(LDLIBS)
+
+clean:
+ rm -f *.o *.so *.d
+ rm -Rf .libs
+
+.PHONY: all clean
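Each name in NAMES corresponds to one C source file below that is built into a
libNAME.so. For orientation, a minimal plugin source that this Makefile could
compile is sketched here (a hypothetical file, showing only the two exported
symbols that every plugin in this patch defines):

    #include <qemu-plugin.h>

    QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;

    QEMU_PLUGIN_EXPORT
    int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
                            int argc, char **argv)
    {
        /* register translation-block and atexit callbacks here */
        return 0;
    }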
diff --git a/contrib/plugins/cache.c b/contrib/plugins/cache.c
new file mode 100644
index 000000000..b9226e7c4
--- /dev/null
+++ b/contrib/plugins/cache.c
@@ -0,0 +1,860 @@
+/*
+ * Copyright (C) 2021, Mahmoud Mandour <ma.mandourr@gmail.com>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+#define STRTOLL(x) g_ascii_strtoll(x, NULL, 10)
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+static GHashTable *miss_ht;
+
+static GMutex hashtable_lock;
+static GRand *rng;
+
+static int limit;
+static bool sys;
+
+enum EvictionPolicy {
+ LRU,
+ FIFO,
+ RAND,
+};
+
+enum EvictionPolicy policy;
+
+/*
+ * A CacheSet is a set of cache blocks. A memory block that maps to a set can be
+ * put in any of the blocks inside the set. The number of blocks per set is
+ * called the associativity (assoc).
+ *
+ * Each block contains the stored tag and a valid bit. Since this is not
+ * a functional simulator, the data itself is not stored. We only identify
+ * whether a block is in the cache or not by searching for its tag.
+ *
+ * In order to search for memory data in the cache, the set identifier and tag
+ * are extracted from the address and the set is probed to see whether a tag
+ * match occurs.
+ *
+ * An address is logically divided into three portions: The block offset,
+ * the set number, and the tag.
+ *
+ * The set number is used to identify the set in which the block may exist.
+ * The tag is compared against all the tags of a set to search for a match. If a
+ * match is found, then the access is a hit.
+ *
+ * The CacheSet also contains bookkeeping information about eviction details.
+ */
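+
+/*
+ * Worked example (hypothetical geometry, see cache_init() below): a 32 KiB,
+ * 8-way cache with 64-byte blocks has 32768 / (64 * 8) = 64 sets, so
+ * blksize_shift = 6, set_mask = 63 << 6 = 0xfc0 and tag_mask = ~0xfff.
+ * The address 0x12345678 then decomposes as:
+ *
+ *   offset = 0x12345678 & 0x3f          = 0x38
+ *   set    = (0x12345678 & 0xfc0) >> 6  = 25
+ *   tag    = 0x12345678 & ~0xfff        = 0x12345000
+ */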
+
+typedef struct {
+ uint64_t tag;
+ bool valid;
+} CacheBlock;
+
+typedef struct {
+ CacheBlock *blocks;
+ uint64_t *lru_priorities;
+ uint64_t lru_gen_counter;
+ GQueue *fifo_queue;
+} CacheSet;
+
+typedef struct {
+ CacheSet *sets;
+ int num_sets;
+ int cachesize;
+ int assoc;
+ int blksize_shift;
+ uint64_t set_mask;
+ uint64_t tag_mask;
+ uint64_t accesses;
+ uint64_t misses;
+} Cache;
+
+typedef struct {
+ char *disas_str;
+ const char *symbol;
+ uint64_t addr;
+ uint64_t l1_dmisses;
+ uint64_t l1_imisses;
+ uint64_t l2_misses;
+} InsnData;
+
+void (*update_hit)(Cache *cache, int set, int blk);
+void (*update_miss)(Cache *cache, int set, int blk);
+
+void (*metadata_init)(Cache *cache);
+void (*metadata_destroy)(Cache *cache);
+
+static int cores;
+static Cache **l1_dcaches, **l1_icaches;
+
+static bool use_l2;
+static Cache **l2_ucaches;
+
+static GMutex *l1_dcache_locks;
+static GMutex *l1_icache_locks;
+static GMutex *l2_ucache_locks;
+
+static uint64_t l1_dmem_accesses;
+static uint64_t l1_imem_accesses;
+static uint64_t l1_imisses;
+static uint64_t l1_dmisses;
+
+static uint64_t l2_mem_accesses;
+static uint64_t l2_misses;
+
+static int pow_of_two(int num)
+{
+ g_assert((num & (num - 1)) == 0);
+ int ret = 0;
+ while (num /= 2) {
+ ret++;
+ }
+ return ret;
+}
+
+/*
+ * LRU eviction policy: For each set, a generation counter is maintained
+ * alongside a priority array.
+ *
+ * On each set access, the generation counter is incremented.
+ *
+ * On a cache hit: The hit-block is assigned the current generation counter,
+ * indicating that it is the most recently used block.
+ *
+ * On a cache miss: The block with the lowest priority is located and replaced
+ * by the newly-cached block, whose priority is set to the current
+ * generation number.
+ */
+
+static void lru_priorities_init(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ cache->sets[i].lru_priorities = g_new0(uint64_t, cache->assoc);
+ cache->sets[i].lru_gen_counter = 0;
+ }
+}
+
+static void lru_update_blk(Cache *cache, int set_idx, int blk_idx)
+{
+ CacheSet *set = &cache->sets[set_idx];
+ set->lru_priorities[blk_idx] = cache->sets[set_idx].lru_gen_counter;
+ set->lru_gen_counter++;
+}
+
+static int lru_get_lru_block(Cache *cache, int set_idx)
+{
+ int i, min_idx, min_priority;
+
+ min_priority = cache->sets[set_idx].lru_priorities[0];
+ min_idx = 0;
+
+ for (i = 1; i < cache->assoc; i++) {
+ if (cache->sets[set_idx].lru_priorities[i] < min_priority) {
+ min_priority = cache->sets[set_idx].lru_priorities[i];
+ min_idx = i;
+ }
+ }
+ return min_idx;
+}
+
+static void lru_priorities_destroy(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ g_free(cache->sets[i].lru_priorities);
+ }
+}
+
+/*
+ * FIFO eviction policy: a FIFO queue is maintained for each CacheSet that
+ * stores accesses to the cache.
+ *
+ * On a compulsory miss: The block index is enqueued to the fifo_queue to
+ * indicate that it's the latest cached block.
+ *
+ * On a conflict miss: The first-in block is removed from the cache and the new
+ * block is put in its place and enqueued to the FIFO queue.
+ */
+
+static void fifo_init(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ cache->sets[i].fifo_queue = g_queue_new();
+ }
+}
+
+static int fifo_get_first_block(Cache *cache, int set)
+{
+ GQueue *q = cache->sets[set].fifo_queue;
+ return GPOINTER_TO_INT(g_queue_pop_tail(q));
+}
+
+static void fifo_update_on_miss(Cache *cache, int set, int blk_idx)
+{
+ GQueue *q = cache->sets[set].fifo_queue;
+ g_queue_push_head(q, GINT_TO_POINTER(blk_idx));
+}
+
+static void fifo_destroy(Cache *cache)
+{
+ int i;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ g_queue_free(cache->sets[i].fifo_queue);
+ }
+}
+
+static inline uint64_t extract_tag(Cache *cache, uint64_t addr)
+{
+ return addr & cache->tag_mask;
+}
+
+static inline uint64_t extract_set(Cache *cache, uint64_t addr)
+{
+ return (addr & cache->set_mask) >> cache->blksize_shift;
+}
+
+static const char *cache_config_error(int blksize, int assoc, int cachesize)
+{
+ if (cachesize % blksize != 0) {
+ return "cache size must be divisible by block size";
+ } else if (cachesize % (blksize * assoc) != 0) {
+ return "cache size must be divisible by set size (assoc * block size)";
+ } else {
+ return NULL;
+ }
+}
+
+static bool bad_cache_params(int blksize, int assoc, int cachesize)
+{
+ return (cachesize % blksize) != 0 || (cachesize % (blksize * assoc) != 0);
+}
+
+static Cache *cache_init(int blksize, int assoc, int cachesize)
+{
+ Cache *cache;
+ int i;
+ uint64_t blk_mask;
+
+ /*
+ * This function shall not be called directly, and hence expects suitable
+ * parameters.
+ */
+ g_assert(!bad_cache_params(blksize, assoc, cachesize));
+
+ cache = g_new(Cache, 1);
+ cache->assoc = assoc;
+ cache->cachesize = cachesize;
+ cache->num_sets = cachesize / (blksize * assoc);
+ cache->sets = g_new(CacheSet, cache->num_sets);
+ cache->blksize_shift = pow_of_two(blksize);
+ cache->accesses = 0;
+ cache->misses = 0;
+
+ for (i = 0; i < cache->num_sets; i++) {
+ cache->sets[i].blocks = g_new0(CacheBlock, assoc);
+ }
+
+ blk_mask = blksize - 1;
+ cache->set_mask = ((cache->num_sets - 1) << cache->blksize_shift);
+ cache->tag_mask = ~(cache->set_mask | blk_mask);
+
+ if (metadata_init) {
+ metadata_init(cache);
+ }
+
+ return cache;
+}
+
+static Cache **caches_init(int blksize, int assoc, int cachesize)
+{
+ Cache **caches;
+ int i;
+
+ if (bad_cache_params(blksize, assoc, cachesize)) {
+ return NULL;
+ }
+
+ caches = g_new(Cache *, cores);
+
+ for (i = 0; i < cores; i++) {
+ caches[i] = cache_init(blksize, assoc, cachesize);
+ }
+
+ return caches;
+}
+
+static int get_invalid_block(Cache *cache, uint64_t set)
+{
+ int i;
+
+ for (i = 0; i < cache->assoc; i++) {
+ if (!cache->sets[set].blocks[i].valid) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+static int get_replaced_block(Cache *cache, int set)
+{
+ switch (policy) {
+ case RAND:
+ return g_rand_int_range(rng, 0, cache->assoc);
+ case LRU:
+ return lru_get_lru_block(cache, set);
+ case FIFO:
+ return fifo_get_first_block(cache, set);
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static int in_cache(Cache *cache, uint64_t addr)
+{
+ int i;
+ uint64_t tag, set;
+
+ tag = extract_tag(cache, addr);
+ set = extract_set(cache, addr);
+
+ for (i = 0; i < cache->assoc; i++) {
+ if (cache->sets[set].blocks[i].tag == tag &&
+ cache->sets[set].blocks[i].valid) {
+ return i;
+ }
+ }
+
+ return -1;
+}
+
+/**
+ * access_cache(): Simulate a cache access
+ * @cache: The cache under simulation
+ * @addr: The address of the requested memory location
+ *
+ * Returns true if the requested data is found in the cache (a hit) and false
+ * on a miss.
+ * The cache is updated on miss for the next access.
+ */
+static bool access_cache(Cache *cache, uint64_t addr)
+{
+ int hit_blk, replaced_blk;
+ uint64_t tag, set;
+
+ tag = extract_tag(cache, addr);
+ set = extract_set(cache, addr);
+
+ hit_blk = in_cache(cache, addr);
+ if (hit_blk != -1) {
+ if (update_hit) {
+ update_hit(cache, set, hit_blk);
+ }
+ return true;
+ }
+
+ replaced_blk = get_invalid_block(cache, set);
+
+ if (replaced_blk == -1) {
+ replaced_blk = get_replaced_block(cache, set);
+ }
+
+ if (update_miss) {
+ update_miss(cache, set, replaced_blk);
+ }
+
+ cache->sets[set].blocks[replaced_blk].tag = tag;
+ cache->sets[set].blocks[replaced_blk].valid = true;
+
+ return false;
+}
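+
+/*
+ * Illustrative trace (hypothetical): with a cache built as
+ * cache_init(64, 8, 32 * 1024) and no eviction metadata installed, the
+ * first access to a block misses and fills it, while later accesses that
+ * fall within the same 64-byte block hit:
+ *
+ *   access_cache(cache, 0x1000);  -> false (compulsory miss, block cached)
+ *   access_cache(cache, 0x1008);  -> true  (same block: hit)
+ *   access_cache(cache, 0x1040);  -> false (next block: miss)
+ */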
+
+static void vcpu_mem_access(unsigned int vcpu_index, qemu_plugin_meminfo_t info,
+ uint64_t vaddr, void *userdata)
+{
+ uint64_t effective_addr;
+ struct qemu_plugin_hwaddr *hwaddr;
+ int cache_idx;
+ InsnData *insn;
+ bool hit_in_l1;
+
+ hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+ if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
+ return;
+ }
+
+ effective_addr = hwaddr ? qemu_plugin_hwaddr_phys_addr(hwaddr) : vaddr;
+ cache_idx = vcpu_index % cores;
+
+ g_mutex_lock(&l1_dcache_locks[cache_idx]);
+ hit_in_l1 = access_cache(l1_dcaches[cache_idx], effective_addr);
+ if (!hit_in_l1) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l1_dmisses, 1, __ATOMIC_SEQ_CST);
+ l1_dcaches[cache_idx]->misses++;
+ }
+ l1_dcaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l1_dcache_locks[cache_idx]);
+
+ if (hit_in_l1 || !use_l2) {
+ /* No need to access L2 */
+ return;
+ }
+
+ g_mutex_lock(&l2_ucache_locks[cache_idx]);
+ if (!access_cache(l2_ucaches[cache_idx], effective_addr)) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
+ l2_ucaches[cache_idx]->misses++;
+ }
+ l2_ucaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l2_ucache_locks[cache_idx]);
+}
+
+static void vcpu_insn_exec(unsigned int vcpu_index, void *userdata)
+{
+ uint64_t insn_addr;
+ InsnData *insn;
+ int cache_idx;
+ bool hit_in_l1;
+
+ insn_addr = ((InsnData *) userdata)->addr;
+
+ cache_idx = vcpu_index % cores;
+ g_mutex_lock(&l1_icache_locks[cache_idx]);
+ hit_in_l1 = access_cache(l1_icaches[cache_idx], insn_addr);
+ if (!hit_in_l1) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l1_imisses, 1, __ATOMIC_SEQ_CST);
+ l1_icaches[cache_idx]->misses++;
+ }
+ l1_icaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l1_icache_locks[cache_idx]);
+
+ if (hit_in_l1 || !use_l2) {
+ /* No need to access L2 */
+ return;
+ }
+
+ g_mutex_lock(&l2_ucache_locks[cache_idx]);
+ if (!access_cache(l2_ucaches[cache_idx], insn_addr)) {
+ insn = (InsnData *) userdata;
+ __atomic_fetch_add(&insn->l2_misses, 1, __ATOMIC_SEQ_CST);
+ l2_ucaches[cache_idx]->misses++;
+ }
+ l2_ucaches[cache_idx]->accesses++;
+ g_mutex_unlock(&l2_ucache_locks[cache_idx]);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n_insns;
+ size_t i;
+ InsnData *data;
+
+ n_insns = qemu_plugin_tb_n_insns(tb);
+ for (i = 0; i < n_insns; i++) {
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ uint64_t effective_addr;
+
+ if (sys) {
+ effective_addr = (uint64_t) qemu_plugin_insn_haddr(insn);
+ } else {
+ effective_addr = (uint64_t) qemu_plugin_insn_vaddr(insn);
+ }
+
+ /*
+     * Instructions might get translated multiple times; we do not create
+ * new entries for those instructions. Instead, we fetch the same
+ * entry from the hash table and register it for the callback again.
+ */
+ g_mutex_lock(&hashtable_lock);
+ data = g_hash_table_lookup(miss_ht, GUINT_TO_POINTER(effective_addr));
+ if (data == NULL) {
+ data = g_new0(InsnData, 1);
+ data->disas_str = qemu_plugin_insn_disas(insn);
+ data->symbol = qemu_plugin_insn_symbol(insn);
+ data->addr = effective_addr;
+ g_hash_table_insert(miss_ht, GUINT_TO_POINTER(effective_addr),
+ (gpointer) data);
+ }
+ g_mutex_unlock(&hashtable_lock);
+
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem_access,
+ QEMU_PLUGIN_CB_NO_REGS,
+ rw, data);
+
+ qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
+ QEMU_PLUGIN_CB_NO_REGS, data);
+ }
+}
+
+static void insn_free(gpointer data)
+{
+ InsnData *insn = (InsnData *) data;
+ g_free(insn->disas_str);
+ g_free(insn);
+}
+
+static void cache_free(Cache *cache)
+{
+ for (int i = 0; i < cache->num_sets; i++) {
+ g_free(cache->sets[i].blocks);
+ }
+
+ if (metadata_destroy) {
+ metadata_destroy(cache);
+ }
+
+ g_free(cache->sets);
+ g_free(cache);
+}
+
+static void caches_free(Cache **caches)
+{
+ int i;
+
+ for (i = 0; i < cores; i++) {
+ cache_free(caches[i]);
+ }
+}
+
+static void append_stats_line(GString *line, uint64_t l1_daccess,
+ uint64_t l1_dmisses, uint64_t l1_iaccess,
+ uint64_t l1_imisses, uint64_t l2_access,
+ uint64_t l2_misses)
+{
+ double l1_dmiss_rate, l1_imiss_rate, l2_miss_rate;
+
+ l1_dmiss_rate = ((double) l1_dmisses) / (l1_daccess) * 100.0;
+ l1_imiss_rate = ((double) l1_imisses) / (l1_iaccess) * 100.0;
+
+ g_string_append_printf(line, "%-14lu %-12lu %9.4lf%% %-14lu %-12lu"
+ " %9.4lf%%",
+ l1_daccess,
+ l1_dmisses,
+ l1_daccess ? l1_dmiss_rate : 0.0,
+ l1_iaccess,
+ l1_imisses,
+ l1_iaccess ? l1_imiss_rate : 0.0);
+
+ if (use_l2) {
+ l2_miss_rate = ((double) l2_misses) / (l2_access) * 100.0;
+ g_string_append_printf(line, " %-12lu %-11lu %10.4lf%%",
+ l2_access,
+ l2_misses,
+ l2_access ? l2_miss_rate : 0.0);
+ }
+
+ g_string_append(line, "\n");
+}
+
+static void sum_stats(void)
+{
+ int i;
+
+ g_assert(cores > 1);
+ for (i = 0; i < cores; i++) {
+ l1_imisses += l1_icaches[i]->misses;
+ l1_dmisses += l1_dcaches[i]->misses;
+ l1_imem_accesses += l1_icaches[i]->accesses;
+ l1_dmem_accesses += l1_dcaches[i]->accesses;
+
+ if (use_l2) {
+ l2_misses += l2_ucaches[i]->misses;
+ l2_mem_accesses += l2_ucaches[i]->accesses;
+ }
+ }
+}
+
+static int dcmp(gconstpointer a, gconstpointer b)
+{
+ InsnData *insn_a = (InsnData *) a;
+ InsnData *insn_b = (InsnData *) b;
+
+ return insn_a->l1_dmisses < insn_b->l1_dmisses ? 1 : -1;
+}
+
+static int icmp(gconstpointer a, gconstpointer b)
+{
+ InsnData *insn_a = (InsnData *) a;
+ InsnData *insn_b = (InsnData *) b;
+
+ return insn_a->l1_imisses < insn_b->l1_imisses ? 1 : -1;
+}
+
+static int l2_cmp(gconstpointer a, gconstpointer b)
+{
+ InsnData *insn_a = (InsnData *) a;
+ InsnData *insn_b = (InsnData *) b;
+
+ return insn_a->l2_misses < insn_b->l2_misses ? 1 : -1;
+}
+
+static void log_stats(void)
+{
+ int i;
+ Cache *icache, *dcache, *l2_cache;
+
+ g_autoptr(GString) rep = g_string_new("core #, data accesses, data misses,"
+ " dmiss rate, insn accesses,"
+ " insn misses, imiss rate");
+
+ if (use_l2) {
+ g_string_append(rep, ", l2 accesses, l2 misses, l2 miss rate");
+ }
+
+ g_string_append(rep, "\n");
+
+ for (i = 0; i < cores; i++) {
+ g_string_append_printf(rep, "%-8d", i);
+ dcache = l1_dcaches[i];
+ icache = l1_icaches[i];
+ l2_cache = use_l2 ? l2_ucaches[i] : NULL;
+ append_stats_line(rep, dcache->accesses, dcache->misses,
+ icache->accesses, icache->misses,
+ l2_cache ? l2_cache->accesses : 0,
+ l2_cache ? l2_cache->misses : 0);
+ }
+
+ if (cores > 1) {
+ sum_stats();
+ g_string_append_printf(rep, "%-8s", "sum");
+ append_stats_line(rep, l1_dmem_accesses, l1_dmisses,
+ l1_imem_accesses, l1_imisses,
+ l2_cache ? l2_mem_accesses : 0, l2_cache ? l2_misses : 0);
+ }
+
+ g_string_append(rep, "\n");
+ qemu_plugin_outs(rep->str);
+}
+
+static void log_top_insns(void)
+{
+ int i;
+ GList *curr, *miss_insns;
+ InsnData *insn;
+
+ miss_insns = g_hash_table_get_values(miss_ht);
+ miss_insns = g_list_sort(miss_insns, dcmp);
+ g_autoptr(GString) rep = g_string_new("");
+ g_string_append_printf(rep, "%s", "address, data misses, instruction\n");
+
+ for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+ insn = (InsnData *) curr->data;
+ g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
+ if (insn->symbol) {
+ g_string_append_printf(rep, " (%s)", insn->symbol);
+ }
+ g_string_append_printf(rep, ", %ld, %s\n", insn->l1_dmisses,
+ insn->disas_str);
+ }
+
+ miss_insns = g_list_sort(miss_insns, icmp);
+ g_string_append_printf(rep, "%s", "\naddress, fetch misses, instruction\n");
+
+ for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+ insn = (InsnData *) curr->data;
+ g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
+ if (insn->symbol) {
+ g_string_append_printf(rep, " (%s)", insn->symbol);
+ }
+ g_string_append_printf(rep, ", %ld, %s\n", insn->l1_imisses,
+ insn->disas_str);
+ }
+
+ if (!use_l2) {
+ goto finish;
+ }
+
+ miss_insns = g_list_sort(miss_insns, l2_cmp);
+ g_string_append_printf(rep, "%s", "\naddress, L2 misses, instruction\n");
+
+ for (curr = miss_insns, i = 0; curr && i < limit; i++, curr = curr->next) {
+ insn = (InsnData *) curr->data;
+ g_string_append_printf(rep, "0x%" PRIx64, insn->addr);
+ if (insn->symbol) {
+ g_string_append_printf(rep, " (%s)", insn->symbol);
+ }
+ g_string_append_printf(rep, ", %ld, %s\n", insn->l2_misses,
+ insn->disas_str);
+ }
+
+finish:
+ qemu_plugin_outs(rep->str);
+ g_list_free(miss_insns);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ log_stats();
+ log_top_insns();
+
+ caches_free(l1_dcaches);
+ caches_free(l1_icaches);
+
+ g_free(l1_dcache_locks);
+ g_free(l1_icache_locks);
+
+ if (use_l2) {
+ caches_free(l2_ucaches);
+ g_free(l2_ucache_locks);
+ }
+
+ g_hash_table_destroy(miss_ht);
+}
+
+static void policy_init(void)
+{
+ switch (policy) {
+ case LRU:
+ update_hit = lru_update_blk;
+ update_miss = lru_update_blk;
+ metadata_init = lru_priorities_init;
+ metadata_destroy = lru_priorities_destroy;
+ break;
+ case FIFO:
+ update_miss = fifo_update_on_miss;
+ metadata_init = fifo_init;
+ metadata_destroy = fifo_destroy;
+ break;
+ case RAND:
+ rng = g_rand_new();
+ break;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+ int l1_iassoc, l1_iblksize, l1_icachesize;
+ int l1_dassoc, l1_dblksize, l1_dcachesize;
+ int l2_assoc, l2_blksize, l2_cachesize;
+
+ limit = 32;
+ sys = info->system_emulation;
+
+ l1_dassoc = 8;
+ l1_dblksize = 64;
+ l1_dcachesize = l1_dblksize * l1_dassoc * 32;
+
+ l1_iassoc = 8;
+ l1_iblksize = 64;
+ l1_icachesize = l1_iblksize * l1_iassoc * 32;
+
+ l2_assoc = 16;
+ l2_blksize = 64;
+ l2_cachesize = l2_assoc * l2_blksize * 2048;
+
+ policy = LRU;
+
+ cores = sys ? qemu_plugin_n_vcpus() : 1;
+
+ for (i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", 2);
+
+ if (g_strcmp0(tokens[0], "iblksize") == 0) {
+ l1_iblksize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "iassoc") == 0) {
+ l1_iassoc = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "icachesize") == 0) {
+ l1_icachesize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "dblksize") == 0) {
+ l1_dblksize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "dassoc") == 0) {
+ l1_dassoc = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "dcachesize") == 0) {
+ l1_dcachesize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "limit") == 0) {
+ limit = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "cores") == 0) {
+ cores = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2cachesize") == 0) {
+ use_l2 = true;
+ l2_cachesize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2blksize") == 0) {
+ use_l2 = true;
+ l2_blksize = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2assoc") == 0) {
+ use_l2 = true;
+ l2_assoc = STRTOLL(tokens[1]);
+ } else if (g_strcmp0(tokens[0], "l2") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &use_l2)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "evict") == 0) {
+ if (g_strcmp0(tokens[1], "rand") == 0) {
+ policy = RAND;
+ } else if (g_strcmp0(tokens[1], "lru") == 0) {
+ policy = LRU;
+ } else if (g_strcmp0(tokens[1], "fifo") == 0) {
+ policy = FIFO;
+ } else {
+ fprintf(stderr, "invalid eviction policy: %s\n", opt);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
+ policy_init();
+
+ l1_dcaches = caches_init(l1_dblksize, l1_dassoc, l1_dcachesize);
+ if (!l1_dcaches) {
+ const char *err = cache_config_error(l1_dblksize, l1_dassoc, l1_dcachesize);
+ fprintf(stderr, "dcache cannot be constructed from given parameters\n");
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+
+ l1_icaches = caches_init(l1_iblksize, l1_iassoc, l1_icachesize);
+ if (!l1_icaches) {
+ const char *err = cache_config_error(l1_iblksize, l1_iassoc, l1_icachesize);
+ fprintf(stderr, "icache cannot be constructed from given parameters\n");
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+
+ l2_ucaches = use_l2 ? caches_init(l2_blksize, l2_assoc, l2_cachesize) : NULL;
+ if (!l2_ucaches && use_l2) {
+ const char *err = cache_config_error(l2_blksize, l2_assoc, l2_cachesize);
+ fprintf(stderr, "L2 cache cannot be constructed from given parameters\n");
+ fprintf(stderr, "%s\n", err);
+ return -1;
+ }
+
+ l1_dcache_locks = g_new0(GMutex, cores);
+ l1_icache_locks = g_new0(GMutex, cores);
+ l2_ucache_locks = use_l2 ? g_new0(GMutex, cores) : NULL;
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+ miss_ht = g_hash_table_new_full(NULL, g_direct_equal, NULL, insn_free);
+
+ return 0;
+}
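The options parsed above (dblksize, dassoc, dcachesize, their icache and L2
counterparts, limit, cores, l2 and evict) are supplied as plugin arguments on
the QEMU command line. Since the code relies on qemu_plugin_bool_parse(), it
targets the full-name argument syntax; a purely illustrative invocation (paths
and values hypothetical) might look like:

    qemu-system-aarch64 ... -d plugin \
        -plugin ./contrib/plugins/libcache.so,dcachesize=32768,evict=fifo,l2=on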
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
new file mode 100644
index 000000000..a5275dcc1
--- /dev/null
+++ b/contrib/plugins/execlog.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
+ *
+ * Log instruction execution with memory access.
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <glib.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* Store last executed instruction on each vCPU as a GString */
+GArray *last_exec;
+
+/**
+ * Add memory read or write information to current instruction log
+ */
+static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
+ uint64_t vaddr, void *udata)
+{
+ GString *s;
+
+ /* Find vCPU in array */
+ g_assert(cpu_index < last_exec->len);
+ s = g_array_index(last_exec, GString *, cpu_index);
+
+ /* Indicate type of memory access */
+ if (qemu_plugin_mem_is_store(info)) {
+ g_string_append(s, ", store");
+ } else {
+ g_string_append(s, ", load");
+ }
+
+    /* In full system emulation, log the physical address and device name */
+ struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(info, vaddr);
+ if (hwaddr) {
+ uint64_t addr = qemu_plugin_hwaddr_phys_addr(hwaddr);
+ const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+ g_string_append_printf(s, ", 0x%08"PRIx64", %s", addr, name);
+ } else {
+ g_string_append_printf(s, ", 0x%08"PRIx64, vaddr);
+ }
+}
+
+/**
+ * Log instruction execution
+ */
+static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+{
+ GString *s;
+
+ /* Find or create vCPU in array */
+ while (cpu_index >= last_exec->len) {
+ s = g_string_new(NULL);
+ g_array_append_val(last_exec, s);
+ }
+ s = g_array_index(last_exec, GString *, cpu_index);
+
+ /* Print previous instruction in cache */
+ if (s->len) {
+ qemu_plugin_outs(s->str);
+ qemu_plugin_outs("\n");
+ }
+
+ /* Store new instruction in cache */
+ /* vcpu_mem will add memory access information to last_exec */
+ g_string_printf(s, "%u, ", cpu_index);
+ g_string_append(s, (char *)udata);
+}
+
+/**
+ * On translation of a new translation block
+ *
+ * QEMU translates code one translation block (TB) at a time. By hooking here
+ * we can register a callback on each instruction and memory access.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ struct qemu_plugin_insn *insn;
+ uint64_t insn_vaddr;
+ uint32_t insn_opcode;
+ char *insn_disas;
+
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ for (size_t i = 0; i < n; i++) {
+ /*
+         * `insn` is shared between translations in QEMU; copy needed data here.
+ * `output` is never freed as it might be used multiple times during
+ * the emulation lifetime.
+         * We only consider the first 32 bits of the instruction; this may be
+ * a limitation for CISC architectures.
+ */
+ insn = qemu_plugin_tb_get_insn(tb, i);
+ insn_vaddr = qemu_plugin_insn_vaddr(insn);
+ insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
+ insn_disas = qemu_plugin_insn_disas(insn);
+ char *output = g_strdup_printf("0x%"PRIx64", 0x%"PRIx32", \"%s\"",
+ insn_vaddr, insn_opcode, insn_disas);
+
+ /* Register callback on memory read or write */
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_mem,
+ QEMU_PLUGIN_CB_NO_REGS,
+ QEMU_PLUGIN_MEM_RW, NULL);
+
+ /* Register callback on instruction */
+ qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
+ QEMU_PLUGIN_CB_NO_REGS, output);
+ }
+}
+
+/**
+ * On plugin exit, print last instruction in cache
+ */
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ guint i;
+ GString *s;
+ for (i = 0; i < last_exec->len; i++) {
+ s = g_array_index(last_exec, GString *, i);
+ if (s->str) {
+ qemu_plugin_outs(s->str);
+ qemu_plugin_outs("\n");
+ }
+ }
+}
+
+/**
+ * Install the plugin
+ */
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+ const qemu_info_t *info, int argc,
+ char **argv)
+{
+ /*
+     * Initialize dynamic array to cache vCPU instructions. In user mode
+ * we don't know the size before emulation.
+ */
+ last_exec = g_array_new(FALSE, FALSE, sizeof(GString *));
+
+ /* Register translation block and exit callbacks */
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+ return 0;
+}
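Putting the format strings above together, each completed log line is
cpu_index, insn_vaddr, insn_opcode, "disassembly", followed by one
", load|store, address[, device]" group per memory access performed by that
instruction. A purely illustrative user-mode line (all values hypothetical):

    0, 0x4002c4, 0xf9400260, "ldr x0, [x19]", load, 0x000055007f8250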
diff --git a/contrib/plugins/hotblocks.c b/contrib/plugins/hotblocks.c
new file mode 100644
index 000000000..062200a7a
--- /dev/null
+++ b/contrib/plugins/hotblocks.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+static bool do_inline;
+
+/* Plugins need to take care of their own locking */
+static GMutex lock;
+static GHashTable *hotblocks;
+static guint64 limit = 20;
+
+/*
+ * Counting Structure
+ *
+ * The internals of the TCG are not exposed to plugins so we can only
+ * get the starting PC for each block. We cheat this slightly by
+ * xor'ing the number of instructions to the hash to help
+ * differentiate.
+ */
+typedef struct {
+ uint64_t start_addr;
+ uint64_t exec_count;
+ int trans_count;
+ unsigned long insns;
+} ExecCount;
+
+static gint cmp_exec_count(gconstpointer a, gconstpointer b)
+{
+ ExecCount *ea = (ExecCount *) a;
+ ExecCount *eb = (ExecCount *) b;
+ return ea->exec_count > eb->exec_count ? -1 : 1;
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("collected ");
+ GList *counts, *it;
+ int i;
+
+ g_mutex_lock(&lock);
+ g_string_append_printf(report, "%d entries in the hash table\n",
+ g_hash_table_size(hotblocks));
+ counts = g_hash_table_get_values(hotblocks);
+ it = g_list_sort(counts, cmp_exec_count);
+
+ if (it) {
+ g_string_append_printf(report, "pc, tcount, icount, ecount\n");
+
+ for (i = 0; i < limit && it->next; i++, it = it->next) {
+ ExecCount *rec = (ExecCount *) it->data;
+ g_string_append_printf(report, "0x%016"PRIx64", %d, %ld, %"PRId64"\n",
+ rec->start_addr, rec->trans_count,
+ rec->insns, rec->exec_count);
+ }
+
+ g_list_free(it);
+ g_mutex_unlock(&lock);
+ }
+
+ qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+ hotblocks = g_hash_table_new(NULL, g_direct_equal);
+}
+
+static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
+{
+ ExecCount *cnt;
+ uint64_t hash = (uint64_t) udata;
+
+ g_mutex_lock(&lock);
+ cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash);
+ /* should always succeed */
+ g_assert(cnt);
+ cnt->exec_count++;
+ g_mutex_unlock(&lock);
+}
+
+/*
+ * When do_inline is enabled we ask the plugin API to increment the counter
+ * for us.
+ * Otherwise a helper is inserted which calls the vcpu_tb_exec
+ * callback.
+ */
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ ExecCount *cnt;
+ uint64_t pc = qemu_plugin_tb_vaddr(tb);
+ size_t insns = qemu_plugin_tb_n_insns(tb);
+ uint64_t hash = pc ^ insns;
+
+ g_mutex_lock(&lock);
+ cnt = (ExecCount *) g_hash_table_lookup(hotblocks, (gconstpointer) hash);
+ if (cnt) {
+ cnt->trans_count++;
+ } else {
+ cnt = g_new0(ExecCount, 1);
+ cnt->start_addr = pc;
+ cnt->trans_count = 1;
+ cnt->insns = insns;
+ g_hash_table_insert(hotblocks, (gpointer) hash, (gpointer) cnt);
+ }
+
+ g_mutex_unlock(&lock);
+
+ if (do_inline) {
+ qemu_plugin_register_vcpu_tb_exec_inline(tb, QEMU_PLUGIN_INLINE_ADD_U64,
+ &cnt->exec_count, 1);
+ } else {
+ qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
+ QEMU_PLUGIN_CB_NO_REGS,
+ (void *)hash);
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ for (int i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", 2);
+ if (g_strcmp0(tokens[0], "inline") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/hotpages.c b/contrib/plugins/hotpages.c
new file mode 100644
index 000000000..0d12910af
--- /dev/null
+++ b/contrib/plugins/hotpages.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * Hot Pages - show which pages saw the most memory accesses.
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+static uint64_t page_size = 4096;
+static uint64_t page_mask;
+static int limit = 50;
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+static bool track_io;
+
+enum sort_type {
+ SORT_RW = 0,
+ SORT_R,
+ SORT_W,
+ SORT_A
+};
+
+static int sort_by = SORT_RW;
+
+typedef struct {
+ uint64_t page_address;
+ int cpu_read;
+ int cpu_write;
+ uint64_t reads;
+ uint64_t writes;
+} PageCounters;
+
+static GMutex lock;
+static GHashTable *pages;
+
+static gint cmp_access_count(gconstpointer a, gconstpointer b)
+{
+ PageCounters *ea = (PageCounters *) a;
+ PageCounters *eb = (PageCounters *) b;
+ int r;
+ switch (sort_by) {
+ case SORT_RW:
+ r = (ea->reads + ea->writes) > (eb->reads + eb->writes) ? -1 : 1;
+ break;
+ case SORT_R:
+ r = ea->reads > eb->reads ? -1 : 1;
+ break;
+ case SORT_W:
+ r = ea->writes > eb->writes ? -1 : 1;
+ break;
+ case SORT_A:
+ r = ea->page_address > eb->page_address ? -1 : 1;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return r;
+}
+
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("Addr, RCPUs, Reads, WCPUs, Writes\n");
+ int i;
+ GList *counts;
+
+ counts = g_hash_table_get_values(pages);
+ if (counts && g_list_next(counts)) {
+ GList *it;
+
+ it = g_list_sort(counts, cmp_access_count);
+
+ for (i = 0; i < limit && it->next; i++, it = it->next) {
+ PageCounters *rec = (PageCounters *) it->data;
+ g_string_append_printf(report,
+ "0x%016"PRIx64", 0x%04x, %"PRId64
+ ", 0x%04x, %"PRId64"\n",
+ rec->page_address,
+ rec->cpu_read, rec->reads,
+ rec->cpu_write, rec->writes);
+ }
+ g_list_free(it);
+ }
+
+ qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+ page_mask = (page_size - 1);
+ pages = g_hash_table_new(NULL, g_direct_equal);
+}
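+
+/*
+ * Example: with the default 4096-byte pages, page_mask is 0xfff, so an
+ * access to address 0x12345678 is accounted to page
+ * 0x12345678 & ~0xfff = 0x12345000.
+ */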
+
+static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
+ uint64_t vaddr, void *udata)
+{
+ struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
+ uint64_t page;
+ PageCounters *count;
+
+ /* We only get a hwaddr for system emulation */
+ if (track_io) {
+ if (hwaddr && qemu_plugin_hwaddr_is_io(hwaddr)) {
+ page = vaddr;
+ } else {
+ return;
+ }
+ } else {
+ if (hwaddr && !qemu_plugin_hwaddr_is_io(hwaddr)) {
+ page = (uint64_t) qemu_plugin_hwaddr_phys_addr(hwaddr);
+ } else {
+ page = vaddr;
+ }
+ }
+ page &= ~page_mask;
+
+ g_mutex_lock(&lock);
+ count = (PageCounters *) g_hash_table_lookup(pages, GUINT_TO_POINTER(page));
+
+ if (!count) {
+ count = g_new0(PageCounters, 1);
+ count->page_address = page;
+ g_hash_table_insert(pages, GUINT_TO_POINTER(page), (gpointer) count);
+ }
+ if (qemu_plugin_mem_is_store(meminfo)) {
+ count->writes++;
+ count->cpu_write |= (1 << cpu_index);
+ } else {
+ count->reads++;
+ count->cpu_read |= (1 << cpu_index);
+ }
+
+ g_mutex_unlock(&lock);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
+ QEMU_PLUGIN_CB_NO_REGS,
+ rw, NULL);
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+
+ for (i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", -1);
+
+ if (g_strcmp0(tokens[0], "sortby") == 0) {
+ if (g_strcmp0(tokens[1], "reads") == 0) {
+ sort_by = SORT_R;
+ } else if (g_strcmp0(tokens[1], "writes") == 0) {
+ sort_by = SORT_W;
+ } else if (g_strcmp0(tokens[1], "address") == 0) {
+ sort_by = SORT_A;
+ } else {
+ fprintf(stderr, "invalid value to sortby: %s\n", tokens[1]);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "io") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &track_io)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "pagesize") == 0) {
+ page_size = g_ascii_strtoull(tokens[1], NULL, 10);
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/howvec.c b/contrib/plugins/howvec.c
new file mode 100644
index 000000000..4a5ec3d93
--- /dev/null
+++ b/contrib/plugins/howvec.c
@@ -0,0 +1,372 @@
+/*
+ * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * How vectorised is this code?
+ *
+ * Attempt to measure the amount of vectorisation that has been done
+ * on some code by counting classes of instruction.
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef enum {
+ COUNT_CLASS,
+ COUNT_INDIVIDUAL,
+ COUNT_NONE
+} CountType;
+
+static int limit = 50;
+static bool do_inline;
+static bool verbose;
+
+static GMutex lock;
+static GHashTable *insns;
+
+typedef struct {
+ const char *class;
+ const char *opt;
+ uint32_t mask;
+ uint32_t pattern;
+ CountType what;
+ uint64_t count;
+} InsnClassExecCount;
+
+typedef struct {
+ char *insn;
+ uint32_t opcode;
+ uint64_t count;
+ InsnClassExecCount *class;
+} InsnExecCount;
+
+/*
+ * Matchers for classes of instructions, order is important.
+ *
+ * Your most precise match must be before looser matches. If no match
+ * is found in the table we can create an individual entry.
+ *
+ * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
+ */
+static InsnClassExecCount aarch64_insn_classes[] = {
+ /* "Reserved"" */
+ { " UDEF", "udef", 0xffff0000, 0x00000000, COUNT_NONE},
+ { " SVE", "sve", 0x1e000000, 0x04000000, COUNT_CLASS},
+ { "Reserved", "res", 0x1e000000, 0x00000000, COUNT_CLASS},
+ /* Data Processing Immediate */
+ { " PCrel addr", "pcrel", 0x1f000000, 0x10000000, COUNT_CLASS},
+ { " Add/Sub (imm,tags)", "asit", 0x1f800000, 0x11800000, COUNT_CLASS},
+ { " Add/Sub (imm)", "asi", 0x1f000000, 0x11000000, COUNT_CLASS},
+ { " Logical (imm)", "logi", 0x1f800000, 0x12000000, COUNT_CLASS},
+ { " Move Wide (imm)", "movwi", 0x1f800000, 0x12800000, COUNT_CLASS},
+ { " Bitfield", "bitf", 0x1f800000, 0x13000000, COUNT_CLASS},
+ { " Extract", "extr", 0x1f800000, 0x13800000, COUNT_CLASS},
+ { "Data Proc Imm", "dpri", 0x1c000000, 0x10000000, COUNT_CLASS},
+ /* Branches */
+ { " Cond Branch (imm)", "cndb", 0xfe000000, 0x54000000, COUNT_CLASS},
+ { " Exception Gen", "excp", 0xff000000, 0xd4000000, COUNT_CLASS},
+ { " NOP", "nop", 0xffffffff, 0xd503201f, COUNT_NONE},
+ { " Hints", "hint", 0xfffff000, 0xd5032000, COUNT_CLASS},
+ { " Barriers", "barr", 0xfffff000, 0xd5033000, COUNT_CLASS},
+ { " PSTATE", "psta", 0xfff8f000, 0xd5004000, COUNT_CLASS},
+ { " System Insn", "sins", 0xffd80000, 0xd5080000, COUNT_CLASS},
+ { " System Reg", "sreg", 0xffd00000, 0xd5100000, COUNT_CLASS},
+ { " Branch (reg)", "breg", 0xfe000000, 0xd6000000, COUNT_CLASS},
+ { " Branch (imm)", "bimm", 0x7c000000, 0x14000000, COUNT_CLASS},
+ { " Cmp & Branch", "cmpb", 0x7e000000, 0x34000000, COUNT_CLASS},
+ { " Tst & Branch", "tstb", 0x7e000000, 0x36000000, COUNT_CLASS},
+ { "Branches", "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
+ /* Loads and Stores */
+ { " AdvSimd ldstmult", "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
+ { " AdvSimd ldstmult++", "advlsmp", 0xbfb00000, 0x0c800000, COUNT_CLASS},
+ { " AdvSimd ldst", "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
+ { " AdvSimd ldst++", "advlssp", 0xbf800000, 0x0d800000, COUNT_CLASS},
+ { " ldst excl", "ldstx", 0x3f000000, 0x08000000, COUNT_CLASS},
+ { " Prefetch", "prfm", 0xff000000, 0xd8000000, COUNT_CLASS},
+ { " Load Reg (lit)", "ldlit", 0x1b000000, 0x18000000, COUNT_CLASS},
+ { " ldst noalloc pair", "ldstnap", 0x3b800000, 0x28000000, COUNT_CLASS},
+ { " ldst pair", "ldstp", 0x38000000, 0x28000000, COUNT_CLASS},
+ { " ldst reg", "ldstr", 0x3b200000, 0x38000000, COUNT_CLASS},
+ { " Atomic ldst", "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
+ { " ldst reg (reg off)", "ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
+ { " ldst reg (pac)", "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
+ { " ldst reg (imm)", "ldsti", 0x3b000000, 0x39000000, COUNT_CLASS},
+ { "Loads & Stores", "ldst", 0x0a000000, 0x08000000, COUNT_CLASS},
+ /* Data Processing Register */
+ { "Data Proc Reg", "dprr", 0x0e000000, 0x0a000000, COUNT_CLASS},
+ /* Scalar FP */
+ { "Scalar FP ", "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
+ /* Unclassified */
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
+};
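+
+/*
+ * Worked example of the top-down matching (see find_counter() below): the
+ * AArch64 NOP, 0xd503201f, satisfies both its exact matcher and the looser
+ * "Hints" matcher beneath it:
+ *
+ *   (0xd503201f & 0xffffffff) == 0xd503201f  ->  " NOP",   COUNT_NONE
+ *   (0xd503201f & 0xfffff000) == 0xd5032000  ->  " Hints", COUNT_CLASS
+ *
+ * The first match wins, so NOPs hit the precise COUNT_NONE entry and are
+ * deliberately left uncounted.
+ */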
+
+static InsnClassExecCount sparc32_insn_classes[] = {
+ { "Call", "call", 0xc0000000, 0x40000000, COUNT_CLASS},
+ { "Branch ICond", "bcc", 0xc1c00000, 0x00800000, COUNT_CLASS},
+ { "Branch Fcond", "fbcc", 0xc1c00000, 0x01800000, COUNT_CLASS},
+ { "SetHi", "sethi", 0xc1c00000, 0x01000000, COUNT_CLASS},
+ { "FPU ALU", "fpu", 0xc1f00000, 0x81a00000, COUNT_CLASS},
+ { "ALU", "alu", 0xc0000000, 0x80000000, COUNT_CLASS},
+ { "Load/Store", "ldst", 0xc0000000, 0xc0000000, COUNT_CLASS},
+ /* Unclassified */
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+static InsnClassExecCount sparc64_insn_classes[] = {
+ { "SetHi & Branches", "op0", 0xc0000000, 0x00000000, COUNT_CLASS},
+ { "Call", "op1", 0xc0000000, 0x40000000, COUNT_CLASS},
+ { "Arith/Logical/Move", "op2", 0xc0000000, 0x80000000, COUNT_CLASS},
+ { "Arith/Logical/Move", "op3", 0xc0000000, 0xc0000000, COUNT_CLASS},
+ /* Unclassified */
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+/* Default matcher for currently unclassified architectures */
+static InsnClassExecCount default_insn_classes[] = {
+ { "Unclassified", "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
+};
+
+typedef struct {
+ const char *qemu_target;
+ InsnClassExecCount *table;
+ int table_sz;
+} ClassSelector;
+
+static ClassSelector class_tables[] = {
+ { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
+ { "sparc", sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
+ { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
+ { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
+};
+
+static InsnClassExecCount *class_table;
+static int class_table_sz;
+
+static gint cmp_exec_count(gconstpointer a, gconstpointer b)
+{
+ InsnExecCount *ea = (InsnExecCount *) a;
+ InsnExecCount *eb = (InsnExecCount *) b;
+ return ea->count > eb->count ? -1 : 1;
+}
+
+static void free_record(gpointer data)
+{
+ InsnExecCount *rec = (InsnExecCount *) data;
+ g_free(rec->insn);
+ g_free(rec);
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
+ int i;
+ GList *counts;
+ InsnClassExecCount *class = NULL;
+
+ for (i = 0; i < class_table_sz; i++) {
+ class = &class_table[i];
+ switch (class->what) {
+ case COUNT_CLASS:
+ if (class->count || verbose) {
+ g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
+ class->class,
+ class->count);
+ }
+ break;
+ case COUNT_INDIVIDUAL:
+ g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
+ class->class);
+ break;
+ case COUNT_NONE:
+ g_string_append_printf(report, "Class: %-24s\tnot counted\n",
+ class->class);
+ break;
+ default:
+ break;
+ }
+ }
+
+ counts = g_hash_table_get_values(insns);
+ if (counts && g_list_next(counts)) {
+ g_string_append_printf(report, "Individual Instructions:\n");
+ counts = g_list_sort(counts, cmp_exec_count);
+
+ for (i = 0; i < limit && g_list_next(counts);
+ i++, counts = g_list_next(counts)) {
+ InsnExecCount *rec = (InsnExecCount *) counts->data;
+ g_string_append_printf(report,
+ "Instr: %-24s\t(%ld hits)\t(op=0x%08x/%s)\n",
+ rec->insn,
+ rec->count,
+ rec->opcode,
+ rec->class ?
+ rec->class->class : "un-categorised");
+ }
+ g_list_free(counts);
+ }
+
+ g_hash_table_destroy(insns);
+
+ qemu_plugin_outs(report->str);
+}
+
+static void plugin_init(void)
+{
+ insns = g_hash_table_new_full(NULL, g_direct_equal, NULL, &free_record);
+}
+
+static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
+{
+ uint64_t *count = (uint64_t *) udata;
+ (*count)++;
+}
+
+static uint64_t *find_counter(struct qemu_plugin_insn *insn)
+{
+ int i;
+ uint64_t *cnt = NULL;
+ uint32_t opcode;
+ InsnClassExecCount *class = NULL;
+
+ /*
+ * We only match the first 32 bits of the instruction which is
+ * fine for most RISCs but a bit limiting for CISC architectures.
+ * They would probably benefit from a more tailored plugin.
+ * However we can fall back to individual instruction counting.
+ */
+ opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
+
+ for (i = 0; !cnt && i < class_table_sz; i++) {
+ class = &class_table[i];
+ uint32_t masked_bits = opcode & class->mask;
+ if (masked_bits == class->pattern) {
+ break;
+ }
+ }
+
+ g_assert(class);
+
+ switch (class->what) {
+ case COUNT_NONE:
+ return NULL;
+ case COUNT_CLASS:
+ return &class->count;
+ case COUNT_INDIVIDUAL:
+ {
+ InsnExecCount *icount;
+
+ g_mutex_lock(&lock);
+ icount = (InsnExecCount *) g_hash_table_lookup(insns,
+ GUINT_TO_POINTER(opcode));
+
+ if (!icount) {
+ icount = g_new0(InsnExecCount, 1);
+ icount->opcode = opcode;
+ icount->insn = qemu_plugin_insn_disas(insn);
+ icount->class = class;
+
+ g_hash_table_insert(insns, GUINT_TO_POINTER(opcode),
+ (gpointer) icount);
+ }
+ g_mutex_unlock(&lock);
+
+ return &icount->count;
+ }
+ default:
+ g_assert_not_reached();
+ }
+
+ return NULL;
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ uint64_t *cnt;
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ cnt = find_counter(insn);
+
+ if (cnt) {
+ if (do_inline) {
+ qemu_plugin_register_vcpu_insn_exec_inline(
+ insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1);
+ } else {
+ qemu_plugin_register_vcpu_insn_exec_cb(
+ insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
+ }
+ }
+ }
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+ const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+
+ /* Select a class table appropriate to the guest architecture */
+ for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
+ ClassSelector *entry = &class_tables[i];
+ if (!entry->qemu_target ||
+ strcmp(entry->qemu_target, info->target_name) == 0) {
+ class_table = entry->table;
+ class_table_sz = entry->table_sz;
+ break;
+ }
+ }
+
+ for (i = 0; i < argc; i++) {
+ char *p = argv[i];
+ g_autofree char **tokens = g_strsplit(p, "=", -1);
+ if (g_strcmp0(tokens[0], "inline") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &do_inline)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", p);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "verbose") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", p);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "count") == 0) {
+ char *value = tokens[1];
+ int j;
+ CountType type = COUNT_INDIVIDUAL;
+ if (*value == '!') {
+ type = COUNT_NONE;
+ value++;
+ }
+ for (j = 0; j < class_table_sz; j++) {
+ if (strcmp(value, class_table[j].opt) == 0) {
+ class_table[j].what = type;
+ break;
+ }
+ }
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", p);
+ return -1;
+ }
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/hwprofile.c b/contrib/plugins/hwprofile.c
new file mode 100644
index 000000000..691d4edb0
--- /dev/null
+++ b/contrib/plugins/hwprofile.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) 2020, Alex Bennée <alex.bennee@linaro.org>
+ *
+ * HW Profile - breakdown access patterns for IO to devices
+ *
+ * License: GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <glib.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef struct {
+ uint64_t cpu_read;
+ uint64_t cpu_write;
+ uint64_t reads;
+ uint64_t writes;
+} IOCounts;
+
+typedef struct {
+ uint64_t off_or_pc;
+ IOCounts counts;
+} IOLocationCounts;
+
+typedef struct {
+ const char *name;
+ uint64_t base;
+ IOCounts totals;
+ GHashTable *detail;
+} DeviceCounts;
+
+static GMutex lock;
+static GHashTable *devices;
+
+/* track the access pattern to a piece of HW */
+static bool pattern;
+/* track the source address of access to HW */
+static bool source;
+/* track only matched regions of HW */
+static bool check_match;
+static gchar **matches;
+
+static enum qemu_plugin_mem_rw rw = QEMU_PLUGIN_MEM_RW;
+
+static inline bool track_reads(void)
+{
+ return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_R;
+}
+
+static inline bool track_writes(void)
+{
+ return rw == QEMU_PLUGIN_MEM_RW || rw == QEMU_PLUGIN_MEM_W;
+}
+
+static void plugin_init(void)
+{
+ devices = g_hash_table_new(NULL, NULL);
+}
+
+static gint sort_cmp(gconstpointer a, gconstpointer b)
+{
+ DeviceCounts *ea = (DeviceCounts *) a;
+ DeviceCounts *eb = (DeviceCounts *) b;
+ return ea->totals.reads + ea->totals.writes >
+ eb->totals.reads + eb->totals.writes ? -1 : 1;
+}
+
+static gint sort_loc(gconstpointer a, gconstpointer b)
+{
+ IOLocationCounts *ea = (IOLocationCounts *) a;
+ IOLocationCounts *eb = (IOLocationCounts *) b;
+ return ea->off_or_pc > eb->off_or_pc;
+}
+
+static void fmt_iocount_record(GString *s, IOCounts *rec)
+{
+ if (track_reads()) {
+ g_string_append_printf(s, ", %"PRIx64", %"PRId64,
+ rec->cpu_read, rec->reads);
+ }
+ if (track_writes()) {
+ g_string_append_printf(s, ", %"PRIx64", %"PRId64,
+ rec->cpu_write, rec->writes);
+ }
+}
+
+static void fmt_dev_record(GString *s, DeviceCounts *rec)
+{
+ g_string_append_printf(s, "%s, 0x%"PRIx64,
+ rec->name, rec->base);
+ fmt_iocount_record(s, &rec->totals);
+ g_string_append_c(s, '\n');
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) report = g_string_new("");
+ GList *counts;
+
+ if (!(pattern || source)) {
+ g_string_printf(report, "Device, Address");
+ if (track_reads()) {
+ g_string_append_printf(report, ", RCPUs, Reads");
+ }
+ if (track_writes()) {
+ g_string_append_printf(report, ", WCPUs, Writes");
+ }
+ g_string_append_c(report, '\n');
+ }
+
+ counts = g_hash_table_get_values(devices);
+ if (counts && g_list_next(counts)) {
+ GList *it;
+
+ it = g_list_sort(counts, sort_cmp);
+
+ while (it) {
+ DeviceCounts *rec = (DeviceCounts *) it->data;
+ if (rec->detail) {
+ GList *accesses = g_hash_table_get_values(rec->detail);
+ GList *io_it = g_list_sort(accesses, sort_loc);
+ const char *prefix = pattern ? "off" : "pc";
+ g_string_append_printf(report, "%s @ 0x%"PRIx64"\n",
+ rec->name, rec->base);
+ while (io_it) {
+ IOLocationCounts *loc = (IOLocationCounts *) io_it->data;
+ g_string_append_printf(report, " %s:%08"PRIx64,
+ prefix, loc->off_or_pc);
+ fmt_iocount_record(report, &loc->counts);
+ g_string_append_c(report, '\n');
+ io_it = io_it->next;
+ }
+ } else {
+ fmt_dev_record(report, rec);
+ }
+ it = it->next;
+        }
+ g_list_free(it);
+ }
+
+ qemu_plugin_outs(report->str);
+}
+
+static DeviceCounts *new_count(const char *name, uint64_t base)
+{
+ DeviceCounts *count = g_new0(DeviceCounts, 1);
+ count->name = name;
+ count->base = base;
+ if (pattern || source) {
+ count->detail = g_hash_table_new(NULL, NULL);
+ }
+ g_hash_table_insert(devices, (gpointer) name, count);
+ return count;
+}
+
+static IOLocationCounts *new_location(GHashTable *table, uint64_t off_or_pc)
+{
+ IOLocationCounts *loc = g_new0(IOLocationCounts, 1);
+ loc->off_or_pc = off_or_pc;
+ g_hash_table_insert(table, (gpointer) off_or_pc, loc);
+ return loc;
+}
+
+static void hwprofile_match_hit(DeviceCounts *rec, uint64_t off)
+{
+ g_autoptr(GString) report = g_string_new("hwprofile: match @ offset");
+ g_string_append_printf(report, "%"PRIx64", previous hits\n", off);
+ fmt_dev_record(report, rec);
+ qemu_plugin_outs(report->str);
+}
+
+static void inc_count(IOCounts *count, bool is_write, unsigned int cpu_index)
+{
+ if (is_write) {
+ count->writes++;
+        count->cpu_write |= (1ULL << cpu_index);
+ } else {
+ count->reads++;
+        count->cpu_read |= (1ULL << cpu_index);
+ }
+}
+
+static void vcpu_haddr(unsigned int cpu_index, qemu_plugin_meminfo_t meminfo,
+ uint64_t vaddr, void *udata)
+{
+ struct qemu_plugin_hwaddr *hwaddr = qemu_plugin_get_hwaddr(meminfo, vaddr);
+
+ if (!hwaddr || !qemu_plugin_hwaddr_is_io(hwaddr)) {
+ return;
+ } else {
+ const char *name = qemu_plugin_hwaddr_device_name(hwaddr);
+ uint64_t off = qemu_plugin_hwaddr_phys_addr(hwaddr);
+ bool is_write = qemu_plugin_mem_is_store(meminfo);
+ DeviceCounts *counts;
+
+ g_mutex_lock(&lock);
+ counts = (DeviceCounts *) g_hash_table_lookup(devices, name);
+
+ if (!counts) {
+ uint64_t base = vaddr - off;
+ counts = new_count(name, base);
+ }
+
+ if (check_match) {
+ if (g_strv_contains((const char * const *)matches, counts->name)) {
+ hwprofile_match_hit(counts, off);
+ inc_count(&counts->totals, is_write, cpu_index);
+ }
+ } else {
+ inc_count(&counts->totals, is_write, cpu_index);
+ }
+
+ /* either track offsets or source of access */
+ if (source) {
+ off = (uint64_t) udata;
+ }
+
+ if (pattern || source) {
+ IOLocationCounts *io_count = g_hash_table_lookup(counts->detail,
+ (gpointer) off);
+ if (!io_count) {
+ io_count = new_location(counts->detail, off);
+ }
+ inc_count(&io_count->counts, is_write, cpu_index);
+ }
+
+ g_mutex_unlock(&lock);
+ }
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ size_t n = qemu_plugin_tb_n_insns(tb);
+ size_t i;
+
+ for (i = 0; i < n; i++) {
+ struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
+ gpointer udata = (gpointer) (source ? qemu_plugin_insn_vaddr(insn) : 0);
+ qemu_plugin_register_vcpu_mem_cb(insn, vcpu_haddr,
+ QEMU_PLUGIN_CB_NO_REGS,
+ rw, udata);
+ }
+}
+
+QEMU_PLUGIN_EXPORT
+int qemu_plugin_install(qemu_plugin_id_t id, const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+ g_autoptr(GString) matches_raw = g_string_new("");
+
+ for (i = 0; i < argc; i++) {
+ char *opt = argv[i];
+ g_autofree char **tokens = g_strsplit(opt, "=", 2);
+
+ if (g_strcmp0(tokens[0], "track") == 0) {
+ if (g_strcmp0(tokens[1], "read") == 0) {
+ rw = QEMU_PLUGIN_MEM_R;
+ } else if (g_strcmp0(tokens[1], "write") == 0) {
+ rw = QEMU_PLUGIN_MEM_W;
+ } else {
+ fprintf(stderr, "invalid value for track: %s\n", tokens[1]);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "pattern") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &pattern)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "source") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &source)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "match") == 0) {
+ check_match = true;
+ g_string_append_printf(matches_raw, "%s,", tokens[1]);
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", opt);
+ return -1;
+ }
+ }
+    if (check_match) {
+        /* drop the trailing comma so we don't match an empty string */
+        g_string_truncate(matches_raw, matches_raw->len - 1);
+        matches = g_strsplit(matches_raw->str, ",", -1);
+    }
+
+ if (source && pattern) {
+        fprintf(stderr, "can currently only track either source or pattern.\n");
+ return -1;
+ }
+
+ if (!info->system_emulation) {
+ fprintf(stderr, "hwprofile: plugin only useful for system emulation\n");
+ return -1;
+ }
+
+ /* Just warn about overflow */
+ if (info->system.smp_vcpus > 64 ||
+ info->system.max_vcpus > 64) {
+ fprintf(stderr, "hwprofile: can only track up to 64 CPUs\n");
+ }
+
+ plugin_init();
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}
diff --git a/contrib/plugins/lockstep.c b/contrib/plugins/lockstep.c
new file mode 100644
index 000000000..a41ffe83f
--- /dev/null
+++ b/contrib/plugins/lockstep.c
@@ -0,0 +1,356 @@
+/*
+ * Lockstep Execution Plugin
+ *
+ * Allows you to execute two QEMU instances in lockstep and report
+ * when their execution diverges. This is mainly useful for developers
+ * who want to see where a change to TCG code generation has
+ * introduced a subtle and hard-to-find bug.
+ *
+ * Caveats:
+ * - single-threaded linux-user apps only (non-deterministic syscalls
+ *   can still cause divergence)
+ * - no MTTCG-enabled system emulation (icount may help)
+ *
+ * While icount makes things more deterministic it doesn't guarantee
+ * that a particular run will execute the exact same sequence of
+ * blocks. An asynchronous event (for example an X11 graphics update)
+ * may cause a block to end early and a new partial block to start.
+ * This means serial-only test cases are a better bet. -d nochain may
+ * also help.
+ *
+ * This code is not thread safe!
+ *
+ * Copyright (c) 2020 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
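+/*
+ * Illustrative usage sketch (the exact -plugin argument syntax
+ * depends on the QEMU version; liblockstep.so is the library built
+ * from this directory). Launch the same workload twice with the same
+ * socket path; whichever instance starts first creates the socket
+ * and waits, and the second connects to it:
+ *
+ *   qemu-system-arm ... -d nochain \
+ *     -plugin ./contrib/plugins/liblockstep.so,sockpath=lockstep.sock
+ */
+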
+#include <glib.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* saved so we can uninstall later */
+static qemu_plugin_id_t our_id;
+
+static unsigned long bb_count;
+static unsigned long insn_count;
+
+/* Information about a translated block */
+typedef struct {
+ uint64_t pc;
+ uint64_t insns;
+} BlockInfo;
+
+/* Information about an execution state in the log */
+typedef struct {
+ BlockInfo *block;
+ unsigned long insn_count;
+ unsigned long block_count;
+} ExecInfo;
+
+/* The execution state we compare */
+typedef struct {
+ uint64_t pc;
+ unsigned long insn_count;
+} ExecState;
+
+typedef struct {
+ GSList *log_pos;
+ int distance;
+} DivergeState;
+
+/* list of translated block info */
+static GSList *blocks;
+
+/* execution log and points of divergence */
+static GSList *log, *divergence_log;
+
+static int socket_fd;
+static char *path_to_unlink;
+
+static bool verbose;
+
+static void plugin_cleanup(qemu_plugin_id_t id)
+{
+ /* Free our block data */
+ g_slist_free_full(blocks, &g_free);
+ g_slist_free_full(log, &g_free);
+ g_slist_free(divergence_log);
+
+ close(socket_fd);
+ if (path_to_unlink) {
+ unlink(path_to_unlink);
+ }
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *p)
+{
+ g_autoptr(GString) out = g_string_new("No divergence :-)\n");
+    g_string_append_printf(out, "Executed %lu/%u blocks\n",
+                           bb_count, g_slist_length(log));
+    g_string_append_printf(out, "Executed ~%lu instructions\n", insn_count);
+ qemu_plugin_outs(out->str);
+
+ plugin_cleanup(id);
+}
+
+static void report_divergence(ExecState *us, ExecState *them)
+{
+ DivergeState divrec = { log, 0 };
+ g_autoptr(GString) out = g_string_new("");
+ bool diverged = false;
+
+    /*
+     * If we have diverged before, did we get back on track or are we
+     * totally losing it?
+     */
+ if (divergence_log) {
+ DivergeState *last = (DivergeState *) divergence_log->data;
+ GSList *entry;
+
+ for (entry = log; g_slist_next(entry); entry = g_slist_next(entry)) {
+ if (entry == last->log_pos) {
+ break;
+ }
+ divrec.distance++;
+ }
+
+        /*
+         * If the last two divergences are very close together it is
+         * likely we will never recover synchronisation with the
+         * other end.
+         */
+ if (divrec.distance == 1 && last->distance == 1) {
+ diverged = true;
+ }
+ }
+ divergence_log = g_slist_prepend(divergence_log,
+ g_memdup(&divrec, sizeof(divrec)));
+
+ /* Output short log entry of going out of sync... */
+ if (verbose || divrec.distance == 1 || diverged) {
+        g_string_printf(out, "@ 0x%016"PRIx64" vs 0x%016"PRIx64
+                        " (%u/%d since last)\n",
+                        us->pc, them->pc, g_slist_length(divergence_log),
+                        divrec.distance);
+ qemu_plugin_outs(out->str);
+ }
+
+ if (diverged) {
+ int i;
+ GSList *entry;
+
+        g_string_printf(out, "Δ insn_count @ 0x%016"PRIx64" (%lu) vs 0x%016"
+                        PRIx64" (%lu)\n",
+                        us->pc, us->insn_count, them->pc, them->insn_count);
+
+ for (entry = log, i = 0;
+ g_slist_next(entry) && i < 5;
+ entry = g_slist_next(entry), i++) {
+ ExecInfo *prev = (ExecInfo *) entry->data;
+            g_string_append_printf(out,
+                                   " previously @ 0x%016"PRIx64"/%"PRIu64
+                                   " (%lu insns)\n",
+                                   prev->block->pc, prev->block->insns,
+                                   prev->insn_count);
+ }
+ qemu_plugin_outs(out->str);
+        qemu_plugin_outs("too much divergence... giving up.\n");
+ qemu_plugin_uninstall(our_id, plugin_cleanup);
+ }
+}
+
+static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
+{
+ BlockInfo *bi = (BlockInfo *) udata;
+ ExecState us, them;
+ ssize_t bytes;
+ ExecInfo *exec;
+
+ us.pc = bi->pc;
+ us.insn_count = insn_count;
+
+ /*
+ * Write our current position to the other end. If we fail the
+ * other end has probably died and we should shut down gracefully.
+ */
+ bytes = write(socket_fd, &us, sizeof(ExecState));
+    if (bytes < (ssize_t) sizeof(ExecState)) {
+ qemu_plugin_outs(bytes < 0 ?
+ "problem writing to socket" :
+ "wrote less than expected to socket");
+ qemu_plugin_uninstall(our_id, plugin_cleanup);
+ return;
+ }
+
+ /*
+ * Now read where our peer has reached. Again a failure probably
+ * indicates the other end died and we should close down cleanly.
+ */
+ bytes = read(socket_fd, &them, sizeof(ExecState));
+    if (bytes < (ssize_t) sizeof(ExecState)) {
+ qemu_plugin_outs(bytes < 0 ?
+ "problem reading from socket" :
+ "read less than expected");
+ qemu_plugin_uninstall(our_id, plugin_cleanup);
+ return;
+ }
+
+ /*
+ * Compare and report if we have diverged.
+ */
+ if (us.pc != them.pc) {
+        report_divergence(&us, &them);
+ }
+
+ /*
+ * Assume this block will execute fully and record it
+ * in the execution log.
+ */
+ insn_count += bi->insns;
+ bb_count++;
+ exec = g_new0(ExecInfo, 1);
+ exec->block = bi;
+ exec->insn_count = insn_count;
+ exec->block_count = bb_count;
+ log = g_slist_prepend(log, exec);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+ BlockInfo *bi = g_new0(BlockInfo, 1);
+ bi->pc = qemu_plugin_tb_vaddr(tb);
+ bi->insns = qemu_plugin_tb_n_insns(tb);
+
+ /* save a reference so we can free later */
+ blocks = g_slist_prepend(blocks, bi);
+ qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
+ QEMU_PLUGIN_CB_NO_REGS, (void *)bi);
+}
+
+
+/*
+ * Instead of encoding master/slave status into what is essentially
+ * two peers we shall just take the simple approach of checking for
+ * the existence of the socket and assuming if it's not there we are
+ * the first process.
+ */
+static bool setup_socket(const char *path)
+{
+ struct sockaddr_un sockaddr;
+ int fd;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("create socket");
+ return false;
+ }
+
+ sockaddr.sun_family = AF_UNIX;
+ g_strlcpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
+ if (bind(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
+ perror("bind socket");
+ close(fd);
+ return false;
+ }
+
+ /* remember to clean-up */
+ path_to_unlink = g_strdup(path);
+
+ if (listen(fd, 1) < 0) {
+ perror("listen socket");
+ close(fd);
+ return false;
+ }
+
+    /* retry on EINTR rather than silently continuing with no peer */
+    do {
+        socket_fd = accept(fd, NULL, NULL);
+    } while (socket_fd < 0 && errno == EINTR);
+    if (socket_fd < 0) {
+        perror("accept socket");
+        close(fd);
+        return false;
+    }
+
+ qemu_plugin_outs("setup_socket::ready\n");
+
+ close(fd);
+ return true;
+}
+
+static bool connect_socket(const char *path)
+{
+ int fd;
+ struct sockaddr_un sockaddr;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("create socket");
+ return false;
+ }
+
+ sockaddr.sun_family = AF_UNIX;
+ g_strlcpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
+
+ if (connect(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
+ perror("failed to connect");
+ close(fd);
+ return false;
+ }
+
+ qemu_plugin_outs("connect_socket::ready\n");
+
+ socket_fd = fd;
+ return true;
+}
+
+static bool setup_unix_socket(const char *path)
+{
+ if (g_file_test(path, G_FILE_TEST_EXISTS)) {
+ return connect_socket(path);
+ } else {
+ return setup_socket(path);
+ }
+}
+
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+ const qemu_info_t *info,
+ int argc, char **argv)
+{
+ int i;
+ g_autofree char *sock_path = NULL;
+
+ for (i = 0; i < argc; i++) {
+ char *p = argv[i];
+        g_auto(GStrv) tokens = g_strsplit(p, "=", 2);
+
+ if (g_strcmp0(tokens[0], "verbose") == 0) {
+ if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
+ fprintf(stderr, "boolean argument parsing failed: %s\n", p);
+ return -1;
+ }
+ } else if (g_strcmp0(tokens[0], "sockpath") == 0) {
+            sock_path = g_strdup(tokens[1]);
+ } else {
+ fprintf(stderr, "option parsing failed: %s\n", p);
+ return -1;
+ }
+ }
+
+ if (sock_path == NULL) {
+        fprintf(stderr, "Need a socket path to talk to the other instance.\n");
+ return -1;
+ }
+
+ if (!setup_unix_socket(sock_path)) {
+        fprintf(stderr, "Failed to set up socket for communications.\n");
+ return -1;
+ }
+
+ our_id = id;
+
+ qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+ qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+ return 0;
+}