Diffstat (limited to 'tests/fp')
-rw-r--r-- | tests/fp/fp-bench.c | 709
-rw-r--r-- | tests/fp/fp-test-log2.c | 118
-rw-r--r-- | tests/fp/fp-test.c | 1021
-rw-r--r-- | tests/fp/meson.build | 645
-rw-r--r-- | tests/fp/platform.h | 41
-rw-r--r-- | tests/fp/wrap.c.inc | 666
6 files changed, 3200 insertions, 0 deletions
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c new file mode 100644 index 000000000..c24baf853 --- /dev/null +++ b/tests/fp/fp-bench.c @@ -0,0 +1,709 @@ +/* + * fp-bench.c - A collection of simple floating point microbenchmarks. + * + * Copyright (C) 2018, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HW_POISON_H +#error Must define HW_POISON_H to work around TARGET_* poisoning +#endif + +#include "qemu/osdep.h" +#include <math.h> +#include <fenv.h> +#include "qemu/timer.h" +#include "qemu/int128.h" +#include "fpu/softfloat.h" + +/* amortize the computation of random inputs */ +#define OPS_PER_ITER 50000 + +#define MAX_OPERANDS 3 + +#define SEED_A 0xdeadfacedeadface +#define SEED_B 0xbadc0feebadc0fee +#define SEED_C 0xbeefdeadbeefdead + +enum op { + OP_ADD, + OP_SUB, + OP_MUL, + OP_DIV, + OP_FMA, + OP_SQRT, + OP_CMP, + OP_MAX_NR, +}; + +static const char * const op_names[] = { + [OP_ADD] = "add", + [OP_SUB] = "sub", + [OP_MUL] = "mul", + [OP_DIV] = "div", + [OP_FMA] = "mulAdd", + [OP_SQRT] = "sqrt", + [OP_CMP] = "cmp", + [OP_MAX_NR] = NULL, +}; + +enum precision { + PREC_SINGLE, + PREC_DOUBLE, + PREC_QUAD, + PREC_FLOAT32, + PREC_FLOAT64, + PREC_FLOAT128, + PREC_MAX_NR, +}; + +enum rounding { + ROUND_EVEN, + ROUND_ZERO, + ROUND_DOWN, + ROUND_UP, + ROUND_TIEAWAY, + N_ROUND_MODES, +}; + +static const char * const round_names[] = { + [ROUND_EVEN] = "even", + [ROUND_ZERO] = "zero", + [ROUND_DOWN] = "down", + [ROUND_UP] = "up", + [ROUND_TIEAWAY] = "tieaway", +}; + +enum tester { + TESTER_SOFT, + TESTER_HOST, + TESTER_MAX_NR, +}; + +static const char * const tester_names[] = { + [TESTER_SOFT] = "soft", + [TESTER_HOST] = "host", + [TESTER_MAX_NR] = NULL, +}; + +union fp { + float f; + double d; + float32 f32; + float64 f64; + float128 f128; + uint64_t u64; +}; + +struct op_state; + +typedef float (*float_func_t)(const struct op_state *s); +typedef double (*double_func_t)(const struct op_state *s); + +union fp_func { + float_func_t float_func; + double_func_t double_func; +}; + +typedef void (*bench_func_t)(void); + +struct op_desc { + const char * const name; +}; + +#define DEFAULT_DURATION_SECS 1 + +static uint64_t random_ops[MAX_OPERANDS] = { + SEED_A, SEED_B, SEED_C, +}; + +static float128 random_quad_ops[MAX_OPERANDS] = { + {SEED_A, SEED_B}, {SEED_B, SEED_C}, {SEED_C, SEED_A}, +}; +static float_status soft_status; +static enum precision precision; +static enum op operation; +static enum tester tester; +static uint64_t n_completed_ops; +static unsigned int duration = DEFAULT_DURATION_SECS; +static int64_t ns_elapsed; +/* disable optimizations with volatile */ +static volatile union fp res; + +/* + * From: https://en.wikipedia.org/wiki/Xorshift + * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only + * guaranteed to be >= 32767).
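 *
 * A minimal usage sketch (editor's illustration, not part of the commit):
 * the 64-bit state is threaded through successive calls, and
 * update_random_ops() below stores each result back so the next iteration
 * continues the sequence:
 *
 *   uint64_t s = SEED_A;
 *   s = xorshift64star(s);   first draw
 *   s = xorshift64star(s);   second draw, seeded by the first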
+ */ +static uint64_t xorshift64star(uint64_t x) +{ + x ^= x >> 12; /* a */ + x ^= x << 25; /* b */ + x ^= x >> 27; /* c */ + return x * UINT64_C(2685821657736338717); +} + +static void update_random_ops(int n_ops, enum precision prec) +{ + int i; + + for (i = 0; i < n_ops; i++) { + + switch (prec) { + case PREC_SINGLE: + case PREC_FLOAT32: + { + uint64_t r = random_ops[i]; + do { + r = xorshift64star(r); + } while (!float32_is_normal(r)); + random_ops[i] = r; + break; + } + case PREC_DOUBLE: + case PREC_FLOAT64: + { + uint64_t r = random_ops[i]; + do { + r = xorshift64star(r); + } while (!float64_is_normal(r)); + random_ops[i] = r; + break; + } + case PREC_QUAD: + case PREC_FLOAT128: + { + float128 r = random_quad_ops[i]; + uint64_t hi = r.high; + uint64_t lo = r.low; + do { + hi = xorshift64star(hi); + lo = xorshift64star(lo); + r = make_float128(hi, lo); + } while (!float128_is_normal(r)); + random_quad_ops[i] = r; + break; + } + default: + g_assert_not_reached(); + } + } +} + +static void fill_random(union fp *ops, int n_ops, enum precision prec, + bool no_neg) +{ + int i; + + for (i = 0; i < n_ops; i++) { + switch (prec) { + case PREC_SINGLE: + case PREC_FLOAT32: + ops[i].f32 = make_float32(random_ops[i]); + if (no_neg && float32_is_neg(ops[i].f32)) { + ops[i].f32 = float32_chs(ops[i].f32); + } + break; + case PREC_DOUBLE: + case PREC_FLOAT64: + ops[i].f64 = make_float64(random_ops[i]); + if (no_neg && float64_is_neg(ops[i].f64)) { + ops[i].f64 = float64_chs(ops[i].f64); + } + break; + case PREC_QUAD: + case PREC_FLOAT128: + ops[i].f128 = random_quad_ops[i]; + if (no_neg && float128_is_neg(ops[i].f128)) { + ops[i].f128 = float128_chs(ops[i].f128); + } + break; + default: + g_assert_not_reached(); + } + } +} + +/* + * The main benchmark function. Instead of (ab)using macros, we rely + * on the compiler to unfold this at compile-time. 
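 *
 * (Editor's illustration, not part of the commit: each GEN_BENCH() wrapper
 * below calls this function with compile-time-constant arguments, e.g.
 * bench(PREC_SINGLE, OP_ADD, 2, false), and is marked
 * __attribute__((flatten)), so the compiler can inline bench() and resolve
 * the prec/op switches statically, leaving only the selected operation in
 * the timed loop.)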
+ */ +static void bench(enum precision prec, enum op op, int n_ops, bool no_neg) +{ + int64_t tf = get_clock() + duration * 1000000000LL; + + while (get_clock() < tf) { + union fp ops[MAX_OPERANDS]; + int64_t t0; + int i; + + update_random_ops(n_ops, prec); + switch (prec) { + case PREC_SINGLE: + fill_random(ops, n_ops, prec, no_neg); + t0 = get_clock(); + for (i = 0; i < OPS_PER_ITER; i++) { + float a = ops[0].f; + float b = ops[1].f; + float c = ops[2].f; + + switch (op) { + case OP_ADD: + res.f = a + b; + break; + case OP_SUB: + res.f = a - b; + break; + case OP_MUL: + res.f = a * b; + break; + case OP_DIV: + res.f = a / b; + break; + case OP_FMA: + res.f = fmaf(a, b, c); + break; + case OP_SQRT: + res.f = sqrtf(a); + break; + case OP_CMP: + res.u64 = isgreater(a, b); + break; + default: + g_assert_not_reached(); + } + } + break; + case PREC_DOUBLE: + fill_random(ops, n_ops, prec, no_neg); + t0 = get_clock(); + for (i = 0; i < OPS_PER_ITER; i++) { + double a = ops[0].d; + double b = ops[1].d; + double c = ops[2].d; + + switch (op) { + case OP_ADD: + res.d = a + b; + break; + case OP_SUB: + res.d = a - b; + break; + case OP_MUL: + res.d = a * b; + break; + case OP_DIV: + res.d = a / b; + break; + case OP_FMA: + res.d = fma(a, b, c); + break; + case OP_SQRT: + res.d = sqrt(a); + break; + case OP_CMP: + res.u64 = isgreater(a, b); + break; + default: + g_assert_not_reached(); + } + } + break; + case PREC_FLOAT32: + fill_random(ops, n_ops, prec, no_neg); + t0 = get_clock(); + for (i = 0; i < OPS_PER_ITER; i++) { + float32 a = ops[0].f32; + float32 b = ops[1].f32; + float32 c = ops[2].f32; + + switch (op) { + case OP_ADD: + res.f32 = float32_add(a, b, &soft_status); + break; + case OP_SUB: + res.f32 = float32_sub(a, b, &soft_status); + break; + case OP_MUL: + res.f32 = float32_mul(a, b, &soft_status); + break; + case OP_DIV: + res.f32 = float32_div(a, b, &soft_status); + break; + case OP_FMA: + res.f32 = float32_muladd(a, b, c, 0, &soft_status); + break; + case OP_SQRT: + res.f32 = float32_sqrt(a, &soft_status); + break; + case OP_CMP: + res.u64 = float32_compare_quiet(a, b, &soft_status); + break; + default: + g_assert_not_reached(); + } + } + break; + case PREC_FLOAT64: + fill_random(ops, n_ops, prec, no_neg); + t0 = get_clock(); + for (i = 0; i < OPS_PER_ITER; i++) { + float64 a = ops[0].f64; + float64 b = ops[1].f64; + float64 c = ops[2].f64; + + switch (op) { + case OP_ADD: + res.f64 = float64_add(a, b, &soft_status); + break; + case OP_SUB: + res.f64 = float64_sub(a, b, &soft_status); + break; + case OP_MUL: + res.f64 = float64_mul(a, b, &soft_status); + break; + case OP_DIV: + res.f64 = float64_div(a, b, &soft_status); + break; + case OP_FMA: + res.f64 = float64_muladd(a, b, c, 0, &soft_status); + break; + case OP_SQRT: + res.f64 = float64_sqrt(a, &soft_status); + break; + case OP_CMP: + res.u64 = float64_compare_quiet(a, b, &soft_status); + break; + default: + g_assert_not_reached(); + } + } + break; + case PREC_FLOAT128: + fill_random(ops, n_ops, prec, no_neg); + t0 = get_clock(); + for (i = 0; i < OPS_PER_ITER; i++) { + float128 a = ops[0].f128; + float128 b = ops[1].f128; + float128 c = ops[2].f128; + + switch (op) { + case OP_ADD: + res.f128 = float128_add(a, b, &soft_status); + break; + case OP_SUB: + res.f128 = float128_sub(a, b, &soft_status); + break; + case OP_MUL: + res.f128 = float128_mul(a, b, &soft_status); + break; + case OP_DIV: + res.f128 = float128_div(a, b, &soft_status); + break; + case OP_FMA: + res.f128 = float128_muladd(a, b, c, 0, &soft_status); + break; + case 
OP_SQRT: + res.f128 = float128_sqrt(a, &soft_status); + break; + case OP_CMP: + res.u64 = float128_compare_quiet(a, b, &soft_status); + break; + default: + g_assert_not_reached(); + } + } + break; + default: + g_assert_not_reached(); + } + ns_elapsed += get_clock() - t0; + n_completed_ops += OPS_PER_ITER; + } +} + +#define GEN_BENCH(name, type, prec, op, n_ops) \ + static void __attribute__((flatten)) name(void) \ + { \ + bench(prec, op, n_ops, false); \ + } + +#define GEN_BENCH_NO_NEG(name, type, prec, op, n_ops) \ + static void __attribute__((flatten)) name(void) \ + { \ + bench(prec, op, n_ops, true); \ + } + +#define GEN_BENCH_ALL_TYPES(opname, op, n_ops) \ + GEN_BENCH(bench_ ## opname ## _float, float, PREC_SINGLE, op, n_ops) \ + GEN_BENCH(bench_ ## opname ## _double, double, PREC_DOUBLE, op, n_ops) \ + GEN_BENCH(bench_ ## opname ## _float32, float32, PREC_FLOAT32, op, n_ops) \ + GEN_BENCH(bench_ ## opname ## _float64, float64, PREC_FLOAT64, op, n_ops) \ + GEN_BENCH(bench_ ## opname ## _float128, float128, PREC_FLOAT128, op, n_ops) + +GEN_BENCH_ALL_TYPES(add, OP_ADD, 2) +GEN_BENCH_ALL_TYPES(sub, OP_SUB, 2) +GEN_BENCH_ALL_TYPES(mul, OP_MUL, 2) +GEN_BENCH_ALL_TYPES(div, OP_DIV, 2) +GEN_BENCH_ALL_TYPES(fma, OP_FMA, 3) +GEN_BENCH_ALL_TYPES(cmp, OP_CMP, 2) +#undef GEN_BENCH_ALL_TYPES + +#define GEN_BENCH_ALL_TYPES_NO_NEG(name, op, n) \ + GEN_BENCH_NO_NEG(bench_ ## name ## _float, float, PREC_SINGLE, op, n) \ + GEN_BENCH_NO_NEG(bench_ ## name ## _double, double, PREC_DOUBLE, op, n) \ + GEN_BENCH_NO_NEG(bench_ ## name ## _float32, float32, PREC_FLOAT32, op, n) \ + GEN_BENCH_NO_NEG(bench_ ## name ## _float64, float64, PREC_FLOAT64, op, n) \ + GEN_BENCH_NO_NEG(bench_ ## name ## _float128, float128, PREC_FLOAT128, op, n) + +GEN_BENCH_ALL_TYPES_NO_NEG(sqrt, OP_SQRT, 1) +#undef GEN_BENCH_ALL_TYPES_NO_NEG + +#undef GEN_BENCH_NO_NEG +#undef GEN_BENCH + +#define GEN_BENCH_FUNCS(opname, op) \ + [op] = { \ + [PREC_SINGLE] = bench_ ## opname ## _float, \ + [PREC_DOUBLE] = bench_ ## opname ## _double, \ + [PREC_FLOAT32] = bench_ ## opname ## _float32, \ + [PREC_FLOAT64] = bench_ ## opname ## _float64, \ + [PREC_FLOAT128] = bench_ ## opname ## _float128, \ + } + +static const bench_func_t bench_funcs[OP_MAX_NR][PREC_MAX_NR] = { + GEN_BENCH_FUNCS(add, OP_ADD), + GEN_BENCH_FUNCS(sub, OP_SUB), + GEN_BENCH_FUNCS(mul, OP_MUL), + GEN_BENCH_FUNCS(div, OP_DIV), + GEN_BENCH_FUNCS(fma, OP_FMA), + GEN_BENCH_FUNCS(sqrt, OP_SQRT), + GEN_BENCH_FUNCS(cmp, OP_CMP), +}; + +#undef GEN_BENCH_FUNCS + +static void run_bench(void) +{ + bench_func_t f; + + f = bench_funcs[operation][precision]; + g_assert(f); + f(); +} + +/* @arr must be NULL-terminated */ +static int find_name(const char * const *arr, const char *name) +{ + int i; + + for (i = 0; arr[i] != NULL; i++) { + if (strcmp(name, arr[i]) == 0) { + return i; + } + } + return -1; +} + +static void usage_complete(int argc, char *argv[]) +{ + gchar *op_list = g_strjoinv(", ", (gchar **)op_names); + gchar *tester_list = g_strjoinv(", ", (gchar **)tester_names); + + fprintf(stderr, "Usage: %s [options]\n", argv[0]); + fprintf(stderr, "options:\n"); + fprintf(stderr, " -d = duration, in seconds. Default: %d\n", + DEFAULT_DURATION_SECS); + fprintf(stderr, " -h = show this help message.\n"); + fprintf(stderr, " -o = floating point operation (%s). Default: %s\n", + op_list, op_names[0]); + fprintf(stderr, " -p = floating point precision (single, double, quad[soft only]). " + "Default: single\n"); + fprintf(stderr, " -r = rounding mode (even, zero, down, up, tieaway). 
" + "Default: even\n"); + fprintf(stderr, " -t = tester (%s). Default: %s\n", + tester_list, tester_names[0]); + fprintf(stderr, " -z = flush inputs to zero (soft tester only). " + "Default: disabled\n"); + fprintf(stderr, " -Z = flush output to zero (soft tester only). " + "Default: disabled\n"); + + g_free(tester_list); + g_free(op_list); +} + +static int round_name_to_mode(const char *name) +{ + int i; + + for (i = 0; i < N_ROUND_MODES; i++) { + if (!strcmp(round_names[i], name)) { + return i; + } + } + return -1; +} + +static void QEMU_NORETURN die_host_rounding(enum rounding rounding) +{ + fprintf(stderr, "fatal: '%s' rounding not supported on this host\n", + round_names[rounding]); + exit(EXIT_FAILURE); +} + +static void set_host_precision(enum rounding rounding) +{ + int rhost; + + switch (rounding) { + case ROUND_EVEN: + rhost = FE_TONEAREST; + break; + case ROUND_ZERO: + rhost = FE_TOWARDZERO; + break; + case ROUND_DOWN: + rhost = FE_DOWNWARD; + break; + case ROUND_UP: + rhost = FE_UPWARD; + break; + case ROUND_TIEAWAY: + die_host_rounding(rounding); + return; + default: + g_assert_not_reached(); + } + + if (fesetround(rhost)) { + die_host_rounding(rounding); + } +} + +static void set_soft_precision(enum rounding rounding) +{ + signed char mode; + + switch (rounding) { + case ROUND_EVEN: + mode = float_round_nearest_even; + break; + case ROUND_ZERO: + mode = float_round_to_zero; + break; + case ROUND_DOWN: + mode = float_round_down; + break; + case ROUND_UP: + mode = float_round_up; + break; + case ROUND_TIEAWAY: + mode = float_round_ties_away; + break; + default: + g_assert_not_reached(); + } + soft_status.float_rounding_mode = mode; +} + +static void parse_args(int argc, char *argv[]) +{ + int c; + int val; + int rounding = ROUND_EVEN; + + for (;;) { + c = getopt(argc, argv, "d:ho:p:r:t:zZ"); + if (c < 0) { + break; + } + switch (c) { + case 'd': + duration = atoi(optarg); + break; + case 'h': + usage_complete(argc, argv); + exit(EXIT_SUCCESS); + case 'o': + val = find_name(op_names, optarg); + if (val < 0) { + fprintf(stderr, "Unsupported op '%s'\n", optarg); + exit(EXIT_FAILURE); + } + operation = val; + break; + case 'p': + if (!strcmp(optarg, "single")) { + precision = PREC_SINGLE; + } else if (!strcmp(optarg, "double")) { + precision = PREC_DOUBLE; + } else if (!strcmp(optarg, "quad")) { + precision = PREC_QUAD; + } else { + fprintf(stderr, "Unsupported precision '%s'\n", optarg); + exit(EXIT_FAILURE); + } + break; + case 'r': + rounding = round_name_to_mode(optarg); + if (rounding < 0) { + fprintf(stderr, "fatal: invalid rounding mode '%s'\n", optarg); + exit(EXIT_FAILURE); + } + break; + case 't': + val = find_name(tester_names, optarg); + if (val < 0) { + fprintf(stderr, "Unsupported tester '%s'\n", optarg); + exit(EXIT_FAILURE); + } + tester = val; + break; + case 'z': + soft_status.flush_inputs_to_zero = 1; + break; + case 'Z': + soft_status.flush_to_zero = 1; + break; + } + } + + /* set precision and rounding mode based on the tester */ + switch (tester) { + case TESTER_HOST: + set_host_precision(rounding); + break; + case TESTER_SOFT: + set_soft_precision(rounding); + switch (precision) { + case PREC_SINGLE: + precision = PREC_FLOAT32; + break; + case PREC_DOUBLE: + precision = PREC_FLOAT64; + break; + case PREC_QUAD: + precision = PREC_FLOAT128; + break; + default: + g_assert_not_reached(); + } + break; + default: + g_assert_not_reached(); + } +} + +static void pr_stats(void) +{ + printf("%.2f MFlops\n", (double)n_completed_ops / ns_elapsed * 1e3); +} + +int 
main(int argc, char *argv[]) +{ + parse_args(argc, argv); + run_bench(); + pr_stats(); + return 0; +} diff --git a/tests/fp/fp-test-log2.c b/tests/fp/fp-test-log2.c new file mode 100644 index 000000000..4eae93eb7 --- /dev/null +++ b/tests/fp/fp-test-log2.c @@ -0,0 +1,118 @@ +/* + * fp-test-log2.c - test QEMU's softfloat log2 + * + * Copyright (C) 2020, Linaro, Ltd. + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#ifndef HW_POISON_H +#error Must define HW_POISON_H to work around TARGET_* poisoning +#endif + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include <math.h> +#include "fpu/softfloat.h" + +typedef union { + double d; + float64 i; +} ufloat64; + +static int errors; + +static void compare(ufloat64 test, ufloat64 real, ufloat64 soft, bool exact) +{ + int msb; + uint64_t ulp = UINT64_MAX; + + if (real.i == soft.i) { + return; + } + msb = 63 - __builtin_clzll(real.i ^ soft.i); + + if (msb < 52) { + if (real.i > soft.i) { + ulp = real.i - soft.i; + } else { + ulp = soft.i - real.i; + } + } + + /* glibc allows 3 ulp error in its libm-test-ulps; allow 4 here */ + if (!exact && ulp <= 4) { + return; + } + + printf("test: %016" PRIx64 " %+.13a\n" + " sf: %016" PRIx64 " %+.13a\n" + "libm: %016" PRIx64 " %+.13a\n", + test.i, test.d, soft.i, soft.d, real.i, real.d); + + if (msb == 63) { + printf("Error in sign!\n\n"); + } else if (msb >= 52) { + printf("Error in exponent: %d\n\n", + (int)(soft.i >> 52) - (int)(real.i >> 52)); + } else { + printf("Error in fraction: %" PRIu64 " ulp\n\n", ulp); + } + + if (++errors == 20) { + exit(1); + } +} + +int main(int ac, char **av) +{ + ufloat64 test, real, soft; + float_status qsf = {0}; + int i; + + set_float_rounding_mode(float_round_nearest_even, &qsf); + + test.d = 0.0; + real.d = -__builtin_inf(); + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, true); + + test.d = 1.0; + real.d = 0.0; + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, true); + + test.d = 2.0; + real.d = 1.0; + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, true); + + test.d = 4.0; + real.d = 2.0; + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, true); + + test.d = 0x1p64; + real.d = 64.0; + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, true); + + test.d = __builtin_inf(); + real.d = __builtin_inf(); + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, true); + + for (i = 0; i < 10000; ++i) { + test.d = drand48() + 1.0; /* [1.0, 2.0) */ + real.d = log2(test.d); + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, false); + + test.d = drand48() * 100; /* [0.0, 100) */ + real.d = log2(test.d); + soft.i = float64_log2(test.i, &qsf); + compare(test, real, soft, false); + } + + return 0; +} diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c new file mode 100644 index 000000000..352dd71c4 --- /dev/null +++ b/tests/fp/fp-test.c @@ -0,0 +1,1021 @@ +/* + * fp-test.c - test QEMU's softfloat implementation using Berkeley's Testfloat + * + * Copyright (C) 2018, Emilio G. Cota <cota@braap.org> + * + * License: GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + * This file is derived from testfloat/source/testsoftfloat.c. Its copyright + * info follows: + * + * Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the + * University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions, and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions, and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the University nor the names of its contributors may + * be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef HW_POISON_H +#error Must define HW_POISON_H to work around TARGET_* poisoning +#endif + +#include "qemu/osdep.h" +#include "qemu/cutils.h" +#include <math.h> +#include "fpu/softfloat.h" +#include "platform.h" + +#include "fail.h" +#include "slowfloat.h" +#include "functions.h" +#include "genCases.h" +#include "verCases.h" +#include "writeCase.h" +#include "testLoops.h" + +typedef float16_t (*abz_f16)(float16_t, float16_t); +typedef bool (*ab_f16_z_bool)(float16_t, float16_t); +typedef float32_t (*abz_f32)(float32_t, float32_t); +typedef bool (*ab_f32_z_bool)(float32_t, float32_t); +typedef float64_t (*abz_f64)(float64_t, float64_t); +typedef bool (*ab_f64_z_bool)(float64_t, float64_t); +typedef void (*abz_extF80M)(const extFloat80_t *, const extFloat80_t *, + extFloat80_t *); +typedef bool (*ab_extF80M_z_bool)(const extFloat80_t *, const extFloat80_t *); +typedef void (*abz_f128M)(const float128_t *, const float128_t *, float128_t *); +typedef bool (*ab_f128M_z_bool)(const float128_t *, const float128_t *); + +static const char * const round_mode_names[] = { + [ROUND_NEAR_EVEN] = "even", + [ROUND_MINMAG] = "zero", + [ROUND_MIN] = "down", + [ROUND_MAX] = "up", + [ROUND_NEAR_MAXMAG] = "tieaway", + [ROUND_ODD] = "odd", +}; +static unsigned int *test_ops; +static unsigned int n_test_ops; +static unsigned int n_max_errors = 20; +static unsigned int test_round_mode = ROUND_NEAR_EVEN; +static unsigned int *round_modes; +static unsigned int n_round_modes; +static int test_level = 1; +static uint8_t slow_init_flags; +static uint8_t qemu_init_flags; + +/* qemu softfloat status */ +static float_status qsf; + +static const char commands_string[] = + "operations:\n" + " <int>_to_<float> <float>_add <float>_eq\n" + " <float>_to_<int> <float>_sub <float>_le\n" + " <float>_to_<int>_r_minMag <float>_mul <float>_lt\n" + " <float>_to_<float> <float>_mulAdd <float>_eq_signaling\n" + " <float>_roundToInt <float>_div <float>_le_quiet\n" + " <float>_rem <float>_lt_quiet\n" + " <float>_sqrt\n" + " Where <int>: ui32, ui64, 
i32, i64\n" + " <float>: f16, f32, f64, extF80, f128\n" + " If no operation is provided, all the above are tested\n" + "options:\n" + " -e = max error count per test. Default: 20. Set no limit with 0\n" + " -f = initial FP exception flags (vioux). Default: none\n" + " -l = thoroughness level (1 (default), 2)\n" + " -r = rounding mode (even (default), zero, down, up, tieaway, odd)\n" + " Set to 'all' to test all rounding modes, if applicable\n" + " -s = stop when a test fails"; + +static void usage_complete(int argc, char *argv[]) +{ + fprintf(stderr, "Usage: %s [options] [operation1 ...]\n", argv[0]); + fprintf(stderr, "%s\n", commands_string); + exit(EXIT_FAILURE); +} + +/* keep wrappers separate but do not bother defining headers for all of them */ +#include "wrap.c.inc" + +static void not_implemented(void) +{ + fprintf(stderr, "Not implemented.\n"); +} + +static bool is_allowed(unsigned op, int rmode) +{ + /* odd has not been implemented for any 80-bit ops */ + if (rmode == softfloat_round_odd) { + switch (op) { + case EXTF80_TO_UI32: + case EXTF80_TO_UI64: + case EXTF80_TO_I32: + case EXTF80_TO_I64: + case EXTF80_TO_UI32_R_MINMAG: + case EXTF80_TO_UI64_R_MINMAG: + case EXTF80_TO_I32_R_MINMAG: + case EXTF80_TO_I64_R_MINMAG: + case EXTF80_TO_F16: + case EXTF80_TO_F32: + case EXTF80_TO_F64: + case EXTF80_TO_F128: + case EXTF80_ROUNDTOINT: + case EXTF80_ADD: + case EXTF80_SUB: + case EXTF80_MUL: + case EXTF80_DIV: + case EXTF80_REM: + case EXTF80_SQRT: + case EXTF80_EQ: + case EXTF80_LE: + case EXTF80_LT: + case EXTF80_EQ_SIGNALING: + case EXTF80_LE_QUIET: + case EXTF80_LT_QUIET: + case UI32_TO_EXTF80: + case UI64_TO_EXTF80: + case I32_TO_EXTF80: + case I64_TO_EXTF80: + case F16_TO_EXTF80: + case F32_TO_EXTF80: + case F64_TO_EXTF80: + case F128_TO_EXTF80: + return false; + } + } + return true; +} + +static void do_testfloat(int op, int rmode, bool exact) +{ + abz_f16 true_abz_f16; + abz_f16 subj_abz_f16; + ab_f16_z_bool true_f16_z_bool; + ab_f16_z_bool subj_f16_z_bool; + abz_f32 true_abz_f32; + abz_f32 subj_abz_f32; + ab_f32_z_bool true_ab_f32_z_bool; + ab_f32_z_bool subj_ab_f32_z_bool; + abz_f64 true_abz_f64; + abz_f64 subj_abz_f64; + ab_f64_z_bool true_ab_f64_z_bool; + ab_f64_z_bool subj_ab_f64_z_bool; + abz_extF80M true_abz_extF80M; + abz_extF80M subj_abz_extF80M; + ab_extF80M_z_bool true_ab_extF80M_z_bool; + ab_extF80M_z_bool subj_ab_extF80M_z_bool; + abz_f128M true_abz_f128M; + abz_f128M subj_abz_f128M; + ab_f128M_z_bool true_ab_f128M_z_bool; + ab_f128M_z_bool subj_ab_f128M_z_bool; + + fputs(">> Testing ", stderr); + verCases_writeFunctionName(stderr); + fputs("\n", stderr); + + if (!is_allowed(op, rmode)) { + not_implemented(); + return; + } + + switch (op) { + case UI32_TO_F16: + test_a_ui32_z_f16(slow_ui32_to_f16, qemu_ui32_to_f16); + break; + case UI32_TO_F32: + test_a_ui32_z_f32(slow_ui32_to_f32, qemu_ui32_to_f32); + break; + case UI32_TO_F64: + test_a_ui32_z_f64(slow_ui32_to_f64, qemu_ui32_to_f64); + break; + case UI32_TO_EXTF80: + not_implemented(); + break; + case UI32_TO_F128: + not_implemented(); + break; + case UI64_TO_F16: + test_a_ui64_z_f16(slow_ui64_to_f16, qemu_ui64_to_f16); + break; + case UI64_TO_F32: + test_a_ui64_z_f32(slow_ui64_to_f32, qemu_ui64_to_f32); + break; + case UI64_TO_F64: + test_a_ui64_z_f64(slow_ui64_to_f64, qemu_ui64_to_f64); + break; + case UI64_TO_EXTF80: + not_implemented(); + break; + case UI64_TO_F128: + test_a_ui64_z_f128(slow_ui64_to_f128M, qemu_ui64_to_f128M); + break; + case I32_TO_F16: + test_a_i32_z_f16(slow_i32_to_f16, qemu_i32_to_f16); + 
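/*
 * Editor's note (not part of the commit): in every pairing in this
 * switch, the slow_* function is Berkeley TestFloat's slowfloat
 * reference implementation (the "true" result) and the qemu_* function
 * is the wrap.c.inc wrapper around QEMU's softfloat (the "subject");
 * each test_* loop generates cases and compares the two.
 */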
break; + case I32_TO_F32: + test_a_i32_z_f32(slow_i32_to_f32, qemu_i32_to_f32); + break; + case I32_TO_F64: + test_a_i32_z_f64(slow_i32_to_f64, qemu_i32_to_f64); + break; + case I32_TO_EXTF80: + test_a_i32_z_extF80(slow_i32_to_extF80M, qemu_i32_to_extF80M); + break; + case I32_TO_F128: + test_a_i32_z_f128(slow_i32_to_f128M, qemu_i32_to_f128M); + break; + case I64_TO_F16: + test_a_i64_z_f16(slow_i64_to_f16, qemu_i64_to_f16); + break; + case I64_TO_F32: + test_a_i64_z_f32(slow_i64_to_f32, qemu_i64_to_f32); + break; + case I64_TO_F64: + test_a_i64_z_f64(slow_i64_to_f64, qemu_i64_to_f64); + break; + case I64_TO_EXTF80: + test_a_i64_z_extF80(slow_i64_to_extF80M, qemu_i64_to_extF80M); + break; + case I64_TO_F128: + test_a_i64_z_f128(slow_i64_to_f128M, qemu_i64_to_f128M); + break; + case F16_TO_UI32: + test_a_f16_z_ui32_rx(slow_f16_to_ui32, qemu_f16_to_ui32, rmode, exact); + break; + case F16_TO_UI64: + test_a_f16_z_ui64_rx(slow_f16_to_ui64, qemu_f16_to_ui64, rmode, exact); + break; + case F16_TO_I32: + test_a_f16_z_i32_rx(slow_f16_to_i32, qemu_f16_to_i32, rmode, exact); + break; + case F16_TO_I64: + test_a_f16_z_i64_rx(slow_f16_to_i64, qemu_f16_to_i64, rmode, exact); + break; + case F16_TO_UI32_R_MINMAG: + test_a_f16_z_ui32_x(slow_f16_to_ui32_r_minMag, + qemu_f16_to_ui32_r_minMag, exact); + break; + case F16_TO_UI64_R_MINMAG: + test_a_f16_z_ui64_x(slow_f16_to_ui64_r_minMag, + qemu_f16_to_ui64_r_minMag, exact); + break; + case F16_TO_I32_R_MINMAG: + test_a_f16_z_i32_x(slow_f16_to_i32_r_minMag, qemu_f16_to_i32_r_minMag, + exact); + break; + case F16_TO_I64_R_MINMAG: + test_a_f16_z_i64_x(slow_f16_to_i64_r_minMag, qemu_f16_to_i64_r_minMag, + exact); + break; + case F16_TO_F32: + test_a_f16_z_f32(slow_f16_to_f32, qemu_f16_to_f32); + break; + case F16_TO_F64: + test_a_f16_z_f64(slow_f16_to_f64, qemu_f16_to_f64); + break; + case F16_TO_EXTF80: + not_implemented(); + break; + case F16_TO_F128: + not_implemented(); + break; + case F16_ROUNDTOINT: + test_az_f16_rx(slow_f16_roundToInt, qemu_f16_roundToInt, rmode, exact); + break; + case F16_ADD: + true_abz_f16 = slow_f16_add; + subj_abz_f16 = qemu_f16_add; + goto test_abz_f16; + case F16_SUB: + true_abz_f16 = slow_f16_sub; + subj_abz_f16 = qemu_f16_sub; + goto test_abz_f16; + case F16_MUL: + true_abz_f16 = slow_f16_mul; + subj_abz_f16 = qemu_f16_mul; + goto test_abz_f16; + case F16_DIV: + true_abz_f16 = slow_f16_div; + subj_abz_f16 = qemu_f16_div; + goto test_abz_f16; + case F16_REM: + not_implemented(); + break; + test_abz_f16: + test_abz_f16(true_abz_f16, subj_abz_f16); + break; + case F16_MULADD: + test_abcz_f16(slow_f16_mulAdd, qemu_f16_mulAdd); + break; + case F16_SQRT: + test_az_f16(slow_f16_sqrt, qemu_f16_sqrt); + break; + case F16_EQ: + true_f16_z_bool = slow_f16_eq; + subj_f16_z_bool = qemu_f16_eq; + goto test_ab_f16_z_bool; + case F16_LE: + true_f16_z_bool = slow_f16_le; + subj_f16_z_bool = qemu_f16_le; + goto test_ab_f16_z_bool; + case F16_LT: + true_f16_z_bool = slow_f16_lt; + subj_f16_z_bool = qemu_f16_lt; + goto test_ab_f16_z_bool; + case F16_EQ_SIGNALING: + true_f16_z_bool = slow_f16_eq_signaling; + subj_f16_z_bool = qemu_f16_eq_signaling; + goto test_ab_f16_z_bool; + case F16_LE_QUIET: + true_f16_z_bool = slow_f16_le_quiet; + subj_f16_z_bool = qemu_f16_le_quiet; + goto test_ab_f16_z_bool; + case F16_LT_QUIET: + true_f16_z_bool = slow_f16_lt_quiet; + subj_f16_z_bool = qemu_f16_lt_quiet; + test_ab_f16_z_bool: + test_ab_f16_z_bool(true_f16_z_bool, subj_f16_z_bool); + break; + case F32_TO_UI32: + test_a_f32_z_ui32_rx(slow_f32_to_ui32, 
qemu_f32_to_ui32, rmode, exact); + break; + case F32_TO_UI64: + test_a_f32_z_ui64_rx(slow_f32_to_ui64, qemu_f32_to_ui64, rmode, exact); + break; + case F32_TO_I32: + test_a_f32_z_i32_rx(slow_f32_to_i32, qemu_f32_to_i32, rmode, exact); + break; + case F32_TO_I64: + test_a_f32_z_i64_rx(slow_f32_to_i64, qemu_f32_to_i64, rmode, exact); + break; + case F32_TO_UI32_R_MINMAG: + test_a_f32_z_ui32_x(slow_f32_to_ui32_r_minMag, + qemu_f32_to_ui32_r_minMag, exact); + break; + case F32_TO_UI64_R_MINMAG: + test_a_f32_z_ui64_x(slow_f32_to_ui64_r_minMag, + qemu_f32_to_ui64_r_minMag, exact); + break; + case F32_TO_I32_R_MINMAG: + test_a_f32_z_i32_x(slow_f32_to_i32_r_minMag, qemu_f32_to_i32_r_minMag, + exact); + break; + case F32_TO_I64_R_MINMAG: + test_a_f32_z_i64_x(slow_f32_to_i64_r_minMag, qemu_f32_to_i64_r_minMag, + exact); + break; + case F32_TO_F16: + test_a_f32_z_f16(slow_f32_to_f16, qemu_f32_to_f16); + break; + case F32_TO_F64: + test_a_f32_z_f64(slow_f32_to_f64, qemu_f32_to_f64); + break; + case F32_TO_EXTF80: + test_a_f32_z_extF80(slow_f32_to_extF80M, qemu_f32_to_extF80M); + break; + case F32_TO_F128: + test_a_f32_z_f128(slow_f32_to_f128M, qemu_f32_to_f128M); + break; + case F32_ROUNDTOINT: + test_az_f32_rx(slow_f32_roundToInt, qemu_f32_roundToInt, rmode, exact); + break; + case F32_ADD: + true_abz_f32 = slow_f32_add; + subj_abz_f32 = qemu_f32_add; + goto test_abz_f32; + case F32_SUB: + true_abz_f32 = slow_f32_sub; + subj_abz_f32 = qemu_f32_sub; + goto test_abz_f32; + case F32_MUL: + true_abz_f32 = slow_f32_mul; + subj_abz_f32 = qemu_f32_mul; + goto test_abz_f32; + case F32_DIV: + true_abz_f32 = slow_f32_div; + subj_abz_f32 = qemu_f32_div; + goto test_abz_f32; + case F32_REM: + true_abz_f32 = slow_f32_rem; + subj_abz_f32 = qemu_f32_rem; + test_abz_f32: + test_abz_f32(true_abz_f32, subj_abz_f32); + break; + case F32_MULADD: + test_abcz_f32(slow_f32_mulAdd, qemu_f32_mulAdd); + break; + case F32_SQRT: + test_az_f32(slow_f32_sqrt, qemu_f32_sqrt); + break; + case F32_EQ: + true_ab_f32_z_bool = slow_f32_eq; + subj_ab_f32_z_bool = qemu_f32_eq; + goto test_ab_f32_z_bool; + case F32_LE: + true_ab_f32_z_bool = slow_f32_le; + subj_ab_f32_z_bool = qemu_f32_le; + goto test_ab_f32_z_bool; + case F32_LT: + true_ab_f32_z_bool = slow_f32_lt; + subj_ab_f32_z_bool = qemu_f32_lt; + goto test_ab_f32_z_bool; + case F32_EQ_SIGNALING: + true_ab_f32_z_bool = slow_f32_eq_signaling; + subj_ab_f32_z_bool = qemu_f32_eq_signaling; + goto test_ab_f32_z_bool; + case F32_LE_QUIET: + true_ab_f32_z_bool = slow_f32_le_quiet; + subj_ab_f32_z_bool = qemu_f32_le_quiet; + goto test_ab_f32_z_bool; + case F32_LT_QUIET: + true_ab_f32_z_bool = slow_f32_lt_quiet; + subj_ab_f32_z_bool = qemu_f32_lt_quiet; + test_ab_f32_z_bool: + test_ab_f32_z_bool(true_ab_f32_z_bool, subj_ab_f32_z_bool); + break; + case F64_TO_UI32: + test_a_f64_z_ui32_rx(slow_f64_to_ui32, qemu_f64_to_ui32, rmode, exact); + break; + case F64_TO_UI64: + test_a_f64_z_ui64_rx(slow_f64_to_ui64, qemu_f64_to_ui64, rmode, exact); + break; + case F64_TO_I32: + test_a_f64_z_i32_rx(slow_f64_to_i32, qemu_f64_to_i32, rmode, exact); + break; + case F64_TO_I64: + test_a_f64_z_i64_rx(slow_f64_to_i64, qemu_f64_to_i64, rmode, exact); + break; + case F64_TO_UI32_R_MINMAG: + test_a_f64_z_ui32_x(slow_f64_to_ui32_r_minMag, + qemu_f64_to_ui32_r_minMag, exact); + break; + case F64_TO_UI64_R_MINMAG: + test_a_f64_z_ui64_x(slow_f64_to_ui64_r_minMag, + qemu_f64_to_ui64_r_minMag, exact); + break; + case F64_TO_I32_R_MINMAG: + test_a_f64_z_i32_x(slow_f64_to_i32_r_minMag, qemu_f64_to_i32_r_minMag, + 
exact); + break; + case F64_TO_I64_R_MINMAG: + test_a_f64_z_i64_x(slow_f64_to_i64_r_minMag, qemu_f64_to_i64_r_minMag, + exact); + break; + case F64_TO_F16: + test_a_f64_z_f16(slow_f64_to_f16, qemu_f64_to_f16); + break; + case F64_TO_F32: + test_a_f64_z_f32(slow_f64_to_f32, qemu_f64_to_f32); + break; + case F64_TO_EXTF80: + test_a_f64_z_extF80(slow_f64_to_extF80M, qemu_f64_to_extF80M); + break; + case F64_TO_F128: + test_a_f64_z_f128(slow_f64_to_f128M, qemu_f64_to_f128M); + break; + case F64_ROUNDTOINT: + test_az_f64_rx(slow_f64_roundToInt, qemu_f64_roundToInt, rmode, exact); + break; + case F64_ADD: + true_abz_f64 = slow_f64_add; + subj_abz_f64 = qemu_f64_add; + goto test_abz_f64; + case F64_SUB: + true_abz_f64 = slow_f64_sub; + subj_abz_f64 = qemu_f64_sub; + goto test_abz_f64; + case F64_MUL: + true_abz_f64 = slow_f64_mul; + subj_abz_f64 = qemu_f64_mul; + goto test_abz_f64; + case F64_DIV: + true_abz_f64 = slow_f64_div; + subj_abz_f64 = qemu_f64_div; + goto test_abz_f64; + case F64_REM: + true_abz_f64 = slow_f64_rem; + subj_abz_f64 = qemu_f64_rem; + test_abz_f64: + test_abz_f64(true_abz_f64, subj_abz_f64); + break; + case F64_MULADD: + test_abcz_f64(slow_f64_mulAdd, qemu_f64_mulAdd); + break; + case F64_SQRT: + test_az_f64(slow_f64_sqrt, qemu_f64_sqrt); + break; + case F64_EQ: + true_ab_f64_z_bool = slow_f64_eq; + subj_ab_f64_z_bool = qemu_f64_eq; + goto test_ab_f64_z_bool; + case F64_LE: + true_ab_f64_z_bool = slow_f64_le; + subj_ab_f64_z_bool = qemu_f64_le; + goto test_ab_f64_z_bool; + case F64_LT: + true_ab_f64_z_bool = slow_f64_lt; + subj_ab_f64_z_bool = qemu_f64_lt; + goto test_ab_f64_z_bool; + case F64_EQ_SIGNALING: + true_ab_f64_z_bool = slow_f64_eq_signaling; + subj_ab_f64_z_bool = qemu_f64_eq_signaling; + goto test_ab_f64_z_bool; + case F64_LE_QUIET: + true_ab_f64_z_bool = slow_f64_le_quiet; + subj_ab_f64_z_bool = qemu_f64_le_quiet; + goto test_ab_f64_z_bool; + case F64_LT_QUIET: + true_ab_f64_z_bool = slow_f64_lt_quiet; + subj_ab_f64_z_bool = qemu_f64_lt_quiet; + test_ab_f64_z_bool: + test_ab_f64_z_bool(true_ab_f64_z_bool, subj_ab_f64_z_bool); + break; + case EXTF80_TO_UI32: + not_implemented(); + break; + case EXTF80_TO_UI64: + not_implemented(); + break; + case EXTF80_TO_I32: + test_a_extF80_z_i32_rx(slow_extF80M_to_i32, qemu_extF80M_to_i32, rmode, + exact); + break; + case EXTF80_TO_I64: + test_a_extF80_z_i64_rx(slow_extF80M_to_i64, qemu_extF80M_to_i64, rmode, + exact); + break; + case EXTF80_TO_UI32_R_MINMAG: + not_implemented(); + break; + case EXTF80_TO_UI64_R_MINMAG: + not_implemented(); + break; + case EXTF80_TO_I32_R_MINMAG: + test_a_extF80_z_i32_x(slow_extF80M_to_i32_r_minMag, + qemu_extF80M_to_i32_r_minMag, exact); + break; + case EXTF80_TO_I64_R_MINMAG: + test_a_extF80_z_i64_x(slow_extF80M_to_i64_r_minMag, + qemu_extF80M_to_i64_r_minMag, exact); + break; + case EXTF80_TO_F16: + not_implemented(); + break; + case EXTF80_TO_F32: + test_a_extF80_z_f32(slow_extF80M_to_f32, qemu_extF80M_to_f32); + break; + case EXTF80_TO_F64: + test_a_extF80_z_f64(slow_extF80M_to_f64, qemu_extF80M_to_f64); + break; + case EXTF80_TO_F128: + test_a_extF80_z_f128(slow_extF80M_to_f128M, qemu_extF80M_to_f128M); + break; + case EXTF80_ROUNDTOINT: + test_az_extF80_rx(slow_extF80M_roundToInt, qemu_extF80M_roundToInt, + rmode, exact); + break; + case EXTF80_ADD: + true_abz_extF80M = slow_extF80M_add; + subj_abz_extF80M = qemu_extF80M_add; + goto test_abz_extF80; + case EXTF80_SUB: + true_abz_extF80M = slow_extF80M_sub; + subj_abz_extF80M = qemu_extF80M_sub; + goto test_abz_extF80; + case 
EXTF80_MUL: + true_abz_extF80M = slow_extF80M_mul; + subj_abz_extF80M = qemu_extF80M_mul; + goto test_abz_extF80; + case EXTF80_DIV: + true_abz_extF80M = slow_extF80M_div; + subj_abz_extF80M = qemu_extF80M_div; + goto test_abz_extF80; + case EXTF80_REM: + true_abz_extF80M = slow_extF80M_rem; + subj_abz_extF80M = qemu_extF80M_rem; + test_abz_extF80: + test_abz_extF80(true_abz_extF80M, subj_abz_extF80M); + break; + case EXTF80_SQRT: + test_az_extF80(slow_extF80M_sqrt, qemu_extF80M_sqrt); + break; + case EXTF80_EQ: + true_ab_extF80M_z_bool = slow_extF80M_eq; + subj_ab_extF80M_z_bool = qemu_extF80M_eq; + goto test_ab_extF80_z_bool; + case EXTF80_LE: + true_ab_extF80M_z_bool = slow_extF80M_le; + subj_ab_extF80M_z_bool = qemu_extF80M_le; + goto test_ab_extF80_z_bool; + case EXTF80_LT: + true_ab_extF80M_z_bool = slow_extF80M_lt; + subj_ab_extF80M_z_bool = qemu_extF80M_lt; + goto test_ab_extF80_z_bool; + case EXTF80_EQ_SIGNALING: + true_ab_extF80M_z_bool = slow_extF80M_eq_signaling; + subj_ab_extF80M_z_bool = qemu_extF80M_eq_signaling; + goto test_ab_extF80_z_bool; + case EXTF80_LE_QUIET: + true_ab_extF80M_z_bool = slow_extF80M_le_quiet; + subj_ab_extF80M_z_bool = qemu_extF80M_le_quiet; + goto test_ab_extF80_z_bool; + case EXTF80_LT_QUIET: + true_ab_extF80M_z_bool = slow_extF80M_lt_quiet; + subj_ab_extF80M_z_bool = qemu_extF80M_lt_quiet; + test_ab_extF80_z_bool: + test_ab_extF80_z_bool(true_ab_extF80M_z_bool, subj_ab_extF80M_z_bool); + break; + case F128_TO_UI32: + test_a_f128_z_ui32_rx(slow_f128M_to_ui32, qemu_f128M_to_ui32, rmode, + exact); + break; + case F128_TO_UI64: + test_a_f128_z_ui64_rx(slow_f128M_to_ui64, qemu_f128M_to_ui64, rmode, + exact); + break; + case F128_TO_I32: + test_a_f128_z_i32_rx(slow_f128M_to_i32, qemu_f128M_to_i32, rmode, + exact); + break; + case F128_TO_I64: + test_a_f128_z_i64_rx(slow_f128M_to_i64, qemu_f128M_to_i64, rmode, + exact); + break; + case F128_TO_UI32_R_MINMAG: + test_a_f128_z_ui32_x(slow_f128M_to_ui32_r_minMag, + qemu_f128M_to_ui32_r_minMag, exact); + break; + case F128_TO_UI64_R_MINMAG: + test_a_f128_z_ui64_x(slow_f128M_to_ui64_r_minMag, + qemu_f128M_to_ui64_r_minMag, exact); + break; + case F128_TO_I32_R_MINMAG: + test_a_f128_z_i32_x(slow_f128M_to_i32_r_minMag, + qemu_f128M_to_i32_r_minMag, exact); + break; + case F128_TO_I64_R_MINMAG: + test_a_f128_z_i64_x(slow_f128M_to_i64_r_minMag, + qemu_f128M_to_i64_r_minMag, exact); + break; + case F128_TO_F16: + not_implemented(); + break; + case F128_TO_F32: + test_a_f128_z_f32(slow_f128M_to_f32, qemu_f128M_to_f32); + break; + case F128_TO_F64: + test_a_f128_z_f64(slow_f128M_to_f64, qemu_f128M_to_f64); + break; + case F128_TO_EXTF80: + test_a_f128_z_extF80(slow_f128M_to_extF80M, qemu_f128M_to_extF80M); + break; + case F128_ROUNDTOINT: + test_az_f128_rx(slow_f128M_roundToInt, qemu_f128M_roundToInt, rmode, + exact); + break; + case F128_ADD: + true_abz_f128M = slow_f128M_add; + subj_abz_f128M = qemu_f128M_add; + goto test_abz_f128; + case F128_SUB: + true_abz_f128M = slow_f128M_sub; + subj_abz_f128M = qemu_f128M_sub; + goto test_abz_f128; + case F128_MUL: + true_abz_f128M = slow_f128M_mul; + subj_abz_f128M = qemu_f128M_mul; + goto test_abz_f128; + case F128_DIV: + true_abz_f128M = slow_f128M_div; + subj_abz_f128M = qemu_f128M_div; + goto test_abz_f128; + case F128_REM: + true_abz_f128M = slow_f128M_rem; + subj_abz_f128M = qemu_f128M_rem; + test_abz_f128: + test_abz_f128(true_abz_f128M, subj_abz_f128M); + break; + case F128_MULADD: + test_abcz_f128(slow_f128M_mulAdd, qemu_f128M_mulAdd); + break; + case F128_SQRT: + 
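/*
 * Editor's note (not part of the commit): the two-operand cases above
 * share one pattern -- assign the true_ (slowfloat) and subj_ (QEMU)
 * function pointers, then goto (or fall through to) a common test_*
 * label, so a single call site exercises every operation of a given
 * width.
 */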
test_az_f128(slow_f128M_sqrt, qemu_f128M_sqrt); + break; + case F128_EQ: + true_ab_f128M_z_bool = slow_f128M_eq; + subj_ab_f128M_z_bool = qemu_f128M_eq; + goto test_ab_f128_z_bool; + case F128_LE: + true_ab_f128M_z_bool = slow_f128M_le; + subj_ab_f128M_z_bool = qemu_f128M_le; + goto test_ab_f128_z_bool; + case F128_LT: + true_ab_f128M_z_bool = slow_f128M_lt; + subj_ab_f128M_z_bool = qemu_f128M_lt; + goto test_ab_f128_z_bool; + case F128_EQ_SIGNALING: + true_ab_f128M_z_bool = slow_f128M_eq_signaling; + subj_ab_f128M_z_bool = qemu_f128M_eq_signaling; + goto test_ab_f128_z_bool; + case F128_LE_QUIET: + true_ab_f128M_z_bool = slow_f128M_le_quiet; + subj_ab_f128M_z_bool = qemu_f128M_le_quiet; + goto test_ab_f128_z_bool; + case F128_LT_QUIET: + true_ab_f128M_z_bool = slow_f128M_lt_quiet; + subj_ab_f128M_z_bool = qemu_f128M_lt_quiet; + test_ab_f128_z_bool: + test_ab_f128_z_bool(true_ab_f128M_z_bool, subj_ab_f128M_z_bool); + break; + } + if ((verCases_errorStop && verCases_anyErrors)) { + verCases_exitWithStatus(); + } +} + +static unsigned int test_name_to_op(const char *arg) +{ + unsigned int i; + + /* counting begins at 1 */ + for (i = 1; i < NUM_FUNCTIONS; i++) { + const char *name = functionInfos[i].namePtr; + + if (name && !strcmp(name, arg)) { + return i; + } + } + return 0; +} + +static unsigned int round_name_to_mode(const char *name) +{ + int i; + + /* counting begins at 1 */ + for (i = 1; i < NUM_ROUNDINGMODES; i++) { + if (!strcmp(round_mode_names[i], name)) { + return i; + } + } + return 0; +} + +static int set_init_flags(const char *flags) +{ + const char *p; + + for (p = flags; *p != '\0'; p++) { + switch (*p) { + case 'v': + slow_init_flags |= softfloat_flag_invalid; + qemu_init_flags |= float_flag_invalid; + break; + case 'i': + slow_init_flags |= softfloat_flag_infinite; + qemu_init_flags |= float_flag_divbyzero; + break; + case 'o': + slow_init_flags |= softfloat_flag_overflow; + qemu_init_flags |= float_flag_overflow; + break; + case 'u': + slow_init_flags |= softfloat_flag_underflow; + qemu_init_flags |= float_flag_underflow; + break; + case 'x': + slow_init_flags |= softfloat_flag_inexact; + qemu_init_flags |= float_flag_inexact; + break; + default: + return 1; + } + } + return 0; +} + +static uint_fast8_t slow_clear_flags(void) +{ + uint8_t prev = slowfloat_exceptionFlags; + + slowfloat_exceptionFlags = slow_init_flags; + return prev; +} + +static uint_fast8_t qemu_clear_flags(void) +{ + uint8_t prev = qemu_flags_to_sf(qsf.float_exception_flags); + + qsf.float_exception_flags = qemu_init_flags; + return prev; +} + +static void parse_args(int argc, char *argv[]) +{ + unsigned int i; + int c; + + for (;;) { + c = getopt(argc, argv, "he:f:l:r:s"); + if (c < 0) { + break; + } + switch (c) { + case 'h': + usage_complete(argc, argv); + exit(EXIT_SUCCESS); + case 'e': + if (qemu_strtoui(optarg, NULL, 0, &n_max_errors)) { + fprintf(stderr, "fatal: invalid max error count\n"); + exit(EXIT_FAILURE); + } + break; + case 'f': + if (set_init_flags(optarg)) { + fprintf(stderr, "fatal: flags must be a subset of 'vioux'\n"); + exit(EXIT_FAILURE); + } + break; + case 'l': + if (qemu_strtoi(optarg, NULL, 0, &test_level)) { + fprintf(stderr, "fatal: invalid test level\n"); + exit(EXIT_FAILURE); + } + break; + case 'r': + if (!strcmp(optarg, "all")) { + test_round_mode = 0; + } else { + test_round_mode = round_name_to_mode(optarg); + if (test_round_mode == 0) { + fprintf(stderr, "fatal: invalid rounding mode\n"); + exit(EXIT_FAILURE); + } + } + break; + case 's': + verCases_errorStop = true; + 
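/*
 * Editor's note (not part of the commit): -s only sets
 * verCases_errorStop here; do_testfloat() checks it together with
 * verCases_anyErrors after each operation and exits early via
 * verCases_exitWithStatus() on the first reported failure.
 */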
break; + case '?': + /* invalid option or missing argument; getopt prints error info */ + exit(EXIT_FAILURE); + } + } + + /* set rounding modes */ + if (test_round_mode == 0) { + /* test all rounding modes; note that counting begins at 1 */ + n_round_modes = NUM_ROUNDINGMODES - 1; + round_modes = g_malloc_n(n_round_modes, sizeof(*round_modes)); + for (i = 0; i < n_round_modes; i++) { + round_modes[i] = i + 1; + } + } else { + n_round_modes = 1; + round_modes = g_malloc(sizeof(*round_modes)); + round_modes[0] = test_round_mode; + } + + /* set test ops */ + if (optind == argc) { + /* test all ops; note that counting begins at 1 */ + n_test_ops = NUM_FUNCTIONS - 1; + test_ops = g_malloc_n(n_test_ops, sizeof(*test_ops)); + for (i = 0; i < n_test_ops; i++) { + test_ops[i] = i + 1; + } + } else { + n_test_ops = argc - optind; + test_ops = g_malloc_n(n_test_ops, sizeof(*test_ops)); + for (i = 0; i < n_test_ops; i++) { + const char *name = argv[i + optind]; + unsigned int op = test_name_to_op(name); + + if (op == 0) { + fprintf(stderr, "fatal: invalid op '%s'\n", name); + exit(EXIT_FAILURE); + } + test_ops[i] = op; + } + } +} + +static void QEMU_NORETURN run_test(void) +{ + unsigned int i; + + genCases_setLevel(test_level); + verCases_maxErrorCount = n_max_errors; + + testLoops_trueFlagsFunction = slow_clear_flags; + testLoops_subjFlagsFunction = qemu_clear_flags; + + for (i = 0; i < n_test_ops; i++) { + unsigned int op = test_ops[i]; + int j; + + if (functionInfos[op].namePtr == NULL) { + continue; + } + verCases_functionNamePtr = functionInfos[op].namePtr; + + for (j = 0; j < n_round_modes; j++) { + int attrs = functionInfos[op].attribs; + int round = round_modes[j]; + int rmode = roundingModes[round]; + int k; + + verCases_roundingCode = 0; + slowfloat_roundingMode = rmode; + qsf.float_rounding_mode = sf_rounding_to_qemu(rmode); + + if (attrs & (FUNC_ARG_ROUNDINGMODE | FUNC_EFF_ROUNDINGMODE)) { + /* print rounding mode if the op is affected by it */ + verCases_roundingCode = round; + } else if (j > 0) { + /* if the op is not sensitive to rounding, move on */ + break; + } + + /* QEMU doesn't have !exact */ + verCases_exact = true; + verCases_usesExact = !!(attrs & FUNC_ARG_EXACT); + + for (k = 0; k < 3; k++) { + FloatX80RoundPrec qsf_prec80 = floatx80_precision_x; + int prec80 = 80; + int l; + + if (k == 1) { + prec80 = 64; + qsf_prec80 = floatx80_precision_d; + } else if (k == 2) { + prec80 = 32; + qsf_prec80 = floatx80_precision_s; + } + + verCases_roundingPrecision = 0; + slow_extF80_roundingPrecision = prec80; + qsf.floatx80_rounding_precision = qsf_prec80; + + if (attrs & FUNC_EFF_ROUNDINGPRECISION) { + verCases_roundingPrecision = prec80; + } else if (k > 0) { + /* if the op is not sensitive to prec80, move on */ + break; + } + + /* note: the count begins at 1 */ + for (l = 1; l < NUM_TININESSMODES; l++) { + int tmode = tininessModes[l]; + + verCases_tininessCode = 0; + slowfloat_detectTininess = tmode; + qsf.tininess_before_rounding = sf_tininess_to_qemu(tmode); + + if (attrs & FUNC_EFF_TININESSMODE || + ((attrs & FUNC_EFF_TININESSMODE_REDUCEDPREC) && + prec80 && prec80 < 80)) { + verCases_tininessCode = l; + } else if (l > 1) { + /* if the op is not sensitive to tininess, move on */ + break; + } + + do_testfloat(op, rmode, true); + } + } + } + } + verCases_exitWithStatus(); + /* old compilers might miss that we exited */ + g_assert_not_reached(); +} + +int main(int argc, char *argv[]) +{ + parse_args(argc, argv); + fail_programName = argv[0]; + run_test(); /* does not return */ +} diff 
--git a/tests/fp/meson.build b/tests/fp/meson.build new file mode 100644 index 000000000..07e2cdc8d --- /dev/null +++ b/tests/fp/meson.build @@ -0,0 +1,645 @@ +# There are namespace pollution issues on Windows, due to osdep.h +# bringing in Windows headers that define a FLOAT128 type. +if targetos == 'windows' + subdir_done() +endif + +fpcflags = [ + # softfloat defines + '-DSOFTFLOAT_ROUND_ODD', + '-DINLINE_LEVEL=5', + '-DSOFTFLOAT_FAST_DIV32TO16', + '-DSOFTFLOAT_FAST_DIV64TO32', + '-DSOFTFLOAT_FAST_INT64', + # testfloat defines + '-DFLOAT16', + '-DFLOAT64', + '-DEXTFLOAT80', + '-DFLOAT128', + '-DFLOAT_ROUND_ODD', + '-DLONG_DOUBLE_IS_EXTFLOAT80', +] + +sfdir = 'berkeley-softfloat-3/source' +sfspedir = sfdir / '8086-SSE' +tfdir = 'berkeley-testfloat-3/source' + +sfinc = include_directories(sfdir / 'include', sfspedir) + +tfcflags = [ + '-Wno-implicit-fallthrough', + '-Wno-strict-prototypes', + '-Wno-unknown-pragmas', + '-Wno-uninitialized', + '-Wno-missing-prototypes', + '-Wno-return-type', + '-Wno-unused-function', + '-Wno-error', +] + +tfgencases = [ + tfdir / 'genCases_ui32.c', + tfdir / 'genCases_ui64.c', + tfdir / 'genCases_i32.c', + tfdir / 'genCases_i64.c', + tfdir / 'genCases_f16.c', + tfdir / 'genCases_f32.c', + tfdir / 'genCases_f64.c', + tfdir / 'genCases_extF80.c', + tfdir / 'genCases_f128.c', +] + +tfwritecase = [ + tfdir / 'writeCase_a_ui32.c', + tfdir / 'writeCase_a_ui64.c', + tfdir / 'writeCase_a_f16.c', + tfdir / 'writeCase_ab_f16.c', + tfdir / 'writeCase_abc_f16.c', + tfdir / 'writeCase_a_f32.c', + tfdir / 'writeCase_ab_f32.c', + tfdir / 'writeCase_abc_f32.c', + tfdir / 'writeCase_a_f64.c', + tfdir / 'writeCase_ab_f64.c', + tfdir / 'writeCase_abc_f64.c', + tfdir / 'writeCase_a_extF80M.c', + tfdir / 'writeCase_ab_extF80M.c', + tfdir / 'writeCase_a_f128M.c', + tfdir / 'writeCase_ab_f128M.c', + tfdir / 'writeCase_abc_f128M.c', + tfdir / 'writeCase_z_bool.c', + tfdir / 'writeCase_z_ui32.c', + tfdir / 'writeCase_z_ui64.c', + tfdir / 'writeCase_z_f16.c', + tfdir / 'writeCase_z_f32.c', + tfdir / 'writeCase_z_f64.c', + tfdir / 'writeCase_z_extF80M.c', + tfdir / 'writeCase_z_f128M.c', +] + +tftest = [ + tfdir / 'test_a_ui32_z_f16.c', + tfdir / 'test_a_ui32_z_f32.c', + tfdir / 'test_a_ui32_z_f64.c', + tfdir / 'test_a_ui32_z_extF80.c', + tfdir / 'test_a_ui32_z_f128.c', + tfdir / 'test_a_ui64_z_f16.c', + tfdir / 'test_a_ui64_z_f32.c', + tfdir / 'test_a_ui64_z_f64.c', + tfdir / 'test_a_ui64_z_extF80.c', + tfdir / 'test_a_ui64_z_f128.c', + tfdir / 'test_a_i32_z_f16.c', + tfdir / 'test_a_i32_z_f32.c', + tfdir / 'test_a_i32_z_f64.c', + tfdir / 'test_a_i32_z_extF80.c', + tfdir / 'test_a_i32_z_f128.c', + tfdir / 'test_a_i64_z_f16.c', + tfdir / 'test_a_i64_z_f32.c', + tfdir / 'test_a_i64_z_f64.c', + tfdir / 'test_a_i64_z_extF80.c', + tfdir / 'test_a_i64_z_f128.c', + tfdir / 'test_a_f16_z_ui32_rx.c', + tfdir / 'test_a_f16_z_ui64_rx.c', + tfdir / 'test_a_f16_z_i32_rx.c', + tfdir / 'test_a_f16_z_i64_rx.c', + tfdir / 'test_a_f16_z_ui32_x.c', + tfdir / 'test_a_f16_z_ui64_x.c', + tfdir / 'test_a_f16_z_i32_x.c', + tfdir / 'test_a_f16_z_i64_x.c', + tfdir / 'test_a_f16_z_f32.c', + tfdir / 'test_a_f16_z_f64.c', + tfdir / 'test_a_f16_z_extF80.c', + tfdir / 'test_a_f16_z_f128.c', + tfdir / 'test_az_f16.c', + tfdir / 'test_az_f16_rx.c', + tfdir / 'test_abz_f16.c', + tfdir / 'test_abcz_f16.c', + tfdir / 'test_ab_f16_z_bool.c', + tfdir / 'test_a_f32_z_ui32_rx.c', + tfdir / 'test_a_f32_z_ui64_rx.c', + tfdir / 'test_a_f32_z_i32_rx.c', + tfdir / 'test_a_f32_z_i64_rx.c', + tfdir / 'test_a_f32_z_ui32_x.c', + 
tfdir / 'test_a_f32_z_ui64_x.c', + tfdir / 'test_a_f32_z_i32_x.c', + tfdir / 'test_a_f32_z_i64_x.c', + tfdir / 'test_a_f32_z_f16.c', + tfdir / 'test_a_f32_z_f64.c', + tfdir / 'test_a_f32_z_extF80.c', + tfdir / 'test_a_f32_z_f128.c', + tfdir / 'test_az_f32.c', + tfdir / 'test_az_f32_rx.c', + tfdir / 'test_abz_f32.c', + tfdir / 'test_abcz_f32.c', + tfdir / 'test_ab_f32_z_bool.c', + tfdir / 'test_a_f64_z_ui32_rx.c', + tfdir / 'test_a_f64_z_ui64_rx.c', + tfdir / 'test_a_f64_z_i32_rx.c', + tfdir / 'test_a_f64_z_i64_rx.c', + tfdir / 'test_a_f64_z_ui32_x.c', + tfdir / 'test_a_f64_z_ui64_x.c', + tfdir / 'test_a_f64_z_i32_x.c', + tfdir / 'test_a_f64_z_i64_x.c', + tfdir / 'test_a_f64_z_f16.c', + tfdir / 'test_a_f64_z_f32.c', + tfdir / 'test_a_f64_z_extF80.c', + tfdir / 'test_a_f64_z_f128.c', + tfdir / 'test_az_f64.c', + tfdir / 'test_az_f64_rx.c', + tfdir / 'test_abz_f64.c', + tfdir / 'test_abcz_f64.c', + tfdir / 'test_ab_f64_z_bool.c', + tfdir / 'test_a_extF80_z_ui32_rx.c', + tfdir / 'test_a_extF80_z_ui64_rx.c', + tfdir / 'test_a_extF80_z_i32_rx.c', + tfdir / 'test_a_extF80_z_i64_rx.c', + tfdir / 'test_a_extF80_z_ui32_x.c', + tfdir / 'test_a_extF80_z_ui64_x.c', + tfdir / 'test_a_extF80_z_i32_x.c', + tfdir / 'test_a_extF80_z_i64_x.c', + tfdir / 'test_a_extF80_z_f16.c', + tfdir / 'test_a_extF80_z_f32.c', + tfdir / 'test_a_extF80_z_f64.c', + tfdir / 'test_a_extF80_z_f128.c', + tfdir / 'test_az_extF80.c', + tfdir / 'test_az_extF80_rx.c', + tfdir / 'test_abz_extF80.c', + tfdir / 'test_ab_extF80_z_bool.c', + tfdir / 'test_a_f128_z_ui32_rx.c', + tfdir / 'test_a_f128_z_ui64_rx.c', + tfdir / 'test_a_f128_z_i32_rx.c', + tfdir / 'test_a_f128_z_i64_rx.c', + tfdir / 'test_a_f128_z_ui32_x.c', + tfdir / 'test_a_f128_z_ui64_x.c', + tfdir / 'test_a_f128_z_i32_x.c', + tfdir / 'test_a_f128_z_i64_x.c', + tfdir / 'test_a_f128_z_f16.c', + tfdir / 'test_a_f128_z_f32.c', + tfdir / 'test_a_f128_z_f64.c', + tfdir / 'test_a_f128_z_extF80.c', + tfdir / 'test_az_f128.c', + tfdir / 'test_az_f128_rx.c', + tfdir / 'test_abz_f128.c', + tfdir / 'test_abcz_f128.c', + tfdir / 'test_ab_f128_z_bool.c', +] + +libtestfloat = static_library( + 'testfloat', + files( + tfdir / 'uint128_inline.c', + tfdir / 'uint128.c', + tfdir / 'fail.c', + tfdir / 'functions_common.c', + tfdir / 'functionInfos.c', + tfdir / 'standardFunctionInfos.c', + tfdir / 'random.c', + tfdir / 'genCases_common.c', + tfgencases, + tfdir / 'genCases_writeTestsTotal.c', + tfdir / 'verCases_inline.c', + tfdir / 'verCases_common.c', + tfdir / 'verCases_writeFunctionName.c', + tfdir / 'readHex.c', + tfdir / 'writeHex.c', + tfwritecase, + tfdir / 'testLoops_common.c', + tftest, + ), + include_directories: sfinc, + c_args: tfcflags + fpcflags, +) + +sfcflags = [ + '-Wno-implicit-fallthrough', + '-Wno-missing-prototypes', + '-Wno-redundant-decls', + '-Wno-return-type', + '-Wno-error', +] + +libsoftfloat = static_library( + 'softfloat', + files( + # primitives + sfdir / 's_eq128.c', + sfdir / 's_le128.c', + sfdir / 's_lt128.c', + sfdir / 's_shortShiftLeft128.c', + sfdir / 's_shortShiftRight128.c', + sfdir / 's_shortShiftRightJam64.c', + sfdir / 's_shortShiftRightJam64Extra.c', + sfdir / 's_shortShiftRightJam128.c', + sfdir / 's_shortShiftRightJam128Extra.c', + sfdir / 's_shiftRightJam32.c', + sfdir / 's_shiftRightJam64.c', + sfdir / 's_shiftRightJam64Extra.c', + sfdir / 's_shiftRightJam128.c', + sfdir / 's_shiftRightJam128Extra.c', + sfdir / 's_shiftRightJam256M.c', + sfdir / 's_countLeadingZeros8.c', + sfdir / 's_countLeadingZeros16.c', + sfdir / 's_countLeadingZeros32.c', + 
sfdir / 's_countLeadingZeros64.c', + sfdir / 's_add128.c', + sfdir / 's_add256M.c', + sfdir / 's_sub128.c', + sfdir / 's_sub256M.c', + sfdir / 's_mul64ByShifted32To128.c', + sfdir / 's_mul64To128.c', + sfdir / 's_mul128By32.c', + sfdir / 's_mul128To256M.c', + sfdir / 's_approxRecip_1Ks.c', + sfdir / 's_approxRecip32_1.c', + sfdir / 's_approxRecipSqrt_1Ks.c', + sfdir / 's_approxRecipSqrt32_1.c', + # others + sfdir / 's_roundToUI32.c', + sfdir / 's_roundToUI64.c', + sfdir / 's_roundToI32.c', + sfdir / 's_roundToI64.c', + sfdir / 's_normSubnormalF16Sig.c', + sfdir / 's_roundPackToF16.c', + sfdir / 's_normRoundPackToF16.c', + sfdir / 's_addMagsF16.c', + sfdir / 's_subMagsF16.c', + sfdir / 's_mulAddF16.c', + sfdir / 's_normSubnormalF32Sig.c', + sfdir / 's_roundPackToF32.c', + sfdir / 's_normRoundPackToF32.c', + sfdir / 's_addMagsF32.c', + sfdir / 's_subMagsF32.c', + sfdir / 's_mulAddF32.c', + sfdir / 's_normSubnormalF64Sig.c', + sfdir / 's_roundPackToF64.c', + sfdir / 's_normRoundPackToF64.c', + sfdir / 's_addMagsF64.c', + sfdir / 's_subMagsF64.c', + sfdir / 's_mulAddF64.c', + sfdir / 's_normSubnormalExtF80Sig.c', + sfdir / 's_roundPackToExtF80.c', + sfdir / 's_normRoundPackToExtF80.c', + sfdir / 's_addMagsExtF80.c', + sfdir / 's_subMagsExtF80.c', + sfdir / 's_normSubnormalF128Sig.c', + sfdir / 's_roundPackToF128.c', + sfdir / 's_normRoundPackToF128.c', + sfdir / 's_addMagsF128.c', + sfdir / 's_subMagsF128.c', + sfdir / 's_mulAddF128.c', + sfdir / 'softfloat_state.c', + sfdir / 'ui32_to_f16.c', + sfdir / 'ui32_to_f32.c', + sfdir / 'ui32_to_f64.c', + sfdir / 'ui32_to_extF80.c', + sfdir / 'ui32_to_extF80M.c', + sfdir / 'ui32_to_f128.c', + sfdir / 'ui32_to_f128M.c', + sfdir / 'ui64_to_f16.c', + sfdir / 'ui64_to_f32.c', + sfdir / 'ui64_to_f64.c', + sfdir / 'ui64_to_extF80.c', + sfdir / 'ui64_to_extF80M.c', + sfdir / 'ui64_to_f128.c', + sfdir / 'ui64_to_f128M.c', + sfdir / 'i32_to_f16.c', + sfdir / 'i32_to_f32.c', + sfdir / 'i32_to_f64.c', + sfdir / 'i32_to_extF80.c', + sfdir / 'i32_to_extF80M.c', + sfdir / 'i32_to_f128.c', + sfdir / 'i32_to_f128M.c', + sfdir / 'i64_to_f16.c', + sfdir / 'i64_to_f32.c', + sfdir / 'i64_to_f64.c', + sfdir / 'i64_to_extF80.c', + sfdir / 'i64_to_extF80M.c', + sfdir / 'i64_to_f128.c', + sfdir / 'i64_to_f128M.c', + sfdir / 'f16_to_ui32.c', + sfdir / 'f16_to_ui64.c', + sfdir / 'f16_to_i32.c', + sfdir / 'f16_to_i64.c', + sfdir / 'f16_to_ui32_r_minMag.c', + sfdir / 'f16_to_ui64_r_minMag.c', + sfdir / 'f16_to_i32_r_minMag.c', + sfdir / 'f16_to_i64_r_minMag.c', + sfdir / 'f16_to_f32.c', + sfdir / 'f16_to_f64.c', + sfdir / 'f16_to_extF80.c', + sfdir / 'f16_to_extF80M.c', + sfdir / 'f16_to_f128.c', + sfdir / 'f16_to_f128M.c', + sfdir / 'f16_roundToInt.c', + sfdir / 'f16_add.c', + sfdir / 'f16_sub.c', + sfdir / 'f16_mul.c', + sfdir / 'f16_mulAdd.c', + sfdir / 'f16_div.c', + sfdir / 'f16_rem.c', + sfdir / 'f16_sqrt.c', + sfdir / 'f16_eq.c', + sfdir / 'f16_le.c', + sfdir / 'f16_lt.c', + sfdir / 'f16_eq_signaling.c', + sfdir / 'f16_le_quiet.c', + sfdir / 'f16_lt_quiet.c', + sfdir / 'f16_isSignalingNaN.c', + sfdir / 'f32_to_ui32.c', + sfdir / 'f32_to_ui64.c', + sfdir / 'f32_to_i32.c', + sfdir / 'f32_to_i64.c', + sfdir / 'f32_to_ui32_r_minMag.c', + sfdir / 'f32_to_ui64_r_minMag.c', + sfdir / 'f32_to_i32_r_minMag.c', + sfdir / 'f32_to_i64_r_minMag.c', + sfdir / 'f32_to_f16.c', + sfdir / 'f32_to_f64.c', + sfdir / 'f32_to_extF80.c', + sfdir / 'f32_to_extF80M.c', + sfdir / 'f32_to_f128.c', + sfdir / 'f32_to_f128M.c', + sfdir / 'f32_roundToInt.c', + sfdir / 'f32_add.c', + sfdir / 
+ sfdir / 'f32_sub.c',
+ sfdir / 'f32_mul.c',
+ sfdir / 'f32_mulAdd.c',
+ sfdir / 'f32_div.c',
+ sfdir / 'f32_rem.c',
+ sfdir / 'f32_sqrt.c',
+ sfdir / 'f32_eq.c',
+ sfdir / 'f32_le.c',
+ sfdir / 'f32_lt.c',
+ sfdir / 'f32_eq_signaling.c',
+ sfdir / 'f32_le_quiet.c',
+ sfdir / 'f32_lt_quiet.c',
+ sfdir / 'f32_isSignalingNaN.c',
+ sfdir / 'f64_to_ui32.c',
+ sfdir / 'f64_to_ui64.c',
+ sfdir / 'f64_to_i32.c',
+ sfdir / 'f64_to_i64.c',
+ sfdir / 'f64_to_ui32_r_minMag.c',
+ sfdir / 'f64_to_ui64_r_minMag.c',
+ sfdir / 'f64_to_i32_r_minMag.c',
+ sfdir / 'f64_to_i64_r_minMag.c',
+ sfdir / 'f64_to_f16.c',
+ sfdir / 'f64_to_f32.c',
+ sfdir / 'f64_to_extF80.c',
+ sfdir / 'f64_to_extF80M.c',
+ sfdir / 'f64_to_f128.c',
+ sfdir / 'f64_to_f128M.c',
+ sfdir / 'f64_roundToInt.c',
+ sfdir / 'f64_add.c',
+ sfdir / 'f64_sub.c',
+ sfdir / 'f64_mul.c',
+ sfdir / 'f64_mulAdd.c',
+ sfdir / 'f64_div.c',
+ sfdir / 'f64_rem.c',
+ sfdir / 'f64_sqrt.c',
+ sfdir / 'f64_eq.c',
+ sfdir / 'f64_le.c',
+ sfdir / 'f64_lt.c',
+ sfdir / 'f64_eq_signaling.c',
+ sfdir / 'f64_le_quiet.c',
+ sfdir / 'f64_lt_quiet.c',
+ sfdir / 'f64_isSignalingNaN.c',
+ sfdir / 'extF80_to_ui32.c',
+ sfdir / 'extF80_to_ui64.c',
+ sfdir / 'extF80_to_i32.c',
+ sfdir / 'extF80_to_i64.c',
+ sfdir / 'extF80_to_ui32_r_minMag.c',
+ sfdir / 'extF80_to_ui64_r_minMag.c',
+ sfdir / 'extF80_to_i32_r_minMag.c',
+ sfdir / 'extF80_to_i64_r_minMag.c',
+ sfdir / 'extF80_to_f16.c',
+ sfdir / 'extF80_to_f32.c',
+ sfdir / 'extF80_to_f64.c',
+ sfdir / 'extF80_to_f128.c',
+ sfdir / 'extF80_roundToInt.c',
+ sfdir / 'extF80_add.c',
+ sfdir / 'extF80_sub.c',
+ sfdir / 'extF80_mul.c',
+ sfdir / 'extF80_div.c',
+ sfdir / 'extF80_rem.c',
+ sfdir / 'extF80_sqrt.c',
+ sfdir / 'extF80_eq.c',
+ sfdir / 'extF80_le.c',
+ sfdir / 'extF80_lt.c',
+ sfdir / 'extF80_eq_signaling.c',
+ sfdir / 'extF80_le_quiet.c',
+ sfdir / 'extF80_lt_quiet.c',
+ sfdir / 'extF80_isSignalingNaN.c',
+ sfdir / 'extF80M_to_ui32.c',
+ sfdir / 'extF80M_to_ui64.c',
+ sfdir / 'extF80M_to_i32.c',
+ sfdir / 'extF80M_to_i64.c',
+ sfdir / 'extF80M_to_ui32_r_minMag.c',
+ sfdir / 'extF80M_to_ui64_r_minMag.c',
+ sfdir / 'extF80M_to_i32_r_minMag.c',
+ sfdir / 'extF80M_to_i64_r_minMag.c',
+ sfdir / 'extF80M_to_f16.c',
+ sfdir / 'extF80M_to_f32.c',
+ sfdir / 'extF80M_to_f64.c',
+ sfdir / 'extF80M_to_f128M.c',
+ sfdir / 'extF80M_roundToInt.c',
+ sfdir / 'extF80M_add.c',
+ sfdir / 'extF80M_sub.c',
+ sfdir / 'extF80M_mul.c',
+ sfdir / 'extF80M_div.c',
+ sfdir / 'extF80M_rem.c',
+ sfdir / 'extF80M_sqrt.c',
+ sfdir / 'extF80M_eq.c',
+ sfdir / 'extF80M_le.c',
+ sfdir / 'extF80M_lt.c',
+ sfdir / 'extF80M_eq_signaling.c',
+ sfdir / 'extF80M_le_quiet.c',
+ sfdir / 'extF80M_lt_quiet.c',
+ sfdir / 'f128_to_ui32.c',
+ sfdir / 'f128_to_ui64.c',
+ sfdir / 'f128_to_i32.c',
+ sfdir / 'f128_to_i64.c',
+ sfdir / 'f128_to_ui32_r_minMag.c',
+ sfdir / 'f128_to_ui64_r_minMag.c',
+ sfdir / 'f128_to_i32_r_minMag.c',
+ sfdir / 'f128_to_i64_r_minMag.c',
+ sfdir / 'f128_to_f16.c',
+ sfdir / 'f128_to_f32.c',
+ sfdir / 'f128_to_extF80.c',
+ sfdir / 'f128_to_f64.c',
+ sfdir / 'f128_roundToInt.c',
+ sfdir / 'f128_add.c',
+ sfdir / 'f128_sub.c',
+ sfdir / 'f128_mul.c',
+ sfdir / 'f128_mulAdd.c',
+ sfdir / 'f128_div.c',
+ sfdir / 'f128_rem.c',
+ sfdir / 'f128_sqrt.c',
+ sfdir / 'f128_eq.c',
+ sfdir / 'f128_le.c',
+ sfdir / 'f128_lt.c',
+ sfdir / 'f128_eq_signaling.c',
+ sfdir / 'f128_le_quiet.c',
+ sfdir / 'f128_lt_quiet.c',
+ sfdir / 'f128_isSignalingNaN.c',
+ sfdir / 'f128M_to_ui32.c',
+ sfdir / 'f128M_to_ui64.c',
+ sfdir / 'f128M_to_i32.c',
+ sfdir / 'f128M_to_i64.c',
+ sfdir / 'f128M_to_ui32_r_minMag.c',
+ sfdir / 'f128M_to_ui64_r_minMag.c',
+ sfdir / 'f128M_to_i32_r_minMag.c',
+ sfdir / 'f128M_to_i64_r_minMag.c',
+ sfdir / 'f128M_to_f16.c',
+ sfdir / 'f128M_to_f32.c',
+ sfdir / 'f128M_to_extF80M.c',
+ sfdir / 'f128M_to_f64.c',
+ sfdir / 'f128M_roundToInt.c',
+ sfdir / 'f128M_add.c',
+ sfdir / 'f128M_sub.c',
+ sfdir / 'f128M_mul.c',
+ sfdir / 'f128M_mulAdd.c',
+ sfdir / 'f128M_div.c',
+ sfdir / 'f128M_rem.c',
+ sfdir / 'f128M_sqrt.c',
+ sfdir / 'f128M_eq.c',
+ sfdir / 'f128M_le.c',
+ sfdir / 'f128M_lt.c',
+ sfdir / 'f128M_eq_signaling.c',
+ sfdir / 'f128M_le_quiet.c',
+ sfdir / 'f128M_lt_quiet.c',
+ # spe
+ sfspedir / 'softfloat_raiseFlags.c',
+ sfspedir / 's_f16UIToCommonNaN.c',
+ sfspedir / 's_commonNaNToF16UI.c',
+ sfspedir / 's_propagateNaNF16UI.c',
+ sfspedir / 's_f32UIToCommonNaN.c',
+ sfspedir / 's_commonNaNToF32UI.c',
+ sfspedir / 's_propagateNaNF32UI.c',
+ sfspedir / 's_f64UIToCommonNaN.c',
+ sfspedir / 's_commonNaNToF64UI.c',
+ sfspedir / 's_propagateNaNF64UI.c',
+ sfspedir / 'extF80M_isSignalingNaN.c',
+ sfspedir / 's_extF80UIToCommonNaN.c',
+ sfspedir / 's_commonNaNToExtF80UI.c',
+ sfspedir / 's_propagateNaNExtF80UI.c',
+ sfspedir / 'f128M_isSignalingNaN.c',
+ sfspedir / 's_f128UIToCommonNaN.c',
+ sfspedir / 's_commonNaNToF128UI.c',
+ sfspedir / 's_propagateNaNF128UI.c',
+ ),
+ include_directories: sfinc,
+ c_args: sfcflags + fpcflags,
+)
+
+fpcflags += [
+ # work around TARGET_* poisoning
+ '-DHW_POISON_H',
+ # define a target to match testfloat's implementation-defined choices, such as
+ # whether to raise the invalid flag when dealing with NaNs in muladd.
+ '-DTARGET_ARM',
+ # FIXME: uiZ may be used uninitialized in this function
+ '-Wno-uninitialized',
+]
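The '-DTARGET_ARM' define above is what pins down those implementation-defined choices. A minimal sketch of the muladd case the comment mentions, assuming QEMU's public softfloat API (float64_muladd, float64_zero, float64_infinity, float64_default_nan) and ARM's convention of raising the invalid flag for (0 * Inf) + qNaN:

#include "qemu/osdep.h"
#include "fpu/softfloat.h"

static void muladd_nan_choice_sketch(void)
{
    float_status st = { .float_rounding_mode = float_round_nearest_even };

    /* (0 * Inf) + qNaN: IEEE 754 leaves the invalid flag up to the target */
    float64 r = float64_muladd(float64_zero, float64_infinity,
                               float64_default_nan(&st), 0, &st);

    /* under the ARM convention selected by -DTARGET_ARM, invalid is raised */
    g_assert(st.float_exception_flags & float_flag_invalid);
    (void)r;
}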
+
+fptest = executable(
+ 'fp-test',
+ ['fp-test.c', tfdir / 'slowfloat.c', '../../fpu/softfloat.c'],
+ link_with: [libtestfloat, libsoftfloat],
+ dependencies: [qemuutil],
+ include_directories: [sfinc, include_directories(tfdir)],
+ c_args: fpcflags,
+)
+softfloat_conv_tests = {
+ 'float-to-float': 'f16_to_f32 f16_to_f64 f16_to_extF80 f16_to_f128 ' +
+ 'f32_to_f16 f32_to_f64 f32_to_extF80 ' +
+ 'f64_to_f16 f64_to_f32 ' +
+ 'extF80_to_f16 extF80_to_f32 ' +
+ 'extF80_to_f64 extF80_to_f128 ' +
+ 'f128_to_f16',
+ 'int-to-float': 'i32_to_f16 i64_to_f16 i32_to_f32 i64_to_f32 ' +
+ 'i32_to_f64 i64_to_f64 ' +
+ 'i32_to_extF80 i64_to_extF80 ' +
+ 'i32_to_f128 i64_to_f128',
+ 'uint-to-float': 'ui32_to_f16 ui64_to_f16 ui32_to_f32 ui64_to_f32 ' +
+ 'ui32_to_f64 ui64_to_f64 ui64_to_f128 ' +
+ 'ui32_to_extF80 ui64_to_extF80',
+ 'float-to-int': 'f16_to_i32 f16_to_i32_r_minMag ' +
+ 'f32_to_i32 f32_to_i32_r_minMag ' +
+ 'f64_to_i32 f64_to_i32_r_minMag ' +
+ 'extF80_to_i32 extF80_to_i32_r_minMag ' +
+ 'f128_to_i32 f128_to_i32_r_minMag ' +
+ 'f16_to_i64 f16_to_i64_r_minMag ' +
+ 'f32_to_i64 f32_to_i64_r_minMag ' +
+ 'f64_to_i64 f64_to_i64_r_minMag ' +
+ 'extF80_to_i64 extF80_to_i64_r_minMag ' +
+ 'f128_to_i64 f128_to_i64_r_minMag',
+ 'float-to-uint': 'f16_to_ui32 f16_to_ui32_r_minMag ' +
+ 'f32_to_ui32 f32_to_ui32_r_minMag ' +
+ 'f64_to_ui32 f64_to_ui32_r_minMag ' +
+ 'extF80_to_ui32 extF80_to_ui32_r_minMag ' +
+ 'f128_to_ui32 f128_to_ui32_r_minMag ' +
+ 'f16_to_ui64 f16_to_ui64_r_minMag ' +
+ 'f32_to_ui64 f32_to_ui64_r_minMag ' +
+ 'f64_to_ui64 f64_to_ui64_r_minMag ' +
+ 'extF80_to_ui64 extF80_to_ui64_r_minMag ' +
+ 'f128_to_ui64 f128_to_ui64_r_minMag',
+ 'round-to-integer': 'f16_roundToInt f32_roundToInt ' +
+ 'f64_roundToInt extF80_roundToInt f128_roundToInt'
+}
+softfloat_tests = {
+ 'eq_signaling' : 'compare',
+ 'le' : 'compare',
+ 'le_quiet' : 'compare',
+ 'lt_quiet' : 'compare',
+ 'add': 'ops',
+ 'sub': 'ops',
+ 'mul': 'ops',
+ 'div': 'ops',
+ 'rem': 'ops',
+ 'sqrt': 'ops'
+}
+# The full test suite can take a while, so default to a quick run:
+# "-l 2 -r all" can take more than a day for some operations and is best
+# run manually.
+fptest_args = ['-s', '-l', '1']
+fptest_rounding_args = ['-r', 'all']
+
+# Conversion routines:
+foreach k, v : softfloat_conv_tests
+ test('fp-test-' + k, fptest,
+ args: fptest_args + fptest_rounding_args + v.split(),
+ suite: ['softfloat', 'softfloat-conv'])
+endforeach
+
+foreach k, v : softfloat_tests
+ test('fp-test-' + k, fptest,
+ args: fptest_args + fptest_rounding_args +
+ ['f16_' + k, 'f32_' + k, 'f64_' + k, 'f128_' + k, 'extF80_' + k],
+ suite: ['softfloat', 'softfloat-' + v])
+endforeach
+
+# FIXME: extF80_mulAdd (missing)
+test('fp-test-mulAdd', fptest,
+ # no fptest_rounding_args
+ args: fptest_args +
+ ['f16_mulAdd', 'f32_mulAdd', 'f64_mulAdd', 'f128_mulAdd'],
+ suite: ['softfloat-slow', 'softfloat-ops-slow'], timeout: 90)
+
+fpbench = executable(
+ 'fp-bench',
+ ['fp-bench.c', '../../fpu/softfloat.c'],
+ link_with: [libtestfloat, libsoftfloat],
+ dependencies: [qemuutil],
+ include_directories: [sfinc, include_directories(tfdir)],
+ c_args: fpcflags,
+)
+
+fptestlog2 = executable(
+ 'fp-test-log2',
+ ['fp-test-log2.c', '../../fpu/softfloat.c'],
+ link_with: [libsoftfloat],
+ dependencies: [qemuutil],
+ include_directories: [sfinc],
+ c_args: fpcflags,
+)
+test('fp-test-log2', fptestlog2,
+ suite: ['softfloat', 'softfloat-ops'])
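fp-test deliberately links two implementations of every operation: QEMU's fpu/softfloat.c under test, and testfloat's slowfloat.c as the bit-exact reference (reached through the wrappers in wrap.c.inc below). A sketch of the comparison this enables; slow_f32_add is assumed from testfloat's slowfloat.h, and this is not the real testLoops machinery:

#include <string.h>

static bool f32_add_agrees(float32_t a, float32_t b)
{
    float32_t ref = slow_f32_add(a, b);  /* slowfloat reference result */
    float32_t got = qemu_f32_add(a, b);  /* QEMU softfloat via wrap.c.inc */

    /* compare bit patterns; a real run also compares exception flags */
    return memcmp(&ref, &got, sizeof(ref)) == 0;
}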
diff --git a/tests/fp/platform.h b/tests/fp/platform.h
new file mode 100644
index 000000000..c20ba70ba
--- /dev/null
+++ b/tests/fp/platform.h
@@ -0,0 +1,41 @@
+#ifndef QEMU_TESTFLOAT_PLATFORM_H
+#define QEMU_TESTFLOAT_PLATFORM_H
+/*
+ * Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+ * California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions, and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions, and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the University nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "config-host.h"
+
+#ifndef HOST_WORDS_BIGENDIAN
+#define LITTLEENDIAN 1
+/* otherwise do not define it */
+#endif
+
+#define INLINE static inline
+
+#endif /* QEMU_TESTFLOAT_PLATFORM_H */
diff --git a/tests/fp/wrap.c.inc b/tests/fp/wrap.c.inc
new file mode 100644
index 000000000..9ff884c14
--- /dev/null
+++ b/tests/fp/wrap.c.inc
@@ -0,0 +1,666 @@
+/*
+ * In this file we wrap QEMU FP functions to look like softfloat/testfloat's,
+ * so that we can use the testfloat infrastructure as-is.
+ *
+ * This file must be included directly from fp-test.c. We could compile it
+ * separately, but it would be tedious to add declarations for all the wrappers.
+ */
+
+static signed char sf_tininess_to_qemu(uint_fast8_t mode)
+{
+ switch (mode) {
+ case softfloat_tininess_beforeRounding:
+ return float_tininess_before_rounding;
+ case softfloat_tininess_afterRounding:
+ return float_tininess_after_rounding;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static signed char sf_rounding_to_qemu(uint_fast8_t mode)
+{
+ switch (mode) {
+ case softfloat_round_near_even:
+ return float_round_nearest_even;
+ case softfloat_round_minMag:
+ return float_round_to_zero;
+ case softfloat_round_min:
+ return float_round_down;
+ case softfloat_round_max:
+ return float_round_up;
+ case softfloat_round_near_maxMag:
+ return float_round_ties_away;
+ case softfloat_round_odd:
+ return float_round_to_odd;
+ default:
+ g_assert_not_reached();
+ }
+}
+
+static uint_fast8_t qemu_flags_to_sf(uint8_t qflags)
+{
+ uint_fast8_t ret = 0;
+
+ if (qflags & float_flag_invalid) {
+ ret |= softfloat_flag_invalid;
+ }
+ if (qflags & float_flag_divbyzero) {
+ ret |= softfloat_flag_infinite;
+ }
+ if (qflags & float_flag_overflow) {
+ ret |= softfloat_flag_overflow;
+ }
+ if (qflags & float_flag_underflow) {
+ ret |= softfloat_flag_underflow;
+ }
+ if (qflags & float_flag_inexact) {
+ ret |= softfloat_flag_inexact;
+ }
+ return ret;
+}
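Modes flow one way and flags flow back: before each wrapped call the testfloat rounding/tininess mode is loaded into the global QEMU float_status (qsf, defined by fp-test.c), and afterwards the accumulated exception flags are translated back. A minimal sketch of that round trip; float_detect_tininess is assumed to be the relevant status field:

static uint_fast8_t run_one_f32_add(float32 a, float32 b, float32 *res)
{
    qsf.float_detect_tininess =
        sf_tininess_to_qemu(softfloat_tininess_beforeRounding);
    qsf.float_rounding_mode = sf_rounding_to_qemu(softfloat_round_near_even);
    qsf.float_exception_flags = 0;   /* clear before the operation under test */

    *res = float32_add(a, b, &qsf);
    return qemu_flags_to_sf(qsf.float_exception_flags);
}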
+
+/*
+ * floatx80 and float128 cannot be cast between qemu and softfloat, because
+ * in softfloat the order of the fields depends on the host's endianness.
+ */
+static extFloat80_t qemu_to_soft80(floatx80 a)
+{
+ extFloat80_t ret;
+
+ ret.signif = a.low;
+ ret.signExp = a.high;
+ return ret;
+}
+
+static floatx80 soft_to_qemu80(extFloat80_t a)
+{
+ floatx80 ret;
+
+ ret.low = a.signif;
+ ret.high = a.signExp;
+ return ret;
+}
+
+static float128_t qemu_to_soft128(float128 a)
+{
+ float128_t ret;
+ struct uint128 *to = (struct uint128 *)&ret;
+
+ to->v0 = a.low;
+ to->v64 = a.high;
+ return ret;
+}
+
+static float128 soft_to_qemu128(float128_t a)
+{
+ struct uint128 *from = (struct uint128 *)&a;
+ float128 ret;
+
+ ret.low = from->v0;
+ ret.high = from->v64;
+ return ret;
+}
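The copies above are deliberately field-by-field: testfloat's struct uint128 orders v0/v64 to match the host, while QEMU's float128 always has explicit high/low members, so a memcpy or pointer cast would silently swap the halves on big-endian hosts. An illustrative round-trip check (not part of the harness):

static void f128_round_trip_sketch(void)
{
    /* 1.0 as binary128: biased exponent 0x3fff, zero fraction */
    float128 q = make_float128(0x3fff000000000000ULL, 0);
    float128_t s = qemu_to_soft128(q);
    float128 back = soft_to_qemu128(s);

    g_assert(back.high == q.high && back.low == q.low);
}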
+
+/* conversions */
+#define WRAP_SF_TO_SF_IEEE(name, func, a_type, b_type) \
+ static b_type##_t name(a_type##_t a) \
+ { \
+ a_type *ap = (a_type *)&a; \
+ b_type ret; \
+ \
+ ret = func(*ap, true, &qsf); \
+ return *(b_type##_t *)&ret; \
+ }
+
+WRAP_SF_TO_SF_IEEE(qemu_f16_to_f32, float16_to_float32, float16, float32)
+WRAP_SF_TO_SF_IEEE(qemu_f16_to_f64, float16_to_float64, float16, float64)
+
+WRAP_SF_TO_SF_IEEE(qemu_f32_to_f16, float32_to_float16, float32, float16)
+WRAP_SF_TO_SF_IEEE(qemu_f64_to_f16, float64_to_float16, float64, float16)
+#undef WRAP_SF_TO_SF_IEEE
+
+#define WRAP_SF_TO_SF(name, func, a_type, b_type) \
+ static b_type##_t name(a_type##_t a) \
+ { \
+ a_type *ap = (a_type *)&a; \
+ b_type ret; \
+ \
+ ret = func(*ap, &qsf); \
+ return *(b_type##_t *)&ret; \
+ }
+
+WRAP_SF_TO_SF(qemu_f32_to_f64, float32_to_float64, float32, float64)
+WRAP_SF_TO_SF(qemu_f64_to_f32, float64_to_float32, float64, float32)
+#undef WRAP_SF_TO_SF
+
+#define WRAP_SF_TO_80(name, func, type) \
+ static void name(type##_t a, extFloat80_t *res) \
+ { \
+ floatx80 ret; \
+ type *ap = (type *)&a; \
+ \
+ ret = func(*ap, &qsf); \
+ *res = qemu_to_soft80(ret); \
+ }
+
+WRAP_SF_TO_80(qemu_f32_to_extF80M, float32_to_floatx80, float32)
+WRAP_SF_TO_80(qemu_f64_to_extF80M, float64_to_floatx80, float64)
+#undef WRAP_SF_TO_80
+
+#define WRAP_SF_TO_128(name, func, type) \
+ static void name(type##_t a, float128_t *res) \
+ { \
+ float128 ret; \
+ type *ap = (type *)&a; \
+ \
+ ret = func(*ap, &qsf); \
+ *res = qemu_to_soft128(ret); \
+ }
+
+WRAP_SF_TO_128(qemu_f32_to_f128M, float32_to_float128, float32)
+WRAP_SF_TO_128(qemu_f64_to_f128M, float64_to_float128, float64)
+#undef WRAP_SF_TO_128
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_SF_TO_INT(name, func, type, fast_type) \
+ static fast_type name(type##_t a, uint_fast8_t round, bool exact) \
+ { \
+ type *ap = (type *)&a; \
+ \
+ qsf.float_rounding_mode = sf_rounding_to_qemu(round); \
+ return func(*ap, &qsf); \
+ }
+
+WRAP_SF_TO_INT(qemu_f16_to_ui32, float16_to_uint32, float16, uint_fast32_t)
+WRAP_SF_TO_INT(qemu_f16_to_ui64, float16_to_uint64, float16, uint_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f32_to_ui32, float32_to_uint32, float32, uint_fast32_t)
+WRAP_SF_TO_INT(qemu_f32_to_ui64, float32_to_uint64, float32, uint_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f64_to_ui32, float64_to_uint32, float64, uint_fast32_t)
+WRAP_SF_TO_INT(qemu_f64_to_ui64, float64_to_uint64, float64, uint_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f16_to_i32, float16_to_int32, float16, int_fast32_t)
+WRAP_SF_TO_INT(qemu_f16_to_i64, float16_to_int64, float16, int_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f32_to_i32, float32_to_int32, float32, int_fast32_t)
+WRAP_SF_TO_INT(qemu_f32_to_i64, float32_to_int64, float32, int_fast64_t)
+
+WRAP_SF_TO_INT(qemu_f64_to_i32, float64_to_int32, float64, int_fast32_t)
+WRAP_SF_TO_INT(qemu_f64_to_i64, float64_to_int64, float64, int_fast64_t)
+#undef WRAP_SF_TO_INT
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_SF_TO_INT_MINMAG(name, func, type, fast_type) \
+ static fast_type name(type##_t a, bool exact) \
+ { \
+ type *ap = (type *)&a; \
+ \
+ return func(*ap, &qsf); \
+ }
+
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_ui32_r_minMag,
+ float16_to_uint32_round_to_zero, float16, uint_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_ui64_r_minMag,
+ float16_to_uint64_round_to_zero, float16, uint_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_i32_r_minMag,
+ float16_to_int32_round_to_zero, float16, int_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f16_to_i64_r_minMag,
+ float16_to_int64_round_to_zero, float16, int_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_ui32_r_minMag,
+ float32_to_uint32_round_to_zero, float32, uint_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_ui64_r_minMag,
+ float32_to_uint64_round_to_zero, float32, uint_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_i32_r_minMag,
+ float32_to_int32_round_to_zero, float32, int_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f32_to_i64_r_minMag,
+ float32_to_int64_round_to_zero, float32, int_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_ui32_r_minMag,
+ float64_to_uint32_round_to_zero, float64, uint_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_ui64_r_minMag,
+ float64_to_uint64_round_to_zero, float64, uint_fast64_t)
+
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_i32_r_minMag,
+ float64_to_int32_round_to_zero, float64, int_fast32_t)
+WRAP_SF_TO_INT_MINMAG(qemu_f64_to_i64_r_minMag,
+ float64_to_int64_round_to_zero, float64, int_fast64_t)
+#undef WRAP_SF_TO_INT_MINMAG
+
+#define WRAP_80_TO_SF(name, func, type) \
+ static type##_t name(const extFloat80_t *ap) \
+ { \
+ floatx80 a; \
+ type ret; \
+ \
+ a = soft_to_qemu80(*ap); \
+ ret = func(a, &qsf); \
+ return *(type##_t *)&ret; \
+ }
+
+WRAP_80_TO_SF(qemu_extF80M_to_f32, floatx80_to_float32, float32)
+WRAP_80_TO_SF(qemu_extF80M_to_f64, floatx80_to_float64, float64)
+#undef WRAP_80_TO_SF
+
+#define WRAP_128_TO_SF(name, func, type) \
+ static type##_t name(const float128_t *ap) \
+ { \
+ float128 a; \
+ type ret; \
+ \
+ a = soft_to_qemu128(*ap); \
+ ret = func(a, &qsf); \
+ return *(type##_t *)&ret; \
+ }
+
+WRAP_128_TO_SF(qemu_f128M_to_f32, float128_to_float32, float32)
+WRAP_128_TO_SF(qemu_f128M_to_f64, float128_to_float64, float64)
+#undef WRAP_128_TO_SF
+
+static void qemu_extF80M_to_f128M(const extFloat80_t *from, float128_t *to)
+{
+ floatx80 qfrom;
+ float128 qto;
+
+ qfrom = soft_to_qemu80(*from);
+ qto = floatx80_to_float128(qfrom, &qsf);
+ *to = qemu_to_soft128(qto);
+}
+
+static void qemu_f128M_to_extF80M(const float128_t *from, extFloat80_t *to)
+{
+ float128 qfrom;
+ floatx80 qto;
+
+ qfrom = soft_to_qemu128(*from);
+ qto = float128_to_floatx80(qfrom, &qsf);
+ *to = qemu_to_soft80(qto);
+}
+
+#define WRAP_INT_TO_SF(name, func, int_type, type) \
+ static type##_t name(int_type a) \
+ { \
+ type ret; \
+ \
+ ret = func(a, &qsf); \
+ return *(type##_t *)&ret; \
+ }
+
+WRAP_INT_TO_SF(qemu_ui32_to_f16, uint32_to_float16, uint32_t, float16)
+WRAP_INT_TO_SF(qemu_ui32_to_f32, uint32_to_float32, uint32_t, float32)
+WRAP_INT_TO_SF(qemu_ui32_to_f64, uint32_to_float64, uint32_t, float64)
+
+WRAP_INT_TO_SF(qemu_ui64_to_f16, uint64_to_float16, uint64_t, float16)
+WRAP_INT_TO_SF(qemu_ui64_to_f32, uint64_to_float32, uint64_t, float32)
+WRAP_INT_TO_SF(qemu_ui64_to_f64, uint64_to_float64, uint64_t, float64)
+
+WRAP_INT_TO_SF(qemu_i32_to_f16, int32_to_float16, int32_t, float16)
+WRAP_INT_TO_SF(qemu_i32_to_f32, int32_to_float32, int32_t, float32)
+WRAP_INT_TO_SF(qemu_i32_to_f64, int32_to_float64, int32_t, float64)
+
+WRAP_INT_TO_SF(qemu_i64_to_f16, int64_to_float16, int64_t, float16)
+WRAP_INT_TO_SF(qemu_i64_to_f32, int64_to_float32, int64_t, float32)
+WRAP_INT_TO_SF(qemu_i64_to_f64, int64_to_float64, int64_t, float64)
+#undef WRAP_INT_TO_SF
+
+#define WRAP_INT_TO_80(name, func, int_type) \
+ static void name(int_type a, extFloat80_t *res) \
+ { \
+ floatx80 ret; \
+ \
+ ret = func(a, &qsf); \
+ *res = qemu_to_soft80(ret); \
+ }
+
+WRAP_INT_TO_80(qemu_i32_to_extF80M, int32_to_floatx80, int32_t)
+WRAP_INT_TO_80(qemu_i64_to_extF80M, int64_to_floatx80, int64_t)
+#undef WRAP_INT_TO_80
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_80_TO_INT(name, func, fast_type) \
+ static fast_type name(const extFloat80_t *ap, uint_fast8_t round, \
+ bool exact) \
+ { \
+ floatx80 a; \
+ \
+ a = soft_to_qemu80(*ap); \
+ qsf.float_rounding_mode = sf_rounding_to_qemu(round); \
+ return func(a, &qsf); \
+ }
+
+WRAP_80_TO_INT(qemu_extF80M_to_i32, floatx80_to_int32, int_fast32_t)
+WRAP_80_TO_INT(qemu_extF80M_to_i64, floatx80_to_int64, int_fast64_t)
+#undef WRAP_80_TO_INT
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_80_TO_INT_MINMAG(name, func, fast_type) \
+ static fast_type name(const extFloat80_t *ap, bool exact) \
+ { \
+ floatx80 a; \
+ \
+ a = soft_to_qemu80(*ap); \
+ return func(a, &qsf); \
+ }
+
+WRAP_80_TO_INT_MINMAG(qemu_extF80M_to_i32_r_minMag,
+ floatx80_to_int32_round_to_zero, int_fast32_t)
+WRAP_80_TO_INT_MINMAG(qemu_extF80M_to_i64_r_minMag,
+ floatx80_to_int64_round_to_zero, int_fast64_t)
+#undef WRAP_80_TO_INT_MINMAG
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_128_TO_INT(name, func, fast_type) \
+ static fast_type name(const float128_t *ap, uint_fast8_t round, \
+ bool exact) \
+ { \
+ float128 a; \
+ \
+ a = soft_to_qemu128(*ap); \
+ qsf.float_rounding_mode = sf_rounding_to_qemu(round); \
+ return func(a, &qsf); \
+ }
+
+WRAP_128_TO_INT(qemu_f128M_to_i32, float128_to_int32, int_fast32_t)
+WRAP_128_TO_INT(qemu_f128M_to_i64, float128_to_int64, int_fast64_t)
+
+WRAP_128_TO_INT(qemu_f128M_to_ui32, float128_to_uint32, uint_fast32_t)
+WRAP_128_TO_INT(qemu_f128M_to_ui64, float128_to_uint64, uint_fast64_t)
+#undef WRAP_128_TO_INT
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_128_TO_INT_MINMAG(name, func, fast_type) \
+ static fast_type name(const float128_t *ap, bool exact) \
+ { \
+ float128 a; \
+ \
+ a = soft_to_qemu128(*ap); \
+ return func(a, &qsf); \
+ }
+
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_i32_r_minMag,
+ float128_to_int32_round_to_zero, int_fast32_t)
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_i64_r_minMag,
+ float128_to_int64_round_to_zero, int_fast64_t)
+
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_ui32_r_minMag,
+ float128_to_uint32_round_to_zero, uint_fast32_t)
+WRAP_128_TO_INT_MINMAG(qemu_f128M_to_ui64_r_minMag,
+ float128_to_uint64_round_to_zero, uint_fast64_t)
+#undef WRAP_128_TO_INT_MINMAG
+
+#define WRAP_INT_TO_128(name, func, int_type) \
+ static void name(int_type a, float128_t *res) \
+ { \
+ float128 ret; \
+ \
+ ret = func(a, &qsf); \
+ *res = qemu_to_soft128(ret); \
+ }
+
+WRAP_INT_TO_128(qemu_ui64_to_f128M, uint64_to_float128, uint64_t)
+
+WRAP_INT_TO_128(qemu_i32_to_f128M, int32_to_float128, int32_t)
+WRAP_INT_TO_128(qemu_i64_to_f128M, int64_to_float128, int64_t)
+#undef WRAP_INT_TO_128
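All of the *_r_minMag wrappers map onto QEMU's *_round_to_zero conversions: rounding to minimum magnitude is truncation toward zero, and the exact argument disappears because QEMU's softfloat unconditionally reports inexact. A quick illustration using only public conversion entry points (signatures assumed from fpu/softfloat.h):

static void minmag_sketch(void)
{
    float_status st = { 0 };
    /* 7/4 == 1.75, exactly representable in binary64 */
    float64 x = float64_div(int32_to_float64(7, &st),
                            int32_to_float64(4, &st), &st);

    int32_t trunc = float64_to_int32_round_to_zero(x, &st);  /* minMag: 1 */

    st.float_rounding_mode = float_round_nearest_even;
    int32_t nearest = float64_to_int32(x, &st);              /* 2 */

    g_assert(trunc == 1 && nearest == 2);
}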
+
+/* Note: exact is ignored since qemu's softfloat assumes it is set */
+#define WRAP_ROUND_TO_INT(name, func, type) \
+ static type##_t name(type##_t a, uint_fast8_t round, bool exact) \
+ { \
+ type *ap = (type *)&a; \
+ type ret; \
+ \
+ qsf.float_rounding_mode = sf_rounding_to_qemu(round); \
+ ret = func(*ap, &qsf); \
+ return *(type##_t *)&ret; \
+ }
+
+WRAP_ROUND_TO_INT(qemu_f16_roundToInt, float16_round_to_int, float16)
+WRAP_ROUND_TO_INT(qemu_f32_roundToInt, float32_round_to_int, float32)
+WRAP_ROUND_TO_INT(qemu_f64_roundToInt, float64_round_to_int, float64)
+#undef WRAP_ROUND_TO_INT
+
+static void qemu_extF80M_roundToInt(const extFloat80_t *ap, uint_fast8_t round,
+ bool exact, extFloat80_t *res)
+{
+ floatx80 a;
+ floatx80 ret;
+
+ a = soft_to_qemu80(*ap);
+ qsf.float_rounding_mode = sf_rounding_to_qemu(round);
+ ret = floatx80_round_to_int(a, &qsf);
+ *res = qemu_to_soft80(ret);
+}
+
+static void qemu_f128M_roundToInt(const float128_t *ap, uint_fast8_t round,
+ bool exact, float128_t *res)
+{
+ float128 a;
+ float128 ret;
+
+ a = soft_to_qemu128(*ap);
+ qsf.float_rounding_mode = sf_rounding_to_qemu(round);
+ ret = float128_round_to_int(a, &qsf);
+ *res = qemu_to_soft128(ret);
+}
+
+/* operations */
+#define WRAP1(name, func, type) \
+ static type##_t name(type##_t a) \
+ { \
+ type *ap = (type *)&a; \
+ type ret; \
+ \
+ ret = func(*ap, &qsf); \
+ return *(type##_t *)&ret; \
+ }
+
+#define WRAP2(name, func, type) \
+ static type##_t name(type##_t a, type##_t b) \
+ { \
+ type *ap = (type *)&a; \
+ type *bp = (type *)&b; \
+ type ret; \
+ \
+ ret = func(*ap, *bp, &qsf); \
+ return *(type##_t *)&ret; \
+ }
+
+#define WRAP_COMMON_OPS(b) \
+ WRAP1(qemu_f##b##_sqrt, float##b##_sqrt, float##b) \
+ WRAP2(qemu_f##b##_add, float##b##_add, float##b) \
+ WRAP2(qemu_f##b##_sub, float##b##_sub, float##b) \
+ WRAP2(qemu_f##b##_mul, float##b##_mul, float##b) \
+ WRAP2(qemu_f##b##_div, float##b##_div, float##b)
+
+WRAP_COMMON_OPS(16)
+WRAP_COMMON_OPS(32)
+WRAP_COMMON_OPS(64)
+#undef WRAP_COMMON_OPS
+
+WRAP2(qemu_f32_rem, float32_rem, float32)
+WRAP2(qemu_f64_rem, float64_rem, float64)
+#undef WRAP2
+#undef WRAP1
+
+#define WRAP1_80(name, func) \
+ static void name(const extFloat80_t *ap, extFloat80_t *res) \
+ { \
+ floatx80 a; \
+ floatx80 ret; \
+ \
+ a = soft_to_qemu80(*ap); \
+ ret = func(a, &qsf); \
+ *res = qemu_to_soft80(ret); \
+ }
+
+WRAP1_80(qemu_extF80M_sqrt, floatx80_sqrt)
+#undef WRAP1_80
+
+#define WRAP1_128(name, func) \
+ static void name(const float128_t *ap, float128_t *res) \
+ { \
+ float128 a; \
+ float128 ret; \
+ \
+ a = soft_to_qemu128(*ap); \
+ ret = func(a, &qsf); \
+ *res = qemu_to_soft128(ret); \
+ }
+
+WRAP1_128(qemu_f128M_sqrt, float128_sqrt)
+#undef WRAP1_128
+
+#define WRAP2_80(name, func) \
+ static void name(const extFloat80_t *ap, const extFloat80_t *bp, \
+ extFloat80_t *res) \
+ { \
+ floatx80 a; \
+ floatx80 b; \
+ floatx80 ret; \
+ \
+ a = soft_to_qemu80(*ap); \
+ b = soft_to_qemu80(*bp); \
+ ret = func(a, b, &qsf); \
+ *res = qemu_to_soft80(ret); \
+ }
+
+WRAP2_80(qemu_extF80M_add, floatx80_add)
+WRAP2_80(qemu_extF80M_sub, floatx80_sub)
+WRAP2_80(qemu_extF80M_mul, floatx80_mul)
+WRAP2_80(qemu_extF80M_div, floatx80_div)
+WRAP2_80(qemu_extF80M_rem, floatx80_rem)
+#undef WRAP2_80
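Two calling conventions meet in these wrappers: 16/32/64-bit operands travel by value and are reinterpreted with a pointer cast (both libraries use plain integer containers of the same width), while the 80- and 128-bit 'M' variants arrive by pointer and must go through the explicit field-copy helpers. For reference, this is roughly what one WRAP2 expansion looks like after preprocessing (renamed here to avoid clashing with the macro-generated function):

/* approximately WRAP2(qemu_f32_add, float32_add, float32) */
static float32_t qemu_f32_add_expanded(float32_t a, float32_t b)
{
    float32 *ap = (float32 *)&a;   /* same 32-bit pattern, different typedef */
    float32 *bp = (float32 *)&b;
    float32 ret;

    ret = float32_add(*ap, *bp, &qsf);
    return *(float32_t *)&ret;
}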
+
+#define WRAP2_128(name, func) \
+ static void name(const float128_t *ap, const float128_t *bp, \
+ float128_t *res) \
+ { \
+ float128 a; \
+ float128 b; \
+ float128 ret; \
+ \
+ a = soft_to_qemu128(*ap); \
+ b = soft_to_qemu128(*bp); \
+ ret = func(a, b, &qsf); \
+ *res = qemu_to_soft128(ret); \
+ }
+
+WRAP2_128(qemu_f128M_add, float128_add)
+WRAP2_128(qemu_f128M_sub, float128_sub)
+WRAP2_128(qemu_f128M_mul, float128_mul)
+WRAP2_128(qemu_f128M_div, float128_div)
+WRAP2_128(qemu_f128M_rem, float128_rem)
+#undef WRAP2_128
+
+#define WRAP_MULADD(name, func, type) \
+ static type##_t name(type##_t a, type##_t b, type##_t c) \
+ { \
+ type *ap = (type *)&a; \
+ type *bp = (type *)&b; \
+ type *cp = (type *)&c; \
+ type ret; \
+ \
+ ret = func(*ap, *bp, *cp, 0, &qsf); \
+ return *(type##_t *)&ret; \
+ }
+
+WRAP_MULADD(qemu_f16_mulAdd, float16_muladd, float16)
+WRAP_MULADD(qemu_f32_mulAdd, float32_muladd, float32)
+WRAP_MULADD(qemu_f64_mulAdd, float64_muladd, float64)
+#undef WRAP_MULADD
+
+static void qemu_f128M_mulAdd(const float128_t *ap, const float128_t *bp,
+ const float128_t *cp, float128_t *res)
+{
+ float128 a, b, c, ret;
+
+ a = soft_to_qemu128(*ap);
+ b = soft_to_qemu128(*bp);
+ c = soft_to_qemu128(*cp);
+ ret = float128_muladd(a, b, c, 0, &qsf);
+ *res = qemu_to_soft128(ret);
+}
+
+#define WRAP_CMP16(name, func, retcond) \
+ static bool name(float16_t a, float16_t b) \
+ { \
+ float16 *ap = (float16 *)&a; \
+ float16 *bp = (float16 *)&b; \
+ int ret; \
+ \
+ ret = func(*ap, *bp, &qsf); \
+ return retcond; \
+ }
+
+WRAP_CMP16(qemu_f16_eq_signaling, float16_compare, ret == 0)
+WRAP_CMP16(qemu_f16_eq, float16_compare_quiet, ret == 0)
+WRAP_CMP16(qemu_f16_le, float16_compare, ret <= 0)
+WRAP_CMP16(qemu_f16_lt, float16_compare, ret < 0)
+WRAP_CMP16(qemu_f16_le_quiet, float16_compare_quiet, ret <= 0)
+WRAP_CMP16(qemu_f16_lt_quiet, float16_compare_quiet, ret < 0)
+#undef WRAP_CMP16
+
+#define WRAP_CMP(name, func, type) \
+ static bool name(type##_t a, type##_t b) \
+ { \
+ type *ap = (type *)&a; \
+ type *bp = (type *)&b; \
+ \
+ return !!func(*ap, *bp, &qsf); \
+ }
+
+#define GEN_WRAP_CMP(b) \
+ WRAP_CMP(qemu_f##b##_eq_signaling, float##b##_eq, float##b) \
+ WRAP_CMP(qemu_f##b##_eq, float##b##_eq_quiet, float##b) \
+ WRAP_CMP(qemu_f##b##_le, float##b##_le, float##b) \
+ WRAP_CMP(qemu_f##b##_lt, float##b##_lt, float##b) \
+ WRAP_CMP(qemu_f##b##_le_quiet, float##b##_le_quiet, float##b) \
+ WRAP_CMP(qemu_f##b##_lt_quiet, float##b##_lt_quiet, float##b)
+
+GEN_WRAP_CMP(32)
+GEN_WRAP_CMP(64)
+#undef GEN_WRAP_CMP
+#undef WRAP_CMP
+
+#define WRAP_CMP80(name, func) \
+ static bool name(const extFloat80_t *ap, const extFloat80_t *bp) \
+ { \
+ floatx80 a; \
+ floatx80 b; \
+ \
+ a = soft_to_qemu80(*ap); \
+ b = soft_to_qemu80(*bp); \
+ return !!func(a, b, &qsf); \
+ }
+
+WRAP_CMP80(qemu_extF80M_eq_signaling, floatx80_eq)
+WRAP_CMP80(qemu_extF80M_eq, floatx80_eq_quiet)
+WRAP_CMP80(qemu_extF80M_le, floatx80_le)
+WRAP_CMP80(qemu_extF80M_lt, floatx80_lt)
+WRAP_CMP80(qemu_extF80M_le_quiet, floatx80_le_quiet)
+WRAP_CMP80(qemu_extF80M_lt_quiet, floatx80_lt_quiet)
+#undef WRAP_CMP80
+
+#define WRAP_CMP128(name, func) \
+ static bool name(const float128_t *ap, const float128_t *bp) \
+ { \
+ float128 a; \
+ float128 b; \
+ \
+ a = soft_to_qemu128(*ap); \
+ b = soft_to_qemu128(*bp); \
+ return !!func(a, b, &qsf); \
+ }
+
+WRAP_CMP128(qemu_f128M_eq_signaling, float128_eq)
+WRAP_CMP128(qemu_f128M_eq, float128_eq_quiet)
+WRAP_CMP128(qemu_f128M_le, float128_le)
+WRAP_CMP128(qemu_f128M_lt, float128_lt)
+WRAP_CMP128(qemu_f128M_le_quiet, float128_le_quiet)
+WRAP_CMP128(qemu_f128M_lt_quiet, float128_lt_quiet)
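Putting the pieces together, exercising one wrapper the way the harness does reduces to: set the mode, clear the flags, call, then translate the flags back. An illustrative driver, not fp-test's actual verification loop (memcmp from <string.h>):

static bool check_one_f64_mulAdd(float64_t a, float64_t b, float64_t c,
                                 float64_t expected, uint_fast8_t round)
{
    float64_t got;
    uint_fast8_t flags;

    qsf.float_rounding_mode = sf_rounding_to_qemu(round);
    qsf.float_exception_flags = 0;

    got = qemu_f64_mulAdd(a, b, c);
    flags = qemu_flags_to_sf(qsf.float_exception_flags);
    (void)flags;   /* testfloat's verCases code would also compare these */

    return memcmp(&got, &expected, sizeof(got)) == 0;
}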