diff options
author | 2023-10-10 11:40:56 +0000 | |
---|---|---|
committer | 2023-10-10 11:40:56 +0000 | |
commit | e02cda008591317b1625707ff8e115a4841aa889 (patch) | |
tree | aee302e3cf8b59ec2d32ec481be3d1afddfc8968 /tests/tcg/hexagon | |
parent | cc668e6b7e0ffd8c9d130513d12053cf5eda1d3b (diff) |
Introduce Virtio-loopback epsilon release:
Epsilon release introduces a new compatibility layer which make virtio-loopback
design to work with QEMU and rust-vmm vhost-user backend without require any
changes.
Signed-off-by: Timos Ampelikiotis <t.ampelikiotis@virtualopensystems.com>
Change-Id: I52e57563e08a7d0bdc002f8e928ee61ba0c53dd9
Diffstat (limited to 'tests/tcg/hexagon')
24 files changed, 8122 insertions, 0 deletions
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target new file mode 100644 index 000000000..8b07a2816 --- /dev/null +++ b/tests/tcg/hexagon/Makefile.target @@ -0,0 +1,45 @@ +## +## Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. +## +## This program is free software; you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation; either version 2 of the License, or +## (at your option) any later version. +## +## This program is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with this program; if not, see <http://www.gnu.org/licenses/>. +## + +# Hexagon doesn't support gdb, so skip the EXTRA_RUNS +EXTRA_RUNS = + +CFLAGS += -Wno-incompatible-pointer-types -Wno-undefined-internal +CFLAGS += -fno-unroll-loops + +HEX_SRC=$(SRC_PATH)/tests/tcg/hexagon +VPATH += $(HEX_SRC) + +first: $(HEX_SRC)/first.S + $(CC) -static -mv67 -nostdlib $^ -o $@ + +HEX_TESTS = first +HEX_TESTS += hex_sigsegv +HEX_TESTS += misc +HEX_TESTS += preg_alias +HEX_TESTS += dual_stores +HEX_TESTS += multi_result +HEX_TESTS += mem_noshuf +HEX_TESTS += circ +HEX_TESTS += brev +HEX_TESTS += load_unpack +HEX_TESTS += load_align +HEX_TESTS += atomics +HEX_TESTS += fpstuff +HEX_TESTS += overflow + +TESTS += $(HEX_TESTS) diff --git a/tests/tcg/hexagon/atomics.c b/tests/tcg/hexagon/atomics.c new file mode 100644 index 000000000..ff1ceee24 --- /dev/null +++ b/tests/tcg/hexagon/atomics.c @@ -0,0 +1,139 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <inttypes.h> +#include <pthread.h> + +/* Using volatile because we are testing atomics */ +static inline int atomic_inc32(volatile int *x) +{ + int old, dummy; + __asm__ __volatile__( + "1: %0 = memw_locked(%2)\n\t" + " %1 = add(%0, #1)\n\t" + " memw_locked(%2, p0) = %1\n\t" + " if (!p0) jump 1b\n\t" + : "=&r"(old), "=&r"(dummy) + : "r"(x) + : "p0", "memory"); + return old; +} + +/* Using volatile because we are testing atomics */ +static inline long long atomic_inc64(volatile long long *x) +{ + long long old, dummy; + __asm__ __volatile__( + "1: %0 = memd_locked(%2)\n\t" + " %1 = #1\n\t" + " %1 = add(%0, %1)\n\t" + " memd_locked(%2, p0) = %1\n\t" + " if (!p0) jump 1b\n\t" + : "=&r"(old), "=&r"(dummy) + : "r"(x) + : "p0", "memory"); + return old; +} + +/* Using volatile because we are testing atomics */ +static inline int atomic_dec32(volatile int *x) +{ + int old, dummy; + __asm__ __volatile__( + "1: %0 = memw_locked(%2)\n\t" + " %1 = add(%0, #-1)\n\t" + " memw_locked(%2, p0) = %1\n\t" + " if (!p0) jump 1b\n\t" + : "=&r"(old), "=&r"(dummy) + : "r"(x) + : "p0", "memory"); + return old; +} + +/* Using volatile because we are testing atomics */ +static inline long long atomic_dec64(volatile long long *x) +{ + long long old, dummy; + __asm__ __volatile__( + "1: %0 = memd_locked(%2)\n\t" + " %1 = #-1\n\t" + " %1 = add(%0, %1)\n\t" + " memd_locked(%2, p0) = %1\n\t" + " if (!p0) jump 1b\n\t" + : "=&r"(old), "=&r"(dummy) + : "r"(x) + : "p0", "memory"); + return old; +} + +#define LOOP_CNT 1000 +/* Using volatile because we are testing atomics */ +volatile int tick32 = 1; +/* Using volatile because we are testing atomics */ +volatile long long tick64 = 1; +int err; + +void *thread1_func(void *arg) +{ + int i; + + for (i = 0; i < LOOP_CNT; i++) { + atomic_inc32(&tick32); + atomic_dec64(&tick64); + } + return NULL; +} + +void *thread2_func(void *arg) +{ + int i; + for (i = 0; i < LOOP_CNT; i++) { + atomic_dec32(&tick32); + atomic_inc64(&tick64); + } + return NULL; +} + +void test_pthread(void) +{ + pthread_t tid1, tid2; + + pthread_create(&tid1, NULL, thread1_func, "hello1"); + pthread_create(&tid2, NULL, thread2_func, "hello2"); + pthread_join(tid1, NULL); + pthread_join(tid2, NULL); + + if (tick32 != 1) { + printf("ERROR: tick32 %d != 1\n", tick32); + err++; + } + if (tick64 != 1) { + printf("ERROR: tick64 %lld != 1\n", tick64); + err++; + } +} + +int main(int argc, char **argv) +{ + test_pthread(); + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/brev.c b/tests/tcg/hexagon/brev.c new file mode 100644 index 000000000..9736a2405 --- /dev/null +++ b/tests/tcg/hexagon/brev.c @@ -0,0 +1,190 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <string.h> + +int err; + +#define NBITS 8 +#define SIZE (1 << NBITS) + +long long dbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; +int wbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; +short hbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; +unsigned char bbuf[SIZE] __attribute__((aligned(1 << 16))) = {0}; + +/* + * We use the C preporcessor to deal with the combinations of types + */ + +#define BREV_LOAD(SZ, RES, ADDR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++m0:brev)\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(INC) \ + : "m0") + +#define BREV_LOAD_b(RES, ADDR, INC) \ + BREV_LOAD(b, RES, ADDR, INC) +#define BREV_LOAD_ub(RES, ADDR, INC) \ + BREV_LOAD(ub, RES, ADDR, INC) +#define BREV_LOAD_h(RES, ADDR, INC) \ + BREV_LOAD(h, RES, ADDR, INC) +#define BREV_LOAD_uh(RES, ADDR, INC) \ + BREV_LOAD(uh, RES, ADDR, INC) +#define BREV_LOAD_w(RES, ADDR, INC) \ + BREV_LOAD(w, RES, ADDR, INC) +#define BREV_LOAD_d(RES, ADDR, INC) \ + BREV_LOAD(d, RES, ADDR, INC) + +#define BREV_STORE(SZ, PART, ADDR, VAL, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "mem" #SZ "(%0++m0:brev) = %1" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(VAL), "r"(INC) \ + : "m0", "memory") + +#define BREV_STORE_b(ADDR, VAL, INC) \ + BREV_STORE(b, "", ADDR, VAL, INC) +#define BREV_STORE_h(ADDR, VAL, INC) \ + BREV_STORE(h, "", ADDR, VAL, INC) +#define BREV_STORE_f(ADDR, VAL, INC) \ + BREV_STORE(h, ".H", ADDR, VAL, INC) +#define BREV_STORE_w(ADDR, VAL, INC) \ + BREV_STORE(w, "", ADDR, VAL, INC) +#define BREV_STORE_d(ADDR, VAL, INC) \ + BREV_STORE(d, "", ADDR, VAL, INC) + +#define BREV_STORE_NEW(SZ, ADDR, VAL, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "{\n\t" \ + " r5 = %1\n\t" \ + " mem" #SZ "(%0++m0:brev) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(VAL), "r"(INC) \ + : "r5", "m0", "memory") + +#define BREV_STORE_bnew(ADDR, VAL, INC) \ + BREV_STORE_NEW(b, ADDR, VAL, INC) +#define BREV_STORE_hnew(ADDR, VAL, INC) \ + BREV_STORE_NEW(h, ADDR, VAL, INC) +#define BREV_STORE_wnew(ADDR, VAL, INC) \ + BREV_STORE_NEW(w, ADDR, VAL, INC) + +int bitreverse(int x) +{ + int result = 0; + int i; + for (i = 0; i < NBITS; i++) { + result <<= 1; + result |= x & 1; + x >>= 1; + } + return result; +} + +int sext8(int x) +{ + return (x << 24) >> 24; +} + +void check(int i, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR(%d): 0x%04llx != 0x%04llx\n", i, result, expect); + err++; + } +} + +#define TEST_BREV_LOAD(SZ, TYPE, BUF, SHIFT, EXP) \ + do { \ + p = BUF; \ + for (i = 0; i < SIZE; i++) { \ + TYPE result; \ + BREV_LOAD_##SZ(result, p, 1 << (SHIFT - NBITS)); \ + check(i, result, EXP); \ + } \ + } while (0) + +#define TEST_BREV_STORE(SZ, TYPE, BUF, VAL, SHIFT) \ + do { \ + p = BUF; \ + memset(BUF, 0xff, sizeof(BUF)); \ + for (i = 0; i < SIZE; i++) { \ + BREV_STORE_##SZ(p, (TYPE)(VAL), 1 << (SHIFT - NBITS)); \ + } \ + for (i = 0; i < SIZE; i++) { \ + check(i, BUF[i], bitreverse(i)); \ + } \ + } while (0) + +#define TEST_BREV_STORE_NEW(SZ, BUF, SHIFT) \ + do { \ + p = BUF; \ + memset(BUF, 0xff, sizeof(BUF)); \ + for (i = 0; i < SIZE; i++) { \ + BREV_STORE_##SZ(p, i, 1 << (SHIFT - NBITS)); \ + } \ + for (i = 0; i < SIZE; i++) { \ + check(i, BUF[i], bitreverse(i)); \ + } \ + } while (0) + +/* + * We'll set high_half[i] = i << 16 for use in the .H form of store + * which stores from the high half of the word. + */ +int high_half[SIZE]; + +int main() +{ + void *p; + int i; + + for (i = 0; i < SIZE; i++) { + bbuf[i] = bitreverse(i); + hbuf[i] = bitreverse(i); + wbuf[i] = bitreverse(i); + dbuf[i] = bitreverse(i); + high_half[i] = i << 16; + } + + TEST_BREV_LOAD(b, int, bbuf, 16, sext8(i)); + TEST_BREV_LOAD(ub, int, bbuf, 16, i); + TEST_BREV_LOAD(h, int, hbuf, 15, i); + TEST_BREV_LOAD(uh, int, hbuf, 15, i); + TEST_BREV_LOAD(w, int, wbuf, 14, i); + TEST_BREV_LOAD(d, long long, dbuf, 13, i); + + TEST_BREV_STORE(b, int, bbuf, i, 16); + TEST_BREV_STORE(h, int, hbuf, i, 15); + TEST_BREV_STORE(f, int, hbuf, high_half[i], 15); + TEST_BREV_STORE(w, int, wbuf, i, 14); + TEST_BREV_STORE(d, long long, dbuf, i, 13); + + TEST_BREV_STORE_NEW(bnew, bbuf, 16); + TEST_BREV_STORE_NEW(hnew, hbuf, 15); + TEST_BREV_STORE_NEW(wnew, wbuf, 14); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/circ.c b/tests/tcg/hexagon/circ.c new file mode 100644 index 000000000..67a1aa305 --- /dev/null +++ b/tests/tcg/hexagon/circ.c @@ -0,0 +1,486 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +#define DEBUG 0 +#define DEBUG_PRINTF(...) \ + do { \ + if (DEBUG) { \ + printf(__VA_ARGS__); \ + } \ + } while (0) + + +#define NBYTES (1 << 8) +#define NHALFS (NBYTES / sizeof(short)) +#define NWORDS (NBYTES / sizeof(int)) +#define NDOBLS (NBYTES / sizeof(long long)) + +long long dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0}; +int wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0}; +short hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0}; +unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0}; + +/* + * We use the C preporcessor to deal with the combinations of types + */ + +#define INIT(BUF, N) \ + void init_##BUF(void) \ + { \ + int i; \ + for (i = 0; i < N; i++) { \ + BUF[i] = i; \ + } \ + } \ + +INIT(bbuf, NBYTES) +INIT(hbuf, NHALFS) +INIT(wbuf, NWORDS) +INIT(dbuf, NDOBLS) + +/* + * Macros for performing circular load + * RES result + * ADDR address + * START start address of buffer + * LEN length of buffer (in bytes) + * INC address increment (in bytes for IMM, elements for REG) + */ +#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC) + +/* + * The mreg has the following pieces + * mreg[31:28] increment[10:7] + * mreg[27:24] K value (used Hexagon v3 and earlier) + * mreg[23:17] increment[6:0] + * mreg[16:0] circular buffer length + */ +static int build_mreg(int inc, int K, int len) +{ + return ((inc & 0x780) << 21) | + ((K & 0xf) << 24) | + ((inc & 0x7f) << 17) | + (len & 0x1ffff); +} + +#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \ + : "=r"(RES), "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC) +#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \ + CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC) + +/* + * Macros for performing circular store + * VAL value to store + * ADDR address + * START start address of buffer + * LEN length of buffer (in bytes) + * INC address increment (in bytes for IMM, elements for REG) + */ +#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %1\n\t" \ + "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(START), "r"(VAL), "r"(LEN) \ + : "r4", "m0", "cs0", "memory") +#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %1\n\t" \ + "{\n\t" \ + " r5 = %2\n\t" \ + " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(START), "r"(VAL), "r"(LEN) \ + : "r4", "r5", "m0", "cs0", "memory") +#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %1\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %2\n\t" \ + "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \ + : "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START), \ + "r"(VAL) \ + : "r4", "m1", "cs1", "memory") +#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC) + +#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \ + __asm__( \ + "r4 = %1\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %2\n\t" \ + "{\n\t" \ + " r5 = %3\n\t" \ + " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \ + "}\n\t" \ + : "+r"(ADDR) \ + : "r"(build_mreg((INC), 0, (LEN))), \ + "r"(START), \ + "r"(VAL) \ + : "r4", "r5", "m1", "cs1", "memory") +#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC) +#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \ + CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC) + + +int err; + +/* We'll test increments +1 and -1 */ +void check_load(int i, long long result, int inc, int size) +{ + int expect = (i * inc); + while (expect >= size) { + expect -= size; + } + while (expect < 0) { + expect += size; + } + if (result != expect) { + printf("ERROR(%d): %lld != %d\n", i, result, expect); + err++; + } +} + +#define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \ +void circ_test_load_imm_##SZ(void) \ +{ \ + TYPE *p = (TYPE *)BUF; \ + int size = 10; \ + int i; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \ + } \ + p = (TYPE *)BUF; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \ + } \ +} + +TEST_LOAD_IMM(b, char, bbuf, NBYTES, 1, d) +TEST_LOAD_IMM(ub, unsigned char, bbuf, NBYTES, 1, d) +TEST_LOAD_IMM(h, short, hbuf, NHALFS, 2, d) +TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d) +TEST_LOAD_IMM(w, int, wbuf, NWORDS, 4, d) +TEST_LOAD_IMM(d, long long, dbuf, NDOBLS, 8, lld) + +#define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \ +void circ_test_load_reg_##SZ(void) \ +{ \ + TYPE *p = (TYPE *)BUF; \ + int size = 13; \ + int i; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, 1, size); \ + } \ + p = (TYPE *)BUF; \ + for (i = 0; i < BUFSIZE; i++) { \ + TYPE element; \ + CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \ + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ + i, p, element); \ + check_load(i, element, -1, size); \ + } \ +} + +TEST_LOAD_REG(b, char, bbuf, NBYTES, d) +TEST_LOAD_REG(ub, unsigned char, bbuf, NBYTES, d) +TEST_LOAD_REG(h, short, hbuf, NHALFS, d) +TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d) +TEST_LOAD_REG(w, int, wbuf, NWORDS, d) +TEST_LOAD_REG(d, long long, dbuf, NDOBLS, lld) + +/* The circular stores will wrap around somewhere inside the buffer */ +#define CIRC_VAL(SZ, TYPE, BUFSIZE) \ +TYPE circ_val_##SZ(int i, int inc, int size) \ +{ \ + int mod = BUFSIZE % size; \ + int elem = i * inc; \ + if (elem < 0) { \ + if (-elem <= size - mod) { \ + return (elem + BUFSIZE - mod); \ + } else { \ + return (elem + BUFSIZE + size - mod); \ + } \ + } else if (elem < mod) {\ + return (elem + BUFSIZE - mod); \ + } else { \ + return (elem + BUFSIZE - size - mod); \ + } \ +} + +CIRC_VAL(b, unsigned char, NBYTES) +CIRC_VAL(h, short, NHALFS) +CIRC_VAL(w, int, NWORDS) +CIRC_VAL(d, long long, NDOBLS) + +/* + * Circular stores should only write to the first "size" elements of the buffer + * the remainder of the elements should have BUF[i] == i + */ +#define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \ +void check_store_##SZ(int inc, int size) \ +{ \ + int i; \ + for (i = 0; i < size; i++) { \ + DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \ + i, BUF[i], circ_val_##SZ(i, inc, size)); \ + if (BUF[i] != circ_val_##SZ(i, inc, size)) { \ + printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \ + i, BUF[i], circ_val_##SZ(i, inc, size)); \ + err++; \ + } \ + } \ + for (i = size; i < BUFSIZE; i++) { \ + if (BUF[i] != i) { \ + printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \ + err++; \ + } \ + } \ +} + +CHECK_STORE(b, bbuf, NBYTES, x) +CHECK_STORE(h, hbuf, NHALFS, x) +CHECK_STORE(w, wbuf, NWORDS, x) +CHECK_STORE(d, dbuf, NDOBLS, llx) + +#define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \ +void circ_test_store_imm_##SZ(void) \ +{ \ + unsigned int size = 27; \ + TYPE *p = BUF; \ + TYPE val = 0; \ + int i; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \ + val++; \ + } \ + check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \ + p = BUF; \ + val = 0; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \ + -(INC)); \ + val++; \ + } \ + check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \ +} + +CIRC_TEST_STORE_IMM(b, b, unsigned char, bbuf, NBYTES, 0, 1) +CIRC_TEST_STORE_IMM(h, h, short, hbuf, NHALFS, 0, 2) +CIRC_TEST_STORE_IMM(f, h, short, hbuf, NHALFS, 16, 2) +CIRC_TEST_STORE_IMM(w, w, int, wbuf, NWORDS, 0, 4) +CIRC_TEST_STORE_IMM(d, d, long long, dbuf, NDOBLS, 0, 8) +CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0, 1) +CIRC_TEST_STORE_IMM(hnew, h, short, hbuf, NHALFS, 0, 2) +CIRC_TEST_STORE_IMM(wnew, w, int, wbuf, NWORDS, 0, 4) + +#define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \ +void circ_test_store_reg_##SZ(void) \ +{ \ + TYPE *p = BUF; \ + unsigned int size = 19; \ + TYPE val = 0; \ + int i; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \ + val++; \ + } \ + check_store_##CHK(1, size); \ + p = BUF; \ + val = 0; \ + init_##BUF(); \ + for (i = 0; i < BUFSIZE; i++) { \ + CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \ + val++; \ + } \ + check_store_##CHK(-1, size); \ +} + +CIRC_TEST_STORE_REG(b, b, unsigned char, bbuf, NBYTES, 0) +CIRC_TEST_STORE_REG(h, h, short, hbuf, NHALFS, 0) +CIRC_TEST_STORE_REG(f, h, short, hbuf, NHALFS, 16) +CIRC_TEST_STORE_REG(w, w, int, wbuf, NWORDS, 0) +CIRC_TEST_STORE_REG(d, d, long long, dbuf, NDOBLS, 0) +CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0) +CIRC_TEST_STORE_REG(hnew, h, short, hbuf, NHALFS, 0) +CIRC_TEST_STORE_REG(wnew, w, int, wbuf, NWORDS, 0) + +/* Test the old scheme used in Hexagon V3 */ +static void circ_test_v3(void) +{ + int *p = wbuf; + int size = 15; + int K = 4; /* 64 bytes */ + int element; + int i; + + init_wbuf(); + + for (i = 0; i < NWORDS; i++) { + __asm__( + "r4 = %2\n\t" + "m1 = r4\n\t" + "%0 = memw(%1++I:circ(M1))\n\t" + : "=r"(element), "+r"(p) + : "r"(build_mreg(1, K, size * sizeof(int))) + : "r4", "m1"); + DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element); + check_load(i, element, 1, size); + } +} + +int main() +{ + init_bbuf(); + init_hbuf(); + init_wbuf(); + init_dbuf(); + + DEBUG_PRINTF("NBYTES = %d\n", NBYTES); + DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf); + DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf); + DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf); + DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf); + + circ_test_load_imm_b(); + circ_test_load_imm_ub(); + circ_test_load_imm_h(); + circ_test_load_imm_uh(); + circ_test_load_imm_w(); + circ_test_load_imm_d(); + + circ_test_load_reg_b(); + circ_test_load_reg_ub(); + circ_test_load_reg_h(); + circ_test_load_reg_uh(); + circ_test_load_reg_w(); + circ_test_load_reg_d(); + + circ_test_store_imm_b(); + circ_test_store_imm_h(); + circ_test_store_imm_f(); + circ_test_store_imm_w(); + circ_test_store_imm_d(); + circ_test_store_imm_bnew(); + circ_test_store_imm_hnew(); + circ_test_store_imm_wnew(); + + circ_test_store_reg_b(); + circ_test_store_reg_h(); + circ_test_store_reg_f(); + circ_test_store_reg_w(); + circ_test_store_reg_d(); + circ_test_store_reg_bnew(); + circ_test_store_reg_hnew(); + circ_test_store_reg_wnew(); + + circ_test_v3(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/dual_stores.c b/tests/tcg/hexagon/dual_stores.c new file mode 100644 index 000000000..a86a381ab --- /dev/null +++ b/tests/tcg/hexagon/dual_stores.c @@ -0,0 +1,60 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +/* + * Make sure that two stores in the same packet honor proper + * semantics: slot 1 executes first, then slot 0. + * This is important when the addresses overlap. + */ +static inline void dual_stores(int *p, char *q, int x, char y) +{ + asm volatile("{\n\t" + " memw(%0) = %2\n\t" + " memb(%1) = %3\n\t" + "}\n" + :: "r"(p), "r"(q), "r"(x), "r"(y) + : "memory"); +} + +typedef union { + int word; + char byte; +} Dual; + +int err; + +static void check(Dual d, int expect) +{ + if (d.word != expect) { + printf("ERROR: 0x%08x != 0x%08x\n", d.word, expect); + err++; + } +} + +int main() +{ + Dual d; + + d.word = ~0; + dual_stores(&d.word, &d.byte, 0x12345678, 0xff); + check(d, 0x123456ff); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/first.S b/tests/tcg/hexagon/first.S new file mode 100644 index 000000000..e9f2d963e --- /dev/null +++ b/tests/tcg/hexagon/first.S @@ -0,0 +1,56 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#define SYS_write 64 +#define SYS_exit_group 94 +#define SYS_exit 93 + +#define FD_STDOUT 1 + + .type str,@object + .section .rodata +str: + .string "Hello!\n" + .size str, 8 + +.text +.global _start +_start: + r6 = #SYS_write + r0 = #FD_STDOUT + r1 = ##str + r2 = #7 + trap0(#1) + + r0 = #0 + r6 = #SYS_exit_group + trap0(#1) + +.section ".note.ABI-tag", "a" +.align 4 +.long 1f - 0f /* name length */ +.long 3f - 2f /* data length */ +.long 1 /* note type */ + +/* + * vendor name seems like this should be MUSL but lldb doesn't agree. + */ +0: .asciz "GNU" +1: .align 4 +2: .long 0 /* linux */ + .long 3,0,0 +3: .align 4 diff --git a/tests/tcg/hexagon/float_convs.ref b/tests/tcg/hexagon/float_convs.ref new file mode 100644 index 000000000..9ec9ffc08 --- /dev/null +++ b/tests/tcg/hexagon/float_convs.ref @@ -0,0 +1,748 @@ +### Rounding to nearest +from single: f32(-nan:0xffa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0xffc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-inf:0xff800000) + to double: f64(-inf:0x00fff0000000000000) (OK) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + to double: f64(-0x1.fffffe00000000000000p+127:0x00c7efffffe0000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + to double: f64(-0x1.1874b200000000000000p+103:0x00c661874b20000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + to double: f64(-0x1.c0bab600000000000000p+99:0x00c62c0bab60000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.31f75000000000000000p-40:0xab98fba8) + to double: f64(-0x1.31f75000000000000000p-40:0x00bd731f7500000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.50544400000000000000p-66:0x9ea82a22) + to double: f64(-0x1.50544400000000000000p-66:0x00bbd5054440000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.00000000000000000000p-126:0x80800000) + to double: f64(-0x1.00000000000000000000p-126:0x00b810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(0x0.00000000000000000000p+0:0000000000) + to double: f64(0x0.00000000000000000000p+0:00000000000000000000) (OK) + to int32: 0 (OK) + to int64: 0 (OK) + to uint32: 0 (OK) + to uint64: 0 (OK) +from single: f32(0x1.00000000000000000000p-126:0x00800000) + to double: f64(0x1.00000000000000000000p-126:0x003810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p-25:0x33000000) + to double: f64(0x1.00000000000000000000p-25:0x003e60000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ffffe600000000000000p-25:0x337ffff3) + to double: f64(0x1.ffffe600000000000000p-25:0x003e6ffffe60000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ff801a00000000000000p-15:0x387fc00d) + to double: f64(0x1.ff801a00000000000000p-15:0x003f0ff801a0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000c00000000000000p-14:0x38800006) + to double: f64(0x1.00000c00000000000000p-14:0x003f100000c0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p+0:0x3f800000) + to double: f64(0x1.00000000000000000000p+0:0x003ff0000000000000) (OK) + to int32: 1 (OK) + to int64: 1 (OK) + to uint32: 1 (OK) + to uint64: 1 (OK) +from single: f32(0x1.00400000000000000000p+0:0x3f802000) + to double: f64(0x1.00400000000000000000p+0:0x003ff0040000000000) (INEXACT ) + to int32: 1 (INEXACT ) + to int64: 1 (INEXACT ) + to uint32: 1 (INEXACT ) + to uint64: 1 (INEXACT ) +from single: f32(0x1.00000000000000000000p+1:0x40000000) + to double: f64(0x1.00000000000000000000p+1:0x004000000000000000) (OK) + to int32: 2 (OK) + to int64: 2 (OK) + to uint32: 2 (OK) + to uint64: 2 (OK) +from single: f32(0x1.5bf0a800000000000000p+1:0x402df854) + to double: f64(0x1.5bf0a800000000000000p+1:0x004005bf0a80000000) (INEXACT ) + to int32: 2 (INEXACT ) + to int64: 2 (INEXACT ) + to uint32: 2 (INEXACT ) + to uint64: 2 (INEXACT ) +from single: f32(0x1.921fb600000000000000p+1:0x40490fdb) + to double: f64(0x1.921fb600000000000000p+1:0x00400921fb60000000) (INEXACT ) + to int32: 3 (INEXACT ) + to int64: 3 (INEXACT ) + to uint32: 3 (INEXACT ) + to uint64: 3 (INEXACT ) +from single: f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + to double: f64(0x1.ffbe0000000000000000p+15:0x0040effbe000000000) (INEXACT ) + to int32: 65503 (OK) + to int64: 65503 (OK) + to uint32: 65503 (OK) + to uint64: 65503 (OK) +from single: f32(0x1.ffc00000000000000000p+15:0x477fe000) + to double: f64(0x1.ffc00000000000000000p+15:0x0040effc0000000000) (INEXACT ) + to int32: 65504 (OK) + to int64: 65504 (OK) + to uint32: 65504 (OK) + to uint64: 65504 (OK) +from single: f32(0x1.ffc20000000000000000p+15:0x477fe100) + to double: f64(0x1.ffc20000000000000000p+15:0x0040effc2000000000) (INEXACT ) + to int32: 65505 (OK) + to int64: 65505 (OK) + to uint32: 65505 (OK) + to uint64: 65505 (OK) +from single: f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + to double: f64(0x1.ffbf0000000000000000p+16:0x0040fffbf000000000) (INEXACT ) + to int32: 131007 (OK) + to int64: 131007 (OK) + to uint32: 131007 (OK) + to uint64: 131007 (OK) +from single: f32(0x1.ffc00000000000000000p+16:0x47ffe000) + to double: f64(0x1.ffc00000000000000000p+16:0x0040fffc0000000000) (INEXACT ) + to int32: 131008 (OK) + to int64: 131008 (OK) + to uint32: 131008 (OK) + to uint64: 131008 (OK) +from single: f32(0x1.ffc10000000000000000p+16:0x47ffe080) + to double: f64(0x1.ffc10000000000000000p+16:0x0040fffc1000000000) (INEXACT ) + to int32: 131009 (OK) + to int64: 131009 (OK) + to uint32: 131009 (OK) + to uint64: 131009 (OK) +from single: f32(0x1.c0bab600000000000000p+99:0x71605d5b) + to double: f64(0x1.c0bab600000000000000p+99:0x00462c0bab60000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + to double: f64(0x1.fffffe00000000000000p+127:0x0047efffffe0000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(inf:0x7f800000) + to double: f64(inf:0x007ff0000000000000) (OK) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +### Rounding upwards +from single: f32(-nan:0xffa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0xffc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-inf:0xff800000) + to double: f64(-inf:0x00fff0000000000000) (OK) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + to double: f64(-0x1.fffffe00000000000000p+127:0x00c7efffffe0000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + to double: f64(-0x1.1874b200000000000000p+103:0x00c661874b20000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + to double: f64(-0x1.c0bab600000000000000p+99:0x00c62c0bab60000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.31f75000000000000000p-40:0xab98fba8) + to double: f64(-0x1.31f75000000000000000p-40:0x00bd731f7500000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.50544400000000000000p-66:0x9ea82a22) + to double: f64(-0x1.50544400000000000000p-66:0x00bbd5054440000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.00000000000000000000p-126:0x80800000) + to double: f64(-0x1.00000000000000000000p-126:0x00b810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(0x0.00000000000000000000p+0:0000000000) + to double: f64(0x0.00000000000000000000p+0:00000000000000000000) (OK) + to int32: 0 (OK) + to int64: 0 (OK) + to uint32: 0 (OK) + to uint64: 0 (OK) +from single: f32(0x1.00000000000000000000p-126:0x00800000) + to double: f64(0x1.00000000000000000000p-126:0x003810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p-25:0x33000000) + to double: f64(0x1.00000000000000000000p-25:0x003e60000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ffffe600000000000000p-25:0x337ffff3) + to double: f64(0x1.ffffe600000000000000p-25:0x003e6ffffe60000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ff801a00000000000000p-15:0x387fc00d) + to double: f64(0x1.ff801a00000000000000p-15:0x003f0ff801a0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000c00000000000000p-14:0x38800006) + to double: f64(0x1.00000c00000000000000p-14:0x003f100000c0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p+0:0x3f800000) + to double: f64(0x1.00000000000000000000p+0:0x003ff0000000000000) (OK) + to int32: 1 (OK) + to int64: 1 (OK) + to uint32: 1 (OK) + to uint64: 1 (OK) +from single: f32(0x1.00400000000000000000p+0:0x3f802000) + to double: f64(0x1.00400000000000000000p+0:0x003ff0040000000000) (INEXACT ) + to int32: 1 (INEXACT ) + to int64: 1 (INEXACT ) + to uint32: 1 (INEXACT ) + to uint64: 1 (INEXACT ) +from single: f32(0x1.00000000000000000000p+1:0x40000000) + to double: f64(0x1.00000000000000000000p+1:0x004000000000000000) (OK) + to int32: 2 (OK) + to int64: 2 (OK) + to uint32: 2 (OK) + to uint64: 2 (OK) +from single: f32(0x1.5bf0a800000000000000p+1:0x402df854) + to double: f64(0x1.5bf0a800000000000000p+1:0x004005bf0a80000000) (INEXACT ) + to int32: 2 (INEXACT ) + to int64: 2 (INEXACT ) + to uint32: 2 (INEXACT ) + to uint64: 2 (INEXACT ) +from single: f32(0x1.921fb600000000000000p+1:0x40490fdb) + to double: f64(0x1.921fb600000000000000p+1:0x00400921fb60000000) (INEXACT ) + to int32: 3 (INEXACT ) + to int64: 3 (INEXACT ) + to uint32: 3 (INEXACT ) + to uint64: 3 (INEXACT ) +from single: f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + to double: f64(0x1.ffbe0000000000000000p+15:0x0040effbe000000000) (INEXACT ) + to int32: 65503 (OK) + to int64: 65503 (OK) + to uint32: 65503 (OK) + to uint64: 65503 (OK) +from single: f32(0x1.ffc00000000000000000p+15:0x477fe000) + to double: f64(0x1.ffc00000000000000000p+15:0x0040effc0000000000) (INEXACT ) + to int32: 65504 (OK) + to int64: 65504 (OK) + to uint32: 65504 (OK) + to uint64: 65504 (OK) +from single: f32(0x1.ffc20000000000000000p+15:0x477fe100) + to double: f64(0x1.ffc20000000000000000p+15:0x0040effc2000000000) (INEXACT ) + to int32: 65505 (OK) + to int64: 65505 (OK) + to uint32: 65505 (OK) + to uint64: 65505 (OK) +from single: f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + to double: f64(0x1.ffbf0000000000000000p+16:0x0040fffbf000000000) (INEXACT ) + to int32: 131007 (OK) + to int64: 131007 (OK) + to uint32: 131007 (OK) + to uint64: 131007 (OK) +from single: f32(0x1.ffc00000000000000000p+16:0x47ffe000) + to double: f64(0x1.ffc00000000000000000p+16:0x0040fffc0000000000) (INEXACT ) + to int32: 131008 (OK) + to int64: 131008 (OK) + to uint32: 131008 (OK) + to uint64: 131008 (OK) +from single: f32(0x1.ffc10000000000000000p+16:0x47ffe080) + to double: f64(0x1.ffc10000000000000000p+16:0x0040fffc1000000000) (INEXACT ) + to int32: 131009 (OK) + to int64: 131009 (OK) + to uint32: 131009 (OK) + to uint64: 131009 (OK) +from single: f32(0x1.c0bab600000000000000p+99:0x71605d5b) + to double: f64(0x1.c0bab600000000000000p+99:0x00462c0bab60000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + to double: f64(0x1.fffffe00000000000000p+127:0x0047efffffe0000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(inf:0x7f800000) + to double: f64(inf:0x007ff0000000000000) (OK) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +### Rounding downwards +from single: f32(-nan:0xffa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0xffc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-inf:0xff800000) + to double: f64(-inf:0x00fff0000000000000) (OK) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + to double: f64(-0x1.fffffe00000000000000p+127:0x00c7efffffe0000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + to double: f64(-0x1.1874b200000000000000p+103:0x00c661874b20000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + to double: f64(-0x1.c0bab600000000000000p+99:0x00c62c0bab60000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.31f75000000000000000p-40:0xab98fba8) + to double: f64(-0x1.31f75000000000000000p-40:0x00bd731f7500000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.50544400000000000000p-66:0x9ea82a22) + to double: f64(-0x1.50544400000000000000p-66:0x00bbd5054440000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.00000000000000000000p-126:0x80800000) + to double: f64(-0x1.00000000000000000000p-126:0x00b810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(0x0.00000000000000000000p+0:0000000000) + to double: f64(0x0.00000000000000000000p+0:00000000000000000000) (OK) + to int32: 0 (OK) + to int64: 0 (OK) + to uint32: 0 (OK) + to uint64: 0 (OK) +from single: f32(0x1.00000000000000000000p-126:0x00800000) + to double: f64(0x1.00000000000000000000p-126:0x003810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p-25:0x33000000) + to double: f64(0x1.00000000000000000000p-25:0x003e60000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ffffe600000000000000p-25:0x337ffff3) + to double: f64(0x1.ffffe600000000000000p-25:0x003e6ffffe60000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ff801a00000000000000p-15:0x387fc00d) + to double: f64(0x1.ff801a00000000000000p-15:0x003f0ff801a0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000c00000000000000p-14:0x38800006) + to double: f64(0x1.00000c00000000000000p-14:0x003f100000c0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p+0:0x3f800000) + to double: f64(0x1.00000000000000000000p+0:0x003ff0000000000000) (OK) + to int32: 1 (OK) + to int64: 1 (OK) + to uint32: 1 (OK) + to uint64: 1 (OK) +from single: f32(0x1.00400000000000000000p+0:0x3f802000) + to double: f64(0x1.00400000000000000000p+0:0x003ff0040000000000) (INEXACT ) + to int32: 1 (INEXACT ) + to int64: 1 (INEXACT ) + to uint32: 1 (INEXACT ) + to uint64: 1 (INEXACT ) +from single: f32(0x1.00000000000000000000p+1:0x40000000) + to double: f64(0x1.00000000000000000000p+1:0x004000000000000000) (OK) + to int32: 2 (OK) + to int64: 2 (OK) + to uint32: 2 (OK) + to uint64: 2 (OK) +from single: f32(0x1.5bf0a800000000000000p+1:0x402df854) + to double: f64(0x1.5bf0a800000000000000p+1:0x004005bf0a80000000) (INEXACT ) + to int32: 2 (INEXACT ) + to int64: 2 (INEXACT ) + to uint32: 2 (INEXACT ) + to uint64: 2 (INEXACT ) +from single: f32(0x1.921fb600000000000000p+1:0x40490fdb) + to double: f64(0x1.921fb600000000000000p+1:0x00400921fb60000000) (INEXACT ) + to int32: 3 (INEXACT ) + to int64: 3 (INEXACT ) + to uint32: 3 (INEXACT ) + to uint64: 3 (INEXACT ) +from single: f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + to double: f64(0x1.ffbe0000000000000000p+15:0x0040effbe000000000) (INEXACT ) + to int32: 65503 (OK) + to int64: 65503 (OK) + to uint32: 65503 (OK) + to uint64: 65503 (OK) +from single: f32(0x1.ffc00000000000000000p+15:0x477fe000) + to double: f64(0x1.ffc00000000000000000p+15:0x0040effc0000000000) (INEXACT ) + to int32: 65504 (OK) + to int64: 65504 (OK) + to uint32: 65504 (OK) + to uint64: 65504 (OK) +from single: f32(0x1.ffc20000000000000000p+15:0x477fe100) + to double: f64(0x1.ffc20000000000000000p+15:0x0040effc2000000000) (INEXACT ) + to int32: 65505 (OK) + to int64: 65505 (OK) + to uint32: 65505 (OK) + to uint64: 65505 (OK) +from single: f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + to double: f64(0x1.ffbf0000000000000000p+16:0x0040fffbf000000000) (INEXACT ) + to int32: 131007 (OK) + to int64: 131007 (OK) + to uint32: 131007 (OK) + to uint64: 131007 (OK) +from single: f32(0x1.ffc00000000000000000p+16:0x47ffe000) + to double: f64(0x1.ffc00000000000000000p+16:0x0040fffc0000000000) (INEXACT ) + to int32: 131008 (OK) + to int64: 131008 (OK) + to uint32: 131008 (OK) + to uint64: 131008 (OK) +from single: f32(0x1.ffc10000000000000000p+16:0x47ffe080) + to double: f64(0x1.ffc10000000000000000p+16:0x0040fffc1000000000) (INEXACT ) + to int32: 131009 (OK) + to int64: 131009 (OK) + to uint32: 131009 (OK) + to uint64: 131009 (OK) +from single: f32(0x1.c0bab600000000000000p+99:0x71605d5b) + to double: f64(0x1.c0bab600000000000000p+99:0x00462c0bab60000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + to double: f64(0x1.fffffe00000000000000p+127:0x0047efffffe0000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(inf:0x7f800000) + to double: f64(inf:0x007ff0000000000000) (OK) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +### Rounding to zero +from single: f32(-nan:0xffa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0xffc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-inf:0xff800000) + to double: f64(-inf:0x00fff0000000000000) (OK) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + to double: f64(-0x1.fffffe00000000000000p+127:0x00c7efffffe0000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + to double: f64(-0x1.1874b200000000000000p+103:0x00c661874b20000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + to double: f64(-0x1.c0bab600000000000000p+99:0x00c62c0bab60000000) (INEXACT ) + to int32: -2147483648 (INVALID) + to int64: -9223372036854775808 (INVALID) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.31f75000000000000000p-40:0xab98fba8) + to double: f64(-0x1.31f75000000000000000p-40:0x00bd731f7500000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.50544400000000000000p-66:0x9ea82a22) + to double: f64(-0x1.50544400000000000000p-66:0x00bbd5054440000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(-0x1.00000000000000000000p-126:0x80800000) + to double: f64(-0x1.00000000000000000000p-126:0x00b810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INVALID) + to uint64: 0 (INVALID) +from single: f32(0x0.00000000000000000000p+0:0000000000) + to double: f64(0x0.00000000000000000000p+0:00000000000000000000) (OK) + to int32: 0 (OK) + to int64: 0 (OK) + to uint32: 0 (OK) + to uint64: 0 (OK) +from single: f32(0x1.00000000000000000000p-126:0x00800000) + to double: f64(0x1.00000000000000000000p-126:0x003810000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p-25:0x33000000) + to double: f64(0x1.00000000000000000000p-25:0x003e60000000000000) (OK) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ffffe600000000000000p-25:0x337ffff3) + to double: f64(0x1.ffffe600000000000000p-25:0x003e6ffffe60000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.ff801a00000000000000p-15:0x387fc00d) + to double: f64(0x1.ff801a00000000000000p-15:0x003f0ff801a0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000c00000000000000p-14:0x38800006) + to double: f64(0x1.00000c00000000000000p-14:0x003f100000c0000000) (INEXACT ) + to int32: 0 (INEXACT ) + to int64: 0 (INEXACT ) + to uint32: 0 (INEXACT ) + to uint64: 0 (INEXACT ) +from single: f32(0x1.00000000000000000000p+0:0x3f800000) + to double: f64(0x1.00000000000000000000p+0:0x003ff0000000000000) (OK) + to int32: 1 (OK) + to int64: 1 (OK) + to uint32: 1 (OK) + to uint64: 1 (OK) +from single: f32(0x1.00400000000000000000p+0:0x3f802000) + to double: f64(0x1.00400000000000000000p+0:0x003ff0040000000000) (INEXACT ) + to int32: 1 (INEXACT ) + to int64: 1 (INEXACT ) + to uint32: 1 (INEXACT ) + to uint64: 1 (INEXACT ) +from single: f32(0x1.00000000000000000000p+1:0x40000000) + to double: f64(0x1.00000000000000000000p+1:0x004000000000000000) (OK) + to int32: 2 (OK) + to int64: 2 (OK) + to uint32: 2 (OK) + to uint64: 2 (OK) +from single: f32(0x1.5bf0a800000000000000p+1:0x402df854) + to double: f64(0x1.5bf0a800000000000000p+1:0x004005bf0a80000000) (INEXACT ) + to int32: 2 (INEXACT ) + to int64: 2 (INEXACT ) + to uint32: 2 (INEXACT ) + to uint64: 2 (INEXACT ) +from single: f32(0x1.921fb600000000000000p+1:0x40490fdb) + to double: f64(0x1.921fb600000000000000p+1:0x00400921fb60000000) (INEXACT ) + to int32: 3 (INEXACT ) + to int64: 3 (INEXACT ) + to uint32: 3 (INEXACT ) + to uint64: 3 (INEXACT ) +from single: f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + to double: f64(0x1.ffbe0000000000000000p+15:0x0040effbe000000000) (INEXACT ) + to int32: 65503 (OK) + to int64: 65503 (OK) + to uint32: 65503 (OK) + to uint64: 65503 (OK) +from single: f32(0x1.ffc00000000000000000p+15:0x477fe000) + to double: f64(0x1.ffc00000000000000000p+15:0x0040effc0000000000) (INEXACT ) + to int32: 65504 (OK) + to int64: 65504 (OK) + to uint32: 65504 (OK) + to uint64: 65504 (OK) +from single: f32(0x1.ffc20000000000000000p+15:0x477fe100) + to double: f64(0x1.ffc20000000000000000p+15:0x0040effc2000000000) (INEXACT ) + to int32: 65505 (OK) + to int64: 65505 (OK) + to uint32: 65505 (OK) + to uint64: 65505 (OK) +from single: f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + to double: f64(0x1.ffbf0000000000000000p+16:0x0040fffbf000000000) (INEXACT ) + to int32: 131007 (OK) + to int64: 131007 (OK) + to uint32: 131007 (OK) + to uint64: 131007 (OK) +from single: f32(0x1.ffc00000000000000000p+16:0x47ffe000) + to double: f64(0x1.ffc00000000000000000p+16:0x0040fffc0000000000) (INEXACT ) + to int32: 131008 (OK) + to int64: 131008 (OK) + to uint32: 131008 (OK) + to uint64: 131008 (OK) +from single: f32(0x1.ffc10000000000000000p+16:0x47ffe080) + to double: f64(0x1.ffc10000000000000000p+16:0x0040fffc1000000000) (INEXACT ) + to int32: 131009 (OK) + to int64: 131009 (OK) + to uint32: 131009 (OK) + to uint64: 131009 (OK) +from single: f32(0x1.c0bab600000000000000p+99:0x71605d5b) + to double: f64(0x1.c0bab600000000000000p+99:0x00462c0bab60000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + to double: f64(0x1.fffffe00000000000000p+127:0x0047efffffe0000000) (INEXACT ) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(inf:0x7f800000) + to double: f64(inf:0x007ff0000000000000) (OK) + to int32: 2147483647 (INVALID) + to int64: 9223372036854775807 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fc00000) + to double: f64(-nan:0x00ffffffffffffffff) (OK) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) +from single: f32(-nan:0x7fa00000) + to double: f64(-nan:0x00ffffffffffffffff) (INVALID) + to int32: -1 (INVALID) + to int64: -1 (INVALID) + to uint32: -1 (INVALID) + to uint64: -1 (INVALID) diff --git a/tests/tcg/hexagon/float_madds.ref b/tests/tcg/hexagon/float_madds.ref new file mode 100644 index 000000000..ceed3bbbf --- /dev/null +++ b/tests/tcg/hexagon/float_madds.ref @@ -0,0 +1,768 @@ +### Rounding to nearest +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=INVALID (0/0) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/1) +op : f32(-inf:0xff800000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/2) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(-nan:0xffffffff) flags=OK (1/0) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=OK (1/1) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=OK (1/2) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(inf:0x7f800000) flags=OK (2/0) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-inf:0xff800000) +res: f32(-inf:0xff800000) flags=OK (2/1) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(inf:0x7f800000) flags=OK (2/2) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (3/0) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (3/1) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (3/2) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (4/0) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(-0x1.1874b200000000000000p+103:0xf30c3a59) flags=INEXACT (4/1) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) flags=INEXACT (4/2) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(0x1.0c27fa00000000000000p+60:0x5d8613fd) flags=INEXACT (5/0) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) flags=INEXACT (5/1) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(0x1.26c46200000000000000p+34:0x50936231) flags=INEXACT (5/2) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(0x1.91f94000000000000000p-106:0x0ac8fca0) flags=INEXACT (6/0) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(-0x1.31f75000000000000000p-40:0xab98fba8) flags=INEXACT (6/1) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544400000000000000p-66:0x9ea82a22) flags=INEXACT (6/2) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=UNDERFLOW INEXACT (7/0) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544400000000000000p-66:0x9ea82a22) flags=INEXACT (7/1) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (7/2) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (8/0) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (8/1) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(-0x0.00000000000000000000p+0:0x80000000) flags=UNDERFLOW INEXACT (8/2) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000000000000000000p-25:0x33000000) flags=OK (9/0) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=UNDERFLOW INEXACT (9/1) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (9/2) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.ffffe600000000000000p-25:0x337ffff3) flags=INEXACT (10/0) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.ffffe600000000000000p-50:0x26fffff3) flags=INEXACT (10/1) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000000000000000000p-25:0x33000000) flags=INEXACT (10/2) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801a00000000000000p-15:0x387fc00d) flags=INEXACT (11/0) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.0007fe00000000000000p-25:0x330003ff) flags=INEXACT (11/1) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0001f200000000000000p-24:0x338000f9) flags=INEXACT (11/2) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00000c00000000000000p-14:0x38800006) flags=INEXACT (12/0) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0ffbf400000000000000p-24:0x3387fdfa) flags=INEXACT (12/1) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801c00000000000000p-15:0x387fc00e) flags=INEXACT (12/2) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00000000000000000000p+0:0x3f800000) flags=INEXACT (13/0) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ffc01800000000000000p-14:0x38ffe00c) flags=INEXACT (13/1) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.ffc01800000000000000p-14:0x38ffe00c) flags=INEXACT (13/2) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.00440000000000000000p+0:0x3f802200) flags=INEXACT (14/0) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00440000000000000000p+0:0x3f802200) flags=INEXACT (14/1) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00040200000000000000p+0:0x3f800201) flags=INEXACT (14/2) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/0) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.80400000000000000000p+1:0x40402000) flags=INEXACT (15/1) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/2) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.2e185400000000000000p+2:0x40970c2a) flags=INEXACT (16/0) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.9c00a800000000000000p+2:0x40ce0054) flags=INEXACT (16/1) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.2e23d200000000000000p+2:0x409711e9) flags=INEXACT (16/2) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.12804200000000000000p+3:0x41094021) flags=INEXACT (17/0) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.51458000000000000000p+3:0x4128a2c0) flags=INEXACT (17/1) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.200c0400000000000000p+3:0x41100602) flags=INEXACT (17/2) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ffcf1400000000000000p+15:0x477fe78a) flags=INEXACT (18/0) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.91ed3c00000000000000p+17:0x4848f69e) flags=INEXACT (18/1) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.5bc56000000000000000p+17:0x482de2b0) flags=INEXACT (18/2) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.08edf000000000000000p+18:0x488476f8) flags=INEXACT (19/0) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.ff7e0800000000000000p+31:0x4f7fbf04) flags=INEXACT (19/1) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.08ee7a00000000000000p+18:0x4884773d) flags=INEXACT (19/2) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800800000000000000p+31:0x4f7fc004) flags=INEXACT (20/0) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ff840800000000000000p+31:0x4f7fc204) flags=INEXACT (20/1) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820800000000000000p+31:0x4f7fc104) flags=INEXACT (20/2) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff860800000000000000p+31:0x4f7fc304) flags=INEXACT (21/0) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820800000000000000p+32:0x4fffc104) flags=INEXACT (21/1) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800800000000000000p+32:0x4fffc004) flags=INEXACT (21/2) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff830800000000000000p+32:0x4fffc184) flags=INEXACT (22/0) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff7f8800000000000000p+33:0x507fbfc4) flags=INEXACT (22/1) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff840800000000000000p+32:0x4fffc204) flags=INEXACT (22/2) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.ff800800000000000000p+33:0x507fc004) flags=INEXACT (23/0) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff820800000000000000p+33:0x507fc104) flags=INEXACT (23/1) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff810800000000000000p+33:0x507fc084) flags=INEXACT (23/2) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(0x1.c0bab600000000000000p+99:0x71605d5b) flags=INEXACT (24/0) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.c0838000000000000000p+116:0x79e041c0) flags=INEXACT (24/1) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.c0829e00000000000000p+116:0x79e0414f) flags=INEXACT (24/2) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (25/0) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (25/1) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (25/2) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(inf:0x7f800000) flags=OK (26/0) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(inf:0x7f800000) flags=OK (26/1) +op : f32(inf:0x7f800000) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(inf:0x7f800000) flags=OK (26/2) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=OK (27/0) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(-nan:0xffffffff) flags=OK (27/1) +op : f32(-nan:0x7fc00000) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=OK (27/2) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/0) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=INVALID (28/1) +op : f32(-nan:0x7fa00000) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/2) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/0) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/1) +op : f32(-nan:0xffa00000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/2) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/0) +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/1) +op : f32(-nan:0xffc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/2) +# LP184149 +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-1:0x3f000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=OK (31/0) +op : f32(0x1.00000000000000000000p-149:0x00000001) * f32(0x1.00000000000000000000p-149:0x00000001) + f32(0x1.00000000000000000000p-149:0x00000001) +res: f32(0x1.00000000000000000000p-149:0x00000001) flags=UNDERFLOW INEXACT (32/0) +### Rounding upwards +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=INVALID (0/0) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/1) +op : f32(-inf:0xff800000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/2) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(-nan:0xffffffff) flags=OK (1/0) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=OK (1/1) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=OK (1/2) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(inf:0x7f800000) flags=OK (2/0) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-inf:0xff800000) +res: f32(-inf:0xff800000) flags=OK (2/1) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(inf:0x7f800000) flags=OK (2/2) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (3/0) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (3/1) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (3/2) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (4/0) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(-0x1.1874b000000000000000p+103:0xf30c3a58) flags=INEXACT (4/1) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab400000000000000p+99:0xf1605d5a) flags=INEXACT (4/2) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(0x1.0c27fa00000000000000p+60:0x5d8613fd) flags=INEXACT (5/0) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab400000000000000p+99:0xf1605d5a) flags=INEXACT (5/1) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(0x1.26c46200000000000000p+34:0x50936231) flags=INEXACT (5/2) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(0x1.91f94000000000000000p-106:0x0ac8fca0) flags=INEXACT (6/0) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(-0x1.31f74e00000000000000p-40:0xab98fba7) flags=INEXACT (6/1) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544200000000000000p-66:0x9ea82a21) flags=INEXACT (6/2) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x1.00000000000000000000p-149:0x00000001) flags=UNDERFLOW INEXACT (7/0) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544400000000000000p-66:0x9ea82a22) flags=INEXACT (7/1) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (7/2) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (8/0) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (8/1) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(-0x0.00000000000000000000p+0:0x80000000) flags=UNDERFLOW INEXACT (8/2) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000000000000000000p-25:0x33000000) flags=OK (9/0) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x1.00000000000000000000p-149:0x00000001) flags=UNDERFLOW INEXACT (9/1) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (9/2) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.ffffe800000000000000p-25:0x337ffff4) flags=INEXACT (10/0) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.ffffe800000000000000p-50:0x26fffff4) flags=INEXACT (10/1) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000200000000000000p-25:0x33000001) flags=INEXACT (10/2) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801c00000000000000p-15:0x387fc00e) flags=INEXACT (11/0) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00080000000000000000p-25:0x33000400) flags=INEXACT (11/1) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0001f400000000000000p-24:0x338000fa) flags=INEXACT (11/2) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00000e00000000000000p-14:0x38800007) flags=INEXACT (12/0) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0ffbf600000000000000p-24:0x3387fdfb) flags=INEXACT (12/1) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801c00000000000000p-15:0x387fc00e) flags=INEXACT (12/2) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00000200000000000000p+0:0x3f800001) flags=INEXACT (13/0) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ffc01a00000000000000p-14:0x38ffe00d) flags=INEXACT (13/1) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.ffc01a00000000000000p-14:0x38ffe00d) flags=INEXACT (13/2) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.00440200000000000000p+0:0x3f802201) flags=INEXACT (14/0) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00440200000000000000p+0:0x3f802201) flags=INEXACT (14/1) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00040200000000000000p+0:0x3f800201) flags=INEXACT (14/2) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/0) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.80400000000000000000p+1:0x40402000) flags=INEXACT (15/1) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/2) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.2e185400000000000000p+2:0x40970c2a) flags=INEXACT (16/0) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.9c00a800000000000000p+2:0x40ce0054) flags=INEXACT (16/1) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.2e23d400000000000000p+2:0x409711ea) flags=INEXACT (16/2) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.12804200000000000000p+3:0x41094021) flags=INEXACT (17/0) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.51458200000000000000p+3:0x4128a2c1) flags=INEXACT (17/1) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.200c0600000000000000p+3:0x41100603) flags=INEXACT (17/2) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ffcf1600000000000000p+15:0x477fe78b) flags=INEXACT (18/0) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.91ed3c00000000000000p+17:0x4848f69e) flags=INEXACT (18/1) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.5bc56200000000000000p+17:0x482de2b1) flags=INEXACT (18/2) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.08edf000000000000000p+18:0x488476f8) flags=INEXACT (19/0) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.ff7e0a00000000000000p+31:0x4f7fbf05) flags=INEXACT (19/1) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.08ee7a00000000000000p+18:0x4884773d) flags=INEXACT (19/2) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800a00000000000000p+31:0x4f7fc005) flags=INEXACT (20/0) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ff840800000000000000p+31:0x4f7fc204) flags=INEXACT (20/1) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820800000000000000p+31:0x4f7fc104) flags=INEXACT (20/2) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff860800000000000000p+31:0x4f7fc304) flags=INEXACT (21/0) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820800000000000000p+32:0x4fffc104) flags=INEXACT (21/1) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800a00000000000000p+32:0x4fffc005) flags=INEXACT (21/2) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff830800000000000000p+32:0x4fffc184) flags=INEXACT (22/0) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff7f8a00000000000000p+33:0x507fbfc5) flags=INEXACT (22/1) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff840800000000000000p+32:0x4fffc204) flags=INEXACT (22/2) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.ff800a00000000000000p+33:0x507fc005) flags=INEXACT (23/0) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff820800000000000000p+33:0x507fc104) flags=INEXACT (23/1) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff810800000000000000p+33:0x507fc084) flags=INEXACT (23/2) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(0x1.c0bab800000000000000p+99:0x71605d5c) flags=INEXACT (24/0) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.c0838000000000000000p+116:0x79e041c0) flags=INEXACT (24/1) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.c082a000000000000000p+116:0x79e04150) flags=INEXACT (24/2) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (25/0) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (25/1) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(inf:0x7f800000) flags=OVERFLOW INEXACT (25/2) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(inf:0x7f800000) flags=OK (26/0) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(inf:0x7f800000) flags=OK (26/1) +op : f32(inf:0x7f800000) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(inf:0x7f800000) flags=OK (26/2) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=OK (27/0) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(-nan:0xffffffff) flags=OK (27/1) +op : f32(-nan:0x7fc00000) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=OK (27/2) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/0) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=INVALID (28/1) +op : f32(-nan:0x7fa00000) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/2) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/0) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/1) +op : f32(-nan:0xffa00000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/2) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/0) +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/1) +op : f32(-nan:0xffc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/2) +# LP184149 +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-1:0x3f000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=OK (31/0) +op : f32(0x1.00000000000000000000p-149:0x00000001) * f32(0x1.00000000000000000000p-149:0x00000001) + f32(0x1.00000000000000000000p-149:0x00000001) +res: f32(0x1.00000000000000000000p-148:0x00000002) flags=UNDERFLOW INEXACT (32/0) +### Rounding downwards +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=INVALID (0/0) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/1) +op : f32(-inf:0xff800000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/2) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(-nan:0xffffffff) flags=OK (1/0) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=OK (1/1) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=OK (1/2) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(inf:0x7f800000) flags=OK (2/0) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-inf:0xff800000) +res: f32(-inf:0xff800000) flags=OK (2/1) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(inf:0x7f800000) flags=OK (2/2) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (3/0) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (3/1) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (3/2) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (4/0) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(-0x1.1874b200000000000000p+103:0xf30c3a59) flags=INEXACT (4/1) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) flags=INEXACT (4/2) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(0x1.0c27f800000000000000p+60:0x5d8613fc) flags=INEXACT (5/0) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) flags=INEXACT (5/1) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(0x1.26c46000000000000000p+34:0x50936230) flags=INEXACT (5/2) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(0x1.91f93e00000000000000p-106:0x0ac8fc9f) flags=INEXACT (6/0) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(-0x1.31f75000000000000000p-40:0xab98fba8) flags=INEXACT (6/1) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544400000000000000p-66:0x9ea82a22) flags=INEXACT (6/2) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=UNDERFLOW INEXACT (7/0) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544400000000000000p-66:0x9ea82a22) flags=INEXACT (7/1) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (7/2) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (8/0) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (8/1) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(-0x1.00000000000000000000p-149:0x80000001) flags=UNDERFLOW INEXACT (8/2) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000000000000000000p-25:0x33000000) flags=OK (9/0) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=UNDERFLOW INEXACT (9/1) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (9/2) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.ffffe600000000000000p-25:0x337ffff3) flags=INEXACT (10/0) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.ffffe600000000000000p-50:0x26fffff3) flags=INEXACT (10/1) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000000000000000000p-25:0x33000000) flags=INEXACT (10/2) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801a00000000000000p-15:0x387fc00d) flags=INEXACT (11/0) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.0007fe00000000000000p-25:0x330003ff) flags=INEXACT (11/1) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0001f200000000000000p-24:0x338000f9) flags=INEXACT (11/2) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00000c00000000000000p-14:0x38800006) flags=INEXACT (12/0) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0ffbf400000000000000p-24:0x3387fdfa) flags=INEXACT (12/1) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801a00000000000000p-15:0x387fc00d) flags=INEXACT (12/2) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00000000000000000000p+0:0x3f800000) flags=INEXACT (13/0) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ffc01800000000000000p-14:0x38ffe00c) flags=INEXACT (13/1) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.ffc01800000000000000p-14:0x38ffe00c) flags=INEXACT (13/2) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.00440000000000000000p+0:0x3f802200) flags=INEXACT (14/0) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00440000000000000000p+0:0x3f802200) flags=INEXACT (14/1) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00040000000000000000p+0:0x3f800200) flags=INEXACT (14/2) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/0) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.80400000000000000000p+1:0x40402000) flags=INEXACT (15/1) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/2) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.2e185400000000000000p+2:0x40970c2a) flags=INEXACT (16/0) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.9c00a800000000000000p+2:0x40ce0054) flags=INEXACT (16/1) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.2e23d200000000000000p+2:0x409711e9) flags=INEXACT (16/2) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.12804000000000000000p+3:0x41094020) flags=INEXACT (17/0) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.51458000000000000000p+3:0x4128a2c0) flags=INEXACT (17/1) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.200c0400000000000000p+3:0x41100602) flags=INEXACT (17/2) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ffcf1400000000000000p+15:0x477fe78a) flags=INEXACT (18/0) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.91ed3a00000000000000p+17:0x4848f69d) flags=INEXACT (18/1) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.5bc56000000000000000p+17:0x482de2b0) flags=INEXACT (18/2) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.08edee00000000000000p+18:0x488476f7) flags=INEXACT (19/0) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.ff7e0800000000000000p+31:0x4f7fbf04) flags=INEXACT (19/1) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.08ee7800000000000000p+18:0x4884773c) flags=INEXACT (19/2) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800800000000000000p+31:0x4f7fc004) flags=INEXACT (20/0) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ff840600000000000000p+31:0x4f7fc203) flags=INEXACT (20/1) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820600000000000000p+31:0x4f7fc103) flags=INEXACT (20/2) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff860600000000000000p+31:0x4f7fc303) flags=INEXACT (21/0) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820600000000000000p+32:0x4fffc103) flags=INEXACT (21/1) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800800000000000000p+32:0x4fffc004) flags=INEXACT (21/2) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff830600000000000000p+32:0x4fffc183) flags=INEXACT (22/0) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff7f8800000000000000p+33:0x507fbfc4) flags=INEXACT (22/1) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff840600000000000000p+32:0x4fffc203) flags=INEXACT (22/2) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.ff800800000000000000p+33:0x507fc004) flags=INEXACT (23/0) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff820600000000000000p+33:0x507fc103) flags=INEXACT (23/1) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff810600000000000000p+33:0x507fc083) flags=INEXACT (23/2) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(0x1.c0bab600000000000000p+99:0x71605d5b) flags=INEXACT (24/0) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.c0837e00000000000000p+116:0x79e041bf) flags=INEXACT (24/1) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.c0829e00000000000000p+116:0x79e0414f) flags=INEXACT (24/2) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (25/0) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (25/1) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (25/2) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(inf:0x7f800000) flags=OK (26/0) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(inf:0x7f800000) flags=OK (26/1) +op : f32(inf:0x7f800000) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(inf:0x7f800000) flags=OK (26/2) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=OK (27/0) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(-nan:0xffffffff) flags=OK (27/1) +op : f32(-nan:0x7fc00000) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=OK (27/2) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/0) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=INVALID (28/1) +op : f32(-nan:0x7fa00000) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/2) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/0) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/1) +op : f32(-nan:0xffa00000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/2) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/0) +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/1) +op : f32(-nan:0xffc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/2) +# LP184149 +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-1:0x3f000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=OK (31/0) +op : f32(0x1.00000000000000000000p-149:0x00000001) * f32(0x1.00000000000000000000p-149:0x00000001) + f32(0x1.00000000000000000000p-149:0x00000001) +res: f32(0x1.00000000000000000000p-149:0x00000001) flags=UNDERFLOW INEXACT (32/0) +### Rounding to zero +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=INVALID (0/0) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/1) +op : f32(-inf:0xff800000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (0/2) +op : f32(-nan:0xffc00000) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(-nan:0xffffffff) flags=OK (1/0) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=OK (1/1) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-nan:0xffc00000) + f32(-inf:0xff800000) +res: f32(-nan:0xffffffff) flags=OK (1/2) +op : f32(-inf:0xff800000) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(inf:0x7f800000) flags=OK (2/0) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-inf:0xff800000) +res: f32(-inf:0xff800000) flags=OK (2/1) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-inf:0xff800000) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(inf:0x7f800000) flags=OK (2/2) +op : f32(-0x1.fffffe00000000000000p+127:0xff7fffff) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (3/0) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.fffffe00000000000000p+127:0xff7fffff) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (3/1) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.fffffe00000000000000p+127:0xff7fffff) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (3/2) +op : f32(-0x1.1874b200000000000000p+103:0xf30c3a59) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (4/0) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.1874b200000000000000p+103:0xf30c3a59) +res: f32(-0x1.1874b000000000000000p+103:0xf30c3a58) flags=INEXACT (4/1) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.1874b200000000000000p+103:0xf30c3a59) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab400000000000000p+99:0xf1605d5a) flags=INEXACT (4/2) +op : f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(0x1.0c27f800000000000000p+60:0x5d8613fc) flags=INEXACT (5/0) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) +res: f32(-0x1.c0bab400000000000000p+99:0xf1605d5a) flags=INEXACT (5/1) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.c0bab600000000000000p+99:0xf1605d5b) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(0x1.26c46000000000000000p+34:0x50936230) flags=INEXACT (5/2) +op : f32(-0x1.31f75000000000000000p-40:0xab98fba8) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(0x1.91f93e00000000000000p-106:0x0ac8fc9f) flags=INEXACT (6/0) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(-0x1.31f75000000000000000p-40:0xab98fba8) +res: f32(-0x1.31f74e00000000000000p-40:0xab98fba7) flags=INEXACT (6/1) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(-0x1.31f75000000000000000p-40:0xab98fba8) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544200000000000000p-66:0x9ea82a21) flags=INEXACT (6/2) +op : f32(-0x1.50544400000000000000p-66:0x9ea82a22) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=UNDERFLOW INEXACT (7/0) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(-0x1.50544400000000000000p-66:0x9ea82a22) +res: f32(-0x1.50544400000000000000p-66:0x9ea82a22) flags=INEXACT (7/1) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(-0x1.50544400000000000000p-66:0x9ea82a22) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (7/2) +op : f32(-0x1.00000000000000000000p-126:0x80800000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (8/0) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(-0x1.00000000000000000000p-126:0x80800000) +res: f32(-0x1.00000000000000000000p-126:0x80800000) flags=OK (8/1) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(-0x1.00000000000000000000p-126:0x80800000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(-0x0.00000000000000000000p+0:0x80000000) flags=UNDERFLOW INEXACT (8/2) +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000000000000000000p-25:0x33000000) flags=OK (9/0) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=UNDERFLOW INEXACT (9/1) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x0.00000000000000000000p+0:0000000000) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.00000000000000000000p-126:0x00800000) flags=OK (9/2) +op : f32(0x1.00000000000000000000p-126:0x00800000) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.ffffe600000000000000p-25:0x337ffff3) flags=INEXACT (10/0) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.00000000000000000000p-126:0x00800000) +res: f32(0x1.ffffe600000000000000p-50:0x26fffff3) flags=INEXACT (10/1) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.00000000000000000000p-126:0x00800000) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.00000000000000000000p-25:0x33000000) flags=INEXACT (10/2) +op : f32(0x1.00000000000000000000p-25:0x33000000) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801a00000000000000p-15:0x387fc00d) flags=INEXACT (11/0) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000000000000000000p-25:0x33000000) +res: f32(0x1.0007fe00000000000000p-25:0x330003ff) flags=INEXACT (11/1) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000000000000000000p-25:0x33000000) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0001f200000000000000p-24:0x338000f9) flags=INEXACT (11/2) +op : f32(0x1.ffffe600000000000000p-25:0x337ffff3) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00000c00000000000000p-14:0x38800006) flags=INEXACT (12/0) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.ffffe600000000000000p-25:0x337ffff3) +res: f32(0x1.0ffbf400000000000000p-24:0x3387fdfa) flags=INEXACT (12/1) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.ffffe600000000000000p-25:0x337ffff3) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ff801a00000000000000p-15:0x387fc00d) flags=INEXACT (12/2) +op : f32(0x1.ff801a00000000000000p-15:0x387fc00d) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00000000000000000000p+0:0x3f800000) flags=INEXACT (13/0) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.ff801a00000000000000p-15:0x387fc00d) +res: f32(0x1.ffc01800000000000000p-14:0x38ffe00c) flags=INEXACT (13/1) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.ff801a00000000000000p-15:0x387fc00d) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.ffc01800000000000000p-14:0x38ffe00c) flags=INEXACT (13/2) +op : f32(0x1.00000c00000000000000p-14:0x38800006) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.00440000000000000000p+0:0x3f802200) flags=INEXACT (14/0) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000c00000000000000p-14:0x38800006) +res: f32(0x1.00440000000000000000p+0:0x3f802200) flags=INEXACT (14/1) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000c00000000000000p-14:0x38800006) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.00040000000000000000p+0:0x3f800200) flags=INEXACT (14/2) +op : f32(0x1.00000000000000000000p+0:0x3f800000) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/0) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.00000000000000000000p+0:0x3f800000) +res: f32(0x1.80400000000000000000p+1:0x40402000) flags=INEXACT (15/1) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.00000000000000000000p+0:0x3f800000) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.80200000000000000000p+1:0x40401000) flags=INEXACT (15/2) +op : f32(0x1.00400000000000000000p+0:0x3f802000) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.2e185400000000000000p+2:0x40970c2a) flags=INEXACT (16/0) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.00400000000000000000p+0:0x3f802000) +res: f32(0x1.9c00a800000000000000p+2:0x40ce0054) flags=INEXACT (16/1) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.00400000000000000000p+0:0x3f802000) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.2e23d200000000000000p+2:0x409711e9) flags=INEXACT (16/2) +op : f32(0x1.00000000000000000000p+1:0x40000000) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.12804000000000000000p+3:0x41094020) flags=INEXACT (17/0) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.00000000000000000000p+1:0x40000000) +res: f32(0x1.51458000000000000000p+3:0x4128a2c0) flags=INEXACT (17/1) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.00000000000000000000p+1:0x40000000) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.200c0400000000000000p+3:0x41100602) flags=INEXACT (17/2) +op : f32(0x1.5bf0a800000000000000p+1:0x402df854) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ffcf1400000000000000p+15:0x477fe78a) flags=INEXACT (18/0) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.5bf0a800000000000000p+1:0x402df854) +res: f32(0x1.91ed3a00000000000000p+17:0x4848f69d) flags=INEXACT (18/1) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.5bf0a800000000000000p+1:0x402df854) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.5bc56000000000000000p+17:0x482de2b0) flags=INEXACT (18/2) +op : f32(0x1.921fb600000000000000p+1:0x40490fdb) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.08edee00000000000000p+18:0x488476f7) flags=INEXACT (19/0) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.921fb600000000000000p+1:0x40490fdb) +res: f32(0x1.ff7e0800000000000000p+31:0x4f7fbf04) flags=INEXACT (19/1) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.921fb600000000000000p+1:0x40490fdb) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.08ee7800000000000000p+18:0x4884773c) flags=INEXACT (19/2) +op : f32(0x1.ffbe0000000000000000p+15:0x477fdf00) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800800000000000000p+31:0x4f7fc004) flags=INEXACT (20/0) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbe0000000000000000p+15:0x477fdf00) +res: f32(0x1.ff840600000000000000p+31:0x4f7fc203) flags=INEXACT (20/1) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbe0000000000000000p+15:0x477fdf00) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820600000000000000p+31:0x4f7fc103) flags=INEXACT (20/2) +op : f32(0x1.ffc00000000000000000p+15:0x477fe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff860600000000000000p+31:0x4f7fc303) flags=INEXACT (21/0) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+15:0x477fe000) +res: f32(0x1.ff820600000000000000p+32:0x4fffc103) flags=INEXACT (21/1) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+15:0x477fe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff800800000000000000p+32:0x4fffc004) flags=INEXACT (21/2) +op : f32(0x1.ffc20000000000000000p+15:0x477fe100) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff830600000000000000p+32:0x4fffc183) flags=INEXACT (22/0) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc20000000000000000p+15:0x477fe100) +res: f32(0x1.ff7f8800000000000000p+33:0x507fbfc4) flags=INEXACT (22/1) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc20000000000000000p+15:0x477fe100) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff840600000000000000p+32:0x4fffc203) flags=INEXACT (22/2) +op : f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.ff800800000000000000p+33:0x507fc004) flags=INEXACT (23/0) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) +res: f32(0x1.ff820600000000000000p+33:0x507fc103) flags=INEXACT (23/1) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.ffbf0000000000000000p+16:0x47ffdf80) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.ff810600000000000000p+33:0x507fc083) flags=INEXACT (23/2) +op : f32(0x1.ffc00000000000000000p+16:0x47ffe000) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(0x1.c0bab600000000000000p+99:0x71605d5b) flags=INEXACT (24/0) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.ffc00000000000000000p+16:0x47ffe000) +res: f32(0x1.c0837e00000000000000p+116:0x79e041bf) flags=INEXACT (24/1) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.ffc00000000000000000p+16:0x47ffe000) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.c0829e00000000000000p+116:0x79e0414f) flags=INEXACT (24/2) +op : f32(0x1.ffc10000000000000000p+16:0x47ffe080) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (25/0) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(0x1.ffc10000000000000000p+16:0x47ffe080) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (25/1) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(0x1.ffc10000000000000000p+16:0x47ffe080) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(0x1.fffffe00000000000000p+127:0x7f7fffff) flags=OVERFLOW INEXACT (25/2) +op : f32(0x1.c0bab600000000000000p+99:0x71605d5b) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(inf:0x7f800000) flags=OK (26/0) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(0x1.c0bab600000000000000p+99:0x71605d5b) +res: f32(inf:0x7f800000) flags=OK (26/1) +op : f32(inf:0x7f800000) * f32(0x1.c0bab600000000000000p+99:0x71605d5b) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(inf:0x7f800000) flags=OK (26/2) +op : f32(0x1.fffffe00000000000000p+127:0x7f7fffff) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=OK (27/0) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(0x1.fffffe00000000000000p+127:0x7f7fffff) +res: f32(-nan:0xffffffff) flags=OK (27/1) +op : f32(-nan:0x7fc00000) * f32(0x1.fffffe00000000000000p+127:0x7f7fffff) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=OK (27/2) +op : f32(inf:0x7f800000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/0) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(inf:0x7f800000) +res: f32(-nan:0xffffffff) flags=INVALID (28/1) +op : f32(-nan:0x7fa00000) * f32(inf:0x7f800000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (28/2) +op : f32(-nan:0x7fc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/0) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0x7fc00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/1) +op : f32(-nan:0xffa00000) * f32(-nan:0x7fc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (29/2) +op : f32(-nan:0x7fa00000) * f32(-nan:0xffa00000) + f32(-nan:0xffc00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/0) +op : f32(-nan:0xffa00000) * f32(-nan:0xffc00000) + f32(-nan:0x7fa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/1) +op : f32(-nan:0xffc00000) * f32(-nan:0x7fa00000) + f32(-nan:0xffa00000) +res: f32(-nan:0xffffffff) flags=INVALID (30/2) +# LP184149 +op : f32(0x0.00000000000000000000p+0:0000000000) * f32(0x1.00000000000000000000p-1:0x3f000000) + f32(0x0.00000000000000000000p+0:0000000000) +res: f32(0x0.00000000000000000000p+0:0000000000) flags=OK (31/0) +op : f32(0x1.00000000000000000000p-149:0x00000001) * f32(0x1.00000000000000000000p-149:0x00000001) + f32(0x1.00000000000000000000p-149:0x00000001) +res: f32(0x1.00000000000000000000p-149:0x00000001) flags=UNDERFLOW INEXACT (32/0) diff --git a/tests/tcg/hexagon/fpstuff.c b/tests/tcg/hexagon/fpstuff.c new file mode 100644 index 000000000..0dff429f4 --- /dev/null +++ b/tests/tcg/hexagon/fpstuff.c @@ -0,0 +1,612 @@ +/* + * Copyright(c) 2020-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This test checks various FP operations performed on Hexagon + */ + +#include <stdio.h> + +const int FPINVF_BIT = 1; /* Invalid */ +const int FPINVF = 1 << FPINVF_BIT; +const int FPDBZF_BIT = 2; /* Divide by zero */ +const int FPDBZF = 1 << FPDBZF_BIT; +const int FPOVFF_BIT = 3; /* Overflow */ +const int FPOVFF = 1 << FPOVFF_BIT; +const int FPUNFF_BIT = 4; /* Underflow */ +const int FPUNFF = 1 << FPUNFF_BIT; +const int FPINPF_BIT = 5; /* Inexact */ +const int FPINPF = 1 << FPINPF_BIT; + +const int SF_ZERO = 0x00000000; +const int SF_NaN = 0x7fc00000; +const int SF_NaN_special = 0x7f800001; +const int SF_ANY = 0x3f800000; +const int SF_HEX_NAN = 0xffffffff; +const int SF_small_neg = 0xab98fba8; + +const long long DF_NaN = 0x7ff8000000000000ULL; +const long long DF_ANY = 0x3f80000000000000ULL; +const long long DF_HEX_NAN = 0xffffffffffffffffULL; +const long long DF_small_neg = 0xbd731f7500000000ULL; + +int err; + +#define CLEAR_FPSTATUS \ + "r2 = usr\n\t" \ + "r2 = clrbit(r2, #1)\n\t" \ + "r2 = clrbit(r2, #2)\n\t" \ + "r2 = clrbit(r2, #3)\n\t" \ + "r2 = clrbit(r2, #4)\n\t" \ + "r2 = clrbit(r2, #5)\n\t" \ + "usr = r2\n\t" + +static void check_fpstatus_bit(int usr, int expect, int flag, const char *n) +{ + int bit = 1 << flag; + if ((usr & bit) != (expect & bit)) { + printf("ERROR %s: usr = %d, expect = %d\n", n, + (usr >> flag) & 1, (expect >> flag) & 1); + err++; + } +} + +static void check_fpstatus(int usr, int expect) +{ + check_fpstatus_bit(usr, expect, FPINVF_BIT, "Invalid"); + check_fpstatus_bit(usr, expect, FPDBZF_BIT, "Div by zero"); + check_fpstatus_bit(usr, expect, FPOVFF_BIT, "Overflow"); + check_fpstatus_bit(usr, expect, FPUNFF_BIT, "Underflow"); + check_fpstatus_bit(usr, expect, FPINPF_BIT, "Inexact"); +} + +static void check32(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%x != 0x%x\n", val, expect); + err++; + } +} +static void check64(unsigned long long val, unsigned long long expect) +{ + if (val != expect) { + printf("ERROR: 0x%llx != 0x%llx\n", val, expect); + err++; + } +} + +static void check_compare_exception(void) +{ + int cmp; + int usr; + + /* Check that FP compares are quiet (don't raise any execptions) */ + asm (CLEAR_FPSTATUS + "p0 = sfcmp.eq(%2, %3)\n\t" + "%0 = p0\n\t" + "%1 = usr\n\t" + : "=r"(cmp), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(cmp, 0); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "p0 = sfcmp.gt(%2, %3)\n\t" + "%0 = p0\n\t" + "%1 = usr\n\t" + : "=r"(cmp), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(cmp, 0); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "p0 = sfcmp.ge(%2, %3)\n\t" + "%0 = p0\n\t" + "%1 = usr\n\t" + : "=r"(cmp), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(cmp, 0); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "p0 = dfcmp.eq(%2, %3)\n\t" + "%0 = p0\n\t" + "%1 = usr\n\t" + : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "r2", "p0", "usr"); + check32(cmp, 0); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "p0 = dfcmp.gt(%2, %3)\n\t" + "%0 = p0\n\t" + "%1 = usr\n\t" + : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "r2", "p0", "usr"); + check32(cmp, 0); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "p0 = dfcmp.ge(%2, %3)\n\t" + "%0 = p0\n\t" + "%1 = usr\n\t" + : "=r"(cmp), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "r2", "p0", "usr"); + check32(cmp, 0); + check_fpstatus(usr, 0); +} + +static void check_sfminmax(void) +{ + int minmax; + int usr; + + /* + * Execute sfmin/sfmax instructions with one operand as NaN + * Check that + * Result is the other operand + * Invalid bit in USR is not set + */ + asm (CLEAR_FPSTATUS + "%0 = sfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check64(minmax, SF_ANY); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0 = sfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check64(minmax, SF_ANY); + check_fpstatus(usr, 0); + + /* + * Execute sfmin/sfmax instructions with both operands NaN + * Check that + * Result is SF_HEX_NAN + * Invalid bit in USR is set + */ + asm (CLEAR_FPSTATUS + "%0 = sfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(SF_NaN), "r"(SF_NaN) + : "r2", "usr"); + check64(minmax, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0 = sfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(SF_NaN), "r"(SF_NaN) + : "r2", "usr"); + check64(minmax, SF_HEX_NAN); + check_fpstatus(usr, 0); +} + +static void check_dfminmax(void) +{ + unsigned long long minmax; + int usr; + + /* + * Execute dfmin/dfmax instructions with one operand as NaN + * Check that + * Result is the other operand + * Invalid bit in USR is set + */ + asm (CLEAR_FPSTATUS + "%0 = dfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "r2", "usr"); + check64(minmax, DF_ANY); + check_fpstatus(usr, FPINVF); + + asm (CLEAR_FPSTATUS + "%0 = dfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "r2", "usr"); + check64(minmax, DF_ANY); + check_fpstatus(usr, FPINVF); + + /* + * Execute dfmin/dfmax instructions with both operands NaN + * Check that + * Result is DF_HEX_NAN + * Invalid bit in USR is set + */ + asm (CLEAR_FPSTATUS + "%0 = dfmin(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_NaN) + : "r2", "usr"); + check64(minmax, DF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + asm (CLEAR_FPSTATUS + "%0 = dfmax(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(minmax), "=r"(usr) : "r"(DF_NaN), "r"(DF_NaN) + : "r2", "usr"); + check64(minmax, DF_HEX_NAN); + check_fpstatus(usr, FPINVF); +} + +static void check_recip_exception(void) +{ + int result; + int usr; + + /* + * Check that sfrecipa doesn't set status bits when + * a NaN with bit 22 non-zero is passed + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %2)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + /* + * Check that sfrecipa doesn't set status bits when + * a NaN with bit 22 zero is passed + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN_special), "r"(SF_ANY) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_ANY), "r"(SF_NaN_special) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %2)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(SF_NaN_special) + : "r2", "p0", "usr"); + check32(result, SF_HEX_NAN); + check_fpstatus(usr, FPINVF); + + /* + * Check that sfrecipa properly sets divid-by-zero + */ + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(0x885dc960), "r"(0x80000000) + : "r2", "p0", "usr"); + check32(result, 0x3f800000); + check_fpstatus(usr, FPDBZF); + + asm (CLEAR_FPSTATUS + "%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) : "r"(0x7f800000), "r"(SF_ZERO) + : "r2", "p0", "usr"); + check32(result, 0x3f800000); + check_fpstatus(usr, 0); +} + +static void check_canonical_NaN(void) +{ + int sf_result; + unsigned long long df_result; + int usr; + + /* Check that each FP instruction properly returns SF_HEX_NAN/DF_HEX_NAN */ + asm(CLEAR_FPSTATUS + "%0 = sfadd(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm(CLEAR_FPSTATUS + "%0 = sfsub(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm(CLEAR_FPSTATUS + "%0 = sfmpy(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + sf_result = SF_ZERO; + asm(CLEAR_FPSTATUS + "%0 += sfmpy(%2, %3)\n\t" + "%1 = usr\n\t" + : "+r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + sf_result = SF_ZERO; + asm(CLEAR_FPSTATUS + "p0 = !cmp.eq(r0, r0)\n\t" + "%0 += sfmpy(%2, %3, p0):scale\n\t" + "%1 = usr\n\t" + : "+r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr", "p0"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + sf_result = SF_ZERO; + asm(CLEAR_FPSTATUS + "%0 -= sfmpy(%2, %3)\n\t" + "%1 = usr\n\t" + : "+r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + sf_result = SF_ZERO; + asm(CLEAR_FPSTATUS + "%0 += sfmpy(%2, %3):lib\n\t" + "%1 = usr\n\t" + : "+r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + sf_result = SF_ZERO; + asm(CLEAR_FPSTATUS + "%0 -= sfmpy(%2, %3):lib\n\t" + "%1 = usr\n\t" + : "+r"(sf_result), "=r"(usr) : "r"(SF_NaN), "r"(SF_ANY) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2sf(%2)\n\t" + "%1 = usr\n\t" + : "=r"(sf_result), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check32(sf_result, SF_HEX_NAN); + check_fpstatus(usr, 0); + + asm(CLEAR_FPSTATUS + "%0 = dfadd(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "r2", "usr"); + check64(df_result, DF_HEX_NAN); + check_fpstatus(usr, 0); + + asm(CLEAR_FPSTATUS + "%0 = dfsub(%2, %3)\n\t" + "%1 = usr\n\t" + : "=r"(df_result), "=r"(usr) : "r"(DF_NaN), "r"(DF_ANY) + : "r2", "usr"); + check64(df_result, DF_HEX_NAN); + check_fpstatus(usr, 0); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2df(%2)\n\t" + "%1 = usr\n\t" + : "=r"(df_result), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check64(df_result, DF_HEX_NAN); + check_fpstatus(usr, 0); +} + +static void check_invsqrta(void) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfinvsqrta(%2)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(0x7f800000) + : "p0"); + check32(result, 0xff800000); + check32(predval, 0x0); +} + +static void check_float2int_convs() +{ + int res32; + long long res64; + int usr; + + /* + * Check that the various forms of float-to-unsigned + * check sign before rounding + */ + asm(CLEAR_FPSTATUS + "%0 = convert_sf2uw(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2uw(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2ud(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2ud(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2uw(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2uw(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check32(res32, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2ud(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2ud(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_small_neg) + : "r2", "usr"); + check64(res64, 0); + check_fpstatus(usr, FPINVF); + + /* + * Check that the various forms of float-to-signed return -1 for NaN + */ + asm(CLEAR_FPSTATUS + "%0 = convert_sf2w(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2w(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2d(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_sf2d(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(SF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2w(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2w(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res32), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check32(res32, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2d(%2)\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); + + asm(CLEAR_FPSTATUS + "%0 = convert_df2d(%2):chop\n\t" + "%1 = usr\n\t" + : "=r"(res64), "=r"(usr) : "r"(DF_NaN) + : "r2", "usr"); + check64(res64, -1); + check_fpstatus(usr, FPINVF); +} + +int main() +{ + check_compare_exception(); + check_sfminmax(); + check_dfminmax(); + check_recip_exception(); + check_canonical_NaN(); + check_invsqrta(); + check_float2int_convs(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/hex_sigsegv.c b/tests/tcg/hexagon/hex_sigsegv.c new file mode 100644 index 000000000..dc2b34925 --- /dev/null +++ b/tests/tcg/hexagon/hex_sigsegv.c @@ -0,0 +1,106 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test the VLIW semantics of two stores in a packet + * + * When a packet has 2 stores, either both commit or neither commit. + * We test this with a packet that does stores to both NULL and a global + * variable, "should_not_change". After the SIGSEGV is caught, we check + * that the "should_not_change" value is the same. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <fcntl.h> +#include <setjmp.h> +#include <signal.h> + +typedef unsigned char uint8_t; + +int err; +int segv_caught; + +#define SHOULD_NOT_CHANGE_VAL 5 +int should_not_change = SHOULD_NOT_CHANGE_VAL; + +#define BUF_SIZE 300 +unsigned char buf[BUF_SIZE]; + + +static void __check(const char *filename, int line, int x, int expect) +{ + if (x != expect) { + printf("ERROR %s:%d - %d != %d\n", + filename, line, x, expect); + err++; + } +} + +#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect)) + +static void __chk_error(const char *filename, int line, int ret) +{ + if (ret < 0) { + printf("ERROR %s:%d - %d\n", filename, line, ret); + err++; + } +} + +#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret)) + +jmp_buf jmp_env; + +static void sig_segv(int sig, siginfo_t *info, void *puc) +{ + check(sig, SIGSEGV); + segv_caught = 1; + longjmp(jmp_env, 1); +} + +int main() +{ + struct sigaction act; + + /* SIGSEGV test */ + act.sa_sigaction = sig_segv; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + chk_error(sigaction(SIGSEGV, &act, NULL)); + if (setjmp(jmp_env) == 0) { + asm volatile("r18 = ##should_not_change\n\t" + "r19 = #0\n\t" + "{\n\t" + " memw(r18) = #7\n\t" + " memw(r19) = #0\n\t" + "}\n\t" + : : : "r18", "r19", "memory"); + } + + act.sa_handler = SIG_DFL; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + chk_error(sigaction(SIGSEGV, &act, NULL)); + + check(segv_caught, 1); + check(should_not_change, SHOULD_NOT_CHANGE_VAL); + + puts(err ? "FAIL" : "PASS"); + return err ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/tests/tcg/hexagon/hvx_histogram.c b/tests/tcg/hexagon/hvx_histogram.c new file mode 100644 index 000000000..43377a9ab --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram.c @@ -0,0 +1,88 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include "hvx_histogram_row.h" + +const int vector_len = 128; +const int width = 275; +const int height = 20; +const int stride = (width + vector_len - 1) & -vector_len; + +int err; + +static uint8_t input[height][stride] __attribute__((aligned(128))) = { +#include "hvx_histogram_input.h" +}; + +static int result[256] __attribute__((aligned(128))); +static int expect[256] __attribute__((aligned(128))); + +static void check(void) +{ + for (int i = 0; i < 256; i++) { + int res = result[i]; + int exp = expect[i]; + if (res != exp) { + printf("ERROR at %3d: 0x%04x != 0x%04x\n", + i, res, exp); + err++; + } + } +} + +static void ref_histogram(uint8_t *src, int stride, int width, int height, + int *hist) +{ + for (int i = 0; i < 256; i++) { + hist[i] = 0; + } + + for (int i = 0; i < height; i++) { + for (int j = 0; j < width; j++) { + hist[src[i * stride + j]]++; + } + } +} + +static void hvx_histogram(uint8_t *src, int stride, int width, int height, + int *hist) +{ + int n = 8192 / width; + + for (int i = 0; i < 256; i++) { + hist[i] = 0; + } + + for (int i = 0; i < height; i += n) { + int k = height - i > n ? n : height - i; + hvx_histogram_row(src, stride, width, k, hist); + src += n * stride; + } +} + +int main() +{ + ref_histogram(&input[0][0], stride, width, height, expect); + hvx_histogram(&input[0][0], stride, width, height, result); + check(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/hvx_histogram_input.h b/tests/tcg/hexagon/hvx_histogram_input.h new file mode 100644 index 000000000..2f9109255 --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_input.h @@ -0,0 +1,717 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + { 0x26, 0x32, 0x2e, 0x2e, 0x2d, 0x2c, 0x2d, 0x2d, + 0x2c, 0x2e, 0x31, 0x33, 0x36, 0x39, 0x3b, 0x3f, + 0x42, 0x46, 0x4a, 0x4c, 0x51, 0x53, 0x53, 0x54, + 0x56, 0x57, 0x58, 0x57, 0x56, 0x52, 0x51, 0x4f, + 0x4c, 0x49, 0x47, 0x42, 0x3e, 0x3b, 0x38, 0x35, + 0x33, 0x30, 0x2e, 0x2c, 0x2b, 0x2a, 0x2a, 0x28, + 0x28, 0x27, 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2e, + 0x2f, 0x33, 0x36, 0x38, 0x3c, 0x3d, 0x40, 0x42, + 0x43, 0x42, 0x43, 0x44, 0x43, 0x41, 0x40, 0x3b, + 0x3b, 0x3a, 0x38, 0x35, 0x32, 0x2f, 0x2c, 0x29, + 0x27, 0x26, 0x23, 0x21, 0x1e, 0x1c, 0x1a, 0x19, + 0x17, 0x15, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0d, 0x0e, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0c, 0x0c, 0x0d, 0x0c, 0x0f, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x12, 0x14, 0x16, 0x17, 0x19, + 0x1c, 0x1d, 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2f, + 0x31, 0x33, 0x36, 0x38, 0x39, 0x3a, 0x3b, 0x3c, + 0x3c, 0x3d, 0x3e, 0x3e, 0x3c, 0x3b, 0x3a, 0x39, + 0x39, 0x3a, 0x3a, 0x3a, 0x3a, 0x3c, 0x3e, 0x43, + 0x47, 0x4a, 0x4d, 0x51, 0x51, 0x54, 0x56, 0x56, + 0x57, 0x56, 0x53, 0x4f, 0x4b, 0x47, 0x43, 0x41, + 0x3e, 0x3c, 0x3a, 0x37, 0x36, 0x33, 0x32, 0x34, + 0x34, 0x34, 0x34, 0x35, 0x36, 0x39, 0x3d, 0x3d, + 0x3f, 0x40, 0x40, 0x40, 0x40, 0x3e, 0x40, 0x40, + 0x42, 0x44, 0x47, 0x48, 0x4b, 0x4e, 0x56, 0x5c, + 0x62, 0x68, 0x6f, 0x73, 0x76, 0x79, 0x7a, 0x7c, + 0x7e, 0x7c, 0x78, 0x72, 0x6e, 0x69, 0x65, 0x60, + 0x5b, 0x56, 0x52, 0x4d, 0x4a, 0x48, 0x47, 0x46, + 0x44, 0x43, 0x42, 0x41, 0x41, 0x41, 0x40, 0x40, + 0x3f, 0x3e, 0x3d, 0x3c, 0x3b, 0x3b, 0x38, 0x37, + 0x36, 0x35, 0x36, 0x35, 0x36, 0x37, 0x38, 0x3c, + 0x3d, 0x3f, 0x42, 0x44, 0x46, 0x48, 0x4b, 0x4c, + 0x4e, 0x4e, 0x4d, 0x4c, 0x4a, 0x48, 0x49, 0x49, + 0x4b, 0x4d, 0x4e, }, + { 0x23, 0x2d, 0x29, 0x29, 0x28, 0x28, 0x29, 0x29, + 0x28, 0x2b, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a, + 0x3d, 0x41, 0x44, 0x47, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48, + 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30, + 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, + 0x25, 0x24, 0x24, 0x24, 0x26, 0x28, 0x28, 0x2a, + 0x2b, 0x2e, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3d, 0x3e, 0x3e, 0x3e, 0x3c, 0x3b, 0x38, + 0x37, 0x35, 0x33, 0x30, 0x2e, 0x2b, 0x27, 0x25, + 0x24, 0x21, 0x20, 0x1d, 0x1b, 0x1a, 0x18, 0x16, + 0x15, 0x14, 0x13, 0x12, 0x10, 0x11, 0x10, 0x0e, + 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c, 0x0c, 0x0b, + 0x0b, 0x0b, 0x0c, 0x0b, 0x0b, 0x09, 0x0a, 0x0b, + 0x0b, 0x0a, 0x0a, 0x0c, 0x0c, 0x0c, 0x0d, 0x0e, + 0x0e, 0x0f, 0x0f, 0x11, 0x12, 0x15, 0x15, 0x17, + 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2a, + 0x2d, 0x30, 0x33, 0x34, 0x35, 0x35, 0x37, 0x37, + 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x36, 0x37, + 0x35, 0x36, 0x35, 0x35, 0x36, 0x37, 0x3a, 0x3e, + 0x40, 0x43, 0x48, 0x49, 0x4b, 0x4c, 0x4d, 0x4e, + 0x4f, 0x4f, 0x4c, 0x48, 0x45, 0x41, 0x3e, 0x3b, + 0x3a, 0x37, 0x36, 0x33, 0x32, 0x31, 0x30, 0x31, + 0x32, 0x31, 0x31, 0x31, 0x31, 0x34, 0x37, 0x38, + 0x3a, 0x3b, 0x3b, 0x3b, 0x3c, 0x3b, 0x3d, 0x3e, + 0x3f, 0x40, 0x43, 0x44, 0x47, 0x4b, 0x4f, 0x56, + 0x5a, 0x60, 0x66, 0x69, 0x6a, 0x6e, 0x71, 0x72, + 0x73, 0x72, 0x6d, 0x69, 0x66, 0x60, 0x5c, 0x59, + 0x54, 0x50, 0x4d, 0x48, 0x46, 0x44, 0x44, 0x43, + 0x42, 0x41, 0x41, 0x40, 0x3f, 0x3f, 0x3e, 0x3d, + 0x3d, 0x3d, 0x3c, 0x3a, 0x39, 0x38, 0x35, 0x35, + 0x34, 0x34, 0x35, 0x34, 0x35, 0x36, 0x39, 0x3c, + 0x3d, 0x3e, 0x41, 0x43, 0x44, 0x46, 0x48, 0x49, + 0x4a, 0x49, 0x48, 0x47, 0x45, 0x43, 0x43, 0x44, + 0x45, 0x47, 0x48, }, + { 0x23, 0x2d, 0x2a, 0x2a, 0x29, 0x29, 0x2a, 0x2a, + 0x29, 0x2c, 0x2d, 0x2f, 0x32, 0x34, 0x36, 0x3a, + 0x3d, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x51, 0x51, 0x4f, 0x4c, 0x4b, 0x48, + 0x46, 0x44, 0x40, 0x3d, 0x39, 0x36, 0x34, 0x30, + 0x2f, 0x2d, 0x2a, 0x29, 0x28, 0x27, 0x26, 0x25, + 0x25, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2a, + 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3e, 0x3d, 0x3e, 0x3c, 0x3c, 0x3a, + 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26, + 0x24, 0x21, 0x20, 0x1e, 0x1c, 0x1b, 0x18, 0x17, + 0x16, 0x14, 0x13, 0x12, 0x10, 0x10, 0x0f, 0x0e, + 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0b, 0x0b, 0x0c, 0x0c, 0x0c, 0x0b, 0x0b, 0x0c, + 0x0c, 0x0b, 0x0b, 0x0c, 0x0d, 0x0c, 0x0e, 0x0e, + 0x0e, 0x0f, 0x11, 0x11, 0x13, 0x14, 0x16, 0x18, + 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x26, 0x29, 0x2b, + 0x2d, 0x31, 0x33, 0x34, 0x36, 0x37, 0x38, 0x38, + 0x39, 0x3a, 0x39, 0x38, 0x37, 0x36, 0x37, 0x37, + 0x35, 0x36, 0x35, 0x36, 0x35, 0x38, 0x3a, 0x3e, + 0x40, 0x41, 0x45, 0x47, 0x49, 0x4a, 0x4c, 0x4d, + 0x4e, 0x4d, 0x4a, 0x47, 0x44, 0x40, 0x3d, 0x3b, + 0x39, 0x37, 0x34, 0x34, 0x32, 0x31, 0x31, 0x33, + 0x32, 0x31, 0x32, 0x33, 0x32, 0x36, 0x38, 0x39, + 0x3b, 0x3c, 0x3c, 0x3c, 0x3d, 0x3d, 0x3e, 0x3e, + 0x41, 0x42, 0x43, 0x45, 0x48, 0x4c, 0x50, 0x56, + 0x5b, 0x5f, 0x62, 0x67, 0x69, 0x6c, 0x6e, 0x6e, + 0x70, 0x6f, 0x6b, 0x67, 0x63, 0x5e, 0x5b, 0x58, + 0x54, 0x51, 0x4e, 0x4a, 0x48, 0x46, 0x46, 0x46, + 0x45, 0x46, 0x44, 0x43, 0x44, 0x43, 0x42, 0x42, + 0x41, 0x40, 0x3f, 0x3e, 0x3c, 0x3b, 0x3a, 0x39, + 0x39, 0x39, 0x38, 0x37, 0x37, 0x3a, 0x3e, 0x40, + 0x42, 0x43, 0x47, 0x47, 0x48, 0x4a, 0x4b, 0x4c, + 0x4c, 0x4b, 0x4a, 0x48, 0x46, 0x44, 0x43, 0x45, + 0x45, 0x46, 0x47, }, + { 0x21, 0x2b, 0x28, 0x28, 0x28, 0x28, 0x29, 0x29, + 0x28, 0x2a, 0x2d, 0x30, 0x32, 0x34, 0x37, 0x3a, + 0x3c, 0x40, 0x44, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2b, 0x2a, 0x28, 0x27, 0x26, 0x25, + 0x24, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2d, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3e, 0x3e, 0x3e, 0x3d, 0x3c, 0x3a, + 0x37, 0x35, 0x33, 0x30, 0x2f, 0x2b, 0x28, 0x26, + 0x25, 0x21, 0x20, 0x1e, 0x1c, 0x19, 0x19, 0x18, + 0x17, 0x15, 0x15, 0x12, 0x11, 0x11, 0x11, 0x0f, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0e, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x16, 0x18, + 0x1a, 0x1c, 0x1f, 0x22, 0x25, 0x28, 0x29, 0x2d, + 0x2f, 0x32, 0x34, 0x35, 0x36, 0x37, 0x38, 0x38, + 0x39, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36, + 0x35, 0x35, 0x37, 0x35, 0x36, 0x37, 0x3a, 0x3d, + 0x3e, 0x41, 0x43, 0x46, 0x46, 0x47, 0x48, 0x49, + 0x4a, 0x49, 0x47, 0x45, 0x42, 0x3f, 0x3d, 0x3b, + 0x3a, 0x38, 0x36, 0x34, 0x32, 0x32, 0x32, 0x32, + 0x32, 0x31, 0x33, 0x32, 0x34, 0x37, 0x38, 0x38, + 0x3a, 0x3b, 0x3d, 0x3d, 0x3d, 0x3e, 0x3f, 0x41, + 0x42, 0x44, 0x44, 0x46, 0x49, 0x4d, 0x50, 0x54, + 0x58, 0x5c, 0x61, 0x63, 0x65, 0x69, 0x6a, 0x6c, + 0x6d, 0x6c, 0x68, 0x64, 0x61, 0x5c, 0x59, 0x57, + 0x53, 0x51, 0x4f, 0x4c, 0x4a, 0x48, 0x48, 0x49, + 0x49, 0x48, 0x48, 0x48, 0x47, 0x47, 0x46, 0x46, + 0x45, 0x44, 0x42, 0x41, 0x3f, 0x3e, 0x3c, 0x3c, + 0x3c, 0x3d, 0x3c, 0x3c, 0x3c, 0x3e, 0x41, 0x43, + 0x46, 0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4e, + 0x4e, 0x4d, 0x4b, 0x49, 0x47, 0x44, 0x44, 0x45, + 0x45, 0x45, 0x46, }, + { 0x22, 0x2b, 0x27, 0x27, 0x27, 0x27, 0x28, 0x28, + 0x28, 0x2a, 0x2c, 0x2f, 0x30, 0x34, 0x37, 0x3b, + 0x3d, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x47, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2b, 0x2a, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3b, + 0x3d, 0x3e, 0x3e, 0x3f, 0x3f, 0x3d, 0x3c, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26, + 0x25, 0x22, 0x20, 0x1e, 0x1c, 0x1a, 0x19, 0x18, + 0x16, 0x15, 0x14, 0x12, 0x10, 0x11, 0x11, 0x0f, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0d, 0x0c, 0x0d, 0x0c, + 0x0c, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, + 0x0c, 0x0c, 0x0c, 0x0d, 0x0d, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x11, 0x13, 0x13, 0x15, 0x15, 0x18, + 0x19, 0x1d, 0x1f, 0x21, 0x24, 0x27, 0x2a, 0x2c, + 0x30, 0x33, 0x35, 0x36, 0x37, 0x38, 0x39, 0x39, + 0x3a, 0x3a, 0x39, 0x39, 0x37, 0x36, 0x37, 0x36, + 0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x39, 0x3a, + 0x3d, 0x3e, 0x41, 0x43, 0x43, 0x45, 0x46, 0x46, + 0x47, 0x46, 0x44, 0x42, 0x40, 0x3d, 0x3a, 0x39, + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x32, 0x32, + 0x32, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, + 0x39, 0x3c, 0x3c, 0x3e, 0x3e, 0x3e, 0x41, 0x43, + 0x44, 0x45, 0x46, 0x48, 0x49, 0x4c, 0x51, 0x54, + 0x56, 0x5a, 0x5f, 0x61, 0x63, 0x65, 0x67, 0x69, + 0x6a, 0x69, 0x67, 0x61, 0x5f, 0x5b, 0x58, 0x56, + 0x54, 0x51, 0x50, 0x4e, 0x4c, 0x4a, 0x4b, 0x4c, + 0x4c, 0x4b, 0x4b, 0x4b, 0x4b, 0x49, 0x4a, 0x49, + 0x49, 0x48, 0x46, 0x44, 0x42, 0x41, 0x40, 0x3f, + 0x3f, 0x40, 0x40, 0x40, 0x40, 0x42, 0x46, 0x49, + 0x4b, 0x4c, 0x4f, 0x4f, 0x50, 0x52, 0x51, 0x51, + 0x50, 0x4f, 0x4c, 0x4a, 0x48, 0x46, 0x45, 0x44, + 0x44, 0x45, 0x46, }, + { 0x21, 0x2a, 0x27, 0x27, 0x27, 0x27, 0x27, 0x27, + 0x27, 0x29, 0x2d, 0x2f, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4b, 0x48, + 0x45, 0x43, 0x3f, 0x3c, 0x39, 0x36, 0x33, 0x2f, + 0x2f, 0x2d, 0x2a, 0x2a, 0x27, 0x26, 0x25, 0x24, + 0x22, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x31, 0x34, 0x37, 0x39, 0x3a, 0x3c, + 0x3d, 0x3e, 0x3f, 0x40, 0x3f, 0x3d, 0x3d, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2e, 0x2c, 0x29, 0x26, + 0x25, 0x22, 0x21, 0x1f, 0x1d, 0x1b, 0x19, 0x18, + 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f, + 0x0f, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0c, 0x0b, 0x0b, 0x0b, 0x0b, 0x0c, + 0x0c, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0f, 0x0f, + 0x0f, 0x10, 0x13, 0x13, 0x14, 0x15, 0x17, 0x19, + 0x1a, 0x1d, 0x1f, 0x22, 0x25, 0x27, 0x2a, 0x2e, + 0x31, 0x33, 0x35, 0x38, 0x39, 0x3a, 0x3b, 0x3b, + 0x3c, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x38, 0x37, + 0x36, 0x36, 0x37, 0x36, 0x37, 0x38, 0x38, 0x3a, + 0x3b, 0x3e, 0x40, 0x40, 0x41, 0x42, 0x43, 0x42, + 0x43, 0x42, 0x40, 0x40, 0x3f, 0x3c, 0x3b, 0x39, + 0x38, 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x33, + 0x32, 0x32, 0x34, 0x35, 0x35, 0x36, 0x39, 0x39, + 0x3a, 0x3c, 0x3c, 0x3f, 0x40, 0x41, 0x43, 0x45, + 0x45, 0x47, 0x48, 0x4a, 0x4b, 0x4d, 0x50, 0x53, + 0x56, 0x59, 0x5c, 0x5f, 0x60, 0x65, 0x64, 0x66, + 0x68, 0x66, 0x64, 0x61, 0x5e, 0x5a, 0x59, 0x56, + 0x54, 0x52, 0x51, 0x50, 0x4e, 0x4c, 0x4d, 0x4f, + 0x4f, 0x4f, 0x50, 0x50, 0x4f, 0x4f, 0x4e, 0x4d, + 0x4c, 0x4b, 0x49, 0x47, 0x45, 0x44, 0x43, 0x43, + 0x42, 0x43, 0x44, 0x44, 0x46, 0x47, 0x49, 0x4d, + 0x4f, 0x51, 0x53, 0x54, 0x53, 0x54, 0x54, 0x53, + 0x53, 0x51, 0x4e, 0x4b, 0x4a, 0x47, 0x45, 0x44, + 0x44, 0x45, 0x46, }, + { 0x20, 0x28, 0x26, 0x26, 0x25, 0x24, 0x27, 0x27, + 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x41, 0x45, 0x48, 0x4a, 0x4c, 0x4e, 0x4e, + 0x50, 0x51, 0x52, 0x51, 0x4f, 0x4b, 0x4a, 0x49, + 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2e, 0x31, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3d, 0x3e, 0x3f, 0x40, 0x3e, 0x3d, 0x3d, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x2f, 0x2c, 0x29, 0x27, + 0x25, 0x21, 0x21, 0x1f, 0x1c, 0x1d, 0x19, 0x18, + 0x16, 0x15, 0x15, 0x13, 0x12, 0x11, 0x11, 0x0f, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x10, + 0x10, 0x10, 0x12, 0x13, 0x15, 0x16, 0x18, 0x1a, + 0x1c, 0x1d, 0x20, 0x22, 0x25, 0x27, 0x2a, 0x2e, + 0x30, 0x34, 0x38, 0x39, 0x3a, 0x3b, 0x3b, 0x3b, + 0x3c, 0x3d, 0x3c, 0x3b, 0x3a, 0x39, 0x38, 0x37, + 0x36, 0x36, 0x38, 0x37, 0x37, 0x37, 0x38, 0x3a, + 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x40, 0x40, + 0x42, 0x40, 0x3f, 0x3e, 0x3d, 0x3b, 0x3a, 0x39, + 0x37, 0x36, 0x36, 0x35, 0x34, 0x34, 0x33, 0x33, + 0x33, 0x34, 0x35, 0x35, 0x35, 0x36, 0x38, 0x39, + 0x3a, 0x3b, 0x3d, 0x3f, 0x42, 0x43, 0x45, 0x45, + 0x46, 0x48, 0x49, 0x4b, 0x4b, 0x4d, 0x50, 0x53, + 0x56, 0x57, 0x5a, 0x5c, 0x5e, 0x61, 0x63, 0x65, + 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x59, 0x58, 0x56, + 0x55, 0x54, 0x52, 0x51, 0x50, 0x51, 0x51, 0x52, + 0x52, 0x52, 0x52, 0x52, 0x51, 0x51, 0x51, 0x50, + 0x4f, 0x4e, 0x4c, 0x4a, 0x47, 0x46, 0x45, 0x45, + 0x45, 0x46, 0x46, 0x46, 0x4a, 0x4c, 0x4d, 0x52, + 0x54, 0x56, 0x58, 0x58, 0x56, 0x57, 0x57, 0x56, + 0x55, 0x53, 0x50, 0x4d, 0x49, 0x45, 0x44, 0x44, + 0x43, 0x44, 0x45, }, + { 0x1f, 0x27, 0x24, 0x23, 0x25, 0x24, 0x25, 0x26, + 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3d, 0x41, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x49, + 0x45, 0x43, 0x3f, 0x3c, 0x3a, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x29, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x25, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3c, + 0x3e, 0x3f, 0x3f, 0x40, 0x3e, 0x3d, 0x3c, 0x3a, + 0x38, 0x36, 0x34, 0x31, 0x30, 0x2c, 0x29, 0x28, + 0x25, 0x23, 0x22, 0x1f, 0x1c, 0x1c, 0x18, 0x18, + 0x16, 0x14, 0x14, 0x13, 0x11, 0x11, 0x11, 0x0f, + 0x0f, 0x0e, 0x0f, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0c, 0x0c, 0x0b, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, + 0x0d, 0x0e, 0x0e, 0x0f, 0x0d, 0x0f, 0x10, 0x10, + 0x10, 0x11, 0x13, 0x14, 0x15, 0x16, 0x19, 0x1a, + 0x1c, 0x1f, 0x20, 0x23, 0x26, 0x28, 0x2a, 0x2e, + 0x31, 0x35, 0x38, 0x39, 0x3a, 0x3c, 0x3d, 0x3d, + 0x3e, 0x3e, 0x3d, 0x3c, 0x3a, 0x3a, 0x39, 0x39, + 0x38, 0x37, 0x38, 0x38, 0x37, 0x38, 0x39, 0x3a, + 0x3c, 0x3c, 0x3d, 0x3e, 0x3f, 0x3f, 0x40, 0x3f, + 0x41, 0x40, 0x3e, 0x3e, 0x3d, 0x3b, 0x3b, 0x39, + 0x37, 0x37, 0x35, 0x36, 0x34, 0x34, 0x34, 0x35, + 0x35, 0x34, 0x34, 0x35, 0x35, 0x37, 0x38, 0x39, + 0x3a, 0x3c, 0x3f, 0x3f, 0x43, 0x43, 0x45, 0x47, + 0x48, 0x48, 0x4a, 0x4b, 0x4e, 0x4d, 0x51, 0x53, + 0x56, 0x58, 0x59, 0x5b, 0x5d, 0x60, 0x62, 0x63, + 0x64, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x57, 0x56, + 0x55, 0x54, 0x53, 0x52, 0x51, 0x51, 0x52, 0x52, + 0x54, 0x54, 0x55, 0x55, 0x55, 0x54, 0x54, 0x53, + 0x52, 0x50, 0x4e, 0x4d, 0x4b, 0x4a, 0x48, 0x48, + 0x48, 0x48, 0x4a, 0x4b, 0x4d, 0x4f, 0x52, 0x55, + 0x58, 0x5a, 0x5b, 0x5b, 0x5b, 0x5b, 0x5a, 0x59, + 0x58, 0x55, 0x51, 0x4e, 0x4a, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1e, 0x26, 0x23, 0x23, 0x25, 0x24, 0x25, 0x26, + 0x26, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x48, + 0x46, 0x44, 0x3f, 0x3b, 0x39, 0x36, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3b, 0x3d, + 0x3e, 0x3f, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3b, + 0x38, 0x37, 0x34, 0x32, 0x30, 0x2c, 0x2a, 0x27, + 0x26, 0x23, 0x22, 0x20, 0x1d, 0x1b, 0x1a, 0x19, + 0x17, 0x15, 0x15, 0x13, 0x12, 0x12, 0x11, 0x0f, + 0x11, 0x0f, 0x0e, 0x0e, 0x0d, 0x0d, 0x0d, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, + 0x0e, 0x0e, 0x0e, 0x0f, 0x10, 0x10, 0x11, 0x11, + 0x11, 0x13, 0x16, 0x15, 0x15, 0x18, 0x1a, 0x1b, + 0x1d, 0x20, 0x22, 0x24, 0x27, 0x29, 0x2c, 0x30, + 0x33, 0x37, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3e, + 0x40, 0x40, 0x40, 0x3f, 0x3e, 0x3d, 0x3c, 0x3a, + 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3a, 0x3b, 0x3d, + 0x3d, 0x3f, 0x40, 0x40, 0x3f, 0x41, 0x41, 0x41, + 0x41, 0x41, 0x40, 0x40, 0x3f, 0x3e, 0x3c, 0x3b, + 0x3a, 0x39, 0x37, 0x36, 0x36, 0x35, 0x35, 0x36, + 0x36, 0x35, 0x35, 0x36, 0x36, 0x38, 0x39, 0x39, + 0x3b, 0x3c, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x47, + 0x48, 0x48, 0x4b, 0x4c, 0x4d, 0x4f, 0x51, 0x53, + 0x56, 0x56, 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x62, + 0x63, 0x63, 0x61, 0x5e, 0x5c, 0x5a, 0x59, 0x57, + 0x56, 0x54, 0x54, 0x53, 0x52, 0x53, 0x53, 0x55, + 0x56, 0x56, 0x57, 0x57, 0x57, 0x57, 0x56, 0x56, + 0x55, 0x53, 0x51, 0x4f, 0x4d, 0x4b, 0x49, 0x4b, + 0x4b, 0x4c, 0x4d, 0x4e, 0x51, 0x53, 0x55, 0x58, + 0x5b, 0x5c, 0x60, 0x60, 0x5f, 0x5e, 0x5d, 0x5c, + 0x5a, 0x57, 0x53, 0x4f, 0x4b, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1d, 0x25, 0x22, 0x22, 0x23, 0x23, 0x24, 0x25, + 0x25, 0x28, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4f, 0x4b, 0x4a, 0x47, + 0x45, 0x43, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2b, 0x2f, 0x32, 0x34, 0x37, 0x39, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3e, 0x3d, 0x3b, + 0x39, 0x36, 0x34, 0x32, 0x30, 0x2d, 0x2a, 0x26, + 0x26, 0x24, 0x22, 0x1f, 0x1d, 0x1c, 0x1a, 0x19, + 0x18, 0x16, 0x15, 0x14, 0x12, 0x12, 0x12, 0x10, + 0x10, 0x0f, 0x0e, 0x10, 0x0e, 0x0e, 0x0d, 0x0c, + 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e, + 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x11, 0x12, + 0x13, 0x14, 0x16, 0x16, 0x18, 0x1a, 0x1b, 0x1c, + 0x1e, 0x21, 0x23, 0x25, 0x28, 0x2a, 0x2e, 0x32, + 0x34, 0x38, 0x3a, 0x3c, 0x3d, 0x3f, 0x40, 0x42, + 0x43, 0x43, 0x43, 0x42, 0x40, 0x3e, 0x3e, 0x3c, + 0x3b, 0x3b, 0x3c, 0x3a, 0x3b, 0x3b, 0x3e, 0x3e, + 0x40, 0x3f, 0x41, 0x41, 0x41, 0x42, 0x42, 0x43, + 0x42, 0x41, 0x41, 0x41, 0x40, 0x3e, 0x3d, 0x3c, + 0x3b, 0x3a, 0x39, 0x37, 0x36, 0x35, 0x36, 0x37, + 0x35, 0x36, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, + 0x3b, 0x3d, 0x3e, 0x40, 0x41, 0x41, 0x44, 0x46, + 0x48, 0x48, 0x4a, 0x4c, 0x4d, 0x4f, 0x51, 0x53, + 0x55, 0x57, 0x59, 0x5a, 0x5b, 0x5e, 0x5f, 0x61, + 0x62, 0x61, 0x60, 0x5e, 0x5c, 0x5a, 0x59, 0x58, + 0x56, 0x55, 0x54, 0x53, 0x53, 0x54, 0x54, 0x55, + 0x57, 0x57, 0x58, 0x59, 0x5a, 0x58, 0x59, 0x58, + 0x57, 0x55, 0x53, 0x52, 0x4f, 0x4e, 0x4d, 0x4d, + 0x4d, 0x4f, 0x51, 0x50, 0x54, 0x56, 0x59, 0x5c, + 0x5f, 0x61, 0x64, 0x64, 0x63, 0x61, 0x5e, 0x5e, + 0x5c, 0x59, 0x54, 0x50, 0x4c, 0x46, 0x45, 0x44, + 0x44, 0x44, 0x44, }, + { 0x1c, 0x24, 0x21, 0x21, 0x21, 0x22, 0x23, 0x23, + 0x25, 0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3e, 0x42, 0x45, 0x48, 0x4b, 0x4c, 0x50, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4a, 0x49, + 0x45, 0x42, 0x3f, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2b, 0x2f, 0x32, 0x34, 0x38, 0x39, 0x3c, 0x3d, + 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3e, 0x3c, 0x3a, + 0x39, 0x37, 0x35, 0x33, 0x30, 0x2d, 0x2b, 0x28, + 0x26, 0x23, 0x23, 0x20, 0x1e, 0x1b, 0x19, 0x19, + 0x17, 0x16, 0x15, 0x14, 0x12, 0x12, 0x11, 0x10, + 0x0f, 0x0e, 0x0e, 0x10, 0x0e, 0x0d, 0x0c, 0x0c, + 0x0c, 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0d, 0x0e, + 0x0f, 0x0f, 0x0f, 0x10, 0x11, 0x11, 0x12, 0x14, + 0x14, 0x14, 0x16, 0x18, 0x19, 0x1b, 0x1c, 0x1e, + 0x20, 0x23, 0x26, 0x27, 0x29, 0x2c, 0x2f, 0x33, + 0x36, 0x38, 0x3b, 0x3e, 0x3e, 0x42, 0x43, 0x46, + 0x46, 0x46, 0x46, 0x44, 0x42, 0x41, 0x3f, 0x3e, + 0x3d, 0x3d, 0x3e, 0x3d, 0x3d, 0x3e, 0x3e, 0x40, + 0x40, 0x40, 0x43, 0x43, 0x42, 0x43, 0x45, 0x43, + 0x43, 0x43, 0x42, 0x42, 0x41, 0x40, 0x40, 0x3e, + 0x3c, 0x3a, 0x3a, 0x38, 0x36, 0x36, 0x36, 0x36, + 0x37, 0x37, 0x36, 0x38, 0x38, 0x39, 0x3b, 0x3b, + 0x3e, 0x3e, 0x3e, 0x40, 0x41, 0x43, 0x45, 0x46, + 0x46, 0x49, 0x4c, 0x4c, 0x4d, 0x4f, 0x51, 0x54, + 0x56, 0x57, 0x58, 0x5a, 0x5c, 0x5e, 0x60, 0x60, + 0x61, 0x61, 0x60, 0x5f, 0x5c, 0x5a, 0x59, 0x58, + 0x57, 0x57, 0x55, 0x54, 0x53, 0x55, 0x55, 0x58, + 0x58, 0x59, 0x5a, 0x5a, 0x5a, 0x5b, 0x5b, 0x5b, + 0x5a, 0x59, 0x56, 0x54, 0x53, 0x4e, 0x4e, 0x50, + 0x50, 0x51, 0x52, 0x52, 0x57, 0x59, 0x5d, 0x60, + 0x63, 0x63, 0x66, 0x66, 0x66, 0x64, 0x63, 0x61, + 0x60, 0x5b, 0x55, 0x51, 0x4d, 0x48, 0x45, 0x44, + 0x43, 0x43, 0x43, }, + { 0x1b, 0x23, 0x20, 0x21, 0x22, 0x22, 0x23, 0x24, + 0x26, 0x27, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3d, 0x42, 0x46, 0x49, 0x4a, 0x4c, 0x4f, 0x4f, + 0x50, 0x50, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x49, + 0x45, 0x42, 0x3e, 0x3c, 0x38, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3e, 0x40, 0x41, 0x40, 0x3f, 0x3d, 0x3b, + 0x3a, 0x38, 0x36, 0x33, 0x30, 0x2d, 0x2b, 0x29, + 0x27, 0x24, 0x24, 0x21, 0x1e, 0x1c, 0x1b, 0x1a, + 0x18, 0x17, 0x16, 0x15, 0x13, 0x12, 0x10, 0x0f, + 0x10, 0x0f, 0x0e, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, + 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0f, 0x0e, 0x0f, + 0x10, 0x11, 0x11, 0x12, 0x13, 0x13, 0x14, 0x15, + 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x1d, 0x1e, 0x20, + 0x21, 0x25, 0x27, 0x29, 0x2b, 0x2d, 0x31, 0x35, + 0x37, 0x39, 0x3c, 0x3f, 0x40, 0x43, 0x46, 0x47, + 0x4a, 0x49, 0x48, 0x46, 0x45, 0x43, 0x42, 0x41, + 0x3f, 0x40, 0x3f, 0x3f, 0x40, 0x3f, 0x41, 0x43, + 0x43, 0x43, 0x44, 0x45, 0x45, 0x45, 0x45, 0x45, + 0x45, 0x45, 0x44, 0x43, 0x43, 0x42, 0x42, 0x40, + 0x3e, 0x3d, 0x3c, 0x39, 0x38, 0x38, 0x38, 0x38, + 0x38, 0x36, 0x38, 0x39, 0x39, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3e, 0x3f, 0x41, 0x42, 0x42, 0x43, 0x45, + 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x50, 0x53, 0x54, + 0x57, 0x58, 0x5a, 0x5c, 0x5b, 0x5e, 0x60, 0x61, + 0x60, 0x60, 0x5f, 0x5f, 0x5d, 0x5b, 0x5b, 0x59, + 0x58, 0x57, 0x56, 0x55, 0x55, 0x55, 0x57, 0x59, + 0x5b, 0x5b, 0x5d, 0x5c, 0x5c, 0x5e, 0x5e, 0x5e, + 0x5d, 0x5b, 0x59, 0x56, 0x54, 0x51, 0x51, 0x51, + 0x52, 0x55, 0x56, 0x56, 0x5a, 0x5d, 0x5f, 0x63, + 0x66, 0x68, 0x6b, 0x6b, 0x68, 0x67, 0x66, 0x64, + 0x61, 0x5d, 0x57, 0x52, 0x4f, 0x49, 0x46, 0x45, + 0x43, 0x43, 0x43, }, + { 0x1a, 0x22, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, + 0x26, 0x27, 0x2a, 0x2d, 0x31, 0x33, 0x37, 0x3b, + 0x3d, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x52, 0x50, 0x4e, 0x4b, 0x4b, 0x48, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x33, 0x30, + 0x2f, 0x2d, 0x2a, 0x28, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x27, 0x29, 0x2a, + 0x2d, 0x2f, 0x32, 0x35, 0x39, 0x3a, 0x3c, 0x3d, + 0x3e, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c, + 0x3a, 0x38, 0x36, 0x33, 0x31, 0x2d, 0x2c, 0x29, + 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a, + 0x19, 0x18, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10, + 0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, + 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, + 0x11, 0x12, 0x12, 0x13, 0x15, 0x15, 0x16, 0x16, + 0x17, 0x18, 0x1a, 0x1b, 0x1c, 0x1e, 0x1f, 0x21, + 0x22, 0x25, 0x27, 0x2a, 0x2c, 0x2e, 0x33, 0x36, + 0x39, 0x3a, 0x3d, 0x40, 0x41, 0x45, 0x47, 0x4a, + 0x4c, 0x4d, 0x4c, 0x4a, 0x48, 0x45, 0x44, 0x41, + 0x42, 0x42, 0x42, 0x42, 0x42, 0x43, 0x43, 0x44, + 0x45, 0x47, 0x47, 0x48, 0x47, 0x48, 0x47, 0x47, + 0x48, 0x48, 0x46, 0x46, 0x46, 0x43, 0x43, 0x41, + 0x3f, 0x3e, 0x3b, 0x39, 0x38, 0x37, 0x37, 0x37, + 0x38, 0x38, 0x37, 0x39, 0x39, 0x3a, 0x3c, 0x3e, + 0x3e, 0x3f, 0x3f, 0x3f, 0x42, 0x43, 0x43, 0x45, + 0x47, 0x48, 0x4b, 0x4c, 0x4e, 0x50, 0x51, 0x54, + 0x56, 0x58, 0x5a, 0x5c, 0x5c, 0x5f, 0x5f, 0x5f, + 0x61, 0x60, 0x5f, 0x5f, 0x5e, 0x5b, 0x5c, 0x5b, + 0x59, 0x59, 0x57, 0x56, 0x55, 0x56, 0x57, 0x59, + 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5e, 0x5d, + 0x5e, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x51, 0x52, + 0x53, 0x55, 0x57, 0x58, 0x5c, 0x5e, 0x61, 0x65, + 0x69, 0x6b, 0x6c, 0x6b, 0x6a, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x59, 0x53, 0x4d, 0x48, 0x46, 0x45, + 0x44, 0x44, 0x43, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24, + 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x33, 0x37, 0x3b, + 0x3e, 0x41, 0x46, 0x49, 0x4b, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x30, + 0x2f, 0x2d, 0x29, 0x27, 0x27, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x26, 0x27, 0x29, 0x2a, + 0x2c, 0x2f, 0x32, 0x35, 0x38, 0x3b, 0x3c, 0x3e, + 0x3f, 0x3f, 0x40, 0x41, 0x40, 0x3f, 0x3e, 0x3c, + 0x3a, 0x39, 0x36, 0x34, 0x31, 0x2d, 0x2c, 0x29, + 0x27, 0x26, 0x24, 0x21, 0x1f, 0x1d, 0x1c, 0x1a, + 0x19, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x10, + 0x11, 0x10, 0x0f, 0x0f, 0x0f, 0x0e, 0x0e, 0x0e, + 0x0e, 0x0e, 0x0e, 0x0e, 0x0e, 0x0f, 0x0f, 0x10, + 0x11, 0x13, 0x14, 0x14, 0x15, 0x16, 0x17, 0x19, + 0x19, 0x1a, 0x1c, 0x1d, 0x1e, 0x20, 0x22, 0x24, + 0x25, 0x27, 0x29, 0x2c, 0x2e, 0x31, 0x35, 0x38, + 0x3a, 0x3d, 0x41, 0x42, 0x45, 0x48, 0x4c, 0x4e, + 0x4f, 0x4f, 0x4f, 0x4d, 0x4b, 0x49, 0x47, 0x47, + 0x46, 0x45, 0x45, 0x45, 0x44, 0x44, 0x46, 0x47, + 0x48, 0x49, 0x4b, 0x4b, 0x4a, 0x4b, 0x4b, 0x4a, + 0x4b, 0x4a, 0x49, 0x49, 0x48, 0x46, 0x46, 0x44, + 0x42, 0x41, 0x3d, 0x3b, 0x3a, 0x38, 0x38, 0x38, + 0x37, 0x37, 0x39, 0x38, 0x3a, 0x3a, 0x3c, 0x3c, + 0x3e, 0x40, 0x40, 0x41, 0x43, 0x43, 0x45, 0x46, + 0x48, 0x49, 0x4b, 0x4e, 0x4f, 0x50, 0x53, 0x55, + 0x57, 0x59, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, + 0x60, 0x60, 0x5f, 0x5f, 0x5e, 0x5c, 0x5b, 0x5a, + 0x59, 0x58, 0x57, 0x57, 0x56, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x5b, 0x5c, 0x5c, 0x5d, 0x5e, 0x5d, + 0x5c, 0x5b, 0x58, 0x57, 0x54, 0x52, 0x52, 0x53, + 0x54, 0x57, 0x58, 0x58, 0x5b, 0x5e, 0x62, 0x65, + 0x69, 0x6b, 0x6d, 0x6c, 0x6a, 0x69, 0x67, 0x64, + 0x62, 0x5e, 0x59, 0x54, 0x4d, 0x48, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x20, 0x21, 0x23, 0x24, + 0x25, 0x28, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3e, 0x42, 0x47, 0x49, 0x4b, 0x4d, 0x4f, 0x4f, + 0x50, 0x51, 0x51, 0x50, 0x50, 0x4c, 0x4a, 0x47, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x35, 0x32, 0x31, + 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x25, 0x25, 0x26, 0x27, 0x29, 0x2b, + 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x40, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d, + 0x3b, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a, + 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1d, 0x1c, 0x1b, + 0x19, 0x17, 0x17, 0x16, 0x15, 0x14, 0x12, 0x11, + 0x11, 0x11, 0x10, 0x10, 0x0f, 0x0f, 0x0f, 0x0f, + 0x0f, 0x0f, 0x10, 0x11, 0x10, 0x11, 0x11, 0x12, + 0x11, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1b, + 0x1c, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x23, 0x25, + 0x27, 0x2a, 0x2c, 0x2f, 0x31, 0x35, 0x38, 0x3b, + 0x3d, 0x40, 0x44, 0x47, 0x49, 0x4c, 0x4f, 0x51, + 0x53, 0x53, 0x53, 0x51, 0x50, 0x4e, 0x4c, 0x4b, + 0x4a, 0x49, 0x49, 0x49, 0x49, 0x4a, 0x4a, 0x4d, + 0x4e, 0x4e, 0x4f, 0x50, 0x4f, 0x50, 0x51, 0x50, + 0x50, 0x4e, 0x4d, 0x4c, 0x4b, 0x48, 0x48, 0x47, + 0x44, 0x42, 0x3f, 0x3d, 0x3b, 0x3a, 0x39, 0x39, + 0x39, 0x38, 0x39, 0x3b, 0x3a, 0x3c, 0x3e, 0x3d, + 0x40, 0x40, 0x40, 0x42, 0x42, 0x42, 0x45, 0x46, + 0x47, 0x49, 0x4c, 0x4e, 0x50, 0x50, 0x53, 0x56, + 0x58, 0x59, 0x5d, 0x5d, 0x5e, 0x60, 0x61, 0x61, + 0x62, 0x61, 0x60, 0x60, 0x5e, 0x5d, 0x5d, 0x5b, + 0x57, 0x58, 0x56, 0x55, 0x55, 0x56, 0x56, 0x59, + 0x59, 0x58, 0x5a, 0x5a, 0x5a, 0x5c, 0x5c, 0x5c, + 0x5b, 0x5b, 0x58, 0x57, 0x54, 0x53, 0x52, 0x53, + 0x54, 0x57, 0x58, 0x59, 0x5c, 0x5f, 0x63, 0x67, + 0x6b, 0x6d, 0x6e, 0x6e, 0x6b, 0x6a, 0x68, 0x64, + 0x62, 0x5e, 0x58, 0x53, 0x4f, 0x49, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x19, 0x20, 0x1e, 0x1e, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2a, 0x2e, 0x31, 0x34, 0x37, 0x3a, + 0x3e, 0x41, 0x46, 0x49, 0x4a, 0x4d, 0x4f, 0x4e, + 0x50, 0x51, 0x51, 0x4f, 0x4f, 0x4d, 0x49, 0x47, + 0x44, 0x42, 0x3e, 0x3c, 0x39, 0x36, 0x32, 0x31, + 0x2f, 0x2d, 0x29, 0x27, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x25, 0x25, 0x26, 0x28, 0x29, 0x2b, + 0x2c, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3f, 0x3d, + 0x3c, 0x39, 0x36, 0x33, 0x32, 0x2e, 0x2d, 0x2a, + 0x27, 0x26, 0x25, 0x22, 0x1f, 0x1e, 0x1d, 0x1b, + 0x1a, 0x17, 0x17, 0x17, 0x14, 0x14, 0x12, 0x11, + 0x11, 0x12, 0x11, 0x11, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x11, 0x11, 0x11, 0x12, 0x13, 0x14, + 0x14, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1c, 0x1e, + 0x1e, 0x1f, 0x22, 0x23, 0x23, 0x24, 0x25, 0x27, + 0x2a, 0x2d, 0x2f, 0x31, 0x35, 0x38, 0x3a, 0x3e, + 0x41, 0x44, 0x48, 0x4b, 0x4d, 0x51, 0x53, 0x55, + 0x57, 0x57, 0x56, 0x55, 0x54, 0x52, 0x52, 0x50, + 0x4e, 0x50, 0x4e, 0x4d, 0x4d, 0x4d, 0x4f, 0x51, + 0x51, 0x52, 0x54, 0x55, 0x55, 0x55, 0x57, 0x55, + 0x54, 0x53, 0x52, 0x4e, 0x4d, 0x4b, 0x4a, 0x49, + 0x46, 0x44, 0x41, 0x3f, 0x3d, 0x3b, 0x3a, 0x3a, + 0x39, 0x39, 0x39, 0x39, 0x3a, 0x3b, 0x3d, 0x3e, + 0x3f, 0x40, 0x41, 0x42, 0x44, 0x44, 0x45, 0x47, + 0x49, 0x49, 0x4a, 0x4d, 0x50, 0x51, 0x53, 0x57, + 0x5a, 0x5b, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x62, + 0x63, 0x62, 0x60, 0x60, 0x5e, 0x5c, 0x5c, 0x59, + 0x58, 0x56, 0x55, 0x55, 0x55, 0x55, 0x55, 0x54, + 0x56, 0x56, 0x57, 0x58, 0x58, 0x59, 0x5a, 0x59, + 0x58, 0x57, 0x56, 0x55, 0x54, 0x52, 0x53, 0x53, + 0x53, 0x56, 0x57, 0x59, 0x5b, 0x5e, 0x62, 0x66, + 0x6a, 0x6c, 0x6d, 0x6e, 0x6b, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x58, 0x54, 0x50, 0x4a, 0x47, 0x46, + 0x45, 0x45, 0x44, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2e, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x49, 0x4a, 0x4d, 0x4e, 0x4e, + 0x51, 0x52, 0x50, 0x4f, 0x4f, 0x4c, 0x49, 0x48, + 0x45, 0x42, 0x3e, 0x3b, 0x39, 0x36, 0x32, 0x32, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x25, 0x28, 0x29, 0x2b, + 0x2d, 0x2f, 0x33, 0x35, 0x38, 0x3a, 0x3c, 0x3e, + 0x3f, 0x3f, 0x41, 0x42, 0x41, 0x3f, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x33, 0x32, 0x2f, 0x2d, 0x2b, + 0x28, 0x26, 0x25, 0x22, 0x20, 0x1e, 0x1d, 0x1b, + 0x1a, 0x17, 0x17, 0x16, 0x14, 0x14, 0x12, 0x11, + 0x12, 0x11, 0x11, 0x11, 0x11, 0x10, 0x10, 0x10, + 0x10, 0x11, 0x12, 0x12, 0x12, 0x13, 0x14, 0x14, + 0x16, 0x18, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, + 0x21, 0x22, 0x23, 0x25, 0x26, 0x26, 0x28, 0x2a, + 0x2c, 0x2e, 0x32, 0x34, 0x39, 0x39, 0x3d, 0x41, + 0x45, 0x47, 0x4c, 0x4e, 0x51, 0x54, 0x56, 0x58, + 0x5b, 0x5c, 0x5a, 0x59, 0x58, 0x56, 0x55, 0x53, + 0x53, 0x52, 0x52, 0x51, 0x52, 0x52, 0x53, 0x55, + 0x57, 0x58, 0x5a, 0x5a, 0x59, 0x5b, 0x59, 0x59, + 0x58, 0x57, 0x55, 0x53, 0x51, 0x4e, 0x4c, 0x4a, + 0x48, 0x46, 0x43, 0x40, 0x3e, 0x3c, 0x3b, 0x3b, + 0x38, 0x39, 0x38, 0x39, 0x3a, 0x3d, 0x3d, 0x3e, + 0x3f, 0x40, 0x41, 0x43, 0x44, 0x45, 0x46, 0x48, + 0x4a, 0x4b, 0x4d, 0x4e, 0x50, 0x52, 0x54, 0x56, + 0x59, 0x5c, 0x5e, 0x5f, 0x60, 0x62, 0x62, 0x63, + 0x63, 0x63, 0x61, 0x5f, 0x5e, 0x5d, 0x5c, 0x5b, + 0x59, 0x56, 0x56, 0x55, 0x54, 0x53, 0x53, 0x54, + 0x55, 0x54, 0x55, 0x55, 0x55, 0x57, 0x58, 0x57, + 0x57, 0x56, 0x55, 0x54, 0x54, 0x52, 0x52, 0x53, + 0x54, 0x55, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x65, + 0x69, 0x6b, 0x6d, 0x6e, 0x6a, 0x69, 0x67, 0x63, + 0x61, 0x5d, 0x58, 0x54, 0x4f, 0x4b, 0x48, 0x47, + 0x46, 0x45, 0x45, }, + { 0x1a, 0x21, 0x1e, 0x1f, 0x1f, 0x20, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4e, 0x4f, + 0x51, 0x52, 0x50, 0x50, 0x4f, 0x4c, 0x4a, 0x48, + 0x45, 0x42, 0x3f, 0x3b, 0x39, 0x36, 0x32, 0x31, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b, + 0x2d, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x42, 0x43, 0x42, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x32, 0x2f, 0x2d, 0x2c, + 0x2a, 0x27, 0x26, 0x23, 0x20, 0x1e, 0x1d, 0x1c, + 0x1a, 0x18, 0x18, 0x17, 0x15, 0x16, 0x14, 0x12, + 0x12, 0x12, 0x12, 0x12, 0x12, 0x11, 0x11, 0x12, + 0x12, 0x12, 0x13, 0x14, 0x14, 0x14, 0x15, 0x16, + 0x17, 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x20, 0x22, + 0x24, 0x25, 0x26, 0x27, 0x28, 0x2a, 0x2c, 0x2c, + 0x2f, 0x32, 0x35, 0x37, 0x3b, 0x3c, 0x41, 0x45, + 0x48, 0x4c, 0x50, 0x52, 0x54, 0x57, 0x5a, 0x5c, + 0x5f, 0x5f, 0x5f, 0x5d, 0x5c, 0x5b, 0x5a, 0x58, + 0x57, 0x57, 0x57, 0x56, 0x56, 0x57, 0x57, 0x5a, + 0x5c, 0x5e, 0x5f, 0x61, 0x5f, 0x5f, 0x5f, 0x5e, + 0x5d, 0x5c, 0x5a, 0x57, 0x55, 0x52, 0x4f, 0x4e, + 0x4a, 0x47, 0x46, 0x42, 0x41, 0x3e, 0x3d, 0x3c, + 0x3b, 0x3a, 0x39, 0x39, 0x3b, 0x3c, 0x3d, 0x3f, + 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x49, 0x49, + 0x4b, 0x4c, 0x4e, 0x4f, 0x51, 0x54, 0x57, 0x58, + 0x5b, 0x5d, 0x61, 0x61, 0x61, 0x63, 0x65, 0x65, + 0x64, 0x64, 0x62, 0x61, 0x60, 0x5e, 0x5d, 0x5c, + 0x59, 0x58, 0x56, 0x54, 0x53, 0x53, 0x53, 0x54, + 0x54, 0x53, 0x53, 0x54, 0x54, 0x54, 0x55, 0x55, + 0x56, 0x55, 0x54, 0x53, 0x53, 0x52, 0x52, 0x53, + 0x55, 0x56, 0x57, 0x58, 0x5b, 0x5e, 0x62, 0x66, + 0x69, 0x6b, 0x6d, 0x6d, 0x6b, 0x69, 0x67, 0x64, + 0x61, 0x5d, 0x58, 0x55, 0x50, 0x4b, 0x48, 0x47, + 0x46, 0x46, 0x46, }, + { 0x1a, 0x20, 0x1e, 0x1f, 0x1f, 0x21, 0x22, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f, + 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x45, 0x42, 0x3f, 0x3b, 0x38, 0x36, 0x32, 0x31, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x27, 0x28, 0x29, 0x2b, + 0x2e, 0x30, 0x33, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b, + 0x29, 0x26, 0x24, 0x24, 0x20, 0x1f, 0x1d, 0x1d, + 0x1a, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14, + 0x13, 0x12, 0x13, 0x13, 0x13, 0x12, 0x12, 0x13, + 0x13, 0x14, 0x15, 0x15, 0x14, 0x15, 0x16, 0x18, + 0x19, 0x1b, 0x1c, 0x1e, 0x20, 0x21, 0x22, 0x24, + 0x27, 0x28, 0x29, 0x2a, 0x2c, 0x2c, 0x2d, 0x2f, + 0x32, 0x35, 0x37, 0x3a, 0x3c, 0x3e, 0x44, 0x48, + 0x4c, 0x50, 0x54, 0x56, 0x58, 0x5b, 0x5e, 0x60, + 0x61, 0x63, 0x62, 0x61, 0x60, 0x5f, 0x5e, 0x5e, + 0x5c, 0x5c, 0x5b, 0x5a, 0x5a, 0x5b, 0x5c, 0x5e, + 0x60, 0x63, 0x64, 0x65, 0x63, 0x62, 0x63, 0x63, + 0x61, 0x60, 0x5e, 0x5b, 0x58, 0x55, 0x51, 0x4f, + 0x4c, 0x4a, 0x47, 0x44, 0x42, 0x41, 0x3e, 0x3c, + 0x3b, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3e, 0x3f, + 0x40, 0x42, 0x43, 0x45, 0x46, 0x47, 0x49, 0x4a, + 0x4c, 0x4c, 0x4f, 0x51, 0x52, 0x55, 0x58, 0x5b, + 0x5c, 0x5f, 0x61, 0x62, 0x63, 0x64, 0x64, 0x65, + 0x66, 0x65, 0x63, 0x62, 0x5f, 0x5e, 0x5e, 0x5c, + 0x5b, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x53, + 0x52, 0x52, 0x52, 0x52, 0x52, 0x53, 0x55, 0x55, + 0x55, 0x53, 0x53, 0x53, 0x52, 0x51, 0x52, 0x52, + 0x55, 0x55, 0x58, 0x58, 0x5b, 0x5d, 0x61, 0x65, + 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64, + 0x61, 0x5e, 0x58, 0x54, 0x4f, 0x4b, 0x49, 0x48, + 0x47, 0x46, 0x45, }, + { 0x19, 0x20, 0x1d, 0x1f, 0x1f, 0x20, 0x23, 0x23, + 0x25, 0x27, 0x2b, 0x2d, 0x31, 0x34, 0x37, 0x3b, + 0x3d, 0x42, 0x45, 0x48, 0x4c, 0x4e, 0x4f, 0x4f, + 0x51, 0x52, 0x51, 0x50, 0x4e, 0x4b, 0x4a, 0x48, + 0x44, 0x42, 0x3f, 0x3a, 0x38, 0x36, 0x32, 0x30, + 0x2f, 0x2c, 0x2a, 0x28, 0x26, 0x26, 0x25, 0x24, + 0x23, 0x24, 0x24, 0x25, 0x26, 0x28, 0x29, 0x2b, + 0x2e, 0x30, 0x34, 0x36, 0x39, 0x3b, 0x3d, 0x3f, + 0x3f, 0x40, 0x41, 0x42, 0x41, 0x40, 0x3e, 0x3c, + 0x3c, 0x3a, 0x37, 0x34, 0x33, 0x30, 0x2e, 0x2b, + 0x29, 0x27, 0x25, 0x24, 0x21, 0x1f, 0x1e, 0x1c, + 0x1b, 0x19, 0x17, 0x16, 0x16, 0x16, 0x16, 0x14, + 0x13, 0x12, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, + 0x13, 0x14, 0x15, 0x14, 0x14, 0x14, 0x17, 0x19, + 0x1a, 0x1c, 0x1e, 0x20, 0x21, 0x23, 0x24, 0x26, + 0x29, 0x29, 0x2b, 0x2c, 0x2d, 0x2e, 0x30, 0x31, + 0x34, 0x38, 0x3b, 0x3c, 0x3f, 0x42, 0x47, 0x4c, + 0x50, 0x54, 0x57, 0x5b, 0x5c, 0x5e, 0x62, 0x63, + 0x66, 0x66, 0x66, 0x65, 0x64, 0x63, 0x61, 0x62, + 0x60, 0x60, 0x5f, 0x5e, 0x5e, 0x5f, 0x60, 0x62, + 0x65, 0x67, 0x69, 0x6a, 0x69, 0x68, 0x69, 0x67, + 0x66, 0x64, 0x62, 0x5f, 0x5c, 0x58, 0x54, 0x51, + 0x4e, 0x4b, 0x49, 0x45, 0x43, 0x41, 0x40, 0x3e, + 0x3c, 0x3a, 0x3b, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x41, 0x42, 0x44, 0x46, 0x46, 0x48, 0x49, 0x4b, + 0x4d, 0x50, 0x51, 0x53, 0x55, 0x57, 0x58, 0x5c, + 0x5f, 0x60, 0x63, 0x64, 0x64, 0x65, 0x66, 0x66, + 0x66, 0x65, 0x65, 0x63, 0x61, 0x5f, 0x5e, 0x5c, + 0x5a, 0x58, 0x56, 0x55, 0x54, 0x53, 0x52, 0x52, + 0x53, 0x52, 0x52, 0x52, 0x52, 0x53, 0x53, 0x53, + 0x54, 0x53, 0x53, 0x52, 0x53, 0x51, 0x53, 0x53, + 0x55, 0x57, 0x58, 0x59, 0x5b, 0x5d, 0x62, 0x64, + 0x68, 0x6a, 0x6c, 0x6b, 0x69, 0x68, 0x67, 0x64, + 0x61, 0x5d, 0x57, 0x54, 0x50, 0x4a, 0x48, 0x47, + 0x46, 0x45, 0x45, }, diff --git a/tests/tcg/hexagon/hvx_histogram_row.S b/tests/tcg/hexagon/hvx_histogram_row.S new file mode 100644 index 000000000..5e42c3314 --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_row.S @@ -0,0 +1,294 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + + +/* + * void hvx_histogram_row(uint8_t *src, => r0 + * int stride, => r1 + * int width, => r2 + * int height, => r3 + * int *hist => r4) + */ + .text + .p2align 2 + .global hvx_histogram_row + .type hvx_histogram_row, @function +hvx_histogram_row: + { r2 = lsr(r2, #7) /* size / VLEN */ + r5 = and(r2, #127) /* size % VLEN */ + v1 = #0 + v0 = #0 + } + /* + * Step 1: Clean the whole vector register file + */ + { v3:2 = v1:0 + v5:4 = v1:0 + p0 = cmp.gt(r2, #0) /* P0 = (width / VLEN > 0) */ + p1 = cmp.eq(r5, #0) /* P1 = (width % VLEN == 0) */ + } + { q0 = vsetq(r5) + v7:6 = v1:0 + } + { v9:8 = v1:0 + v11:10 = v1:0 + } + { v13:12 = v1:0 + v15:14 = v1:0 + } + { v17:16 = v1:0 + v19:18 = v1:0 + } + { v21:20 = v1:0 + v23:22 = v1:0 + } + { v25:24 = v1:0 + v27:26 = v1:0 + } + { v29:28 = v1:0 + v31:30 = v1:0 + r10 = add(r0, r1) /* R10 = &src[2 * stride] */ + loop1(.outerloop, r3) + } + + /* + * Step 2: vhist + */ + .falign +.outerloop: + { if (!p0) jump .loopend + loop0(.innerloop, r2) + } + + .falign +.innerloop: + { v12.tmp = vmem(R0++#1) + vhist + }:endloop0 + + .falign +.loopend: + if (p1) jump .skip /* if (width % VLEN == 0) done with current row */ + { v13.tmp = vmem(r0 + #0) + vhist(q0) + } + + .falign +.skip: + { r0 = r10 /* R0 = &src[(i + 1) * stride] */ + r10 = add(r10, r1) /* R10 = &src[(i + 2) * stride] */ + }:endloop1 + + + /* + * Step 3: Sum up the data + */ + { v0.h = vshuff(v0.h) + r10 = ##0x00010001 + } + v1.h = vshuff(v1.h) + { V2.h = vshuff(v2.h) + v0.w = vdmpy(v0.h, r10.h):sat + } + { v3.h = vshuff(v3.h) + v1.w = vdmpy(v1.h, r10.h):sat + } + { v4.h = vshuff(V4.h) + v2.w = vdmpy(v2.h, r10.h):sat + } + { v5.h = vshuff(v5.h) + v3.w = vdmpy(v3.h, r10.h):sat + } + { v6.h = vshuff(v6.h) + v4.w = vdmpy(v4.h, r10.h):sat + } + { v7.h = vshuff(v7.h) + v5.w = vdmpy(v5.h, r10.h):sat + } + { v8.h = vshuff(V8.h) + v6.w = vdmpy(v6.h, r10.h):sat + } + { v9.h = vshuff(V9.h) + v7.w = vdmpy(v7.h, r10.h):sat + } + { v10.h = vshuff(v10.h) + v8.w = vdmpy(v8.h, r10.h):sat + } + { v11.h = vshuff(v11.h) + v9.w = vdmpy(v9.h, r10.h):sat + } + { v12.h = vshuff(v12.h) + v10.w = vdmpy(v10.h, r10.h):sat + } + { v13.h = vshuff(V13.h) + v11.w = vdmpy(v11.h, r10.h):sat + } + { v14.h = vshuff(v14.h) + v12.w = vdmpy(v12.h, r10.h):sat + } + { v15.h = vshuff(v15.h) + v13.w = vdmpy(v13.h, r10.h):sat + } + { v16.h = vshuff(v16.h) + v14.w = vdmpy(v14.h, r10.h):sat + } + { v17.h = vshuff(v17.h) + v15.w = vdmpy(v15.h, r10.h):sat + } + { v18.h = vshuff(v18.h) + v16.w = vdmpy(v16.h, r10.h):sat + } + { v19.h = vshuff(v19.h) + v17.w = vdmpy(v17.h, r10.h):sat + } + { v20.h = vshuff(v20.h) + v18.W = vdmpy(v18.h, r10.h):sat + } + { v21.h = vshuff(v21.h) + v19.w = vdmpy(v19.h, r10.h):sat + } + { v22.h = vshuff(v22.h) + v20.w = vdmpy(v20.h, r10.h):sat + } + { v23.h = vshuff(v23.h) + v21.w = vdmpy(v21.h, r10.h):sat + } + { v24.h = vshuff(v24.h) + v22.w = vdmpy(v22.h, r10.h):sat + } + { v25.h = vshuff(v25.h) + v23.w = vdmpy(v23.h, r10.h):sat + } + { v26.h = vshuff(v26.h) + v24.w = vdmpy(v24.h, r10.h):sat + } + { v27.h = vshuff(V27.h) + v25.w = vdmpy(v25.h, r10.h):sat + } + { v28.h = vshuff(v28.h) + v26.w = vdmpy(v26.h, r10.h):sat + } + { v29.h = vshuff(v29.h) + v27.w = vdmpy(v27.h, r10.h):sat + } + { v30.h = vshuff(v30.h) + v28.w = vdmpy(v28.h, r10.h):sat + } + { v31.h = vshuff(v31.h) + v29.w = vdmpy(v29.h, r10.h):sat + r28 = #32 + } + { vshuff(v1, v0, r28) + v30.w = vdmpy(v30.h, r10.h):sat + } + { vshuff(v3, v2, r28) + v31.w = vdmpy(v31.h, r10.h):sat + } + { vshuff(v5, v4, r28) + v0.w = vadd(v1.w, v0.w) + v2.w = vadd(v3.w, v2.w) + } + { vshuff(v7, v6, r28) + r7 = #64 + } + { vshuff(v9, v8, r28) + v4.w = vadd(v5.w, v4.w) + v6.w = vadd(v7.w, v6.w) + } + vshuff(v11, v10, r28) + { vshuff(v13, v12, r28) + v8.w = vadd(v9.w, v8.w) + v10.w = vadd(v11.w, v10.w) + } + vshuff(v15, v14, r28) + { vshuff(v17, v16, r28) + v12.w = vadd(v13.w, v12.w) + v14.w = vadd(v15.w, v14.w) + } + vshuff(v19, v18, r28) + { vshuff(v21, v20, r28) + v16.w = vadd(v17.w, v16.w) + v18.w = vadd(v19.w, v18.w) + } + vshuff(v23, v22, r28) + { vshuff(v25, v24, r28) + v20.w = vadd(v21.w, v20.w) + v22.w = vadd(v23.w, v22.w) + } + vshuff(v27, v26, r28) + { vshuff(v29, v28, r28) + v24.w = vadd(v25.w, v24.w) + v26.w = vadd(v27.w, v26.w) + } + vshuff(v31, v30, r28) + { v28.w = vadd(v29.w, v28.w) + vshuff(v2, v0, r7) + } + { v30.w = vadd(v31.w, v30.w) + vshuff(v6, v4, r7) + v0.w = vadd(v0.w, v2.w) + } + { vshuff(v10, v8, r7) + v1.tmp = vmem(r4 + #0) /* update hist[0-31] */ + v0.w = vadd(v0.w, v1.w) + vmem(r4++#1) = v0.new + } + { vshuff(v14, v12, r7) + v4.w = vadd(v4.w, v6.w) + v8.w = vadd(v8.w, v10.w) + } + { vshuff(v18, v16, r7) + v1.tmp = vmem(r4 + #0) /* update hist[32-63] */ + v4.w = vadd(v4.w, v1.w) + vmem(r4++#1) = v4.new + } + { vshuff(v22, v20, r7) + v12.w = vadd(v12.w, v14.w) + V16.w = vadd(v16.w, v18.w) + } + { vshuff(v26, v24, r7) + v1.tmp = vmem(r4 + #0) /* update hist[64-95] */ + v8.w = vadd(v8.w, v1.w) + vmem(r4++#1) = v8.new + } + { vshuff(v30, v28, r7) + v1.tmp = vmem(r4 + #0) /* update hist[96-127] */ + v12.w = vadd(v12.w, v1.w) + vmem(r4++#1) = v12.new + } + + { v20.w = vadd(v20.w, v22.w) + v1.tmp = vmem(r4 + #0) /* update hist[128-159] */ + v16.w = vadd(v16.w, v1.w) + vmem(r4++#1) = v16.new + } + { v24.w = vadd(v24.w, v26.w) + v1.tmp = vmem(r4 + #0) /* update hist[160-191] */ + v20.w = vadd(v20.w, v1.w) + vmem(r4++#1) = v20.new + } + { v28.w = vadd(v28.w, v30.w) + v1.tmp = vmem(r4 + #0) /* update hist[192-223] */ + v24.w = vadd(v24.w, v1.w) + vmem(r4++#1) = v24.new + } + { v1.tmp = vmem(r4 + #0) /* update hist[224-255] */ + v28.w = vadd(v28.w, v1.w) + vmem(r4++#1) = v28.new + } + jumpr r31 + .size hvx_histogram_row, .-hvx_histogram_row diff --git a/tests/tcg/hexagon/hvx_histogram_row.h b/tests/tcg/hexagon/hvx_histogram_row.h new file mode 100644 index 000000000..6a4531a92 --- /dev/null +++ b/tests/tcg/hexagon/hvx_histogram_row.h @@ -0,0 +1,24 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef HVX_HISTOGRAM_ROW_H +#define HVX_HISTOGRAM_ROW_H + +void hvx_histogram_row(uint8_t *src, int stride, int width, int height, + int *hist); + +#endif diff --git a/tests/tcg/hexagon/hvx_misc.c b/tests/tcg/hexagon/hvx_misc.c new file mode 100644 index 000000000..312bb98b4 --- /dev/null +++ b/tests/tcg/hexagon/hvx_misc.c @@ -0,0 +1,469 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <string.h> + +int err; + +static void __check(int line, int i, int j, uint64_t result, uint64_t expect) +{ + if (result != expect) { + printf("ERROR at line %d: [%d][%d] 0x%016llx != 0x%016llx\n", + line, i, j, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +#define MAX_VEC_SIZE_BYTES 128 + +typedef union { + uint64_t ud[MAX_VEC_SIZE_BYTES / 8]; + int64_t d[MAX_VEC_SIZE_BYTES / 8]; + uint32_t uw[MAX_VEC_SIZE_BYTES / 4]; + int32_t w[MAX_VEC_SIZE_BYTES / 4]; + uint16_t uh[MAX_VEC_SIZE_BYTES / 2]; + int16_t h[MAX_VEC_SIZE_BYTES / 2]; + uint8_t ub[MAX_VEC_SIZE_BYTES / 1]; + int8_t b[MAX_VEC_SIZE_BYTES / 1]; +} MMVector; + +#define BUFSIZE 16 +#define OUTSIZE 16 +#define MASKMOD 3 + +MMVector buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector mask[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector output[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); +MMVector expect[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES))); + +#define CHECK_OUTPUT_FUNC(FIELD, FIELDSZ) \ +static void check_output_##FIELD(int line, size_t num_vectors) \ +{ \ + for (int i = 0; i < num_vectors; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + __check(line, i, j, output[i].FIELD[j], expect[i].FIELD[j]); \ + } \ + } \ +} + +CHECK_OUTPUT_FUNC(d, 8) +CHECK_OUTPUT_FUNC(w, 4) +CHECK_OUTPUT_FUNC(h, 2) +CHECK_OUTPUT_FUNC(b, 1) + +static void init_buffers(void) +{ + int counter0 = 0; + int counter1 = 17; + for (int i = 0; i < BUFSIZE; i++) { + for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { + buffer0[i].b[j] = counter0++; + buffer1[i].b[j] = counter1++; + } + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + mask[i].w[j] = (i + j % MASKMOD == 0) ? 0 : 1; + } + } +} + +static void test_load_tmp(void) +{ + void *p0 = buffer0; + void *p1 = buffer1; + void *pout = output; + + for (int i = 0; i < BUFSIZE; i++) { + /* + * Load into v12 as .tmp, then use it in the next packet + * Should get the new value within the same packet and + * the old value in the next packet + */ + asm("v3 = vmem(%0 + #0)\n\t" + "r1 = #1\n\t" + "v12 = vsplat(r1)\n\t" + "{\n\t" + " v12.tmp = vmem(%1 + #0)\n\t" + " v4.w = vadd(v12.w, v3.w)\n\t" + "}\n\t" + "v4.w = vadd(v4.w, v12.w)\n\t" + "vmem(%2 + #0) = v4\n\t" + : : "r"(p0), "r"(p1), "r"(pout) + : "r1", "v12", "v3", "v4", "v6", "memory"); + p0 += sizeof(MMVector); + p1 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].w[j] = buffer0[i].w[j] + buffer1[i].w[j] + 1; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_load_cur(void) +{ + void *p0 = buffer0; + void *pout = output; + + for (int i = 0; i < BUFSIZE; i++) { + asm("{\n\t" + " v2.cur = vmem(%0 + #0)\n\t" + " vmem(%1 + #0) = v2\n\t" + "}\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + p0 += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + expect[i].uw[j] = buffer0[i].uw[j]; + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_load_aligned(void) +{ + /* Aligned loads ignore the low bits of the address */ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 13; + + p0 += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_load_unaligned(void) +{ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 12; + + p0 += offset; /* Create an unaligned address */ + asm("v2 = vmemu(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + memcpy(expect, &buffer0[0].ub[offset], sizeof(MMVector)); + + check_output_w(__LINE__, 1); +} + +static void test_store_aligned(void) +{ + /* Aligned stores ignore the low bits of the address */ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 13; + + pout += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmem(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_store_unaligned(void) +{ + void *p0 = buffer0; + void *pout = output; + const size_t offset = 12; + + pout += offset; /* Create an unaligned address */ + asm("v2 = vmem(%0 + #0)\n\t" + "vmemu(%1 + #0) = v2\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + memcpy(expect, buffer0, 2 * sizeof(MMVector)); + memcpy(&expect[0].ub[offset], buffer0, sizeof(MMVector)); + + check_output_w(__LINE__, 2); +} + +static void test_masked_store(bool invert) +{ + void *p0 = buffer0; + void *pmask = mask; + void *pout = output; + + memset(expect, 0xff, sizeof(expect)); + memset(output, 0xff, sizeof(expect)); + + for (int i = 0; i < BUFSIZE; i++) { + if (invert) { + asm("r4 = #0\n\t" + "v4 = vsplat(r4)\n\t" + "v5 = vmem(%0 + #0)\n\t" + "q0 = vcmp.eq(v4.w, v5.w)\n\t" + "v5 = vmem(%1)\n\t" + "if (!q0) vmem(%2) = v5\n\t" /* Inverted test */ + : : "r"(pmask), "r"(p0), "r"(pout) + : "r4", "v4", "v5", "q0", "memory"); + } else { + asm("r4 = #0\n\t" + "v4 = vsplat(r4)\n\t" + "v5 = vmem(%0 + #0)\n\t" + "q0 = vcmp.eq(v4.w, v5.w)\n\t" + "v5 = vmem(%1)\n\t" + "if (q0) vmem(%2) = v5\n\t" /* Non-inverted test */ + : : "r"(pmask), "r"(p0), "r"(pout) + : "r4", "v4", "v5", "q0", "memory"); + } + p0 += sizeof(MMVector); + pmask += sizeof(MMVector); + pout += sizeof(MMVector); + + for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) { + if (invert) { + if (i + j % MASKMOD != 0) { + expect[i].w[j] = buffer0[i].w[j]; + } + } else { + if (i + j % MASKMOD == 0) { + expect[i].w[j] = buffer0[i].w[j]; + } + } + } + } + + check_output_w(__LINE__, BUFSIZE); +} + +static void test_new_value_store(void) +{ + void *p0 = buffer0; + void *pout = output; + + asm("{\n\t" + " v2 = vmem(%0 + #0)\n\t" + " vmem(%1 + #0) = v2.new\n\t" + "}\n\t" + : : "r"(p0), "r"(pout) : "v2", "memory"); + + expect[0] = buffer0[0]; + + check_output_w(__LINE__, 1); +} + +static void test_max_temps() +{ + void *p0 = buffer0; + void *pout = output; + + asm("v0 = vmem(%0 + #0)\n\t" + "v1 = vmem(%0 + #1)\n\t" + "v2 = vmem(%0 + #2)\n\t" + "v3 = vmem(%0 + #3)\n\t" + "v4 = vmem(%0 + #4)\n\t" + "{\n\t" + " v1:0.w = vadd(v3:2.w, v1:0.w)\n\t" + " v2.b = vshuffe(v3.b, v2.b)\n\t" + " v3.w = vadd(v1.w, v4.w)\n\t" + " v4.tmp = vmem(%0 + #5)\n\t" + "}\n\t" + "vmem(%1 + #0) = v0\n\t" + "vmem(%1 + #1) = v1\n\t" + "vmem(%1 + #2) = v2\n\t" + "vmem(%1 + #3) = v3\n\t" + "vmem(%1 + #4) = v4\n\t" + : : "r"(p0), "r"(pout) : "memory"); + + /* The first two vectors come from the vadd-pair instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[0].w[i] = buffer0[0].w[i] + buffer0[2].w[i]; + expect[1].w[i] = buffer0[1].w[i] + buffer0[3].w[i]; + } + /* The third vector comes from the vshuffe instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 2; i++) { + expect[2].uh[i] = (buffer0[2].uh[i] & 0xff) | + (buffer0[3].uh[i] & 0xff) << 8; + } + /* The fourth vector comes from the vadd-single instruction */ + for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) { + expect[3].w[i] = buffer0[1].w[i] + buffer0[5].w[i]; + } + /* + * The fifth vector comes from the load to v4 + * make sure the .tmp is dropped + */ + expect[4] = buffer0[4]; + + check_output_b(__LINE__, 5); +} + +#define VEC_OP1(ASM, EL, IN, OUT) \ + asm("v2 = vmem(%0 + #0)\n\t" \ + "v2" #EL " = " #ASM "(v2" #EL ")\n\t" \ + "vmem(%1 + #0) = v2\n\t" \ + : : "r"(IN), "r"(OUT) : "v2", "memory") + +#define VEC_OP2(ASM, EL, IN0, IN1, OUT) \ + asm("v2 = vmem(%0 + #0)\n\t" \ + "v3 = vmem(%1 + #0)\n\t" \ + "v2" #EL " = " #ASM "(v2" #EL ", v3" #EL ")\n\t" \ + "vmem(%2 + #0) = v2\n\t" \ + : : "r"(IN0), "r"(IN1), "r"(OUT) : "v2", "v3", "memory") + +#define TEST_VEC_OP1(NAME, ASM, EL, FIELD, FIELDSZ, OP) \ +static void test_##NAME(void) \ +{ \ + void *pin = buffer0; \ + void *pout = output; \ + for (int i = 0; i < BUFSIZE; i++) { \ + VEC_OP1(ASM, EL, pin, pout); \ + pin += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + expect[i].FIELD[j] = OP buffer0[i].FIELD[j]; \ + } \ + } \ + check_output_##FIELD(__LINE__, BUFSIZE); \ +} + +#define TEST_VEC_OP2(NAME, ASM, EL, FIELD, FIELDSZ, OP) \ +static void test_##NAME(void) \ +{ \ + void *p0 = buffer0; \ + void *p1 = buffer1; \ + void *pout = output; \ + for (int i = 0; i < BUFSIZE; i++) { \ + VEC_OP2(ASM, EL, p0, p1, pout); \ + p0 += sizeof(MMVector); \ + p1 += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \ + expect[i].FIELD[j] = buffer0[i].FIELD[j] OP buffer1[i].FIELD[j]; \ + } \ + } \ + check_output_##FIELD(__LINE__, BUFSIZE); \ +} + +#define THRESHOLD 31 + +#define PRED_OP2(ASM, IN0, IN1, OUT, INV) \ + asm("r4 = #%3\n\t" \ + "v1.b = vsplat(r4)\n\t" \ + "v2 = vmem(%0 + #0)\n\t" \ + "q0 = vcmp.gt(v2.b, v1.b)\n\t" \ + "v3 = vmem(%1 + #0)\n\t" \ + "q1 = vcmp.gt(v3.b, v1.b)\n\t" \ + "q2 = " #ASM "(q0, " INV "q1)\n\t" \ + "r4 = #0xff\n\t" \ + "v1.b = vsplat(r4)\n\t" \ + "if (q2) vmem(%2 + #0) = v1\n\t" \ + : : "r"(IN0), "r"(IN1), "r"(OUT), "i"(THRESHOLD) \ + : "r4", "v1", "v2", "v3", "q0", "q1", "q2", "memory") + +#define TEST_PRED_OP2(NAME, ASM, OP, INV) \ +static void test_##NAME(bool invert) \ +{ \ + void *p0 = buffer0; \ + void *p1 = buffer1; \ + void *pout = output; \ + memset(output, 0, sizeof(expect)); \ + for (int i = 0; i < BUFSIZE; i++) { \ + PRED_OP2(ASM, p0, p1, pout, INV); \ + p0 += sizeof(MMVector); \ + p1 += sizeof(MMVector); \ + pout += sizeof(MMVector); \ + } \ + for (int i = 0; i < BUFSIZE; i++) { \ + for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { \ + bool p0 = (buffer0[i].b[j] > THRESHOLD); \ + bool p1 = (buffer1[i].b[j] > THRESHOLD); \ + if (invert) { \ + expect[i].b[j] = (p0 OP !p1) ? 0xff : 0x00; \ + } else { \ + expect[i].b[j] = (p0 OP p1) ? 0xff : 0x00; \ + } \ + } \ + } \ + check_output_b(__LINE__, BUFSIZE); \ +} + +TEST_VEC_OP2(vadd_w, vadd, .w, w, 4, +) +TEST_VEC_OP2(vadd_h, vadd, .h, h, 2, +) +TEST_VEC_OP2(vadd_b, vadd, .b, b, 1, +) +TEST_VEC_OP2(vsub_w, vsub, .w, w, 4, -) +TEST_VEC_OP2(vsub_h, vsub, .h, h, 2, -) +TEST_VEC_OP2(vsub_b, vsub, .b, b, 1, -) +TEST_VEC_OP2(vxor, vxor, , d, 8, ^) +TEST_VEC_OP2(vand, vand, , d, 8, &) +TEST_VEC_OP2(vor, vor, , d, 8, |) +TEST_VEC_OP1(vnot, vnot, , d, 8, ~) + +TEST_PRED_OP2(pred_or, or, |, "") +TEST_PRED_OP2(pred_or_n, or, |, "!") +TEST_PRED_OP2(pred_and, and, &, "") +TEST_PRED_OP2(pred_and_n, and, &, "!") +TEST_PRED_OP2(pred_xor, xor, ^, "") + +int main() +{ + init_buffers(); + + test_load_tmp(); + test_load_cur(); + test_load_aligned(); + test_load_unaligned(); + test_store_aligned(); + test_store_unaligned(); + test_masked_store(false); + test_masked_store(true); + test_new_value_store(); + test_max_temps(); + + test_vadd_w(); + test_vadd_h(); + test_vadd_b(); + test_vsub_w(); + test_vsub_h(); + test_vsub_b(); + test_vxor(); + test_vand(); + test_vor(); + test_vnot(); + + test_pred_or(false); + test_pred_or_n(true); + test_pred_and(false); + test_pred_and_n(true); + test_pred_xor(false); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/load_align.c b/tests/tcg/hexagon/load_align.c new file mode 100644 index 000000000..12fc9cbd8 --- /dev/null +++ b/tests/tcg/hexagon/load_align.c @@ -0,0 +1,415 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test load align instructions + * + * Example + * r1:0 = memh_fifo(r1+#0) + * loads a half word from memory, shifts the destination register + * right by one half word and inserts the loaded value into the high + * half word of the destination. + * + * There are 8 addressing modes and byte and half word variants, for a + * total of 16 instructions to test + */ + +#include <stdio.h> +#include <string.h> + +int err; + +char buf[16] __attribute__((aligned(1 << 16))); + +void init_buf(void) +{ + int i; + for (i = 0; i < 16; i++) { + buf[i] = i + 1; + } +} + +void __check(int line, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", + line, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +void __checkp(int line, void *p, void *expect) +{ + if (p != expect) { + printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect); + err++; + } +} + +#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP) + +/* + **************************************************************************** + * _io addressing mode (addr + offset) + */ +#define LOAD_io(SZ, RES, ADDR, OFF) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1+#" #OFF ")\n\t" \ + : "+r"(RES) \ + : "r"(ADDR)) +#define LOAD_io_b(RES, ADDR, OFF) \ + LOAD_io(b, RES, ADDR, OFF) +#define LOAD_io_h(RES, ADDR, OFF) \ + LOAD_io(h, RES, ADDR, OFF) + +#define TEST_io(NAME, SZ, SIZE, EXP1, EXP2, EXP3, EXP4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + LOAD_io_##SZ(result, buf, 0 * (SIZE)); \ + check(result, (EXP1)); \ + LOAD_io_##SZ(result, buf, 1 * (SIZE)); \ + check(result, (EXP2)); \ + LOAD_io_##SZ(result, buf, 2 * (SIZE)); \ + check(result, (EXP3)); \ + LOAD_io_##SZ(result, buf, 3 * (SIZE)); \ + check(result, (EXP4)); \ +} + +TEST_io(loadalignb_io, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_io(loadalignh_io, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _ur addressing mode (index << offset + base) + */ +#define LOAD_ur(SZ, RES, SHIFT, IDX) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1<<#" #SHIFT " + ##buf)\n\t" \ + : "+r"(RES) \ + : "r"(IDX)) +#define LOAD_ur_b(RES, SHIFT, IDX) \ + LOAD_ur(b, RES, SHIFT, IDX) +#define LOAD_ur_h(RES, SHIFT, IDX) \ + LOAD_ur(h, RES, SHIFT, IDX) + +#define TEST_ur(NAME, SZ, SHIFT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + LOAD_ur_##SZ(result, (SHIFT), 0); \ + check(result, (RES1)); \ + LOAD_ur_##SZ(result, (SHIFT), 1); \ + check(result, (RES2)); \ + LOAD_ur_##SZ(result, (SHIFT), 2); \ + check(result, (RES3)); \ + LOAD_ur_##SZ(result, (SHIFT), 3); \ + check(result, (RES4)); \ +} + +TEST_ur(loadalignb_ur, b, 1, + 0x01ffffffffffffffLL, 0x0301ffffffffffffLL, + 0x050301ffffffffffLL, 0x07050301ffffffffLL) +TEST_ur(loadalignh_ur, h, 1, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _ap addressing mode (addr = base) + */ +#define LOAD_ap(SZ, RES, PTR, ADDR) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1 = ##" #ADDR ")\n\t" \ + : "+r"(RES), "=r"(PTR)) +#define LOAD_ap_b(RES, PTR, ADDR) \ + LOAD_ap(b, RES, PTR, ADDR) +#define LOAD_ap_h(RES, PTR, ADDR) \ + LOAD_ap(h, RES, PTR, ADDR) + +#define TEST_ap(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr; \ + LOAD_ap_##SZ(result, ptr, (buf + 0 * (SIZE))); \ + check(result, (RES1)); \ + checkp(ptr, &buf[0 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 1 * (SIZE))); \ + check(result, (RES2)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 2 * (SIZE))); \ + check(result, (RES3)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + LOAD_ap_##SZ(result, ptr, (buf + 3 * (SIZE))); \ + check(result, (RES4)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ +} + +TEST_ap(loadalignb_ap, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_ap(loadalignh_ap, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _rp addressing mode (addr ++ modifer-reg) + */ +#define LOAD_pr(SZ, RES, PTR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "_fifo(%1++m0)\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"(INC) \ + : "m0") +#define LOAD_pr_b(RES, PTR, INC) \ + LOAD_pr(b, RES, PTR, INC) +#define LOAD_pr_h(RES, PTR, INC) \ + LOAD_pr(h, RES, PTR, INC) + +#define TEST_pr(NAME, SZ, SIZE, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ + LOAD_pr_##SZ(result, ptr, (SIZE)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[4 * (SIZE)]); \ +} + +TEST_pr(loadalignb_pr, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_pr(loadalignh_pr, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _pbr addressing mode (addr ++ modifer-reg:brev) + */ +#define LOAD_pbr(SZ, RES, PTR) \ + __asm__( \ + "r4 = #(1 << (16 - 3))\n\t" \ + "m0 = r4\n\t" \ + "%0 = mem" #SZ "_fifo(%1++m0:brev)\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : \ + : "r4", "m0") +#define LOAD_pbr_b(RES, PTR) \ + LOAD_pbr(b, RES, PTR) +#define LOAD_pbr_h(RES, PTR) \ + LOAD_pbr(h, RES, PTR) + +#define TEST_pbr(NAME, SZ, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES1)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES2)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES3)); \ + LOAD_pbr_##SZ(result, ptr); \ + check(result, (RES4)); \ +} + +TEST_pbr(loadalignb_pbr, b, + 0x01ffffffffffffffLL, 0x0501ffffffffffffLL, + 0x030501ffffffffffLL, 0x07030501ffffffffLL) +TEST_pbr(loadalignh_pbr, h, + 0x0201ffffffffffffLL, 0x06050201ffffffffLL, + 0x040306050201ffffLL, 0x0807040306050201LL) + +/* + **************************************************************************** + * _pi addressing mode (addr ++ inc) + */ +#define LOAD_pi(SZ, RES, PTR, INC) \ + __asm__( \ + "%0 = mem" #SZ "_fifo(%1++#" #INC ")\n\t" \ + : "+r"(RES), "+r"(PTR)) +#define LOAD_pi_b(RES, PTR, INC) \ + LOAD_pi(b, RES, PTR, INC) +#define LOAD_pi_h(RES, PTR, INC) \ + LOAD_pi(h, RES, PTR, INC) + +#define TEST_pi(NAME, SZ, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[1 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[2 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[3 * (INC)]); \ + LOAD_pi_##SZ(result, ptr, (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[4 * (INC)]); \ +} + +TEST_pi(loadalignb_pi, b, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x030201ffffffffffLL, 0x04030201ffffffffLL) +TEST_pi(loadalignh_pi, h, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x060504030201ffffLL, 0x0807060504030201LL) + +/* + **************************************************************************** + * _pci addressing mode (addr ++ inc:circ) + */ +#define LOAD_pci(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SZ "_fifo(%1++#" #INC ":circ(m0))\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define LOAD_pci_b(RES, PTR, START, LEN, INC) \ + LOAD_pci(b, RES, PTR, START, LEN, INC) +#define LOAD_pci_h(RES, PTR, START, LEN, INC) \ + LOAD_pci(h, RES, PTR, START, LEN, INC) + +#define TEST_pci(NAME, SZ, LEN, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \ + LOAD_pci_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \ +} + +TEST_pci(loadalignb_pci, b, 2, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x010201ffffffffffLL, 0x02010201ffffffffLL) +TEST_pci(loadalignh_pci, h, 4, 2, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x020104030201ffffLL, 0x0403020104030201LL) + +/* + **************************************************************************** + * _pcr addressing mode (addr ++ I:circ(modifier-reg)) + */ +#define LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SZ "_fifo(%1++I:circ(m1))\n\t" \ + : "+r"(RES), "+r"(PTR) \ + : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define LOAD_pcr_b(RES, PTR, START, LEN, INC) \ + LOAD_pcr(b, RES, PTR, START, LEN, INC) +#define LOAD_pcr_h(RES, PTR, START, LEN, INC) \ + LOAD_pcr(h, RES, PTR, START, LEN, INC) + +#define TEST_pcr(NAME, SZ, SIZE, LEN, INC, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + long long result = ~0LL; \ + void *ptr = buf; \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1)); \ + checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2)); \ + checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3)); \ + checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \ + LOAD_pcr_##SZ(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4)); \ + checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \ +} + +TEST_pcr(loadalignb_pcr, b, 1, 2, 1, + 0x01ffffffffffffffLL, 0x0201ffffffffffffLL, + 0x010201ffffffffffLL, 0x02010201ffffffffLL) +TEST_pcr(loadalignh_pcr, h, 2, 4, 1, + 0x0201ffffffffffffLL, 0x04030201ffffffffLL, + 0x020104030201ffffLL, 0x0403020104030201LL) + +int main() +{ + init_buf(); + + test_loadalignb_io(); + test_loadalignh_io(); + + test_loadalignb_ur(); + test_loadalignh_ur(); + + test_loadalignb_ap(); + test_loadalignh_ap(); + + test_loadalignb_pr(); + test_loadalignh_pr(); + + test_loadalignb_pbr(); + test_loadalignh_pbr(); + + test_loadalignb_pi(); + test_loadalignh_pi(); + + test_loadalignb_pci(); + test_loadalignh_pci(); + + test_loadalignb_pcr(); + test_loadalignh_pcr(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/load_unpack.c b/tests/tcg/hexagon/load_unpack.c new file mode 100644 index 000000000..3575a37a2 --- /dev/null +++ b/tests/tcg/hexagon/load_unpack.c @@ -0,0 +1,474 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Test load unpack instructions + * + * Example + * r0 = memubh(r1+#0) + * loads a half word from memory and zero-extends the 2 bytes to form a word + * + * For each addressing mode, there are 4 tests + * bzw2 unsigned 2 elements + * bsw2 signed 2 elements + * bzw4 unsigned 4 elements + * bsw4 signed 4 elements + * There are 8 addressing modes, for a total of 32 instructions to test + */ + +#include <stdio.h> +#include <string.h> + +int err; + +char buf[16] __attribute__((aligned(1 << 16))); + +void init_buf(void) +{ + int i; + for (i = 0; i < 16; i++) { + int sign = i % 2 == 0 ? 0x80 : 0; + buf[i] = sign | (i + 1); + } +} + +void __check(int line, long long result, long long expect) +{ + if (result != expect) { + printf("ERROR at line %d: 0x%08llx != 0x%08llx\n", + line, result, expect); + err++; + } +} + +#define check(RES, EXP) __check(__LINE__, RES, EXP) + +void __checkp(int line, void *p, void *expect) +{ + if (p != expect) { + printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect); + err++; + } +} + +#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP) + +/* + **************************************************************************** + * _io addressing mode (addr + offset) + */ +#define BxW_LOAD_io(SZ, RES, ADDR, OFF) \ + __asm__( \ + "%0 = mem" #SZ "(%1+#" #OFF ")\n\t" \ + : "=r"(RES) \ + : "r"(ADDR)) +#define BxW_LOAD_io_Z(RES, ADDR, OFF) \ + BxW_LOAD_io(ubh, RES, ADDR, OFF) +#define BxW_LOAD_io_S(RES, ADDR, OFF) \ + BxW_LOAD_io(bh, RES, ADDR, OFF) + +#define TEST_io(NAME, TYPE, SIGN, SIZE, EXT, EXP1, EXP2, EXP3, EXP4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + init_buf(); \ + BxW_LOAD_io_##SIGN(result, buf, 0 * (SIZE)); \ + check(result, (EXP1) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 1 * (SIZE)); \ + check(result, (EXP2) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 2 * (SIZE)); \ + check(result, (EXP3) | (EXT)); \ + BxW_LOAD_io_##SIGN(result, buf, 3 * (SIZE)); \ + check(result, (EXP4) | (EXT)); \ +} + + +TEST_io(loadbzw2_io, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_io(loadbsw2_io, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_io(loadbzw4_io, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_io(loadbsw4_io, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _ur addressing mode (index << offset + base) + */ +#define BxW_LOAD_ur(SZ, RES, SHIFT, IDX) \ + __asm__( \ + "%0 = mem" #SZ "(%1<<#" #SHIFT " + ##buf)\n\t" \ + : "=r"(RES) \ + : "r"(IDX)) +#define BxW_LOAD_ur_Z(RES, SHIFT, IDX) \ + BxW_LOAD_ur(ubh, RES, SHIFT, IDX) +#define BxW_LOAD_ur_S(RES, SHIFT, IDX) \ + BxW_LOAD_ur(bh, RES, SHIFT, IDX) + +#define TEST_ur(NAME, TYPE, SIGN, SHIFT, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + init_buf(); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 0); \ + check(result, (RES1) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 1); \ + check(result, (RES2) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 2); \ + check(result, (RES3) | (EXT)); \ + BxW_LOAD_ur_##SIGN(result, (SHIFT), 3); \ + check(result, (RES4) | (EXT)); \ +} \ + +TEST_ur(loadbzw2_ur, int, Z, 1, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ur(loadbsw2_ur, int, S, 1, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ur(loadbzw4_ur, long long, Z, 2, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_ur(loadbsw4_ur, long long, S, 2, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _ap addressing mode (addr = base) + */ +#define BxW_LOAD_ap(SZ, RES, PTR, ADDR) \ + __asm__( \ + "%0 = mem" #SZ "(%1 = ##" #ADDR ")\n\t" \ + : "=r"(RES), "=r"(PTR)) +#define BxW_LOAD_ap_Z(RES, PTR, ADDR) \ + BxW_LOAD_ap(ubh, RES, PTR, ADDR) +#define BxW_LOAD_ap_S(RES, PTR, ADDR) \ + BxW_LOAD_ap(bh, RES, PTR, ADDR) + +#define TEST_ap(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr; \ + init_buf(); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 0 * (SIZE))); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[0 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 1 * (SIZE))); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 2 * (SIZE))); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + BxW_LOAD_ap_##SIGN(result, ptr, (buf + 3 * (SIZE))); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ +} + +TEST_ap(loadbzw2_ap, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ap(loadbsw2_ap, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_ap(loadbzw4_ap, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_ap(loadbsw4_ap, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _rp addressing mode (addr ++ modifer-reg) + */ +#define BxW_LOAD_pr(SZ, RES, PTR, INC) \ + __asm__( \ + "m0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++m0)\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"(INC) \ + : "m0") +#define BxW_LOAD_pr_Z(RES, PTR, INC) \ + BxW_LOAD_pr(ubh, RES, PTR, INC) +#define BxW_LOAD_pr_S(RES, PTR, INC) \ + BxW_LOAD_pr(bh, RES, PTR, INC) + +#define TEST_pr(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[1 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[2 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[3 * (SIZE)]); \ + BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[4 * (SIZE)]); \ +} + +TEST_pr(loadbzw2_pr, int, Z, 2, 0x00000000, + 0x00020081, 0x0040083, 0x00060085, 0x00080087) +TEST_pr(loadbsw2_pr, int, S, 2, 0x0000ff00, + 0x00020081, 0x0040083, 0x00060085, 0x00080087) +TEST_pr(loadbzw4_pr, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_pr(loadbsw4_pr, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _pbr addressing mode (addr ++ modifer-reg:brev) + */ +#define BxW_LOAD_pbr(SZ, RES, PTR) \ + __asm__( \ + "r4 = #(1 << (16 - 3))\n\t" \ + "m0 = r4\n\t" \ + "%0 = mem" #SZ "(%1++m0:brev)\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : \ + : "r4", "m0") +#define BxW_LOAD_pbr_Z(RES, PTR) \ + BxW_LOAD_pbr(ubh, RES, PTR) +#define BxW_LOAD_pbr_S(RES, PTR) \ + BxW_LOAD_pbr(bh, RES, PTR) + +#define TEST_pbr(NAME, TYPE, SIGN, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES1) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES2) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES3) | (EXT)); \ + BxW_LOAD_pbr_##SIGN(result, ptr); \ + check(result, (RES4) | (EXT)); \ +} + +TEST_pbr(loadbzw2_pbr, int, Z, 0x00000000, + 0x00020081, 0x00060085, 0x00040083, 0x00080087) +TEST_pbr(loadbsw2_pbr, int, S, 0x0000ff00, + 0x00020081, 0x00060085, 0x00040083, 0x00080087) +TEST_pbr(loadbzw4_pbr, long long, Z, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0006008500040083LL, 0x000a008900080087LL) +TEST_pbr(loadbsw4_pbr, long long, S, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0006008500040083LL, 0x000a008900080087LL) + +/* + **************************************************************************** + * _pi addressing mode (addr ++ inc) + */ +#define BxW_LOAD_pi(SZ, RES, PTR, INC) \ + __asm__( \ + "%0 = mem" #SZ "(%1++#" #INC ")\n\t" \ + : "=r"(RES), "+r"(PTR)) +#define BxW_LOAD_pi_Z(RES, PTR, INC) \ + BxW_LOAD_pi(ubh, RES, PTR, INC) +#define BxW_LOAD_pi_S(RES, PTR, INC) \ + BxW_LOAD_pi(bh, RES, PTR, INC) + +#define TEST_pi(NAME, TYPE, SIGN, INC, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[1 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[2 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[3 * (INC)]); \ + BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[4 * (INC)]); \ +} + +TEST_pi(loadbzw2_pi, int, Z, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_pi(loadbsw2_pi, int, S, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00080087) +TEST_pi(loadbzw4_pi, long long, Z, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) +TEST_pi(loadbsw4_pi, long long, S, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x000c008b000a0089LL, 0x0010008f000e008dLL) + +/* + **************************************************************************** + * _pci addressing mode (addr ++ inc:circ) + */ +#define BxW_LOAD_pci(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %3\n\t" \ + "m0 = r4\n\t" \ + "cs0 = %2\n\t" \ + "%0 = mem" #SZ "(%1++#" #INC ":circ(m0))\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"(START), "r"(LEN) \ + : "r4", "m0", "cs0") +#define BxW_LOAD_pci_Z(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pci(ubh, RES, PTR, START, LEN, INC) +#define BxW_LOAD_pci_S(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pci(bh, RES, PTR, START, LEN, INC) + +#define TEST_pci(NAME, TYPE, SIGN, LEN, INC, EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \ + BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \ +} + +TEST_pci(loadbzw2_pci, int, Z, 6, 2, 0x00000000, + 0x00020081, 0x00040083, 0x00060085, 0x00020081) +TEST_pci(loadbsw2_pci, int, S, 6, 2, 0x0000ff00, + 0x00020081, 0x00040083, 0x00060085, 0x00020081) +TEST_pci(loadbzw4_pci, long long, Z, 8, 4, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) +TEST_pci(loadbsw4_pci, long long, S, 8, 4, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) + +/* + **************************************************************************** + * _pcr addressing mode (addr ++ I:circ(modifier-reg)) + */ +#define BxW_LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \ + __asm__( \ + "r4 = %2\n\t" \ + "m1 = r4\n\t" \ + "cs1 = %3\n\t" \ + "%0 = mem" #SZ "(%1++I:circ(m1))\n\t" \ + : "=r"(RES), "+r"(PTR) \ + : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \ + "r"(START) \ + : "r4", "m1", "cs1") +#define BxW_LOAD_pcr_Z(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pcr(ubh, RES, PTR, START, LEN, INC) +#define BxW_LOAD_pcr_S(RES, PTR, START, LEN, INC) \ + BxW_LOAD_pcr(bh, RES, PTR, START, LEN, INC) + +#define TEST_pcr(NAME, TYPE, SIGN, SIZE, LEN, INC, \ + EXT, RES1, RES2, RES3, RES4) \ +void test_##NAME(void) \ +{ \ + TYPE result; \ + void *ptr = buf; \ + init_buf(); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES1) | (EXT)); \ + checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES2) | (EXT)); \ + checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES3) | (EXT)); \ + checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \ + BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \ + check(result, (RES4) | (EXT)); \ + checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \ +} + +TEST_pcr(loadbzw2_pcr, int, Z, 2, 8, 2, 0x00000000, + 0x00020081, 0x00060085, 0x00020081, 0x00060085) +TEST_pcr(loadbsw2_pcr, int, S, 2, 8, 2, 0x0000ff00, + 0x00020081, 0x00060085, 0x00020081, 0x00060085) +TEST_pcr(loadbzw4_pcr, long long, Z, 4, 8, 1, 0x0000000000000000LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) +TEST_pcr(loadbsw4_pcr, long long, S, 4, 8, 1, 0x0000ff000000ff00LL, + 0x0004008300020081LL, 0x0008008700060085LL, + 0x0004008300020081LL, 0x0008008700060085LL) + +int main() +{ + test_loadbzw2_io(); + test_loadbsw2_io(); + test_loadbzw4_io(); + test_loadbsw4_io(); + + test_loadbzw2_ur(); + test_loadbsw2_ur(); + test_loadbzw4_ur(); + test_loadbsw4_ur(); + + test_loadbzw2_ap(); + test_loadbsw2_ap(); + test_loadbzw4_ap(); + test_loadbsw4_ap(); + + test_loadbzw2_pr(); + test_loadbsw2_pr(); + test_loadbzw4_pr(); + test_loadbsw4_pr(); + + test_loadbzw2_pbr(); + test_loadbsw2_pbr(); + test_loadbzw4_pbr(); + test_loadbsw4_pbr(); + + test_loadbzw2_pi(); + test_loadbsw2_pi(); + test_loadbzw4_pi(); + test_loadbsw4_pi(); + + test_loadbzw2_pci(); + test_loadbsw2_pci(); + test_loadbzw4_pci(); + test_loadbsw4_pci(); + + test_loadbzw2_pcr(); + test_loadbsw2_pcr(); + test_loadbzw4_pcr(); + test_loadbsw4_pcr(); + + puts(err ? "FAIL" : "PASS"); + return err ? 1 : 0; +} diff --git a/tests/tcg/hexagon/mem_noshuf.c b/tests/tcg/hexagon/mem_noshuf.c new file mode 100644 index 000000000..dd714d5e9 --- /dev/null +++ b/tests/tcg/hexagon/mem_noshuf.c @@ -0,0 +1,328 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +/* + * Make sure that the :mem_noshuf packet attribute is honored. + * This is important when the addresses overlap. + * The store instruction in slot 1 effectively executes first, + * followed by the load instruction in slot 0. + */ + +#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ +static inline unsigned int NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ +{ \ + unsigned int ret; \ + asm volatile("{\n\t" \ + " " #ST_OP "(%1) = %3\n\t" \ + " %0 = " #LD_OP "(%2)\n\t" \ + "}:mem_noshuf\n" \ + : "=r"(ret) \ + : "r"(p), "r"(q), "r"(x) \ + : "memory"); \ + return ret; \ +} + +#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \ +static inline unsigned long long NAME(ST_TYPE * p, LD_TYPE * q, ST_TYPE x) \ +{ \ + unsigned long long ret; \ + asm volatile("{\n\t" \ + " " #ST_OP "(%1) = %3\n\t" \ + " %0 = " #LD_OP "(%2)\n\t" \ + "}:mem_noshuf\n" \ + : "=r"(ret) \ + : "r"(p), "r"(q), "r"(x) \ + : "memory"); \ + return ret; \ +} + +/* Store byte combinations */ +MEM_NOSHUF32(mem_noshuf_sb_lb, signed char, signed char, memb, memb) +MEM_NOSHUF32(mem_noshuf_sb_lub, signed char, unsigned char, memb, memub) +MEM_NOSHUF32(mem_noshuf_sb_lh, signed char, signed short, memb, memh) +MEM_NOSHUF32(mem_noshuf_sb_luh, signed char, unsigned short, memb, memuh) +MEM_NOSHUF32(mem_noshuf_sb_lw, signed char, signed int, memb, memw) +MEM_NOSHUF64(mem_noshuf_sb_ld, signed char, signed long long, memb, memd) + +/* Store half combinations */ +MEM_NOSHUF32(mem_noshuf_sh_lb, signed short, signed char, memh, memb) +MEM_NOSHUF32(mem_noshuf_sh_lub, signed short, unsigned char, memh, memub) +MEM_NOSHUF32(mem_noshuf_sh_lh, signed short, signed short, memh, memh) +MEM_NOSHUF32(mem_noshuf_sh_luh, signed short, unsigned short, memh, memuh) +MEM_NOSHUF32(mem_noshuf_sh_lw, signed short, signed int, memh, memw) +MEM_NOSHUF64(mem_noshuf_sh_ld, signed short, signed long long, memh, memd) + +/* Store word combinations */ +MEM_NOSHUF32(mem_noshuf_sw_lb, signed int, signed char, memw, memb) +MEM_NOSHUF32(mem_noshuf_sw_lub, signed int, unsigned char, memw, memub) +MEM_NOSHUF32(mem_noshuf_sw_lh, signed int, signed short, memw, memh) +MEM_NOSHUF32(mem_noshuf_sw_luh, signed int, unsigned short, memw, memuh) +MEM_NOSHUF32(mem_noshuf_sw_lw, signed int, signed int, memw, memw) +MEM_NOSHUF64(mem_noshuf_sw_ld, signed int, signed long long, memw, memd) + +/* Store double combinations */ +MEM_NOSHUF32(mem_noshuf_sd_lb, long long, signed char, memd, memb) +MEM_NOSHUF32(mem_noshuf_sd_lub, long long, unsigned char, memd, memub) +MEM_NOSHUF32(mem_noshuf_sd_lh, long long, signed short, memd, memh) +MEM_NOSHUF32(mem_noshuf_sd_luh, long long, unsigned short, memd, memuh) +MEM_NOSHUF32(mem_noshuf_sd_lw, long long, signed int, memd, memw) +MEM_NOSHUF64(mem_noshuf_sd_ld, long long, signed long long, memd, memd) + +static inline unsigned int cancel_sw_lb(int pred, int *p, signed char *q, int x) +{ + unsigned int ret; + asm volatile("p0 = cmp.eq(%4, #0)\n\t" + "{\n\t" + " if (!p0) memw(%1) = %3\n\t" + " %0 = memb(%2)\n\t" + "}:mem_noshuf\n" + : "=r"(ret) + : "r"(p), "r"(q), "r"(x), "r"(pred) + : "p0", "memory"); + return ret; +} + +static inline +unsigned long long cancel_sw_ld(int pred, int *p, long long *q, int x) +{ + long long ret; + asm volatile("p0 = cmp.eq(%4, #0)\n\t" + "{\n\t" + " if (!p0) memw(%1) = %3\n\t" + " %0 = memd(%2)\n\t" + "}:mem_noshuf\n" + : "=r"(ret) + : "r"(p), "r"(q), "r"(x), "r"(pred) + : "p0", "memory"); + return ret; +} + +typedef union { + signed long long d[2]; + unsigned long long ud[2]; + signed int w[4]; + unsigned int uw[4]; + signed short h[8]; + unsigned short uh[8]; + signed char b[16]; + unsigned char ub[16]; +} Memory; + +int err; + +static void check32(int n, int expect) +{ + if (n != expect) { + printf("ERROR: 0x%08x != 0x%08x\n", n, expect); + err++; + } +} + +static void check64(long long n, long long expect) +{ + if (n != expect) { + printf("ERROR: 0x%08llx != 0x%08llx\n", n, expect); + err++; + } +} + +int main() +{ + Memory n; + unsigned int res32; + unsigned long long res64; + + /* + * Store byte combinations + */ + n.w[0] = ~0; + res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87); + check32(res32, 0xffffff87); + + n.w[0] = ~0; + res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87); + check32(res32, 0x00000087); + + n.w[0] = ~0; + res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87); + check32(res32, 0xffffff87); + + n.w[0] = ~0; + res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87); + check32(res32, 0x0000ff87); + + n.w[0] = ~0; + res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87); + check32(res32, 0xffffff87); + + n.d[0] = ~0LL; + res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87); + check64(res64, 0xffffffffffffff87LL); + + /* + * Store half combinations + */ + n.w[0] = ~0; + res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787); + check32(res32, 0xffffff87); + + n.w[0] = ~0; + res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87); + check32(res32, 0x0000008f); + + n.w[0] = ~0; + res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87); + check32(res32, 0xffff8a87); + + n.w[0] = ~0; + res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87); + check32(res32, 0x8a87); + + n.w[0] = ~0; + res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87); + check32(res32, 0x8a87ffff); + + n.w[0] = ~0; + res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87); + check64(res64, 0xffffffff8a87ffffLL); + + /* + * Store word combinations + */ + n.w[0] = ~0; + res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687); + check32(res32, 0xffffff87); + + n.w[0] = ~0; + res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687); + check32(res32, 0x00000087); + + n.w[0] = ~0; + res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678); + check32(res32, 0xfffff678); + + n.w[0] = ~0; + res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678); + check32(res32, 0x00005678); + + n.w[0] = ~0; + res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678); + check32(res32, 0x12345678); + + n.d[0] = ~0LL; + res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678); + check64(res64, 0xffffffff12345678LL); + + /* + * Store double combinations + */ + n.d[0] = ~0LL; + res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0); + check32(res32, 0xffffffde); + + n.d[0] = ~0LL; + res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0); + check32(res32, 0x000000de); + + n.d[0] = ~0LL; + res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0); + check32(res32, 0xffff9abc); + + n.d[0] = ~0LL; + res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0); + check32(res32, 0x00009abc); + + n.d[0] = ~0LL; + res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0); + check32(res32, 0x12345678); + + n.d[0] = ~0LL; + res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0); + check64(res64, 0x123456789abcdef0LL); + + /* + * Predicated word stores + */ + n.w[0] = ~0; + res32 = cancel_sw_lb(0, &n.w[0], &n.b[0], 0x12345678); + check32(res32, 0xffffffff); + + n.w[0] = ~0; + res32 = cancel_sw_lb(1, &n.w[0], &n.b[0], 0x12345687); + check32(res32, 0xffffff87); + + /* + * Predicated double stores + */ + n.d[0] = ~0LL; + res64 = cancel_sw_ld(0, &n.w[0], &n.d[0], 0x12345678); + check64(res64, 0xffffffffffffffffLL); + + n.d[0] = ~0LL; + res64 = cancel_sw_ld(1, &n.w[0], &n.d[0], 0x12345678); + check64(res64, 0xffffffff12345678LL); + + n.d[0] = ~0LL; + res64 = cancel_sw_ld(0, &n.w[1], &n.d[0], 0x12345678); + check64(res64, 0xffffffffffffffffLL); + + n.d[0] = ~0LL; + res64 = cancel_sw_ld(1, &n.w[1], &n.d[0], 0x12345678); + check64(res64, 0x12345678ffffffffLL); + + /* + * No overlap tests + */ + n.w[0] = ~0; + res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87); + check32(res32, 0xffffffff); + + n.w[0] = ~0; + res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87); + check32(res32, 0xffffffff); + + n.w[0] = ~0; + res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787); + check32(res32, 0xffffffff); + + n.w[0] = ~0; + res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787); + check32(res32, 0xffffffff); + + n.d[0] = ~0LL; + res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678); + check32(res32, 0xffffffff); + + n.d[0] = ~0LL; + res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678); + check32(res32, 0xffffffff); + + n.d[0] = ~0LL; + n.d[1] = ~0LL; + res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL); + check64(res64, 0xffffffffffffffffLL); + + n.d[0] = ~0LL; + n.d[1] = ~0LL; + res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL); + check64(res64, 0xffffffffffffffffLL); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/misc.c b/tests/tcg/hexagon/misc.c new file mode 100644 index 000000000..f0b1947fb --- /dev/null +++ b/tests/tcg/hexagon/misc.c @@ -0,0 +1,473 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <string.h> + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + + +static inline void S4_storerhnew_rr(void *p, int index, uint16_t v) +{ + asm volatile("{\n\t" + " r0 = %0\n\n" + " memh(%1+%2<<#2) = r0.new\n\t" + "}\n" + :: "r"(v), "r"(p), "r"(index) + : "r0", "memory"); +} + +static uint32_t data; +static inline void *S4_storerbnew_ap(uint8_t v) +{ + void *ret; + asm volatile("{\n\t" + " r0 = %1\n\n" + " memb(%0 = ##data) = r0.new\n\t" + "}\n" + : "=r"(ret) + : "r"(v) + : "r0", "memory"); + return ret; +} + +static inline void *S4_storerhnew_ap(uint16_t v) +{ + void *ret; + asm volatile("{\n\t" + " r0 = %1\n\n" + " memh(%0 = ##data) = r0.new\n\t" + "}\n" + : "=r"(ret) + : "r"(v) + : "r0", "memory"); + return ret; +} + +static inline void *S4_storerinew_ap(uint32_t v) +{ + void *ret; + asm volatile("{\n\t" + " r0 = %1\n\n" + " memw(%0 = ##data) = r0.new\n\t" + "}\n" + : "=r"(ret) + : "r"(v) + : "r0", "memory"); + return ret; +} + +static inline void S4_storeirbt_io(void *p, int pred) +{ + asm volatile("p0 = cmp.eq(%0, #1)\n\t" + "if (p0) memb(%1+#4)=#27\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirbf_io(void *p, int pred) +{ + asm volatile("p0 = cmp.eq(%0, #1)\n\t" + "if (!p0) memb(%1+#4)=#27\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirbtnew_io(void *p, int pred) +{ + asm volatile("{\n\t" + " p0 = cmp.eq(%0, #1)\n\t" + " if (p0.new) memb(%1+#4)=#27\n\t" + "}\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirbfnew_io(void *p, int pred) +{ + asm volatile("{\n\t" + " p0 = cmp.eq(%0, #1)\n\t" + " if (!p0.new) memb(%1+#4)=#27\n\t" + "}\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirht_io(void *p, int pred) +{ + asm volatile("p0 = cmp.eq(%0, #1)\n\t" + "if (p0) memh(%1+#4)=#27\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirhf_io(void *p, int pred) +{ + asm volatile("p0 = cmp.eq(%0, #1)\n\t" + "if (!p0) memh(%1+#4)=#27\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirhtnew_io(void *p, int pred) +{ + asm volatile("{\n\t" + " p0 = cmp.eq(%0, #1)\n\t" + " if (p0.new) memh(%1+#4)=#27\n\t" + "}\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirhfnew_io(void *p, int pred) +{ + asm volatile("{\n\t" + " p0 = cmp.eq(%0, #1)\n\t" + " if (!p0.new) memh(%1+#4)=#27\n\t" + "}\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirit_io(void *p, int pred) +{ + asm volatile("p0 = cmp.eq(%0, #1)\n\t" + "if (p0) memw(%1+#4)=#27\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirif_io(void *p, int pred) +{ + asm volatile("p0 = cmp.eq(%0, #1)\n\t" + "if (!p0) memw(%1+#4)=#27\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeiritnew_io(void *p, int pred) +{ + asm volatile("{\n\t" + " p0 = cmp.eq(%0, #1)\n\t" + " if (p0.new) memw(%1+#4)=#27\n\t" + "}\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static inline void S4_storeirifnew_io(void *p, int pred) +{ + asm volatile("{\n\t" + " p0 = cmp.eq(%0, #1)\n\t" + " if (!p0.new) memw(%1+#4)=#27\n\t" + "}\n\t" + :: "r"(pred), "r"(p) + : "p0", "memory"); +} + +static int L2_ploadrifnew_pi(void *p, int pred) +{ + int result; + asm volatile("%0 = #31\n\t" + "{\n\t" + " p0 = cmp.eq(%1, #1)\n\t" + " if (!p0.new) %0 = memw(%2++#4)\n\t" + "}\n\t" + : "=r"(result) : "r"(pred), "r"(p) + : "p0"); + return result; +} + +/* + * Test that compound-compare-jump is executed in 2 parts + * First we have to do all the compares in the packet and + * account for auto-anding. Then, we can do the predicated + * jump. + */ +static inline int cmpnd_cmp_jump(void) +{ + int retval; + asm ("r5 = #7\n\t" + "r6 = #9\n\t" + "{\n\t" + " p0 = cmp.eq(r5, #7)\n\t" + " if (p0.new) jump:nt 1f\n\t" + " p0 = cmp.eq(r6, #7)\n\t" + "}\n\t" + "%0 = #12\n\t" + "jump 2f\n\t" + "1:\n\t" + "%0 = #13\n\t" + "2:\n\t" + : "=r"(retval) :: "r5", "r6", "p0"); + return retval; +} + +static inline int test_clrtnew(int arg1, int old_val) +{ + int ret; + asm volatile("r5 = %2\n\t" + "{\n\t" + "p0 = cmp.eq(%1, #1)\n\t" + "if (p0.new) r5=#0\n\t" + "}\n\t" + "%0 = r5\n\t" + : "=r"(ret) + : "r"(arg1), "r"(old_val) + : "p0", "r5"); + return ret; +} + +int err; + +static void check(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%04x != 0x%04x\n", val, expect); + err++; + } +} + +static void check64(long long val, long long expect) +{ + if (val != expect) { + printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); + err++; + } +} + +uint32_t init[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; +uint32_t array[10]; + +uint32_t early_exit; + +/* + * Write this as a function because we can't guarantee the compiler will + * allocate a frame with just the SL2_return_tnew packet. + */ +static void SL2_return_tnew(int x); +asm ("SL2_return_tnew:\n\t" + " allocframe(#0)\n\t" + " r1 = #1\n\t" + " memw(##early_exit) = r1\n\t" + " {\n\t" + " p0 = cmp.eq(r0, #1)\n\t" + " if (p0.new) dealloc_return:nt\n\t" /* SL2_return_tnew */ + " }\n\t" + " r1 = #0\n\t" + " memw(##early_exit) = r1\n\t" + " dealloc_return\n\t" + ); + +static long long creg_pair(int x, int y) +{ + long long retval; + asm ("m0 = %1\n\t" + "m1 = %2\n\t" + "%0 = c7:6\n\t" + : "=r"(retval) : "r"(x), "r"(y) : "m0", "m1"); + return retval; +} + +static long long decbin(long long x, long long y, int *pred) +{ + long long retval; + asm ("%0 = decbin(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(retval), "=r"(*pred) + : "r"(x), "r"(y)); + return retval; +} + +/* Check that predicates are auto-and'ed in a packet */ +static int auto_and(void) +{ + int retval; + asm ("r5 = #1\n\t" + "{\n\t" + " p0 = cmp.eq(r1, #1)\n\t" + " p0 = cmp.eq(r1, #2)\n\t" + "}\n\t" + "%0 = p0\n\t" + : "=r"(retval) + : + : "r5", "p0"); + return retval; +} + +void test_lsbnew(void) +{ + int result; + + asm("r0 = #2\n\t" + "r1 = #5\n\t" + "{\n\t" + " p0 = r0\n\t" + " if (p0.new) r1 = #3\n\t" + "}\n\t" + "%0 = r1\n\t" + : "=r"(result) :: "r0", "r1", "p0"); + check(result, 5); +} + +void test_l2fetch(void) +{ + /* These don't do anything in qemu, just make sure they don't assert */ + asm volatile ("l2fetch(r0, r1)\n\t" + "l2fetch(r0, r3:2)\n\t"); +} + +int main() +{ + int res; + long long res64; + int pred; + + memcpy(array, init, sizeof(array)); + S4_storerhnew_rr(array, 4, 0xffff); + check(array[4], 0xffff); + + data = ~0; + check((uint32_t)S4_storerbnew_ap(0x12), (uint32_t)&data); + check(data, 0xffffff12); + + data = ~0; + check((uint32_t)S4_storerhnew_ap(0x1234), (uint32_t)&data); + check(data, 0xffff1234); + + data = ~0; + check((uint32_t)S4_storerinew_ap(0x12345678), (uint32_t)&data); + check(data, 0x12345678); + + /* Byte */ + memcpy(array, init, sizeof(array)); + S4_storeirbt_io(&array[1], 1); + check(array[2], 27); + S4_storeirbt_io(&array[2], 0); + check(array[3], 3); + + memcpy(array, init, sizeof(array)); + S4_storeirbf_io(&array[3], 0); + check(array[4], 27); + S4_storeirbf_io(&array[4], 1); + check(array[5], 5); + + memcpy(array, init, sizeof(array)); + S4_storeirbtnew_io(&array[5], 1); + check(array[6], 27); + S4_storeirbtnew_io(&array[6], 0); + check(array[7], 7); + + memcpy(array, init, sizeof(array)); + S4_storeirbfnew_io(&array[7], 0); + check(array[8], 27); + S4_storeirbfnew_io(&array[8], 1); + check(array[9], 9); + + /* Half word */ + memcpy(array, init, sizeof(array)); + S4_storeirht_io(&array[1], 1); + check(array[2], 27); + S4_storeirht_io(&array[2], 0); + check(array[3], 3); + + memcpy(array, init, sizeof(array)); + S4_storeirhf_io(&array[3], 0); + check(array[4], 27); + S4_storeirhf_io(&array[4], 1); + check(array[5], 5); + + memcpy(array, init, sizeof(array)); + S4_storeirhtnew_io(&array[5], 1); + check(array[6], 27); + S4_storeirhtnew_io(&array[6], 0); + check(array[7], 7); + + memcpy(array, init, sizeof(array)); + S4_storeirhfnew_io(&array[7], 0); + check(array[8], 27); + S4_storeirhfnew_io(&array[8], 1); + check(array[9], 9); + + /* Word */ + memcpy(array, init, sizeof(array)); + S4_storeirit_io(&array[1], 1); + check(array[2], 27); + S4_storeirit_io(&array[2], 0); + check(array[3], 3); + + memcpy(array, init, sizeof(array)); + S4_storeirif_io(&array[3], 0); + check(array[4], 27); + S4_storeirif_io(&array[4], 1); + check(array[5], 5); + + memcpy(array, init, sizeof(array)); + S4_storeiritnew_io(&array[5], 1); + check(array[6], 27); + S4_storeiritnew_io(&array[6], 0); + check(array[7], 7); + + memcpy(array, init, sizeof(array)); + S4_storeirifnew_io(&array[7], 0); + check(array[8], 27); + S4_storeirifnew_io(&array[8], 1); + check(array[9], 9); + + memcpy(array, init, sizeof(array)); + res = L2_ploadrifnew_pi(&array[6], 0); + check(res, 6); + res = L2_ploadrifnew_pi(&array[7], 1); + check(res, 31); + + int x = cmpnd_cmp_jump(); + check(x, 12); + + SL2_return_tnew(0); + check(early_exit, 0); + SL2_return_tnew(1); + check(early_exit, 1); + + long long pair = creg_pair(5, 7); + check((int)pair, 5); + check((int)(pair >> 32), 7); + + res = test_clrtnew(1, 7); + check(res, 0); + res = test_clrtnew(2, 7); + check(res, 7); + + res64 = decbin(0xf0f1f2f3f4f5f6f7LL, 0x7f6f5f4f3f2f1f0fLL, &pred); + check64(res64, 0x357980003700010cLL); + check(pred, 0); + + res64 = decbin(0xfLL, 0x1bLL, &pred); + check64(res64, 0x78000100LL); + check(pred, 1); + + res = auto_and(); + check(res, 0); + + test_lsbnew(); + + test_l2fetch(); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/multi_result.c b/tests/tcg/hexagon/multi_result.c new file mode 100644 index 000000000..52997b312 --- /dev/null +++ b/tests/tcg/hexagon/multi_result.c @@ -0,0 +1,282 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +static int sfrecipa(int Rs, int Rt, int *pred_result) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfrecipa(%2, %3)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(Rs), "r"(Rt) + : "p0"); + *pred_result = predval; + return result; +} + +static int sfinvsqrta(int Rs, int *pred_result) +{ + int result; + int predval; + + asm volatile("%0,p0 = sfinvsqrta(%2)\n\t" + "%1 = p0\n\t" + : "+r"(result), "=r"(predval) + : "r"(Rs) + : "p0"); + *pred_result = predval; + return result; +} + +static long long vacsh(long long Rxx, long long Rss, long long Rtt, + int *pred_result, int *ovf_result) +{ + long long result = Rxx; + int predval; + int usr; + + /* + * This instruction can set bit 0 (OVF/overflow) in usr + * Clear the bit first, then return that bit to the caller + */ + asm volatile("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "usr = r2\n\t" + "%0,p0 = vacsh(%3, %4)\n\t" + "%1 = p0\n\t" + "%2 = usr\n\t" + : "+r"(result), "=r"(predval), "=r"(usr) + : "r"(Rss), "r"(Rtt) + : "r2", "p0", "usr"); + *pred_result = predval; + *ovf_result = (usr & 1); + return result; +} + +static long long vminub(long long Rtt, long long Rss, + int *pred_result) +{ + long long result; + int predval; + + asm volatile("%0,p0 = vminub(%2, %3)\n\t" + "%1 = p0\n\t" + : "=r"(result), "=r"(predval) + : "r"(Rtt), "r"(Rss) + : "p0"); + *pred_result = predval; + return result; +} + +static long long add_carry(long long Rss, long long Rtt, + int pred_in, int *pred_result) +{ + long long result; + int predval = pred_in; + + asm volatile("p0 = %1\n\t" + "%0 = add(%2, %3, p0):carry\n\t" + "%1 = p0\n\t" + : "=r"(result), "+r"(predval) + : "r"(Rss), "r"(Rtt) + : "p0"); + *pred_result = predval; + return result; +} + +static long long sub_carry(long long Rss, long long Rtt, + int pred_in, int *pred_result) +{ + long long result; + int predval = pred_in; + + asm volatile("p0 = !cmp.eq(%1, #0)\n\t" + "%0 = sub(%2, %3, p0):carry\n\t" + "%1 = p0\n\t" + : "=r"(result), "+r"(predval) + : "r"(Rss), "r"(Rtt) + : "p0"); + *pred_result = predval; + return result; +} + +int err; + +static void check_ll(long long val, long long expect) +{ + if (val != expect) { + printf("ERROR: 0x%016llx != 0x%016llx\n", val, expect); + err++; + } +} + +static void check(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%08x != 0x%08x\n", val, expect); + err++; + } +} + +static void check_p(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%02x != 0x%02x\n", val, expect); + err++; + } +} + +static void test_sfrecipa() +{ + int res; + int pred_result; + + res = sfrecipa(0x04030201, 0x05060708, &pred_result); + check(res, 0x59f38001); + check_p(pred_result, 0x00); +} + +static void test_sfinvsqrta() +{ + int res; + int pred_result; + + res = sfinvsqrta(0x04030201, &pred_result); + check(res, 0x4d330000); + check_p(pred_result, 0xe0); + + res = sfinvsqrta(0x0, &pred_result); + check(res, 0x3f800000); + check_p(pred_result, 0x0); +} + +static void test_vacsh() +{ + long long res64; + int pred_result; + int ovf_result; + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030004LL, + 0x0000000000000000LL, &pred_result, &ovf_result); + check_ll(res64, 0x0004000300030004LL); + check_p(pred_result, 0xf0); + check(ovf_result, 0); + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030004LL, + 0x000affff000d0000LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e0003000f0004LL); + check_p(pred_result, 0xcc); + check(ovf_result, 0); + + res64 = vacsh(0x00047fff00020001LL, + 0x00017fff00030004LL, + 0x000a0fff000d0000LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e7fff000f0004LL); + check_p(pred_result, 0xfc); + check(ovf_result, 1); + + res64 = vacsh(0x0004000300020001LL, + 0x0001000200030009LL, + 0x000affff000d0001LL, &pred_result, &ovf_result); + check_ll(res64, 0x000e0003000f0008LL); + check_p(pred_result, 0xcc); + check(ovf_result, 0); +} + +static void test_vminub() +{ + long long res64; + int pred_result; + + res64 = vminub(0x0807060504030201LL, + 0x0102030405060708LL, + &pred_result); + check_ll(res64, 0x0102030404030201LL); + check_p(pred_result, 0xf0); + + res64 = vminub(0x0802060405030701LL, + 0x0107030504060208LL, + &pred_result); + check_ll(res64, 0x0102030404030201LL); + check_p(pred_result, 0xaa); +} + +static void test_add_carry() +{ + long long res64; + int pred_result; + + res64 = add_carry(0x0000000000000000LL, + 0xffffffffffffffffLL, + 1, &pred_result); + check_ll(res64, 0x0000000000000000LL); + check_p(pred_result, 0xff); + + res64 = add_carry(0x0000000100000000LL, + 0xffffffffffffffffLL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); + + res64 = add_carry(0x0000000100000000LL, + 0xffffffffffffffffLL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); +} + +static void test_sub_carry() +{ + long long res64; + int pred_result; + + res64 = sub_carry(0x0000000000000000LL, + 0x0000000000000000LL, + 1, &pred_result); + check_ll(res64, 0x0000000000000000LL); + check_p(pred_result, 0xff); + + res64 = sub_carry(0x0000000100000000LL, + 0x0000000000000000LL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); + + res64 = sub_carry(0x0000000100000000LL, + 0x0000000000000000LL, + 0, &pred_result); + check_ll(res64, 0x00000000ffffffffLL); + check_p(pred_result, 0xff); +} + +int main() +{ + test_sfrecipa(); + test_sfinvsqrta(); + test_vacsh(); + test_vminub(); + test_add_carry(); + test_sub_carry(); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/overflow.c b/tests/tcg/hexagon/overflow.c new file mode 100644 index 000000000..196fcf7f3 --- /dev/null +++ b/tests/tcg/hexagon/overflow.c @@ -0,0 +1,107 @@ +/* + * Copyright(c) 2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <fcntl.h> +#include <setjmp.h> +#include <signal.h> + + +int err; + +static void __check(const char *filename, int line, int x, int expect) +{ + if (x != expect) { + printf("ERROR %s:%d - %d != %d\n", + filename, line, x, expect); + err++; + } +} + +#define check(x, expect) __check(__FILE__, __LINE__, (x), (expect)) + +static int satub(int src, int *p, int *ovf_result) +{ + int result; + int usr; + + /* + * This instruction can set bit 0 (OVF/overflow) in usr + * Clear the bit first, then return that bit to the caller + * + * We also store the src into *p in the same packet, so we + * can ensure the overflow doesn't get set when an exception + * is generated. + */ + asm volatile("r2 = usr\n\t" + "r2 = clrbit(r2, #0)\n\t" /* clear overflow bit */ + "usr = r2\n\t" + "{\n\t" + " %0 = satub(%2)\n\t" + " memw(%3) = %2\n\t" + "}\n\t" + "%1 = usr\n\t" + : "=r"(result), "=r"(usr) + : "r"(src), "r"(p) + : "r2", "usr", "memory"); + *ovf_result = (usr & 1); + return result; +} + +int read_usr_overflow(void) +{ + int result; + asm volatile("%0 = usr\n\t" : "=r"(result)); + return result & 1; +} + + +jmp_buf jmp_env; +int usr_overflow; + +static void sig_segv(int sig, siginfo_t *info, void *puc) +{ + usr_overflow = read_usr_overflow(); + longjmp(jmp_env, 1); +} + +int main() +{ + struct sigaction act; + int ovf; + + /* SIGSEGV test */ + act.sa_sigaction = sig_segv; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &act, NULL); + if (setjmp(jmp_env) == 0) { + satub(300, 0, &ovf); + } + + act.sa_handler = SIG_DFL; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + + check(usr_overflow, 0); + + puts(err ? "FAIL" : "PASS"); + return err ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/tests/tcg/hexagon/preg_alias.c b/tests/tcg/hexagon/preg_alias.c new file mode 100644 index 000000000..0cac469b7 --- /dev/null +++ b/tests/tcg/hexagon/preg_alias.c @@ -0,0 +1,169 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +static inline int preg_alias(int v0, int v1, int v2, int v3) +{ + int ret; + asm volatile("p0 = %1\n\t" + "p1 = %2\n\t" + "p2 = %3\n\t" + "p3 = %4\n\t" + "%0 = C4\n" + : "=r"(ret) + : "r"(v0), "r"(v1), "r"(v2), "r"(v3) + : "p0", "p1", "p2", "p3"); + return ret; +} + +static inline int preg_alias_pair(int v0, int v1, int v2, int v3) +{ + long long c54; + asm volatile("p0 = %1\n\t" + "p1 = %2\n\t" + "p2 = %3\n\t" + "p3 = %4\n\t" + "%0 = C5:4\n" + : "=r"(c54) + : "r"(v0), "r"(v1), "r"(v2), "r"(v3) + : "p0", "p1", "p2", "p3"); + return (int)c54; +} + +typedef union { + int creg; + struct { + unsigned char p0; + unsigned char p1; + unsigned char p2; + unsigned char p3; + } pregs; +} PRegs; + +static inline void creg_alias(int cval, PRegs *pregs) +{ + unsigned char val; + asm volatile("c4 = %0" : : "r"(cval)); + + asm volatile("%0 = p0" : "=r"(val)); + pregs->pregs.p0 = val; + asm volatile("%0 = p1" : "=r"(val)); + pregs->pregs.p1 = val; + asm volatile("%0 = p2" : "=r"(val)); + pregs->pregs.p2 = val; + asm volatile("%0 = p3" : "=r"(val)); + pregs->pregs.p3 = val; +} + +int err; + +static void check(int val, int expect) +{ + if (val != expect) { + printf("ERROR: 0x%08x != 0x%08x\n", val, expect); + err++; + } +} + +static inline void creg_alias_pair(unsigned int cval, PRegs *pregs) +{ + unsigned long long cval_pair = (0xdeadbeefULL << 32) | cval; + unsigned char val; + int c5; + asm volatile("c5:4 = %0" : : "r"(cval_pair)); + + asm volatile("%0 = p0" : "=r"(val)); + pregs->pregs.p0 = val; + asm volatile("%0 = p1" : "=r"(val)); + pregs->pregs.p1 = val; + asm volatile("%0 = p2" : "=r"(val)); + pregs->pregs.p2 = val; + asm volatile("%0 = p3" : "=r"(val)); + pregs->pregs.p3 = val; + asm volatile("%0 = c5" : "=r"(c5)); + check(c5, 0xdeadbeef); +} + +int main() +{ + int c4; + PRegs pregs; + + c4 = preg_alias(0xff, 0x00, 0xff, 0x00); + check(c4, 0x00ff00ff); + c4 = preg_alias(0xff, 0x00, 0x00, 0x00); + check(c4, 0x000000ff); + c4 = preg_alias(0x00, 0xff, 0x00, 0x00); + check(c4, 0x0000ff00); + c4 = preg_alias(0x00, 0x00, 0xff, 0x00); + check(c4, 0x00ff0000); + c4 = preg_alias(0x00, 0x00, 0x00, 0xff); + check(c4, 0xff000000); + c4 = preg_alias(0xff, 0xff, 0xff, 0xff); + check(c4, 0xffffffff); + + c4 = preg_alias_pair(0xff, 0x00, 0xff, 0x00); + check(c4, 0x00ff00ff); + c4 = preg_alias_pair(0xff, 0x00, 0x00, 0x00); + check(c4, 0x000000ff); + c4 = preg_alias_pair(0x00, 0xff, 0x00, 0x00); + check(c4, 0x0000ff00); + c4 = preg_alias_pair(0x00, 0x00, 0xff, 0x00); + check(c4, 0x00ff0000); + c4 = preg_alias_pair(0x00, 0x00, 0x00, 0xff); + check(c4, 0xff000000); + c4 = preg_alias_pair(0xff, 0xff, 0xff, 0xff); + check(c4, 0xffffffff); + + creg_alias(0x00ff00ff, &pregs); + check(pregs.creg, 0x00ff00ff); + creg_alias(0x00ffff00, &pregs); + check(pregs.creg, 0x00ffff00); + creg_alias(0x00000000, &pregs); + check(pregs.creg, 0x00000000); + creg_alias(0xff000000, &pregs); + check(pregs.creg, 0xff000000); + creg_alias(0x00ff0000, &pregs); + check(pregs.creg, 0x00ff0000); + creg_alias(0x0000ff00, &pregs); + check(pregs.creg, 0x0000ff00); + creg_alias(0x000000ff, &pregs); + check(pregs.creg, 0x000000ff); + creg_alias(0xffffffff, &pregs); + check(pregs.creg, 0xffffffff); + + creg_alias_pair(0x00ff00ff, &pregs); + check(pregs.creg, 0x00ff00ff); + creg_alias_pair(0x00ffff00, &pregs); + check(pregs.creg, 0x00ffff00); + creg_alias_pair(0x00000000, &pregs); + check(pregs.creg, 0x00000000); + creg_alias_pair(0xff000000, &pregs); + check(pregs.creg, 0xff000000); + creg_alias_pair(0x00ff0000, &pregs); + check(pregs.creg, 0x00ff0000); + creg_alias_pair(0x0000ff00, &pregs); + check(pregs.creg, 0x0000ff00); + creg_alias_pair(0x000000ff, &pregs); + check(pregs.creg, 0x000000ff); + creg_alias_pair(0xffffffff, &pregs); + check(pregs.creg, 0xffffffff); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/scatter_gather.c b/tests/tcg/hexagon/scatter_gather.c new file mode 100644 index 000000000..b93eb1813 --- /dev/null +++ b/tests/tcg/hexagon/scatter_gather.c @@ -0,0 +1,1011 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This example tests the HVX scatter/gather instructions + * + * See section 5.13 of the V68 HVX Programmer's Reference + * + * There are 3 main classes operations + * _16 16-bit elements and 16-bit offsets + * _32 32-bit elements and 32-bit offsets + * _16_32 16-bit elements and 32-bit offsets + * + * There are also masked and accumulate versions + */ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> + +typedef long HVX_Vector __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); +typedef long HVX_VectorPair __attribute__((__vector_size__(256))) + __attribute__((aligned(128))); +typedef long HVX_VectorPred __attribute__((__vector_size__(128))) + __attribute__((aligned(128))); + +#define VSCATTER_16(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermh_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_32(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermw_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermwq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhw_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32_MASKED(MASK, BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhwq_128B(MASK, (int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermh_add_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_32_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermw_add_128B((int)BASE, RGN, OFF, VALS) +#define VSCATTER_16_32_ACC(BASE, RGN, OFF, VALS) \ + __builtin_HEXAGON_V6_vscattermhw_add_128B((int)BASE, RGN, OFF, VALS) + +#define VGATHER_16(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermh_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_16_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) +#define VGATHER_32(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermw_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) +#define VGATHER_16_32(DSTADDR, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhw_128B(DSTADDR, (int)BASE, RGN, OFF) +#define VGATHER_16_32_MASKED(DSTADDR, MASK, BASE, RGN, OFF) \ + __builtin_HEXAGON_V6_vgathermhwq_128B(DSTADDR, MASK, (int)BASE, RGN, OFF) + +#define VSHUFF_H(V) \ + __builtin_HEXAGON_V6_vshuffh_128B(V) +#define VSPLAT_H(X) \ + __builtin_HEXAGON_V6_lvsplath_128B(X) +#define VAND_VAL(PRED, VAL) \ + __builtin_HEXAGON_V6_vandvrt_128B(PRED, VAL) +#define VDEAL_H(V) \ + __builtin_HEXAGON_V6_vdealh_128B(V) + +int err; + +/* define the number of rows/cols in a square matrix */ +#define MATRIX_SIZE 64 + +/* define the size of the scatter buffer */ +#define SCATTER_BUFFER_SIZE (MATRIX_SIZE * MATRIX_SIZE) + +/* fake vtcm - put buffers together and force alignment */ +static struct { + unsigned short vscatter16[SCATTER_BUFFER_SIZE]; + unsigned short vgather16[MATRIX_SIZE]; + unsigned int vscatter32[SCATTER_BUFFER_SIZE]; + unsigned int vgather32[MATRIX_SIZE]; + unsigned short vscatter16_32[SCATTER_BUFFER_SIZE]; + unsigned short vgather16_32[MATRIX_SIZE]; +} vtcm __attribute__((aligned(0x10000))); + +/* declare the arrays of reference values */ +unsigned short vscatter16_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_ref[MATRIX_SIZE]; +unsigned int vscatter32_ref[SCATTER_BUFFER_SIZE]; +unsigned int vgather32_ref[MATRIX_SIZE]; +unsigned short vscatter16_32_ref[SCATTER_BUFFER_SIZE]; +unsigned short vgather16_32_ref[MATRIX_SIZE]; + +/* declare the arrays of offsets */ +unsigned short half_offsets[MATRIX_SIZE]; +unsigned int word_offsets[MATRIX_SIZE]; + +/* declare the arrays of values */ +unsigned short half_values[MATRIX_SIZE]; +unsigned short half_values_acc[MATRIX_SIZE]; +unsigned short half_values_masked[MATRIX_SIZE]; +unsigned int word_values[MATRIX_SIZE]; +unsigned int word_values_acc[MATRIX_SIZE]; +unsigned int word_values_masked[MATRIX_SIZE]; + +/* declare the arrays of predicates */ +unsigned short half_predicates[MATRIX_SIZE]; +unsigned int word_predicates[MATRIX_SIZE]; + +/* make this big enough for all the intrinsics */ +const size_t region_len = sizeof(vtcm); + +/* optionally add sync instructions */ +#define SYNC_VECTOR 1 + +static void sync_scatter(void *addr) +{ +#if SYNC_VECTOR + /* + * Do the scatter release followed by a dummy load to complete the + * synchronization. Normally the dummy load would be deferred as + * long as possible to minimize stalls. + */ + asm volatile("vmem(%0 + #0):scatter_release\n" : : "r"(addr)); + /* use volatile to force the load */ + volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy; +#endif +} + +static void sync_gather(void *addr) +{ +#if SYNC_VECTOR + /* use volatile to force the load */ + volatile HVX_Vector vDummy = *(HVX_Vector *)addr; vDummy = vDummy; +#endif +} + +/* optionally print the results */ +#define PRINT_DATA 0 + +#define FILL_CHAR '.' + +/* fill vtcm scratch with ee */ +void prefill_vtcm_scratch(void) +{ + memset(&vtcm, FILL_CHAR, sizeof(vtcm)); +} + +/* create byte offsets to be a diagonal of the matrix with 16 bit elements */ +void create_offsets_values_preds_16(void) +{ + unsigned short half_element = 0; + unsigned short half_element_masked = 0; + char letter = 'A'; + char letter_masked = '@'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + half_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_element_masked = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_element_masked |= letter_masked << j * 8; + } + + half_values[i] = half_element; + half_values_acc[i] = ((i % 10) << 8) + (i % 10); + half_values_masked[i] = half_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + + half_predicates[i] = (i % 3 == 0 || i % 5 == 0) ? ~0 : 0; + } +} + +/* create byte offsets to be a diagonal of the matrix with 32 bit elements */ +void create_offsets_values_preds_32(void) +{ + unsigned int word_element = 0; + unsigned int word_element_masked = 0; + char letter = 'A'; + char letter_masked = '&'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (4 * MATRIX_SIZE + 4); + + word_element = 0; + word_element_masked = 0; + for (int j = 0; j < 4; j++) { + word_element |= letter << j * 8; + word_element_masked |= letter_masked << j * 8; + } + + word_values[i] = word_element; + word_values_acc[i] = ((i % 10) << 8) + (i % 10); + word_values_masked[i] = word_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'M') { + letter = 'A'; + } + + word_predicates[i] = (i % 4 == 0 || i % 7 == 0) ? ~0 : 0; + } +} + +/* + * create byte offsets to be a diagonal of the matrix with 16 bit elements + * and 32 bit offsets + */ +void create_offsets_values_preds_16_32(void) +{ + unsigned short half_element = 0; + unsigned short half_element_masked = 0; + char letter = 'D'; + char letter_masked = '$'; + + for (int i = 0; i < MATRIX_SIZE; i++) { + word_offsets[i] = i * (2 * MATRIX_SIZE + 2); + + half_element = 0; + half_element_masked = 0; + for (int j = 0; j < 2; j++) { + half_element |= letter << j * 8; + half_element_masked |= letter_masked << j * 8; + } + + half_values[i] = half_element; + half_values_acc[i] = ((i % 10) << 8) + (i % 10); + half_values_masked[i] = half_element_masked; + + letter++; + /* reset to 'A' */ + if (letter == 'P') { + letter = 'D'; + } + + half_predicates[i] = (i % 2 == 0 || i % 13 == 0) ? ~0 : 0; + } +} + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_16(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values; + + VSCATTER_16(&vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter-accumulate the 16 bit elements using intrinsics */ +void vector_scatter_16_acc(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values_acc; + + VSCATTER_16_ACC(&vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter the 16 bit elements using intrinsics */ +void vector_scatter_16_masked(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector values = *(HVX_Vector *)half_values_masked; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + VSCATTER_16_MASKED(preds, &vtcm.vscatter16, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values; + HVX_Vector valueshi = *(HVX_Vector *)&word_values[MATRIX_SIZE / 2]; + + VSCATTER_32(&vtcm.vscatter32, region_len, offsetslo, valueslo); + VSCATTER_32(&vtcm.vscatter32, region_len, offsetshi, valueshi); + + sync_scatter(vtcm.vscatter32); +} + +/* scatter-acc the 32 bit elements using intrinsics */ +void vector_scatter_32_acc(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values_acc; + HVX_Vector valueshi = *(HVX_Vector *)&word_values_acc[MATRIX_SIZE / 2]; + + VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetslo, valueslo); + VSCATTER_32_ACC(&vtcm.vscatter32, region_len, offsetshi, valueshi); + + sync_scatter(vtcm.vscatter32); +} + +/* scatter the 32 bit elements using intrinsics */ +void vector_scatter_32_masked(void) +{ + /* copy the offsets and values to vectors */ + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector valueslo = *(HVX_Vector *)word_values_masked; + HVX_Vector valueshi = *(HVX_Vector *)&word_values_masked[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); + HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); + + VSCATTER_32_MASKED(predslo, &vtcm.vscatter32, region_len, offsetslo, + valueslo); + VSCATTER_32_MASKED(predshi, &vtcm.vscatter32, region_len, offsetshi, + valueshi); + + sync_scatter(vtcm.vscatter16); +} + +/* scatter the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values; + values = VSHUFF_H(values); + + VSCATTER_16_32(&vtcm.vscatter16_32, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* scatter-acc the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32_acc(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values_acc; + values = VSHUFF_H(values); + + VSCATTER_16_32_ACC(&vtcm.vscatter16_32, region_len, offsets, values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* masked scatter the 16 bit elements with 32 bit offsets using intrinsics */ +void vector_scatter_16_32_masked(void) +{ + HVX_VectorPair offsets; + HVX_Vector values; + HVX_Vector pred_reg; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + /* these values need to be shuffled for the scatter */ + values = *(HVX_Vector *)half_values_masked; + values = VSHUFF_H(values); + + pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = VSHUFF_H(pred_reg); + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + VSCATTER_16_32_MASKED(preds, &vtcm.vscatter16_32, region_len, offsets, + values); + + sync_scatter(vtcm.vscatter16_32); +} + +/* gather the elements from the scatter16 buffer */ +void vector_gather_16(void) +{ + HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + + VGATHER_16(vgather, &vtcm.vscatter16, region_len, offsets); + + sync_gather(vgather); +} + +static unsigned short gather_16_masked_init(void) +{ + char letter = '?'; + return letter | (letter << 8); +} + +void vector_gather_16_masked(void) +{ + HVX_Vector *vgather = (HVX_Vector *)&vtcm.vgather16; + HVX_Vector offsets = *(HVX_Vector *)half_offsets; + HVX_Vector pred_reg = *(HVX_Vector *)half_predicates; + HVX_VectorPred preds = VAND_VAL(pred_reg, ~0); + + *vgather = VSPLAT_H(gather_16_masked_init()); + VGATHER_16_MASKED(vgather, preds, &vtcm.vscatter16, region_len, offsets); + + sync_gather(vgather); +} + +/* gather the elements from the scatter32 buffer */ +void vector_gather_32(void) +{ + HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; + HVX_Vector *vgatherhi = + (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + + VGATHER_32(vgatherlo, &vtcm.vscatter32, region_len, offsetslo); + VGATHER_32(vgatherhi, &vtcm.vscatter32, region_len, offsetshi); + + sync_gather(vgatherhi); +} + +static unsigned int gather_32_masked_init(void) +{ + char letter = '?'; + return letter | (letter << 8) | (letter << 16) | (letter << 24); +} + +void vector_gather_32_masked(void) +{ + HVX_Vector *vgatherlo = (HVX_Vector *)&vtcm.vgather32; + HVX_Vector *vgatherhi = + (HVX_Vector *)((int)&vtcm.vgather32 + (MATRIX_SIZE * 2)); + HVX_Vector offsetslo = *(HVX_Vector *)word_offsets; + HVX_Vector offsetshi = *(HVX_Vector *)&word_offsets[MATRIX_SIZE / 2]; + HVX_Vector pred_reglo = *(HVX_Vector *)word_predicates; + HVX_VectorPred predslo = VAND_VAL(pred_reglo, ~0); + HVX_Vector pred_reghi = *(HVX_Vector *)&word_predicates[MATRIX_SIZE / 2]; + HVX_VectorPred predshi = VAND_VAL(pred_reghi, ~0); + + *vgatherlo = VSPLAT_H(gather_32_masked_init()); + *vgatherhi = VSPLAT_H(gather_32_masked_init()); + VGATHER_32_MASKED(vgatherlo, predslo, &vtcm.vscatter32, region_len, + offsetslo); + VGATHER_32_MASKED(vgatherhi, predshi, &vtcm.vscatter32, region_len, + offsetshi); + + sync_gather(vgatherlo); + sync_gather(vgatherhi); +} + +/* gather the elements from the scatter16_32 buffer */ +void vector_gather_16_32(void) +{ + HVX_Vector *vgather; + HVX_VectorPair offsets; + HVX_Vector values; + + /* get the vtcm address to gather from */ + vgather = (HVX_Vector *)&vtcm.vgather16_32; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + + VGATHER_16_32(vgather, &vtcm.vscatter16_32, region_len, offsets); + + /* deal the elements to get the order back */ + values = *(HVX_Vector *)vgather; + values = VDEAL_H(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; +} + +void vector_gather_16_32_masked(void) +{ + HVX_Vector *vgather; + HVX_VectorPair offsets; + HVX_Vector pred_reg; + HVX_VectorPred preds; + HVX_Vector values; + + /* get the vtcm address to gather from */ + vgather = (HVX_Vector *)&vtcm.vgather16_32; + + /* get the word offsets in a vector pair */ + offsets = *(HVX_VectorPair *)word_offsets; + pred_reg = *(HVX_Vector *)half_predicates; + pred_reg = VSHUFF_H(pred_reg); + preds = VAND_VAL(pred_reg, ~0); + + *vgather = VSPLAT_H(gather_16_masked_init()); + VGATHER_16_32_MASKED(vgather, preds, &vtcm.vscatter16_32, region_len, + offsets); + + /* deal the elements to get the order back */ + values = *(HVX_Vector *)vgather; + values = VDEAL_H(values); + + /* write it back to vtcm address */ + *(HVX_Vector *)vgather = values; +} + +static void check_buffer(const char *name, void *c, void *r, size_t size) +{ + char *check = (char *)c; + char *ref = (char *)r; + for (int i = 0; i < size; i++) { + if (check[i] != ref[i]) { + printf("ERROR %s [%d]: 0x%x (%c) != 0x%x (%c)\n", name, i, + check[i], check[i], ref[i], ref[i]); + err++; + } + } +} + +/* + * These scalar functions are the C equivalents of the vector functions that + * use HVX + */ + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] = half_values[i]; + } +} + +void check_scatter_16() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16_acc(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16[half_offsets[i] / 2] += half_values_acc[i]; + } +} + +void check_scatter_16_acc() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_16_acc(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 16 bit elements using C */ +void scalar_scatter_16_masked(unsigned short *vscatter16) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16[half_offsets[i] / 2] = half_values_masked[i]; + } + } + +} + +void check_scatter_16_masked() +{ + memset(vscatter16_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16(vscatter16_ref); + scalar_scatter_16_acc(vscatter16_ref); + scalar_scatter_16_masked(vscatter16_ref); + check_buffer(__func__, vtcm.vscatter16, vscatter16_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] = word_values[i]; + } +} + +void check_scatter_32() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32_acc(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter32[word_offsets[i] / 4] += word_values_acc[i]; + } +} + +void check_scatter_32_acc() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_32_acc(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_32_masked(unsigned int *vscatter32) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (word_predicates[i]) { + vscatter32[word_offsets[i] / 4] = word_values_masked[i]; + } + } +} + +void check_scatter_32_masked() +{ + memset(vscatter32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); + scalar_scatter_32(vscatter32_ref); + scalar_scatter_32_acc(vscatter32_ref); + scalar_scatter_32_masked(vscatter32_ref); + check_buffer(__func__, vtcm.vscatter32, vscatter32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned int)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_16_32(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] = half_values[i]; + } +} + +void check_scatter_16_32() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* scatter the 32 bit elements using C */ +void scalar_scatter_16_32_acc(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vscatter16_32[word_offsets[i] / 2] += half_values_acc[i]; + } +} + +void check_scatter_16_32_acc() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatter_16_32_acc(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +void scalar_scatter_16_32_masked(unsigned short *vscatter16_32) +{ + for (int i = 0; i < MATRIX_SIZE; i++) { + if (half_predicates[i]) { + vscatter16_32[word_offsets[i] / 2] = half_values_masked[i]; + } + } +} + +void check_scatter_16_32_masked() +{ + memset(vscatter16_32_ref, FILL_CHAR, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); + scalar_scatter_16_32(vscatter16_32_ref); + scalar_scatter_16_32_acc(vscatter16_32_ref); + scalar_scatter_16_32_masked(vscatter16_32_ref); + check_buffer(__func__, vtcm.vscatter16_32, vscatter16_32_ref, + SCATTER_BUFFER_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16(unsigned short *vgather16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2]; + } +} + +void check_gather_16() +{ + memset(vgather16_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16(vgather16_ref); + check_buffer(__func__, vtcm.vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void scalar_gather_16_masked(unsigned short *vgather16) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16[i] = vtcm.vscatter16[half_offsets[i] / 2]; + } + } +} + +void check_gather_16_masked() +{ + memset(vgather16_ref, gather_16_masked_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_masked(vgather16_ref); + check_buffer(__func__, vtcm.vgather16, vgather16_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_32(unsigned int *vgather32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4]; + } +} + +void check_gather_32(void) +{ + memset(vgather32_ref, 0, MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32(vgather32_ref); + check_buffer(__func__, vtcm.vgather32, vgather32_ref, + MATRIX_SIZE * sizeof(unsigned int)); +} + +void scalar_gather_32_masked(unsigned int *vgather32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (word_predicates[i]) { + vgather32[i] = vtcm.vscatter32[word_offsets[i] / 4]; + } + } +} + + +void check_gather_32_masked(void) +{ + memset(vgather32_ref, gather_32_masked_init(), + MATRIX_SIZE * sizeof(unsigned int)); + scalar_gather_32_masked(vgather32_ref); + check_buffer(__func__, vtcm.vgather32, + vgather32_ref, MATRIX_SIZE * sizeof(unsigned int)); +} + +/* gather the elements from the scatter buffer using C */ +void scalar_gather_16_32(unsigned short *vgather16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2]; + } +} + +void check_gather_16_32(void) +{ + memset(vgather16_32_ref, 0, MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_32(vgather16_32_ref); + check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +void scalar_gather_16_32_masked(unsigned short *vgather16_32) +{ + for (int i = 0; i < MATRIX_SIZE; ++i) { + if (half_predicates[i]) { + vgather16_32[i] = vtcm.vscatter16_32[word_offsets[i] / 2]; + } + } + +} + +void check_gather_16_32_masked(void) +{ + memset(vgather16_32_ref, gather_16_masked_init(), + MATRIX_SIZE * sizeof(unsigned short)); + scalar_gather_16_32_masked(vgather16_32_ref); + check_buffer(__func__, vtcm.vgather16_32, vgather16_32_ref, + MATRIX_SIZE * sizeof(unsigned short)); +} + +/* print scatter16 buffer */ +void print_scatter16_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vtcm.vscatter16[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 16 buffer */ +void print_gather_result_16(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", (char)((vtcm.vgather16[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the scatter32 buffer */ +void print_scatter32_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vtcm.vscatter32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 32 buffer */ +void print_gather_result_32(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 4; j++) { + printf("%c", (char)((vtcm.vgather32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the scatter16_32 buffer */ +void print_scatter16_32_buffer(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16_32 bit scatter buffer"); + + for (int i = 0; i < SCATTER_BUFFER_SIZE; i++) { + if ((i % MATRIX_SIZE) == 0) { + printf("\n"); + } + for (int j = 0; j < 2; j++) { + printf("%c", + (unsigned char)((vtcm.vscatter16_32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +/* print the gather 16_32 buffer */ +void print_gather_result_16_32(void) +{ + if (PRINT_DATA) { + printf("\n\nPrinting the 16_32 bit gather result\n"); + + for (int i = 0; i < MATRIX_SIZE; i++) { + for (int j = 0; j < 2; j++) { + printf("%c", + (unsigned char)((vtcm.vgather16_32[i] >> j * 8) & 0xff)); + } + printf(" "); + } + printf("\n"); + } +} + +int main() +{ + prefill_vtcm_scratch(); + + /* 16 bit elements with 16 bit offsets */ + create_offsets_values_preds_16(); + + vector_scatter_16(); + print_scatter16_buffer(); + check_scatter_16(); + + vector_gather_16(); + print_gather_result_16(); + check_gather_16(); + + vector_gather_16_masked(); + print_gather_result_16(); + check_gather_16_masked(); + + vector_scatter_16_acc(); + print_scatter16_buffer(); + check_scatter_16_acc(); + + vector_scatter_16_masked(); + print_scatter16_buffer(); + check_scatter_16_masked(); + + /* 32 bit elements with 32 bit offsets */ + create_offsets_values_preds_32(); + + vector_scatter_32(); + print_scatter32_buffer(); + check_scatter_32(); + + vector_gather_32(); + print_gather_result_32(); + check_gather_32(); + + vector_gather_32_masked(); + print_gather_result_32(); + check_gather_32_masked(); + + vector_scatter_32_acc(); + print_scatter32_buffer(); + check_scatter_32_acc(); + + vector_scatter_32_masked(); + print_scatter32_buffer(); + check_scatter_32_masked(); + + /* 16 bit elements with 32 bit offsets */ + create_offsets_values_preds_16_32(); + + vector_scatter_16_32(); + print_scatter16_32_buffer(); + check_scatter_16_32(); + + vector_gather_16_32(); + print_gather_result_16_32(); + check_gather_16_32(); + + vector_gather_16_32_masked(); + print_gather_result_16_32(); + check_gather_16_32_masked(); + + vector_scatter_16_32_acc(); + print_scatter16_32_buffer(); + check_scatter_16_32_acc(); + + vector_scatter_16_32_masked(); + print_scatter16_32_buffer(); + check_scatter_16_32_masked(); + + puts(err ? "FAIL" : "PASS"); + return err; +} diff --git a/tests/tcg/hexagon/vector_add_int.c b/tests/tcg/hexagon/vector_add_int.c new file mode 100644 index 000000000..d6010ea14 --- /dev/null +++ b/tests/tcg/hexagon/vector_add_int.c @@ -0,0 +1,61 @@ +/* + * Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> + +int gA[401]; +int gB[401]; +int gC[401]; + +void vector_add_int() +{ + int i; + for (i = 0; i < 400; i++) { + gA[i] = gB[i] + gC[i]; + } +} + +int main() +{ + int error = 0; + int i; + for (i = 0; i < 400; i++) { + gB[i] = i * 2; + gC[i] = i * 3; + } + gA[400] = 17; + vector_add_int(); + for (i = 0; i < 400; i++) { + if (gA[i] != i * 5) { + error++; + printf("ERROR: gB[%d] = %d\t", i, gB[i]); + printf("gC[%d] = %d\t", i, gC[i]); + printf("gA[%d] = %d\n", i, gA[i]); + } + } + if (gA[400] != 17) { + error++; + printf("ERROR: Overran the buffer\n"); + } + if (!error) { + printf("PASS\n"); + return 0; + } else { + printf("FAIL\n"); + return 1; + } +} |