From af1a266670d040d2f4083ff309d732d648afba2a Mon Sep 17 00:00:00 2001 From: Angelos Mouzakitis Date: Tue, 10 Oct 2023 14:33:42 +0000 Subject: Add submodule dependency files Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec --- meson/test cases/common/147 simd/fallback.c | 8 ++ .../common/147 simd/include/simdheader.h | 3 + meson/test cases/common/147 simd/meson.build | 44 +++++++ meson/test cases/common/147 simd/simd_avx.c | 49 +++++++ meson/test cases/common/147 simd/simd_avx2.c | 42 ++++++ meson/test cases/common/147 simd/simd_mmx.c | 67 ++++++++++ meson/test cases/common/147 simd/simd_neon.c | 20 +++ meson/test cases/common/147 simd/simd_sse.c | 29 +++++ meson/test cases/common/147 simd/simd_sse2.c | 37 ++++++ meson/test cases/common/147 simd/simd_sse3.c | 38 ++++++ meson/test cases/common/147 simd/simd_sse41.c | 40 ++++++ meson/test cases/common/147 simd/simd_sse42.c | 43 +++++++ meson/test cases/common/147 simd/simd_ssse3.c | 48 +++++++ meson/test cases/common/147 simd/simdchecker.c | 143 +++++++++++++++++++++ meson/test cases/common/147 simd/simdfuncs.h | 75 +++++++++++ 15 files changed, 686 insertions(+) create mode 100644 meson/test cases/common/147 simd/fallback.c create mode 100644 meson/test cases/common/147 simd/include/simdheader.h create mode 100644 meson/test cases/common/147 simd/meson.build create mode 100644 meson/test cases/common/147 simd/simd_avx.c create mode 100644 meson/test cases/common/147 simd/simd_avx2.c create mode 100644 meson/test cases/common/147 simd/simd_mmx.c create mode 100644 meson/test cases/common/147 simd/simd_neon.c create mode 100644 meson/test cases/common/147 simd/simd_sse.c create mode 100644 meson/test cases/common/147 simd/simd_sse2.c create mode 100644 meson/test cases/common/147 simd/simd_sse3.c create mode 100644 meson/test cases/common/147 simd/simd_sse41.c create mode 100644 meson/test cases/common/147 simd/simd_sse42.c create mode 100644 meson/test cases/common/147 simd/simd_ssse3.c create mode 100644 meson/test cases/common/147 simd/simdchecker.c create mode 100644 meson/test cases/common/147 simd/simdfuncs.h (limited to 'meson/test cases/common/147 simd') diff --git a/meson/test cases/common/147 simd/fallback.c b/meson/test cases/common/147 simd/fallback.c new file mode 100644 index 000000000..ab435f433 --- /dev/null +++ b/meson/test cases/common/147 simd/fallback.c @@ -0,0 +1,8 @@ +#include + +void increment_fallback(float arr[4]) { + int i; + for(i=0; i<4; i++) { + arr[i]++; + } +} diff --git a/meson/test cases/common/147 simd/include/simdheader.h b/meson/test cases/common/147 simd/include/simdheader.h new file mode 100644 index 000000000..6515e413e --- /dev/null +++ b/meson/test cases/common/147 simd/include/simdheader.h @@ -0,0 +1,3 @@ +#pragma once + +#define I_CAN_HAZ_SIMD diff --git a/meson/test cases/common/147 simd/meson.build b/meson/test cases/common/147 simd/meson.build new file mode 100644 index 000000000..2628a1234 --- /dev/null +++ b/meson/test cases/common/147 simd/meson.build @@ -0,0 +1,44 @@ +project('simd', 'c') + +simd = import('unstable-simd') + +cc = meson.get_compiler('c') + +cdata = configuration_data() + +if not meson.is_cross_build() and host_machine.cpu_family() == 'arm' and cc.get_id() == 'clang' + message('Adding -march=armv7 because assuming that this build happens on Raspbian.') + message('Its Clang seems to be misconfigured and does not support NEON by default.') + add_project_arguments('-march=armv7', language : 'c') +endif + +if cc.get_id() == 'msvc' and cc.version().version_compare('<17') + error('MESON_SKIP_TEST VS2010 produces broken binaries on x86.') +endif + +# FIXME add [a, b] = function() +rval = simd.check('mysimds', + mmx : 'simd_mmx.c', + sse : 'simd_sse.c', + sse2 : 'simd_sse2.c', + sse3 : 'simd_sse3.c', + ssse3 : 'simd_ssse3.c', + sse41 : 'simd_sse41.c', + sse42 : 'simd_sse42.c', + avx : 'simd_avx.c', + avx2 : 'simd_avx2.c', + neon : 'simd_neon.c', + compiler : cc, + include_directories : include_directories('include')) + +simdlibs = rval[0] +cdata.merge_from(rval[1]) + +configure_file(output : 'simdconfig.h', + configuration : cdata) + +p = executable('simdtest', 'simdchecker.c', 'fallback.c', + link_with : simdlibs) + +test('simdtest', p) + diff --git a/meson/test cases/common/147 simd/simd_avx.c b/meson/test cases/common/147 simd/simd_avx.c new file mode 100644 index 000000000..5f45a4e2f --- /dev/null +++ b/meson/test cases/common/147 simd/simd_avx.c @@ -0,0 +1,49 @@ +#include + +#ifndef I_CAN_HAZ_SIMD +#error The correct internal header was not used +#endif + +#include +#include +#include + +#ifdef _MSC_VER +#include +int avx_available(void) { + return 1; +} +#else +#include +#include + +#ifdef __APPLE__ +/* + * Apple ships a broken __builtin_cpu_supports and + * some machines in the CI farm seem to be too + * old to have AVX so just always return 0 here. + */ +int avx_available(void) { return 0; } +#else + +int avx_available(void) { + return __builtin_cpu_supports("avx"); +} +#endif +#endif + +void increment_avx(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/meson/test cases/common/147 simd/simd_avx2.c b/meson/test cases/common/147 simd/simd_avx2.c new file mode 100644 index 000000000..c79819b75 --- /dev/null +++ b/meson/test cases/common/147 simd/simd_avx2.c @@ -0,0 +1,42 @@ +#include +#include +#include + +/* + * FIXME add proper runtime detection for VS. + */ + +#ifdef _MSC_VER +#include +int avx2_available(void) { + return 0; +} +#else +#include +#include + +#if defined(__APPLE__) +int avx2_available(void) { return 0; } +#else +int avx2_available(void) { + return __builtin_cpu_supports("avx2"); +} +#endif +#endif + +void increment_avx2(float arr[4]) { + double darr[4]; + darr[0] = arr[0]; + darr[1] = arr[1]; + darr[2] = arr[2]; + darr[3] = arr[3]; + __m256d val = _mm256_loadu_pd(darr); + __m256d one = _mm256_set1_pd(1.0); + __m256d result = _mm256_add_pd(val, one); + _mm256_storeu_pd(darr, result); + one = _mm256_permute4x64_pd(one, 66); /* A no-op, just here to use AVX2. */ + arr[0] = (float)darr[0]; + arr[1] = (float)darr[1]; + arr[2] = (float)darr[2]; + arr[3] = (float)darr[3]; +} diff --git a/meson/test cases/common/147 simd/simd_mmx.c b/meson/test cases/common/147 simd/simd_mmx.c new file mode 100644 index 000000000..76054420b --- /dev/null +++ b/meson/test cases/common/147 simd/simd_mmx.c @@ -0,0 +1,67 @@ +#include +#include + +#include + +#ifdef _MSC_VER +#include +int mmx_available(void) { + return 1; +} +/* Contrary to MSDN documentation, MMX intrinsics + * just plain don't work. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} +#elif defined(__MINGW32__) +int mmx_available(void) { + return 1; +} +/* MinGW does not seem to ship with MMX or it is broken. + */ +void increment_mmx(float arr[4]) { + arr[0]++; + arr[1]++; + arr[2]++; + arr[3]++; +} +#else +#include +#include + +#if defined(__APPLE__) +int mmx_available(void) { return 1; } +#else +int mmx_available(void) { + return __builtin_cpu_supports("mmx"); +} +#endif +void increment_mmx(float arr[4]) { + /* Super ugly but we know that values in arr are always small + * enough to fit in int16; + */ + int i; + __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]); + __m64 incr = _mm_set1_pi16(1); + __m64 result = _mm_add_pi16(packed, incr); + /* Should be + * int64_t unpacker = _m_to_int64(result); + * but it does not exist on 32 bit platforms for some reason. + */ + int64_t unpacker = (int64_t)(result); + _mm_empty(); + for(i=0; i<4; i++) { + /* This fails on GCC 8 when optimizations are enabled. + * Disable it. Patches welcome to fix this. + arr[i] = (float)(unpacker & ((1<<16)-1)); + unpacker >>= 16; + */ + arr[i] += 1.0f; + } +} + +#endif diff --git a/meson/test cases/common/147 simd/simd_neon.c b/meson/test cases/common/147 simd/simd_neon.c new file mode 100644 index 000000000..2834b3096 --- /dev/null +++ b/meson/test cases/common/147 simd/simd_neon.c @@ -0,0 +1,20 @@ +#include +#include + +#include +#include + +int neon_available(void) { + return 1; /* Incorrect, but I don't know how to check this properly. */ +} + +void increment_neon(float arr[4]) { + float32x2_t a1, a2, one; + a1 = vld1_f32(arr); + a2 = vld1_f32(&arr[2]); + one = vdup_n_f32(1.0); + a1 = vadd_f32(a1, one); + a2 = vadd_f32(a2, one); + vst1_f32(arr, a1); + vst1_f32(&arr[2], a2); +} diff --git a/meson/test cases/common/147 simd/simd_sse.c b/meson/test cases/common/147 simd/simd_sse.c new file mode 100644 index 000000000..6014e0cc9 --- /dev/null +++ b/meson/test cases/common/147 simd/simd_sse.c @@ -0,0 +1,29 @@ +#include +#include + +#ifdef _MSC_VER +#include +int sse_available(void) { + return 1; +} +#else + +#include +#include +#include + +#if defined(__APPLE__) +int sse_available(void) { return 1; } +#else +int sse_available(void) { + return __builtin_cpu_supports("sse"); +} +#endif +#endif + +void increment_sse(float arr[4]) { + __m128 val = _mm_load_ps(arr); + __m128 one = _mm_set_ps1(1.0); + __m128 result = _mm_add_ps(val, one); + _mm_storeu_ps(arr, result); +} diff --git a/meson/test cases/common/147 simd/simd_sse2.c b/meson/test cases/common/147 simd/simd_sse2.c new file mode 100644 index 000000000..445afb631 --- /dev/null +++ b/meson/test cases/common/147 simd/simd_sse2.c @@ -0,0 +1,37 @@ +#include +#include +#include + +#ifdef _MSC_VER +int sse2_available(void) { + return 1; +} + +#else +#include +#include + +#if defined(__APPLE__) +int sse2_available(void) { return 1; } +#else +int sse2_available(void) { + return __builtin_cpu_supports("sse2"); +} +#endif +#endif + +void increment_sse2(float arr[4]) { + ALIGN_16 double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} + diff --git a/meson/test cases/common/147 simd/simd_sse3.c b/meson/test cases/common/147 simd/simd_sse3.c new file mode 100644 index 000000000..29a35e60f --- /dev/null +++ b/meson/test cases/common/147 simd/simd_sse3.c @@ -0,0 +1,38 @@ +#include +#include + +#ifdef _MSC_VER +#include +int sse3_available(void) { + return 1; +} +#else + +#include +#include +#include + +#if defined(__APPLE__) +int sse3_available(void) { return 1; } +#else +int sse3_available(void) { + return __builtin_cpu_supports("sse3"); +} +#endif +#endif + +void increment_sse3(float arr[4]) { + ALIGN_16 double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + result = _mm_hadd_pd(val1, val2); /* This does nothing. Only here so we use an SSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/meson/test cases/common/147 simd/simd_sse41.c b/meson/test cases/common/147 simd/simd_sse41.c new file mode 100644 index 000000000..29f25554a --- /dev/null +++ b/meson/test cases/common/147 simd/simd_sse41.c @@ -0,0 +1,40 @@ +#include +#include + +#include + +#ifdef _MSC_VER +#include + +int sse41_available(void) { + return 1; +} + +#else +#include +#include + +#if defined(__APPLE__) +int sse41_available(void) { return 1; } +#else +int sse41_available(void) { + return __builtin_cpu_supports("sse4.1"); +} +#endif +#endif + +void increment_sse41(float arr[4]) { + ALIGN_16 double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + result = _mm_ceil_pd(result); /* A no-op, only here to use a SSE4.1 intrinsic. */ + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/meson/test cases/common/147 simd/simd_sse42.c b/meson/test cases/common/147 simd/simd_sse42.c new file mode 100644 index 000000000..f1564e2b9 --- /dev/null +++ b/meson/test cases/common/147 simd/simd_sse42.c @@ -0,0 +1,43 @@ +#include +#include +#include + +#ifdef _MSC_VER +#include + +int sse42_available(void) { + return 1; +} + +#else + +#include +#include + +#ifdef __APPLE__ +int sse42_available(void) { + return 1; +} +#else +int sse42_available(void) { + return __builtin_cpu_supports("sse4.2"); +} +#endif + +#endif + +void increment_sse42(float arr[4]) { + ALIGN_16 double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + _mm_crc32_u32(42, 99); /* A no-op, only here to use an SSE4.2 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/meson/test cases/common/147 simd/simd_ssse3.c b/meson/test cases/common/147 simd/simd_ssse3.c new file mode 100644 index 000000000..fa557f4c1 --- /dev/null +++ b/meson/test cases/common/147 simd/simd_ssse3.c @@ -0,0 +1,48 @@ +#include +#include + +#include +#include + +#ifdef _MSC_VER +#include + +int ssse3_available(void) { + return 1; +} + +#else + +#include +#include + +int ssse3_available(void) { +#ifdef __APPLE__ + return 1; +#elif defined(__clang__) + /* https://github.com/numpy/numpy/issues/8130 */ + return __builtin_cpu_supports("sse4.1"); +#else + return __builtin_cpu_supports("ssse3"); +#endif +} + +#endif + +void increment_ssse3(float arr[4]) { + ALIGN_16 double darr[4]; + __m128d val1 = _mm_set_pd(arr[0], arr[1]); + __m128d val2 = _mm_set_pd(arr[2], arr[3]); + __m128d one = _mm_set_pd(1.0, 1.0); + __m128d result = _mm_add_pd(val1, one); + __m128i tmp1, tmp2; + tmp1 = tmp2 = _mm_set1_epi16(0); + _mm_store_pd(darr, result); + result = _mm_add_pd(val2, one); + _mm_store_pd(&darr[2], result); + tmp1 = _mm_hadd_epi32(tmp1, tmp2); /* This does nothing. Only here so we use an SSSE3 instruction. */ + arr[0] = (float)darr[1]; + arr[1] = (float)darr[0]; + arr[2] = (float)darr[3]; + arr[3] = (float)darr[2]; +} diff --git a/meson/test cases/common/147 simd/simdchecker.c b/meson/test cases/common/147 simd/simdchecker.c new file mode 100644 index 000000000..c7a0a978a --- /dev/null +++ b/meson/test cases/common/147 simd/simdchecker.c @@ -0,0 +1,143 @@ +#include +#include +#include + +typedef void (*simd_func)(float*); + +int check_simd_implementation(float *four, + const float *four_initial, + const char *simd_type, + const float *expected, + simd_func fptr, + const int blocksize) { + int rv = 0; + memcpy(four, four_initial, blocksize*sizeof(float)); + printf("Using %s.\n", simd_type); + fptr(four); + for(int i=0; i + +#ifdef _MSC_VER +#define ALIGN_16 __declspec(align(16)) +#else +#include +#define ALIGN_16 alignas(16) +#endif + + +/* Yes, I do know that arr[4] decays into a pointer + * as a function argument. Don't do this in real code + * but for this test it is ok. + */ + +void increment_fallback(float arr[4]); + +#if HAVE_MMX +int mmx_available(void); +void increment_mmx(float arr[4]); +#endif + +#if HAVE_SSE +int sse_available(void); +void increment_sse(float arr[4]); +#endif + +#if HAVE_SSE2 +int sse2_available(void); +void increment_sse2(float arr[4]); +#endif + +#if HAVE_SSE3 +int sse3_available(void); +void increment_sse3(float arr[4]); +#endif + +#if HAVE_SSSE3 +int ssse3_available(void); +void increment_ssse3(float arr[4]); +#endif + +#if HAVE_SSE41 +int sse41_available(void); +void increment_sse41(float arr[4]); +#endif + +#if HAVE_SSE42 +int sse42_available(void); +void increment_sse42(float arr[4]); +#endif + +#if HAVE_AVX +int avx_available(void); +void increment_avx(float arr[4]); +#endif + +#if HAVE_AVX2 +int avx2_available(void); +void increment_avx2(float arr[4]); +#endif + +#if HAVE_NEON +int neon_available(void); +void increment_neon(float arr[4]); +#endif + +#if HAVE_ALTIVEC +int altivec_available(void); +void increment_altivec(float arr[4]); +#endif + +/* And so on. */ -- cgit 1.2.3-korg