aboutsummaryrefslogtreecommitdiffstats
path: root/meson/test cases/cuda/3 cudamodule
diff options
context:
space:
mode:
Diffstat (limited to 'meson/test cases/cuda/3 cudamodule')
-rw-r--r-- meson/test cases/cuda/3 cudamodule/meson.build 70
-rw-r--r-- meson/test cases/cuda/3 cudamodule/prog.cu 30
2 files changed, 100 insertions, 0 deletions
diff --git a/meson/test cases/cuda/3 cudamodule/meson.build b/meson/test cases/cuda/3 cudamodule/meson.build
new file mode 100644
index 000000000..fd5e83fea
--- /dev/null
+++ b/meson/test cases/cuda/3 cudamodule/meson.build
@@ -0,0 +1,70 @@
+project('cudamodule', 'cuda', version : '1.0.0')
+
+cuda = import('unstable-cuda')
+toolkit_version = meson.get_compiler('cuda').version()
+
+# 'Auto' mode, with the detected device list pinned to compute capability 6.0.
+gencode_flags = cuda.nvcc_arch_flags(toolkit_version, 'Auto', detected: ['6.0'])
+gencode_names = cuda.nvcc_arch_readable(toolkit_version, 'Auto', detected: ['6.0'])
+min_driver = cuda.min_driver_version(toolkit_version)
+
+message('NVCC version: @0@'.format(toolkit_version))
+message('NVCC flags: @0@'.format(' '.join(gencode_flags)))
+message('NVCC readable: @0@'.format(' '.join(gencode_names)))
+message('Driver version: >=@0@'.format(min_driver))
+
+test('cudatest', executable('prog', 'prog.cu', cuda_args: gencode_flags))
+
+
+#
+# Assert Series
+#
+
+# Sanity test: a single numeric architecture maps to a single -gencode flag.
+assert(' '.join(cuda.nvcc_arch_flags('11.1', '8.6')) ==
+ '-gencode arch=compute_86,code=sm_86')
+
+# CUDA Toolkit 11.0 is too old for 8.6, so the flag is filtered out entirely.
+assert(' '.join(cuda.nvcc_arch_flags('11.0', '8.6')) ==
+ '')
+
+# Named architectures: 'Ampere' yields 8.0 with CUDA Toolkit 11.0.
+assert(' '.join(cuda.nvcc_arch_flags('11.0', 'Ampere')) ==
+ '-gencode arch=compute_80,code=sm_80')
+
+# Splitting on ';' and ',' plus deduplication of repeated architectures.
+assert(' '.join(cuda.nvcc_arch_flags('11.0', 'Ampere;8.0,8.0')) ==
+ '-gencode arch=compute_80,code=sm_80')
+
+# Same, but the architectures are supplied as a list instead of a string.
+assert(' '.join(cuda.nvcc_arch_flags('11.0', ['Ampere', '8.0', '8.0'])) ==
+ '-gencode arch=compute_80,code=sm_80')
+
+# Same, but mode set to Auto with detected set to a string with a variety of separators.
+assert(' '.join(cuda.nvcc_arch_flags('11.0', 'Auto', detected: 'Ampere;8.0,8.0')) ==
+ '-gencode arch=compute_80,code=sm_80')
+
+# Same, but detected set to a list.
+assert(' '.join(cuda.nvcc_arch_flags('11.0', 'Auto', detected: ['Ampere', '8.0', '8.0'])) ==
+ '-gencode arch=compute_80,code=sm_80')
+
+# Ask for 8.6 binary with 8.0-level PTX.
+assert(' '.join(cuda.nvcc_arch_flags('11.1', '8.6(8.0)')) ==
+ '-gencode arch=compute_80,code=sm_86')
+
+# Same, but keep the 8.0 PTX.
+assert(' '.join(cuda.nvcc_arch_flags('11.1', '8.6(8.0)+PTX')) ==
+ '-gencode arch=compute_80,code=sm_86 -gencode arch=compute_80,code=compute_80')
+
+# Detected an Ampere GPU (8.0) on CUDA 10.2, saturate to 7.5+PTX
+assert(' '.join(cuda.nvcc_arch_flags('10.2', 'Auto', detected: ['8.0'])) ==
+ '-gencode arch=compute_75,code=sm_75 -gencode arch=compute_75,code=compute_75')
+
+# Failed to auto-detect with CUDA 10.2, default to common GPUs (3.0;3.5;5.0;5.2;6.0;6.1;7.0;7.5+PTX)
+assert(' '.join(cuda.nvcc_arch_flags('10.2', 'Auto', detected: [])) ==
+ '-gencode arch=compute_30,code=sm_30 -gencode arch=compute_35,code=sm_35 '+
+ '-gencode arch=compute_50,code=sm_50 -gencode arch=compute_52,code=sm_52 '+
+ '-gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 '+
+ '-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 '+
+ '-gencode arch=compute_75,code=compute_75')
+
diff --git a/meson/test cases/cuda/3 cudamodule/prog.cu b/meson/test cases/cuda/3 cudamodule/prog.cu
new file mode 100644
index 000000000..b893bd331
--- /dev/null
+++ b/meson/test cases/cuda/3 cudamodule/prog.cu
@@ -0,0 +1,30 @@
+#include <iostream>
+
+// Prints the CUDA runtime version and the properties of device 0; exits 0 when no device is found.
+int main(void) {
+    int cuda_devices = 0;
+    std::cout << "CUDA version: " << CUDART_VERSION << "\n";
+    // A failed count query is reported like "no hardware" so GPU-less machines still exit 0.
+    if(cudaGetDeviceCount(&cuda_devices) != cudaSuccess || cuda_devices == 0) {
+        std::cout << "No Cuda hardware found. Exiting.\n";
+        return 0;
+    }
+    std::cout << "This computer has " << cuda_devices << " Cuda device(s).\n";
+    cudaDeviceProp props;
+    if(cudaGetDeviceProperties(&props, 0) != cudaSuccess) {  // props would be uninitialized
+        std::cerr << "Could not query device 0: " << cudaGetErrorString(cudaGetLastError()) << "\n";
+        return 1;
+    }
+    std::cout << "Properties of device 0.\n\n";
+    std::cout << " Name: " << props.name << "\n";
+    std::cout << " Global memory: " << props.totalGlobalMem << "\n";
+    std::cout << " Shared memory: " << props.sharedMemPerBlock << "\n";
+    std::cout << " Constant memory: " << props.totalConstMem << "\n";
+    std::cout << " Block registers: " << props.regsPerBlock << "\n";
+    std::cout << " Warp size: " << props.warpSize << "\n";
+    std::cout << " Threads per block: " << props.maxThreadsPerBlock << "\n";
+    std::cout << " Max block dimensions: [ " << props.maxThreadsDim[0] << ", " << props.maxThreadsDim[1] << ", " << props.maxThreadsDim[2] << " ]" << "\n";
+    std::cout << " Max grid dimensions: [ " << props.maxGridSize[0] << ", " << props.maxGridSize[1] << ", " << props.maxGridSize[2] << " ]" << "\n\n";
+    return 0;
+}
+