1 files changed, 760 insertions, 0 deletions
diff --git a/meson/mesonbuild/compilers/cuda.py b/meson/mesonbuild/compilers/cuda.py
new file mode 100644
index 000000000..36da833be
--- /dev/null
+++ b/meson/mesonbuild/compilers/cuda.py
@@ -0,0 +1,760 @@
+# Copyright 2012-2017 The Meson development team
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import enum
+import os.path
+import string
+import typing as T
+
+from .. import coredata
+from .. import mlog
+from ..mesonlib import (
+    EnvironmentException, MachineChoice, Popen_safe, OptionOverrideProxy,
+    is_windows, LibType, OptionKey,
+)
+from .compilers import (Compiler, cuda_buildtype_args, cuda_optimization_args,
+                        cuda_debug_args)
+
+if T.TYPE_CHECKING:
+    from ..build import BuildTarget
+    from ..coredata import KeyedOptionDictType
+    from ..dependencies import Dependency
+    from ..environment import Environment  # noqa: F401
+    from ..envconfig import MachineInfo
+    from ..linkers import DynamicLinker
+    from ..programs import ExternalProgram
+
+
+class _Phase(enum.Enum):
+    """Host toolchain stage targeted by _to_host_flags(); the value is spliced into an ``-X<value>=`` wrapper."""
+    COMPILER = 'compiler'
+    LINKER = 'linker'
+
+
+class CudaCompiler(Compiler):
+
+    LINKER_PREFIX = '-Xlinker='  # glued spelling of NVCC's -Xlinker (--linker-options) option
+    language = 'cuda'  # read back as self.language by get_options() and sanity_check()
+
+    # NVCC flags taking no arguments; _to_host_flags() forwards any flag found here untouched.
+    _FLAG_PASSTHRU_NOARGS = {
+        # NVCC --long-option,                   NVCC -short-option              CUDA Toolkit 11.2.1 Reference
+        '--objdir-as-tempdir',                  '-objtemp',                     # 4.2.1.2
+        '--generate-dependency-targets',        '-MP',                          # 4.2.1.12
+        '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.1.14
+        '--link',                                                               # 4.2.2.1
+        '--lib',                                '-lib',                         # 4.2.2.2
+        '--device-link',                        '-dlink',                       # 4.2.2.3
+        '--device-c',                           '-dc',                          # 4.2.2.4
+        '--device-w',                           '-dw',                          # 4.2.2.5
+        '--cuda',                               '-cuda',                        # 4.2.2.6
+        '--compile',                            '-c',                           # 4.2.2.7
+        '--fatbin',                             '-fatbin',                      # 4.2.2.8
+        '--cubin',                              '-cubin',                       # 4.2.2.9
+        '--ptx',                                '-ptx',                         # 4.2.2.10
+        '--preprocess',                         '-E',                           # 4.2.2.11
+        '--generate-dependencies',              '-M',                           # 4.2.2.12
+        '--generate-nonsystem-dependencies',    '-MM',                          # 4.2.2.13
+        '--generate-dependencies-with-compile', '-MD',                          # 4.2.2.14
+        '--generate-nonsystem-dependencies-with-compile', '-MMD',               # 4.2.2.15
+        '--run',                                                                # 4.2.2.16
+        '--profile',                            '-pg',                          # 4.2.3.1
+        '--debug',                              '-g',                           # 4.2.3.2
+        '--device-debug',                       '-G',                           # 4.2.3.3
+        '--extensible-whole-program',           '-ewp',                         # 4.2.3.4
+        '--generate-line-info',                 '-lineinfo',                    # 4.2.3.5
+        '--dlink-time-opt',                     '-dlto',                        # 4.2.3.8
+        '--no-exceptions',                      '-noeh',                        # 4.2.3.11
+        '--shared',                             '-shared',                      # 4.2.3.12
+        '--no-host-device-initializer-list',    '-nohdinitlist',                # 4.2.3.15
+        '--expt-relaxed-constexpr',             '-expt-relaxed-constexpr',      # 4.2.3.16
+        '--extended-lambda',                    '-extended-lambda',             # 4.2.3.17
+        '--expt-extended-lambda',               '-expt-extended-lambda',        # 4.2.3.18
+        '--m32',                                '-m32',                         # 4.2.3.20
+        '--m64',                                '-m64',                         # 4.2.3.21
+        '--forward-unknown-to-host-compiler',   '-forward-unknown-to-host-compiler', # 4.2.5.1
+        '--forward-unknown-to-host-linker',     '-forward-unknown-to-host-linker',   # 4.2.5.2
+        '--dont-use-profile',                   '-noprof',                      # 4.2.5.3
+        '--dryrun',                             '-dryrun',                      # 4.2.5.5
+        '--verbose',                            '-v',                           # 4.2.5.6
+        '--keep',                               '-keep',                        # 4.2.5.7
+        '--save-temps',                         '-save-temps',                  # 4.2.5.9
+        '--clean-targets',                      '-clean',                       # 4.2.5.10
+        '--no-align-double',                                                    # 4.2.5.16
+        '--no-device-link',                     '-nodlink',                     # 4.2.5.17
+        '--allow-unsupported-compiler',         '-allow-unsupported-compiler',  # 4.2.5.18 (repeats the 4.2.1.14 entry; harmless in a set)
+        '--use_fast_math',                      '-use_fast_math',               # 4.2.7.7
+        '--extra-device-vectorization',         '-extra-device-vectorization',  # 4.2.7.12
+        '--compile-as-tools-patch',             '-astoolspatch',                # 4.2.7.13
+        '--keep-device-functions',              '-keep-device-functions',       # 4.2.7.14
+        '--disable-warnings',                   '-w',                           # 4.2.8.1
+        '--source-in-ptx',                      '-src-in-ptx',                  # 4.2.8.2
+        '--restrict',                           '-restrict',                    # 4.2.8.3
+        '--Wno-deprecated-gpu-targets',         '-Wno-deprecated-gpu-targets',  # 4.2.8.4
+        '--Wno-deprecated-declarations',        '-Wno-deprecated-declarations', # 4.2.8.5
+        '--Wreorder',                           '-Wreorder',                    # 4.2.8.6
+        '--Wdefault-stream-launch',             '-Wdefault-stream-launch',      # 4.2.8.7
+        '--Wext-lambda-captures-this',          '-Wext-lambda-captures-this',   # 4.2.8.8
+        '--display-error-number',               '-err-no',                      # 4.2.8.10
+        '--resource-usage',                     '-res-usage',                   # 4.2.8.14
+        '--help',                               '-h',                           # 4.2.8.15
+        '--version',                            '-V',                           # 4.2.8.16
+        '--list-gpu-code',                      '-code-ls',                     # 4.2.8.20
+        '--list-gpu-arch',                      '-arch-ls',                     # 4.2.8.21
+    }
+    # NVCC flags taking either one argument or a comma-separated list. Maps --long to -short
+    # options (the more GCC-like spelling); _to_host_flags() normalises to the short form.
+    _FLAG_LONG2SHORT_WITHARGS = {
+        '--output-file':                        '-o',                           # 4.2.1.1
+        '--pre-include':                        '-include',                     # 4.2.1.3
+        '--library':                            '-l',                           # 4.2.1.4
+        '--define-macro':                       '-D',                           # 4.2.1.5
+        '--undefine-macro':                     '-U',                           # 4.2.1.6
+        '--include-path':                       '-I',                           # 4.2.1.7
+        '--system-include':                     '-isystem',                     # 4.2.1.8
+        '--library-path':                       '-L',                           # 4.2.1.9
+        '--output-directory':                   '-odir',                        # 4.2.1.10
+        '--dependency-output':                  '-MF',                          # 4.2.1.11
+        '--compiler-bindir':                    '-ccbin',                       # 4.2.1.13
+        '--archiver-binary':                    '-arbin',                       # 4.2.1.15
+        '--cudart':                             '-cudart',                      # 4.2.1.16
+        '--cudadevrt':                          '-cudadevrt',                   # 4.2.1.17
+        '--libdevice-directory':                '-ldir',                        # 4.2.1.18
+        '--target-directory':                   '-target-dir',                  # 4.2.1.19
+        '--optimization-info':                  '-opt-info',                    # 4.2.3.6
+        '--optimize':                           '-O',                           # 4.2.3.7
+        '--ftemplate-backtrace-limit':          '-ftemplate-backtrace-limit',   # 4.2.3.9
+        '--ftemplate-depth':                    '-ftemplate-depth',             # 4.2.3.10
+        '--x':                                  '-x',                           # 4.2.3.13
+        '--std':                                '-std',                         # 4.2.3.14
+        '--machine':                            '-m',                           # 4.2.3.19
+        '--compiler-options':                   '-Xcompiler',                   # 4.2.4.1
+        '--linker-options':                     '-Xlinker',                     # 4.2.4.2
+        '--archive-options':                    '-Xarchive',                    # 4.2.4.3
+        '--ptxas-options':                      '-Xptxas',                      # 4.2.4.4
+        '--nvlink-options':                     '-Xnvlink',                     # 4.2.4.5
+        '--threads':                            '-t',                           # 4.2.5.4
+        '--keep-dir':                           '-keep-dir',                    # 4.2.5.8
+        '--run-args':                           '-run-args',                    # 4.2.5.11
+        '--input-drive-prefix':                 '-idp',                         # 4.2.5.12
+        '--dependency-drive-prefix':            '-ddp',                         # 4.2.5.13
+        '--drive-prefix':                       '-dp',                          # 4.2.5.14
+        '--dependency-target-name':             '-MT',                          # 4.2.5.15
+        '--default-stream':                     '-default-stream',              # 4.2.6.1
+        '--gpu-architecture':                   '-arch',                        # 4.2.7.1
+        '--gpu-code':                           '-code',                        # 4.2.7.2
+        '--generate-code':                      '-gencode',                     # 4.2.7.3
+        '--relocatable-device-code':            '-rdc',                         # 4.2.7.4
+        '--entries':                            '-e',                           # 4.2.7.5
+        '--maxrregcount':                       '-maxrregcount',                # 4.2.7.6
+        '--ftz':                                '-ftz',                         # 4.2.7.8
+        '--prec-div':                           '-prec-div',                    # 4.2.7.9
+        '--prec-sqrt':                          '-prec-sqrt',                   # 4.2.7.10
+        '--fmad':                               '-fmad',                        # 4.2.7.11
+        '--Werror':                             '-Werror',                      # 4.2.8.9
+        '--diag-error':                         '-diag-error',                  # 4.2.8.11
+        '--diag-suppress':                      '-diag-suppress',               # 4.2.8.12
+        '--diag-warn':                          '-diag-warn',                   # 4.2.8.13
+        '--options-file':                       '-optf',                        # 4.2.8.17
+        '--time':                               '-time',                        # 4.2.8.18
+        '--qpp-config':                         '-qpp-config',                  # 4.2.8.19
+    }
+    # Inverse lookup: each -short spelling back to its --long option name.
+    _FLAG_SHORT2LONG_WITHARGS = {short_opt: long_opt for long_opt, short_opt in _FLAG_LONG2SHORT_WITHARGS.items()}
+
+    def __init__(self, exelist: T.List[str], version: str, for_machine: MachineChoice,
+                 is_cross: bool, exe_wrapper: T.Optional['ExternalProgram'],
+                 host_compiler: Compiler, info: 'MachineInfo',
+                 linker: T.Optional['DynamicLinker'] = None,
+                 full_version: T.Optional[str] = None):
+        """Set up NVCC on top of ``host_compiler``: share its base options, translate its warning flags."""
+        super().__init__(exelist, version, for_machine, info, linker=linker, full_version=full_version, is_cross=is_cross)
+        self.id = 'nvcc'
+        self.exe_wrapper, self.host_compiler = exe_wrapper, host_compiler
+        self.base_options = host_compiler.base_options
+        self.warn_args = {lvl: self._to_host_flags(args) for lvl, args in host_compiler.warn_args.items()}
+
+    @classmethod
+    def _shield_nvcc_list_arg(cls, arg: str, listmode: bool=True) -> str:
+        r"""
+        Shield an argument against both splitting by NVCC's list-argument
+        parse logic, and interpretation by any shell.
+
+        NVCC seems to consider every comma , that is neither escaped by \ nor inside
+        a double-quoted string a split-point. Single-quotes do not provide protection
+        against splitting; In fact, after splitting they are \-escaped. Unfortunately,
+        double-quotes don't protect against shell expansion. What follows is a
+        complex dance to accommodate everybody.
+
+        With listmode=True (the default) *arg* is split at its unprotected commas and
+        the shielded pieces are returned re-joined with \, between them.
+        """
+        SQ = "'"
+        DQ = '"'
+        CM = ","
+        BS = "\\"
+        DQSQ = DQ+SQ+DQ
+        quotable = set(string.whitespace+'"$`\\')
+
+        if CM not in arg or not listmode:
+            if SQ not in arg:
+                # If any of the special characters "$`\ or whitespace are present, single-quote.
+                # Otherwise return bare.
+                if set(arg).intersection(quotable):
+                    return SQ+arg+SQ
+                else:
+                    return arg # Easy case: no splits, no quoting.
+            else:
+                # There are single quotes. Double-quote them, and single-quote the
+                # strings between them. str.join() keeps every piece, the last included.
+                pieces = [cls._shield_nvcc_list_arg(s) for s in arg.split(SQ)]
+                return DQSQ.join(pieces)
+        else:
+            # A comma is present, and list mode was active.
+            # We apply (what we guess is) the (primitive) NVCC splitting rule:
+            l = ['']
+            instring = False
+            argit = iter(arg)
+            for c in argit:
+                if   c == CM and not instring:
+                    l.append('')
+                elif c == DQ:
+                    l[-1] += c
+                    instring = not instring
+                elif c == BS:
+                    try:
+                        l[-1] += next(argit)
+                    except StopIteration:
+                        break
+                else:
+                    l[-1] += c
+            # Shield individual strings, without listmode, then return them with
+            # escaped commas between them.
+            l = [cls._shield_nvcc_list_arg(s, listmode=False) for s in l]
+            return r'\,'.join(l)
+
+    @classmethod
+    def _merge_flags(cls, flags: T.List[str]) -> T.List[str]:
+        r"""
+        Fold every run of consecutive -Xcompiler-wrapped arguments into one.
+
+        Too many individually shielded host flags make an NVCC command line
+        exceedingly verbose and unreadable, so neighbours are combined into a
+        single comma-separated -Xcompiler argument. Two spellings are recognised:
+
+            -Xcompiler=VALUE    ("glued": the value is part of the flag itself)
+            -Xcompiler VALUE    ("isolated": the value is the next list item)
+
+        A run keeps the spelling of its first member; any other flag ends the run
+        and is copied through unchanged. An isolated -Xcompiler that is the very
+        last item is given the empty string as its value.
+
+        Lists of at most one item are handed back as-is (the same list object).
+        """
+        if len(flags) <= 1:
+            return flags
+
+        GLUED = '-Xcompiler='
+        merged = []
+        pending = iter(flags)
+        in_run = False  # True while merged[-1] is an -Xcompiler argument still being extended.
+        for flag in pending:
+            glued = flag.startswith(GLUED)
+            if not glued and flag != '-Xcompiler':
+                # Ordinary flag: copy it through and close any open run.
+                in_run = False
+                merged.append(flag)
+                continue
+
+            # The value either rides on the flag or is the next item (if there is one).
+            value = flag[len(GLUED):] if glued else next(pending, "")
+            if in_run:
+                # Extend the open run in place.
+                merged[-1] += ',' + value
+            elif glued:
+                merged.append(flag)
+            else:
+                merged.extend((flag, value))
+            in_run = True
+
+        return merged
+
+    @classmethod
+    def _to_host_flags(cls, flags: T.List[str], phase: _Phase = _Phase.COMPILER) -> T.List[str]:
+        """
+        Translate generic "GCC-speak" plus particular "NVCC-speak" flags to NVCC flags.
+
+        NVCC's "short" flags have broad similarities to the GCC standard, but have
+        gratuitous, irritating differences.
+
+        ``phase`` selects the -X<phase>= wrapper applied to arguments that start with '/'.
+        A value-taking flag that arrives without its value trips an AssertionError.
+        """
+        xflags = []
+        flagit = iter(flags)
+
+        for flag in flagit:
+            # The CUDA Toolkit Documentation, in 4.1. Command Option Types and Notation,
+            # specifies that NVCC does not parse the standard flags as GCC does. It has
+            # its own strategy, to wit:
+            #
+            #     nvcc recognizes three types of command options: boolean options, single
+            #     value options, and list options.
+            #
+            #     Boolean options do not have an argument; they are either specified on a
+            #     command line or not. Single value options must be specified at most once,
+            #     and list options may be repeated. Examples of each of these option types
+            #     are, respectively: --verbose (switch to verbose mode), --output-file
+            #     (specify output file), and --include-path (specify include path).
+            #
+            #     Single value options and list options must have arguments, which must
+            #     follow the name of the option itself by either one of more spaces or an
+            #     equals character. When a one-character short name such as -I, -l, and -L
+            #     is used, the value of the option may also immediately follow the option
+            #     itself without being seperated by spaces or an equal character. The
+            #     individual values of list options may be separated by commas in a single
+            #     instance of the option, or the option may be repeated, or any
+            #     combination of these two cases.
+            #
+            # One strange consequence of this choice is that directory and filenames that
+            # contain commas (',') cannot be passed to NVCC (at least, not as easily as
+            # in GCC). Another strange consequence is that it is legal to supply flags
+            # such as
+            #
+            #     -lpthread,rt,dl,util
+            #     -l pthread,rt,dl,util
+            #     -l=pthread,rt,dl,util
+            #
+            # and each of the above alternatives is equivalent to GCC-speak
+            #
+            #     -lpthread -lrt -ldl -lutil
+            #     -l pthread -l rt -l dl -l util
+            #     -l=pthread -l=rt -l=dl -l=util
+            #
+            # *With the exception of commas in the name*, GCC-speak for these list flags
+            # is a strict subset of NVCC-speak, so we passthrough those flags.
+            #
+            # The -D macro-define flag is documented as somehow shielding commas from
+            # splitting a definition. Balanced parentheses, braces and single-quotes
+            # around the comma are not sufficient, but balanced double-quotes are. The
+            # shielding appears to work with -l, -I, -L flags as well, for instance.
+            #
+            # Since our goal is to replicate GCC-speak as much as possible, we check for
+            # commas in all list-arguments and shield them with double-quotes. We make
+            # an exception for -D (where this would be value-changing) and -U (because
+            # it isn't possible to define a macro with a comma in the name).
+
+            if flag in cls._FLAG_PASSTHRU_NOARGS:
+                xflags.append(flag)
+                continue
+
+            val = None  # Reset per flag so a missing argument cannot reuse an earlier flag's value.
+
+            # Handle breakup of flag-values into a flag-part and value-part.
+            if   flag[:1] not in '-/':
+                # This is not a flag. It's probably a file input. Pass it through.
+                xflags.append(flag)
+                continue
+            elif flag[:1] == '/':
+                # This is ambiguously either an MSVC-style /switch or an absolute path
+                # to a file. For some magical reason the following works acceptably in
+                # both cases.
+                wrap = '"' if ',' in flag else ''
+                xflags.append(f'-X{phase.value}={wrap}{flag}{wrap}')
+                continue
+            elif len(flag) >= 2 and flag[0] == '-' and flag[1] in 'IDULlmOxmte':
+                # This is a single-letter short option. These options (with the
+                # exception of -o) are allowed to receive their argument with neither
+                # space nor = sign before them. Detect and separate them in that event.
+                if   flag[2:3] == '':            # -I something
+                    try:
+                        val = next(flagit)
+                    except StopIteration:
+                        pass
+                elif flag[2:3] == '=':           # -I=something
+                    val = flag[3:]
+                else:                            # -Isomething
+                    val = flag[2:]
+                flag = flag[:2]                  # -I
+            elif flag in cls._FLAG_LONG2SHORT_WITHARGS or \
+                 flag in cls._FLAG_SHORT2LONG_WITHARGS:
+                # This is either -o or a multi-letter flag, and it is receiving its
+                # value isolated.
+                try:
+                    val = next(flagit)           # -o something
+                except StopIteration:
+                    pass
+            elif flag.split('=',1)[0] in cls._FLAG_LONG2SHORT_WITHARGS or \
+                 flag.split('=',1)[0] in cls._FLAG_SHORT2LONG_WITHARGS:
+                # This is either -o or a multi-letter flag, and it is receiving its
+                # value after an = sign.
+                flag, val = flag.split('=',1)    # -o=something
+            else:
+                # This is a flag, and it's foreign to NVCC.
+                #
+                # We do not know whether this GCC-speak flag takes an isolated
+                # argument. Assuming it does not (the vast majority indeed don't),
+                # wrap this argument in an -Xcompiler flag and send it down to NVCC.
+                if   flag == '-ffast-math':
+                    xflags.append('-use_fast_math')
+                    xflags.append('-Xcompiler='+flag)
+                elif flag == '-fno-fast-math':
+                    xflags.append('-ftz=false')
+                    xflags.append('-prec-div=true')
+                    xflags.append('-prec-sqrt=true')
+                    xflags.append('-Xcompiler='+flag)
+                elif flag == '-freciprocal-math':
+                    xflags.append('-prec-div=false')
+                    xflags.append('-Xcompiler='+flag)
+                elif flag == '-fno-reciprocal-math':
+                    xflags.append('-prec-div=true')
+                    xflags.append('-Xcompiler='+flag)
+                else:
+                    xflags.append('-Xcompiler='+cls._shield_nvcc_list_arg(flag))
+                    # The above should securely handle GCC's -Wl, -Wa, -Wp, arguments.
+                continue
+
+            assert val is not None, f'NVCC flag {flag} is missing its argument'
+
+            # Take care of the various NVCC-supported flags that need special handling.
+            flag = cls._FLAG_LONG2SHORT_WITHARGS.get(flag,flag)
+            if   flag in {'-include','-isystem','-I','-L','-l'}:
+                # These flags are known to GCC, but list-valued in NVCC. They potentially
+                # require double-quoting to prevent NVCC interpreting the flags as lists
+                # when GCC would not have done so.
+                #
+                # We avoid doing this quoting for -D to avoid redefining macros and for
+                # -U because it isn't possible to define a macro with a comma in the name.
+                # -U with comma arguments is impossible in GCC-speak (and thus unambiguous
+                # in NVCC-speak, albeit unportable).
+                if len(flag) == 2:
+                    xflags.append(flag+cls._shield_nvcc_list_arg(val))
+                else:
+                    xflags.append(flag)
+                    xflags.append(cls._shield_nvcc_list_arg(val))
+            elif flag == '-O':
+                # Handle optimization levels GCC knows about that NVCC does not.
+                if   val == 'fast':
+                    xflags.append('-O3')
+                    xflags.append('-use_fast_math')
+                    xflags.append('-Xcompiler')
+                    xflags.append(flag+val)
+                elif val in {'s', 'g', 'z'}:
+                    xflags.append('-Xcompiler')
+                    xflags.append(flag+val)
+                else:
+                    xflags.append(flag+val)
+            elif flag in {'-D', '-U', '-m', '-t'}:
+                xflags.append(flag+val)       # For style, keep glued.
+            elif flag in {'-std'}:
+                xflags.append(flag+'='+val)   # For style, keep glued.
+            else:
+                xflags.append(flag)
+                xflags.append(val)
+
+        return cls._merge_flags(xflags)
+
+    def needs_static_linker(self) -> bool:
+        return False  # Unconditionally reports that no static linker is needed.
+
+    def thread_link_flags(self, environment: 'Environment') -> T.List[str]:  # host compiler's thread link flags, translated for the linker phase
+        return self._to_host_flags(self.host_compiler.thread_link_flags(environment), _Phase.LINKER)
+
+    def sanity_check(self, work_dir: str, env: 'Environment') -> None:
+        """Compile and, where possible, run a small CUDA probe program inside work_dir.
+        On a run with empty stderr, its stdout is kept in self.detected_cc.
+        Raises EnvironmentException if the compile or the run exits with non-zero status."""
+        mlog.debug('Sanity testing ' + self.get_display_language() + ' compiler:', ' '.join(self.exelist))
+        mlog.debug('Is cross compiler: %s.' % str(self.is_cross))
+        sname = 'sanitycheckcuda.cu'
+        code = r'''
+        #include <cuda_runtime.h>
+        #include <stdio.h>
+
+        __global__ void kernel (void) {}
+
+        int main(void){
+            struct cudaDeviceProp prop;
+            int count, i;
+            cudaError_t ret = cudaGetDeviceCount(&count);
+            if(ret != cudaSuccess){
+                fprintf(stderr, "%d\n", (int)ret);
+            }else{
+                for(i=0;i<count;i++){
+                    if(cudaGetDeviceProperties(&prop, i) == cudaSuccess){
+                        fprintf(stdout, "%d.%d\n", prop.major, prop.minor);
+                    }
+                }
+            }
+            fflush(stderr);
+            fflush(stdout);
+            return 0;
+        }
+        '''
+        binname = sname.rsplit('.', 1)[0]
+        binname += '_cross' if self.is_cross else ''
+        source_name = os.path.join(work_dir, sname)
+        binary_name = os.path.join(work_dir, binname + '.exe')
+        with open(source_name, 'w', encoding='utf-8') as ofile:
+            ofile.write(code)
+        # The Sanity Test for CUDA language will serve as both a sanity test
+        # and a native-build GPU architecture detection test, useful later.
+        #
+        # For this second purpose, NVCC has very handy flags, --run and
+        # --run-args, that allow one to run an application with the
+        # environment set up properly. Of course, this only works for native
+        # builds; For cross builds we must still use the exe_wrapper (if any).
+        self.detected_cc = ''
+        flags = []
+        # Disable warnings, compile with statically-linked runtime for minimum
+        # reliance on the system.
+        flags += ['-w', '-cudart', 'static', source_name]
+
+        # Use the -ccbin option, if available, even during sanity checking.
+        # Otherwise, on systems where CUDA does not support the default compiler,
+        # NVCC becomes unusable.
+        flags += self.get_ccbin_args(env.coredata.options)
+
+        # If cross-compiling, we can't run the sanity check, only compile it.
+        if self.is_cross and self.exe_wrapper is None:
+            # Linking cross built apps is painful. You can't really
+            # tell if you should use -nostdlib or not and for example
+            # on OSX the compiler binary is the same but you need
+            # a ton of compiler flags to differentiate between
+            # arm and x86_64. So just compile.
+            flags += self.get_compile_only_args()
+        flags += self.get_output_args(binary_name)
+
+        # Compile sanity check
+        cmdlist = self.exelist + flags
+        mlog.debug('Sanity check compiler command line: ', ' '.join(cmdlist))
+        pc, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+        mlog.debug('Sanity check compile stdout: ')
+        mlog.debug(stdo)
+        mlog.debug('-----\nSanity check compile stderr:')
+        mlog.debug(stde)
+        mlog.debug('-----')
+        if pc.returncode != 0:
+            raise EnvironmentException(f'Compiler {self.name_string()} can not compile programs.')
+
+        # Run sanity check (if possible)
+        if self.is_cross:
+            if self.exe_wrapper is None:
+                return
+            else:
+                cmdlist = self.exe_wrapper.get_command() + [binary_name]
+        else:
+            cmdlist = self.exelist + ['--run', '"' + binary_name + '"']  # the literal double quotes are part of the argument
+        mlog.debug('Sanity check run command line: ', ' '.join(cmdlist))
+        pe, stdo, stde = Popen_safe(cmdlist, cwd=work_dir)
+        mlog.debug('Sanity check run stdout: ')
+        mlog.debug(stdo)
+        mlog.debug('-----\nSanity check run stderr:')
+        mlog.debug(stde)
+        mlog.debug('-----')
+        pe.wait()  # NOTE(review): Popen_safe has presumably already waited for the process; confirm.
+        if pe.returncode != 0:
+            raise EnvironmentException(f'Executables created by {self.language} compiler {self.name_string()} are not runnable.')
+
+        # Interpret the result of the sanity test.
+        # As mentioned above, it is not only a sanity test but also a GPU
+        # architecture detection test.
+        if stde == '':
+            self.detected_cc = stdo
+        else:
+            mlog.debug('cudaGetDeviceCount() returned ' + stde)
+
+    def has_header_symbol(self, hname: str, symbol: str, prefix: str,
+                          env: 'Environment', *,
+                          extra_args: T.Optional[T.List[str]] = None,
+                          dependencies: T.Optional[T.List['Dependency']] = None) -> T.Tuple[bool, bool]:
+        """Probe whether ``symbol`` is usable after including ``hname``; returns a (found, cached) pair."""
+        # First probe: a macro, or a C-like symbol that can stand as an expression statement.
+        c_like = '''{prefix}
+        #include <{header}>
+        int main(void) {{
+            /* If it's not defined as a macro, try to use as a symbol */
+            #ifndef {symbol}
+                {symbol};
+            #endif
+            return 0;
+        }}'''
+        # Second probe: a class or a template, brought in with a using-declaration.
+        cxx_like = '''{prefix}
+        #include <{header}>
+        using {symbol};
+        int main(void) {{
+            return 0;
+        }}'''
+        subst = {'prefix': prefix, 'header': hname, 'symbol': symbol}
+        extra_args = [] if extra_args is None else extra_args
+        found, cached = self.compiles(c_like.format_map(subst), env, extra_args=extra_args, dependencies=dependencies)
+        if not found:
+            return self.compiles(cxx_like.format_map(subst), env, extra_args=extra_args, dependencies=dependencies)
+        return True, cached
+
+    def get_options(self) -> 'KeyedOptionDictType':
+        """
+        Return the parent class's options extended with the CUDA-specific
+        `std` and `ccbindir` options for this machine and language.
+        """
+        std_key = OptionKey('std', machine=self.for_machine, lang=self.language)
+        ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
+        std_option = coredata.UserComboOption(
+            'C++ language standard to use with CUDA',
+            ['none', 'c++03', 'c++11', 'c++14', 'c++17'],
+            'none')
+        ccbindir_option = coredata.UserStringOption(
+            'CUDA non-default toolchain directory to use (-ccbin)',
+            '')
+        cuda_options = {std_key: std_option, ccbindir_key: ccbindir_option}
+        opts = super().get_options()
+        opts.update(cuda_options)
+        return opts
+
+    def _to_host_compiler_options(self, options: 'KeyedOptionDictType') -> 'KeyedOptionDictType':
+        """
+        Convert an NVCC Option set to a host compiler's option set.
+
+        Every option the host compiler declares is looked up in `options`,
+        falling back to the host compiler's own option object when absent.
+        """
+        host_options = {}
+        for key, fallback in self.host_compiler.get_options().items():
+            host_options[key] = options.get(key, fallback)
+        # NVCC has its own -std flag that may not agree with the host
+        # compiler's, so the host compiler's std option is forced to 'none'.
+        host_std_key = OptionKey('std', machine=self.for_machine, lang=self.host_compiler.language)
+        return OptionOverrideProxy({host_std_key: 'none'}, host_options)
+
+    def get_option_compile_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
+        """
+        Build the option-derived compile arguments: the `-ccbin` arguments,
+        then `--std=` (outside Windows, when not 'none'), then the host
+        compiler's option arguments passed through `_to_host_flags`.
+        """
+        nvcc_args = self.get_ccbin_args(options)
+        # On Windows, the version of the C++ standard used by nvcc is dictated by
+        # the combination of CUDA version and MSVC version; the --std= is thus ignored
+        # and attempting to use it will result in a warning: https://stackoverflow.com/a/51272091/741027
+        if not is_windows():
+            std_option = options[OptionKey('std', machine=self.for_machine, lang=self.language)]
+            if std_option.value != 'none':
+                nvcc_args.append('--std=' + std_option.value)
+        host_options = self._to_host_compiler_options(options)
+        host_args = self.host_compiler.get_option_compile_args(host_options)
+        return nvcc_args + self._to_host_flags(host_args)
+
+    def get_option_link_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
+        """
+        Build the option-derived link arguments: the `-ccbin` arguments
+        followed by the host compiler's option link arguments, passed through
+        `_to_host_flags` for the linker phase.
+        """
+        ccbin_args = self.get_ccbin_args(options)
+        host_options = self._to_host_compiler_options(options)
+        host_link_args = self.host_compiler.get_option_link_args(host_options)
+        return ccbin_args + self._to_host_flags(host_link_args, _Phase.LINKER)
+
+    def get_soname_args(self, env: 'Environment', prefix: str, shlib_name: str,
+                        suffix: str, soversion: str,
+                        darwin_versions: T.Tuple[str, str],
+                        is_shared_module: bool) -> T.List[str]:
+        """
+        Ask the host compiler for its soname arguments (all parameters are
+        forwarded unchanged) and pass them through `_to_host_flags` for the
+        linker phase.
+        """
+        host_soname_args = self.host_compiler.get_soname_args(
+            env, prefix, shlib_name, suffix, soversion, darwin_versions,
+            is_shared_module)
+        return self._to_host_flags(host_soname_args, _Phase.LINKER)
+
+    def get_compile_only_args(self) -> T.List[str]:
+        """Return the compile-only argument list, `['-c']`."""
+        compile_only_flag = '-c'
+        return [compile_only_flag]
+
+    def get_no_optimization_args(self) -> T.List[str]:
+        """Return the argument list that disables optimization, `['-O0']`."""
+        no_optimization_flag = '-O0'
+        return [no_optimization_flag]
+
+    def get_optimization_args(self, optimization_level: str) -> T.List[str]:
+        """
+        Return the `cuda_optimization_args` entry for `optimization_level`.
+        """
+        # NOTE: an alternative is to redirect this to the host compiler
+        # (host_compiler.get_optimization_args via _to_host_flags), which would
+        # give more control over options like "optimize for space" that nvcc
+        # doesn't support.
+        nvcc_optimization_args = cuda_optimization_args[optimization_level]
+        return nvcc_optimization_args
+
+    def sanitizer_compile_args(self, value: str) -> T.List[str]:
+        """
+        Return the host compiler's sanitizer compile arguments for `value`,
+        passed through `_to_host_flags`.
+        """
+        host_sanitizer_args = self.host_compiler.sanitizer_compile_args(value)
+        return self._to_host_flags(host_sanitizer_args)
+
+    def sanitizer_link_args(self, value: str) -> T.List[str]:
+        """
+        Return the host compiler's sanitizer link arguments for `value`,
+        passed through `_to_host_flags` with its default phase.
+        """
+        host_sanitizer_args = self.host_compiler.sanitizer_link_args(value)
+        return self._to_host_flags(host_sanitizer_args)
+
+    def get_debug_args(self, is_debug: bool) -> T.List[str]:
+        """Return the `cuda_debug_args` entry selected by `is_debug`."""
+        nvcc_debug_args = cuda_debug_args[is_debug]
+        return nvcc_debug_args
+
+    def get_werror_args(self) -> T.List[str]:
+        """
+        Return the single `-Werror=` argument listing the warning kinds that
+        are to be treated as errors.
+        """
+        warning_kinds = 'cross-execution-space-call,deprecated-declarations,reorder'
+        return ['-Werror=' + warning_kinds]
+
+    def get_warn_args(self, level: str) -> T.List[str]:
+        """Return the `self.warn_args` entry stored under `level`."""
+        args_for_level = self.warn_args[level]
+        return args_for_level
+
+    def get_buildtype_args(self, buildtype: str) -> T.List[str]:
+        """
+        Return the `cuda_buildtype_args` entry for `buildtype` followed by the
+        host compiler's buildtype arguments passed through `_to_host_flags`.
+        """
+        # nvcc doesn't support msvc's "Edit and Continue" PDB format; "downgrade" to
+        # a regular PDB to avoid cl's warning to that effect (D9025 : overriding '/ZI' with '/Zi')
+        host_args = []
+        for host_arg in self.host_compiler.get_buildtype_args(buildtype):
+            host_args.append('/Zi' if host_arg == '/ZI' else host_arg)
+        nvcc_args = cuda_buildtype_args[buildtype]
+        return nvcc_args + self._to_host_flags(host_args)
+
+    def get_include_args(self, path: str, is_system: bool) -> T.List[str]:
+        """
+        Return the include argument for `path`: `-isystem=<path>` when
+        `is_system` is true, `-I<path>` otherwise. An empty `path` is
+        replaced by `.`.
+        """
+        include_dir = '.' if path == '' else path
+        if is_system:
+            return ['-isystem=' + include_dir]
+        return ['-I' + include_dir]
+
+    def get_compile_debugfile_args(self, rel_obj: str, pch: bool = False) -> T.List[str]:
+        """
+        Return the host compiler's compile-time debug-file arguments for
+        `rel_obj` / `pch`, passed through `_to_host_flags`.
+        """
+        host_debugfile_args = self.host_compiler.get_compile_debugfile_args(rel_obj, pch)
+        return self._to_host_flags(host_debugfile_args)
+
+    def get_link_debugfile_args(self, targetfile: str) -> T.List[str]:
+        """
+        Return the host compiler's link-time debug-file arguments for
+        `targetfile`, passed through `_to_host_flags` for the linker phase.
+        """
+        host_debugfile_args = self.host_compiler.get_link_debugfile_args(targetfile)
+        return self._to_host_flags(host_debugfile_args, _Phase.LINKER)
+
+    def get_depfile_suffix(self) -> str:
+        """Return the dependency-file suffix, `'d'`."""
+        depfile_suffix = 'd'
+        return depfile_suffix
+
+    def get_buildtype_linker_args(self, buildtype: str) -> T.List[str]:
+        """
+        Return the host compiler's linker arguments for `buildtype`, passed
+        through `_to_host_flags` for the linker phase.
+        """
+        host_linker_args = self.host_compiler.get_buildtype_linker_args(buildtype)
+        return self._to_host_flags(host_linker_args, _Phase.LINKER)
+
+    def build_rpath_args(self, env: 'Environment', build_dir: str, from_dir: str,
+                         rpath_paths: str, build_rpath: str,
+                         install_rpath: str) -> T.Tuple[T.List[str], T.Set[bytes]]:
+        """
+        Delegate rpath computation to the host compiler (all parameters are
+        forwarded unchanged).
+
+        Returns a pair: the host's rpath arguments passed through
+        `_to_host_flags` for the linker phase, and the host's second result
+        element untouched.
+        """
+        host_result = self.host_compiler.build_rpath_args(
+            env, build_dir, from_dir, rpath_paths, build_rpath, install_rpath)
+        host_rpath_args, dirs_to_remove = host_result
+        nvcc_rpath_args = self._to_host_flags(host_rpath_args, _Phase.LINKER)
+        return (nvcc_rpath_args, dirs_to_remove)
+
+    def linker_to_compiler_args(self, args: T.List[str]) -> T.List[str]:
+        """Return `args` as given; no conversion is applied."""
+        unchanged_args = args
+        return unchanged_args
+
+    def get_pic_args(self) -> T.List[str]:
+        """
+        Return the host compiler's PIC arguments passed through
+        `_to_host_flags`.
+        """
+        host_pic_args = self.host_compiler.get_pic_args()
+        return self._to_host_flags(host_pic_args)
+
+    def compute_parameters_with_absolute_paths(self, parameter_list: T.List[str],
+                                               build_dir: str) -> T.List[str]:
+        """
+        Always return a new empty list; `parameter_list` and `build_dir` are
+        not inspected.
+        """
+        no_parameters = []
+        return no_parameters
+
+    def get_output_args(self, target: str) -> T.List[str]:
+        """Return the output arguments naming `target`: `['-o', target]`."""
+        output_flag = '-o'
+        return [output_flag, target]
+
+    def get_std_exe_link_args(self) -> T.List[str]:
+        """
+        Return the host compiler's standard executable link arguments, passed
+        through `_to_host_flags` for the linker phase.
+        """
+        host_exe_link_args = self.host_compiler.get_std_exe_link_args()
+        return self._to_host_flags(host_exe_link_args, _Phase.LINKER)
+
+    def find_library(self, libname: str, env: 'Environment', extra_dirs: T.List[str],
+                     libtype: LibType = LibType.PREFER_SHARED) -> T.Optional[T.List[str]]:
+        """
+        Return `['-l' + libname]` without performing any lookup; `env`,
+        `extra_dirs` and `libtype` are currently ignored.
+        """
+        # FIXME
+        link_flag = '-l' + libname
+        return [link_flag]
+
+    def get_crt_compile_args(self, crt_val: str, buildtype: str) -> T.List[str]:
+        """
+        Return the host compiler's C runtime compile arguments for
+        `crt_val` / `buildtype`, passed through `_to_host_flags`.
+        """
+        host_crt_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
+        return self._to_host_flags(host_crt_args)
+
+    def get_crt_link_args(self, crt_val: str, buildtype: str) -> T.List[str]:
+        """
+        Return the host compiler's C runtime link arguments for
+        `crt_val` / `buildtype`, optionally preceded by a
+        `/NODEFAULTLIB:LIBCMT.lib` override, all passed through
+        `_to_host_flags` for the linker phase.
+        """
+        # nvcc defaults to static, release version of msvc runtime and provides no
+        # native option to override it; override it with /NODEFAULTLIB
+        crts_needing_override = ('/MDd', '/MD', '/MTd')
+        host_crt_compile_args = self.host_compiler.get_crt_compile_args(crt_val, buildtype)
+        needs_override = any(arg in crts_needing_override for arg in host_crt_compile_args)
+        overrides = ['/NODEFAULTLIB:LIBCMT.lib'] if needs_override else []
+        host_crt_link_args = self.host_compiler.get_crt_link_args(crt_val, buildtype)
+        return self._to_host_flags(overrides + host_crt_link_args, _Phase.LINKER)
+
+    def get_target_link_args(self, target: 'BuildTarget') -> T.List[str]:
+        """
+        Return the parent class's link arguments for `target`, passed through
+        `_to_host_flags` for the linker phase.
+        """
+        inherited_link_args = super().get_target_link_args(target)
+        return self._to_host_flags(inherited_link_args, _Phase.LINKER)
+
+    def get_dependency_compile_args(self, dep: 'Dependency') -> T.List[str]:
+        """
+        Return the parent class's compile arguments for `dep`, passed through
+        `_to_host_flags`.
+        """
+        inherited_compile_args = super().get_dependency_compile_args(dep)
+        return self._to_host_flags(inherited_compile_args)
+
+    def get_dependency_link_args(self, dep: 'Dependency') -> T.List[str]:
+        """
+        Return the parent class's link arguments for `dep`, passed through
+        `_to_host_flags` for the linker phase.
+        """
+        inherited_link_args = super().get_dependency_link_args(dep)
+        return self._to_host_flags(inherited_link_args, _Phase.LINKER)
+
+    def get_ccbin_args(self, options: 'KeyedOptionDictType') -> T.List[str]:
+        """
+        Return the `-ccbin=` argument derived from the `ccbindir` option.
+
+        The result is an empty list when the option value is not a string or
+        is the empty string; otherwise it holds the single argument produced
+        by `_shield_nvcc_list_arg`.
+        """
+        ccbindir_key = OptionKey('ccbindir', machine=self.for_machine, lang=self.language)
+        ccbindir = options[ccbindir_key].value
+        if not isinstance(ccbindir, str) or ccbindir == '':
+            return []
+        shielded_arg = self._shield_nvcc_list_arg('-ccbin='+ccbindir, False)
+        return [shielded_arg]