aboutsummaryrefslogtreecommitdiffstats
path: root/meson/mesonbuild/dependencies/cuda.py
blob: 6bc0f05e300fb612cb4e9f3e44ee503007ff2074 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# Copyright 2013-2019 The Meson development team

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import re
import os
import typing as T
from pathlib import Path

from .. import mesonlib
from .. import mlog
from ..environment import detect_cpu_family
from .base import DependencyException, SystemDependency


if T.TYPE_CHECKING:
    from ..environment import Environment
    from ..compilers import Compiler

TV_ResultTuple = T.Tuple[T.Optional[str], T.Optional[str], bool]

class CudaDependency(SystemDependency):

    supported_languages = ['cuda', 'cpp', 'c'] # see also _default_language

    def __init__(self, environment: 'Environment', kwargs: T.Dict[str, T.Any]) -> None:
        compilers = environment.coredata.compilers[self.get_for_machine_from_kwargs(kwargs)]
        language = self._detect_language(compilers)
        if language not in self.supported_languages:
            raise DependencyException(f'Language \'{language}\' is not supported by the CUDA Toolkit. Supported languages are {self.supported_languages}.')

        super().__init__('cuda', environment, kwargs, language=language)
        self.lib_modules: T.Dict[str, T.List[str]] = {}
        self.requested_modules = self.get_requested(kwargs)
        if 'cudart' not in self.requested_modules:
            self.requested_modules = ['cudart'] + self.requested_modules

        (self.cuda_path, self.version, self.is_found) = self._detect_cuda_path_and_version()
        if not self.is_found:
            return

        if not os.path.isabs(self.cuda_path):
            raise DependencyException(f'CUDA Toolkit path must be absolute, got \'{self.cuda_path}\'.')

        # nvcc already knows where to find the CUDA Toolkit, but if we're compiling
        # a mixed C/C++/CUDA project, we still need to make the include dir searchable
        if self.language != 'cuda' or len(compilers) > 1:
            self.incdir = os.path.join(self.cuda_path, 'include')
            self.compile_args += [f'-I{self.incdir}']

        if self.language != 'cuda':
            arch_libdir = self._detect_arch_libdir()
            self.libdir = os.path.join(self.cuda_path, arch_libdir)
            mlog.debug('CUDA library directory is', mlog.bold(self.libdir))
        else:
            self.libdir = None

        self.is_found = self._find_requested_libraries()

    @classmethod
    def _detect_language(cls, compilers: T.Dict[str, 'Compiler']) -> str:
        for lang in cls.supported_languages:
            if lang in compilers:
                return lang
        return list(compilers.keys())[0]

    def _detect_cuda_path_and_version(self) -> TV_ResultTuple:
        self.env_var = self._default_path_env_var()
        mlog.debug('Default path env var:', mlog.bold(self.env_var))

        version_reqs = self.version_reqs
        if self.language == 'cuda':
            nvcc_version = self._strip_patch_version(self.get_compiler().version)
            mlog.debug('nvcc version:', mlog.bold(nvcc_version))
            if version_reqs:
                # make sure nvcc version satisfies specified version requirements
                (found_some, not_found, found) = mesonlib.version_compare_many(nvcc_version, version_reqs)
                if not_found:
                    msg = f'The current nvcc version {nvcc_version} does not satisfy the specified CUDA Toolkit version requirements {version_reqs}.'
                    return self._report_dependency_error(msg, (None, None, False))

            # use nvcc version to find a matching CUDA Toolkit
            version_reqs = [f'={nvcc_version}']
        else:
            nvcc_version = None

        paths = [(path, self._cuda_toolkit_version(path), default) for (path, default) in self._cuda_paths()]
        if version_reqs:
            return self._find_matching_toolkit(paths, version_reqs, nvcc_version)

        defaults = [(path, version) for (path, version, default) in paths if default]
        if defaults:
            return (defaults[0][0], defaults[0][1], True)

        platform_msg = 'set the CUDA_PATH environment variable' if self._is_windows() \
            else 'set the CUDA_PATH environment variable/create the \'/usr/local/cuda\' symbolic link'
        msg = f'Please specify the desired CUDA Toolkit version (e.g. dependency(\'cuda\', version : \'>=10.1\')) or {platform_msg} to point to the location of your desired version.'
        return self._report_dependency_error(msg, (None, None, False))

    def _find_matching_toolkit(self, paths: T.List[TV_ResultTuple], version_reqs: T.List[str], nvcc_version: T.Optional[str]) -> TV_ResultTuple:
        # keep the default paths order intact, sort the rest in the descending order
        # according to the toolkit version
        part_func: T.Callable[[TV_ResultTuple], bool] = lambda t: not t[2]
        defaults_it, rest_it = mesonlib.partition(part_func, paths)
        defaults = list(defaults_it)
        paths = defaults + sorted(rest_it, key=lambda t: mesonlib.Version(t[1]), reverse=True)
        mlog.debug(f'Search paths: {paths}')

        if nvcc_version and defaults:
            default_src = f"the {self.env_var} environment variable" if self.env_var else "the \'/usr/local/cuda\' symbolic link"
            nvcc_warning = 'The default CUDA Toolkit as designated by {} ({}) doesn\'t match the current nvcc version {} and will be ignored.'.format(default_src, os.path.realpath(defaults[0][0]), nvcc_version)
        else:
            nvcc_warning = None

        for (path, version, default) in paths:
            (found_some, not_found, found) = mesonlib.version_compare_many(version, version_reqs)
            if not not_found:
                if not default and nvcc_warning:
                    mlog.warning(nvcc_warning)
                return (path, version, True)

        if nvcc_warning:
            mlog.warning(nvcc_warning)
        return (None, None, False)

    def _default_path_env_var(self) -> T.Optional[str]:
        env_vars = ['CUDA_PATH'] if self._is_windows() else ['CUDA_PATH', 'CUDA_HOME', 'CUDA_ROOT']
        env_vars = [var for var in env_vars if var in os.environ]
        user_defaults = {os.environ[var] for var in env_vars}
        if len(user_defaults) > 1:
            mlog.warning('Environment variables {} point to conflicting toolkit locations ({}). Toolkit selection might produce unexpected results.'.format(', '.join(env_vars), ', '.join(user_defaults)))
        return env_vars[0] if env_vars else None

    def _cuda_paths(self) -> T.List[T.Tuple[str, bool]]:
        return ([(os.environ[self.env_var], True)] if self.env_var else []) \
            + (self._cuda_paths_win() if self._is_windows() else self._cuda_paths_nix())

    def _cuda_paths_win(self) -> T.List[T.Tuple[str, bool]]:
        env_vars = os.environ.keys()
        return [(os.environ[var], False) for var in env_vars if var.startswith('CUDA_PATH_')]

    def _cuda_paths_nix(self) -> T.List[T.Tuple[str, bool]]:
        # include /usr/local/cuda default only if no env_var was found
        pattern = '/usr/local/cuda-*' if self.env_var else '/usr/local/cuda*'
        return [(path, os.path.basename(path) == 'cuda') for path in glob.iglob(pattern)]

    toolkit_version_regex = re.compile(r'^CUDA Version\s+(.*)$')
    path_version_win_regex = re.compile(r'^v(.*)$')
    path_version_nix_regex = re.compile(r'^cuda-(.*)$')
    cudart_version_regex = re.compile(r'#define\s+CUDART_VERSION\s+([0-9]+)')

    def _cuda_toolkit_version(self, path: str) -> str:
        version = self._read_toolkit_version_txt(path)
        if version:
            return version
        version = self._read_cuda_runtime_api_version(path)
        if version:
            return version

        mlog.debug('Falling back to extracting version from path')
        path_version_regex = self.path_version_win_regex if self._is_windows() else self.path_version_nix_regex
        try:
            m = path_version_regex.match(os.path.basename(path))
            if m:
                return m.group(1)
            else:
                mlog.warning(f'Could not detect CUDA Toolkit version for {path}')
        except Exception as e:
            mlog.warning(f'Could not detect CUDA Toolkit version for {path}: {e!s}')

        return '0.0'

    def _read_cuda_runtime_api_version(self, path_str: str) -> T.Optional[str]:
        path = Path(path_str)
        for i in path.rglob('cuda_runtime_api.h'):
            raw = i.read_text(encoding='utf-8')
            m = self.cudart_version_regex.search(raw)
            if not m:
                continue
            try:
                vers_int = int(m.group(1))
            except ValueError:
                continue
            # use // for floor instead of / which produces a float
            major = vers_int // 1000                  # type: int
            minor = (vers_int - major * 1000) // 10   # type: int
            return f'{major}.{minor}'
        return None

    def _read_toolkit_version_txt(self, path: str) -> T.Optional[str]:
        # Read 'version.txt' at the root of the CUDA Toolkit directory to determine the tookit version
        version_file_path = os.path.join(path, 'version.txt')
        try:
            with open(version_file_path, encoding='utf-8') as version_file:
                version_str = version_file.readline() # e.g. 'CUDA Version 10.1.168'
                m = self.toolkit_version_regex.match(version_str)
                if m:
                    return self._strip_patch_version(m.group(1))
        except Exception as e:
            mlog.debug(f'Could not read CUDA Toolkit\'s version file {version_file_path}: {e!s}')

        return None

    @classmethod
    def _strip_patch_version(cls, version: str) -> str:
        return '.'.join(version.split('.')[:2])

    def _detect_arch_libdir(self) -> str:
        arch = detect_cpu_family(self.env.coredata.compilers.host)
        machine = self.env.machines[self.for_machine]
        msg = '{} architecture is not supported in {} version of the CUDA Toolkit.'
        if machine.is_windows():
            libdirs = {'x86': 'Win32', 'x86_64': 'x64'}
            if arch not in libdirs:
                raise DependencyException(msg.format(arch, 'Windows'))
            return os.path.join('lib', libdirs[arch])
        elif machine.is_linux():
            libdirs = {'x86_64': 'lib64', 'ppc64': 'lib', 'aarch64': 'lib64', 'loongarch64': 'lib64'}
            if arch not in libdirs:
                raise DependencyException(msg.format(arch, 'Linux'))
            return libdirs[arch]
        elif machine.is_darwin():
            libdirs = {'x86_64': 'lib64'}
            if arch not in libdirs:
                raise DependencyException(msg.format(arch, 'macOS'))
            return libdirs[arch]
        else:
            raise DependencyException('CUDA Toolkit: unsupported platform.')

    def _find_requested_libraries(self) -> bool:
        all_found = True

        for module in self.requested_modules:
            args = self.clib_compiler.find_library(module, self.env, [self.libdir] if self.libdir else [])
            if args is None:
                self._report_dependency_error(f'Couldn\'t find requested CUDA module \'{module}\'')
                all_found = False
            else:
                mlog.debug(f'Link args for CUDA module \'{module}\' are {args}')
                self.lib_modules[module] = args

        return all_found

    def _is_windows(self) -> bool:
        return self.env.machines[self.for_machine].is_windows()

    @T.overload
    def _report_dependency_error(self, msg: str) -> None: ...

    @T.overload
    def _report_dependency_error(self, msg: str, ret_val: TV_ResultTuple) -> TV_ResultTuple: ...

    def _report_dependency_error(self, msg: str, ret_val: T.Optional[TV_ResultTuple] = None) -> T.Optional[TV_ResultTuple]:
        if self.required:
            raise DependencyException(msg)

        mlog.debug(msg)
        return ret_val

    def log_details(self) -> str:
        module_str = ', '.join(self.requested_modules)
        return 'modules: ' + module_str

    def log_info(self) -> str:
        return self.cuda_path if self.cuda_path else ''

    def get_requested(self, kwargs: T.Dict[str, T.Any]) -> T.List[str]:
        candidates = mesonlib.extract_as_list(kwargs, 'modules')
        for c in candidates:
            if not isinstance(c, str):
                raise DependencyException('CUDA module argument is not a string.')
        return candidates

    def get_link_args(self, language: T.Optional[str] = None, raw: bool = False) -> T.List[str]:
        args = []
        if self.libdir:
            args += self.clib_compiler.get_linker_search_args(self.libdir)
        for lib in self.requested_modules:
            args += self.lib_modules[lib]
        return args