diff --git a/.devcontainer/post_create.sh b/.devcontainer/post_create.sh index 15fd069..20d60a0 100644 --- a/.devcontainer/post_create.sh +++ b/.devcontainer/post_create.sh @@ -1,7 +1,7 @@ #!/bin/bash # install developer dependencies -pip install .[dev] +pip install -e .[dev] # make sure the developer uses pre-commit hooks pre-commit install diff --git a/README.md b/README.md index cfdbee2..bc38b90 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ to own a GPU yourself. Here are just a few of the things that nvcc4jupyter does well: - [Easily run CUDA C++ code](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#hello-world) - - [Profile your code with NVIDIA Nsight Compute](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling) + - [Profile your code with NVIDIA Nsight Compute or Nsight Systems](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling) - [Compile your code with external libraries (e.g. OpenCV)](https://nvcc4jupyter.readthedocs.io/en/latest/notebooks.html#compiling-with-external-libraries) - [Share code between different programs in the same notebook / split your code into multiple files for improved readability](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#groups) @@ -96,7 +96,7 @@ If not using the devcontainer you need to install the package with the development dependencies and install the pre-commit hook before commiting any changes: ```bash -pip install .[dev] +pip install -e .[dev] pre-commit install ``` diff --git a/docs/source/magics.rst b/docs/source/magics.rst index 28a3bf1..dfdedd7 100644 --- a/docs/source/magics.rst +++ b/docs/source/magics.rst @@ -21,7 +21,7 @@ Usage - ``%%cuda``: Compile and run this cell. - ``%%cuda -p``: Also runs the Nsight Compute profiler. - ``%%cuda -p -a ""``: Also runs the Nsight Compute profiler. - - ``%%cude -c "`_ + by wrapping them in double quotes. Will be passed to the profiler selected + by the \-\-profiler option. 
See profiler options here: + `Nsight Compute `_ + or `Nsight Systems `_. + +.. _compiler_args: + +-c, --compiler-args + String. Optional compiler arguments that can be space separated + by wrapping them in double quotes. They will be passed to "nvcc". + See all options here: + `NVCC Options `_ + .. _compiler_args: diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 50fe879..efaddaf 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -225,10 +225,11 @@ Profiling --------- Another important feature of nvcc4jupyter is its integration with the NVIDIA -Nsight Compute profiler, which you need to make sure is installed and its -executable can be found in a directory in your PATH environment variable. +Nsight Compute / NVIDIA Nsight Systems profilers, which you need to make sure +are installed and the executables can be found in a directory in your PATH +environment variable. -In order to use it and provide the profiler with custom arguments, simply run: +To profile using Nsight Compute with custom arguments: .. code-block:: c++ @@ -256,6 +257,27 @@ Running the cell above will compile and execute the vector addition code in the Compute (SM) Throughput % 1.19 ----------------------- ------------- ------------ +To profile using Nsight Systems with custom arguments: + +.. code-block:: c++ + + %cuda_group_run --group "vector_add" --profiler nsys --profile --profiler-args "profile --stats=true" + +Running the cell above will compile and execute the vector addition code in the +"vector_add" group and profile it with Nsight Systems. The output will contain +multiple tables, one of which will look similar to this: + +.. 
class Profiler(Enum):
    """
    Profiling tools that can be selected for a run.

    The value of each member is the name of the tool's command line
    executable.
    """

    NCU = "ncu"  # NVIDIA Nsight Compute
    NSYS = "nsys"  # NVIDIA Nsight Systems


# Module-level defaults. The argument parsers wrap these in lambdas so that
# a later call to set_defaults() is still picked up at run time.
_default_profiler: Profiler = Profiler.NCU
_default_profiler_args: str = ""
_default_compiler_args: str = ""

T = TypeVar("T")


def set_defaults(
    profiler: Optional[Profiler] = None,
    compiler_args: Optional[str] = None,
    profiler_args: Optional[str] = None,
) -> None:
    """
    Set the default values used by the magic commands when the matching
    option is not given explicitly. Arguments left as None keep their
    current default.

    Args:
        profiler: If not None, this value becomes the new default profiler.
            Defaults to None.
        compiler_args: If not None, this value becomes the new default compiler
            config. Defaults to None.
        profiler_args: If not None, this value becomes the new default profiler
            config. Defaults to None.
    """
    # pylint: disable=global-statement
    global _default_profiler, _default_compiler_args, _default_profiler_args

    if profiler is not None:
        _default_profiler = profiler
    if compiler_args is not None:
        _default_compiler_args = compiler_args
    if profiler_args is not None:
        _default_profiler_args = profiler_args


def class_to_lambda(arg: str, cls: Type[T]) -> Callable[[], T]:
    """
    Convert a string to an instance of "cls" and wrap it in a lambda.

    The conversion happens immediately instead of when the lambda is called.
    When this function is used as an argparse "type" callable, an invalid
    value is therefore reported by the parser as a normal usage error rather
    than surfacing later, when the parsed argument is finally evaluated.

    Args:
        arg: The raw string value, e.g. taken from the command line.
        cls: The class (e.g. an Enum) used to convert the string.

    Raises:
        ValueError: If "cls" rejects the value, as Enum classes do for values
            that do not belong to any member.

    Returns:
        A zero-argument callable that returns the converted value. The same
        object is returned on every call.
    """
    value = cls(arg)  # validate eagerly; the lambda only hands the result out
    return lambda: value
# Directories that are searched recursively when an executable is not
# reachable through PATH. They are tried in the order listed here.
CUDA_SEARCH_PATHS: List[str] = [
    "/opt/nvidia/nsight-compute",
    "/usr/local/cuda",
    "/opt",
    "/usr",
]


def is_executable(fpath: str) -> bool:
    """Check if file exists and is executable"""
    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)


def which(name: str) -> Optional[str]:
    """Find an executable by name by searching the PATH directories"""
    for path_dir in os.environ.get("PATH", "").split(os.pathsep):
        exec_path = os.path.join(path_dir, name)
        if is_executable(exec_path):
            return exec_path
    return None


def find_executable(
    name: str, search_paths: Optional[List[str]] = None
) -> Optional[str]:
    """
    Find an executable, either by searching in the directories of the PATH
    environment variable or, if that did not work, by searching recursively
    in the list of directories given as parameter.

    Args:
        name: The name of the executable to be found.
        search_paths: If None, only executables that are available from PATH
            will be found. Otherwise, will recursively search these
            directories. Defaults to None.

    Returns:
        The path to the executable if it is found, and None otherwise.
    """
    which_path = which(name)
    if which_path is not None:
        return which_path

    for search_path in search_paths or []:
        pattern = os.path.join(os.path.abspath(search_path), f"**/{name}")
        for exec_path in glob(pattern, recursive=True):
            # the pattern matches on the name only, so it can also hit
            # directories or files without the executable permission; those
            # must not be reported as a usable tool
            if is_executable(exec_path):
                return exec_path

    return None
def _get_profiler_path(self, profiler: Profiler) -> str:
    """
    Resolve the executable of a profiling tool, remembering the result.

    A path already stored in "self.profiler_paths" is returned as is.
    Otherwise the executable is looked up by name in the PATH directories
    and then in the CUDA_SEARCH_PATHS directories, and a successful lookup
    is stored for later calls.

    Args:
        profiler: The profiler whose executable should be found.

    Raises:
        RuntimeError: If the profiler executable could not be found.

    Returns:
        The file path of the executable.
    """
    cached_path = self.profiler_paths[profiler]
    if cached_path is None:
        found_path = find_executable(profiler.value, CUDA_SEARCH_PATHS)
        if found_path is None:
            raise RuntimeError(
                f'Could not find the "{profiler.value}" profiling tool.'
                " Consider searching for where it is installed and adding its"
                " directory to the PATH environment variable."
            )
        # only successful lookups are cached, so a failed search is retried
        # on the next call
        self.profiler_paths[profiler] = found_path
        return found_path
    return cached_path
@@ -173,7 +217,8 @@ class NVCCPlugin(Magics): else: run_args = [] if profile: - run_args.extend(["ncu"] + profiler_args.split()) + profiler_path = self._get_profiler_path(profiler) + run_args.extend([profiler_path] + profiler_args.split()) run_args.append(exec_fpath) output = subprocess.check_output( run_args, stderr=subprocess.STDOUT @@ -194,6 +239,7 @@ class NVCCPlugin(Magics): exec_fpath=exec_fpath, timeit=args.timeit, profile=args.profile, + profiler=args.profiler(), profiler_args=args.profiler_args(), ) except subprocess.CalledProcessError as e: diff --git a/pyproject.toml b/pyproject.toml index 2bc6d1d..e70fe45 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,7 @@ exclude_lines = [ [tool.isort] profile = "black" +src_paths = ["nvcc4jupyter"] # tells isort where to find local modules to not consider them 3rd party libraries [tool.bandit] exclude_dirs = ["build","dist","tests","scripts"] @@ -82,7 +83,8 @@ skips = ["B101", "B311", "B404", "B603"] [tool.black] line-length = 79 fast = true -experimental-string-processing = true +preview = true +enable-unstable-feature = ["string_processing"] [tool.coverage.run] branch = true diff --git a/tests/fixtures/fixtures.py b/tests/fixtures/fixtures.py index ca8248d..ef672a7 100644 --- a/tests/fixtures/fixtures.py +++ b/tests/fixtures/fixtures.py @@ -1,9 +1,11 @@ +import argparse import glob import os import pytest from IPython.core.interactiveshell import InteractiveShell +from nvcc4jupyter.parsers import Profiler from nvcc4jupyter.plugin import NVCCPlugin @@ -70,3 +72,14 @@ def multiple_source_fpaths(fixtures_path: str): pattern_h = os.path.join(fixtures_path, "multiple_files", "*.h") pattern_cu = os.path.join(fixtures_path, "multiple_files", "*.cu") return list(glob.glob(pattern_h)) + list(glob.glob(pattern_cu)) + + +@pytest.fixture(scope="session") +def default_args(): + return argparse.Namespace( + timeit=False, + profile=True, + profiler=lambda: Profiler.NCU, + profiler_args=lambda: "", + 
def test_which():
    """
    Without extra search paths, find_executable must resolve "ls" to the
    same file the standard library finds on PATH. The expected location is
    not hard-coded because it differs between systems (e.g. "/bin/ls" or
    "/usr/bin/ls").
    """
    import shutil  # local import: only this test needs it

    expected_path = shutil.which("ls")
    assert expected_path is not None
    assert find_executable("ls") == expected_path


def test_find_executable(fixtures_path: str):
    """
    An executable that is not on PATH must be found by the recursive search
    of the given directories; "searchforme" lives in the "scripts" directory
    of the test fixtures.
    """
    exec_path = find_executable("searchforme", [fixtures_path])
    assert exec_path is not None

    exec_dir, exec_fname = os.path.split(exec_path)
    assert exec_fname == "searchforme"
    assert os.path.basename(exec_dir) == "scripts"
def check_profiler_output(output: str, profiler: str = "[NCU]"):
    """
    Assert that "output" has the shape printed by the mock profilers.

    After stripping surrounding whitespace, the first line must be the
    profiler tag ("[NCU]" or "[NSYS]") and the second line must be
    "Hello World!". Any further lines are ignored.
    """
    trimmed = output.strip()
    lines = trimmed.split("\n")
    assert len(lines) >= 2
    banner, greeting = lines[0], lines[1]
    assert banner == profiler
    assert greeting == "Hello World!"
"errors detected in the compilation of" in output def test_compile_opencv( plugin: NVCCPlugin, compiler_opencv_fpath: str, + default_args: Namespace, ): gname = "test_compile_opencv" copy_source_to_group(compiler_opencv_fpath, gname, plugin.workdir) @@ -134,23 +127,14 @@ def test_compile_opencv( # check that "pkg-config" exists assert subprocess.check_call(["which", "pkg-config"]) == 0 + pkg_config_args = ["pkg-config", "--cflags", "--libs", "opencv4"] opencv_compile_options = ( - subprocess.check_output( - args=["pkg-config", "--cflags", "--libs", "opencv4"] - ) - .decode() - .strip() + subprocess.check_output(args=pkg_config_args).decode().strip() ) - output = plugin._compile_and_run( - group_name=gname, - args=argparse.Namespace( - timeit=False, - profile=True, - profiler_args=lambda: "", - compiler_args=lambda: opencv_compile_options, - ), - ) + args = deepcopy(default_args) + args.compiler_args = lambda: opencv_compile_options + output = plugin._compile_and_run(group_name=gname, args=args) assert "General configuration for OpenCV" in output @@ -199,7 +183,9 @@ def test_run_profile(plugin: NVCCPlugin, sample_cuda_fpath: str): def test_compile_and_run_multiple_files( - plugin: NVCCPlugin, multiple_source_fpaths: List[str] + plugin: NVCCPlugin, + multiple_source_fpaths: List[str], + default_args: Namespace, ): """ Compiles and executes 3 cuda source files from @@ -208,20 +194,14 @@ def test_compile_and_run_multiple_files( gname = "test_compile_and_run_multiple_files" for fpath in multiple_source_fpaths: copy_source_to_group(fpath, gname, plugin.workdir) - output = plugin._compile_and_run( - group_name=gname, - args=argparse.Namespace( - timeit=False, - profile=True, - profiler_args=lambda: "", - compiler_args=lambda: "", - ), - ) + output = plugin._compile_and_run(group_name=gname, args=default_args) check_profiler_output(output) def test_compile_and_run_multiple_files_shared( - plugin: NVCCPlugin, multiple_source_fpaths: List[str] + plugin: NVCCPlugin, + 
def test_magic_cuda_set_default_profiler(
    capsys,
    plugin: NVCCPlugin,
    sample_cuda_code: str,
    sample_magic_cu_line: str,
):
    """
    Changing the default profiler with set_defaults must make the "cuda"
    magic use Nsight Systems without naming the profiler on the magic line.
    """
    # NOTE(review): presumably "sample_magic_cu_line" enables profiling but
    # does not pass --profiler; confirm against the fixture definition
    set_defaults(profiler=Profiler.NSYS)
    plugin.cuda(sample_magic_cu_line, sample_cuda_code)
    captured = capsys.readouterr()
    check_profiler_output(captured.out, profiler="[NSYS]")