Feature: Passing arguments to NVCC compiler (#26)

* Add option to give nvcc extra arguments

* Add test for nvcc options that changes c++ dialect from c++17 to c++14

* Add make and the English language pack to devcontainer to be able to build the documentation

* Update documentation config to automatically import the current version of the package

* Document new --compiler-args argument

* Improve test coverage by testing for bad arguments and the error output during a failed compilation

* Add IPython to docs requirements to allow the __version__ import for readthedocs env

* Change devcontainer base image to have the latest CUDA toolkit

* Mock the nsight compute tool with a bash script

* Add test to compile with opencv

* Add new page to documentation that contains a new notebook that explains compiling with external libraries

* Add autodocstring vscode extension to devcontainer

* Add function that modifies the default profiler/compiler arguments to allow reusing them in multiple magic command calls

* Update pylint exceptions

* Update contributing instructions

* Change version from 1.0.3 to 1.1.0 due to adding features in a backward-compatible manner

* Install latest CUDA toolkit on the test runner to pass the OpenCV compilation test

* Install opencv in test runner and update code coverage install

* Add CUDA bin to PATH in test and coverage runners

* Add cuda bin to path variable in .bashrc

* Update way to set environment variable PATH in github action

* Change devcontainer base image back to ubuntu:22.04 to match the environment from the test runner
This commit is contained in:
Cosmin Ștefan Ciocan
2024-02-12 17:29:26 +01:00
committed by GitHub
parent 5cd225851b
commit 781ff5b76b
19 changed files with 424 additions and 51 deletions
+20 -6
View File
@@ -1,15 +1,29 @@
FROM ubuntu
FROM ubuntu:22.04
ARG VENV_PATH=/opt/dev-venv
ENV VENV_ACTIVATE=${VENV_PATH}/bin/activate
ENV DEBIAN_FRONTEND="noninteractive"
# install the latest CUDA toolkit (https://developer.nvidia.com/cuda-downloads)
RUN apt update
RUN apt install -y python3.10-venv nvidia-cuda-toolkit gcc vim git
RUN apt install -y wget
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
RUN dpkg -i cuda-keyring_1.1-1_all.deb
RUN apt update
RUN apt -y install cuda-toolkit-12-3
RUN echo "PATH=\"\$PATH:/usr/local/cuda/bin\"" >> ~/.bashrc
# the mkdir command bypasses a profiler error, which allows us to run it with
# host code only to at least check that the profiler parameters are correctly
# provided; without this line, some tests will fail
RUN mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
# install OpenCV to test compilation with external libraries
RUN apt install -y libopencv-dev pkg-config
# make & language-pack-en are for documentation
RUN apt install -y \
gcc \
git \
language-pack-en \
make \
python3.10-venv \
vim
# we create the virtualenv here so that the devcontainer.json setting
# python.defaultInterpreterPath can be used to find it; if we do it in the
+4 -2
View File
@@ -16,10 +16,12 @@
"ms-python.isort",
"ms-python.flake8",
"ms-python.black-formatter",
"ryanluker.vscode-coverage-gutters"
"ryanluker.vscode-coverage-gutters",
"njpwerner.autodocstring"
],
"settings": {
"python.defaultInterpreterPath": "/opt/dev-venv/bin/python"
"python.defaultInterpreterPath": "/opt/dev-venv/bin/python",
"autoDocstring.docstringFormat": "google-notypes"
}
}
}
+22 -9
View File
@@ -27,14 +27,19 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
# the mkdir command bypasses a profiler error, which allows us to run it
# with host code only to at least check that the profiler parameters are
# correctly provided
- name: Install CUDA tools
- name: Install CUDA toolkit
run: |
sudo apt update
sudo apt install nvidia-cuda-toolkit
sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
sudo apt install -y wget
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt update
sudo apt -y install cuda-toolkit-12-3
echo "PATH=$PATH:/usr/local/cuda/bin" >> $GITHUB_ENV
- name: Install OpenCV
run: |
sudo apt install -y libopencv-dev pkg-config
- name: Install Python dependencies
run: |
@@ -65,11 +70,19 @@ jobs:
with:
python-version: "3.10"
- name: Install CUDA tools
- name: Install CUDA toolkit
run: |
sudo apt update
sudo apt install nvidia-cuda-toolkit
sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
sudo apt install -y wget
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt update
sudo apt -y install cuda-toolkit-12-3
echo "PATH=$PATH:/usr/local/cuda/bin" >> $GITHUB_ENV
- name: Install OpenCV
run: |
sudo apt install -y libopencv-dev pkg-config
- name: Install Python dependencies
run: |
+7 -5
View File
@@ -46,6 +46,7 @@ Here are just a few of the things that nvcc4jupyter does well:
- [Easily run CUDA C++ code](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#hello-world)
- [Profile your code with NVIDIA Nsight Compute](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling)
- [Compile your code with external libraries (e.g. OpenCV)](https://nvcc4jupyter.readthedocs.io/en/latest/notebooks.html#compiling-with-external-libraries)
- [Share code between different programs in the same notebook / split your code into multiple files for improved readability](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#groups)
## Install
@@ -88,13 +89,14 @@ The official documentation is hosted on [readthedocs](https://nvcc4jupyter.readt
## Contributing
Install the package with the development dependencies:
The recommended setup for development is using the devcontainer in GitHub
Codespaces or locally in VSCode.
If not using the devcontainer you need to install the package with the
development dependencies and install the pre-commit hook before committing any
changes:
```bash
pip install .[dev]
```
As a developer, make sure you install the pre-commit hook before commiting any changes:
```bash
pre-commit install
```
+1
View File
@@ -1,2 +1,3 @@
sphinx==7.1.2
sphinx-rtd-theme==1.3.0rc1
IPython>=8.19.0
+9 -2
View File
@@ -6,11 +6,18 @@
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
import os
import sys
sys.path.append(os.path.join("..", ".."))
from nvcc4jupyter.__init__ import __version__ # noqa: E402
project = "nvcc4jupyter"
copyright = "2024, Andrei Nechaev & Cosmin Stefan Ciocan"
author = "Andrei Nechaev & Cosmin Stefan Ciocan"
release = "1.0.1"
version = "1.0.1"
release = __version__
version = __version__
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
+1
View File
@@ -10,4 +10,5 @@ which provides CUDA capable GPUs with the CUDA toolkit already installed.
:caption: Contents:
usage
notebooks
magics
+21 -4
View File
@@ -21,24 +21,40 @@ Usage
- ``%%cuda``: Compile and run this cell.
- ``%%cuda -p``: Also runs the Nsight Compute profiler.
- ``%%cuda -p -a "<SPACE SEPARATED PROFILER ARGS>"``: Also runs the Nsight Compute profiler.
- ``%%cuda -c "<SPACE SEPARATED COMPILER ARGS>"``: Passes additional arguments to "nvcc".
- ``%%cuda -t``: Outputs the "timeit" built-in magic results.
Options
-------
.. _timeit:
-t, --timeit
Boolean. If set, returns the output of the "timeit" built-in
ipython magic instead of stdout.
.. _profile:
-p, --profile
Boolean. If set, runs the NVIDIA Nsight Compute profiler whose
output is appended to standard output.
.. _profiler_args:
-a, --profiler-args
String. Optional profiler arguments that can be space separated
by wrapping them in double quotes. See all options here:
`Nsight Compute CLI <https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html#command-line-options>`_
.. _compiler_args:
-c, --compiler-args
String. Optional compiler arguments that can be space separated
by wrapping them in double quotes. They will be passed to "nvcc".
See all options here:
`NVCC Options <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#nvcc-command-options>`_
.. note::
If both "\-\-profile" and "\-\-timeit" are used then no profiling is
done.
@@ -47,10 +63,11 @@ Examples
--------
::
# compile, run, and profile the code in the cell with the Nsight
# compute profiler while collecting only metrics from the
# "MemoryWorkloadAnalysis" section.
%%cuda --profile --profiler-args "--section MemoryWorkloadAnalysis"
# compile, run, and profile the code in the cell with the Nsight compute
# profiler while collecting only metrics from the "MemoryWorkloadAnalysis"
# section; also provides the "--optimize 3" option to "nvcc" during
# compilation to optimize host code
%%cuda -p -a "--section MemoryWorkloadAnalysis" -c "--optimize 3"
------
+34
View File
@@ -0,0 +1,34 @@
*********
Notebooks
*********
This page provides a list of useful Jupyter notebooks written with the
**nvcc4jupyter** library.
.. note::
These notebooks are written for Google's Colab, but you may run them in
other environments by installing all expected dependencies. If running in
Colab, make sure to set the runtime type to a GPU instance (at the time of
writing this, T4 is the GPU offered for free by Colab).
------
.. _compiling_with_external_libraries:
Compiling with external libraries
=================================
[`NOTEBOOK <https://colab.research.google.com/drive/1iuY46DCwv4hy3SqDhJgFeO8kgpHnzjTh?usp=sharing>`_]
If you need to compile CUDA C++ code that uses external libraries in the host
code (e.g. OpenCV for reading and writing images to disk) then this section is
for you.
To achieve this, use the :ref:`compiler-args <compiler_args>` option of the
:ref:`cuda <cuda_magic>` magic command to pass the correct compiler options
of the OpenCV library to **nvcc** for it to link the OpenCV code with the
code in your Jupyter cell. Those compiler options can be provided by the
`pkg-config <https://www.freedesktop.org/wiki/Software/pkg-config/>`_ tool.
In the notebook we show how to use OpenCV to load an image, blur it with a CUDA
kernel, and then save it back to disk using OpenCV again.
+44
View File
@@ -255,3 +255,47 @@ Running the cell above will compile and execute the vector addition code in the
SM Active Cycles cycle 383.65
Compute (SM) Throughput % 1.19
----------------------- ------------- ------------
Compiler arguments
------------------
In the same way profiler arguments can be passed to the profiling tool,
compiling arguments can be passed to **nvcc**:
.. code-block:: c++
%cuda_group_run --group "vector_add" --compiler-args "--optimize 3"
Running the cell above will compile and execute the vector addition code in the
"vector_add" group. During compilation, **nvcc** receives the "\-\-optimize"
option which specifies the optimization level for host code.
Set default arguments
---------------------
In the case where you execute multiple magic commands with the same compiler or
profiler arguments you can avoid writing them every time by setting the default
arguments:
.. code-block:: python
from nvcc4jupyter import set_defaults
set_defaults(compiler_args="--optimize 3", profiler_args="--section SpeedOfLight")
The same effect can be achieved by running "set_defaults" once for each config
due to the fact that the default value is not changed if a value is not
given to the "set_defaults" function.
.. code-block:: python
from nvcc4jupyter import set_defaults
set_defaults(compiler_args="--optimize 3")
set_defaults(profiler_args="--section SpeedOfLight")
Now we can run the following cell without specifying the compiler and profiler
arguments once again.
.. code-block:: c++
%cuda_group_run --group "vector_add" --profile
+2 -1
View File
@@ -2,6 +2,7 @@
nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
"""
from .parsers import set_defaults # noqa: F401
from .plugin import NVCCPlugin, load_ipython_extension # noqa: F401
__version__ = "1.0.3"
__version__ = "1.1.0"
+49 -1
View File
@@ -3,6 +3,39 @@ Parsers for the CUDA magic commands.
"""
import argparse
from typing import Callable, Optional
# Module-level defaults for the "--profiler-args" / "--compiler-args" options.
# They start out empty and are only rebound by set_defaults() below.
_default_profiler_args: str = ""
_default_compiler_args: str = ""


def set_defaults(
    compiler_args: Optional[str] = None, profiler_args: Optional[str] = None
) -> None:
    """
    Set the default values for various arguments of the magic commands. These
    values will be used if the user does not explicitly provide those arguments
    to override this behaviour on a cell by cell basis.

    Args:
        compiler_args: If not None, this value becomes the new default compiler
            config. Defaults to None, which leaves the current default
            (initially "") unchanged.
        profiler_args: If not None, this value becomes the new default profiler
            config. Defaults to None, which leaves the current default
            (initially "") unchanged.
    """
    # pylint: disable=global-statement
    global _default_compiler_args
    global _default_profiler_args

    # each default is updated independently, so calling this function with
    # only one keyword never resets the other one
    if compiler_args is not None:
        _default_compiler_args = compiler_args
    if profiler_args is not None:
        _default_profiler_args = profiler_args
def str_to_lambda(arg: str) -> Callable[[], str]:
    """
    Wrap a string given on the command line in a zero-argument callable.

    Args:
        arg: The raw string value received from argparse.

    Returns:
        A callable that takes no arguments and returns "arg" unchanged.
    """

    def _constant() -> str:
        return arg

    return _constant
def get_parser_cuda() -> argparse.ArgumentParser:
@@ -18,7 +51,22 @@ def get_parser_cuda() -> argparse.ArgumentParser:
)
parser.add_argument("-t", "--timeit", action="store_true")
parser.add_argument("-p", "--profile", action="store_true")
parser.add_argument("-a", "--profiler-args", type=str, default="")
# --profiler-args and --compiler-args values are lambda functions to allow
# changing the default value at runtime
parser.add_argument(
"-a",
"--profiler-args",
type=str_to_lambda,
default=lambda: _default_profiler_args,
)
parser.add_argument(
"-c",
"--compiler-args",
type=str_to_lambda,
default=lambda: _default_compiler_args,
)
return parser
+15 -14
View File
@@ -87,7 +87,10 @@ class NVCCPlugin(Magics):
shutil.rmtree(group_dirpath)
def _compile(
self, group_name: str, executable_fname: str = DEFAULT_EXEC_FNAME
self,
group_name: str,
executable_fname: str = DEFAULT_EXEC_FNAME,
compiler_args: str = "",
) -> str:
"""
Compiles all source files in a given group together with all source
@@ -97,6 +100,7 @@ class NVCCPlugin(Magics):
group_name: The name of the source file group to be compiled.
executable_fname: The output executable file name. Defaults to
"cuda_exec.out".
compiler_args: The optional "nvcc" compiler arguments.
Raises:
RuntimeError: If the group does not exist or if it does not have any
@@ -121,18 +125,12 @@ class NVCCPlugin(Magics):
executable_fpath = os.path.join(group_dirpath, executable_fname)
args = [
"nvcc",
"-I" + shared_dirpath + "," + group_dirpath,
]
args = ["nvcc"]
args.extend(compiler_args.split())
args.append("-I" + shared_dirpath + "," + group_dirpath)
args.extend(source_files)
args.extend(
[
"-o",
executable_fpath,
"-Wno-deprecated-gpu-targets",
]
)
args.extend(["-o", executable_fpath, "-Wno-deprecated-gpu-targets"])
subprocess.check_output(args, stderr=subprocess.STDOUT)
return executable_fpath
@@ -188,12 +186,15 @@ class NVCCPlugin(Magics):
self, group_name: str, args: argparse.Namespace
) -> str:
try:
exec_fpath = self._compile(group_name)
exec_fpath = self._compile(
group_name=group_name,
compiler_args=args.compiler_args(),
)
output = self._run(
exec_fpath=exec_fpath,
timeit=args.timeit,
profile=args.profile,
profiler_args=args.profiler_args,
profiler_args=args.profiler_args(),
)
except subprocess.CalledProcessError as e:
output = e.output.decode("utf8")
+2 -2
View File
@@ -286,6 +286,6 @@ deprecated-modules="optparse,tkinter.tix"
[tool.pylint.'EXCEPTIONS']
overgeneral-exceptions= [
"BaseException",
"Exception"
"builtins.BaseException",
"builtins.Exception"
]
+47
View File
@@ -0,0 +1,47 @@
#include <cstdlib>
#include <iostream>
#include <set>
#include <string>
#include <iterator>
#include <tuple>
// Simple aggregate of three members. operator< orders instances
// lexicographically by (n, s, d), which is the ordering std::set<S> relies on
// when main() inserts values into the set.
struct S {
int n;
std::string s;
float d;
// Strict "less than": builds tuples of references with std::tie and lets the
// tuple comparison do the member-by-member work.
bool operator<(const S& rhs) const
{
// compares n to rhs.n,
// then s to rhs.s,
// then d to rhs.d
return std::tie(n, s, d) < std::tie(rhs.n, rhs.s, rhs.d);
}
};
// Inserts two values into a std::set<S>, once unpacking the result of insert()
// with std::tie and once with a structured binding.
// NOTE(review): presumably this is the fixture compiled with "--std c++17" and
// "--std c++14" by the compiler-args test; the structured binding below is the
// construct that needs C++17 - confirm before changing it.
int main()
{
std::set<S> mySet;
// pre C++17:
{
S value{42, "Test", 3.14};
std::set<S>::iterator iter;
bool inserted;
// unpacks the return val of insert into iter and inserted
std::tie(iter, inserted) = mySet.insert(value);
if (inserted)
std::cout << "Value was inserted\n";
}
// with C++17:
{
S value{100, "abc", 100.0};
// structured binding: declares iter and inserted directly from the pair
// returned by insert()
const auto [iter, inserted] = mySet.insert(value);
if (inserted)
std::cout << "Value(" << iter->n << ", " << iter->s << ", ...) was inserted" << "\n";
}
}
+8
View File
@@ -0,0 +1,8 @@
#include <opencv2/core.hpp>
#include <iostream>
// Prints the build information string reported by OpenCV to standard output.
// argc and argv are accepted but not used.
int main(int argc, char** argv)
{
    const auto& buildInfo = cv::getBuildInformation();
    std::cout << buildInfo << std::endl;
    return 0;
}
+16 -1
View File
@@ -27,10 +27,25 @@ def fixtures_path(tests_path):
return os.path.join(tests_path, "fixtures")
@pytest.fixture(scope="session")
def scripts_path(fixtures_path: str):
    """Return the path of the "scripts" directory inside the fixtures dir."""
    path_parts = (fixtures_path, "scripts")
    return os.path.join(*path_parts)
@pytest.fixture(scope="session")
def compiler_cpp_17_fpath(fixtures_path: str):
    """Return the path of the "compiler/cpp_17.cu" fixture source file."""
    path_parts = (fixtures_path, "compiler", "cpp_17.cu")
    return os.path.join(*path_parts)
@pytest.fixture(scope="session")
def compiler_opencv_fpath(fixtures_path: str):
    """Return the path of the "compiler/opencv.cu" fixture source file."""
    path_parts = (fixtures_path, "compiler", "opencv.cu")
    return os.path.join(*path_parts)
@pytest.fixture(scope="session")
def sample_magic_cu_line():
# fmt: off
return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum"' # noqa: E501
return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum" --compiler-args "--optimize 3"' # noqa: E501
# fmt: on
Vendored Executable
+7
View File
@@ -0,0 +1,7 @@
#!/bin/bash
# this is a mock of nsight compute cli tool that just executes the program
# given as the last argument

# without any argument "${@: -1}" expands to $0, which would make this script
# call itself over and over; fail early instead
if [ "$#" -eq 0 ]; then
    echo "usage: $0 [profiler options] <program>" >&2
    exit 1
fi

# every argument before the last one is ignored
"${@: -1}"

echo "==WARNING== No kernels were profiled"
+115 -4
View File
@@ -3,10 +3,12 @@ import math
import os
import re
import shutil
import subprocess
from typing import List
import pytest
from nvcc4jupyter.parsers import get_parser_cuda, set_defaults
from nvcc4jupyter.plugin import NVCCPlugin
@@ -36,11 +38,19 @@ def copy_source_to_group(
return destination_fpath
@pytest.fixture(autouse=True, scope="session")
def before_all(scripts_path: str):
    """
    Put the fixture scripts directory at the front of PATH once per test
    session. The previous PATH value is kept after it and is not restored.
    """
    previous_path = os.environ["PATH"]
    os.environ["PATH"] = os.pathsep.join((scripts_path, previous_path))
@pytest.fixture(autouse=True, scope="function")
def before_each(plugin: NVCCPlugin):
shutil.rmtree(plugin.workdir, ignore_errors=True) # before test
# BEFORE TESTS
set_defaults(compiler_args="", profiler_args="")
shutil.rmtree(plugin.workdir, ignore_errors=True)
yield
pass # after test
# AFTER TESTS
pass
def test_save_source(plugin: NVCCPlugin, sample_cuda_code: str) -> None:
@@ -88,6 +98,62 @@ def test_compile(
plugin._compile(gname)
def test_compile_args(
    plugin: NVCCPlugin,
    compiler_cpp_17_fpath: str,
):
    """
    Check that compiler arguments reach "nvcc": the fixture must compile with
    "--std c++17" and must fail with "--std c++14", both when calling
    "_compile" directly and when going through "_compile_and_run".
    """
    gname = "test_compile_args"
    copy_source_to_group(compiler_cpp_17_fpath, gname, plugin.workdir)

    exec_fpath = plugin._compile(gname, compiler_args="--std c++17")
    assert os.path.exists(exec_fpath)

    # should fail due to the source file having c++ 17 features
    with pytest.raises(subprocess.CalledProcessError):
        plugin._compile(gname, compiler_args="--std c++14")

    # "_compile_and_run" catches the compilation error and returns the
    # compiler output instead of raising
    output = plugin._compile_and_run(
        group_name=gname,
        args=argparse.Namespace(
            timeit=False,
            profile=True,
            profiler_args=lambda: "",
            compiler_args=lambda: "--std c++14",
        ),
    )
    assert "errors detected in the compilation of" in output
def test_compile_opencv(
    plugin: NVCCPlugin,
    compiler_opencv_fpath: str,
):
    """
    Check that a source file using OpenCV compiles and runs when the options
    reported by "pkg-config" are passed as compiler arguments.
    """
    gname = "test_compile_opencv"
    copy_source_to_group(compiler_opencv_fpath, gname, plugin.workdir)

    # check that "pkg-config" exists; shutil.which returns None when it is not
    # on PATH, so a missing tool shows up as a plain assertion failure
    assert shutil.which("pkg-config") is not None, "pkg-config not found"

    opencv_compile_options = (
        subprocess.check_output(
            args=["pkg-config", "--cflags", "--libs", "opencv4"]
        )
        .decode()
        .strip()
    )

    output = plugin._compile_and_run(
        group_name=gname,
        args=argparse.Namespace(
            timeit=False,
            profile=True,
            profiler_args=lambda: "",
            compiler_args=lambda: opencv_compile_options,
        ),
    )
    assert "General configuration for OpenCV" in output
def test_run(
plugin: NVCCPlugin,
sample_cuda_fpath: str,
@@ -143,7 +209,13 @@ def test_compile_and_run_multiple_files(
for fpath in multiple_source_fpaths:
copy_source_to_group(fpath, gname, plugin.workdir)
output = plugin._compile_and_run(
gname, argparse.Namespace(timeit=False, profile=True, profiler_args="")
group_name=gname,
args=argparse.Namespace(
timeit=False,
profile=True,
profiler_args=lambda: "",
compiler_args=lambda: "",
),
)
check_profiler_output(output)
@@ -165,7 +237,13 @@ def test_compile_and_run_multiple_files_shared(
else:
copy_source_to_group(fpath, "shared", plugin.workdir)
output = plugin._compile_and_run(
gname, argparse.Namespace(timeit=False, profile=True, profiler_args="")
group_name=gname,
args=argparse.Namespace(
timeit=False,
profile=True,
profiler_args=lambda: "",
compiler_args=lambda: "",
),
)
check_profiler_output(output)
@@ -181,6 +259,29 @@ def test_read_args(plugin: NVCCPlugin):
assert math.isclose(args.b, 0.75)
def test_set_defaults():
    """
    Walk through a sequence of "set_defaults" calls and check, after each one,
    what the parser reports for the profiler and compiler arguments.
    """
    parser = get_parser_cuda()

    # (kwargs for set_defaults, argv, expected profiler, expected compiler);
    # the steps build on each other, so their order matters
    steps = [
        ({}, [], "", ""),
        ({"profiler_args": "123"}, [], "123", ""),
        ({"compiler_args": "456"}, [], "123", "456"),
        ({"profiler_args": ""}, [], "", "456"),
        ({"profiler_args": "123"}, ["--profiler-args", "789"], "789", "456"),
    ]

    for new_defaults, argv, expected_profiler, expected_compiler in steps:
        # an empty kwargs dict leaves both defaults untouched
        set_defaults(**new_defaults)
        parsed = parser.parse_args(argv)
        assert parsed.profiler_args() == expected_profiler
        assert parsed.compiler_args() == expected_compiler
def test_magic_cuda(
capsys,
plugin: NVCCPlugin,
@@ -191,6 +292,16 @@ def test_magic_cuda(
check_profiler_output(capsys.readouterr().out)
def test_magic_cuda_bad_args(
    capsys,
    plugin: NVCCPlugin,
    sample_cuda_code: str,
):
    """
    Check that an unrecognized argument makes the "cuda" magic print a usage
    message on standard output.
    """
    bad_line = "--this-is-an-unrecognized-argument"
    plugin.cuda(bad_line, sample_cuda_code)

    captured = capsys.readouterr()
    assert captured.out.startswith("usage: ")
def test_magic_cuda_group_save(plugin: NVCCPlugin, sample_cuda_code: str):
gname = "test_save_source"
sname = "sample.cu"