Merge pull request #23 from cosminc98/master

Major rewrite that merges the two plugin versions, adds tests, CI/CD pipeline
This commit is contained in:
Cosmin Ștefan Ciocan
2024-01-23 14:35:32 +01:00
committed by GitHub
41 changed files with 2049 additions and 249 deletions
+19
View File
@@ -0,0 +1,19 @@
# Development container image for nvcc4jupyter.
# The base is pinned to Ubuntu 22.04 because that release ships the
# python3.10-venv package installed below; an untagged "ubuntu" follows
# "latest" and moves to releases where that package does not exist.
FROM ubuntu:22.04

ARG VENV_PATH=/opt/dev-venv
ENV VENV_ACTIVATE=${VENV_PATH}/bin/activate

# update and install share one layer so a cached, stale package index is never
# reused; DEBIAN_FRONTEND is set inline so that package configuration prompts
# cannot block the build and the setting does not leak into the runtime
# environment; the apt lists are removed in the layer that created them
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        gcc \
        git \
        nvidia-cuda-toolkit \
        python3.10-venv \
        vim \
    && rm -rf /var/lib/apt/lists/*

# the mkdir command bypasses a profiler error, which allows us to run it with
# host code only to at least check that the profiler parameters are correctly
# provided; without this line, some tests will fail
RUN mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections

# we create the virtualenv here so that the devcontainer.json setting
# python.defaultInterpreterPath can be used to find it; if we do it in the
# post_create.sh script, the virtualenv will not be loaded and features like
# pylance, black, isort, etc. will not work
RUN python3.10 -m venv "${VENV_PATH}"

# activate the virtualenv in every interactive bash session
RUN echo "source ${VENV_ACTIVATE}" >> ~/.bashrc
+26
View File
@@ -0,0 +1,26 @@
{
"name": "Python Environment",
"build": {
"dockerfile": "Dockerfile",
"context": ".."
},
"postCreateCommand": "bash .devcontainer/post_create.sh",
"customizations": {
"vscode": {
"extensions": [
"editorconfig.editorconfig",
"ms-azuretools.vscode-docker",
"ms-python.python",
"ms-python.vscode-pylance",
"ms-python.pylint",
"ms-python.isort",
"ms-python.flake8",
"ms-python.black-formatter",
"ryanluker.vscode-coverage-gutters"
],
"settings": {
"python.defaultInterpreterPath": "/opt/dev-venv/bin/python"
}
}
}
}
+7
View File
@@ -0,0 +1,7 @@
#!/bin/bash
# Dev container post-create step: installs the project with its developer
# dependencies and registers the pre-commit git hooks.

# abort on the first failing command so that a broken dependency install is
# not masked by a later command succeeding
set -e

# install developer dependencies; the requirement is quoted because an
# unquoted .[dev] is a glob pattern that the shell would expand if a file
# named .d, .e or .v existed in the working directory
pip install ".[dev]"

# make sure the developer uses pre-commit hooks
pre-commit install
+5
View File
@@ -0,0 +1,5 @@
[flake8]
max-line-length = 79
select = F,E,W,B,B901,B902,B903
exclude = .eggs,.git,.tox,nssm,obj,out,packages,pywin32,tests,swagger_client
ignore = E722,B001,W503,E203
+22
View File
@@ -0,0 +1,22 @@
# Same as `code-quality-pr.yaml` but triggered on commit to master branch
# and runs on all files (instead of only the changed ones)

name: Code Quality Master

on:
  push:
    branches: [master]

jobs:
  code-quality:
    runs-on: ubuntu-latest

    steps:
      # action versions match the ones used by the publish workflow; the
      # older v2 releases run on a Node.js runtime that GitHub has retired
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4

      # no extra_args are given, so the hooks run with the action's defaults
      - name: Run pre-commits
        uses: pre-commit/action@v3.0.1
+36
View File
@@ -0,0 +1,36 @@
# This workflow finds which files were changed, prints them,
# and runs `pre-commit` on those files.
# Inspired by the sktime library:
# https://github.com/alan-turing-institute/sktime/blob/main/.github/workflows/test.yml

name: Code Quality PR

on:
  pull_request:
    branches: [master, "release/*", "dev"]

jobs:
  code-quality:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4

      # output: " " makes the action emit the file list space separated
      - name: Find modified files
        id: file_changes
        uses: trilom/file-changes-action@v1.2.4
        with:
          output: " "

      # the file list comes from the pull request, so it is untrusted; it is
      # handed to the script through an environment variable instead of being
      # interpolated into the script text, so a file name containing quotes
      # or shell metacharacters is printed rather than executed
      - name: List modified files
        env:
          MODIFIED_FILES: ${{ steps.file_changes.outputs.files }}
        run: echo "$MODIFIED_FILES"

      - name: Run pre-commits
        uses: pre-commit/action@v2.0.3
        with:
          extra_args: --files ${{ steps.file_changes.outputs.files }}
+46
View File
@@ -0,0 +1,46 @@
name: Publish Python 🐍 distribution 📦 to PyPI

# the build job runs on every push; publishing is restricted to tag pushes
# by the `if` condition on the publish job
on: push

jobs:
  build:
    name: Build distribution 📦
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.x"
      - name: Install pypa/build
        run: python3 -m pip install build --user
      - name: Build a binary wheel and a source tarball
        run: python3 -m build
      # upload-artifact and download-artifact are upgraded together: the v3
      # releases are deprecated and artifacts stored by v4 can only be
      # retrieved by download-artifact v4
      - name: Store the distribution packages
        uses: actions/upload-artifact@v4
        with:
          name: python-package-distributions
          path: dist/

  publish-to-pypi:
    name: >-
      Publish Python 🐍 distribution 📦 to PyPI
    if: startsWith(github.ref, 'refs/tags/')  # only publish to PyPI on tag pushes
    needs:
      - build
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/nvcc4jupyter
    # no PyPI password or token is passed to the publish step below; the job
    # requests an OIDC identity token instead
    permissions:
      id-token: write

    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Publish distribution 📦 to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
+86
View File
@@ -0,0 +1,86 @@
name: Tests

on:
  push:
    branches: [master]
  pull_request:
    branches: [master, "release/*", "dev"]

jobs:
  run_tests_ubuntu:
    runs-on: ${{ matrix.os }}

    strategy:
      # let the remaining Python versions finish even if one of them fails
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python-version: ["3.10", "3.11", "3.12"]

    timeout-minutes: 20

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      # the mkdir command bypasses a profiler error, which allows us to run it
      # with host code only to at least check that the profiler parameters are
      # correctly provided
      # (-y keeps the install from waiting for a confirmation prompt)
      - name: Install CUDA tools
        run: |
          sudo apt-get update
          sudo apt-get install -y nvidia-cuda-toolkit
          sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r tests/requirements.txt

      - name: List dependencies
        run: |
          python -m pip list

      - name: Run pytest
        run: |
          pytest -v

  # upload code coverage report
  code-coverage:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          lfs: "true"
      - run: git lfs pull

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install CUDA tools
        run: |
          sudo apt-get update
          sudo apt-get install -y nvidia-cuda-toolkit
          sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r tests/requirements.txt
          pip install pytest-cov[toml]

      - name: Run tests and collect coverage
        run: pytest --cov nvcc4jupyter

      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+35
View File
@@ -1,2 +1,37 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# Distribution / packaging
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Virtual Environment
*env*
# Misc
.pytest_cache/
.DS_Store
.idea
+53
View File
@@ -0,0 +1,53 @@
default_language_version:
python: python3
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
hooks:
# list of supported hooks: https://pre-commit.com/hooks.html
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-docstring-first
- id: check-yaml
- id: debug-statements
- id: detect-private-key
- id: check-executables-have-shebangs
- id: check-toml
- id: check-case-conflict
- id: check-added-large-files
# python code formatting
- repo: https://github.com/psf/black
rev: 23.12.1
hooks:
- id: black
args: ["--config", "pyproject.toml"]
# python import sorting
- repo: https://github.com/PyCQA/isort
rev: 5.12.0
hooks:
- id: isort
args: ["--settings-path", "pyproject.toml"]
# python check (PEP8), programming errors and code complexity
- repo: https://github.com/PyCQA/flake8
rev: 7.0.0
hooks:
- id: flake8
args: ["--config", ".flake8"]
# pylint check
- repo: https://github.com/pycqa/pylint
rev: v3.0.3
hooks:
- id: pylint
args: ["--rcfile", "pyproject.toml"]
- repo: https://github.com/PyCQA/bandit
rev: 1.7.6
hooks:
- id: bandit
args: ["-c", "pyproject.toml"]
additional_dependencies: ["bandit[toml]"]
+32
View File
@@ -0,0 +1,32 @@
# .readthedocs.yaml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.10"
# You can also specify other tool versions:
# nodejs: "19"
# rust: "1.64"
# golang: "1.19"
# Build documentation in the "docs/" directory with Sphinx
sphinx:
configuration: docs/source/conf.py
# Optionally build your docs in additional formats such as PDF and ePub
# formats:
# - pdf
# - epub
# Optional but recommended, declare the Python requirements required
# to build your documentation
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
python:
install:
- requirements: docs/requirements.txt
+33
View File
@@ -0,0 +1,33 @@
{
"editor.formatOnSave": true,
"editor.formatOnPaste": true,
"files.trimTrailingWhitespace": true,
"files.autoSave": "onFocusChange",
"git.autofetch": true,
"[jsonc]": {
"editor.defaultFormatter": "vscode.json-language-features"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
},
},
"python.defaultInterpreterPath": "/usr/local/bin/python",
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"pylint.args": [
"--rcfile=pyproject.toml"
],
"black-formatter.args": [
"--config=pyproject.toml"
],
"flake8.args": [
"--config",
".flake8"
],
"isort.args": [
"--settings-path=pyproject.toml"
]
}
+22
View File
@@ -0,0 +1,22 @@
MIT License
Copyright (c) 2018-2024 Andrei Nechaev, Cosmin Stefan Ciocan and others
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+90 -24
View File
@@ -1,37 +1,103 @@
## NVCC Plugin for Jupyter notebook
# nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
### V2 is available
| | |
| --- | --- |
| Testing | ![Python Versions][python-version] [![CI - Test][test-badge]][test-workflow] [![Coverage][coverage-badge]][coverage-results] |
| Code Quality | [![Code style: black][black-badge]][black-project] [![security: bandit][bandit-badge]][bandit-project]|
| Package | [![PyPI Latest Release][pypi-latest-version]][pypi-project-url] [![PyPI Downloads][pypi-downloads]][pypi-project-url] |
V2 brings support of multiple source and header files.
<!-- Testing badges -->
[python-version]: https://img.shields.io/pypi/pyversions/nvcc4jupyter
[test-badge]: https://github.com/cosminc98/nvcc4jupyter/actions/workflows/test.yml/badge.svg
[test-workflow]: https://github.com/cosminc98/nvcc4jupyter/actions/workflows/test.yml
[coverage-badge]: https://codecov.io/github/cosminc98/nvcc4jupyter/coverage.svg?branch=master
[coverage-results]: https://codecov.io/gh/cosminc98/nvcc4jupyter
##### Usage
<!-- Code Quality badges -->
[black-badge]: https://img.shields.io/badge/code%20style-black-000000.svg
[black-project]: https://github.com/ambv/black
[bandit-badge]: https://img.shields.io/badge/security-bandit-yellow.svg
[bandit-project]: https://github.com/PyCQA/bandit
- Install and load extension
```
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
%load_ext nvcc_plugin
<!-- Package badges -->
[pypi-project-url]: https://pypi.org/project/nvcc4jupyter/
[pypi-latest-version]: https://img.shields.io/pypi/v/nvcc4jupyter.svg
[pypi-downloads]: https://img.shields.io/pypi/dm/nvcc4jupyter.svg?label=PyPI%20downloads
**nvcc4jupyter** is a Jupyter Notebook plugin that provides cell and line
[magics](https://ipython.readthedocs.io/en/stable/interactive/magics.html)
to allow running CUDA C++ code from a notebook. This is especially
useful when combined with a hosted service such as Google's
[Colab](https://colab.research.google.com/), which provides CUDA-capable GPUs,
so you can start learning CUDA C++ without having to install anything or even
to own a GPU yourself.
## Table of Contents
- [Main Features](#main-features)
- [Install](#install)
- [Usage](#usage)
- [License](#license)
- [Documentation](#documentation)
- [Contributing](#contributing)
## Main Features
Here are just a few of the things that nvcc4jupyter does well:
- [Easily run CUDA C++ code](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#hello-world)
- [Profile your code with NVIDIA Nsight Compute](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling)
- [Share code between different programs in the same notebook / split your code into multiple files for improved readability](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#groups)
## Install
The installer for the latest released version is available at the [Python
Package Index (PyPI)](https://pypi.org/project/nvcc4jupyter).
```sh
pip install nvcc4jupyter
```
- Mark a cell to be treated as cuda cell
> `%%cuda --name example.cu --compile false`
>> NOTE: The cell must contain either code or comments to be run successfully.
>> It accepts 2 arguments. `-n` | `--name` - which is the name of either CUDA source or Header
>> The name parameter must have extension `.cu` or `.h`
>> Second argument `-c` | `--compile`; default value is `false`. The argument is a flag to specify
>> if the cell will be compiled and run right away or not. It might be usefull if you're playing in
>> the `main` function
## Usage
- To compile and run all CUDA files you need to run
First, load the extension to enable the magic commands:
```
%%cuda_run
# This line just to bypass an exeption and can contain any text
%load_ext nvcc4jupyter
```
- To profile your CUDA kernels using NVIDIA Nsight Compute CLI profiler you need to run
Running a quick CUDA Hello World program:
```c++
%%cuda
#include <stdio.h>
__global__ void hello(){
printf("Hello from block: %u, thread: %u\n", blockIdx.x, threadIdx.x);
}
int main(){
hello<<<2, 2>>>();
cudaDeviceSynchronize();
}
```
%%cu --profile
For more advanced use cases, see [the documentation](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html).
## Documentation
The official documentation is hosted on [readthedocs](https://nvcc4jupyter.readthedocs.io/).
## License
[MIT](LICENSE)
## Contributing
Install the package with the development dependencies:
```bash
pip install .[dev]
```
- You can add options to the profiler. Keep in mind that any argument after "--profiler-args" will be considered as a profiler argument. For example, to select which sections to collect metrics for you need to run
```
%%cu --profile --profiler-args --section SpeedOfLight --section MemoryWorkloadAnalysis --section Occupancy
As a developer, make sure you install the pre-commit hook before committing any changes:
```bash
pre-commit install
```
<hr>
[Go to Top](#table-of-contents)
-32
View File
@@ -1,32 +0,0 @@
import argparse
def get_argparser():
parser = argparse.ArgumentParser(description='NVCCPlugin params')
parser.add_argument(
'-t',
'--timeit',
action='store_true',
help='If set, returns the output of the "timeit" built-in ipython magic instead of stdout.',
)
parser.add_argument(
'-p',
'--profile',
action='store_true',
help='If set, runs the nvidia nsight compute profiler. Has no effect if used with --timeit.',
)
parser.add_argument(
'-a',
'--profiler-args',
type=str,
nargs=argparse.REMAINDER,
default=[],
help='Extra options that can be passed to the nvidia nsight compute profiler. '
'Must be the last option given to the argument parser so you can pass arguments with dashes.',
)
return parser
def print_out(out: str):
for l in out.split('\n'):
print(l)
+20
View File
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = source
BUILDDIR = build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# "help" and "Makefile" are declared phony so that make never treats them as
# files to be built; "Makefile" is the prerequisite of the catch-all rule below.
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
# $@ is the name of the requested target and is passed to Sphinx as the builder.
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+35
View File
@@ -0,0 +1,35 @@
@ECHO OFF
REM Change to the directory that contains this script (%~dp0) so that the
REM relative SOURCEDIR and BUILDDIR below resolve; undone by popd at the end.
pushd %~dp0
REM Command file for Sphinx documentation
REM Use sphinx-build unless the caller already set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
REM Without a target argument, show the Sphinx help.
if "%1" == "" goto help
REM Probe run with all output discarded; only the resulting errorlevel is
REM inspected (9009 is what cmd reports when a command cannot be found).
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM Forward the requested target (%1) to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
+2
View File
@@ -0,0 +1,2 @@
sphinx==7.1.2
sphinx-rtd-theme==1.3.0rc1
+40
View File
@@ -0,0 +1,40 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
# Project metadata read by Sphinx when building the documentation.
project = "nvcc4jupyter"
copyright = "2024, Andrei Nechaev & Cosmin Stefan Ciocan"
author = "Andrei Nechaev & Cosmin Stefan Ciocan"
# NOTE(review): the package's __init__.py declares __version__ = "1.0.3" while
# the documentation still declares 1.0.1 - confirm whether the two values are
# meant to be kept in sync.
release = "1.0.1"
version = "1.0.1"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
extensions = [
"sphinx.ext.duration",
"sphinx.ext.doctest",
"sphinx.ext.autodoc",
"sphinx.ext.autosummary",
"sphinx.ext.intersphinx",
]
intersphinx_mapping = {
"python": ("https://docs.python.org/3/", None),
"sphinx": ("https://www.sphinx-doc.org/en/master/", None),
}
intersphinx_disabled_domains = ["std"]
templates_path = ["_templates"]
exclude_patterns = []
# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
html_theme = "sphinx_rtd_theme"
html_static_path = ["_static"]
+13
View File
@@ -0,0 +1,13 @@
Welcome to nvcc4jupyter's documentation!
========================================
This IPython extension allows running CUDA C++ code in Jupyter notebook. This
is especially useful when combined with `Google Colab <https://colab.research.google.com/>`_
which provides CUDA capable GPUs with the CUDA toolkit already installed.
.. toctree::
:maxdepth: 2
:caption: Contents:
usage
magics
+172
View File
@@ -0,0 +1,172 @@
**********
Magics API
**********
.. note::
Arguments for profilers and the nvcc compiler can be passed in double
quotes so they can contain spaces and dashes.
------
.. _cuda_magic:
cuda
====
Magic command that compiles, runs, and profiles CUDA C++ code in the cell.
Usage
-----
- ``%%cuda``: Compile and run this cell.
- ``%%cuda -p``: Also runs the Nsight Compute profiler.
- ``%%cuda -p -a "<SPACE SEPARATED PROFILER ARGS>"``: Also runs the Nsight Compute profiler, passing it the given arguments.
- ``%%cuda -t``: Outputs the "timeit" built-in magic results.
Options
-------
-t, --timeit
Boolean. If set, returns the output of the "timeit" built-in
ipython magic instead of stdout.
-p, --profile
Boolean. If set, runs the NVIDIA Nsight Compute profiler whose
output is appended to standard output.
-a, --profiler-args
String. Optional profiler arguments that can be space separated
by wrapping them in double quotes. See all options here:
`Nsight Compute CLI <https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html#command-line-options>`_
.. note::
If both "\-\-profile" and "\-\-timeit" are used then no profiling is
done.
Examples
--------
::
# compile, run, and profile the code in the cell with the Nsight
# compute profiler while collecting only metrics from the
# "MemoryWorkloadAnalysis" section.
%%cuda --profile --profiler-args "--section MemoryWorkloadAnalysis"
------
.. _cuda_group_save_magic:
cuda_group_save
===============
Magic command that saves CUDA C++ code in the cell for later
compilation and execution with possibly more source files.
Usage
-----
- ``%%cuda_group_save -n <FILENAME> -g <GROUPNAME>``: Save the code in the current cell to a group of source files.
Options
-------
-n, --name
String. Required file name of the saved source file. Must have
either the ".cu" or ".h" extension. In order to import a header
file saved with this magic you can simply add '#include "<name>"'.
-g, --group
String. Required group name to which to add the saved source file.
Groups are source files that get compiled together and do not
interact with other groups. This allows you to have multiple
unrelated CUDA programs within the same jupyter notebook. Adding
files to a group named "shared" will make them available to all
other source file groups. One use case for the shared group is for
sharing error handling code which should be present in all CUDA
programs.
Examples
--------
::
# jupyter cell 1
%%cuda_group_save -n "error_handling.h" -g "shared"
<ERROR HANDLING CODE>
# jupyter cell 2
%%cuda_group_save -n "main.cu" -g "example_group"
#include "error_handling.h"
<YOUR CODE HERE>
------
.. _cuda_group_run_magic:
cuda_group_run
==============
Line magic command that compiles, runs, and profiles all source files
in a group.
Usage
-----
- ``%cuda_group_run -g <GROUPNAME>``: Compiles, runs, and profiles the source files in the given group.
Options
-------
-g, --group
String. Required group name whose source files should be compiled and run.
.. note::
All options from the "%%cuda" cell magic are inherited.
Examples
--------
::
# jupyter cell 1
%%cuda_group_save -n "error_handling.h" -g "shared"
<ERROR HANDLING CODE>
# jupyter cell 2
%%cuda_group_save -n "main.cu" -g "example_group"
#include "error_handling.h"
<YOUR CODE HERE>
# jupyter cell 3
%cuda_group_run -g "example_group" --profile
-----
.. _cuda_group_delete_magic:
cuda_group_delete
=================
Line magic command that deletes all source files in a group.
Usage
-----
- ``%cuda_group_delete -g <GROUPNAME>``: Removes all source files in the given group.
Options
-------
-g, --group
String. Required group name whose source files should be deleted.
Examples
--------
::
# jupyter cell 1
%%cuda_group_save -n "error_handling.h" -g "shared"
<ERROR HANDLING CODE>
# jupyter cell 2 - here we delete the "shared" group that holds the error
# handling code; in
# practice this would be helpful if you want to overwrite some
# functionality that was defined earlier in the notebook
%cuda_group_delete -g "shared"
+257
View File
@@ -0,0 +1,257 @@
Usage
=====
Installation
------------
To use nvcc4jupyter, first install it using pip:
.. code-block:: console
(venv) $ pip install nvcc4jupyter
Load the Extension
------------------
Now we need to load the IPython extension to be able to use its cell and line
magic commands:
.. code-block::
%load_ext nvcc4jupyter
Hello World
-----------
We will use the :ref:`cuda <cuda_magic>` cell magic command to run a simple
hello world program.
.. code-block:: c++
%%cuda
#include <stdio.h>
__global__ void hello(){
printf("Hello from block: %u, thread: %u\n", blockIdx.x, threadIdx.x);
}
int main(){
hello<<<2, 2>>>();
cudaDeviceSynchronize();
}
Groups
------
Now we will demonstrate a more complex scenario that uses source file groups.
If you want to split your code into multiple source files, either for code reuse
or just to have an easier to read project, you want to use groups. A group of
source files will be compiled together. Because of this, you can include headers
from the same group and use the code defined in other ".cu" files. There is also
a special group named "shared" whose files will be compiled together with all
other groups, which is a great feature for error handling code as we'll show now:
.. code-block:: c++
%%cuda_group_save --group shared --name "error_handling.h"
// error checking macro
#define cudaCheckErrors(msg) \
do { \
cudaError_t __err = cudaGetLastError(); \
if (__err != cudaSuccess) { \
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
msg, cudaGetErrorString(__err), \
__FILE__, __LINE__); \
fprintf(stderr, "*** FAILED - ABORTING\n"); \
exit(1); \
} \
} while (0)
Now we can use that error handling macro in this vector addition program but
also in other programs that we define in other Jupyter cells:
.. code-block:: c++
%%cuda
#include <stdio.h>
#include "error_handling.h"
const int DSIZE = 4096;
const int block_size = 256;
// vector add kernel: C = A + B
__global__ void vadd(const float *A, const float *B, float *C, int ds){
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < ds) {
C[idx] = A[idx] + B[idx];
}
}
int main(){
float *h_A, *h_B, *h_C, *d_A, *d_B, *d_C;
// allocate space for vectors in host memory
h_A = new float[DSIZE];
h_B = new float[DSIZE];
h_C = new float[DSIZE];
// initialize vectors in host memory to random values (except for the
// result vector whose values do not matter as they will be overwritten)
for (int i = 0; i < DSIZE; i++) {
h_A[i] = rand()/(float)RAND_MAX;
h_B[i] = rand()/(float)RAND_MAX;
}
// allocate space for vectors in device memory
cudaMalloc(&d_A, DSIZE*sizeof(float));
cudaMalloc(&d_B, DSIZE*sizeof(float));
cudaMalloc(&d_C, DSIZE*sizeof(float));
cudaCheckErrors("cudaMalloc failure"); // error checking
// copy vectors A and B from host to device:
cudaMemcpy(d_A, h_A, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_B, h_B, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpy H2D failure");
// launch the vector adding kernel
vadd<<<(DSIZE+block_size-1)/block_size, block_size>>>(d_A, d_B, d_C, DSIZE);
cudaCheckErrors("kernel launch failure");
// wait for the kernel to finish execution
cudaDeviceSynchronize();
cudaCheckErrors("kernel execution failure");
cudaMemcpy(h_C, d_C, DSIZE*sizeof(float), cudaMemcpyDeviceToHost);
cudaCheckErrors("cudaMemcpy D2H failure");
printf("A[0] = %f\n", h_A[0]);
printf("B[0] = %f\n", h_B[0]);
printf("C[0] = %f\n", h_C[0]);
return 0;
}
Above we use the :ref:`cuda <cuda_magic>` magic command which saves the code
in the cell to an anonymous source file group, compiles, and executes that
code. This only allows us to have one source file (besides the ones in the
"shared" group). In order to have multiple source files we need to use the
:ref:`cuda_group_save <cuda_group_save_magic>` and
:ref:`cuda_group_run <cuda_group_run_magic>` magics.
First, we save the vector addition function to its own file:
.. code-block:: c++
%%cuda_group_save --name "vector_add.cu" --group "vector_add"
// vector add kernel: C = A + B
__global__ void vadd(const float *A, const float *B, float *C, int ds){
int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < ds) {
C[idx] = A[idx] + B[idx];
}
}
Now we create a header file so the main cuda file knows the signature of "vadd":
.. code-block:: c++
%%cuda_group_save --name "vector_add.h" --group "vector_add"
__global__ void vadd(const float *A, const float *B, float *C, int ds);
To tie it all together, we save the main cuda file, which includes our vector
addition code:
.. code-block:: c++
%%cuda_group_save --name "main.cu" --group "vector_add"
#include <stdio.h>
#include "error_handling.h"
#include "vector_add.h"
const int DSIZE = 4096;
const int block_size = 256;
int main(){
float *h_A, *h_B, *h_C, *d_A, *d_B, *d_C;
// allocate space for vectors in host memory
h_A = new float[DSIZE];
h_B = new float[DSIZE];
h_C = new float[DSIZE];
// initialize vectors in host memory to random values (except for the
// result vector whose values do not matter as they will be overwritten)
for (int i = 0; i < DSIZE; i++) {
h_A[i] = rand()/(float)RAND_MAX;
h_B[i] = rand()/(float)RAND_MAX;
}
// allocate space for vectors in device memory
cudaMalloc(&d_A, DSIZE*sizeof(float));
cudaMalloc(&d_B, DSIZE*sizeof(float));
cudaMalloc(&d_C, DSIZE*sizeof(float));
cudaCheckErrors("cudaMalloc failure"); // error checking
// copy vectors A and B from host to device:
cudaMemcpy(d_A, h_A, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(d_B, h_B, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
cudaCheckErrors("cudaMemcpy H2D failure");
// launch the vector adding kernel
vadd<<<(DSIZE+block_size-1)/block_size, block_size>>>(d_A, d_B, d_C, DSIZE);
cudaCheckErrors("kernel launch failure");
// wait for the kernel to finish execution
cudaDeviceSynchronize();
cudaCheckErrors("kernel execution failure");
cudaMemcpy(h_C, d_C, DSIZE*sizeof(float), cudaMemcpyDeviceToHost);
cudaCheckErrors("cudaMemcpy D2H failure");
printf("A[0] = %f\n", h_A[0]);
printf("B[0] = %f\n", h_B[0]);
printf("C[0] = %f\n", h_C[0]);
return 0;
}
Now we can compile all the source files in the group and execute the main
function with the following command:
.. code-block:: c++
%cuda_group_run --group "vector_add"
Profiling
---------
Another important feature of nvcc4jupyter is its integration with the NVIDIA
Nsight Compute profiler, which you need to make sure is installed and its
executable can be found in a directory in your PATH environment variable.
In order to use it and provide the profiler with custom arguments, simply run:
.. code-block:: c++
%cuda_group_run --group "vector_add" --profile --profiler-args "--section SpeedOfLight"
Running the cell above will compile and execute the vector addition code in the
"vector_add" group and profile it, keeping only the metrics from the
"SpeedOfLight" section. The output will contain something similar to:
.. code-block::
Section: GPU Speed Of Light Throughput
----------------------- ------------- ------------
Metric Name Metric Unit Metric Value
----------------------- ------------- ------------
DRAM Frequency cycle/nsecond 4.65
SM Frequency cycle/usecond 544.31
Elapsed Cycles cycle 2,145
Memory Throughput % 3.19
DRAM Throughput % 3.19
Duration usecond 3.94
L1/TEX Cache Throughput % 6.67
L2 Cache Throughput % 1.98
SM Active Cycles cycle 383.65
Compute (SM) Throughput % 1.19
----------------------- ------------- ------------
+7
View File
@@ -0,0 +1,7 @@
"""
nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
"""
from .plugin import NVCCPlugin, load_ipython_extension # noqa: F401
__version__ = "1.0.3"
+69
View File
@@ -0,0 +1,69 @@
"""
Parsers for the CUDA magic commands.
"""
import argparse
def get_parser_cuda() -> argparse.ArgumentParser:
    """
    Build the argument parser for the %%cuda cell magic.

    Returns:
        A parser that accepts the boolean flags "-t/--timeit" and
        "-p/--profile" and the string option "-a/--profiler-args", which
        defaults to the empty string.
    """
    parser = argparse.ArgumentParser(
        description=(
            "%%cuda magic that compiles and runs CUDA C++ code in this cell."
            " See https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda"  # noqa: E501
            " for usage details."
        )
    )
    # argparse applies %-formatting to help strings (unlike the description
    # above), so the help texts below must not contain a bare percent sign
    parser.add_argument(
        "-t",
        "--timeit",
        action="store_true",
        help=(
            'If set, returns the output of the "timeit" built-in IPython'
            " magic instead of stdout."
        ),
    )
    parser.add_argument(
        "-p",
        "--profile",
        action="store_true",
        help=(
            "If set, runs the NVIDIA Nsight Compute profiler whose output is"
            " appended to standard output. Has no effect if used with"
            " --timeit."
        ),
    )
    parser.add_argument(
        "-a",
        "--profiler-args",
        type=str,
        default="",
        help=(
            "Optional arguments for the profiler. Wrap them in double quotes"
            " to pass several space separated arguments."
        ),
    )
    return parser
def get_parser_cuda_group_run() -> argparse.ArgumentParser:
    """
    Build the argument parser for the %cuda_group_run line magic.

    The parser is the one returned by get_parser_cuda with its description
    replaced and a required "-g/--group" option added, so every option of the
    %%cuda cell magic is accepted as well.

    Returns:
        The extended parser.
    """
    parser = get_parser_cuda()
    # the magic is documented and used as a line magic, hence the single "%"
    parser.description = (
        "%cuda_group_run magic that compiles and runs source files in a given"
        " group. See"
        " https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda-group-run"  # noqa: E501
        " for usage details."
    )
    parser.add_argument(
        "-g",
        "--group",
        type=str,
        required=True,
        help="Name of the group whose source files are compiled and run.",
    )
    return parser
def get_parser_cuda_group_save() -> argparse.ArgumentParser:
    """
    Build the argument parser of the "%%cuda_group_save" cell magic.

    Returns:
        A parser with two mandatory string options: "-n/--name" and
        "-g/--group".
    """
    summary = (
        "%%cuda_group_save magic that saves CUDA C++ code in this cell for"
        " later compilation and execution with possibly more source files."
        " See https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda-group-save"  # noqa: E501
        " for usage details."
    )
    save_parser = argparse.ArgumentParser(description=summary)

    # both options are required string values
    for short_flag, long_flag in (("-n", "--name"), ("-g", "--group")):
        save_parser.add_argument(
            short_flag, long_flag, type=str, required=True
        )
    return save_parser
def get_parser_cuda_group_delete() -> argparse.ArgumentParser:
    """
    Build the argument parser of the "%cuda_group_delete" line magic.

    Returns:
        A parser with a single mandatory string option: "-g/--group".
    """
    # cuda_group_delete is a line magic, so it is invoked with a single "%";
    # the description is shown to the user as written, hence no "%%" here
    parser = argparse.ArgumentParser(
        description=(
            "%cuda_group_delete magic that deletes all files in a group. See"
            " https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda-group-delete"  # noqa: E501
            " for usage details."
        )
    )
    parser.add_argument("-g", "--group", type=str, required=True)
    return parser
+321
View File
@@ -0,0 +1,321 @@
"""
nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
"""
import argparse
import glob
import os
import shutil
import subprocess
import tempfile
import uuid
from typing import List, Optional
# pylint: disable=import-error
from IPython.core.interactiveshell import InteractiveShell
from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
from . import parsers
DEFAULT_EXEC_FNAME = "cuda_exec.out"
SHARED_GROUP_NAME = "shared"
def print_out(out: str):
    """Write the string to stdout one line at a time."""
    # every piece between "\n" separators is printed followed by a newline
    print(*out.split("\n"), sep="\n")
@magics_class
class NVCCPlugin(Magics):
"""
CUDA C++ plugin for Jupyter Notebook
"""
def __init__(self, shell: InteractiveShell):
    """
    Create the plugin: a temporary working directory for the source file
    groups and one argument parser per magic command.

    Args:
        shell: The IPython shell, forwarded to the parent Magics class.
    """
    super().__init__(shell)
    self.shell: InteractiveShell  # type hint not provided by parent class

    # scratch directory under which every source file group is stored
    self.workdir = tempfile.mkdtemp()
    print(f'Source files will be saved in "{self.workdir}".')

    # one parser per magic command
    self.parser_cuda_group_run = parsers.get_parser_cuda_group_run()
    self.parser_cuda_group_delete = parsers.get_parser_cuda_group_delete()
    self.parser_cuda_group_save = parsers.get_parser_cuda_group_save()
    self.parser_cuda = parsers.get_parser_cuda()
def _save_source(
self, source_name: str, source_code: str, group_name: str
) -> None:
"""
Save source code as a .cu or .h file in the group directory where
files can be compiled together. Saving a source file to the group
named "shared" will make those source files available when compiling
any group.
Args:
source_name: The name of the source file. Must end in ".cu" or
".h".
source_code: The source code to be written to the source file.
group_name: The name of the group directory where the file will be
saved.
Raises:
ValueError: If the source name does not have a proper extension.
"""
_, ext = os.path.splitext(source_name)
if ext not in (".cu", ".h"):
raise ValueError(
f'Given source name "{source_name}" must end in ".h" or ".cu".'
)
group_dirpath = os.path.join(self.workdir, group_name)
os.makedirs(group_dirpath, exist_ok=True)
source_fpath = os.path.join(group_dirpath, source_name)
with open(source_fpath, "w", encoding="utf-8") as f:
f.write(source_code)
def _delete_group(self, group_name: str) -> None:
"""
Removes all source files from the given group.
Args:
group_name: The name of the source files group.
"""
group_dirpath = os.path.join(self.workdir, group_name)
if os.path.exists(group_dirpath):
shutil.rmtree(group_dirpath)
def _compile(
    self, group_name: str, executable_fname: str = DEFAULT_EXEC_FNAME
) -> str:
    """
    Compiles all source files in a given group together with all source
    files from the group named "shared".

    Args:
        group_name: The name of the source file group to be compiled.
        executable_fname: The output executable file name. Defaults to
            "cuda_exec.out".

    Raises:
        RuntimeError: If the group does not exist or if does not have any
            source files associated with it.
        subprocess.CalledProcessError: If the compiler exits with a
            non-zero status; its captured output (stdout and stderr) is
            available on the exception.

    Returns:
        The file path of the resulted executable file.
    """
    shared_dirpath = os.path.join(self.workdir, SHARED_GROUP_NAME)
    group_dirpath = os.path.join(self.workdir, group_name)
    if not os.path.exists(group_dirpath):
        raise RuntimeError(f'Group "{group_name}" does not exist.')

    source_files = glob.glob(os.path.join(group_dirpath, "*.cu"))
    if not source_files:
        raise RuntimeError(
            f'Group "{group_name}" does not have any source files.'
        )

    include_dirpaths = [shared_dirpath]
    # when the shared group itself is compiled its sources are already in
    # the list; adding them again would define every symbol twice
    if os.path.normpath(group_dirpath) != os.path.normpath(shared_dirpath):
        source_files.extend(glob.glob(os.path.join(shared_dirpath, "*.cu")))
        include_dirpaths.append(group_dirpath)

    executable_fpath = os.path.join(group_dirpath, executable_fname)

    args = ["nvcc"]
    # one "-I" flag per directory: a comma-joined list would be split in
    # the wrong place if a directory path itself contained a comma
    args.extend("-I" + dirpath for dirpath in include_dirpaths)
    args.extend(source_files)
    args.extend(["-o", executable_fpath, "-Wno-deprecated-gpu-targets"])

    subprocess.check_output(args, stderr=subprocess.STDOUT)
    return executable_fpath
def _run(
self,
exec_fpath: str,
timeit: bool = False,
profile: bool = False,
profiler_args: str = "",
) -> str:
"""
Runs a CUDA executable.
Args:
exec_fpath: The file path of the executable.
timeit: If True, returns the result of the "timeit" magic instead
of the standard output of the CUDA process. Defaults to False.
profile: If True, the executable is profiled with NVIDIA Nsight
Compute profiling tool and its output is added to stdout.
Defaults to False.
profiler_args: The profiler arguments used to customize the
information gathered by it and its overall behaviour. Defaults
to an empty string.
Returns:
The standard output of the CUDA process or the "timeit" magic
output.
"""
if timeit:
stmt = (
f"subprocess.check_output(['{exec_fpath}'],"
" stderr=subprocess.STDOUT)"
)
output = self.shell.run_cell_magic(
magic_name="timeit", line="-q -o import subprocess", cell=stmt
)
# convert TimeitResult object to human readable string
output = str(output)
else:
run_args = []
if profile:
run_args.extend(["ncu"] + profiler_args.split())
run_args.append(exec_fpath)
output = subprocess.check_output(
run_args, stderr=subprocess.STDOUT
)
output = output.decode("utf8")
return output
def _compile_and_run(
    self, group_name: str, args: argparse.Namespace
) -> str:
    """
    Compiles a source file group and runs the resulting executable.

    Args:
        group_name: The name of the source file group to compile and run.
        args: The parsed magic arguments; the "timeit", "profile" and
            "profiler_args" attributes are forwarded to the run step.

    Raises:
        RuntimeError: Propagated from the compilation step (missing group
            or group without source files); only failing subprocesses are
            handled here.

    Returns:
        The output of the run step or, if the compiler or the executable
        exited with a non-zero status, the captured output of the failed
        command.
    """
    try:
        exec_fpath = self._compile(group_name)
        output = self._run(
            exec_fpath=exec_fpath,
            timeit=args.timeit,
            profile=args.profile,
            profiler_args=args.profiler_args,
        )
    except subprocess.CalledProcessError as e:
        # the failed command may have printed arbitrary bytes; replacing
        # invalid ones keeps the real error visible instead of raising a
        # UnicodeDecodeError
        output = e.output.decode("utf8", errors="replace")
    return output
def _read_args(
self, line: str, parser: argparse.ArgumentParser
) -> Optional[argparse.Namespace]:
"""
Read arguments from the magic line. Makes sure to keep arguments
between double quotes together for use with profiler arguments or
compiler arguments.
Args:
line: The arguments on the line of the magic call in the jupyter
cell.
parser: The parser which will process the arguments after they are
correctly tokenized.
Returns:
The parsed arguments.
"""
tokens = line.strip().split('"')
args_tokenized: List[str] = []
for index, tok in enumerate(tokens):
if index % 2 == 0:
# tokens found outside double quotes are split at whitespace
args_tokenized.extend(tok.split(" "))
else:
# anything found between double quotes will not be split
args_tokenized.append(tok)
args_tokenized = [arg for arg in args_tokenized if len(arg) > 0]
try:
return parser.parse_args(args_tokenized)
except SystemExit:
parser.print_help()
return None
@cell_magic
def cuda(self, line: str, cell: str) -> None:
    """Compile the CUDA code of the cell, run it and print its output.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell.
        cell: Everything in the jupyter cell below the magic call; it
            should hold the complete source code to compile and run.
    """
    parsed = self._read_args(line, self.parser_cuda)
    if parsed is not None:
        # every call gets its own, uniquely named single-file group
        # NOTE(review): this group directory is not deleted afterwards
        cell_group = str(uuid.uuid4())
        self._save_source(
            source_name="single_file.cu",
            source_code=cell,
            group_name=cell_group,
        )
        print_out(self._compile_and_run(cell_group, parsed))
@cell_magic
def cuda_group_save(self, line: str, cell: str) -> None:
    """
    Store the CUDA code of the cell as a source file of a group, so that
    the "cuda_group_run" line magic can compile and execute it later.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell (the file name and the group name).
        cell: Everything in the jupyter cell below the magic call; it
            should hold the complete source code to save.
    """
    parsed = self._read_args(line, self.parser_cuda_group_save)
    if parsed is not None:
        self._save_source(
            source_name=parsed.name,
            source_code=cell,
            group_name=parsed.group,
        )
@line_magic
def cuda_group_run(self, line: str) -> None:
    """
    Compile every source file of a source file group, run the result and
    print its output.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell.
    """
    parsed = self._read_args(line, self.parser_cuda_group_run)
    if parsed is not None:
        print_out(self._compile_and_run(parsed.group, parsed))
@line_magic
def cuda_group_delete(self, line: str) -> None:
    """
    Delete every source file stored in a source file group.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell.
    """
    parsed = self._read_args(line, self.parser_cuda_group_delete)
    if parsed is not None:
        self._delete_group(parsed.group)
def load_ipython_extension(shell: InteractiveShell):
    """
    Hook used by IPython to load the extension: creates the plugin for the
    given shell and registers its magics on it.
    """
    shell.register_magics(NVCCPlugin(shell))
-10
View File
@@ -1,10 +0,0 @@
from v1.v1 import NVCCPlugin as NVCC_V1
from v2.v2 import NVCCPluginV2 as NVCC_V2
def load_ipython_extension(ip):
nvcc_plugin = NVCC_V1(ip)
ip.register_magics(nvcc_plugin)
nvcc_plugin_v2 = NVCC_V2(ip)
ip.register_magics(nvcc_plugin_v2)
+291
View File
@@ -0,0 +1,291 @@
[build-system]
requires = ["hatchling >= 1.13.0"]
build-backend = "hatchling.build"
[project]
name = "nvcc4jupyter"
description = "Jupyter notebook plugin to run CUDA C/C++ code"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT License"}
authors = [
{ name = "Andrei Nechaev", email = "lyfaradey@yahoo.com" },
{ name = "Cosmin Stefan Ciocan", email = "ciocan.cosmin98@gmail.com" },
]
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Environment :: GPU',
'Environment :: GPU :: NVIDIA CUDA',
'Framework :: IPython',
'Framework :: Jupyter',
]
dependencies = []
dynamic = ["version"]
[project.urls]
documentation = 'https://nvcc4jupyter.readthedocs.io/'
repository = 'https://github.com/andreinechaev/nvcc4jupyter'
[tool.hatch.version]
path = "nvcc4jupyter/__init__.py"
[tool.hatch.build.targets.wheel]
packages = ["nvcc4jupyter"]
[project.optional-dependencies]
testing = ["pytest>=7.4.3", "IPython>=8.19.0"]
dev = ["pytest>=7.4.3", "IPython>=8.19.0", "pre-commit>=3.6.0", "pytest-cov[toml]>=4.1.0"]
[tool.pytest.ini_options]
addopts = [
"--color=yes",
"--durations=0",
"--strict-markers",
"--doctest-modules",
]
filterwarnings = [
"ignore::DeprecationWarning",
"ignore::UserWarning",
]
log_cli = "True"
markers = [
"slow: slow tests",
]
minversion = "6.0"
testpaths = "tests/"
[tool.coverage.report]
exclude_lines = [
"pragma: nocover",
"raise NotImplementedError",
"raise NotImplementedError()",
"if __name__ == .__main__.:",
]
[tool.isort]
profile = "black"
[tool.bandit]
exclude_dirs = ["build","dist","tests","scripts"]
number = 4
recursive = true
# the package lives in the top-level "nvcc4jupyter" directory (see
# [tool.hatch.build.targets.wheel]); this project has no "src" directory
targets = "nvcc4jupyter"
# B404 and B603 are skipped because the user can already run any arbitrary
# command on their jupyter server
skips = ["B101", "B311", "B404", "B603"]
[tool.black]
line-length = 79
fast = true
experimental-string-processing = true
[tool.coverage.run]
branch = true
[tool.pyright]
# the package lives in the top-level "nvcc4jupyter" directory (see
# [tool.hatch.build.targets.wheel]); this project has no "src" directory
include = ["nvcc4jupyter"]
exclude = [
    "**/node_modules",
    "**/__pycache__",
]
# NOTE(review): "env37" does not match any environment defined by this
# project (the devcontainer uses /opt/dev-venv) - confirm or remove
venv = "env37"
reportMissingImports = true
reportMissingTypeStubs = false
# keep in sync with requires-python (>=3.10) in the [project] table
pythonVersion = "3.10"
pythonPlatform = "Linux"
executionEnvironments = [
    { root = "." }
]
[tool.tox]
legacy_tox_ini = """
[tox]
envlist = py, integration, spark, all
[testenv]
commands =
pytest -m "not integration and not spark" {posargs}
[testenv:integration]
commands =
pytest -m "integration" {posargs}
[testenv:spark]
extras = spark
setenv =
PYSPARK_DRIVER_PYTHON = {envpython}
PYSPARK_PYTHON = {envpython}
commands =
pytest -m "spark" {posargs}
[testenv:all]
extras = all
setenv =
PYSPARK_DRIVER_PYTHON = {envpython}
PYSPARK_PYTHON = {envpython}
commands =
pytest {posargs}
"""
[tool.pylint]
extension-pkg-whitelist= [
"numpy",
"torch",
"cv2",
"pyodbc",
"pydantic",
"ciso8601",
"netcdf4",
"scipy"
]
ignore="CVS"
ignore-patterns="test.*?py,conftest.py"
ignore-paths="docs,tests"
init-hook='import sys; sys.setrecursionlimit(8 * sys.getrecursionlimit())'
jobs=0
limit-inference-results=100
persistent="yes"
suggestion-mode="yes"
unsafe-load-any-extension="no"
[tool.pylint.'MESSAGES CONTROL']
enable="c-extension-no-member"
[tool.pylint.'REPORTS']
evaluation="10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)"
output-format="text"
reports="no"
score="yes"
[tool.pylint.'REFACTORING']
max-nested-blocks=5
never-returning-functions="sys.exit"
[tool.pylint.'BASIC']
argument-naming-style="snake_case"
attr-naming-style="snake_case"
bad-names= [
"foo",
"bar"
]
class-attribute-naming-style="any"
class-naming-style="PascalCase"
const-naming-style="UPPER_CASE"
docstring-min-length=-1
function-naming-style="snake_case"
good-names= [
"i",
"j",
"k",
"ex",
"Run",
"_"
]
include-naming-hint="yes"
inlinevar-naming-style="any"
method-naming-style="snake_case"
module-naming-style="any"
no-docstring-rgx="^_"
property-classes="abc.abstractproperty"
variable-naming-style="snake_case"
[tool.pylint.'FORMAT']
ignore-long-lines="^\\s*(# )?.*['\"]?<?https?://\\S+>?"
indent-after-paren=4
indent-string=' '
max-line-length=79
max-module-lines=1000
single-line-class-stmt="no"
single-line-if-stmt="no"
[tool.pylint.'LOGGING']
logging-format-style="old"
logging-modules="logging"
[tool.pylint.'MISCELLANEOUS']
notes= [
"FIXME",
"XXX",
"TODO"
]
[tool.pylint.'SIMILARITIES']
ignore-comments="yes"
ignore-docstrings="yes"
ignore-imports="yes"
min-similarity-lines=7
[tool.pylint.'SPELLING']
max-spelling-suggestions=4
spelling-store-unknown-words="no"
[tool.pylint.'STRING']
check-str-concat-over-line-jumps="no"
[tool.pylint.'TYPECHECK']
contextmanager-decorators="contextlib.contextmanager"
generated-members="numpy.*,np.*,pyspark.sql.functions,collect_list"
ignore-mixin-members="yes"
ignore-none="yes"
ignore-on-opaque-inference="yes"
ignored-classes="optparse.Values,thread._local,_thread._local,numpy,torch,swagger_client"
ignored-modules="numpy,torch,swagger_client,netCDF4,scipy"
missing-member-hint="yes"
missing-member-hint-distance=1
missing-member-max-choices=1
[tool.pylint.'VARIABLES']
additional-builtins="dbutils"
allow-global-unused-variables="yes"
callbacks= [
"cb_",
"_cb"
]
dummy-variables-rgx="_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_"
ignored-argument-names="_.*|^ignored_|^unused_"
init-import="no"
redefining-builtins-modules="six.moves,past.builtins,future.builtins,builtins,io"
[tool.pylint.'CLASSES']
defining-attr-methods= [
"__init__",
"__new__",
"setUp",
"__post_init__"
]
exclude-protected= [
"_asdict",
"_fields",
"_replace",
"_source",
"_make"
]
valid-classmethod-first-arg="cls"
valid-metaclass-classmethod-first-arg="cls"
[tool.pylint.'DESIGN']
max-args=5
max-attributes=7
max-bool-expr=5
max-branches=12
max-locals=15
max-parents=7
max-public-methods=20
max-returns=6
max-statements=50
min-public-methods=2
[tool.pylint.'IMPORTS']
allow-wildcard-with-all="no"
analyse-fallback-blocks="no"
deprecated-modules="optparse,tkinter.tix"
[tool.pylint.'EXCEPTIONS']
overgeneral-exceptions= [
"BaseException",
"Exception"
]
-13
View File
@@ -1,13 +0,0 @@
from distutils.core import setup
setup(
name='NVCCPlugin',
version='0.0.2',
author='Andrei Nechaev',
author_email='lyfaradey@yahoo.com',
py_modules=['nvcc_plugin', 'v2.v2', 'v1.v1', 'common.helper'],
url='https://github.com/andreinechaev/nvcc4jupyter',
license='LICENSE',
description='Jupyter notebook plugin to run CUDA C/C++ code',
# long_description=open('README.md').read(),
)
+1
View File
@@ -0,0 +1 @@
from .fixtures.fixtures import * # noqa: F401,F403
View File
+57
View File
@@ -0,0 +1,57 @@
import glob
import os
import pytest
from IPython.core.interactiveshell import InteractiveShell
from nvcc4jupyter.plugin import NVCCPlugin
@pytest.fixture(scope="session")
def shell():
return InteractiveShell()
@pytest.fixture(scope="session")
def plugin(shell: InteractiveShell):
return NVCCPlugin(shell=shell)
@pytest.fixture(scope="session")
def tests_path():
return "tests"
@pytest.fixture(scope="session")
def fixtures_path(tests_path):
return os.path.join(tests_path, "fixtures")
@pytest.fixture(scope="session")
def sample_magic_cu_line():
# fmt: off
return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum"' # noqa: E501
# fmt: on
@pytest.fixture(scope="session")
def sample_cuda_fpath(fixtures_path: str):
return os.path.join(fixtures_path, "single_file", "hello.cu")
@pytest.fixture(scope="session")
def sample_cuda_code(sample_cuda_fpath: str):
with open(sample_cuda_fpath, "r", encoding="utf-8") as f:
return f.read()
@pytest.fixture(scope="session")
def timeit_regex():
return r".+ ± .+ per loop \(mean ± std. dev. of .+ runs, .+ loops each\)"
@pytest.fixture(scope="session")
def multiple_source_fpaths(fixtures_path: str):
pattern_h = os.path.join(fixtures_path, "multiple_files", "*.h")
pattern_cu = os.path.join(fixtures_path, "multiple_files", "*.cu")
return list(glob.glob(pattern_h)) + list(glob.glob(pattern_cu))
+6
View File
@@ -0,0 +1,6 @@
#include <cstdio>
#include "hello.h"
__host__ void hello(){
printf("Hello World!\n");
}
+6
View File
@@ -0,0 +1,6 @@
#ifndef HELLO_H
#define HELLO_H
void hello();
#endif
+6
View File
@@ -0,0 +1,6 @@
#include "hello.h"
int main() {
hello();
return 0;
}
+10
View File
@@ -0,0 +1,10 @@
#include <cstdio>
__host__ void hello(){
printf("Hello World!\n");
}
int main() {
hello();
return 0;
}
+2
View File
@@ -0,0 +1,2 @@
pytest>=7.4.3
IPython>=8.19.0
+221
View File
@@ -0,0 +1,221 @@
import argparse
import math
import os
import re
import shutil
from typing import List
import pytest
from nvcc4jupyter.plugin import NVCCPlugin
def check_profiler_output(output: str):
    """
    Assert that the output is what profiling the hello world sample prints:
    exactly one "Hello World!" line plus one or more warning lines, which
    start with "==WARNING==".

    Args:
        output: The text to check; leading and trailing whitespace is
            ignored.
    """
    rows = output.strip().split("\n")
    # every row that is not a profiler warning must be the program output
    payload = [row for row in rows if not row.startswith("==WARNING==")]
    for row in payload:
        assert row == "Hello World!"

    warn_count = len(rows) - len(payload)
    assert warn_count >= 1
    assert warn_count == len(rows) - 1
def copy_source_to_group(
    source_fpath: str, group_name: str, workdir: str
) -> str:
    """
    Copy a source file into the directory of a group, creating that
    directory inside the working directory if needed.

    Args:
        source_fpath: Path of the file to copy.
        group_name: Name of the group directory inside the workdir.
        workdir: Directory that holds the group directories.

    Returns:
        The path of the copy, which keeps the file name of the original.
    """
    target_dir = os.path.join(workdir, group_name)
    os.makedirs(target_dir, exist_ok=True)

    file_name = os.path.basename(source_fpath)
    target_fpath = os.path.join(target_dir, file_name)
    shutil.copy(source_fpath, target_fpath)
    return target_fpath
@pytest.fixture(autouse=True, scope="function")
def before_each(plugin: NVCCPlugin):
shutil.rmtree(plugin.workdir, ignore_errors=True) # before test
yield
pass # after test
def test_save_source(plugin: NVCCPlugin, sample_cuda_code: str) -> None:
gname = "test_save_source"
sname = "sample.cu"
plugin._save_source(sname, sample_cuda_code, gname)
spath = os.path.join(plugin.workdir, gname, sname)
assert os.path.exists(spath)
with open(spath, "r", encoding="utf-8") as f:
code = f.read()
assert code == sample_cuda_code
with pytest.raises(ValueError):
plugin._save_source("wrong_extension.txt", sample_cuda_code, gname)
def test_delete_group(plugin: NVCCPlugin, sample_cuda_fpath: str) -> None:
gname = "test_delete_group"
source_fpath = copy_source_to_group(
sample_cuda_fpath, gname, plugin.workdir
)
assert os.path.exists(source_fpath)
plugin._delete_group(gname)
assert not os.path.exists(source_fpath)
def test_compile(
plugin: NVCCPlugin,
sample_cuda_fpath: str,
):
# we artificially create a source file group in the plugin workdir
gname = "test_compile"
source_fpath = copy_source_to_group(
sample_cuda_fpath, gname, plugin.workdir
)
exec_fpath = plugin._compile(gname)
assert os.path.exists(exec_fpath)
with pytest.raises(RuntimeError):
plugin._compile("inexistent_group")
with pytest.raises(RuntimeError):
os.remove(source_fpath)
plugin._compile(gname)
def test_run(
plugin: NVCCPlugin,
sample_cuda_fpath: str,
):
gname = "test_run"
copy_source_to_group(sample_cuda_fpath, gname, plugin.workdir)
exec_fpath = plugin._compile(gname)
output = plugin._run(exec_fpath)
assert output == "Hello World!\n"
def test_run_timeit(
plugin: NVCCPlugin, sample_cuda_fpath: str, timeit_regex: str
):
gname = "test_run_timeit"
copy_source_to_group(sample_cuda_fpath, gname, plugin.workdir)
exec_fpath = plugin._compile(gname)
output = plugin._run(exec_fpath, timeit=True)
assert (
re.match(timeit_regex, output) is not None
), f'Output "{output}" does not match the regex "{timeit_regex}".'
def test_run_profile(plugin: NVCCPlugin, sample_cuda_fpath: str):
gname = "test_run_profile"
copy_source_to_group(sample_cuda_fpath, gname, plugin.workdir)
exec_fpath = plugin._compile(gname)
output = plugin._run(
exec_fpath,
profile=True,
# because we are running without a kernel (in the test env we have no
# GPU) it does not matter what arguments we pass to the profiler as its
# output will always be just a few warnings; the reason we add them
# here is to test that no error is produced when passing the arguments
profiler_args=(
"--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum"
),
)
check_profiler_output(output)
def test_compile_and_run_multiple_files(
plugin: NVCCPlugin, multiple_source_fpaths: List[str]
):
"""
Compiles and executes 3 cuda source files from
tests/fixtures/multiple_files.
"""
gname = "test_compile_and_run_multiple_files"
for fpath in multiple_source_fpaths:
copy_source_to_group(fpath, gname, plugin.workdir)
output = plugin._compile_and_run(
gname, argparse.Namespace(timeit=False, profile=True, profiler_args="")
)
check_profiler_output(output)
def test_compile_and_run_multiple_files_shared(
plugin: NVCCPlugin, multiple_source_fpaths: List[str]
):
"""
Compiles and executes 3 cuda source files from
tests/fixtures/multiple_files. However, the hello.cu and hello.h files are
added to the "shared" group which is compiled with all other groups. This
allows sharing error handling code easily and other very common code.
"""
gname = "test_compile_and_run_multiple_files_shared"
for fpath in multiple_source_fpaths:
fname = os.path.basename(fpath)
if fname == "main.cu":
copy_source_to_group(fpath, gname, plugin.workdir)
else:
copy_source_to_group(fpath, "shared", plugin.workdir)
output = plugin._compile_and_run(
gname, argparse.Namespace(timeit=False, profile=True, profiler_args="")
)
check_profiler_output(output)
def test_read_args(plugin: NVCCPlugin):
parser = argparse.ArgumentParser()
parser.add_argument("-a", type=str, required=True)
parser.add_argument("-b", type=float, required=True)
args = plugin._read_args(
'-a "--this has --spaces and --dashes" -b 0.75', parser
)
assert args.a == "--this has --spaces and --dashes"
assert math.isclose(args.b, 0.75)
def test_magic_cuda(
capsys,
plugin: NVCCPlugin,
sample_cuda_code: str,
sample_magic_cu_line: str,
):
plugin.cuda(sample_magic_cu_line, sample_cuda_code)
check_profiler_output(capsys.readouterr().out)
def test_magic_cuda_group_save(plugin: NVCCPlugin, sample_cuda_code: str):
    """
    The "cuda_group_save" magic must write the cell content, unchanged, to
    the requested file of the requested group.
    """
    # group named after this test so that it cannot be confused with the
    # group used by test_save_source
    gname = "test_magic_cuda_group_save"
    sname = "sample.cu"
    plugin.cuda_group_save(f"-g {gname} -n {sname}", sample_cuda_code)

    spath = os.path.join(plugin.workdir, gname, sname)
    assert os.path.exists(spath)
    with open(spath, "r", encoding="utf-8") as f:
        code = f.read()
    assert code == sample_cuda_code
def test_magic_cuda_group_run(
capsys, plugin: NVCCPlugin, sample_cuda_fpath: str
):
gname = "test_magic_cuda_group_run"
copy_source_to_group(sample_cuda_fpath, gname, plugin.workdir)
plugin.cuda_group_run(f"--group {gname} --profile")
check_profiler_output(capsys.readouterr().out)
def test_magic_cuda_group_delete(plugin: NVCCPlugin, sample_cuda_fpath: str):
    """
    The "cuda_group_delete" magic must remove the source files of the
    given group.
    """
    # group named after this test so that it cannot be confused with the
    # group used by test_magic_cuda_group_run
    gname = "test_magic_cuda_group_delete"
    source_fpath = copy_source_to_group(
        sample_cuda_fpath, gname, plugin.workdir
    )
    assert os.path.exists(source_fpath)

    plugin.cuda_group_delete(f"--group {gname}")
    assert not os.path.exists(source_fpath)
-62
View File
@@ -1,62 +0,0 @@
import os
import subprocess
import tempfile
import uuid
from IPython.core.magic import Magics, cell_magic, magics_class
from common import helper
compiler = '/usr/local/cuda/bin/nvcc'
profiler = '/usr/local/cuda/bin/ncu'
ext = '.cu'
@magics_class
class NVCCPlugin(Magics):
def __init__(self, shell):
super(NVCCPlugin, self).__init__(shell)
self.argparser = helper.get_argparser()
@staticmethod
def compile(file_path):
subprocess.check_output(
[compiler, file_path + ext, "-o", file_path + ".out", '-Wno-deprecated-gpu-targets'], stderr=subprocess.STDOUT)
def run(self, file_path, timeit=False, profile=False, profiler_args=[]):
if timeit:
stmt = f"subprocess.check_output(['{file_path}.out'], stderr=subprocess.STDOUT)"
output = self.shell.run_cell_magic(
magic_name="timeit", line="-q -o import subprocess", cell=stmt)
output = str(output) # convert TimeitResult object to human readable string
else:
run_args = []
if profile:
run_args.extend([profiler] + profiler_args)
run_args.append(file_path + ".out")
output = subprocess.check_output(run_args, stderr=subprocess.STDOUT)
output = output.decode('utf8')
helper.print_out(output)
return None
@cell_magic
def cu(self, line, cell):
try:
args = self.argparser.parse_args(line.split())
except SystemExit as e:
self.argparser.print_help()
return
with tempfile.TemporaryDirectory() as tmp_dir:
file_path = os.path.join(tmp_dir, str(uuid.uuid4()))
with open(file_path + ext, "w") as f:
f.write(cell)
try:
self.compile(file_path)
output = self.run(file_path, timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args)
except subprocess.CalledProcessError as e:
helper.print_out(e.output.decode("utf8"))
output = None
return output
View File
-107
View File
@@ -1,107 +0,0 @@
import os
import subprocess
from IPython.core.magic import Magics, cell_magic, magics_class
from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring
from common import helper
compiler = '/usr/local/cuda/bin/nvcc'
profiler = '/usr/local/cuda/bin/ncu'
@magics_class
class NVCCPluginV2(Magics):
def __init__(self, shell):
super(NVCCPluginV2, self).__init__(shell)
self.argparser = helper.get_argparser()
current_dir = os.getcwd()
self.output_dir = os.path.join(current_dir, 'src')
if not os.path.exists(self.output_dir):
os.mkdir(self.output_dir)
print(f'created output directory at {self.output_dir}')
else:
print(f'directory {self.output_dir} already exists')
self.out = os.path.join(current_dir, "result.out")
print(f'Out bin {self.out}')
@staticmethod
def compile(output_dir, file_paths, out):
res = subprocess.check_output(
[compiler, '-I' + output_dir, file_paths, "-o", out, '-Wno-deprecated-gpu-targets'], stderr=subprocess.STDOUT)
res = res.decode()
helper.print_out(res)
def run(self, timeit=False, profile=False, profiler_args=[]):
if timeit:
stmt = f"subprocess.check_output(['{self.out}'], stderr=subprocess.STDOUT)"
output = self.shell.run_cell_magic(
magic_name="timeit", line="-q -o import subprocess", cell=stmt)
output = str(output) # convert TimeitResult object to human readable string
else:
run_args = []
if profile:
run_args.extend([profiler] + profiler_args)
run_args.append(self.out)
output = subprocess.check_output(run_args, stderr=subprocess.STDOUT)
output = output.decode('utf8')
helper.print_out(output)
return None
@magic_arguments()
@argument('-n', '--name', type=str, help='file name that will be produced by the cell. must end with .cu extension')
@argument('-c', '--compile', type=bool, help='Should be compiled?')
@cell_magic
def cuda(self, line='', cell=None):
args = parse_argstring(self.cuda, line)
ex = args.name.split('.')[-1]
if ex not in ['cu', 'h']:
raise Exception('name must end with .cu or .h')
if not os.path.exists(self.output_dir):
print(f'Output directory does not exist, creating')
try:
os.mkdir(self.output_dir)
except OSError:
print(f"Creation of the directory {self.output_dir} failed")
else:
print(f"Successfully created the directory {self.output_dir}")
file_path = os.path.join(self.output_dir, args.name)
with open(file_path, "w") as f:
f.write(cell)
if args.compile:
try:
self.compile(self.output_dir, file_path, self.out)
output = self.run(timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args)
except subprocess.CalledProcessError as e:
helper.print_out(e.output.decode("utf8"))
output = None
else:
output = f'File written in {file_path}'
return output
@cell_magic
def cuda_run(self, line='', cell=None):
try:
args = self.argparser.parse_args(line.split())
except SystemExit:
self.argparser.print_help()
return
try:
cuda_src = os.listdir(self.output_dir)
cuda_src = [os.path.join(self.output_dir, x)
for x in cuda_src if x[-3:] == '.cu']
print(f'found sources: {cuda_src}')
self.compile(self.output_dir, ' '.join(cuda_src), self.out)
output = self.run(timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args)
except subprocess.CalledProcessError as e:
helper.print_out(e.output.decode("utf8"))
output = None
return output