mirror of
https://github.com/andreinechaev/nvcc4jupyter.git
synced 2026-06-15 11:40:48 +05:30
Merge pull request #23 from cosminc98/master
Major rewrite that merges the two plugin versions, adds tests, CI/CD pipeline
This commit is contained in:
@@ -0,0 +1,19 @@
|
||||
FROM ubuntu
|
||||
|
||||
ARG VENV_PATH=/opt/dev-venv
|
||||
ENV VENV_ACTIVATE=${VENV_PATH}/bin/activate
|
||||
|
||||
RUN apt update
|
||||
RUN apt install -y python3.10-venv nvidia-cuda-toolkit gcc vim git
|
||||
|
||||
# the mkdir command bypasses a profiler error, which allows us to run it with
|
||||
# host code only to at least check that the profiler parameters are correctly
|
||||
# provided; without this line, some tests will fail
|
||||
RUN mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
|
||||
|
||||
# we create the virtualenv here so that the devcontainer.json setting
|
||||
# python.defaultInterpreterPath can be used to find it; if we do it in the
|
||||
# post_create.sh script, the virtualenv will not be loaded and features like
|
||||
# pylance, black, isort, etc. will not work
|
||||
RUN python3.10 -m venv ${VENV_PATH}
|
||||
RUN echo "source ${VENV_ACTIVATE}" >> ~/.bashrc
|
||||
@@ -0,0 +1,26 @@
|
||||
{
|
||||
"name": "Python Environment",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile",
|
||||
"context": ".."
|
||||
},
|
||||
"postCreateCommand": "bash .devcontainer/post_create.sh",
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"editorconfig.editorconfig",
|
||||
"ms-azuretools.vscode-docker",
|
||||
"ms-python.python",
|
||||
"ms-python.vscode-pylance",
|
||||
"ms-python.pylint",
|
||||
"ms-python.isort",
|
||||
"ms-python.flake8",
|
||||
"ms-python.black-formatter",
|
||||
"ryanluker.vscode-coverage-gutters"
|
||||
],
|
||||
"settings": {
|
||||
"python.defaultInterpreterPath": "/opt/dev-venv/bin/python"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
|
||||
|
||||
# install developer dependencies
|
||||
pip install .[dev]
|
||||
|
||||
# make sure the developer uses pre-commit hooks
|
||||
pre-commit install
|
||||
@@ -0,0 +1,5 @@
|
||||
[flake8]
|
||||
max-line-length = 79
|
||||
select = F,E,W,B,B901,B902,B903
|
||||
exclude = .eggs,.git,.tox,nssm,obj,out,packages,pywin32,tests,swagger_client
|
||||
ignore = E722,B001,W503,E203
|
||||
@@ -0,0 +1,22 @@
|
||||
# Same as `code-quality-pr.yaml` but triggered on commit to master branch
|
||||
# and runs on all files (instead of only the changed ones)
|
||||
|
||||
name: Code Quality Master
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
code-quality:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
|
||||
- name: Run pre-commits
|
||||
uses: pre-commit/action@v2.0.3
|
||||
@@ -0,0 +1,36 @@
|
||||
# This workflow finds which files were changed, prints them,
|
||||
# and runs `pre-commit` on those files.
|
||||
|
||||
# Inspired by the sktime library:
|
||||
# https://github.com/alan-turing-institute/sktime/blob/main/.github/workflows/test.yml
|
||||
|
||||
name: Code Quality PR
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [master, "release/*", "dev"]
|
||||
|
||||
jobs:
|
||||
code-quality:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v2
|
||||
|
||||
- name: Find modified files
|
||||
id: file_changes
|
||||
uses: trilom/file-changes-action@v1.2.4
|
||||
with:
|
||||
output: " "
|
||||
|
||||
- name: List modified files
|
||||
run: echo '${{ steps.file_changes.outputs.files}}'
|
||||
|
||||
- name: Run pre-commits
|
||||
uses: pre-commit/action@v2.0.3
|
||||
with:
|
||||
extra_args: --files ${{ steps.file_changes.outputs.files}}
|
||||
@@ -0,0 +1,46 @@
|
||||
name: Publish Python 🐍 distribution 📦 to PyPI
|
||||
|
||||
on: push
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Build distribution 📦
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.x"
|
||||
- name: Install pypa/build
|
||||
run: python3 -m pip install build --user
|
||||
- name: Build a binary wheel and a source tarball
|
||||
run: python3 -m build
|
||||
- name: Store the distribution packages
|
||||
uses: actions/upload-artifact@v3
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
|
||||
publish-to-pypi:
|
||||
name: >-
|
||||
Publish Python 🐍 distribution 📦 to PyPI
|
||||
if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
|
||||
needs:
|
||||
- build
|
||||
runs-on: ubuntu-latest
|
||||
environment:
|
||||
name: pypi
|
||||
url: https://pypi.org/p/nvcc4jupyter
|
||||
permissions:
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Download all the dists
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
name: python-package-distributions
|
||||
path: dist/
|
||||
- name: Publish distribution 📦 to PyPI
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
@@ -0,0 +1,86 @@
|
||||
name: Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches: [master, "release/*", "dev"]
|
||||
|
||||
jobs:
|
||||
run_tests_ubuntu:
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: ["ubuntu-latest"]
|
||||
python-version: ["3.10", "3.11", "3.12"]
|
||||
|
||||
timeout-minutes: 20
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
# the mkdir command bypasses a profiler error, which allows us to run it
|
||||
# with host code only to at least check that the profiler parameters are
|
||||
# correctly provided
|
||||
- name: Install CUDA tools
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install nvidia-cuda-toolkit
|
||||
sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r tests/requirements.txt
|
||||
|
||||
- name: List dependencies
|
||||
run: |
|
||||
python -m pip list
|
||||
|
||||
- name: Run pytest
|
||||
run: |
|
||||
pytest -v
|
||||
|
||||
# upload code coverage report
|
||||
code-coverage:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
lfs: "true"
|
||||
- run: git lfs pull
|
||||
|
||||
- name: Set up Python 3.10
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- name: Install CUDA tools
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install nvidia-cuda-toolkit
|
||||
sudo mkdir -p /usr/lib/x86_64-linux-gnu/nsight-compute/sections
|
||||
|
||||
- name: Install Python dependencies
|
||||
run: |
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r tests/requirements.txt
|
||||
pip install pytest-cov[toml]
|
||||
|
||||
- name: Run tests and collect coverage
|
||||
run: pytest --cov nvcc4jupyter
|
||||
|
||||
- name: Upload coverage to Codecov
|
||||
uses: codecov/codecov-action@v3
|
||||
env:
|
||||
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
||||
+36
-1
@@ -1,2 +1,37 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# Distribution / packaging
|
||||
bin/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
.tox/
|
||||
.coverage
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
|
||||
# Virtual Environment
|
||||
*env*
|
||||
|
||||
# Misc
|
||||
.pytest_cache/
|
||||
.DS_Store
|
||||
.idea
|
||||
.idea
|
||||
|
||||
@@ -0,0 +1,53 @@
|
||||
default_language_version:
|
||||
python: python3
|
||||
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
# list of supported hooks: https://pre-commit.com/hooks.html
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-docstring-first
|
||||
- id: check-yaml
|
||||
- id: debug-statements
|
||||
- id: detect-private-key
|
||||
- id: check-executables-have-shebangs
|
||||
- id: check-toml
|
||||
- id: check-case-conflict
|
||||
- id: check-added-large-files
|
||||
|
||||
# python code formatting
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 23.12.1
|
||||
hooks:
|
||||
- id: black
|
||||
args: ["--config", "pyproject.toml"]
|
||||
|
||||
# python import sorting
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.12.0
|
||||
hooks:
|
||||
- id: isort
|
||||
args: ["--settings-path", "pyproject.toml"]
|
||||
|
||||
# python check (PEP8), programming errors and code complexity
|
||||
- repo: https://github.com/PyCQA/flake8
|
||||
rev: 7.0.0
|
||||
hooks:
|
||||
- id: flake8
|
||||
args: ["--config", ".flake8"]
|
||||
|
||||
# pylint check
|
||||
- repo: https://github.com/pycqa/pylint
|
||||
rev: v3.0.3
|
||||
hooks:
|
||||
- id: pylint
|
||||
args: ["--rcfile", "pyproject.toml"]
|
||||
|
||||
- repo: https://github.com/PyCQA/bandit
|
||||
rev: 1.7.6
|
||||
hooks:
|
||||
- id: bandit
|
||||
args: ["-c", "pyproject.toml"]
|
||||
additional_dependencies: ["bandit[toml]"]
|
||||
@@ -0,0 +1,32 @@
|
||||
# .readthedocs.yaml
|
||||
# Read the Docs configuration file
|
||||
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
|
||||
|
||||
# Required
|
||||
version: 2
|
||||
|
||||
# Set the OS, Python version and other tools you might need
|
||||
build:
|
||||
os: ubuntu-22.04
|
||||
tools:
|
||||
python: "3.10"
|
||||
# You can also specify other tool versions:
|
||||
# nodejs: "19"
|
||||
# rust: "1.64"
|
||||
# golang: "1.19"
|
||||
|
||||
# Build documentation in the "docs/" directory with Sphinx
|
||||
sphinx:
|
||||
configuration: docs/source/conf.py
|
||||
|
||||
# Optionally build your docs in additional formats such as PDF and ePub
|
||||
# formats:
|
||||
# - pdf
|
||||
# - epub
|
||||
|
||||
# Optional but recommended, declare the Python requirements required
|
||||
# to build your documentation
|
||||
# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html
|
||||
python:
|
||||
install:
|
||||
- requirements: docs/requirements.txt
|
||||
Vendored
+33
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"editor.formatOnSave": true,
|
||||
"editor.formatOnPaste": true,
|
||||
"files.trimTrailingWhitespace": true,
|
||||
"files.autoSave": "onFocusChange",
|
||||
"git.autofetch": true,
|
||||
"[jsonc]": {
|
||||
"editor.defaultFormatter": "vscode.json-language-features"
|
||||
},
|
||||
"[python]": {
|
||||
"editor.defaultFormatter": "ms-python.black-formatter",
|
||||
"editor.formatOnSave": true,
|
||||
"editor.codeActionsOnSave": {
|
||||
"source.organizeImports": "explicit"
|
||||
},
|
||||
},
|
||||
"python.defaultInterpreterPath": "/usr/local/bin/python",
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true,
|
||||
"pylint.args": [
|
||||
"--rcfile=pyproject.toml"
|
||||
],
|
||||
"black-formatter.args": [
|
||||
"--config=pyproject.toml"
|
||||
],
|
||||
"flake8.args": [
|
||||
"--config",
|
||||
".flake8"
|
||||
],
|
||||
"isort.args": [
|
||||
"--settings-path=pyproject.toml"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2018-2024 Andrei Nechaev, Cosmin Stefan Ciocan and others
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
@@ -1,37 +1,103 @@
|
||||
## NVCC Plugin for Jupyter notebook
|
||||
# nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
|
||||
|
||||
### V2 is available
|
||||
| | |
|
||||
| --- | --- |
|
||||
| Testing | ![Python Versions][python-version] [![CI - Test][test-badge]][test-workflow] [![Coverage][coverage-badge]][coverage-results] |
|
||||
| Code Quality | [![Code style: black][black-badge]][black-project] [![security: bandit][bandit-badge]][bandit-project]|
|
||||
| Package | [![PyPI Latest Release][pypi-latest-version]][pypi-project-url] [![PyPI Downloads][pypi-downloads]][pypi-project-url] |
|
||||
|
||||
V2 brings support of multiple source and header files.
|
||||
<!-- Testing badges -->
|
||||
[python-version]: https://img.shields.io/pypi/pyversions/nvcc4jupyter
|
||||
[test-badge]: https://github.com/cosminc98/nvcc4jupyter/actions/workflows/test.yml/badge.svg
|
||||
[test-workflow]: https://github.com/cosminc98/nvcc4jupyter/actions/workflows/test.yml
|
||||
[coverage-badge]: https://codecov.io/github/cosminc98/nvcc4jupyter/coverage.svg?branch=master
|
||||
[coverage-results]: https://codecov.io/gh/cosminc98/nvcc4jupyter
|
||||
|
||||
##### Usage
|
||||
<!-- Code Quality badges -->
|
||||
[black-badge]: https://img.shields.io/badge/code%20style-black-000000.svg
|
||||
[black-project]: https://github.com/ambv/black
|
||||
[bandit-badge]: https://img.shields.io/badge/security-bandit-yellow.svg
|
||||
[bandit-project]: https://github.com/PyCQA/bandit
|
||||
|
||||
- Install and load extension
|
||||
```
|
||||
!pip install git+https://github.com/andreinechaev/nvcc4jupyter.git
|
||||
%load_ext nvcc_plugin
|
||||
<!-- Package badges -->
|
||||
[pypi-project-url]: https://pypi.org/project/nvcc4jupyter/
|
||||
[pypi-latest-version]: https://img.shields.io/pypi/v/nvcc4jupyter.svg
|
||||
[pypi-downloads]: https://img.shields.io/pypi/dm/nvcc4jupyter.svg?label=PyPI%20downloads
|
||||
|
||||
**nvcc4jupyter** is a Jupyter Notebook plugin that provides cell and line
|
||||
[magics](https://ipython.readthedocs.io/en/stable/interactive/magics.html)
|
||||
to allow running CUDA C++ code from a notebook. This is especially
|
||||
useful when combined with a hosted service such as Google's
|
||||
[Colab](https://colab.research.google.com/) which provides CUDA capable GPUs
|
||||
and you can start learning CUDA C++ without having to install anything or even
|
||||
to own a GPU yourself.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Main Features](#main-features)
|
||||
- [Install](#install)
|
||||
- [Usage](#usage)
|
||||
- [License](#license)
|
||||
- [Documentation](#documentation)
|
||||
- [Contributing](#contributing)
|
||||
|
||||
## Main Features
|
||||
Here are just a few of the things that nvcc4jupyter does well:
|
||||
|
||||
- [Easily run CUDA C++ code](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#hello-world)
|
||||
- [Profile your code with NVIDIA Nsight Compute](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#profiling)
|
||||
- [Share code between different programs in the same notebook / split your code into multiple files for improved readability](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html#groups)
|
||||
|
||||
## Install
|
||||
The installer for the latest released version is available at the [Python
|
||||
Package Index (PyPI)](https://pypi.org/project/nvcc4jupyter).
|
||||
|
||||
```sh
|
||||
pip install nvcc4jupyter
|
||||
```
|
||||
|
||||
- Mark a cell to be treated as cuda cell
|
||||
> `%%cuda --name example.cu --compile false`
|
||||
>> NOTE: The cell must contain either code or comments to be run successfully.
|
||||
>> It accepts 2 arguments. `-n` | `--name` - which is the name of either CUDA source or Header
|
||||
>> The name parameter must have extension `.cu` or `.h`
|
||||
>> Second argument `-c` | `--compile`; default value is `false`. The argument is a flag to specify
|
||||
>> if the cell will be compiled and run right away or not. It might be useful if you're playing in
|
||||
>> the `main` function
|
||||
## Usage
|
||||
|
||||
- To compile and run all CUDA files you need to run
|
||||
First, load the extension to enable the magic commands:
|
||||
```
|
||||
%%cuda_run
|
||||
# This line is just to bypass an exception and can contain any text
|
||||
%load_ext nvcc4jupyter
|
||||
```
|
||||
|
||||
- To profile your CUDA kernels using NVIDIA Nsight Compute CLI profiler you need to run
|
||||
Running a quick CUDA Hello World program:
|
||||
```c++
|
||||
%%cuda
|
||||
#include <stdio.h>
|
||||
|
||||
__global__ void hello(){
|
||||
printf("Hello from block: %u, thread: %u\n", blockIdx.x, threadIdx.x);
|
||||
}
|
||||
|
||||
int main(){
|
||||
hello<<<2, 2>>>();
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
```
|
||||
%%cu --profile
|
||||
|
||||
For more advanced use cases, see [the documentation](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html).
|
||||
|
||||
## Documentation
|
||||
The official documentation is hosted on [readthedocs](https://nvcc4jupyter.readthedocs.io/).
|
||||
|
||||
## License
|
||||
[MIT](LICENSE)
|
||||
|
||||
## Contributing
|
||||
|
||||
Install the package with the development dependencies:
|
||||
```bash
|
||||
pip install .[dev]
|
||||
```
|
||||
- You can add options to the profiler. Keep in mind that any argument after "--profiler-args" will be considered as a profiler argument. For example, to select which sections to collect metrics for you need to run
|
||||
```
|
||||
%%cu --profile --profiler-args --section SpeedOfLight --section MemoryWorkloadAnalysis --section Occupancy
|
||||
|
||||
As a developer, make sure you install the pre-commit hook before committing any changes:
|
||||
```bash
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
<hr>
|
||||
|
||||
[Go to Top](#table-of-contents)
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
import argparse
|
||||
|
||||
|
||||
def get_argparser() -> argparse.ArgumentParser:
    """Create the argument parser for the NVCC plugin parameters.

    Returns:
        An ``argparse.ArgumentParser`` that accepts the boolean flags
        ``-t/--timeit`` and ``-p/--profile``, plus ``-a/--profiler-args``,
        which gathers every argument that follows it into a list (an empty
        list when the option is not given).
    """
    parser = argparse.ArgumentParser(description='NVCCPlugin params')
    parser.add_argument(
        '-t',
        '--timeit',
        action='store_true',
        help='If set, returns the output of the "timeit" built-in ipython magic instead of stdout.',
    )
    parser.add_argument(
        '-p',
        '--profile',
        action='store_true',
        help='If set, runs the nvidia nsight compute profiler. Has no effect if used with --timeit.',
    )
    parser.add_argument(
        '-a',
        '--profiler-args',
        type=str,
        # REMAINDER makes this option consume everything after it, which is
        # why it has to be the last option on the command line.
        nargs=argparse.REMAINDER,
        default=[],
        help='Extra options that can be passed to the nvidia nsight compute profiler. '
        'Must be the last option given to the argument parser so you can pass arguments with dashes.',
    )
    return parser
|
||||
|
||||
|
||||
def print_out(out: str) -> None:
    """Print ``out`` to standard output, one newline-separated piece per call.

    Args:
        out: Text to print; it is split on ``"\\n"`` and each piece is
            printed with its own ``print`` call.
    """
    for line in out.split('\n'):
        print(line)
|
||||
@@ -0,0 +1,20 @@
|
||||
# Minimal makefile for Sphinx documentation
|
||||
#
|
||||
|
||||
# You can set these variables from the command line, and also
|
||||
# from the environment for the first two.
|
||||
SPHINXOPTS ?=
|
||||
SPHINXBUILD ?= sphinx-build
|
||||
SOURCEDIR = source
|
||||
BUILDDIR = build
|
||||
|
||||
# Put it first so that "make" without argument is like "make help".
|
||||
help:
|
||||
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
|
||||
.PHONY: help Makefile
|
||||
|
||||
# Catch-all target: route all unknown targets to Sphinx using the new
|
||||
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
|
||||
%: Makefile
|
||||
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
|
||||
@@ -0,0 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.http://sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
@@ -0,0 +1,2 @@
|
||||
sphinx==7.1.2
|
||||
sphinx-rtd-theme==1.3.0rc1
|
||||
@@ -0,0 +1,40 @@
|
||||
# Configuration file for the Sphinx documentation builder.
|
||||
#
|
||||
# For the full list of built-in configuration values, see the documentation:
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
|
||||
|
||||
project = "nvcc4jupyter"
|
||||
copyright = "2024, Andrei Nechaev & Cosmin Stefan Ciocan"
|
||||
author = "Andrei Nechaev & Cosmin Stefan Ciocan"
|
||||
release = "1.0.1"
|
||||
version = "1.0.1"
|
||||
|
||||
# -- General configuration ---------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
|
||||
|
||||
extensions = [
|
||||
"sphinx.ext.duration",
|
||||
"sphinx.ext.doctest",
|
||||
"sphinx.ext.autodoc",
|
||||
"sphinx.ext.autosummary",
|
||||
"sphinx.ext.intersphinx",
|
||||
]
|
||||
|
||||
intersphinx_mapping = {
|
||||
"python": ("https://docs.python.org/3/", None),
|
||||
"sphinx": ("https://www.sphinx-doc.org/en/master/", None),
|
||||
}
|
||||
intersphinx_disabled_domains = ["std"]
|
||||
|
||||
templates_path = ["_templates"]
|
||||
exclude_patterns = []
|
||||
|
||||
|
||||
# -- Options for HTML output -------------------------------------------------
|
||||
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
|
||||
|
||||
html_theme = "sphinx_rtd_theme"
|
||||
html_static_path = ["_static"]
|
||||
@@ -0,0 +1,13 @@
|
||||
Welcome to nvcc4jupyter's documentation!
|
||||
========================================
|
||||
|
||||
This IPython extension allows running CUDA C++ code in Jupyter notebook. This
|
||||
is especially useful when combined with `Google Colab <https://colab.research.google.com/>`_
|
||||
which provides CUDA capable GPUs with the CUDA toolkit already installed.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Contents:
|
||||
|
||||
usage
|
||||
magics
|
||||
@@ -0,0 +1,172 @@
|
||||
**********
|
||||
Magics API
|
||||
**********
|
||||
|
||||
.. note::
|
||||
Arguments for profilers and the nvcc compiler can be passed in double
|
||||
quotes so they can contain spaces and dashes.
|
||||
|
||||
------
|
||||
|
||||
.. _cuda_magic:
|
||||
|
||||
cuda
|
||||
====
|
||||
|
||||
Magic command that compiles, runs, and profiles CUDA C++ code in the cell.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
- ``%%cuda``: Compile and run this cell.
|
||||
- ``%%cuda -p``: Also runs the Nsight Compute profiler.
|
||||
- ``%%cuda -p -a "<SPACE SEPARATED PROFILER ARGS>"``: Also runs the Nsight Compute profiler, passing it the given extra arguments.
|
||||
- ``%%cuda -t``: Outputs the "timeit" built-in magic results.
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
-t, --timeit
|
||||
Boolean. If set, returns the output of the "timeit" built-in
|
||||
ipython magic instead of stdout.
|
||||
|
||||
-p, --profile
|
||||
Boolean. If set, runs the NVIDIA Nsight Compute profiler whose
|
||||
output is appended to standard output.
|
||||
|
||||
-a, --profiler-args
|
||||
String. Optional profiler arguments that can be space separated
|
||||
by wrapping them in double quotes. See all options here:
|
||||
`Nsight Compute CLI <https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html#command-line-options>`_
|
||||
|
||||
.. note::
|
||||
If both "\-\-profile" and "\-\-timeit" are used then no profiling is
|
||||
done.
|
||||
|
||||
Examples
|
||||
--------
|
||||
::
|
||||
|
||||
# compile, run, and profile the code in the cell with the Nsight
|
||||
# compute profiler while collecting only metrics from the
|
||||
# "MemoryWorkloadAnalysis" section.
|
||||
%%cuda --profile --profiler-args "--section MemoryWorkloadAnalysis"
|
||||
|
||||
------
|
||||
|
||||
.. _cuda_group_save_magic:
|
||||
|
||||
cuda_group_save
|
||||
===============
|
||||
|
||||
Magic command that saves CUDA C++ code in the cell for later
|
||||
compilation and execution with possibly more source files.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
- ``%%cuda_group_save -n <FILENAME> -g <GROUPNAME>``: Save the code in the current cell to a group of source files.
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
-n, --name
|
||||
String. Required file name of the saved source file. Must have
|
||||
either the ".cu" or ".h" extension. In order to import a header
|
||||
file saved with this magic you can simply add '#include "<name>"'.
|
||||
|
||||
-g, --group
|
||||
String. Required group name to which to add the saved source file.
|
||||
Groups are source files that get compiled together and do not
|
||||
interact with other groups. This allows you to have multiple
|
||||
unrelated CUDA programs within the same jupyter notebook. Adding
|
||||
files to a group named "shared" will make them available to all
|
||||
other source file groups. One use case for the shared group is for
|
||||
sharing error handling code which should be present in all CUDA
|
||||
programs.
|
||||
|
||||
Examples
|
||||
--------
|
||||
::
|
||||
|
||||
# jupyter cell 1
|
||||
%%cuda_group_save -n "error_handling.h" -g "shared"
|
||||
<ERROR HANDLING CODE>
|
||||
|
||||
# jupyter cell 2
|
||||
%%cuda_group_save -n "main.cu" -g "example_group"
|
||||
#include "error_handling.h"
|
||||
<YOUR CODE HERE>
|
||||
|
||||
------
|
||||
|
||||
.. _cuda_group_run_magic:
|
||||
|
||||
cuda_group_run
|
||||
==============
|
||||
|
||||
Line magic command that compiles, runs, and profiles all source files
|
||||
in a group.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
- ``%cuda_group_run -g <GROUPNAME>``: Compiles, runs, and profiles the source files in the given group.
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
-g, --group
|
||||
String. Required group name whose source files should be compiled and run.
|
||||
|
||||
.. note::
|
||||
All options from the "%%cuda" cell magic are inherited.
|
||||
|
||||
Examples
|
||||
--------
|
||||
::
|
||||
|
||||
# jupyter cell 1
|
||||
%%cuda_group_save -n "error_handling.h" -g "shared"
|
||||
<ERROR HANDLING CODE>
|
||||
|
||||
# jupyter cell 2
|
||||
%%cuda_group_save -n "main.cu" -g "example_group"
|
||||
#include "error_handling.h"
|
||||
<YOUR CODE HERE>
|
||||
|
||||
# jupyter cell 3
|
||||
%cuda_group_run -g "example_group" --profile
|
||||
|
||||
-----
|
||||
|
||||
.. _cuda_group_delete_magic:
|
||||
|
||||
cuda_group_delete
|
||||
=================
|
||||
|
||||
Line magic command that deletes all source files in a group.
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
- ``%cuda_group_delete -g <GROUPNAME>``: Removes all source files in the given group.
|
||||
|
||||
Options
|
||||
-------
|
||||
|
||||
-g, --group
|
||||
String. Required group name whose source files should be deleted.
|
||||
|
||||
Examples
|
||||
--------
|
||||
::
|
||||
|
||||
# jupyter cell 1
|
||||
%%cuda_group_save -n "error_handling.h" -g "shared"
|
||||
<ERROR HANDLING CODE>
|
||||
|
||||
# jupyter cell 2 - here we delete the "shared" group; in
|
||||
# practice this would be helpful if you want to overwrite some
|
||||
# functionality that was defined earlier in the notebook
|
||||
%cuda_group_delete -g "shared"
|
||||
@@ -0,0 +1,257 @@
|
||||
Usage
|
||||
=====
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
To use nvcc4jupyter, first install it using pip:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
(venv) $ pip install nvcc4jupyter
|
||||
|
||||
Load the Extension
|
||||
------------------
|
||||
|
||||
Now we need to load the IPython extension to be able to use its cell and line
|
||||
magic commands:
|
||||
|
||||
.. code-block::
|
||||
|
||||
%load_ext nvcc4jupyter
|
||||
|
||||
Hello World
|
||||
-----------
|
||||
|
||||
We will use the :ref:`cuda <cuda_magic>` cell magic command to run a simple
|
||||
hello world program.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%%cuda
|
||||
#include <stdio.h>
|
||||
|
||||
__global__ void hello(){
|
||||
printf("Hello from block: %u, thread: %u\n", blockIdx.x, threadIdx.x);
|
||||
}
|
||||
|
||||
int main(){
|
||||
hello<<<2, 2>>>();
|
||||
cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
Groups
|
||||
------
|
||||
|
||||
Now we will demonstrate a more complex scenario that uses source file groups.
|
||||
If you want to split your code into multiple source files, either for code reuse
|
||||
or just to have an easier to read project, you want to use groups. A group of
|
||||
source files will be compiled together. Because of this, you can include headers
|
||||
from the same group and use the code defined in other ".cu" files. There is also
|
||||
a special group named "shared" whose files will be compiled together with all
|
||||
other groups, which is a great feature for error handling code as we'll show now:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%%cuda_group_save --group shared --name "error_handling.h"
|
||||
// error checking macro
|
||||
#define cudaCheckErrors(msg) \
|
||||
do { \
|
||||
cudaError_t __err = cudaGetLastError(); \
|
||||
if (__err != cudaSuccess) { \
|
||||
fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
|
||||
msg, cudaGetErrorString(__err), \
|
||||
__FILE__, __LINE__); \
|
||||
fprintf(stderr, "*** FAILED - ABORTING\n"); \
|
||||
exit(1); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
Now we can use that error handling macro in this vector addition program but
|
||||
also in other programs that we define in other Jupyter cells:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%%cuda
|
||||
#include <stdio.h>
|
||||
#include "error_handling.h"
|
||||
|
||||
const int DSIZE = 4096;
|
||||
const int block_size = 256;
|
||||
|
||||
// vector add kernel: C = A + B
|
||||
__global__ void vadd(const float *A, const float *B, float *C, int ds){
|
||||
int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (idx < ds) {
|
||||
C[idx] = A[idx] + B[idx];
|
||||
}
|
||||
}
|
||||
|
||||
int main(){
|
||||
float *h_A, *h_B, *h_C, *d_A, *d_B, *d_C;
|
||||
|
||||
// allocate space for vectors in host memory
|
||||
h_A = new float[DSIZE];
|
||||
h_B = new float[DSIZE];
|
||||
h_C = new float[DSIZE];
|
||||
|
||||
// initialize vectors in host memory to random values (except for the
|
||||
// result vector whose values do not matter as they will be overwritten)
|
||||
for (int i = 0; i < DSIZE; i++) {
|
||||
h_A[i] = rand()/(float)RAND_MAX;
|
||||
h_B[i] = rand()/(float)RAND_MAX;
|
||||
}
|
||||
|
||||
// allocate space for vectors in device memory
|
||||
cudaMalloc(&d_A, DSIZE*sizeof(float));
|
||||
cudaMalloc(&d_B, DSIZE*sizeof(float));
|
||||
cudaMalloc(&d_C, DSIZE*sizeof(float));
|
||||
cudaCheckErrors("cudaMalloc failure"); // error checking
|
||||
|
||||
// copy vectors A and B from host to device:
|
||||
cudaMemcpy(d_A, h_A, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_B, h_B, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaCheckErrors("cudaMemcpy H2D failure");
|
||||
|
||||
// launch the vector adding kernel
|
||||
vadd<<<(DSIZE+block_size-1)/block_size, block_size>>>(d_A, d_B, d_C, DSIZE);
|
||||
cudaCheckErrors("kernel launch failure");
|
||||
|
||||
// wait for the kernel to finish execution
|
||||
cudaDeviceSynchronize();
|
||||
cudaCheckErrors("kernel execution failure");
|
||||
|
||||
cudaMemcpy(h_C, d_C, DSIZE*sizeof(float), cudaMemcpyDeviceToHost);
|
||||
cudaCheckErrors("cudaMemcpy D2H failure");
|
||||
|
||||
printf("A[0] = %f\n", h_A[0]);
|
||||
printf("B[0] = %f\n", h_B[0]);
|
||||
printf("C[0] = %f\n", h_C[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Above we use the :ref:`cuda <cuda_magic>` magic command which saves the code
|
||||
in the cell to an anonymous source file group, compiles, and executes that
|
||||
code. This only allows us to have one source file (besides the ones in the
|
||||
"shared" group). In order to have multiple source files we need to use the
|
||||
:ref:`cuda_group_save <cuda_group_save_magic>` and
|
||||
:ref:`cuda_group_run <cuda_group_run_magic>` magics.
|
||||
|
||||
First, we save the vector addition function to its own file:
|
||||
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%%cuda_group_save --name "vector_add.cu" --group "vector_add"
|
||||
// vector add kernel: C = A + B
|
||||
__global__ void vadd(const float *A, const float *B, float *C, int ds){
|
||||
int idx = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
if (idx < ds) {
|
||||
C[idx] = A[idx] + B[idx];
|
||||
}
|
||||
}
|
||||
|
||||
Now we create a header file so the main cuda file knows the signature of "vadd":
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%%cuda_group_save --name "vector_add.h" --group "vector_add"
|
||||
__global__ void vadd(const float *A, const float *B, float *C, int ds);
|
||||
|
||||
To tie it all together, we save the main cuda file, which includes our vector
|
||||
addition code:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%%cuda_group_save --name "main.cu" --group "vector_add"
|
||||
#include <stdio.h>
|
||||
#include "error_handling.h"
|
||||
#include "vector_add.h"
|
||||
|
||||
const int DSIZE = 4096;
|
||||
const int block_size = 256;
|
||||
|
||||
int main(){
|
||||
float *h_A, *h_B, *h_C, *d_A, *d_B, *d_C;
|
||||
|
||||
// allocate space for vectors in host memory
|
||||
h_A = new float[DSIZE];
|
||||
h_B = new float[DSIZE];
|
||||
h_C = new float[DSIZE];
|
||||
|
||||
// initialize vectors in host memory to random values (except for the
|
||||
// result vector whose values do not matter as they will be overwritten)
|
||||
for (int i = 0; i < DSIZE; i++) {
|
||||
h_A[i] = rand()/(float)RAND_MAX;
|
||||
h_B[i] = rand()/(float)RAND_MAX;
|
||||
}
|
||||
|
||||
// allocate space for vectors in device memory
|
||||
cudaMalloc(&d_A, DSIZE*sizeof(float));
|
||||
cudaMalloc(&d_B, DSIZE*sizeof(float));
|
||||
cudaMalloc(&d_C, DSIZE*sizeof(float));
|
||||
cudaCheckErrors("cudaMalloc failure"); // error checking
|
||||
|
||||
// copy vectors A and B from host to device:
|
||||
cudaMemcpy(d_A, h_A, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_B, h_B, DSIZE*sizeof(float), cudaMemcpyHostToDevice);
|
||||
cudaCheckErrors("cudaMemcpy H2D failure");
|
||||
|
||||
// launch the vector adding kernel
|
||||
vadd<<<(DSIZE+block_size-1)/block_size, block_size>>>(d_A, d_B, d_C, DSIZE);
|
||||
cudaCheckErrors("kernel launch failure");
|
||||
|
||||
// wait for the kernel to finish execution
|
||||
cudaDeviceSynchronize();
|
||||
cudaCheckErrors("kernel execution failure");
|
||||
|
||||
cudaMemcpy(h_C, d_C, DSIZE*sizeof(float), cudaMemcpyDeviceToHost);
|
||||
cudaCheckErrors("cudaMemcpy D2H failure");
|
||||
|
||||
printf("A[0] = %f\n", h_A[0]);
|
||||
printf("B[0] = %f\n", h_B[0]);
|
||||
printf("C[0] = %f\n", h_C[0]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Now we can compile all the source files in the group and execute the main
|
||||
function with the following command:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%cuda_group_run --group "vector_add"
|
||||
|
||||
Profiling
|
||||
---------
|
||||
|
||||
Another important feature of nvcc4jupyter is its integration with the NVIDIA
Nsight Compute profiler. Make sure the profiler is installed and that its
executable can be found in a directory listed in your PATH environment variable.
|
||||
|
||||
In order to use it and provide the profiler with custom arguments, simply run:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
%cuda_group_run --group "vector_add" --profile --profiler-args "--section SpeedOfLight"
|
||||
|
||||
Running the cell above will compile and execute the vector addition code in the
|
||||
"vector_add" group and profile it, keeping only the metrics from the
|
||||
"SpeedOfLight" section. The output will contain something similar to:
|
||||
|
||||
.. code-block::
|
||||
|
||||
Section: GPU Speed Of Light Throughput
|
||||
----------------------- ------------- ------------
|
||||
Metric Name Metric Unit Metric Value
|
||||
----------------------- ------------- ------------
|
||||
DRAM Frequency cycle/nsecond 4.65
|
||||
SM Frequency cycle/usecond 544.31
|
||||
Elapsed Cycles cycle 2,145
|
||||
Memory Throughput % 3.19
|
||||
DRAM Throughput % 3.19
|
||||
Duration usecond 3.94
|
||||
L1/TEX Cache Throughput % 6.67
|
||||
L2 Cache Throughput % 1.98
|
||||
SM Active Cycles cycle 383.65
|
||||
Compute (SM) Throughput % 1.19
|
||||
----------------------- ------------- ------------
|
||||
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
|
||||
"""
|
||||
|
||||
from .plugin import NVCCPlugin, load_ipython_extension # noqa: F401
|
||||
|
||||
__version__ = "1.0.3"
|
||||
@@ -0,0 +1,69 @@
|
||||
"""
|
||||
Parsers for the CUDA magic commands.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
|
||||
|
||||
def get_parser_cuda() -> argparse.ArgumentParser:
    """
    Build the argument parser of the %%cuda magic command.

    Returns:
        A parser accepting the "-t/--timeit" and "-p/--profile" switches and
        the "-a/--profiler-args" string option (empty string by default).
    """
    description = (
        "%%cuda magic that compiles and runs CUDA C++ code in this cell."
        " See https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda"  # noqa: E501
        " for usage details."
    )
    cuda_parser = argparse.ArgumentParser(description=description)

    # boolean switches, registered in the order they appear in the help
    for short_flag, long_flag in (("-t", "--timeit"), ("-p", "--profile")):
        cuda_parser.add_argument(short_flag, long_flag, action="store_true")

    cuda_parser.add_argument("-a", "--profiler-args", type=str, default="")
    return cuda_parser
|
||||
|
||||
|
||||
def get_parser_cuda_group_run() -> argparse.ArgumentParser:
    """
    Build the argument parser of the %cuda_group_run magic command.

    The parser starts from the %%cuda parser, so it accepts the same timing
    and profiling options, and adds the required "-g/--group" option.

    Returns:
        The configured argument parser.
    """
    parser = get_parser_cuda()
    # cuda_group_run is registered as a line magic, so the help text shows
    # the single "%" prefix users actually have to type
    parser.description = (
        "%cuda_group_run magic that compiles and runs source files in a given"
        " group. See"
        " https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda-group-run"  # noqa: E501
        " for usage details."
    )
    parser.add_argument("-g", "--group", type=str, required=True)
    return parser
|
||||
|
||||
|
||||
def get_parser_cuda_group_save() -> argparse.ArgumentParser:
    """
    Build the argument parser of the %%cuda_group_save magic command.

    Returns:
        A parser whose "-n/--name" and "-g/--group" string options are both
        required.
    """
    description = (
        "%%cuda_group_save magic that saves CUDA C++ code in this cell for"
        " later compilation and execution with possibly more source files."
        " See https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda-group-save"  # noqa: E501
        " for usage details."
    )
    save_parser = argparse.ArgumentParser(description=description)
    for short_flag, long_flag in (("-n", "--name"), ("-g", "--group")):
        save_parser.add_argument(
            short_flag, long_flag, type=str, required=True
        )
    return save_parser
|
||||
|
||||
|
||||
def get_parser_cuda_group_delete() -> argparse.ArgumentParser:
    """
    Build the argument parser of the %cuda_group_delete magic command.

    Returns:
        A parser whose only option is the required "-g/--group" string.
    """
    parser = argparse.ArgumentParser(
        # cuda_group_delete is registered as a line magic, so the help text
        # shows the single "%" prefix users actually have to type
        description=(
            "%cuda_group_delete magic that deletes all files in a group. See"
            " https://nvcc4jupyter.readthedocs.io/en/latest/magics.html#cuda-group-delete"  # noqa: E501
            " for usage details."
        )
    )
    parser.add_argument("-g", "--group", type=str, required=True)
    return parser
|
||||
@@ -0,0 +1,321 @@
|
||||
"""
|
||||
nvcc4jupyter: CUDA C++ plugin for Jupyter Notebook
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
|
||||
# pylint: disable=import-error
|
||||
from IPython.core.interactiveshell import InteractiveShell
|
||||
from IPython.core.magic import Magics, cell_magic, line_magic, magics_class
|
||||
|
||||
from . import parsers
|
||||
|
||||
DEFAULT_EXEC_FNAME = "cuda_exec.out"
|
||||
SHARED_GROUP_NAME = "shared"
|
||||
|
||||
|
||||
def print_out(out: str):
    """Write every newline-separated line of the string to stdout."""
    print(*out.split("\n"), sep="\n")
|
||||
|
||||
|
||||
@magics_class
|
||||
class NVCCPlugin(Magics):
|
||||
"""
|
||||
CUDA C++ plugin for Jupyter Notebook
|
||||
"""
|
||||
|
||||
def __init__(self, shell: InteractiveShell):
    """
    Set up the argument parsers of all magics and the working directory.

    Args:
        shell: The IPython shell forwarded to the parent class.
    """
    super().__init__(shell)
    self.shell: InteractiveShell  # type hint not provided by parent class

    # one argument parser per magic command
    self.parser_cuda = parsers.get_parser_cuda()
    self.parser_cuda_group_save = parsers.get_parser_cuda_group_save()
    self.parser_cuda_group_delete = parsers.get_parser_cuda_group_delete()
    self.parser_cuda_group_run = parsers.get_parser_cuda_group_run()

    # all source file groups are stored below a fresh temporary directory
    workdir = tempfile.mkdtemp()
    self.workdir = workdir
    print(f'Source files will be saved in "{workdir}".')
|
||||
|
||||
def _save_source(
|
||||
self, source_name: str, source_code: str, group_name: str
|
||||
) -> None:
|
||||
"""
|
||||
Save source code as a .cu or .h file in the group directory where
|
||||
files can be compiled together. Saving a source file to the group
|
||||
named "shared" will make those source files available when compiling
|
||||
any group.
|
||||
|
||||
Args:
|
||||
source_name: The name of the source file. Must end in ".cu" or
|
||||
".h".
|
||||
source_code: The source code to be written to the source file.
|
||||
group_name: The name of the group directory where the file will be
|
||||
saved.
|
||||
|
||||
Raises:
|
||||
ValueError: If the source name does not have a proper extension.
|
||||
"""
|
||||
_, ext = os.path.splitext(source_name)
|
||||
if ext not in (".cu", ".h"):
|
||||
raise ValueError(
|
||||
f'Given source name "{source_name}" must end in ".h" or ".cu".'
|
||||
)
|
||||
group_dirpath = os.path.join(self.workdir, group_name)
|
||||
os.makedirs(group_dirpath, exist_ok=True)
|
||||
source_fpath = os.path.join(group_dirpath, source_name)
|
||||
with open(source_fpath, "w", encoding="utf-8") as f:
|
||||
f.write(source_code)
|
||||
|
||||
def _delete_group(self, group_name: str) -> None:
|
||||
"""
|
||||
Removes all source files from the given group.
|
||||
|
||||
Args:
|
||||
group_name: The name of the source files group.
|
||||
"""
|
||||
group_dirpath = os.path.join(self.workdir, group_name)
|
||||
if os.path.exists(group_dirpath):
|
||||
shutil.rmtree(group_dirpath)
|
||||
|
||||
def _compile(
    self, group_name: str, executable_fname: str = DEFAULT_EXEC_FNAME
) -> str:
    """
    Compile the ".cu" files of a group together with the ".cu" files of the
    group named "shared" into one executable using nvcc.

    Args:
        group_name: The name of the source file group to be compiled.
        executable_fname: File name of the executable, which is written to
            the group directory. Defaults to "cuda_exec.out".

    Raises:
        RuntimeError: If the group directory does not exist or if it does
            not contain any ".cu" file.

    Returns:
        The file path of the resulting executable.
    """
    group_dirpath = os.path.join(self.workdir, group_name)
    if not os.path.exists(group_dirpath):
        raise RuntimeError(f'Group "{group_name}" does not exist.')

    group_sources = glob.glob(os.path.join(group_dirpath, "*.cu"))
    if not group_sources:
        raise RuntimeError(
            f'Group "{group_name}" does not have any source files.'
        )

    shared_dirpath = os.path.join(self.workdir, SHARED_GROUP_NAME)
    shared_sources = glob.glob(os.path.join(shared_dirpath, "*.cu"))
    executable_fpath = os.path.join(group_dirpath, executable_fname)

    command = [
        "nvcc",
        # both directories are searched for headers
        f"-I{shared_dirpath},{group_dirpath}",
        *group_sources,
        *shared_sources,
        "-o",
        executable_fpath,
        "-Wno-deprecated-gpu-targets",
    ]
    # compiler diagnostics are merged into stdout so that they end up in
    # the CalledProcessError raised when compilation fails
    subprocess.check_output(command, stderr=subprocess.STDOUT)

    return executable_fpath
|
||||
|
||||
def _run(
|
||||
self,
|
||||
exec_fpath: str,
|
||||
timeit: bool = False,
|
||||
profile: bool = False,
|
||||
profiler_args: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Runs a CUDA executable.
|
||||
|
||||
Args:
|
||||
exec_fpath: The file path of the executable.
|
||||
timeit: If True, returns the result of the "timeit" magic instead
|
||||
of the standard output of the CUDA process. Defaults to False.
|
||||
profile: If True, the executable is profiled with NVIDIA Nsight
|
||||
Compute profiling tool and its output is added to stdout.
|
||||
Defaults to False.
|
||||
profiler_args: The profiler arguments used to customize the
|
||||
information gathered by it and its overall behaviour. Defaults
|
||||
to an empty string.
|
||||
|
||||
Returns:
|
||||
The standard output of the CUDA process or the "timeit" magic
|
||||
output.
|
||||
"""
|
||||
if timeit:
|
||||
stmt = (
|
||||
f"subprocess.check_output(['{exec_fpath}'],"
|
||||
" stderr=subprocess.STDOUT)"
|
||||
)
|
||||
output = self.shell.run_cell_magic(
|
||||
magic_name="timeit", line="-q -o import subprocess", cell=stmt
|
||||
)
|
||||
# convert TimeitResult object to human readable string
|
||||
output = str(output)
|
||||
else:
|
||||
run_args = []
|
||||
if profile:
|
||||
run_args.extend(["ncu"] + profiler_args.split())
|
||||
run_args.append(exec_fpath)
|
||||
output = subprocess.check_output(
|
||||
run_args, stderr=subprocess.STDOUT
|
||||
)
|
||||
output = output.decode("utf8")
|
||||
|
||||
return output
|
||||
|
||||
def _compile_and_run(
|
||||
self, group_name: str, args: argparse.Namespace
|
||||
) -> str:
|
||||
try:
|
||||
exec_fpath = self._compile(group_name)
|
||||
output = self._run(
|
||||
exec_fpath=exec_fpath,
|
||||
timeit=args.timeit,
|
||||
profile=args.profile,
|
||||
profiler_args=args.profiler_args,
|
||||
)
|
||||
except subprocess.CalledProcessError as e:
|
||||
output = e.output.decode("utf8")
|
||||
return output
|
||||
|
||||
def _read_args(
|
||||
self, line: str, parser: argparse.ArgumentParser
|
||||
) -> Optional[argparse.Namespace]:
|
||||
"""
|
||||
Read arguments from the magic line. Makes sure to keep arguments
|
||||
between double quotes together for use with profiler arguments or
|
||||
compiler arguments.
|
||||
|
||||
Args:
|
||||
line: The arguments on the line of the magic call in the jupyter
|
||||
cell.
|
||||
parser: The parser which will process the arguments after they are
|
||||
correctly tokenized.
|
||||
|
||||
Returns:
|
||||
The parsed arguments.
|
||||
"""
|
||||
tokens = line.strip().split('"')
|
||||
args_tokenized: List[str] = []
|
||||
for index, tok in enumerate(tokens):
|
||||
if index % 2 == 0:
|
||||
# tokens found outside double quotes are split at whitespace
|
||||
args_tokenized.extend(tok.split(" "))
|
||||
else:
|
||||
# anything found between double quotes will not be split
|
||||
args_tokenized.append(tok)
|
||||
args_tokenized = [arg for arg in args_tokenized if len(arg) > 0]
|
||||
|
||||
try:
|
||||
return parser.parse_args(args_tokenized)
|
||||
except SystemExit:
|
||||
parser.print_help()
|
||||
return None
|
||||
|
||||
@cell_magic
def cuda(self, line: str, cell: str) -> None:
    """Compile and run the CUDA code in the cell and print the output.

    Nothing is compiled when the magic arguments cannot be parsed.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell.
        cell: All of the lines in the jupyter cell besides the magic call
            itself. It should contain all of the source code to be
            compiled and run.
    """
    parsed = self._read_args(line, self.parser_cuda)
    if parsed is None:
        return

    # every call saves its code to a new group with a random unique name
    anonymous_group = str(uuid.uuid4())
    self._save_source("single_file.cu", cell, anonymous_group)
    print_out(self._compile_and_run(anonymous_group, parsed))
|
||||
|
||||
@cell_magic
def cuda_group_save(self, line: str, cell: str) -> None:
    """
    Save the CUDA code in the cell to a named file of a source file group,
    to be compiled and executed later by the "cuda_group_run" line magic.

    Nothing is saved when the magic arguments cannot be parsed.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell.
        cell: All of the lines in the jupyter cell besides the magic call
            itself. It should contain all of the source code to be
            saved.
    """
    parsed = self._read_args(line, self.parser_cuda_group_save)
    if parsed is not None:
        self._save_source(parsed.name, cell, parsed.group)
|
||||
|
||||
@line_magic
def cuda_group_run(self, line: str) -> None:
    """
    Compile and run all source files of a source file group and print the
    output.

    Nothing is compiled when the magic arguments cannot be parsed.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell.
    """
    parsed = self._read_args(line, self.parser_cuda_group_run)
    if parsed is not None:
        print_out(self._compile_and_run(parsed.group, parsed))
|
||||
|
||||
@line_magic
def cuda_group_delete(self, line: str) -> None:
    """
    Remove all source files of a source file group.

    Nothing is removed when the magic arguments cannot be parsed.

    Args:
        line: The arguments on the line of the magic call in the jupyter
            cell.
    """
    parsed = self._read_args(line, self.parser_cuda_group_delete)
    if parsed is not None:
        self._delete_group(parsed.group)
|
||||
|
||||
|
||||
def load_ipython_extension(shell: InteractiveShell):
    """
    Entry point called by IPython when the extension is loaded: creates the
    plugin and registers its magics on the given shell.
    """
    shell.register_magics(NVCCPlugin(shell))
|
||||
@@ -1,10 +0,0 @@
|
||||
from v1.v1 import NVCCPlugin as NVCC_V1
|
||||
from v2.v2 import NVCCPluginV2 as NVCC_V2
|
||||
|
||||
|
||||
def load_ipython_extension(ip):
    """Register the magics of both plugin versions, version 1 first."""
    for plugin_cls in (NVCC_V1, NVCC_V2):
        ip.register_magics(plugin_cls(ip))
|
||||
+291
@@ -0,0 +1,291 @@
|
||||
[build-system]
|
||||
requires = ["hatchling >= 1.13.0"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project]
|
||||
name = "nvcc4jupyter"
|
||||
description = "Jupyter notebook plugin to run CUDA C/C++ code"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
license = {text = "MIT License"}
|
||||
authors = [
|
||||
{ name = "Andrei Nechaev", email = "lyfaradey@yahoo.com" },
|
||||
{ name = "Cosmin Stefan Ciocan", email = "ciocan.cosmin98@gmail.com" },
|
||||
]
|
||||
classifiers = [
|
||||
"Programming Language :: Python",
|
||||
"Programming Language :: Python :: 3",
|
||||
'Programming Language :: Python :: 3.10',
|
||||
'Programming Language :: Python :: 3.11',
|
||||
'Programming Language :: Python :: 3.12',
|
||||
'Environment :: GPU',
|
||||
'Environment :: GPU :: NVIDIA CUDA',
|
||||
'Framework :: IPython',
|
||||
'Framework :: Jupyter',
|
||||
]
|
||||
dependencies = []
|
||||
dynamic = ["version"]
|
||||
|
||||
[project.urls]
|
||||
documentation = 'https://nvcc4jupyter.readthedocs.io/'
|
||||
repository = 'https://github.com/andreinechaev/nvcc4jupyter'
|
||||
|
||||
[tool.hatch.version]
|
||||
path = "nvcc4jupyter/__init__.py"
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["nvcc4jupyter"]
|
||||
|
||||
[project.optional-dependencies]
|
||||
testing = ["pytest>=7.4.3", "IPython>=8.19.0"]
|
||||
dev = ["pytest>=7.4.3", "IPython>=8.19.0", "pre-commit>=3.6.0", "pytest-cov[toml]>=4.1.0"]
|
||||
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = [
|
||||
"--color=yes",
|
||||
"--durations=0",
|
||||
"--strict-markers",
|
||||
"--doctest-modules",
|
||||
]
|
||||
filterwarnings = [
|
||||
"ignore::DeprecationWarning",
|
||||
"ignore::UserWarning",
|
||||
]
|
||||
log_cli = "True"
|
||||
markers = [
|
||||
"slow: slow tests",
|
||||
]
|
||||
minversion = "6.0"
|
||||
testpaths = "tests/"
|
||||
|
||||
[tool.coverage.report]
|
||||
exclude_lines = [
|
||||
"pragma: nocover",
|
||||
"raise NotImplementedError",
|
||||
"raise NotImplementedError()",
|
||||
"if __name__ == .__main__.:",
|
||||
]
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
|
||||
[tool.bandit]
|
||||
exclude_dirs = ["build","dist","tests","scripts"]
|
||||
number = 4
|
||||
recursive = true
|
||||
targets = "src"
|
||||
# B404 and B603 are skipped because the user can already run any arbitrary
|
||||
# command on their jupyter server
|
||||
skips = ["B101", "B311", "B404", "B603"]
|
||||
|
||||
[tool.black]
|
||||
line-length = 79
|
||||
fast = true
|
||||
experimental-string-processing = true
|
||||
|
||||
[tool.coverage.run]
|
||||
branch = true
|
||||
|
||||
[tool.pyright]
|
||||
include = ["src"]
|
||||
exclude = [
|
||||
"**/node_modules",
|
||||
"**/__pycache__",
|
||||
]
|
||||
venv = "env37"
|
||||
|
||||
reportMissingImports = true
|
||||
reportMissingTypeStubs = false
|
||||
|
||||
pythonVersion = "3.7"
|
||||
pythonPlatform = "Linux"
|
||||
|
||||
executionEnvironments = [
|
||||
{ root = "src" }
|
||||
]
|
||||
|
||||
[tool.tox]
|
||||
legacy_tox_ini = """
|
||||
[tox]
|
||||
envlist = py, integration, spark, all
|
||||
[testenv]
|
||||
commands =
|
||||
pytest -m "not integration and not spark" {posargs}
|
||||
[testenv:integration]
|
||||
commands =
|
||||
pytest -m "integration" {posargs}
|
||||
[testenv:spark]
|
||||
extras = spark
|
||||
setenv =
|
||||
PYSPARK_DRIVER_PYTHON = {envpython}
|
||||
PYSPARK_PYTHON = {envpython}
|
||||
commands =
|
||||
pytest -m "spark" {posargs}
|
||||
[testenv:all]
|
||||
extras = all
|
||||
setenv =
|
||||
PYSPARK_DRIVER_PYTHON = {envpython}
|
||||
PYSPARK_PYTHON = {envpython}
|
||||
commands =
|
||||
pytest {posargs}
|
||||
"""
|
||||
|
||||
[tool.pylint]
|
||||
extension-pkg-whitelist= [
|
||||
"numpy",
|
||||
"torch",
|
||||
"cv2",
|
||||
"pyodbc",
|
||||
"pydantic",
|
||||
"ciso8601",
|
||||
"netcdf4",
|
||||
"scipy"
|
||||
]
|
||||
ignore="CVS"
|
||||
ignore-patterns="test.*?py,conftest.py"
|
||||
ignore-paths="docs,tests"
|
||||
init-hook='import sys; sys.setrecursionlimit(8 * sys.getrecursionlimit())'
|
||||
jobs=0
|
||||
limit-inference-results=100
|
||||
persistent="yes"
|
||||
suggestion-mode="yes"
|
||||
unsafe-load-any-extension="no"
|
||||
|
||||
[tool.pylint.'MESSAGES CONTROL']
|
||||
enable="c-extension-no-member"
|
||||
|
||||
[tool.pylint.'REPORTS']
|
||||
evaluation="10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)"
|
||||
output-format="text"
|
||||
reports="no"
|
||||
score="yes"
|
||||
|
||||
[tool.pylint.'REFACTORING']
|
||||
max-nested-blocks=5
|
||||
never-returning-functions="sys.exit"
|
||||
|
||||
[tool.pylint.'BASIC']
|
||||
argument-naming-style="snake_case"
|
||||
attr-naming-style="snake_case"
|
||||
bad-names= [
|
||||
"foo",
|
||||
"bar"
|
||||
]
|
||||
class-attribute-naming-style="any"
|
||||
class-naming-style="PascalCase"
|
||||
const-naming-style="UPPER_CASE"
|
||||
docstring-min-length=-1
|
||||
function-naming-style="snake_case"
|
||||
good-names= [
|
||||
"i",
|
||||
"j",
|
||||
"k",
|
||||
"ex",
|
||||
"Run",
|
||||
"_"
|
||||
]
|
||||
include-naming-hint="yes"
|
||||
inlinevar-naming-style="any"
|
||||
method-naming-style="snake_case"
|
||||
module-naming-style="any"
|
||||
no-docstring-rgx="^_"
|
||||
property-classes="abc.abstractproperty"
|
||||
variable-naming-style="snake_case"
|
||||
|
||||
[tool.pylint.'FORMAT']
|
||||
ignore-long-lines="^\\s*(# )?.*['\"]?<?https?://\\S+>?"
|
||||
indent-after-paren=4
|
||||
indent-string=' '
|
||||
max-line-length=79
|
||||
max-module-lines=1000
|
||||
single-line-class-stmt="no"
|
||||
single-line-if-stmt="no"
|
||||
|
||||
[tool.pylint.'LOGGING']
|
||||
logging-format-style="old"
|
||||
logging-modules="logging"
|
||||
|
||||
[tool.pylint.'MISCELLANEOUS']
|
||||
notes= [
|
||||
"FIXME",
|
||||
"XXX",
|
||||
"TODO"
|
||||
]
|
||||
|
||||
[tool.pylint.'SIMILARITIES']
|
||||
ignore-comments="yes"
|
||||
ignore-docstrings="yes"
|
||||
ignore-imports="yes"
|
||||
min-similarity-lines=7
|
||||
|
||||
[tool.pylint.'SPELLING']
|
||||
max-spelling-suggestions=4
|
||||
spelling-store-unknown-words="no"
|
||||
|
||||
[tool.pylint.'STRING']
|
||||
check-str-concat-over-line-jumps="no"
|
||||
|
||||
[tool.pylint.'TYPECHECK']
|
||||
contextmanager-decorators="contextlib.contextmanager"
|
||||
generated-members="numpy.*,np.*,pyspark.sql.functions,collect_list"
|
||||
ignore-mixin-members="yes"
|
||||
ignore-none="yes"
|
||||
ignore-on-opaque-inference="yes"
|
||||
ignored-classes="optparse.Values,thread._local,_thread._local,numpy,torch,swagger_client"
|
||||
ignored-modules="numpy,torch,swagger_client,netCDF4,scipy"
|
||||
missing-member-hint="yes"
|
||||
missing-member-hint-distance=1
|
||||
missing-member-max-choices=1
|
||||
|
||||
[tool.pylint.'VARIABLES']
|
||||
additional-builtins="dbutils"
|
||||
allow-global-unused-variables="yes"
|
||||
callbacks= [
|
||||
"cb_",
|
||||
"_cb"
|
||||
]
|
||||
dummy-variables-rgx="_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_"
|
||||
ignored-argument-names="_.*|^ignored_|^unused_"
|
||||
init-import="no"
|
||||
redefining-builtins-modules="six.moves,past.builtins,future.builtins,builtins,io"
|
||||
|
||||
[tool.pylint.'CLASSES']
|
||||
defining-attr-methods= [
|
||||
"__init__",
|
||||
"__new__",
|
||||
"setUp",
|
||||
"__post_init__"
|
||||
]
|
||||
exclude-protected= [
|
||||
"_asdict",
|
||||
"_fields",
|
||||
"_replace",
|
||||
"_source",
|
||||
"_make"
|
||||
]
|
||||
valid-classmethod-first-arg="cls"
|
||||
valid-metaclass-classmethod-first-arg="cls"
|
||||
|
||||
[tool.pylint.'DESIGN']
|
||||
max-args=5
|
||||
max-attributes=7
|
||||
max-bool-expr=5
|
||||
max-branches=12
|
||||
max-locals=15
|
||||
max-parents=7
|
||||
max-public-methods=20
|
||||
max-returns=6
|
||||
max-statements=50
|
||||
min-public-methods=2
|
||||
|
||||
[tool.pylint.'IMPORTS']
|
||||
allow-wildcard-with-all="no"
|
||||
analyse-fallback-blocks="no"
|
||||
deprecated-modules="optparse,tkinter.tix"
|
||||
|
||||
[tool.pylint.'EXCEPTIONS']
|
||||
overgeneral-exceptions= [
|
||||
"BaseException",
|
||||
"Exception"
|
||||
]
|
||||
@@ -1,13 +0,0 @@
|
||||
from distutils.core import setup
|
||||
|
||||
setup(
|
||||
name='NVCCPlugin',
|
||||
version='0.0.2',
|
||||
author='Andrei Nechaev',
|
||||
author_email='lyfaradey@yahoo.com',
|
||||
py_modules=['nvcc_plugin', 'v2.v2', 'v1.v1', 'common.helper'],
|
||||
url='https://github.com/andreinechaev/nvcc4jupyter',
|
||||
license='LICENSE',
|
||||
description='Jupyter notebook plugin to run CUDA C/C++ code',
|
||||
# long_description=open('README.md').read(),
|
||||
)
|
||||
@@ -0,0 +1 @@
|
||||
from .fixtures.fixtures import * # noqa: F401,F403
|
||||
Vendored
+57
@@ -0,0 +1,57 @@
|
||||
import glob
|
||||
import os
|
||||
|
||||
import pytest
|
||||
from IPython.core.interactiveshell import InteractiveShell
|
||||
|
||||
from nvcc4jupyter.plugin import NVCCPlugin
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def shell():
    """Session-scoped fixture returning a new ``InteractiveShell``."""
    return InteractiveShell()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def plugin(shell: InteractiveShell):
    """Session-scoped fixture returning an ``NVCCPlugin`` bound to *shell*."""
    return NVCCPlugin(shell=shell)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def tests_path():
    """Path of the tests directory, given relative to the working directory."""
    return "tests"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def fixtures_path(tests_path):
    """Path of the "fixtures" directory inside the tests directory."""
    return os.path.join(tests_path, "fixtures")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_magic_cu_line():
    """Sample magic line enabling profiling with quoted profiler arguments."""
    # fmt: off
    return '--profile --profiler-args "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum"'  # noqa: E501
    # fmt: on
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_cuda_fpath(fixtures_path: str):
    """Path of the single-file sample program "hello.cu"."""
    return os.path.join(fixtures_path, "single_file", "hello.cu")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def sample_cuda_code(sample_cuda_fpath: str):
    """Text content of the sample CUDA file, read as UTF-8."""
    with open(sample_cuda_fpath, "r", encoding="utf-8") as f:
        return f.read()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def timeit_regex():
    """Regular expression for a "... per loop (mean ± std. dev. ...)" line."""
    return r".+ ± .+ per loop \(mean ± std. dev. of .+ runs, .+ loops each\)"
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def multiple_source_fpaths(fixtures_path: str):
    """Paths of all ".h" files followed by all ".cu" files of the
    "multiple_files" fixture directory."""
    pattern_h = os.path.join(fixtures_path, "multiple_files", "*.h")
    pattern_cu = os.path.join(fixtures_path, "multiple_files", "*.cu")
    return list(glob.glob(pattern_h)) + list(glob.glob(pattern_cu))
|
||||
+6
@@ -0,0 +1,6 @@
|
||||
#include <cstdio>
|
||||
#include "hello.h"
|
||||
|
||||
// Host-side function that prints the greeting line to standard output.
__host__ void hello(){
    printf("Hello World!\n");
}
|
||||
+6
@@ -0,0 +1,6 @@
|
||||
#ifndef HELLO_H
|
||||
#define HELLO_H
|
||||
|
||||
void hello();
|
||||
|
||||
#endif
|
||||
+6
@@ -0,0 +1,6 @@
|
||||
#include "hello.h"
|
||||
|
||||
// Entry point: calls hello(), which is declared in "hello.h", and exits
// with status 0.
int main() {
    hello();
    return 0;
}
|
||||
Vendored
+10
@@ -0,0 +1,10 @@
|
||||
#include <cstdio>
|
||||
|
||||
// Host-side function that prints the greeting line to standard output.
__host__ void hello(){
    printf("Hello World!\n");
}
|
||||
|
||||
// Entry point: prints the greeting through hello() and exits with status 0.
int main() {
    hello();
    return 0;
}
|
||||
@@ -0,0 +1,2 @@
|
||||
pytest>=7.4.3
|
||||
IPython>=8.19.0
|
||||
@@ -0,0 +1,221 @@
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
from nvcc4jupyter.plugin import NVCCPlugin
|
||||
|
||||
|
||||
def check_profiler_output(output: str):
    """
    Assert that profiler output is a single "Hello World!" line surrounded
    only by warning lines, of which there must be at least one.

    Raises:
        AssertionError: If any of those expectations is not met.
    """
    # the profiler output will be a line of "Hello World!" along with some
    # warning lines which start with "==WARNING=="
    rows = output.strip().split("\n")
    warnings = [row for row in rows if row.startswith("==WARNING==")]
    regular_rows = [row for row in rows if not row.startswith("==WARNING==")]
    assert all(row == "Hello World!" for row in regular_rows)
    assert len(warnings) >= 1
    assert len(warnings) == len(rows) - 1
|
||||
|
||||
|
||||
def copy_source_to_group(
    source_fpath: str, group_name: str, workdir: str
) -> str:
    """
    Copy a source file into the directory of a group below *workdir*,
    creating that directory when needed.

    Returns:
        The path of the copy, which keeps the base name of the source file.
    """
    target_dir = os.path.join(workdir, group_name)
    os.makedirs(target_dir, exist_ok=True)
    file_name = os.path.basename(source_fpath)
    target_fpath = os.path.join(target_dir, file_name)
    shutil.copy(source_fpath, target_fpath)
    return target_fpath
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope="function")
def before_each(plugin: NVCCPlugin):
    """
    Autouse fixture that removes the plugin working directory before each
    test function runs. Nothing is done after the test.
    """
    # setup: a missing directory is not an error (ignore_errors=True)
    shutil.rmtree(plugin.workdir, ignore_errors=True)
    yield
    # teardown: intentionally empty
|
||||
|
||||
|
||||
def test_save_source(plugin: NVCCPlugin, sample_cuda_code: str) -> None:
    """
    Saving a source file must create it inside its group directory with the
    exact code that was given; a ".txt" file name is expected to be rejected
    with a ValueError.
    """
    group, file_name = "test_save_source", "sample.cu"
    plugin._save_source(file_name, sample_cuda_code, group)

    saved_fpath = os.path.join(plugin.workdir, group, file_name)
    assert os.path.exists(saved_fpath)
    with open(saved_fpath, "r", encoding="utf-8") as saved_file:
        saved_code = saved_file.read()
    assert saved_code == sample_cuda_code

    with pytest.raises(ValueError):
        plugin._save_source("wrong_extension.txt", sample_cuda_code, group)
|
||||
|
||||
|
||||
def test_delete_group(plugin: NVCCPlugin, sample_cuda_fpath: str) -> None:
    """
    Deleting a group must remove the source files stored in it.
    """
    group = "test_delete_group"
    copied_fpath = copy_source_to_group(
        sample_cuda_fpath, group, plugin.workdir
    )
    assert os.path.exists(copied_fpath)

    plugin._delete_group(group)

    assert not os.path.exists(copied_fpath)
|
||||
|
||||
|
||||
def test_compile(
    plugin: NVCCPlugin,
    sample_cuda_fpath: str,
):
    """
    Compiling a group must produce an executable, while compiling a group
    that does not exist or that has lost its source file is expected to raise
    a RuntimeError.
    """
    # we artificially create a source file group in the plugin workdir
    gname = "test_compile"
    source_fpath = copy_source_to_group(
        sample_cuda_fpath, gname, plugin.workdir
    )

    exec_fpath = plugin._compile(gname)
    assert os.path.exists(exec_fpath)

    with pytest.raises(RuntimeError):
        plugin._compile("inexistent_group")

    # the setup step is kept outside of the raises-block so that only the
    # call under test can satisfy the expected exception
    os.remove(source_fpath)
    with pytest.raises(RuntimeError):
        plugin._compile(gname)
|
||||
|
||||
|
||||
def test_run(
    plugin: NVCCPlugin,
    sample_cuda_fpath: str,
):
    """
    Running the compiled sample program must return its standard output.
    """
    group = "test_run"
    copy_source_to_group(sample_cuda_fpath, group, plugin.workdir)

    binary_fpath = plugin._compile(group)
    program_output = plugin._run(binary_fpath)

    assert program_output == "Hello World!\n"
|
||||
|
||||
|
||||
def test_run_timeit(
    plugin: NVCCPlugin, sample_cuda_fpath: str, timeit_regex: str
):
    """
    Running with timeit=True must return text that matches `timeit_regex`.
    """
    group = "test_run_timeit"
    copy_source_to_group(sample_cuda_fpath, group, plugin.workdir)

    binary_fpath = plugin._compile(group)
    output = plugin._run(binary_fpath, timeit=True)

    matched = re.match(timeit_regex, output)
    assert (
        matched is not None
    ), f'Output "{output}" does not match the regex "{timeit_regex}".'
|
||||
|
||||
|
||||
def test_run_profile(plugin: NVCCPlugin, sample_cuda_fpath: str):
    """
    Running with profile=True and explicit profiler arguments must produce
    the output accepted by check_profiler_output.
    """
    group = "test_run_profile"
    copy_source_to_group(sample_cuda_fpath, group, plugin.workdir)
    binary_fpath = plugin._compile(group)

    # because we are running without a kernel (in the test env we have no
    # GPU) it does not matter what arguments we pass to the profiler as its
    # output will always be just a few warnings; the reason we add them
    # here is to test that no error is produced when passing the arguments
    metrics_args = "--metrics l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum"

    profiled_output = plugin._run(
        binary_fpath,
        profile=True,
        profiler_args=metrics_args,
    )
    check_profiler_output(profiled_output)
|
||||
|
||||
|
||||
def test_compile_and_run_multiple_files(
    plugin: NVCCPlugin, multiple_source_fpaths: List[str]
):
    """
    Compiles and executes 3 cuda source files from
    tests/fixtures/multiple_files, all placed in a single group, and checks
    the output of the profiled run.
    """
    group = "test_compile_and_run_multiple_files"
    for source_fpath in multiple_source_fpaths:
        copy_source_to_group(source_fpath, group, plugin.workdir)

    run_options = argparse.Namespace(
        timeit=False, profile=True, profiler_args=""
    )
    profiled_output = plugin._compile_and_run(group, run_options)

    check_profiler_output(profiled_output)
|
||||
|
||||
|
||||
def test_compile_and_run_multiple_files_shared(
    plugin: NVCCPlugin, multiple_source_fpaths: List[str]
):
    """
    Compiles and executes 3 cuda source files from
    tests/fixtures/multiple_files. Only main.cu goes into the test's own
    group; the hello.cu and hello.h files are added to the "shared" group
    which is compiled with all other groups. This allows sharing error
    handling code easily and other very common code.
    """
    group = "test_compile_and_run_multiple_files_shared"
    for source_fpath in multiple_source_fpaths:
        is_main = os.path.basename(source_fpath) == "main.cu"
        target_group = group if is_main else "shared"
        copy_source_to_group(source_fpath, target_group, plugin.workdir)

    run_options = argparse.Namespace(
        timeit=False, profile=True, profiler_args=""
    )
    profiled_output = plugin._compile_and_run(group, run_options)

    check_profiler_output(profiled_output)
|
||||
|
||||
|
||||
def test_read_args(plugin: NVCCPlugin):
    """
    A quoted argument value that contains spaces and dashes must reach the
    parser as one string, and the float argument must be converted.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-a", type=str, required=True)
    arg_parser.add_argument("-b", type=float, required=True)

    magic_line = '-a "--this has --spaces and --dashes" -b 0.75'
    parsed = plugin._read_args(magic_line, arg_parser)

    assert parsed.a == "--this has --spaces and --dashes"
    assert math.isclose(parsed.b, 0.75)
|
||||
|
||||
|
||||
def test_magic_cuda(
    capsys,
    plugin: NVCCPlugin,
    sample_cuda_code: str,
    sample_magic_cu_line: str,
):
    """
    The cuda cell magic must print output accepted by
    check_profiler_output to stdout.
    """
    plugin.cuda(sample_magic_cu_line, sample_cuda_code)

    captured = capsys.readouterr()
    check_profiler_output(captured.out)
|
||||
|
||||
|
||||
def test_magic_cuda_group_save(plugin: NVCCPlugin, sample_cuda_code: str):
    """
    The cuda_group_save magic must write the cell code to the file given
    with -n inside the group given with -g.
    """
    # the group is named after this test so that it cannot collide with the
    # group used by test_save_source
    gname = "test_magic_cuda_group_save"
    sname = "sample.cu"
    plugin.cuda_group_save(f"-g {gname} -n {sname}", sample_cuda_code)

    spath = os.path.join(plugin.workdir, gname, sname)
    assert os.path.exists(spath)
    with open(spath, "r", encoding="utf-8") as f:
        code = f.read()
    assert code == sample_cuda_code
|
||||
|
||||
|
||||
def test_magic_cuda_group_run(
    capsys, plugin: NVCCPlugin, sample_cuda_fpath: str
):
    """
    The cuda_group_run magic with --profile must print output accepted by
    check_profiler_output to stdout.
    """
    group = "test_magic_cuda_group_run"
    copy_source_to_group(sample_cuda_fpath, group, plugin.workdir)

    plugin.cuda_group_run(f"--group {group} --profile")

    captured = capsys.readouterr()
    check_profiler_output(captured.out)
|
||||
|
||||
|
||||
def test_magic_cuda_group_delete(plugin: NVCCPlugin, sample_cuda_fpath: str):
    """
    The cuda_group_delete magic must remove the source files of the group
    given with --group.
    """
    # the group is named after this test so that it cannot collide with the
    # group used by test_magic_cuda_group_run
    gname = "test_magic_cuda_group_delete"
    source_fpath = copy_source_to_group(
        sample_cuda_fpath, gname, plugin.workdir
    )
    assert os.path.exists(source_fpath)

    plugin.cuda_group_delete(f"--group {gname}")

    assert not os.path.exists(source_fpath)
|
||||
@@ -1,62 +0,0 @@
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
import uuid
|
||||
|
||||
from IPython.core.magic import Magics, cell_magic, magics_class
|
||||
from common import helper
|
||||
|
||||
compiler = '/usr/local/cuda/bin/nvcc'
|
||||
profiler = '/usr/local/cuda/bin/ncu'
|
||||
ext = '.cu'
|
||||
|
||||
|
||||
@magics_class
class NVCCPlugin(Magics):
    """
    IPython magics that compile and run the CUDA code of a single cell.

    The cell is written to a temporary file with the module-level `ext`
    extension, compiled with the module-level `compiler` and executed. All
    program and compiler output is printed through `helper.print_out`.
    """

    def __init__(self, shell):
        super().__init__(shell)

        # parser for the options given on the magic line
        self.argparser = helper.get_argparser()

    @staticmethod
    def compile(file_path):
        """
        Compile `file_path` + `ext` into the executable `file_path` + ".out".

        Raises subprocess.CalledProcessError when the compiler exits with a
        non-zero status; the compiler output is attached to the exception.
        """
        subprocess.check_output(
            [
                compiler,
                file_path + ext,
                "-o",
                file_path + ".out",
                "-Wno-deprecated-gpu-targets",
            ],
            stderr=subprocess.STDOUT,
        )

    def run(self, file_path, timeit=False, profile=False, profiler_args=None):
        """
        Execute `file_path` + ".out" and print what it produced.

        Args:
            file_path: Path of the executable without the ".out" suffix.
            timeit: If True, the execution is measured with the timeit magic
                and the measurement is printed instead of the program output.
            profile: If True (and timeit is False), the executable is started
                through the module-level `profiler`.
            profiler_args: List of extra command line arguments for the
                profiler. None means no extra arguments.

        Returns:
            Always None; the output is printed, not returned.

        Raises:
            subprocess.CalledProcessError: If the process exits with a
                non-zero status.
        """
        exec_path = file_path + ".out"
        if timeit:
            # the path is embedded with repr() so that quotes or backslashes
            # in it cannot break the generated statement
            stmt = (
                f"subprocess.check_output([{exec_path!r}], "
                "stderr=subprocess.STDOUT)"
            )
            output = self.shell.run_cell_magic(
                magic_name="timeit", line="-q -o import subprocess", cell=stmt
            )
            # convert TimeitResult object to human readable string
            output = str(output)
        else:
            run_args = []
            if profile:
                run_args.extend([profiler] + (profiler_args or []))
            run_args.append(exec_path)
            output = subprocess.check_output(
                run_args, stderr=subprocess.STDOUT
            ).decode("utf8")

        helper.print_out(output)
        return None

    @cell_magic
    def cu(self, line, cell):
        """
        Cell magic: compile the cell as CUDA code and run the result.

        `line` holds the options understood by `self.argparser`. When they
        cannot be parsed the parser help is printed and nothing is run.
        Compiler or runtime failures are printed instead of being raised.
        """
        try:
            args = self.argparser.parse_args(line.split())
        except SystemExit:
            # argparse exits on invalid options; show the help instead
            self.argparser.print_help()
            return None

        with tempfile.TemporaryDirectory() as tmp_dir:
            file_path = os.path.join(tmp_dir, str(uuid.uuid4()))
            # explicit encoding so that the written source does not depend on
            # the locale of the machine
            with open(file_path + ext, "w", encoding="utf-8") as f:
                f.write(cell)
            try:
                self.compile(file_path)
                output = self.run(
                    file_path,
                    timeit=args.timeit,
                    profile=args.profile,
                    profiler_args=args.profiler_args,
                )
            except subprocess.CalledProcessError as e:
                helper.print_out(e.output.decode("utf8"))
                output = None
        return output
|
||||
@@ -1,107 +0,0 @@
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
from IPython.core.magic import Magics, cell_magic, magics_class
|
||||
from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring
|
||||
from common import helper
|
||||
|
||||
compiler = '/usr/local/cuda/bin/nvcc'
|
||||
profiler = '/usr/local/cuda/bin/ncu'
|
||||
|
||||
|
||||
def _parse_bool(value):
    """Convert a command line string to bool; "false", "0", "no", ... are False."""
    return value.strip().lower() not in {"", "0", "false", "f", "no", "n", "off"}


@magics_class
class NVCCPluginV2(Magics):
    """
    IPython magics that save CUDA cells as files and compile them together.

    Source files are written to the "src" directory inside the directory the
    plugin was created in, and the executable is written to "result.out" in
    that same directory. Output is printed through `helper.print_out`.
    """

    def __init__(self, shell):
        super().__init__(shell)
        # parser for the options of the cuda_run magic line
        self.argparser = helper.get_argparser()
        current_dir = os.getcwd()
        self.output_dir = os.path.join(current_dir, 'src')
        if not os.path.exists(self.output_dir):
            os.mkdir(self.output_dir)
            print(f'created output directory at {self.output_dir}')
        else:
            print(f'directory {self.output_dir} already exists')

        self.out = os.path.join(current_dir, "result.out")
        print(f'Out bin {self.out}')

    @staticmethod
    def compile(output_dir, file_paths, out):
        """
        Compile the given source file(s) into the executable `out`.

        Args:
            output_dir: Directory added to the include path with -I.
            file_paths: Either the path of one source file (str) or an
                iterable of source file paths.
            out: Path of the executable to create.

        Raises:
            subprocess.CalledProcessError: If the compiler exits with a
                non-zero status.
        """
        # every source file must be its own argv entry: no shell is involved,
        # so a space separated string would reach the compiler as one name
        if isinstance(file_paths, str):
            sources = [file_paths]
        else:
            sources = list(file_paths)
        res = subprocess.check_output(
            [compiler, '-I' + output_dir]
            + sources
            + ["-o", out, '-Wno-deprecated-gpu-targets'],
            stderr=subprocess.STDOUT,
        )
        helper.print_out(res.decode())

    def run(self, timeit=False, profile=False, profiler_args=None):
        """
        Execute `self.out` and print what it produced.

        Args:
            timeit: If True, the execution is measured with the timeit magic
                and the measurement is printed instead of the program output.
            profile: If True (and timeit is False), the executable is started
                through the module-level `profiler`.
            profiler_args: List of extra command line arguments for the
                profiler. None means no extra arguments.

        Returns:
            Always None; the output is printed, not returned.

        Raises:
            subprocess.CalledProcessError: If the process exits with a
                non-zero status.
        """
        if timeit:
            # the path is embedded with repr() so that quotes or backslashes
            # in it cannot break the generated statement
            stmt = (
                f"subprocess.check_output([{self.out!r}], "
                "stderr=subprocess.STDOUT)"
            )
            output = self.shell.run_cell_magic(
                magic_name="timeit", line="-q -o import subprocess", cell=stmt
            )
            # convert TimeitResult object to human readable string
            output = str(output)
        else:
            run_args = []
            if profile:
                run_args.extend([profiler] + (profiler_args or []))
            run_args.append(self.out)
            output = subprocess.check_output(
                run_args, stderr=subprocess.STDOUT
            ).decode('utf8')

        helper.print_out(output)
        return None

    @magic_arguments()
    @argument('-n', '--name', type=str, help='file name that will be produced by the cell. must end with .cu extension')
    @argument('-c', '--compile', type=_parse_bool, help='Should be compiled?')
    @cell_magic
    def cuda(self, line='', cell=None):
        """
        Cell magic: save the cell to `self.output_dir` under the given name
        and, when --compile is true, compile and run that single file.

        Returns a "File written in ..." message when nothing is compiled,
        otherwise the result of `run` (None).

        Raises:
            ValueError: If --name is missing or does not end with .cu or .h.
            OSError: If the output directory or the file cannot be created.
        """
        args = parse_argstring(self.cuda, line)
        if args.name is None or args.name.split('.')[-1] not in ['cu', 'h']:
            raise ValueError('name must end with .cu or .h')

        if not os.path.exists(self.output_dir):
            print('Output directory does not exist, creating')
            try:
                os.makedirs(self.output_dir, exist_ok=True)
            except OSError:
                print(f"Creation of the directory {self.output_dir} failed")
                # without the directory the file below cannot be written
                raise
            else:
                print(f"Successfully created the directory {self.output_dir}")

        file_path = os.path.join(self.output_dir, args.name)
        # explicit encoding so that the written source does not depend on
        # the locale of the machine
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(cell)

        if args.compile:
            try:
                self.compile(self.output_dir, file_path, self.out)
                # this magic's parser only defines --name and --compile, so
                # the run options fall back to their defaults when absent
                output = self.run(
                    timeit=getattr(args, "timeit", False),
                    profile=getattr(args, "profile", False),
                    profiler_args=getattr(args, "profiler_args", None),
                )
            except subprocess.CalledProcessError as e:
                helper.print_out(e.output.decode("utf8"))
                output = None
        else:
            output = f'File written in {file_path}'

        return output

    @cell_magic
    def cuda_run(self, line='', cell=None):
        """
        Cell magic: compile every .cu file found in `self.output_dir` into
        one executable and run it.

        `line` holds the options understood by `self.argparser`. When they
        cannot be parsed the parser help is printed and nothing is run.
        Compiler or runtime failures are printed instead of being raised.
        """
        try:
            args = self.argparser.parse_args(line.split())
        except SystemExit:
            # argparse exits on invalid options; show the help instead
            self.argparser.print_help()
            return None

        try:
            # sorted so that the compile command does not depend on the
            # arbitrary order of os.listdir
            cuda_src = [
                os.path.join(self.output_dir, x)
                for x in sorted(os.listdir(self.output_dir))
                if x.endswith('.cu')
            ]
            print(f'found sources: {cuda_src}')
            self.compile(self.output_dir, cuda_src, self.out)
            output = self.run(
                timeit=args.timeit,
                profile=args.profile,
                profiler_args=args.profiler_args,
            )
        except subprocess.CalledProcessError as e:
            helper.print_out(e.output.decode("utf8"))
            output = None

        return output
|
||||
Reference in New Issue
Block a user