mirror of
https://github.com/andreinechaev/nvcc4jupyter.git
synced 2026-06-13 18:50:47 +05:30
Add option to use NVIDIA Nsight Compute CLI profiler (#21)
* Use NVIDIA Nsight Compute CLI profiler
* Add profile and profiler-args options to argument parser.
* Add missing comma to profiler-args option.
* Use profile args in version 1 of the plugin
* Change profiler-args option to take all remaining arguments
* Change profiler_args type from string to list of strings
* Add profile option to version 2 of the plugin
* Add profiler usage instructions
This commit is contained in:
committed by
GitHub
parent
98c9faf45c
commit
887c809d07
@@ -23,3 +23,12 @@ V2 brings support of multiple source and header files.
|
|||||||
%%cuda_run
|
%%cuda_run
|
||||||
# This line is only here to bypass an exception and can contain any text
|
# This line is only here to bypass an exception and can contain any text
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- To profile your CUDA kernels with the NVIDIA Nsight Compute CLI profiler, run
|
||||||
|
```
|
||||||
|
%%cu --profile
|
||||||
|
```
|
||||||
|
- You can pass extra options to the profiler. Keep in mind that every argument after "--profiler-args" is treated as a profiler argument, so "--profiler-args" must be the last option on the line. For example, to select which sections to collect metrics for, run
|
||||||
|
```
|
||||||
|
%%cu --profile --profiler-args --section SpeedOfLight --section MemoryWorkloadAnalysis --section Occupancy
|
||||||
|
```
|
||||||
|
|||||||
+21
-2
@@ -3,8 +3,27 @@ import argparse
|
|||||||
|
|
||||||
def get_argparser():
|
def get_argparser():
|
||||||
parser = argparse.ArgumentParser(description='NVCCPlugin params')
|
parser = argparse.ArgumentParser(description='NVCCPlugin params')
|
||||||
parser.add_argument("-t", "--timeit", action='store_true',
|
parser.add_argument(
|
||||||
help='flag to return timeit result instead of stdout')
|
'-t',
|
||||||
|
'--timeit',
|
||||||
|
action='store_true',
|
||||||
|
help='If set, returns the output of the "timeit" built-in ipython magic instead of stdout.',
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-p',
|
||||||
|
'--profile',
|
||||||
|
action='store_true',
|
||||||
|
help='If set, runs the nvidia nsight compute profiler. Has no effect if used with --timeit.',
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-a',
|
||||||
|
'--profiler-args',
|
||||||
|
type=str,
|
||||||
|
nargs=argparse.REMAINDER,
|
||||||
|
default=[],
|
||||||
|
help='Extra options that can be passed to the nvidia nsight compute profiler. '
|
||||||
|
'Must be the last option given to the argument parser so you can pass arguments with dashes.',
|
||||||
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ from IPython.core.magic import Magics, cell_magic, magics_class
|
|||||||
from common import helper
|
from common import helper
|
||||||
|
|
||||||
compiler = '/usr/local/cuda/bin/nvcc'
|
compiler = '/usr/local/cuda/bin/nvcc'
|
||||||
|
profiler = '/usr/local/cuda/bin/ncu'
|
||||||
ext = '.cu'
|
ext = '.cu'
|
||||||
|
|
||||||
|
|
||||||
@@ -23,15 +24,18 @@ class NVCCPlugin(Magics):
|
|||||||
subprocess.check_output(
|
subprocess.check_output(
|
||||||
[compiler, file_path + ext, "-o", file_path + ".out", '-Wno-deprecated-gpu-targets'], stderr=subprocess.STDOUT)
|
[compiler, file_path + ext, "-o", file_path + ".out", '-Wno-deprecated-gpu-targets'], stderr=subprocess.STDOUT)
|
||||||
|
|
||||||
def run(self, file_path, timeit=False):
|
def run(self, file_path, timeit=False, profile=False, profiler_args=[]):
|
||||||
if timeit:
|
if timeit:
|
||||||
stmt = f"subprocess.check_output(['{file_path}.out'], stderr=subprocess.STDOUT)"
|
stmt = f"subprocess.check_output(['{file_path}.out'], stderr=subprocess.STDOUT)"
|
||||||
output = self.shell.run_cell_magic(
|
output = self.shell.run_cell_magic(
|
||||||
magic_name="timeit", line="-q -o import subprocess", cell=stmt)
|
magic_name="timeit", line="-q -o import subprocess", cell=stmt)
|
||||||
output = str(output) # convert TimeitResult object to human readable string
|
output = str(output) # convert TimeitResult object to human readable string
|
||||||
else:
|
else:
|
||||||
output = subprocess.check_output(
|
run_args = []
|
||||||
[file_path + ".out"], stderr=subprocess.STDOUT)
|
if profile:
|
||||||
|
run_args.extend([profiler] + profiler_args)
|
||||||
|
run_args.append(file_path + ".out")
|
||||||
|
output = subprocess.check_output(run_args, stderr=subprocess.STDOUT)
|
||||||
output = output.decode('utf8')
|
output = output.decode('utf8')
|
||||||
|
|
||||||
helper.print_out(output)
|
helper.print_out(output)
|
||||||
@@ -51,7 +55,7 @@ class NVCCPlugin(Magics):
|
|||||||
f.write(cell)
|
f.write(cell)
|
||||||
try:
|
try:
|
||||||
self.compile(file_path)
|
self.compile(file_path)
|
||||||
output = self.run(file_path, timeit=args.timeit)
|
output = self.run(file_path, timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
helper.print_out(e.output.decode("utf8"))
|
helper.print_out(e.output.decode("utf8"))
|
||||||
output = None
|
output = None
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from IPython.core.magic_arguments import argument, magic_arguments, parse_argstr
|
|||||||
from common import helper
|
from common import helper
|
||||||
|
|
||||||
compiler = '/usr/local/cuda/bin/nvcc'
|
compiler = '/usr/local/cuda/bin/nvcc'
|
||||||
|
profiler = '/usr/local/cuda/bin/ncu'
|
||||||
|
|
||||||
|
|
||||||
@magics_class
|
@magics_class
|
||||||
@@ -32,15 +33,18 @@ class NVCCPluginV2(Magics):
|
|||||||
res = res.decode()
|
res = res.decode()
|
||||||
helper.print_out(res)
|
helper.print_out(res)
|
||||||
|
|
||||||
def run(self, timeit=False):
|
def run(self, timeit=False, profile=False, profiler_args=[]):
|
||||||
if timeit:
|
if timeit:
|
||||||
stmt = f"subprocess.check_output(['{self.out}'], stderr=subprocess.STDOUT)"
|
stmt = f"subprocess.check_output(['{self.out}'], stderr=subprocess.STDOUT)"
|
||||||
output = self.shell.run_cell_magic(
|
output = self.shell.run_cell_magic(
|
||||||
magic_name="timeit", line="-q -o import subprocess", cell=stmt)
|
magic_name="timeit", line="-q -o import subprocess", cell=stmt)
|
||||||
output = str(output) # convert TimeitResult object to human readable string
|
output = str(output) # convert TimeitResult object to human readable string
|
||||||
else:
|
else:
|
||||||
output = subprocess.check_output(
|
run_args = []
|
||||||
[self.out], stderr=subprocess.STDOUT)
|
if profile:
|
||||||
|
run_args.extend([profiler] + profiler_args)
|
||||||
|
run_args.append(self.out)
|
||||||
|
output = subprocess.check_output(run_args, stderr=subprocess.STDOUT)
|
||||||
output = output.decode('utf8')
|
output = output.decode('utf8')
|
||||||
|
|
||||||
helper.print_out(output)
|
helper.print_out(output)
|
||||||
@@ -72,7 +76,7 @@ class NVCCPluginV2(Magics):
|
|||||||
if args.compile:
|
if args.compile:
|
||||||
try:
|
try:
|
||||||
self.compile(self.output_dir, file_path, self.out)
|
self.compile(self.output_dir, file_path, self.out)
|
||||||
output = self.run(timeit=args.timeit)
|
output = self.run(timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
helper.print_out(e.output.decode("utf8"))
|
helper.print_out(e.output.decode("utf8"))
|
||||||
output = None
|
output = None
|
||||||
@@ -95,7 +99,7 @@ class NVCCPluginV2(Magics):
|
|||||||
for x in cuda_src if x[-3:] == '.cu']
|
for x in cuda_src if x[-3:] == '.cu']
|
||||||
print(f'found sources: {cuda_src}')
|
print(f'found sources: {cuda_src}')
|
||||||
self.compile(self.output_dir, ' '.join(cuda_src), self.out)
|
self.compile(self.output_dir, ' '.join(cuda_src), self.out)
|
||||||
output = self.run(timeit=args.timeit)
|
output = self.run(timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args)
|
||||||
except subprocess.CalledProcessError as e:
|
except subprocess.CalledProcessError as e:
|
||||||
helper.print_out(e.output.decode("utf8"))
|
helper.print_out(e.output.decode("utf8"))
|
||||||
output = None
|
output = None
|
||||||
|
|||||||
Reference in New Issue
Block a user