From 887c809d07c6a4c300da1e44964cf8eb287469cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C8=98tefan-Cosmin=20Ciocan?= <57830279+cosminc98@users.noreply.github.com> Date: Wed, 27 Dec 2023 10:00:14 +0100 Subject: [PATCH] Add option to use NVIDIA Nsight Compute CLI profiler (#21) * Use NVIDIA Nsight Compute CLI profiler * Add profile and profiler-args options to argument parser. * Add missing comma to profiler-args option. * Use profile args in version 1 of the plugin * Change profiler-args option to take all remaining arguments * Change profiler_args type from string to list of strings * Add profile option to version 2 of the plugin * Add profiler usage instructions --- README.md | 9 +++++++++ common/helper.py | 23 +++++++++++++++++++++-- v1/v1.py | 12 ++++++++---- v2/v2.py | 14 +++++++++----- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 3a0ce60..b27ca3a 100644 --- a/README.md +++ b/README.md @@ -23,3 +23,12 @@ V2 brings support of multiple source and header files. %%cuda_run # This line just to bypass an exeption and can contain any text ``` + +- To profile your CUDA kernels using the NVIDIA Nsight Compute CLI profiler, you need to run +``` +%%cu --profile +``` +- You can add options to the profiler. Keep in mind that any argument after "--profiler-args" will be considered as a profiler argument. 
For example, to select which sections to collect metrics for, you need to run +``` +%%cu --profile --profiler-args --section SpeedOfLight --section MemoryWorkloadAnalysis --section Occupancy +``` diff --git a/common/helper.py b/common/helper.py index 4b5cef5..17dcfff 100644 --- a/common/helper.py +++ b/common/helper.py @@ -3,8 +3,27 @@ import argparse def get_argparser(): parser = argparse.ArgumentParser(description='NVCCPlugin params') - parser.add_argument("-t", "--timeit", action='store_true', - help='flag to return timeit result instead of stdout') + parser.add_argument( + '-t', + '--timeit', + action='store_true', + help='If set, returns the output of the "timeit" built-in ipython magic instead of stdout.', + ) + parser.add_argument( + '-p', + '--profile', + action='store_true', + help='If set, runs the nvidia nsight compute profiler. Has no effect if used with --timeit.', + ) + parser.add_argument( + '-a', + '--profiler-args', + type=str, + nargs=argparse.REMAINDER, + default=[], + help='Extra options that can be passed to the nvidia nsight compute profiler. 
' + 'Must be the last option given to the argument parser so you can pass arguments with dashes.', + ) return parser diff --git a/v1/v1.py b/v1/v1.py index 42188ae..ed1aa8e 100644 --- a/v1/v1.py +++ b/v1/v1.py @@ -7,6 +7,7 @@ from IPython.core.magic import Magics, cell_magic, magics_class from common import helper compiler = '/usr/local/cuda/bin/nvcc' +profiler = '/usr/local/cuda/bin/ncu' ext = '.cu' @@ -23,15 +24,18 @@ class NVCCPlugin(Magics): subprocess.check_output( [compiler, file_path + ext, "-o", file_path + ".out", '-Wno-deprecated-gpu-targets'], stderr=subprocess.STDOUT) - def run(self, file_path, timeit=False): + def run(self, file_path, timeit=False, profile=False, profiler_args=[]): if timeit: stmt = f"subprocess.check_output(['{file_path}.out'], stderr=subprocess.STDOUT)" output = self.shell.run_cell_magic( magic_name="timeit", line="-q -o import subprocess", cell=stmt) output = str(output) # convert TimeitResult object to human readable string else: - output = subprocess.check_output( - [file_path + ".out"], stderr=subprocess.STDOUT) + run_args = [] + if profile: + run_args.extend([profiler] + profiler_args) + run_args.append(file_path + ".out") + output = subprocess.check_output(run_args, stderr=subprocess.STDOUT) output = output.decode('utf8') helper.print_out(output) @@ -51,7 +55,7 @@ class NVCCPlugin(Magics): f.write(cell) try: self.compile(file_path) - output = self.run(file_path, timeit=args.timeit) + output = self.run(file_path, timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args) except subprocess.CalledProcessError as e: helper.print_out(e.output.decode("utf8")) output = None diff --git a/v2/v2.py b/v2/v2.py index 190094d..41511d6 100644 --- a/v2/v2.py +++ b/v2/v2.py @@ -6,6 +6,7 @@ from IPython.core.magic_arguments import argument, magic_arguments, parse_argstr from common import helper compiler = '/usr/local/cuda/bin/nvcc' +profiler = '/usr/local/cuda/bin/ncu' @magics_class @@ -32,15 +33,18 @@ class 
NVCCPluginV2(Magics): res = res.decode() helper.print_out(res) - def run(self, timeit=False): + def run(self, timeit=False, profile=False, profiler_args=[]): if timeit: stmt = f"subprocess.check_output(['{self.out}'], stderr=subprocess.STDOUT)" output = self.shell.run_cell_magic( magic_name="timeit", line="-q -o import subprocess", cell=stmt) output = str(output) # convert TimeitResult object to human readable string else: - output = subprocess.check_output( - [self.out], stderr=subprocess.STDOUT) + run_args = [] + if profile: + run_args.extend([profiler] + profiler_args) + run_args.append(self.out) + output = subprocess.check_output(run_args, stderr=subprocess.STDOUT) output = output.decode('utf8') helper.print_out(output) @@ -72,7 +76,7 @@ class NVCCPluginV2(Magics): if args.compile: try: self.compile(self.output_dir, file_path, self.out) - output = self.run(timeit=args.timeit) + output = self.run(timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args) except subprocess.CalledProcessError as e: helper.print_out(e.output.decode("utf8")) output = None @@ -95,7 +99,7 @@ class NVCCPluginV2(Magics): for x in cuda_src if x[-3:] == '.cu'] print(f'found sources: {cuda_src}') self.compile(self.output_dir, ' '.join(cuda_src), self.out) - output = self.run(timeit=args.timeit) + output = self.run(timeit=args.timeit, profile=args.profile, profiler_args=args.profiler_args) except subprocess.CalledProcessError as e: helper.print_out(e.output.decode("utf8")) output = None