{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "4_qHS_1yjcrK" }, "source": [ "# Compiling CUDA with OpenCV\n", "In this notebook we will show how to compile CUDA C++ code with the OpenCV library, which we will use to load an image and save it back to disk after applying a blur kernel.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "2XyG-R3nk1p6" }, "source": [ "## Setup" ] }, { "cell_type": "markdown", "metadata": { "id": "J2iU1zyrK4FF" }, "source": [ "Install and load the nvcc4jupyter extension, which will allow us to run CUDA C++ code using cell magics. See [the documentation](https://nvcc4jupyter.readthedocs.io/en/latest/usage.html) for details." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "K1pfzEEsjlFD", "outputId": "f14a4888-9215-4280-e723-1a3e1477135d" }, "outputs": [], "source": [ "!pip install nvcc4jupyter" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "w-4m2tFtlt_M", "outputId": "9118eb6e-63ee-426f-a618-fb35d9750b81" }, "outputs": [], "source": [ "%load_ext nvcc4jupyter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make sure OpenCV is installed." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%capture\n", "!apt update && apt install -y libopencv-dev" ] }, { "cell_type": "markdown", "metadata": { "id": "hBZR7kZkk4UV" }, "source": [ "## Code" ] }, { "cell_type": "markdown", "metadata": { "id": "JppQ-78qLYjP" }, "source": [ "### Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "vYyG7Gd0vJJ0" }, "outputs": [], "source": [ "import os\n", "import subprocess\n", "from pathlib import Path\n", "from IPython.display import Image" ] }, { "cell_type": "markdown", "metadata": { "id": "Zhs1emaLLemz" }, "source": [ "### Download an image" ] }, { "cell_type": "markdown", "metadata": { "id": "1vyu_yKStyKG" }, "source": [ "Get a random image of IMG_SIZE x IMG_SIZE pixels." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 417 }, "id": "T3yN-g8dvZ4P", "outputId": "d5b8602c-6a1a-4de0-9fbf-8b72ef9f6071" }, "outputs": [], "source": [ "IMG_SIZE = 400\n", "os.environ[\"IMG_SIZE\"] = str(IMG_SIZE)\n", "IMG_FNAME = \"image.jpg\"\n", "os.environ[\"IMG_FNAME\"] = IMG_FNAME\n", "\n", "!wget -O $IMG_FNAME https://picsum.photos/$IMG_SIZE.jpg &> /dev/null\n", "Image(filename=IMG_FNAME)" ] }, { "cell_type": "markdown", "metadata": { "id": "BeOimvKtNluI" }, "source": [ "### Compiler arguments\n", "These are options that you need to pass to the compiler in order to link with the OpenCV library." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# make sure the file containing the necessary compilation flags for the opencv library\n", "# exists in a directory in the PKG_CONFIG_PATH environment variable\n", "for path in Path('/usr/lib').rglob('opencv4.pc'):\n", " os.environ[\"PKG_CONFIG_PATH\"] = os.path.dirname(str(path))\n", " break\n", " \n", "# get the compilation flags required to compile our CUDA C++ code with opencv\n", "COMPILER_ARGS = subprocess.check_output([\"pkg-config\", \"--cflags\", \"--libs\", \"opencv4\"]).decode().strip()\n", "COMPILER_ARGS = f\"-I/usr/include/opencv4 {COMPILER_ARGS}\"\n", "print(COMPILER_ARGS)" ] }, { "cell_type": "markdown", "metadata": { "id": "SAQ1x3_JNvhT" }, "source": [ "### Blurring the image" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%cuda_group_save -n \"error_handling.h\" -g \"shared\"\n", "\n", "// error checking macro\n", "#define cudaCheckErrors(msg) \\\n", " do { \\\n", " cudaError_t __err = cudaGetLastError(); \\\n", " if (__err != cudaSuccess) { \\\n", " fprintf(stderr, \"Fatal error: %s (%s at %s:%d)\\n\", \\\n", " msg, cudaGetErrorString(__err), \\\n", " __FILE__, __LINE__); \\\n", " fprintf(stderr, \"*** FAILED - ABORTING\\n\"); \\\n", " exit(1); \\\n", " } \\\n", " } while (0)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%cuda_group_save -n \"blur_kernel.h\" -g \"shared\"\n", "\n", "#define BLUR_SIZE 5\n", "\n", "// kernel taken from https://stackoverflow.com/a/65973288\n", "__global__ void blurKernel(\n", " unsigned char* in,\n", " unsigned char* out,\n", " int width,\n", " int height,\n", " int num_channel,\n", " int channel\n", ") {\n", "\n", " int col = blockIdx.x * blockDim.x + threadIdx.x;\n", " int row = blockIdx.y * blockDim.y + threadIdx.y;\n", "\n", " if(col < width && row < height) {\n", " int pixVal = 0;\n", " int pixels = 0;\n", " for(int blurRow = -BLUR_SIZE; blurRow < BLUR_SIZE + 1; ++blurRow) {\n", " for(int blurCol = -BLUR_SIZE; blurCol < BLUR_SIZE + 1; ++blurCol) {\n", " int curRow = row + blurRow;\n", " int curCol = col + blurCol;\n", " if(curRow > -1 && curRow < height && curCol > -1 && curCol < width) {\n", " pixVal += in[curRow * width * num_channel + curCol * num_channel + channel];\n", " pixels++;\n", " }\n", " }\n", " }\n", " out[row * width * num_channel + col * num_channel + channel] = (unsigned char)(pixVal/pixels);\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hhkNS3HJlFlC", "outputId": "bb0b3fad-f132-4a26-e247-32bf5e29c1a4" }, "outputs": [], "source": [ "%%cuda --compiler-args \"$COMPILER_ARGS\"\n", "\n", "#include \n", "\n", "#include \n", "#include \n", "\n", "#include \"error_handling.h\"\n", "#include \"blur_kernel.h\"\n", "\n", "#define R 0\n", "#define G 1\n", "#define B 2\n", "\n", "int main()\n", "{\n", " std::string image_path = cv::samples::findFile(\"image.jpg\");\n", " cv::Mat img = imread(image_path, cv::IMREAD_COLOR);\n", " if(img.empty())\n", " {\n", " std::cerr << \"Could not read the image: \" << image_path << std::endl;\n", " return 1;\n", " }\n", "\n", " // image shape\n", " int width = img.cols;\n", " int height = img.rows;\n", " int n_channels = 3; // hard-coded RGB processing\n", "\n", " // we will read and write directly into the data array of the OpenCV Matrix\n", " unsigned char *host_image_input = img.data;\n", " unsigned char *host_image_output = img.data;\n", "\n", " // allocate memory for device arrays\n", " unsigned char* dev_image_input = NULL;\n", " unsigned char* dev_image_output = NULL;\n", " cudaMalloc(\n", " (void**)&dev_image_input,\n", " sizeof(unsigned char) * height * width * n_channels\n", " );\n", " cudaMalloc(\n", " (void**)&dev_image_output,\n", " sizeof(unsigned char) * height * width * n_channels\n", " );\n", " cudaCheckErrors(\"cudaMalloc failure\");\n", "\n", " // transfer data from host to device for processing on GPU\n", " cudaMemcpy(\n", " dev_image_input,\n", " host_image_input,\n", " sizeof(unsigned char) * height * width * n_channels,\n", " cudaMemcpyHostToDevice\n", " );\n", " cudaCheckErrors(\"cudaMemcpy H2D failure\");\n", "\n", " // run a blur kernel on each channel\n", " dim3 blockSize(16, 16, 1);\n", " dim3 gridSize(width/blockSize.x, height/blockSize.y, 1);\n", " blurKernel<<>>(dev_image_input, dev_image_output, width, height, n_channels, R);\n", " blurKernel<<>>(dev_image_input, dev_image_output, width, height, n_channels, G);\n", " blurKernel<<>>(dev_image_input, dev_image_output, width, height, n_channels, B);\n", "\n", " cudaDeviceSynchronize();\n", " cudaCheckErrors(\"kernel failure\");\n", "\n", " // copy results back to host\n", " cudaMemcpy(\n", " host_image_output,\n", " dev_image_output,\n", " sizeof(unsigned char) * height * width * n_channels,\n", " cudaMemcpyDeviceToHost\n", " );\n", " cudaCheckErrors(\"cudaMemcpy D2H failure\");\n", "\n", " cudaFree(dev_image_input);\n", " cudaFree(dev_image_output);\n", "\n", " // save the blurred image to disk\n", " cv::imwrite(\"image_blurred.jpg\", img);\n", " return 0;\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 417 }, "id": "Bn9X_cm4rALH", "outputId": "ead0c3b5-013b-45a3-f91a-7866d33b2404" }, "outputs": [], "source": [ "Image(filename=\"image_blurred.jpg\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [], "toc_visible": true }, "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [], "dockerImageVersionId": 30716, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 4 }