Merge branch 'master' into master

axsaucedo · web-flow · commit 70b53097643a · 2026-03-08T11:24:45.000+01:00
diff --git a/examples/vulkan_ext_printf/CMakeLists.txt b/examples/vulkan_ext_printf/CMakeLists.txt
@@ -0,0 +1,34 @@
+cmake_minimum_required(VERSION 3.20)
+project(kompute_vulkan_extensions_printf)
+
+set(CMAKE_CXX_STANDARD 14)
+
+# Options
+# KOMPUTE_OPT_GIT_TAG is a string (a git tag/branch/commit), so it must be a
+# CACHE STRING variable: option() only creates boolean cache entries and would
+# turn "v0.9.0" into OFF, which FetchContent would then receive as GIT_TAG.
+set(KOMPUTE_OPT_GIT_TAG "v0.9.0" CACHE STRING "The tag of the repo to use for the example")
+option(KOMPUTE_OPT_FROM_SOURCE "Whether to build example from source or from git fetch repo" ON)
+
+if(KOMPUTE_OPT_FROM_SOURCE)
+    # Build Kompute from the enclosing checkout (two directories up).
+    add_subdirectory(../../ ${CMAKE_CURRENT_BINARY_DIR}/kompute_build)
+else()
+    # Download Kompute at the requested tag and build it as part of this project.
+    include(FetchContent)
+    FetchContent_Declare(kompute GIT_REPOSITORY https://github.com/KomputeProject/kompute.git
+        GIT_TAG ${KOMPUTE_OPT_GIT_TAG})
+    FetchContent_MakeAvailable(kompute)
+    include_directories(${kompute_SOURCE_DIR}/src/include)
+endif()
+
+# Compiling shader
+# To add more shaders simply copy the vulkan_compile_shader command and replace it with your new shader
+vulkan_compile_shader(
+    INFILE shader/example_shader.comp
+    OUTFILE shader/example_shader.hpp
+    NAMESPACE "shader")
+
+# Then add it to the library, so you can access it later in your code
+add_library(shader INTERFACE "shader/example_shader.hpp")
+target_include_directories(shader INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
+
+# Setting up main example code
+add_executable(example_shader_printf src/main.cpp)
+target_link_libraries(example_shader_printf PRIVATE shader kompute::kompute)
+
diff --git a/examples/vulkan_ext_printf/README.md b/examples/vulkan_ext_printf/README.md
@@ -0,0 +1,53 @@
+# Kompute Debugging GLSL Shader using DebugPrint
+
+This folder contains an end-to-end Kompute example that implements a GLSL shader with a debug print statement.
+This example is structured such that you will be able to extend it for your project.
+It contains a CMake build configuration that can be used in your production applications.
+
+## Further information
+Debugging Vulkan shaders, especially compute shaders, can be very difficult to do even with the aid
+of a powerful debugging tool like RenderDoc. Debug Printf is a recent Vulkan feature that allows
+developers to debug their shaders by inserting Debug Print statements. A debug print statement works
+much like the C `printf` statement, except that it is executed on many compute cores at the same time, so you need some logic to control when it fires; otherwise you can easily be overwhelmed by debug messages.
+<br>
+
+This feature is now supported within RenderDoc in a way that allows for per-invocation inspection of values in a shader.
+This article describes how to instrument your GLSL or HLSL shaders with Debug Printf and how to
+inspect and debug with them in the terminal, using vkconfig, or with environment variables.
+
+For a full walkthrough of the process please see my article at https://medium.com/@evanokeeffe/using-debug-printf-in-vulkan-kompute-shaders-2aaf30bdb96c
+
+## Building the example
+
+You will notice that it's a standalone project, so you can re-use it for your application.
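+By default it builds Kompute from the enclosing source tree; pass `-DKOMPUTE_OPT_FROM_SOURCE=OFF` to consume Kompute as a dependency through CMake's [`FetchContent`](https://cmake.org/cmake/help/latest/module/FetchContent.html) module instead.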
+To build you just need to run the CMake command in this folder as follows:
+
+```bash
+git clone https://github.com/KomputeProject/kompute.git
+cd kompute/examples/vulkan_ext_printf
+mkdir build
+cd build
+cmake ..
+cmake --build .
+```
+
+## Executing
+
+From inside the `build/` directory run:
+
+### Linux
+
+```bash
+./example_shader_printf
+```
+
+## Pre-requisites
+
+In order to run this example, you will need the following dependencies:
+
+* REQUIRED
+    + The Vulkan SDK must be installed
+
+For the Vulkan SDK, the simplest way to install it is through [their website](https://vulkan.lunarg.com/sdk/home). You just have to follow the instructions for the relevant platform.
+
diff --git a/examples/vulkan_ext_printf/shader/example_shader.comp b/examples/vulkan_ext_printf/shader/example_shader.comp
@@ -0,0 +1,23 @@
+#version 450
+// To use Debug Printf in GLSL shaders, you need to enable the GL_EXT_debug_printf extension.
+// Then add debugPrintfEXT calls at the locations in your shader where you want to print
+// messages and/or values
+#extension GL_EXT_debug_printf : enable
+
+// The execution structure
+layout (local_size_x = 1) in;
+
+// The buffers are provided via the tensors
+layout(binding = 0) buffer bufA { float a[]; };
+layout(binding = 1) buffer bufB { float b[]; };
+layout(binding = 2) buffer bufOut { float o[]; };
+
+void main() {
+    // The x component of the global invocation id selects the element to process.
+    const uint elementIdx = gl_GlobalInvocationID.x;
+
+    // Element-wise product of the two input buffers, stored in the output buffer.
+    const float lhs = a[elementIdx];
+    const float rhs = b[elementIdx];
+    const float product = lhs * rhs;
+    o[elementIdx] = product;
+
+    // debugPrintfEXT is used much like printf in C. Keep each message short:
+    // the default debug message size is 1024 bytes.
+    debugPrintfEXT("The result of %f x %f = %f  \n", lhs, rhs, product);
+}
diff --git a/examples/vulkan_ext_printf/src/main.cpp b/examples/vulkan_ext_printf/src/main.cpp
@@ -0,0 +1,76 @@
+#include <algorithm> // std::transform
+#include <bits/stdc++.h>
+#include <cstdint> // uint32_t
+#include <exception> // std::exception
+#include <functional> // std::multiplies
+#include <iostream>
+#include <memory>
+#include <random>
+#include <string>
+#include <vector>
+
+#include <kompute/Kompute.hpp>
+#include <shader/example_shader.hpp>
+
+/**
+ * Builds a vector of n floats drawn from a uniform real distribution over
+ * the range given by min_val and max_val, seeded from std::random_device.
+ *
+ * @param n Number of values to generate; zero or a negative count yields an
+ *          empty vector.
+ * @param min_val Lower bound of the distribution (must not exceed max_val).
+ * @param max_val Upper bound of the distribution.
+ * @return A vector holding n randomly generated floats.
+ */
+std::vector<float> generate_random_floats(int n, float min_val, float max_val) {
+  // A negative count would be converted to a huge unsigned size by the
+  // vector constructor, so treat it like an empty request instead.
+  if (n <= 0) {
+    return {};
+  }
+  // 1. Obtain a random seed from the hardware
+  std::random_device rd;
+  // 2. Initialize the generator with the seed
+  std::mt19937 gen(rd());
+  // 3. Define the distribution range from min_val to max_val
+  std::uniform_real_distribution<float> dis(min_val, max_val);
+  // Pre-size the vector, then fill every slot with a freshly drawn value
+  std::vector<float> vectorF(static_cast<std::size_t>(n));
+  std::generate(vectorF.begin(), vectorF.end(), [&]() { return dis(gen); });
+  return vectorF;
+}
+
+/**
+ * Runs the debug-printf example: multiplies two random float vectors
+ * element-wise with the compiled example shader and prints the result.
+ *
+ * Usage: example_shader_printf [device_id]
+ * device_id is the index of the physical device to use (default 0).
+ * Returns 1 if device_id is not a non-negative integer.
+ */
+int main(int argc, char *argv[])
+{
+    uint32_t device_id = 0;
+
+    if (argc > 1) {
+        // Parse strictly: the whole argument must be a non-negative integer.
+        // (atoi would silently turn garbage into 0 and accept negative ids.)
+        const std::string arg = argv[1];
+        std::size_t consumed = 0;
+        int parsed = -1;
+        try {
+            parsed = std::stoi(arg, &consumed);
+        } catch (const std::exception&) {
+            consumed = 0; // not a number, or out of range for int
+        }
+        if (consumed == 0 || consumed != arg.size() || parsed < 0) {
+            std::cerr << "Invalid device id '" << arg
+                      << "': expected a non-negative integer" << std::endl;
+            return 1;
+        }
+        device_id = static_cast<uint32_t>(parsed);
+    } else {
+        std::cout<<"Using device 0"<<std::endl;
+    }
+
+    // Make sure to request the extension; use vulkaninfo to check whether your
+    // GPU's Vulkan driver supports it:
+    // vulkaninfo | grep VK_KHR_shader_non_semantic_info
+    const std::vector<std::string> desiredExtensions = {
+      "VK_KHR_shader_non_semantic_info",
+    };
+    // No explicit queue families are requested here.
+    const std::vector<uint32_t> familyQueueIndices = {};
+
+    kp::Manager mgr(device_id, familyQueueIndices, desiredExtensions);
+
+    // Unsigned and constant so it can be used in the brace-initialised
+    // workgroup below without a narrowing conversion.
+    constexpr uint32_t vector_length = 10;
+
+    const std::vector<float> A = generate_random_floats(vector_length, 1.0f, 10.0f);
+    const std::vector<float> B = generate_random_floats(vector_length, 1.0f, 10.0f);
+    // Output buffer starts zero-filled.
+    const std::vector<float> C(vector_length, 0.0f);
+
+    std::shared_ptr<kp::TensorT<float>> tensorInA = mgr.tensorT<float>(A);
+    std::shared_ptr<kp::TensorT<float>> tensorInB = mgr.tensorT<float>(B);
+    std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensorT<float>(C);
+
+    // Order matters: it has to match the shader's binding indices 0, 1, 2.
+    const std::vector<std::shared_ptr<kp::Memory>> params = { tensorInA,
+                                                              tensorInB,
+                                                              tensorOut };
+
+    // One workgroup per element (the shader declares local_size_x = 1).
+    kp::Workgroup workgroup = { vector_length, 1, 1 };
+
+    const std::vector<uint32_t> spirv(shader::EXAMPLE_SHADER_COMP_SPV.begin(),
+                                      shader::EXAMPLE_SHADER_COMP_SPV.end());
+    std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, spirv, workgroup);
+
+    // Record the sync-device, dispatch and sync-local operations, then evaluate.
+    mgr.sequence()
+      ->record<kp::OpSyncDevice>(params)
+      ->record<kp::OpAlgoDispatch>(algo)
+      ->record<kp::OpSyncLocal>(params)
+      ->eval();
+
+    std::cout << "Output: {  ";
+    for (const float& elem : tensorOut->vector()) { std::cout << elem << "  ";}
+    std::cout << "}" << std::endl;
+}
diff --git a/src/Manager.cpp b/src/Manager.cpp
@@ -398,6 +398,22 @@ Manager::createDevice(const std::vector<uint32_t>& familyQueueIndices,
 
         this->mComputeQueueFamilyIndices.push_back(computeQueueFamilyIndex);
     } else {
+        std::vector<vk::QueueFamilyProperties> allQueueFamilyProperties =
+          physicalDevice.getQueueFamilyProperties();
+        for (auto queueIndexGiven : familyQueueIndices) {
+            if (queueIndexGiven >= allQueueFamilyProperties.size()) {
+                throw std::runtime_error(
+                  "Given family queue index does not exists. Index given: " +
+                  std::to_string(queueIndexGiven));
+            }
+            if (!(allQueueFamilyProperties[queueIndexGiven].queueFlags &
+                  vk::QueueFlagBits::eCompute)) {
+                throw std::runtime_error(
+                  "Given family queue index does not support compute "
+                  "operations. Index given: " +
+                  std::to_string(queueIndexGiven));
+            }
+        }
         this->mComputeQueueFamilyIndices = familyQueueIndices;
     }
 
diff --git a/src/logger/CMakeLists.txt b/src/logger/CMakeLists.txt
@@ -56,7 +56,6 @@ target_compile_definitions(kp_logger INTERFACE KOMPUTE_OPT_ACTIVE_LOG_LEVEL=KOMP
 if(NOT KOMPUTE_OPT_LOG_LEVEL_DISABLED)
     if(KOMPUTE_OPT_USE_SPDLOG)
         target_link_libraries(kp_logger PUBLIC spdlog::spdlog)
-        target_compile_definitions(spdlog INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})
         target_compile_definitions(kp_logger INTERFACE SPDLOG_ACTIVE_LEVEL=SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL})
         message(STATUS "setting SPDLOG_ACTIVE_LEVEL to SPDLOG_LEVEL_${KOMPUTE_OPT_LOG_LEVEL}")
 
diff --git a/test/TestAsyncOperations.cpp b/test/TestAsyncOperations.cpp
@@ -8,14 +8,32 @@
 #include "kompute/logger/Logger.hpp"
 #include "shaders/Utils.hpp"
 
+namespace {
+/**
+ * Collects the indices of all queue families of the given physical device
+ * whose queue flags include the compute bit.
+ *
+ * @param device Physical device whose queue families are inspected.
+ * @return Indices of the compute-capable queue families, in ascending order.
+ */
+std::vector<uint32_t>
+distinctFamilyQueueIndices(const vk::PhysicalDevice& device)
+{
+    std::vector<uint32_t> computeFamilies;
+
+    // Walk the families in order, tracking the index alongside the entry.
+    uint32_t familyIndex = 0;
+    for (const vk::QueueFamilyProperties& family :
+         device.getQueueFamilyProperties()) {
+        if (family.queueFlags & vk::QueueFlagBits::eCompute) {
+            computeFamilies.push_back(familyIndex);
+        }
+        ++familyIndex;
+    }
+
+    return computeFamilies;
+}
+} // namespace
+
 TEST(TestAsyncOperations, TestManagerParallelExecution)
 {
-    // This test is built for NVIDIA 1650. It assumes:
-    // * Queue family 0 and 2 have compute capabilities
+    // This test assumes:
+    // * There are at least 2 different Queue families with compute capabilities
     // * GPU is able to process parallel shader code across different families
-    uint32_t size = 10;
+    constexpr uint32_t size = 10;
 
-    uint32_t numParallel = 2;
+    constexpr uint32_t numParallel = 2;
 
     std::string shader(R"(
         #version 450
@@ -79,7 +97,18 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
         EXPECT_EQ(inputsSyncB[i]->vector<float>(), resultSync);
     }
 
-    kp::Manager mgrAsync(0, { 0, 2 });
+    constexpr uint32_t deviceId =
+      0u; // device 0 exists, because "mgr" could be created already
+    auto queues = distinctFamilyQueueIndices(
+      mgr.getVkInstance()->enumeratePhysicalDevices().at(deviceId));
+    if (queues.size() < numParallel) {
+        GTEST_SKIP() << "GPU does not support multiple compute queues. Only "
+                     << queues.size() << " are supported. Skipping test.";
+    }
+
+    queues.resize(numParallel);
+
+    kp::Manager mgrAsync(deviceId, std::move(queues));
 
     std::vector<std::shared_ptr<kp::Memory>> inputsAsyncB;
 
@@ -118,7 +147,9 @@ TEST(TestAsyncOperations, TestManagerParallelExecution)
     }
 
     // The speedup should be at least 40%
-    EXPECT_LT(durationAsync, durationSync * 0.6);
+    EXPECT_LT(durationAsync, durationSync * 0.6)
+      << "There was no speedup in using multiple queues from different "
+         "QueueFamilies. Maybe your GPU does not support parallel execution.";
 }
 
 TEST(TestAsyncOperations, TestManagerAsyncExecution)