#include <algorithm>   // std::transform
#include <cstddef>     // std::size_t
#include <cstdlib>     // std::strtol
#include <functional>  // std::multiplies
#include <iostream>
#include <memory>
#include <random>
#include <string>
#include <vector>

#include <bits/stdc++.h>

#include <kompute/Kompute.hpp>
#include <shader/example_shader.hpp>

/// Generate `n` uniformly distributed random floats in [min_val, max_val).
///
/// @param n        Number of values to produce; n <= 0 yields an empty vector
///                 (the original code would pass a negative n straight into the
///                 vector constructor, overflowing to a huge size_t).
/// @param min_val  Inclusive lower bound of the distribution.
/// @param max_val  Upper bound of the distribution; must be >= min_val.
///                 min_val == max_val is valid and produces that constant.
/// @return Vector of n freshly drawn random floats.
std::vector<float> generate_random_floats(int n, float min_val, float max_val) {
    // Guard against non-positive counts before the size_t conversion below.
    if (n <= 0) {
        return {};
    }
    // Seed a Mersenne Twister engine from the hardware entropy source.
    std::random_device rd;
    std::mt19937 gen(rd());
    // Uniform real distribution over [min_val, max_val).
    std::uniform_real_distribution<float> dis(min_val, max_val);

    std::vector<float> values(static_cast<std::size_t>(n));
    // Fill every slot with an independent draw from the distribution.
    std::generate(values.begin(), values.end(), [&gen, &dis] { return dis(gen); });
    return values;
}
| 27 | + |
| 28 | +int main(int argc, char *argv[]) |
| 29 | +{ |
| 30 | + int device_id = 0; |
| 31 | + |
| 32 | + if(argc>1){ |
| 33 | + device_id = atoi(argv[1]); |
| 34 | + }else{ |
| 35 | + std::cout<<"Using device 0"<<std::endl; |
| 36 | + } |
| 37 | + |
| 38 | + // make sure to add the extension, check vulkan_info to see if your GPU vulkan driver supports the extension |
| 39 | + // vulkaninfo | grep VK_KHR_shader_non_semantic_info |
| 40 | + const std::vector<std::string> desiredExtensions = std::vector<std::string>({ |
| 41 | + "VK_KHR_shader_non_semantic_info", |
| 42 | + }); |
| 43 | + const std::vector<uint32_t> familyQueueIndices = std::vector<uint32_t>({}); |
| 44 | + |
| 45 | + kp::Manager mgr(device_id, familyQueueIndices, desiredExtensions); |
| 46 | + |
| 47 | + int vector_length = 10; |
| 48 | + |
| 49 | + const std::vector<float> A = generate_random_floats(vector_length, 1.0, 10.0); |
| 50 | + const std::vector<float> B = generate_random_floats(vector_length, 1.0, 10.0); |
| 51 | + const std::vector<float> C = generate_random_floats(vector_length, 0.0, 0.0); |
| 52 | + |
| 53 | + std::shared_ptr<kp::TensorT<float>> tensorInA = mgr.tensorT<float>(A); |
| 54 | + std::shared_ptr<kp::TensorT<float>> tensorInB = mgr.tensorT<float>(B); |
| 55 | + std::shared_ptr<kp::TensorT<float>> tensorOut = mgr.tensorT<float>(C); |
| 56 | + |
| 57 | + const std::vector<std::shared_ptr<kp::Memory>> params = { tensorInA, |
| 58 | + tensorInB, |
| 59 | + tensorOut }; |
| 60 | + |
| 61 | + kp::Workgroup workgroup = { vector_length, 1, 1 }; |
| 62 | + |
| 63 | + const std::vector<uint32_t> shader = std::vector<uint32_t>( |
| 64 | + shader::EXAMPLE_SHADER_COMP_SPV.begin(), shader::EXAMPLE_SHADER_COMP_SPV.end()); |
| 65 | + std::shared_ptr<kp::Algorithm> algo = mgr.algorithm(params, shader, workgroup); |
| 66 | + |
| 67 | + mgr.sequence() |
| 68 | + ->record<kp::OpSyncDevice>(params) |
| 69 | + ->record<kp::OpAlgoDispatch>(algo) |
| 70 | + ->record<kp::OpSyncLocal>(params) |
| 71 | + ->eval(); |
| 72 | + |
| 73 | + std::cout << "Output: { "; |
| 74 | + for (const float& elem : tensorOut->vector()) { std::cout << elem << " ";} |
| 75 | + std::cout << "}" << std::endl; |
| 76 | +} |