From ab7c9634145586b363b5633a841f88ce9d2993b2 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 10:23:45 +0200 Subject: [PATCH 01/18] feat: add nvidia-tegra-nvgpu package for Jetson Orin NX (GA10B) Adds OE4T-patched GPU driver stack for NVIDIA Jetson Orin NX (Tegra234 / GA10B): - OE4T host1x + host1x-fence: GA10B syncpoint support with ERRATA_SYNCPT_INVALID_ID_0 fix - nvmap, mc-utils, governor_pod_scaling: standard Tegra support modules - nvhost-ctrl-shim: /dev/nvhost-ctrl userspace interface for JetPack 6 CUDA runtime - nvgpu: main GA10B GPU driver (OE4T patches, Clang build, kernel 6.18 compat) The nvhost-ctrl-shim provides hardware syncpoint interrupt support for cudaStreamSynchronize via NVHOST_IOCTL_CTRL_SYNC_FENCE_CREATE + SYNC_FILE_EXTRACT, enabling full CUDA throughput instead of CPU semaphore polling. Built with Clang (LLVM=1), requires OE4T linux-nv-oot (wip-r36.5-take-2) for kernel 6.18 compatibility. CONFIG_TEGRA_GK20A_NVHOST=y uses OE4T host1x with HOST1X_SYNCPT_GPU support. Tested: ~60 tok/s qwen2.5:0.5b on Jetson Orin NX 16GB with Talos Linux v1.13. 
Continues: #1166 Signed-off-by: Alexander Schwankner --- Pkgfile | 11 + nvidia-tegra-nvgpu/nvhost_ctrl_shim.c | 717 ++++++++++++++++++++++++++ nvidia-tegra-nvgpu/pkg.yaml | 390 ++++++++++++++ 3 files changed, 1118 insertions(+) create mode 100644 nvidia-tegra-nvgpu/nvhost_ctrl_shim.c create mode 100644 nvidia-tegra-nvgpu/pkg.yaml diff --git a/Pkgfile b/Pkgfile index c85d88e26..7b08ebaa5 100644 --- a/Pkgfile +++ b/Pkgfile @@ -288,5 +288,16 @@ vars: gdrcopy_version: v2.5.2 gdrcopy_sha256: 32bc7b2c198dd97ec251de0ff4823252c95e31a4c79a5f843c82514c9af2052b gdrcopy_sha512: c717f118eff8cd5a8dc35613c3881818f8b71dc493461dd0151ce7c882f8e2c2d852e22733fab4e2bec57219e10eec874c11b4fad90dd4815ae572840ed19d28 + + # OE4T (NVIDIA Tegra) kernel modules for Jetson Orin NX (Tegra234 / GA10B) + oe4t_nvgpu_commit: d530a48d64f9ad3020d9f3307f53e8dde8e3fba1 + oe4t_nvgpu_sha256: adc5864edf76d986866e386803a9e628ee229e69ea34867b92b978a0b44f3d54 + oe4t_nvgpu_sha512: a7c7f0b5d3174bf41abc77c77009f46182358f93936aedbe4993e63ff7fc94e21bfd83c3fa0b41af5836866b9c200427504d8f26685d567c11722e7a7bfd3ed9 + oe4t_nv_oot_commit: ccf7646c57462776fe1093af6643c54653f59861 + oe4t_nv_oot_sha256: d1957d2c4908a37b4c040aef3a7413a9fbb3adcc8575cc9347c58af1c8b95169 + oe4t_nv_oot_sha512: 66b8b4a0672a2c044d42e58914a6d0000fc3a01f1343fa39190794e5badbcb52cd3c2254bd3ca2b6be86286dadbbe60db5cb26c74264747b783937443e287a33 + oe4t_hwpm_commit: 4d8a6998760d85f98637dbf61597bfbb88158206 + oe4t_hwpm_sha256: 96c7656bdad0bf330e7fd58981b8a4eec4717a76840cefbe84e720d88b46be55 + oe4t_hwpm_sha512: 971b91fcae284c59dbe411356109bce9b1a7884b8fac41c9683c79bf3eddef606e71ebaa9c06ad2389b2ba382c3c1125fabe0cbaeb5edac857e218077ed24ef9 labels: org.opencontainers.image.source: https://github.com/siderolabs/pkgs diff --git a/nvidia-tegra-nvgpu/nvhost_ctrl_shim.c b/nvidia-tegra-nvgpu/nvhost_ctrl_shim.c new file mode 100644 index 000000000..839c2b046 --- /dev/null +++ b/nvidia-tegra-nvgpu/nvhost_ctrl_shim.c @@ -0,0 +1,717 @@ +// SPDX-License-Identifier: 
GPL-2.0-only +// +// nvhost_ctrl_shim.c — nvhost-ctrl userspace API shim for Talos Linux / Jetson Orin NX +// +// Per-ioctl trace logging uses pr_debug — enable at runtime with: +// echo "file nvhost_ctrl_shim.c +p" > /sys/kernel/debug/dynamic_debug/control +// +// Provides /dev/nvhost-ctrl with the NVHOST_IOCTL_CTRL_* interface, +// bridging to the OOT host1x syncpoint kernel API. +// +// This allows libnvrm_host1x.so (JetPack 6 CUDA runtime) to use hardware +// syncpoint interrupts for cudaStreamSynchronize — replacing the CPU semaphore +// busy-wait with interrupt-driven sync. +// +// Symbol dependencies (all from host1x.ko): +// host1x_syncpt_get_by_id_noref, host1x_syncpt_read, host1x_syncpt_read_max, +// host1x_fence_create, host1x_fence_extract +// +// Supported ioctls (from linux-nv-oot/include/uapi/linux/nvhost_ioctl.h): +// NVHOST_IOCTL_CTRL_GET_VERSION (7) → return 1 +// NVHOST_IOCTL_CTRL_SYNCPT_READ (1) → host1x_syncpt_read() +// NVHOST_IOCTL_CTRL_SYNCPT_READ_MAX (8) → host1x_syncpt_read_max() +// NVHOST_IOCTL_CTRL_SYNCPT_WAITMEX (9) → dma_fence_wait_timeout() [interrupt-driven] +// NVHOST_IOCTL_CTRL_SYNC_FENCE_CREATE (11) → host1x_fence_create() → sync_file fd +// NVHOST_IOCTL_CTRL_GET_CHARACTERISTICS (14) → return Orin hw syncpt info +// NVHOST_IOCTL_CTRL_POLL_FD_CREATE (16) → anon_inode fd for syncpt event polling +// NVHOST_IOCTL_CTRL_SYNC_FILE_EXTRACT (19) → sync_file fd → host1x_fence_extract() +// +// Targets kernel 6.18 (Talos v1.12.6): +// - class_create() without THIS_MODULE (kernel 6.4+) +// - devnode() callback with const struct device * (kernel 6.2+) +// - close_fd() (kernel 5.11+, replaces __close_fd) +// +// CUDA 12.6 (JetPack 6) call sequence: +// 1. open(/dev/nvhost-ctrl) +// 2. GET_CHARACTERISTICS (nr=14): discover num_syncpts=704 etc. +// 3. SYNCPT_WAITMEX (nr=9): blocking wait for syncpt id/thresh → interrupt-driven +// 4. 
POLL_FD_CREATE (nr=16): once at GPU scaling init — creates anonymous poll fd +// Note: SYNC_FENCE_CREATE (nr=11) is NOT called by CUDA 12.6 directly but kept +// for other potential callers (e.g. media codecs, test tools). + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ── NVHOST uapi structs (from linux-nv-oot/include/uapi/linux/nvhost_ioctl.h) ── +// Embedded directly to avoid uapi header path issues in OOT build. + +#define NVHOST_IOCTL_MAGIC 'H' + +// nr=7: GET_VERSION +struct nvhost_get_param_args { + __u32 value; +} __packed; + +// nr=1,8: SYNCPT_READ / SYNCPT_READ_MAX +struct nvhost_ctrl_syncpt_read_args { + __u32 id; + __u32 value; +}; + +// nr=9: SYNCPT_WAITMEX — blocking wait until syncpt.value >= thresh +struct nvhost_ctrl_syncpt_waitmex_args { + __u32 id; /* syncpoint id (in) */ + __u32 thresh; /* wait until value >= thresh (in) */ + __s32 timeout; /* timeout in ms; -1 = wait forever (in) */ + __u32 value; /* syncpt value after wait (out) */ + __u32 tv_sec; /* timestamp seconds (out) */ + __u32 tv_nsec; /* timestamp nanoseconds (out) */ + __u32 clock_id; /* clock selector (in, ignored) */ + __u32 reserved; +}; + +// nr=10,11: SYNC_FENCE_CREATE (nr=10 is 32-bit compat, nr=11 is 64-bit) +struct nvhost_ctrl_sync_fence_info { + __u32 id; + __u32 thresh; +}; + +struct nvhost_ctrl_sync_fence_create_args { + __u32 num_pts; + __s32 fence_fd; + __u64 pts; /* struct nvhost_ctrl_sync_fence_info __user * */ + __u64 name; /* const char __user * — ignored, fences are anonymous */ +}; + +// nr=14: GET_CHARACTERISTICS — host1x capability discovery +struct nvhost_characteristics { +#define NVHOST_CHARACTERISTICS_GFILTER (1 << 0) +#define NVHOST_CHARACTERISTICS_RESOURCE_PER_CHANNEL_INSTANCE (1 << 1) +#define NVHOST_CHARACTERISTICS_SUPPORT_PREFENCES (1 << 2) + __u64 flags; + __u32 num_mlocks; + __u32 num_syncpts; + __u32 syncpts_base; + __u32 
syncpts_limit; + __u32 num_hw_pts; + __u32 padding; +}; + +struct nvhost_ctrl_get_characteristics { + __u64 nvhost_characteristics_buf_size; + __u64 nvhost_characteristics_buf_addr; +}; + +// nr=16: POLL_FD_CREATE — creates an anonymous fd for syncpoint event polling. +// Called once by gk20a_scale_init during GPU frequency-scaling setup. +// Upstream, the fd is used with poll()/epoll() to wait for syncpoint threshold events. +// Our implementation returns a real anonymous inode fd so callers get a valid +// file descriptor without ENOTTY; poll() on it is accepted but never reports an event. +struct nvhost_ctrl_poll_fd_create_args { + __s32 fd; /* out: anonymous poll fd */ + __u32 padding; +}; + +// nr=19: SYNC_FILE_EXTRACT +struct nvhost_ctrl_sync_file_extract { + __s32 fd; + __u32 num_fences; + __u64 fences_ptr; /* struct nvhost_ctrl_sync_fence_info __user * */ +}; + +// ── Ioctl definitions ───────────────────────────────────────────────────────── + +#define NVHOST_IOCTL_CTRL_SYNCPT_READ \ + _IOWR(NVHOST_IOCTL_MAGIC, 1, struct nvhost_ctrl_syncpt_read_args) +#define NVHOST_IOCTL_CTRL_GET_VERSION \ + _IOR(NVHOST_IOCTL_MAGIC, 7, struct nvhost_get_param_args) +#define NVHOST_IOCTL_CTRL_SYNCPT_READ_MAX \ + _IOWR(NVHOST_IOCTL_MAGIC, 8, struct nvhost_ctrl_syncpt_read_args) +#define NVHOST_IOCTL_CTRL_SYNCPT_WAITMEX \ + _IOWR(NVHOST_IOCTL_MAGIC, 9, struct nvhost_ctrl_syncpt_waitmex_args) +#define NVHOST_IOCTL_CTRL_SYNC_FENCE_CREATE \ + _IOWR(NVHOST_IOCTL_MAGIC, 11, struct nvhost_ctrl_sync_fence_create_args) +#define NVHOST_IOCTL_CTRL_GET_CHARACTERISTICS \ + _IOWR(NVHOST_IOCTL_MAGIC, 14, struct nvhost_ctrl_get_characteristics) +#define NVHOST_IOCTL_CTRL_POLL_FD_CREATE \ + _IOR(NVHOST_IOCTL_MAGIC, 16, struct nvhost_ctrl_poll_fd_create_args) +#define NVHOST_IOCTL_CTRL_SYNC_FILE_EXTRACT \ + _IOWR(NVHOST_IOCTL_MAGIC, 19, struct nvhost_ctrl_sync_file_extract) + +// Jetson Orin (Tegra234) hardware syncpoint count +#define ORIN_NUM_SYNCPTS 704 + +// ── Module state 
──────────────────────────────────────────────────────────────
+
+// Bookkeeping for the single /dev/nvhost-ctrl character device; filled in by
+// nvhost_ctrl_shim_init() and torn down by nvhost_ctrl_shim_exit().
+static struct {
+	struct class *class;	/* result of class_create("nvhost-ctrl") */
+	struct cdev cdev;	/* cdev registered with nvhost_ctrl_fops */
+	struct device *dev;	/* result of device_create() */
+	dev_t devt;		/* region from alloc_chrdev_region() */
+} nvhost_shim;
+
+// ── host1x device lookup ──────────────────────────────────────────────────────
+
+// Device-tree compatibles searched, in this order, to locate the host1x node.
+static const struct of_device_id host1x_of_match[] = {
+	{ .compatible = "nvidia,tegra234-host1x" },
+	{ .compatible = "nvidia,tegra194-host1x" },
+	{ .compatible = "nvidia,tegra186-host1x" },
+	{},
+};
+
+// Look up the host1x instance through the device tree.
+//
+// Returns the driver data of the host1x platform device, or an ERR_PTR:
+//   -ENODEV  no node matches host1x_of_match
+//   -EAGAIN  the node has no platform device, or its drvdata is still NULL
+//
+// NOTE(review): the drvdata is handed to the host1x_syncpt_* helpers as a
+// struct host1x * — confirm that is what the host1x driver stores there.
+static struct host1x *get_host1x(void)
+{
+	struct platform_device *pdev;
+	struct device_node *np;
+	void *drvdata;
+
+	np = of_find_matching_node(NULL, host1x_of_match);
+	if (!np) {
+		pr_err_ratelimited("nvhost-ctrl-shim: no host1x OF node found\n");
+		return ERR_PTR(-ENODEV);
+	}
+
+	pdev = of_find_device_by_node(np);
+	of_node_put(np);
+	if (!pdev) {
+		pr_err_ratelimited("nvhost-ctrl-shim: no host1x platform_device\n");
+		return ERR_PTR(-EAGAIN);
+	}
+
+	drvdata = platform_get_drvdata(pdev);
+
+	/*
+	 * of_find_device_by_node() returns the platform device with a
+	 * reference held on its embedded struct device. Only the drvdata
+	 * pointer is kept past this point, so drop the reference here;
+	 * otherwise every open() of /dev/nvhost-ctrl leaks one reference.
+	 */
+	put_device(&pdev->dev);
+
+	if (!drvdata) {
+		pr_err_ratelimited("nvhost-ctrl-shim: host1x drvdata is NULL\n");
+		return ERR_PTR(-EAGAIN);
+	}
+
+	return drvdata;
+}
+
+// ── File operations ───────────────────────────────────────────────────────────
+
+// open() handler: resolve host1x once and cache it in file->private_data so
+// the ioctl handlers do not repeat the device-tree lookup.
+// Returns 0, or the negative errno reported by get_host1x().
+static int nvhost_ctrl_open(struct inode *inode, struct file *file)
+{
+	struct host1x *host1x = get_host1x();
+
+	if (IS_ERR(host1x)) {
+		/* The node is created with mode 0666, so keep this rate-limited. */
+		pr_err_ratelimited("nvhost-ctrl-shim: open failed, get_host1x=%ld\n",
+				   PTR_ERR(host1x));
+		return PTR_ERR(host1x);
+	}
+
+	pr_debug("nvhost-ctrl-shim: opened (pid %d)\n", current->pid);
+	file->private_data = host1x;
+	return 0;
+}
+
+// ── NVHOST_IOCTL_CTRL_SYNCPT_READ / SYNCPT_READ_MAX ──────────────────────────
+
+// Read one syncpoint and copy the result back to userspace.
+//   data      user pointer to struct nvhost_ctrl_syncpt_read_args (id in, value out)
+//   read_max  true → host1x_syncpt_read_max(), false → host1x_syncpt_read()
+// Returns 0, -EFAULT on a failed user copy, or -EINVAL when the id is unknown.
+static int ioctl_syncpt_read(struct host1x *host1x, void __user *data,
+			     bool read_max)
+{
+	struct nvhost_ctrl_syncpt_read_args args;
+	struct host1x_syncpt *sp;
+
+	if (copy_from_user(&args, data, sizeof(args)))
+		return -EFAULT;
+
+	sp = host1x_syncpt_get_by_id_noref(host1x, args.id);
+	if (!sp) {
+		pr_err_ratelimited("nvhost-ctrl-shim: SYNCPT_READ%s: id=%u not found\n",
+				   read_max ? "_MAX" : "", args.id);
+		return -EINVAL;
+	}
+
+	args.value = read_max ? host1x_syncpt_read_max(sp)
+			      : host1x_syncpt_read(sp);
+
+	return copy_to_user(data, &args, sizeof(args)) ? -EFAULT : 0;
+}
+
+// ── NVHOST_IOCTL_CTRL_SYNCPT_WAITMEX ─────────────────────────────────────────
+// Blocking wait until syncpt[id].value >= thresh, using interrupt-driven
+// dma_fence_wait_timeout (host1x hardware interrupt, NOT CPU busy-poll).
+//
+//   data  user pointer to struct nvhost_ctrl_syncpt_waitmex_args
+// On success args.value holds the syncpoint value read after the wait and
+// tv_sec/tv_nsec are zeroed. Returns 0, -EFAULT on a failed user copy,
+// -EINVAL for an unknown id, the error from host1x_fence_create() or
+// dma_fence_wait_timeout(), or -ETIMEDOUT when the wait expires.
+
+static int ioctl_syncpt_waitmex(struct host1x *host1x, void __user *data)
+{
+	struct nvhost_ctrl_syncpt_waitmex_args args;
+	struct host1x_syncpt *sp;
+	struct dma_fence *fence;
+	long timeout_jiffies;
+	long ret;
+
+	if (copy_from_user(&args, data, sizeof(args)))
+		return -EFAULT;
+
+	pr_debug("nvhost-ctrl-shim: SYNCPT_WAITMEX id=%u thresh=%u timeout=%d\n",
+		 args.id, args.thresh, args.timeout);
+
+	sp = host1x_syncpt_get_by_id_noref(host1x, args.id);
+	if (!sp) {
+		/* User-controlled id: rate-limit like SYNCPT_READ does. */
+		pr_err_ratelimited("nvhost-ctrl-shim: SYNCPT_WAITMEX id=%u not found\n",
+				   args.id);
+		return -EINVAL;
+	}
+
+	// timeout: -1 → wait forever; 0 → wait forever; >0 → milliseconds.
+	//
+	// GA10B (Jetson Orin NX) is slower than desktop GPUs. CUDA's built-in
+	// timeout for cudaStreamSynchronize may expire before large-model kernels
+	// (e.g. qwen2.5:7b warmup with 311 MiB compute buffer) complete on GA10B.
+	// Enforce a minimum wait of 30 s so slow-but-valid kernels are not aborted.
+	// This also logs the CUDA-requested timeout for diagnostics.
+	//
+	// NOTE(review): timeout == 0 is deliberately treated as "no timeout
+	// specified" here — confirm callers never use 0 to mean "do not block".
+	if (args.timeout <= 0) {
+		timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+	} else {
+		// Clamp to minimum 30 000 ms so GA10B large-model kernels are not
+		// prematurely killed by CUDA's default short timeout.
+		unsigned int timeout_ms = max_t(unsigned int,
+						(unsigned int)args.timeout, 30000U);
+		pr_debug("nvhost-ctrl-shim: SYNCPT_WAITMEX cuda_timeout=%dms → using %ums\n",
+			 args.timeout, timeout_ms);
+		timeout_jiffies = msecs_to_jiffies(timeout_ms);
+	}
+
+	// Create a fence that signals when syncpt reaches thresh
+	fence = host1x_fence_create(sp, args.thresh, true);
+	if (IS_ERR(fence)) {
+		pr_err("nvhost-ctrl-shim: SYNCPT_WAITMEX fence_create failed: %ld\n",
+		       PTR_ERR(fence));
+		return PTR_ERR(fence);
+	}
+
+	// Sleep until hardware interrupt signals the fence
+	ret = dma_fence_wait_timeout(fence, true, timeout_jiffies);
+	dma_fence_put(fence);
+
+	if (ret < 0) {
+		pr_err("nvhost-ctrl-shim: SYNCPT_WAITMEX wait error: %ld\n", ret);
+		return ret;
+	}
+	if (ret == 0) {
+		pr_warn("nvhost-ctrl-shim: SYNCPT_WAITMEX timeout id=%u thresh=%u (cuda_timeout=%dms)\n",
+			args.id, args.thresh, args.timeout);
+		return -ETIMEDOUT;
+	}
+
+	args.value = host1x_syncpt_read(sp);
+	args.tv_sec = 0;
+	args.tv_nsec = 0;
+
+	pr_debug("nvhost-ctrl-shim: SYNCPT_WAITMEX done id=%u value=%u\n",
+		 args.id, args.value);
+
+	return copy_to_user(data, &args, sizeof(args)) ? -EFAULT : 0;
+}
+
+// ── NVHOST_IOCTL_CTRL_GET_CHARACTERISTICS ────────────────────────────────────
+// CUDA calls this on every open to discover available syncpoints.
+
+// Answer the host1x capability query.
+//   data  user pointer to struct nvhost_ctrl_get_characteristics
+// When the caller supplies a buffer address, up to
+// sizeof(struct nvhost_characteristics) bytes of the fixed Orin description
+// are copied into it (fewer if the caller's buffer is smaller). In every case
+// the request is written back with buf_size set to the full structure size.
+// Returns 0, or -EFAULT when any user copy fails.
+static int ioctl_get_characteristics(void __user *data)
+{
+	/* Fields not named here (num_mlocks, syncpts_base, padding) stay 0. */
+	const struct nvhost_characteristics hw = {
+		.flags		= NVHOST_CHARACTERISTICS_SUPPORT_PREFENCES,
+		.num_syncpts	= ORIN_NUM_SYNCPTS,
+		.syncpts_limit	= ORIN_NUM_SYNCPTS,
+		.num_hw_pts	= ORIN_NUM_SYNCPTS,
+	};
+	struct nvhost_ctrl_get_characteristics req;
+
+	if (copy_from_user(&req, data, sizeof(req)))
+		return -EFAULT;
+
+	pr_debug("nvhost-ctrl-shim: GET_CHARACTERISTICS buf_size=%llu\n",
+		 req.nvhost_characteristics_buf_size);
+
+	/* A zero address means the caller only wants the required size. */
+	if (req.nvhost_characteristics_buf_addr) {
+		void __user *dst =
+			u64_to_user_ptr(req.nvhost_characteristics_buf_addr);
+		size_t len = min_t(__u64, req.nvhost_characteristics_buf_size,
+				   sizeof(hw));
+
+		if (copy_to_user(dst, &hw, len))
+			return -EFAULT;
+	}
+
+	req.nvhost_characteristics_buf_size = sizeof(hw);
+	if (copy_to_user(data, &req, sizeof(req)))
+		return -EFAULT;
+
+	return 0;
+}
+
+// ── NVHOST_IOCTL_CTRL_POLL_FD_CREATE ─────────────────────────────────────────
+// Creates an anonymous inode fd for syncpoint event polling.
+// Called once by gk20a_scale_init (GPU frequency scaling); NOT in the CUDA
+// inference hot-path. Returns a real pollable fd so callers can select()/epoll()
+// without getting ENOTTY. The fd is a minimal anon inode — it does not deliver
+// syncpoint threshold events, but it is a valid open file descriptor.
+
+// poll() handler for the anonymous fd handed out by POLL_FD_CREATE.
+static __poll_t nvhost_ctrl_poll_fd_poll(struct file *file, poll_table *wait)
+{
+	/* Never signals readiness — callers use SYNCPT_WAITMEX for real waits */
+	return 0;
+}
+
+static const struct file_operations nvhost_ctrl_poll_fops = {
+	.owner = THIS_MODULE,
+	.poll  = nvhost_ctrl_poll_fd_poll,
+};
+
+// Create the anonymous poll fd and report it through
+// struct nvhost_ctrl_poll_fd_create_args.
+// Returns 0, -EFAULT on a failed user copy, or the error from fd/file creation.
+//
+// The fd number is only reserved until the result has been copied out;
+// fd_install() comes last. Once an fd is installed another thread of the
+// process can already use or close it, so it must not be published before
+// every step that can fail has succeeded.
+static int ioctl_poll_fd_create(void __user *data)
+{
+	struct nvhost_ctrl_poll_fd_create_args args = { .padding = 0 };
+	struct file *filp;
+	int fd;
+
+	fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		pr_err("nvhost-ctrl-shim: POLL_FD_CREATE: get_unused_fd_flags failed: %d\n",
+		       fd);
+		return fd;
+	}
+
+	filp = anon_inode_getfile("nvhost-ctrl-poll", &nvhost_ctrl_poll_fops,
+				  NULL, O_RDWR | O_CLOEXEC);
+	if (IS_ERR(filp)) {
+		pr_err("nvhost-ctrl-shim: POLL_FD_CREATE: anon_inode_getfile failed: %ld\n",
+		       PTR_ERR(filp));
+		put_unused_fd(fd);
+		return PTR_ERR(filp);
+	}
+
+	args.fd = fd;
+	if (copy_to_user(data, &args, sizeof(args))) {
+		fput(filp);
+		put_unused_fd(fd);
+		return -EFAULT;
+	}
+
+	fd_install(fd, filp);
+	pr_debug("nvhost-ctrl-shim: POLL_FD_CREATE → fd=%d\n", fd);
+	return 0;
+}
+
+// ── NVHOST_IOCTL_CTRL_SYNC_FENCE_CREATE ──────────────────────────────────────
+
+// Wrap a single host1x fence for (sp, thresh) in a sync_file.
+// Returns the sync_file, or an ERR_PTR. The caller owns the file reference
+// and must either fd_install() or fput() sfile->file.
+static struct sync_file *make_fence_file(struct host1x_syncpt *sp, u32 thresh)
+{
+	struct sync_file *sfile;
+	struct dma_fence *f;
+
+	f = host1x_fence_create(sp, thresh, true);
+	if (IS_ERR(f)) {
+		pr_err_ratelimited("nvhost-ctrl-shim: host1x_fence_create thresh=%u err=%ld\n",
+				   thresh, PTR_ERR(f));
+		return ERR_CAST(f);
+	}
+
+	/* Our fence reference is no longer needed once the sync_file exists. */
+	sfile = sync_file_create(f);
+	dma_fence_put(f);
+
+	return sfile ? sfile : ERR_PTR(-ENOMEM);
+}
+
+// Wrap num_pts host1x fences, read from the user array pts_user, in one
+// dma_fence_array-backed sync_file.
+// Returns the sync_file, or an ERR_PTR (-ENOMEM, -EFAULT, -EINVAL for an
+// unknown syncpoint id, or the host1x_fence_create() error). Ownership of the
+// returned file is the same as for make_fence_file().
+static struct sync_file *make_array_fence_file(struct host1x *host1x,
+		struct nvhost_ctrl_sync_fence_info __user *pts_user,
+		u32 num_pts)
+{
+	struct dma_fence **fences;
+	struct dma_fence_array *arr;
+	struct sync_file *sfile;
+	struct host1x_syncpt *sp;
+	struct nvhost_ctrl_sync_fence_info pt;
+	int err = 0;
+	u32 i;
+
+	fences = kcalloc(num_pts, sizeof(*fences), GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < num_pts; i++) {
+		if (copy_from_user(&pt, pts_user + i, sizeof(pt))) {
+			err = -EFAULT;
+			goto free_fences;
+		}
+		sp = host1x_syncpt_get_by_id_noref(host1x, pt.id);
+		if (!sp) {
+			err = -EINVAL;
+			goto free_fences;
+		}
+		fences[i] = host1x_fence_create(sp, pt.thresh, true);
+		if (IS_ERR(fences[i])) {
+			err = PTR_ERR(fences[i]);
+			/* Keep the cleanup loop from touching an ERR_PTR. */
+			fences[i] = NULL;
+			goto free_fences;
+		}
+	}
+
+	/* dma_fence_array_create takes ownership of fences[] on success */
+	arr = dma_fence_array_create(num_pts, fences,
+				     dma_fence_context_alloc(1), 1, false);
+	if (!arr) {
+		err = -ENOMEM;
+		goto free_fences;
+	}
+
+	sfile = sync_file_create(&arr->base);
+	dma_fence_put(&arr->base);
+
+	return sfile ? sfile : ERR_PTR(-ENOMEM);
+
+free_fences:
+	/* fences[] came from kcalloc(), so unfilled slots are NULL. */
+	for (i = 0; i < num_pts; i++)
+		if (fences[i])
+			dma_fence_put(fences[i]);
+	kfree(fences);
+	return ERR_PTR(err);
+}
+
+// Turn 1..512 (syncpoint id, threshold) pairs into a sync_file fd and return
+// it in args.fence_fd.
+//   data  user pointer to struct nvhost_ctrl_sync_fence_create_args
+// Returns 0, -EFAULT on a failed user copy, -EINVAL for a bad count or
+// unknown syncpoint id, or the error from fence/file/fd creation.
+//
+// As in ioctl_poll_fd_create(), the fd is installed only after the result has
+// been copied to userspace, so a failed copy never leaves a live fd behind.
+static int ioctl_sync_fence_create(struct host1x *host1x, void __user *data)
+{
+	struct nvhost_ctrl_sync_fence_info __user *pts_user;
+	struct nvhost_ctrl_sync_fence_create_args args;
+	struct nvhost_ctrl_sync_fence_info pt;
+	struct host1x_syncpt *sp;
+	struct sync_file *sfile;
+	int fd;
+
+	if (copy_from_user(&args, data, sizeof(args)))
+		return -EFAULT;
+
+	pr_debug("nvhost-ctrl-shim: SYNC_FENCE_CREATE num_pts=%u\n", args.num_pts);
+
+	/* The upper bound caps the kcalloc() size a caller can request. */
+	if (args.num_pts == 0 || args.num_pts > 512)
+		return -EINVAL;
+
+	pts_user = u64_to_user_ptr(args.pts);
+
+	if (args.num_pts == 1) {
+		if (copy_from_user(&pt, pts_user, sizeof(pt)))
+			return -EFAULT;
+		pr_debug("nvhost-ctrl-shim: SYNC_FENCE_CREATE id=%u thresh=%u\n",
+			 pt.id, pt.thresh);
+		sp = host1x_syncpt_get_by_id_noref(host1x, pt.id);
+		if (!sp) {
+			pr_err("nvhost-ctrl-shim: SYNC_FENCE_CREATE id=%u not found\n",
+			       pt.id);
+			return -EINVAL;
+		}
+		sfile = make_fence_file(sp, pt.thresh);
+	} else {
+		sfile = make_array_fence_file(host1x, pts_user, args.num_pts);
+	}
+
+	if (IS_ERR(sfile)) {
+		fd = PTR_ERR(sfile);
+		pr_err("nvhost-ctrl-shim: SYNC_FENCE_CREATE failed: %d\n", fd);
+		return fd;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		pr_err("nvhost-ctrl-shim: SYNC_FENCE_CREATE failed: %d\n", fd);
+		fput(sfile->file);
+		return fd;
+	}
+
+	args.fence_fd = fd;
+	if (copy_to_user(data, &args, sizeof(args))) {
+		put_unused_fd(fd);
+		fput(sfile->file);
+		return -EFAULT;
+	}
+
+	fd_install(fd, sfile->file);
+	pr_debug("nvhost-ctrl-shim: SYNC_FENCE_CREATE → fd=%d\n", fd);
+	return 0;
+}
+
+// ── NVHOST_IOCTL_CTRL_SYNC_FILE_EXTRACT ──────────────────────────────────────
+
+// Decompose the sync_file behind args.fd into (syncpoint id, threshold) pairs.
+//   data  user pointer to struct nvhost_ctrl_sync_file_extract
+// At most args.num_fences entries are written to args.fences_ptr, but
+// args.num_fences is updated to the number of pairs found, which may exceed
+// the number written. Returns 0, -EFAULT on a failed user copy, -EINVAL when
+// the fd carries no fence, or the error from host1x_fence_extract().
+static int ioctl_sync_file_extract(struct host1x *host1x, void __user *data)
+{
+	struct nvhost_ctrl_sync_fence_info __user *fences_user;
+	struct nvhost_ctrl_sync_file_extract args;
+	struct dma_fence *fence, **fences;
+	struct dma_fence_array *array;
+	unsigned int num_fences, i, j;
+	int err = 0;
+
+	if (copy_from_user(&args, data, sizeof(args)))
+		return -EFAULT;
+
+	fences_user = u64_to_user_ptr(args.fences_ptr);
+
+	fence = sync_file_get_fence(args.fd);
+	if (!fence)
+		return -EINVAL;
+
+	/* Treat a plain fence as a one-element array. */
+	array = to_dma_fence_array(fence);
+	if (array) {
+		fences = array->fences;
+		num_fences = array->num_fences;
+	} else {
+		fences = &fence;
+		num_fences = 1;
+	}
+
+	/* i walks the input fences, j counts the pairs reported to userspace. */
+	for (i = 0, j = 0; i < num_fences; i++) {
+		struct nvhost_ctrl_sync_fence_info fi;
+
+		err = host1x_fence_extract(fences[i], &fi.id, &fi.thresh);
+		if (err == -EINVAL && dma_fence_is_signaled(fences[i])) {
+			/* signaled stub fence — skip */
+			err = 0;
+			continue;
+		}
+		if (err)
+			goto put;
+
+		if (j < args.num_fences) {
+			if (copy_to_user(fences_user + j, &fi, sizeof(fi))) {
+				err = -EFAULT;
+				goto put;
+			}
+		}
+		j++;
+	}
+
+	args.num_fences = j;
+	if (copy_to_user(data, &args, sizeof(args)))
+		err = -EFAULT;
+
+put:
+	dma_fence_put(fence);
+	return err;
+}
+
+// ── Ioctl dispatcher ──────────────────────────────────────────────────────────
+
+// Route an NVHOST_IOCTL_CTRL_* request to its handler. The host1x pointer was
+// cached in file->private_data by nvhost_ctrl_open().
+// Unknown commands are logged (rate-limited) and rejected with -ENOTTY.
+static long nvhost_ctrl_ioctl(struct file *file, unsigned int cmd,
+			      unsigned long arg)
+{
+	struct host1x *host1x = file->private_data;
+	void __user *data = (void __user *)arg;
+
+	switch (cmd) {
+	case NVHOST_IOCTL_CTRL_GET_VERSION: {
+		struct nvhost_get_param_args v = { .value = 1 };
+		pr_debug("nvhost-ctrl-shim: GET_VERSION → 1\n");
+		return copy_to_user(data, &v, sizeof(v)) ? -EFAULT : 0;
+	}
+	case NVHOST_IOCTL_CTRL_SYNCPT_READ:
+		return ioctl_syncpt_read(host1x, data, false);
+	case NVHOST_IOCTL_CTRL_SYNCPT_READ_MAX:
+		return ioctl_syncpt_read(host1x, data, true);
+	case NVHOST_IOCTL_CTRL_SYNCPT_WAITMEX:
+		return ioctl_syncpt_waitmex(host1x, data);
+	case NVHOST_IOCTL_CTRL_SYNC_FENCE_CREATE:
+		return ioctl_sync_fence_create(host1x, data);
+	case NVHOST_IOCTL_CTRL_GET_CHARACTERISTICS:
+		return ioctl_get_characteristics(data);
+	case NVHOST_IOCTL_CTRL_POLL_FD_CREATE:
+		return ioctl_poll_fd_create(data);
+	case NVHOST_IOCTL_CTRL_SYNC_FILE_EXTRACT:
+		return ioctl_sync_file_extract(host1x, data);
+	default:
+		pr_warn_ratelimited("nvhost-ctrl-shim: unknown ioctl cmd=0x%08x\n", cmd);
+		return -ENOTTY;
+	}
+}
+
+// ── Device class / cdev setup ─────────────────────────────────────────────────
+
+// Class devnode callback: request mode 0666 for the node and return NULL so
+// no alternative node name is supplied.
+static char *nvhost_ctrl_devnode(const struct device *dev, umode_t *mode)
+{
+	/* Tolerate callers that are not interested in the mode. */
+	if (mode)
+		*mode = 0666;
+	return NULL;
+}
+
+static const struct file_operations nvhost_ctrl_fops = {
+	.owner          = THIS_MODULE,
+	.open           = nvhost_ctrl_open,
+	.unlocked_ioctl = nvhost_ctrl_ioctl,
+	.compat_ioctl   = nvhost_ctrl_ioctl,
+};
+
+// ── Module init / exit ────────────────────────────────────────────────────────
+
+// Register the chrdev region, class, cdev and device node in that order;
+// each failure unwinds the steps already completed, in reverse.
+static int __init nvhost_ctrl_shim_init(void)
+{
+	dev_t devt;
+	int err;
+
+	err = alloc_chrdev_region(&devt, 0, 1, "nvhost-ctrl");
+	if (err)
+		return err;
+
+	nvhost_shim.class = class_create("nvhost-ctrl");
+	if (IS_ERR(nvhost_shim.class)) {
+		err = PTR_ERR(nvhost_shim.class);
+		goto unregister;
+	}
+	nvhost_shim.class->devnode = nvhost_ctrl_devnode;
+
+	cdev_init(&nvhost_shim.cdev, &nvhost_ctrl_fops);
+	err = cdev_add(&nvhost_shim.cdev, devt, 1);
+	if (err)
+		goto destroy_class;
+
+	nvhost_shim.dev = device_create(nvhost_shim.class, NULL,
+					devt, NULL, "nvhost-ctrl");
+	if (IS_ERR(nvhost_shim.dev)) {
+		err = PTR_ERR(nvhost_shim.dev);
+		goto del_cdev;
+	}
+
+	/* Published only on success; nvhost_ctrl_shim_exit() relies on it. */
+	nvhost_shim.devt = devt;
+	pr_info("nvhost-ctrl-shim: /dev/nvhost-ctrl ready (major %d)\n",
+		MAJOR(devt));
+	return 0;
+
+del_cdev:
+	cdev_del(&nvhost_shim.cdev);
+destroy_class:
+	class_destroy(nvhost_shim.class);
+unregister:
+	unregister_chrdev_region(devt, 1);
+	return err;
+}
+
+// Undo nvhost_ctrl_shim_init() in reverse order.
+static void __exit nvhost_ctrl_shim_exit(void)
+{
+	device_destroy(nvhost_shim.class, nvhost_shim.devt);
+	cdev_del(&nvhost_shim.cdev);
+	class_destroy(nvhost_shim.class);
+	unregister_chrdev_region(nvhost_shim.devt, 1);
+}
+
+module_init(nvhost_ctrl_shim_init);
+module_exit(nvhost_ctrl_shim_exit);
+
+MODULE_DESCRIPTION("nvhost-ctrl shim — NVHOST ioctl API over OOT host1x for Talos Jetson");
+MODULE_LICENSE("GPL");
diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml new file mode 100644 index 000000000..cfea8a3e1 --- /dev/null +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -0,0 +1,390 @@ +name: nvidia-tegra-nvgpu +variant: scratch +shell: /bin/bash +dependencies: + - stage: base + - stage: kernel-build + - image: "{{ .LLVM_IMAGE }}:{{ .TOOLS_REV }}" +steps: + - sources: + # OE4T patched nvgpu - supports kernel 6.x (fixes platform_driver.remove, hrtimer, struct fd) + - url: https://github.com/OE4T/linux-nvgpu/archive/{{ .oe4t_nvgpu_commit }}.tar.gz + destination: nvgpu.tar.gz + sha256: "{{ .oe4t_nvgpu_sha256 }}" + sha512: "{{ .oe4t_nvgpu_sha512 }}" + # OE4T patched nvidia-oot - wip-r36.5-take-2 branch (kernel 6.18 compat: __assign_str, + # f_count->f_ref, __alloc_pages_bulk 5-arg, and all earlier 6.x fixes) + - url: https://github.com/OE4T/linux-nv-oot/archive/{{ .oe4t_nv_oot_commit }}.tar.gz + destination: nvidia-oot.tar.gz + sha256: "{{ .oe4t_nv_oot_sha256 }}" + sha512: "{{ .oe4t_nv_oot_sha512 }}" + # OE4T patched hwpm - supports kernel 6.x (fixes platform_driver.remove, MODULE_IMPORT_NS) + - url: https://github.com/OE4T/linux-hwpm/archive/{{ .oe4t_hwpm_commit }}.tar.gz + destination: hwpm.tar.gz + sha256: "{{ .oe4t_hwpm_sha256 }}" + sha512: "{{ .oe4t_hwpm_sha512 }}" + # nvhost-ctrl-shim source is embedded in this package (see 
nvhost_ctrl_shim.c) + # and made available at /pkg/nvhost_ctrl_shim.c by bldr at build time. + env: + ARCH: arm64 + LLVM: "1" + LLVM_IAS: "1" + prepare: + - | + echo "Extracting OE4T patched OOT module sources..." + mkdir -p /oot-src/nvgpu /oot-src/nvidia-oot /oot-src/hwpm + tar xzf nvgpu.tar.gz -C /oot-src/nvgpu --strip-components=1 + tar xzf nvidia-oot.tar.gz -C /oot-src/nvidia-oot --strip-components=1 + tar xzf hwpm.tar.gz -C /oot-src/hwpm --strip-components=1 + echo "Sources extracted:" + ls /oot-src/ + # nvhost-ctrl-shim C source (embedded in this package, available at /pkg/) + mkdir -p /oot-src/nvhost-ctrl-shim + cp /pkg/nvhost_ctrl_shim.c /oot-src/nvhost-ctrl-shim/nvhost_ctrl_shim.c + echo "nvhost-ctrl-shim source ready ($(wc -l < /oot-src/nvhost-ctrl-shim/nvhost_ctrl_shim.c) lines)" + build: + - | + echo "Building NVIDIA conftest (kernel compat detection)..." + mkdir -p /oot-src/out/nvidia-conftest/nvidia /oot-src/out/nvidia-linux-header + + # conftest scripts are in nvidia-oot + cp -av /oot-src/nvidia-oot/scripts/conftest/* /oot-src/out/nvidia-conftest/nvidia/ + + make -j $(nproc) ARCH=arm64 \ + src=/oot-src/out/nvidia-conftest/nvidia \ + obj=/oot-src/out/nvidia-conftest/nvidia \ + LLVM=1 \ + NV_KERNEL_SOURCES=/src \ + NV_KERNEL_OUTPUT=/src \ + -f /oot-src/out/nvidia-conftest/nvidia/Makefile + echo "conftest done." + - | + # Patch OOT module Makefiles: remove -Werror and add required include paths. + # srctree.nvconftest and srctree.nvidia-oot are passed as make vars at build time. + NVIDIA_OOT=/oot-src/nvidia-oot + CONFTEST_OUT=/oot-src/out/nvidia-conftest + + # ── GCC plugin latent_entropy fix ──────────────────────────────────────────── + # CONFIG_GCC_PLUGIN_LATENT_ENTROPY injects a global 'latent_entropy' variable + # via a GCC plugin. Clang doesn't run the plugin → 'latent_entropy' is undeclared + # → linux/random.h:24 compile error for ALL OOT modules. + # Fix: strip the macro from auto.conf and autoconf.h before any OOT build. 
+ sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/config/auto.conf 2>/dev/null || true + sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/generated/autoconf.h 2>/dev/null || true + echo "Removed CONFIG_GCC_PLUGIN_LATENT_ENTROPY (Clang compat fix)" + + # OOT host1x: add conftest + nvidia-oot includes (exports host1x_fence_extract) + printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ + >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile + printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ + >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile + printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/gpu/host1x/include\n' \ + >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile + + # Force conftest macros for OOT host1x on kernel 6.18 + grep -rl "NV_IOMMU_MAP_HAS_GFP_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ + | xargs -r sed -i "s|#if defined(NV_IOMMU_MAP_HAS_GFP_ARG)|#if 1 /* force: kernel 6.3+ */|g" + grep -rl "NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ + | xargs -r sed -i "s|#if defined(NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT)|#if 1 /* force: kernel 6.11+ */|g" + grep -rl "NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ + | xargs -r sed -i "s|#if defined(NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT)|#if 1 /* force: present */|g" + grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ + | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g" + grep -rl "NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ + | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG)|#if 1 /* force: kernel 6.x+ */|g" + grep -rl "NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ + | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG)|#if 1 /* force: kernel 6.x+ */|g" + echo "Patched OOT host1x: forced conftest macro 
code paths for kernel 6.18" + + # host1x syncpt.c: permanently reserve syncpt id=0 so host1x_syncpt_alloc never returns it. + # GA10b has NVGPU_ERRATA_SYNCPT_INVALID_ID_0: nvgpu rejects id=0 → channel init fails → error 999. + # OOT host1x (ccf7646c) marks syncpt[0] with name="reserved" but does NOT set kref=1, so the + # alloc loop (which skips syncpts where kref_read(&sp->ref) != 0) still returns id=0. + # Fix: add kref_init(&syncpt[0].ref) before the name assignment, matching what newer OE4T + # commits already do (e.g. 6e071c0). Guard is idempotent — safe even if already present. + SYNCPT_C=${NVIDIA_OOT}/drivers/gpu/host1x/syncpt.c + if grep -q 'syncpt\[0\]\.name = kstrdup' "${SYNCPT_C}" 2>/dev/null; then + if ! grep -q 'kref_init.*syncpt\[0\]' "${SYNCPT_C}" 2>/dev/null; then + sed -i 's/\(syncpt\[0\]\.name = kstrdup("reserved", GFP_KERNEL);\)/kref_init(\&syncpt[0].ref);\n\t\t\1/' "${SYNCPT_C}" + echo "Patched host1x syncpt.c: added kref_init(&syncpt[0].ref) — id=0 permanently reserved" + else + echo "host1x syncpt.c: kref_init(&syncpt[0].ref) already present — no patch needed" + fi + else + echo "WARNING: host1x syncpt.c pattern not found — syncpt id=0 reservation patch skipped" + fi + + # host1x-fence: remove -Werror, add conftest + nvidia-oot includes + sed -i 's|ccflags-y += -Werror||g' \ + ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile + printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ + >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile + printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ + >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile + printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/gpu/host1x/include\n' \ + >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile + grep -rl "class_create(THIS_MODULE," ${NVIDIA_OOT}/drivers/gpu/host1x-fence/ \ + | xargs -r sed -i 's/class_create(THIS_MODULE, /class_create(/g' + grep -rl "host1x_fence_devnode" ${NVIDIA_OOT}/drivers/gpu/host1x-fence/ \ + | xargs -r sed -i 's/static char 
\*host1x_fence_devnode(struct device \*/static char *host1x_fence_devnode(const struct device */g' + echo "Patched host1x-fence: class_create + devnode const fixes for kernel 6.x" + + # nvmap: remove subdir -Werror, add conftest + nvidia-oot includes + sed -i 's|subdir-ccflags-y += -Werror||g' \ + ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile + printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ + >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile + printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ + >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile + printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/video/tegra/nvmap/include\n' \ + >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile + printf 'ccflags-y += -DNV_GET_USER_PAGES_HAS_ARGS_FLAGS\n' \ + >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile + printf 'ccflags-y += -DNV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT\n' \ + >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile + printf 'ccflags-y += -DNV_IOREMAP_PROT_HAS_PGPROT_T_ARG\n' \ + >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile + + # mc-utils: add nvidia-oot includes + printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ + >> ${NVIDIA_OOT}/drivers/platform/tegra/mc-utils/Makefile + + # governor_pod_scaling: add conftest + nvidia-oot includes + printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ + >> ${NVIDIA_OOT}/drivers/devfreq/Makefile + printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ + >> ${NVIDIA_OOT}/drivers/devfreq/Makefile + + echo "Include paths patched into OOT module Makefiles." 
+ + # Force conftest macro paths in nvmap source for kernel 6.18 + grep -rl "NV_GET_USER_PAGES_HAS_ARGS_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)|#if 1 /* force: kernel 6.5+ */|g" + grep -rl "NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT)|#if 1 /* force: kernel 6.2+ */|g" + grep -rl "NV_IOREMAP_PROT_HAS_PGPROT_T_ARG" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV_IOREMAP_PROT_HAS_PGPROT_T_ARG)|#if 1 /* force: kernel 6.15+ */|g" + grep -rl "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if 1 /* force: kernel 6.3+ */|g" + grep -rl "NV___ASSIGN_STR_HAS_NO_SRC_ARG" \ + ${NVIDIA_OOT}/include/trace/events/ \ + ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ 2>/dev/null \ + | xargs -r sed -i "s|#if defined(NV___ASSIGN_STR_HAS_NO_SRC_ARG)|#if 1 /* force: kernel 6.10+ */|g" + grep -rl "NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG" \ + ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG)|#if 1 /* force: kernel 6.14+ */|g" + grep -rl "NV_FILE_STRUCT_HAS_F_REF" \ + ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV_FILE_STRUCT_HAS_F_REF)|#if 1 /* force: kernel 6.13+ */|g" + grep -rl "NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG" \ + ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG)|#if 1 /* force: kernel 6.7+ */|g" + grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" \ + ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ + | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g" + echo "Patched nvmap: forced conftest macro code 
paths for kernel 6.18" + - | + # ── Cross-compiler mismatch fix: Clang wrapper ──────────────────────────── + # Strips GCC-only flags before passing to clang (source-independent fix). + mkdir -p /usr/local/bin + printf '#!/bin/bash\nfiltered=()\nfor arg in "$@"; do\n case "$arg" in\n -fmin-function-alignment=*|-fconserve-stack) ;;\n -fsanitize=bounds-strict) filtered+=("-fsanitize=bounds") ;;\n -Wimplicit-fallthrough=*) filtered+=("-Wimplicit-fallthrough") ;;\n -Wno-maybe-uninitialized) filtered+=("-Wno-uninitialized") ;;\n -Wno-alloc-size-larger-than|-Wno-alloc-size-larger-than=*) ;;\n -fplugin=*|-fplugin-arg-*) ;;\n -pg|-mrecord-mcount|-mfentry|-fpatchable-function-entry=*) ;;\n *) filtered+=("$arg") ;;\n esac\ndone\nexec clang "${filtered[@]}" -Wno-unknown-warning-option -Wno-enum-enum-conversion -Wno-implicit-fallthrough -Wno-gnu-variable-sized-type-not-at-end\n' > /usr/local/bin/clang-oot + chmod +x /usr/local/bin/clang-oot + echo "clang-oot wrapper installed" + + NVIDIA_OOT=/oot-src/nvidia-oot + NVIDIA_CONFTEST=/oot-src/out/nvidia-conftest + touch /oot-src/out/nvidia-linux-header/Module.symvers.nvidia + + # ── OOT host1x: HOST1X_SYNCPT_GPU support + syncpt[0] id=0 fix ────────────── + echo "=== Building OOT host1x ===" + make -j $(nproc) ARCH=arm64 \ + -C /src \ + M=${NVIDIA_OOT}/drivers/gpu/host1x \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + modules 2>&1 | tee /tmp/build-host1x.log; [[ ${PIPESTATUS[0]} -eq 0 ]] && echo "host1x: OK" || { echo "host1x: BUILD FAILED (see errors above)"; exit 1; } + cat ${NVIDIA_OOT}/drivers/gpu/host1x/Module.symvers \ + >> /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null || true + + # ── mc-utils ────────────────────────────────────────────────────────────────── + echo "=== Building mc-utils ===" + make -j $(nproc) ARCH=arm64 \ + -C /src \ + M=${NVIDIA_OOT}/drivers/platform/tegra/mc-utils \ + 
CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + modules 2>&1 | tee /tmp/build-mc-utils.log; [[ ${PIPESTATUS[0]} -eq 0 ]] && echo "mc-utils: OK" || { echo "mc-utils: BUILD FAILED (see errors above)"; exit 1; } + cat ${NVIDIA_OOT}/drivers/platform/tegra/mc-utils/Module.symvers \ + >> /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null || true + + # ── host1x-fence ───────────────────────────────────────────────────────────── + echo "=== Building OOT host1x-fence ===" + make -j $(nproc) ARCH=arm64 \ + -C /src \ + M=${NVIDIA_OOT}/drivers/gpu/host1x-fence \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + KBUILD_EXTRA_SYMBOLS=/oot-src/out/nvidia-linux-header/Module.symvers.nvidia \ + modules 2>&1 | tee /tmp/build-host1x-fence.log; [[ ${PIPESTATUS[0]} -eq 0 ]] && echo "host1x-fence: OK" || { echo "host1x-fence: BUILD FAILED (see errors above)"; exit 1; } + cat ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Module.symvers \ + >> /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null || true + + # ── nvhost-ctrl-shim: /dev/nvhost-ctrl for JetPack 6 CUDA runtime ──────────── + # Bridges NVHOST_IOCTL_CTRL_SYNC_FENCE_CREATE / SYNC_FILE_EXTRACT to the OOT host1x + # syncpoint API so libnvrm_host1x.so uses hardware syncpoint interrupts for + # cudaStreamSynchronize instead of CPU semaphore polling. Symbol deps: host1x.ko only. 
+ echo "=== Building nvhost-ctrl-shim ===" + SHIM_DIR=/oot-src/nvhost-ctrl-shim + printf 'obj-m += nvhost_ctrl_shim.o\nccflags-y += -I$(src)/../nvidia-oot/drivers/gpu/host1x/include\n' \ + > "${SHIM_DIR}/Makefile" + echo "nvhost-ctrl-shim: source ready ($(wc -l < "${SHIM_DIR}/nvhost_ctrl_shim.c") lines)" + make -j$(nproc) ARCH=arm64 \ + -C /src \ + M="${SHIM_DIR}" \ + CC=/usr/local/bin/clang-oot \ + LLVM=1 \ + KBUILD_EXTRA_SYMBOLS=/oot-src/out/nvidia-linux-header/Module.symvers.nvidia \ + modules 2>&1 | tee /tmp/build-nvhost-ctrl-shim.log + [[ ${PIPESTATUS[0]} -eq 0 ]] && echo "nvhost-ctrl-shim: OK" \ + || { echo "=== nvhost-ctrl-shim BUILD FAILED ==="; cat /tmp/build-nvhost-ctrl-shim.log; exit 1; } + cat "${SHIM_DIR}/Module.symvers" \ + >> /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null || true + + # ── nvmap ───────────────────────────────────────────────────────────────────── + echo "=== Building nvmap ===" + make -j $(nproc) ARCH=arm64 \ + -C /src \ + M=${NVIDIA_OOT}/drivers/video/tegra/nvmap \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + NVMAP_CONFIG=y \ + NVMAP_CONFIG_LOADABLE_MODULE=y \ + NVMAP_CONFIG_PAGE_POOLS=y \ + NVMAP_CONFIG_HANDLE_AS_ID=n \ + NVMAP_CONFIG_SCIIPC=n \ + modules 2>&1 | tee /tmp/build-nvmap.log; [[ ${PIPESTATUS[0]} -eq 0 ]] && echo "nvmap: OK" || { echo "nvmap: BUILD FAILED (see errors above)"; exit 1; } + cat ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Module.symvers \ + >> /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null || true + + # ── governor_pod_scaling ────────────────────────────────────────────────────── + echo "=== Building governor-pod-scaling ===" + make -j $(nproc) ARCH=arm64 \ + -C /src \ + M=${NVIDIA_OOT}/drivers/devfreq \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + modules 
2>&1 && echo "governor-pod-scaling: OK" || echo "governor-pod-scaling: FAILED (continuing)" + cat ${NVIDIA_OOT}/drivers/devfreq/Module.symvers \ + >> /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null || true + + echo "=== nvidia-oot dependency modules done ===" + echo "Symbols exported so far:" + cat /oot-src/out/nvidia-linux-header/Module.symvers.nvidia | awk '{print $2}' | sort + - | + echo "Generating nvidia-linux-headers..." + cp -av /oot-src/nvidia-oot/include /oot-src/out/nvidia-linux-header/ + cat /src/Module.symvers \ + /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null \ + > /oot-src/out/nvidia-linux-header/Module.symvers + - | + echo "Building nvgpu module (OE4T patches-r36.5)..." + mkdir -p /usr/local/bin + printf '#!/bin/bash\nfiltered=()\nfor arg in "$@"; do\n case "$arg" in\n -fmin-function-alignment=*|-fconserve-stack) ;;\n -fsanitize=bounds-strict) filtered+=("-fsanitize=bounds") ;;\n -Wimplicit-fallthrough=*) filtered+=("-Wimplicit-fallthrough") ;;\n -Wno-maybe-uninitialized) filtered+=("-Wno-uninitialized") ;;\n -Wno-alloc-size-larger-than|-Wno-alloc-size-larger-than=*) ;;\n -fplugin=*|-fplugin-arg-*) ;;\n -pg|-mrecord-mcount|-mfentry|-fpatchable-function-entry=*) ;;\n *) filtered+=("$arg") ;;\n esac\ndone\nexec clang "${filtered[@]}" -Wno-unknown-warning-option -Wno-enum-enum-conversion -Wno-implicit-fallthrough -Wno-gnu-variable-sized-type-not-at-end\n' > /usr/local/bin/clang-oot + chmod +x /usr/local/bin/clang-oot + grep -rl "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" /oot-src/nvgpu/drivers/gpu/nvgpu/ \ + | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if 1 /* force: kernel 6.3+ */|g" + sed -i '1s|^|ccflags-y += -Wno-implicit-fallthrough -Wno-parentheses-equality -Wno-incompatible-function-pointer-types -Wno-sometimes-uninitialized\n|' \ + /oot-src/nvgpu/drivers/gpu/nvgpu/Makefile + grep -rl "class_create(THIS_MODULE," /oot-src/nvgpu/drivers/gpu/nvgpu/ \ + | xargs -r sed -i 
's/class_create(THIS_MODULE, /class_create(/g'
+        # nvhost_host1x.c: retry loop for GA10B syncpt alloc during GR init window (the injected code calls msleep(), so <linux/delay.h> is prepended first)
+        sed -i '1s|^|#include <linux/delay.h>\n|' /oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c
+        python3 -c "import base64,sys;fname='/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c';c=open(fname).read();OLD=base64.b64decode(b'dTMyIG52Z3B1X252aG9zdF9nZXRfc3luY3B0X2NsaWVudF9tYW5hZ2VkKHN0cnVjdCBudmdwdV9udmhvc3RfZGV2ICpudmhvc3RfZGV2LAoJCQkJCSAgIGNvbnN0IGNoYXIgKnN5bmNwdF9uYW1lKQp7CglzdHJ1Y3QgaG9zdDF4X3N5bmNwdCAqc3A7CglzdHJ1Y3QgaG9zdDF4ICpob3N0MXg7CgoJaG9zdDF4ID0gcGxhdGZvcm1fZ2V0X2RydmRhdGEobnZob3N0X2Rldi0+aG9zdDF4X3BkZXYpOwoJaWYgKCFob3N0MXgpCgkJcmV0dXJuIDA7CgoJc3AgPSBob3N0MXhfc3luY3B0X2FsbG9jKGhvc3QxeCwgSE9TVDFYX1NZTkNQVF9DTElFTlRfTUFOQUdFRCB8IEhPU1QxWF9TWU5DUFRfR1BVLAoJCQkJIHN5bmNwdF9uYW1lKTsKCWlmICghc3ApCgkJcmV0dXJuIDA7CgoJcmV0dXJuIGhvc3QxeF9zeW5jcHRfaWQoc3ApOwp9').decode();NEW=base64.b64decode(b'dTMyIG52Z3B1X252aG9zdF9nZXRfc3luY3B0X2NsaWVudF9tYW5hZ2VkKHN0cnVjdCBudmdwdV9udmhvc3RfZGV2ICpudmhvc3RfZGV2LAoJCQkJICAgY29uc3QgY2hhciAqc3luY3B0X25hbWUpCnsKCXN0cnVjdCBob3N0MXhfc3luY3B0ICpzcCA9IE5VTEw7CglzdHJ1Y3QgaG9zdDF4ICpob3N0MXg7CglpbnQgcmV0cnk7CgoJLyogbnZncHUgNS4xMC4yOiByZXRyeSB1cCB0byA1bXMgd2hlbiBob3N0MXggc3luY3B0IGFsbG9jIGZhaWxzIGR1cmluZyBHUiBpbml0IHdpbmRvdy4KCSAqIEdBMTBCIEVSUkFUQV9TWU5DUFRfSU5WQUxJRF9JRF8wIHJlamVjdHMgaWQ9MDsgZHVyaW5nIH4xLTJtcyBhZnRlciBmaXJzdCBrZXJuZWwgc3VibWl0CgkgKiBob3N0MXhfc3luY3B0X2FsbG9jIG1heSByZXR1cm4gTlVMTCwgY2F1c2luZyBDVURBIGVycm9yIDk5OSBvbiBjdWRhU3RyZWFtU3luY2hyb25pemUuICovCglmb3IgKHJldHJ5ID0gMDsgcmV0cnkgPCA1OyByZXRyeSsrKSB7CgkJaG9zdDF4ID0gcGxhdGZvcm1fZ2V0X2RydmRhdGEobnZob3N0X2Rldi0+aG9zdDF4X3BkZXYpOwoJCWlmICghaG9zdDF4KSB7CgkJCXByX3dhcm5fcmF0ZWxpbWl0ZWQoIm52Z3B1OiBob3N0MXggbm90IHJlYWR5LCBzeW5jcHQgcmV0cnkgJWQvNVxuIiwgcmV0cnkgKyAxKTsKCQkJbXNsZWVwKDEpOwoJCQljb250aW51ZTsKCQl9CgkJc3AgPSBob3N0MXhfc3luY3B0X2FsbG9jKGhvc3QxeCwKCQkJCUhPU1QxWF9TWU5DUFRfQ0xJRU5UX01BTkFHRUQgfCBIT1NUMVhfU1lOQ1BUX0dQVSwKCQkJCXN5bmNwdF9uYW1lKTsKCQlpZiAoc3Ap
CgkJCWJyZWFrOwoJCXByX3dhcm5fcmF0ZWxpbWl0ZWQoIm52Z3B1OiBzeW5jcHRfYWxsb2MgTlVMTCwgcmV0cnkgJWQvNVxuIiwgcmV0cnkgKyAxKTsKCQltc2xlZXAoMSk7Cgl9CglpZiAoIXNwKSB7CgkJcHJfZXJyX3JhdGVsaW1pdGVkKCJudmdwdTogZ2V0X3N5bmNwdF9jbGllbnRfbWFuYWdlZDogZmFpbGVkIGFmdGVyIHJldHJpZXNcbiIpOwoJCXJldHVybiAwOwoJfQoJcmV0dXJuIGhvc3QxeF9zeW5jcHRfaWQoc3ApOwp9').decode();assert OLD in c,'nvhost_host1x.c: pattern not found';c=c.replace(OLD,NEW,1);open(fname,'w').write(c);print('Patched nvhost_host1x.c: retry loop in nvgpu_nvhost_get_syncpt_client_managed')" + NVHOST_H1X=/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c + awk '/return host1x_syncpt_id\(sp\);/ { print "\t/* GA10b ERRATA_SYNCPT_INVALID_ID_0: skip syncpt id=0."; print "\t * host1x_syncpt_alloc may return id=0 when no GPU pool in DT."; print "\t * nvgpu rejects id=0 causing CUDA error 999. Hold it and re-alloc. */"; print "\tif (host1x_syncpt_id(sp) == 0U) {"; print "\t\tstruct host1x_syncpt *sp_skip = sp;"; print "\t\tsp = host1x_syncpt_alloc(host1x,"; print "\t\t\t\tHOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU,"; print "\t\t\t\tsyncpt_name);"; print "\t\thost1x_syncpt_put(sp_skip);"; print "\t\tif (!sp)"; print "\t\t\treturn 0;"; print "\t}"; print ""; print "\treturn host1x_syncpt_id(sp);"; next } { print }' "$NVHOST_H1X" > "$NVHOST_H1X.tmp" && mv "$NVHOST_H1X.tmp" "$NVHOST_H1X" + NETLIST_PRIV=/oot-src/nvgpu/drivers/gpu/nvgpu/common/netlist/netlist_priv.h + if [ -f "${NETLIST_PRIV}" ]; then + sed -i 's/struct netlist_region regions\[1\]/struct netlist_region regions[]/' "${NETLIST_PRIV}" + fi + sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/config/auto.conf 2>/dev/null || true + sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/generated/autoconf.h 2>/dev/null || true + make -j $(nproc) ARCH=arm64 \ + -C /src \ + M=/oot-src/nvgpu/drivers/gpu/nvgpu \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + CONFIG_GK20A=m \ + CONFIG_TEGRA_GK20A_NVHOST=y \ + CONFIG_TEGRA_GK20A_NVHOST_HOST1X=y \ + 
CONFIG_TEGRA_HWPM=n \
+            srctree.nvgpu=/oot-src/nvgpu \
+            srctree.nvidia=/oot-src/nvidia-oot \
+            srctree.nvconftest=/oot-src/out/nvidia-conftest \
+            LLVM=1 \
+            KBUILD_EXTRA_SYMBOLS=/oot-src/out/nvidia-linux-header/Module.symvers.nvidia \
+            KBUILD_MODPOST_WARN=1 \
+            modules
+        echo "nvgpu done."
+    install:
+      - |
+        KERNEL_RELEASE=$(cat /src/include/config/kernel.release)
+        echo "Installing for kernel ${KERNEL_RELEASE}..."
+        mkdir -p /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra
+        mkdir -p /rootfs/usr/lib/modules/${KERNEL_RELEASE}/kernel/drivers/gpu/host1x
+
+        # nvgpu
+        find /oot-src/nvgpu -name "*.ko" \
+            -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \;
+
+        # OOT host1x: installed at kernel/drivers/gpu/host1x/ to shadow the in-tree host1x.ko.
+        # The squashfs overlay replaces the in-tree module so nvgpu's NVHOST=y path gets
+        # the OE4T host1x with HOST1X_SYNCPT_GPU support instead of the upstream stub.
+        find /oot-src/nvidia-oot/drivers/gpu/host1x -name "host1x.ko" \
+            -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/kernel/drivers/gpu/host1x/ \;
+
+        # host1x-fence + nvhost-ctrl-shim + every other nvidia-oot module go to extra/; host1x.ko is excluded from the catch-all because it is already installed under kernel/ above and a second copy in extra/ would leave two host1x modules in the tree
+        find /oot-src/nvidia-oot/drivers/gpu/host1x-fence -name "*.ko" \
+            -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \;
+        find /oot-src/nvhost-ctrl-shim -name "*.ko" \
+            -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \;
+        find /oot-src/nvidia-oot -name "*.ko" ! -name "host1x.ko" \
+            -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \;
+
+        # Sign all modules with the kernel's signing key (auto-generated by kernel-build stage)
+        echo "Signing kernel modules..."
+        find /rootfs/usr/lib/modules/${KERNEL_RELEASE}/ \
+            -name "*.ko" 2>/dev/null | while IFS= read -r ko; do  # IFS=/-r: take each path verbatim
+          /src/scripts/sign-file sha512 \
+            /src/certs/signing_key.pem \
+            /src/certs/signing_key.x509 \
+            "${ko}" || { echo "ERROR: failed to sign ${ko}" >&2; exit 1; }  # never report an unsigned module as signed
+          echo " Signed: ${ko}"
+        done || exit 1  # the loop runs in a pipeline subshell; propagate its failure to the step
+
+        # Module load order: host1x → host1x-fence → nvhost-ctrl-shim → nvmap → nvgpu
+        mkdir -p /rootfs/usr/lib/modprobe.d
+        printf 'softdep nvhost-ctrl-shim pre: host1x host1x-fence\n' \
+            > /rootfs/usr/lib/modprobe.d/nvidia-tegra.conf
+        printf 'softdep nvgpu pre: host1x nvmap host1x-fence nvhost-ctrl-shim mc-utils\n' \
+            >> /rootfs/usr/lib/modprobe.d/nvidia-tegra.conf
+
+        echo "Installed modules:"
+        find /rootfs -name "*.ko" | sort
+finalize:
+  - from: /rootfs
+    to: /rootfs
From 88053497499a57a52d99bc0a8b021f17fc9c25e5 Mon Sep 17 00:00:00 2001
From: Alexander Schwankner
Date: Wed, 22 Apr 2026 12:25:53 +0200
Subject: [PATCH 02/18] refactor: move OOT Makefile patching to scripts/fixup.sh

Per review feedback: extract the OOT module Makefile patching block from
pkg.yaml inline shell into nvidia-tegra-nvgpu/scripts/fixup.sh. Bldr makes
the package directory available at /pkg/ during build, so the script is
invoked as /pkg/scripts/fixup.sh.

Signed-off-by: Alexander Schwankner
---
 nvidia-tegra-nvgpu/pkg.yaml         | 125 +--------------------------
 nvidia-tegra-nvgpu/scripts/fixup.sh | 127 ++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+), 124 deletions(-)
 create mode 100644 nvidia-tegra-nvgpu/scripts/fixup.sh

diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml
index cfea8a3e1..e3662fb5b 100644
--- a/nvidia-tegra-nvgpu/pkg.yaml
+++ b/nvidia-tegra-nvgpu/pkg.yaml
@@ -59,130 +59,7 @@ steps:
             -f /oot-src/out/nvidia-conftest/nvidia/Makefile
         echo "conftest done."
       - |
-        # Patch OOT module Makefiles: remove -Werror and add required include paths.
-        # srctree.nvconftest and srctree.nvidia-oot are passed as make vars at build time.
- NVIDIA_OOT=/oot-src/nvidia-oot - CONFTEST_OUT=/oot-src/out/nvidia-conftest - - # ── GCC plugin latent_entropy fix ──────────────────────────────────────────── - # CONFIG_GCC_PLUGIN_LATENT_ENTROPY injects a global 'latent_entropy' variable - # via a GCC plugin. Clang doesn't run the plugin → 'latent_entropy' is undeclared - # → linux/random.h:24 compile error for ALL OOT modules. - # Fix: strip the macro from auto.conf and autoconf.h before any OOT build. - sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/config/auto.conf 2>/dev/null || true - sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/generated/autoconf.h 2>/dev/null || true - echo "Removed CONFIG_GCC_PLUGIN_LATENT_ENTROPY (Clang compat fix)" - - # OOT host1x: add conftest + nvidia-oot includes (exports host1x_fence_extract) - printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ - >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile - printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ - >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile - printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/gpu/host1x/include\n' \ - >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile - - # Force conftest macros for OOT host1x on kernel 6.18 - grep -rl "NV_IOMMU_MAP_HAS_GFP_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_IOMMU_MAP_HAS_GFP_ARG)|#if 1 /* force: kernel 6.3+ */|g" - grep -rl "NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT)|#if 1 /* force: kernel 6.11+ */|g" - grep -rl "NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT)|#if 1 /* force: present */|g" - grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g" - 
grep -rl "NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG)|#if 1 /* force: kernel 6.x+ */|g" - grep -rl "NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG)|#if 1 /* force: kernel 6.x+ */|g" - echo "Patched OOT host1x: forced conftest macro code paths for kernel 6.18" - - # host1x syncpt.c: permanently reserve syncpt id=0 so host1x_syncpt_alloc never returns it. - # GA10b has NVGPU_ERRATA_SYNCPT_INVALID_ID_0: nvgpu rejects id=0 → channel init fails → error 999. - # OOT host1x (ccf7646c) marks syncpt[0] with name="reserved" but does NOT set kref=1, so the - # alloc loop (which skips syncpts where kref_read(&sp->ref) != 0) still returns id=0. - # Fix: add kref_init(&syncpt[0].ref) before the name assignment, matching what newer OE4T - # commits already do (e.g. 6e071c0). Guard is idempotent — safe even if already present. - SYNCPT_C=${NVIDIA_OOT}/drivers/gpu/host1x/syncpt.c - if grep -q 'syncpt\[0\]\.name = kstrdup' "${SYNCPT_C}" 2>/dev/null; then - if ! 
grep -q 'kref_init.*syncpt\[0\]' "${SYNCPT_C}" 2>/dev/null; then - sed -i 's/\(syncpt\[0\]\.name = kstrdup("reserved", GFP_KERNEL);\)/kref_init(\&syncpt[0].ref);\n\t\t\1/' "${SYNCPT_C}" - echo "Patched host1x syncpt.c: added kref_init(&syncpt[0].ref) — id=0 permanently reserved" - else - echo "host1x syncpt.c: kref_init(&syncpt[0].ref) already present — no patch needed" - fi - else - echo "WARNING: host1x syncpt.c pattern not found — syncpt id=0 reservation patch skipped" - fi - - # host1x-fence: remove -Werror, add conftest + nvidia-oot includes - sed -i 's|ccflags-y += -Werror||g' \ - ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile - printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ - >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile - printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ - >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile - printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/gpu/host1x/include\n' \ - >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile - grep -rl "class_create(THIS_MODULE," ${NVIDIA_OOT}/drivers/gpu/host1x-fence/ \ - | xargs -r sed -i 's/class_create(THIS_MODULE, /class_create(/g' - grep -rl "host1x_fence_devnode" ${NVIDIA_OOT}/drivers/gpu/host1x-fence/ \ - | xargs -r sed -i 's/static char \*host1x_fence_devnode(struct device \*/static char *host1x_fence_devnode(const struct device */g' - echo "Patched host1x-fence: class_create + devnode const fixes for kernel 6.x" - - # nvmap: remove subdir -Werror, add conftest + nvidia-oot includes - sed -i 's|subdir-ccflags-y += -Werror||g' \ - ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile - printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile - printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile - printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/video/tegra/nvmap/include\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile - printf 'ccflags-y += 
-DNV_GET_USER_PAGES_HAS_ARGS_FLAGS\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile - printf 'ccflags-y += -DNV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile - printf 'ccflags-y += -DNV_IOREMAP_PROT_HAS_PGPROT_T_ARG\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile - - # mc-utils: add nvidia-oot includes - printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ - >> ${NVIDIA_OOT}/drivers/platform/tegra/mc-utils/Makefile - - # governor_pod_scaling: add conftest + nvidia-oot includes - printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ - >> ${NVIDIA_OOT}/drivers/devfreq/Makefile - printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ - >> ${NVIDIA_OOT}/drivers/devfreq/Makefile - - echo "Include paths patched into OOT module Makefiles." - - # Force conftest macro paths in nvmap source for kernel 6.18 - grep -rl "NV_GET_USER_PAGES_HAS_ARGS_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)|#if 1 /* force: kernel 6.5+ */|g" - grep -rl "NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT)|#if 1 /* force: kernel 6.2+ */|g" - grep -rl "NV_IOREMAP_PROT_HAS_PGPROT_T_ARG" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_IOREMAP_PROT_HAS_PGPROT_T_ARG)|#if 1 /* force: kernel 6.15+ */|g" - grep -rl "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if 1 /* force: kernel 6.3+ */|g" - grep -rl "NV___ASSIGN_STR_HAS_NO_SRC_ARG" \ - ${NVIDIA_OOT}/include/trace/events/ \ - ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ 2>/dev/null \ - | xargs -r sed -i "s|#if defined(NV___ASSIGN_STR_HAS_NO_SRC_ARG)|#if 1 /* force: kernel 6.10+ */|g" - grep -rl "NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG" \ 
- ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG)|#if 1 /* force: kernel 6.14+ */|g" - grep -rl "NV_FILE_STRUCT_HAS_F_REF" \ - ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_FILE_STRUCT_HAS_F_REF)|#if 1 /* force: kernel 6.13+ */|g" - grep -rl "NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG" \ - ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG)|#if 1 /* force: kernel 6.7+ */|g" - grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" \ - ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g" - echo "Patched nvmap: forced conftest macro code paths for kernel 6.18" + /pkg/scripts/fixup.sh - | # ── Cross-compiler mismatch fix: Clang wrapper ──────────────────────────── # Strips GCC-only flags before passing to clang (source-independent fix). diff --git a/nvidia-tegra-nvgpu/scripts/fixup.sh b/nvidia-tegra-nvgpu/scripts/fixup.sh new file mode 100644 index 000000000..395b03cbd --- /dev/null +++ b/nvidia-tegra-nvgpu/scripts/fixup.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# Patch OOT module Makefiles: remove -Werror and add required include paths. +# srctree.nvconftest and srctree.nvidia-oot are passed as make vars at build time. +set -euo pipefail + +NVIDIA_OOT=/oot-src/nvidia-oot +CONFTEST_OUT=/oot-src/out/nvidia-conftest + +# ── GCC plugin latent_entropy fix ──────────────────────────────────────────── +# CONFIG_GCC_PLUGIN_LATENT_ENTROPY injects a global 'latent_entropy' variable +# via a GCC plugin. Clang doesn't run the plugin → 'latent_entropy' is undeclared +# → linux/random.h:24 compile error for ALL OOT modules. +# Fix: strip the macro from auto.conf and autoconf.h before any OOT build. 
+sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/config/auto.conf 2>/dev/null || true
+sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/generated/autoconf.h 2>/dev/null || true
+echo "Removed CONFIG_GCC_PLUGIN_LATENT_ENTROPY (Clang compat fix)"
+
+# OOT host1x: add conftest + nvidia-oot includes (exports host1x_fence_extract)
+printf 'ccflags-y += -I$(srctree.nvconftest)\n' \
+    >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \
+    >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/gpu/host1x/include\n' \
+    >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile
+
+# Force conftest macros for OOT host1x on kernel 6.18. Under 'set -o pipefail' a grep that matches nothing (exit 1) would abort the script, so exit 1 is tolerated explicitly; any other grep failure still aborts.
+{ grep -rl "NV_IOMMU_MAP_HAS_GFP_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_IOMMU_MAP_HAS_GFP_ARG)|#if 1 /* force: kernel 6.3+ */|g"
+{ grep -rl "NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT)|#if 1 /* force: kernel 6.11+ */|g"
+{ grep -rl "NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT)|#if 1 /* force: present */|g"
+{ grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" ${NVIDIA_OOT}/drivers/gpu/host1x/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g"
+{ grep -rl "NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG)|#if 1 /* force: kernel 6.x+ */|g"
+{ grep -rl "NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG)|#if 1 /* force: kernel 6.x+ */|g"
+echo "Patched OOT host1x: forced conftest macro code paths for kernel 6.18"
+
+# host1x syncpt.c: permanently reserve syncpt id=0 so host1x_syncpt_alloc never returns it.
+# GA10b has NVGPU_ERRATA_SYNCPT_INVALID_ID_0: nvgpu rejects id=0 → channel init fails → error 999.
+# OOT host1x (ccf7646c) marks syncpt[0] with name="reserved" but does NOT set kref=1, so the
+# alloc loop (which skips syncpts where kref_read(&sp->ref) != 0) still returns id=0.
+# Fix: add kref_init(&syncpt[0].ref) before the name assignment, matching what newer OE4T
+# commits already do (e.g. 6e071c0). Guard is idempotent — safe even if already present.
+SYNCPT_C=${NVIDIA_OOT}/drivers/gpu/host1x/syncpt.c
+if grep -q 'syncpt\[0\]\.name = kstrdup' "${SYNCPT_C}" 2>/dev/null; then
+    if ! grep -q 'kref_init.*syncpt\[0\]' "${SYNCPT_C}" 2>/dev/null; then
+        sed -i 's/\(syncpt\[0\]\.name = kstrdup("reserved", GFP_KERNEL);\)/kref_init(\&syncpt[0].ref);\n\t\t\1/' "${SYNCPT_C}"
+        echo "Patched host1x syncpt.c: added kref_init(&syncpt[0].ref) — id=0 permanently reserved"
+    else
+        echo "host1x syncpt.c: kref_init(&syncpt[0].ref) already present — no patch needed"
+    fi
+else
+    echo "WARNING: host1x syncpt.c pattern not found — syncpt id=0 reservation patch skipped" >&2
+fi
+
+# host1x-fence: remove -Werror, add conftest + nvidia-oot includes
+sed -i 's|ccflags-y += -Werror||g' \
+    ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile
+printf 'ccflags-y += -I$(srctree.nvconftest)\n' \
+    >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \
+    >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/gpu/host1x/include\n' \
+    >> ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile
+{ grep -rl "class_create(THIS_MODULE," ${NVIDIA_OOT}/drivers/gpu/host1x-fence/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i 's/class_create(THIS_MODULE, /class_create(/g'
+{ grep -rl "host1x_fence_devnode" ${NVIDIA_OOT}/drivers/gpu/host1x-fence/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i 's/static char \*host1x_fence_devnode(struct device \*/static char *host1x_fence_devnode(const struct device */g'
+echo "Patched host1x-fence: class_create + devnode const fixes for kernel 6.x"
+
+# nvmap: remove subdir -Werror, add conftest + nvidia-oot includes
+sed -i 's|subdir-ccflags-y += -Werror||g' \
+    ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile
+printf 'ccflags-y += -I$(srctree.nvconftest)\n' \
+    >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \
+    >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/video/tegra/nvmap/include\n' \
+    >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile
+printf 'ccflags-y += -DNV_GET_USER_PAGES_HAS_ARGS_FLAGS\n' \
+    >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile
+printf 'ccflags-y += -DNV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT\n' \
+    >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile
+printf 'ccflags-y += -DNV_IOREMAP_PROT_HAS_PGPROT_T_ARG\n' \
+    >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile
+
+# mc-utils: add nvidia-oot includes
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \
+    >> ${NVIDIA_OOT}/drivers/platform/tegra/mc-utils/Makefile
+
+# governor_pod_scaling: add conftest + nvidia-oot includes
+printf 'ccflags-y += -I$(srctree.nvconftest)\n' \
+    >> ${NVIDIA_OOT}/drivers/devfreq/Makefile
+printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \
+    >> ${NVIDIA_OOT}/drivers/devfreq/Makefile
+
+echo "Include paths patched into OOT module Makefiles."
+
+# Force conftest macro paths in nvmap source for kernel 6.18 (a grep with no match, exit 1, is tolerated under pipefail; the trace/events lookup also tolerates a missing directory, as its 2>/dev/null already intended)
+{ grep -rl "NV_GET_USER_PAGES_HAS_ARGS_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)|#if 1 /* force: kernel 6.5+ */|g"
+{ grep -rl "NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT)|#if 1 /* force: kernel 6.2+ */|g"
+{ grep -rl "NV_IOREMAP_PROT_HAS_PGPROT_T_ARG" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_IOREMAP_PROT_HAS_PGPROT_T_ARG)|#if 1 /* force: kernel 6.15+ */|g"
+{ grep -rl "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if 1 /* force: kernel 6.3+ */|g"
+{ grep -rl "NV___ASSIGN_STR_HAS_NO_SRC_ARG" \
+    ${NVIDIA_OOT}/include/trace/events/ \
+    ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ 2>/dev/null || true; } \
+    | xargs -r sed -i "s|#if defined(NV___ASSIGN_STR_HAS_NO_SRC_ARG)|#if 1 /* force: kernel 6.10+ */|g"
+{ grep -rl "NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG" \
+    ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG)|#if 1 /* force: kernel 6.14+ */|g"
+{ grep -rl "NV_FILE_STRUCT_HAS_F_REF" \
+    ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_FILE_STRUCT_HAS_F_REF)|#if 1 /* force: kernel 6.13+ */|g"
+{ grep -rl "NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG" \
+    ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG)|#if 1 /* force: kernel 6.7+ */|g"
+{ grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" \
+    ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ || [ $? -eq 1 ]; } \
+    | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g"
+echo "Patched nvmap: forced conftest macro code paths for 
kernel 6.18" From e60c74f2ced6860cfa5fc63ecf2d6c510833636e Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 12:49:20 +0200 Subject: [PATCH 03/18] refactor: extract nvhost_host1x patch to script, fail on governor build error - Move base64-encoded nvhost_host1x.c patch to scripts/patch_nvhost_host1x.py for readability (per review feedback) - governor_pod_scaling build failures now exit 1 instead of continuing silently (per review feedback) Signed-off-by: Alexander Schwankner --- nvidia-tegra-nvgpu/pkg.yaml | 4 +- .../scripts/patch_nvhost_host1x.py | 75 +++++++++++++++++++ 2 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index e3662fb5b..fa6ec0c74 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -165,7 +165,7 @@ steps: srctree.nvidia-oot=${NVIDIA_OOT} \ srctree.nvconftest=${NVIDIA_CONFTEST} \ LLVM=1 \ - modules 2>&1 && echo "governor-pod-scaling: OK" || echo "governor-pod-scaling: FAILED (continuing)" + modules 2>&1 | tee /tmp/build-governor.log; [[ ${PIPESTATUS[0]} -eq 0 ]] && echo "governor-pod-scaling: OK" || { echo "governor-pod-scaling: BUILD FAILED (see errors above)"; exit 1; } cat ${NVIDIA_OOT}/drivers/devfreq/Module.symvers \ >> /oot-src/out/nvidia-linux-header/Module.symvers.nvidia 2>/dev/null || true @@ -191,7 +191,7 @@ steps: | xargs -r sed -i 's/class_create(THIS_MODULE, /class_create(/g' # nvhost_host1x.c: retry loop for GA10B syncpt alloc during GR init window sed -i '1s|^|#include \n|' /oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c - python3 -c "import 
base64,sys;fname='/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c';c=open(fname).read();OLD=base64.b64decode(b'dTMyIG52Z3B1X252aG9zdF9nZXRfc3luY3B0X2NsaWVudF9tYW5hZ2VkKHN0cnVjdCBudmdwdV9udmhvc3RfZGV2ICpudmhvc3RfZGV2LAoJCQkJCSAgIGNvbnN0IGNoYXIgKnN5bmNwdF9uYW1lKQp7CglzdHJ1Y3QgaG9zdDF4X3N5bmNwdCAqc3A7CglzdHJ1Y3QgaG9zdDF4ICpob3N0MXg7CgoJaG9zdDF4ID0gcGxhdGZvcm1fZ2V0X2RydmRhdGEobnZob3N0X2Rldi0+aG9zdDF4X3BkZXYpOwoJaWYgKCFob3N0MXgpCgkJcmV0dXJuIDA7CgoJc3AgPSBob3N0MXhfc3luY3B0X2FsbG9jKGhvc3QxeCwgSE9TVDFYX1NZTkNQVF9DTElFTlRfTUFOQUdFRCB8IEhPU1QxWF9TWU5DUFRfR1BVLAoJCQkJIHN5bmNwdF9uYW1lKTsKCWlmICghc3ApCgkJcmV0dXJuIDA7CgoJcmV0dXJuIGhvc3QxeF9zeW5jcHRfaWQoc3ApOwp9').decode();NEW=base64.b64decode(b'dTMyIG52Z3B1X252aG9zdF9nZXRfc3luY3B0X2NsaWVudF9tYW5hZ2VkKHN0cnVjdCBudmdwdV9udmhvc3RfZGV2ICpudmhvc3RfZGV2LAoJCQkJICAgY29uc3QgY2hhciAqc3luY3B0X25hbWUpCnsKCXN0cnVjdCBob3N0MXhfc3luY3B0ICpzcCA9IE5VTEw7CglzdHJ1Y3QgaG9zdDF4ICpob3N0MXg7CglpbnQgcmV0cnk7CgoJLyogbnZncHUgNS4xMC4yOiByZXRyeSB1cCB0byA1bXMgd2hlbiBob3N0MXggc3luY3B0IGFsbG9jIGZhaWxzIGR1cmluZyBHUiBpbml0IHdpbmRvdy4KCSAqIEdBMTBCIEVSUkFUQV9TWU5DUFRfSU5WQUxJRF9JRF8wIHJlamVjdHMgaWQ9MDsgZHVyaW5nIH4xLTJtcyBhZnRlciBmaXJzdCBrZXJuZWwgc3VibWl0CgkgKiBob3N0MXhfc3luY3B0X2FsbG9jIG1heSByZXR1cm4gTlVMTCwgY2F1c2luZyBDVURBIGVycm9yIDk5OSBvbiBjdWRhU3RyZWFtU3luY2hyb25pemUuICovCglmb3IgKHJldHJ5ID0gMDsgcmV0cnkgPCA1OyByZXRyeSsrKSB7CgkJaG9zdDF4ID0gcGxhdGZvcm1fZ2V0X2RydmRhdGEobnZob3N0X2Rldi0+aG9zdDF4X3BkZXYpOwoJCWlmICghaG9zdDF4KSB7CgkJCXByX3dhcm5fcmF0ZWxpbWl0ZWQoIm52Z3B1OiBob3N0MXggbm90IHJlYWR5LCBzeW5jcHQgcmV0cnkgJWQvNVxuIiwgcmV0cnkgKyAxKTsKCQkJbXNsZWVwKDEpOwoJCQljb250aW51ZTsKCQl9CgkJc3AgPSBob3N0MXhfc3luY3B0X2FsbG9jKGhvc3QxeCwKCQkJCUhPU1QxWF9TWU5DUFRfQ0xJRU5UX01BTkFHRUQgfCBIT1NUMVhfU1lOQ1BUX0dQVSwKCQkJCXN5bmNwdF9uYW1lKTsKCQlpZiAoc3ApCgkJCWJyZWFrOwoJCXByX3dhcm5fcmF0ZWxpbWl0ZWQoIm52Z3B1OiBzeW5jcHRfYWxsb2MgTlVMTCwgcmV0cnkgJWQvNVxuIiwgcmV0cnkgKyAxKTsKCQltc2xlZXAoMSk7Cgl9CglpZiAoIXNwKSB7CgkJcHJfZXJyX3JhdGVsaW1pdGVkKCJudmdwdTogZ2V0X3N5bmNwdF9jbGllbnRfbWFuYWdlZDogZmFp
bGVkIGFmdGVyIHJldHJpZXNcbiIpOwoJCXJldHVybiAwOwoJfQoJcmV0dXJuIGhvc3QxeF9zeW5jcHRfaWQoc3ApOwp9').decode();assert OLD in c,'nvhost_host1x.c: pattern not found';c=c.replace(OLD,NEW,1);open(fname,'w').write(c);print('Patched nvhost_host1x.c: retry loop in nvgpu_nvhost_get_syncpt_client_managed')" + python3 /pkg/scripts/patch_nvhost_host1x.py NVHOST_H1X=/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c awk '/return host1x_syncpt_id\(sp\);/ { print "\t/* GA10b ERRATA_SYNCPT_INVALID_ID_0: skip syncpt id=0."; print "\t * host1x_syncpt_alloc may return id=0 when no GPU pool in DT."; print "\t * nvgpu rejects id=0 causing CUDA error 999. Hold it and re-alloc. */"; print "\tif (host1x_syncpt_id(sp) == 0U) {"; print "\t\tstruct host1x_syncpt *sp_skip = sp;"; print "\t\tsp = host1x_syncpt_alloc(host1x,"; print "\t\t\t\tHOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU,"; print "\t\t\t\tsyncpt_name);"; print "\t\thost1x_syncpt_put(sp_skip);"; print "\t\tif (!sp)"; print "\t\t\treturn 0;"; print "\t}"; print ""; print "\treturn host1x_syncpt_id(sp);"; next } { print }' "$NVHOST_H1X" > "$NVHOST_H1X.tmp" && mv "$NVHOST_H1X.tmp" "$NVHOST_H1X" NETLIST_PRIV=/oot-src/nvgpu/drivers/gpu/nvgpu/common/netlist/netlist_priv.h diff --git a/nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py b/nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py new file mode 100644 index 000000000..335f7759d --- /dev/null +++ b/nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +""" +Patch nvgpu_nvhost_get_syncpt_client_managed() in nvhost_host1x.c to add a +retry loop for GA10B syncpt allocation failures during GR init window. + +GA10B ERRATA_SYNCPT_INVALID_ID_0: nvgpu rejects syncpt id=0. During ~1-2ms +after the first kernel submit, host1x_syncpt_alloc may return NULL, causing +CUDA error 999 on cudaStreamSynchronize. The retry loop waits up to 5ms. 
+""" +import sys + +FNAME = "/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c" + +OLD = """\ +u32 nvgpu_nvhost_get_syncpt_client_managed(struct nvgpu_nvhost_dev *nvhost_dev, + const char *syncpt_name) +{ + struct host1x_syncpt *sp; + struct host1x *host1x; + + host1x = platform_get_drvdata(nvhost_dev->host1x_pdev); + if (!host1x) + return 0; + + sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU, + syncpt_name); + if (!sp) + return 0; + + return host1x_syncpt_id(sp); +}\ +""" + +NEW = """\ +u32 nvgpu_nvhost_get_syncpt_client_managed(struct nvgpu_nvhost_dev *nvhost_dev, + const char *syncpt_name) +{ + struct host1x_syncpt *sp = NULL; + struct host1x *host1x; + int retry; + + /* nvgpu 5.10.2: retry up to 5ms when host1x syncpt alloc fails during GR init window. + * GA10B ERRATA_SYNCPT_INVALID_ID_0 rejects id=0; during ~1-2ms after first kernel submit + * host1x_syncpt_alloc may return NULL, causing CUDA error 999 on cudaStreamSynchronize. */ + for (retry = 0; retry < 5; retry++) { + host1x = platform_get_drvdata(nvhost_dev->host1x_pdev); + if (!host1x) { + pr_warn_ratelimited("nvgpu: host1x not ready, syncpt retry %d/5\\n", retry + 1); + msleep(1); + continue; + } + sp = host1x_syncpt_alloc(host1x, + HOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU, + syncpt_name); + if (sp) + break; + pr_warn_ratelimited("nvgpu: syncpt_alloc NULL, retry %d/5\\n", retry + 1); + msleep(1); + } + if (!sp) { + pr_err_ratelimited("nvgpu: get_syncpt_client_managed: failed after retries\\n"); + return 0; + } + return host1x_syncpt_id(sp); +}\ +""" + +content = open(FNAME).read() +if OLD not in content: + print(f"ERROR: pattern not found in {FNAME}", file=sys.stderr) + sys.exit(1) + +content = content.replace(OLD, NEW, 1) +open(FNAME, "w").write(content) +print("Patched nvhost_host1x.c: retry loop in nvgpu_nvhost_get_syncpt_client_managed") From 6da63a145c9c9c8b78f57b12c4f719672d1f7801 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 
Apr 2026 12:56:11 +0200 Subject: [PATCH 04/18] chore: add renovate tags for OE4T kernel module dependencies Add renovate datasource annotations for the three OE4T git repositories so dependency updates can be tracked automatically: - OE4T/linux-nvgpu (GA10B GPU driver) - OE4T/linux-nv-oot (NVIDIA out-of-tree modules) - OE4T/linux-hwpm (hardware performance monitor) Note: sha256/sha512 checksums must be updated manually alongside the commit hash when renovate proposes an update. Signed-off-by: Alexander Schwankner --- Pkgfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Pkgfile b/Pkgfile index 7b08ebaa5..1588241e5 100644 --- a/Pkgfile +++ b/Pkgfile @@ -290,12 +290,15 @@ vars: gdrcopy_sha512: c717f118eff8cd5a8dc35613c3881818f8b71dc493461dd0151ce7c882f8e2c2d852e22733fab4e2bec57219e10eec874c11b4fad90dd4815ae572840ed19d28 # OE4T (NVIDIA Tegra) kernel modules for Jetson Orin NX (Tegra234 / GA10B) + # renovate: datasource=git-refs versioning=git depName=https://github.com/OE4T/linux-nvgpu.git oe4t_nvgpu_commit: d530a48d64f9ad3020d9f3307f53e8dde8e3fba1 oe4t_nvgpu_sha256: adc5864edf76d986866e386803a9e628ee229e69ea34867b92b978a0b44f3d54 oe4t_nvgpu_sha512: a7c7f0b5d3174bf41abc77c77009f46182358f93936aedbe4993e63ff7fc94e21bfd83c3fa0b41af5836866b9c200427504d8f26685d567c11722e7a7bfd3ed9 + # renovate: datasource=git-refs versioning=git depName=https://github.com/OE4T/linux-nv-oot.git oe4t_nv_oot_commit: ccf7646c57462776fe1093af6643c54653f59861 oe4t_nv_oot_sha256: d1957d2c4908a37b4c040aef3a7413a9fbb3adcc8575cc9347c58af1c8b95169 oe4t_nv_oot_sha512: 66b8b4a0672a2c044d42e58914a6d0000fc3a01f1343fa39190794e5badbcb52cd3c2254bd3ca2b6be86286dadbbe60db5cb26c74264747b783937443e287a33 + # renovate: datasource=git-refs versioning=git depName=https://github.com/OE4T/linux-hwpm.git oe4t_hwpm_commit: 4d8a6998760d85f98637dbf61597bfbb88158206 oe4t_hwpm_sha256: 96c7656bdad0bf330e7fd58981b8a4eec4717a76840cefbe84e720d88b46be55 oe4t_hwpm_sha512: 
971b91fcae284c59dbe411356109bce9b1a7884b8fac41c9683c79bf3eddef606e71ebaa9c06ad2389b2ba382c3c1125fabe0cbaeb5edac857e218077ed24ef9 From 0ca0ba06b17ccb5fdf89445cd04451b14f9ccbeb Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 13:31:30 +0200 Subject: [PATCH 05/18] refactor: use modules_install with INSTALL_MOD_STRIP=1 CONFIG_MODULE_SIG_ALL=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace manual sign-file loop with standard KBUILD modules_install for each OOT module directory. Signing and debug-info stripping are handled natively by the kernel build system (same pattern as gasket-driver, zfs, hailort, etc.). host1x uses INSTALL_MOD_DIR=kernel/drivers/gpu/host1x to shadow the in-tree module; all other modules install to INSTALL_MOD_DIR=extra/nvidia-tegra. Add test: section with module-signature and fhs-validator checks. Fix finalize to: /rootfs → to: / (matches pkgs convention). Signed-off-by: Alexander Schwankner --- nvidia-tegra-nvgpu/pkg.yaml | 143 ++++++++++++++++++++++++++++-------- 1 file changed, 113 insertions(+), 30 deletions(-) diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index fa6ec0c74..5a7c33df7 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -219,39 +219,116 @@ steps: echo "nvgpu done." install: - | - KERNEL_RELEASE=$(cat /src/include/config/kernel.release) - echo "Installing for kernel ${KERNEL_RELEASE}..." - mkdir -p /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra - mkdir -p /rootfs/usr/lib/modules/${KERNEL_RELEASE}/kernel/drivers/gpu/host1x + NVIDIA_OOT=/oot-src/nvidia-oot + NVIDIA_CONFTEST=/oot-src/out/nvidia-conftest + SHIM_DIR=/oot-src/nvhost-ctrl-shim + echo "Installing kernel modules..." 
- # nvgpu - find /oot-src/nvgpu -name "*.ko" \ - -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \; + # OOT host1x → kernel/drivers/gpu/host1x/ (shadows in-tree host1x.ko; + # provides HOST1X_SYNCPT_GPU that nvgpu's CONFIG_TEGRA_GK20A_NVHOST=y requires) + make -j$(nproc) ARCH=arm64 -C /src \ + M=${NVIDIA_OOT}/drivers/gpu/host1x \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + INSTALL_MOD_PATH=/rootfs/usr \ + INSTALL_MOD_DIR=kernel/drivers/gpu/host1x \ + INSTALL_MOD_STRIP=1 \ + CONFIG_MODULE_SIG_ALL=y \ + modules_install - # OOT host1x: installed at kernel/drivers/gpu/host1x/ to shadow the in-tree host1x.ko. - # The squashfs overlay replaces the in-tree module so nvgpu's NVHOST=y path gets - # the OE4T host1x with HOST1X_SYNCPT_GPU support instead of the upstream stub. - find /oot-src/nvidia-oot/drivers/gpu/host1x -name "host1x.ko" \ - -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/kernel/drivers/gpu/host1x/ \; + # host1x-fence → extra/nvidia-tegra + make -j$(nproc) ARCH=arm64 -C /src \ + M=${NVIDIA_OOT}/drivers/gpu/host1x-fence \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + INSTALL_MOD_PATH=/rootfs/usr \ + INSTALL_MOD_DIR=extra/nvidia-tegra \ + INSTALL_MOD_STRIP=1 \ + CONFIG_MODULE_SIG_ALL=y \ + modules_install - # host1x-fence + nvhost-ctrl-shim + all other nvidia-oot modules go to extra/ - find /oot-src/nvidia-oot/drivers/gpu/host1x-fence -name "*.ko" \ - -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \; - find /oot-src/nvhost-ctrl-shim -name "*.ko" \ - -exec install -m 644 {} /rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \; - find /oot-src/nvidia-oot -name "*.ko" \ - -exec install -m 644 {} 
/rootfs/usr/lib/modules/${KERNEL_RELEASE}/extra/nvidia-tegra/ \; + # nvhost-ctrl-shim → extra/nvidia-tegra + make -j$(nproc) ARCH=arm64 -C /src \ + M=${SHIM_DIR} \ + CC=/usr/local/bin/clang-oot \ + LLVM=1 \ + INSTALL_MOD_PATH=/rootfs/usr \ + INSTALL_MOD_DIR=extra/nvidia-tegra \ + INSTALL_MOD_STRIP=1 \ + CONFIG_MODULE_SIG_ALL=y \ + modules_install - # Sign all modules with the kernel's signing key (auto-generated by kernel-build stage) - echo "Signing kernel modules..." - find /rootfs/usr/lib/modules/${KERNEL_RELEASE}/ \ - -name "*.ko" 2>/dev/null | while read ko; do - /src/scripts/sign-file sha512 \ - /src/certs/signing_key.pem \ - /src/certs/signing_key.x509 \ - "${ko}" - echo " Signed: ${ko}" - done + # nvmap → extra/nvidia-tegra + make -j$(nproc) ARCH=arm64 -C /src \ + M=${NVIDIA_OOT}/drivers/video/tegra/nvmap \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + NVMAP_CONFIG=y \ + NVMAP_CONFIG_LOADABLE_MODULE=y \ + NVMAP_CONFIG_PAGE_POOLS=y \ + NVMAP_CONFIG_HANDLE_AS_ID=n \ + NVMAP_CONFIG_SCIIPC=n \ + INSTALL_MOD_PATH=/rootfs/usr \ + INSTALL_MOD_DIR=extra/nvidia-tegra \ + INSTALL_MOD_STRIP=1 \ + CONFIG_MODULE_SIG_ALL=y \ + modules_install + + # mc-utils → extra/nvidia-tegra + make -j$(nproc) ARCH=arm64 -C /src \ + M=${NVIDIA_OOT}/drivers/platform/tegra/mc-utils \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + INSTALL_MOD_PATH=/rootfs/usr \ + INSTALL_MOD_DIR=extra/nvidia-tegra \ + INSTALL_MOD_STRIP=1 \ + CONFIG_MODULE_SIG_ALL=y \ + modules_install + + # governor_pod_scaling → extra/nvidia-tegra + make -j$(nproc) ARCH=arm64 -C /src \ + M=${NVIDIA_OOT}/drivers/devfreq \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + srctree.nvidia-oot=${NVIDIA_OOT} \ + srctree.nvconftest=${NVIDIA_CONFTEST} \ + LLVM=1 \ + INSTALL_MOD_PATH=/rootfs/usr \ + 
INSTALL_MOD_DIR=extra/nvidia-tegra \ + INSTALL_MOD_STRIP=1 \ + CONFIG_MODULE_SIG_ALL=y \ + modules_install + + # nvgpu → extra/nvidia-tegra + make -j$(nproc) ARCH=arm64 -C /src \ + M=/oot-src/nvgpu/drivers/gpu/nvgpu \ + CC=/usr/local/bin/clang-oot \ + CONFIG_TEGRA_OOT_MODULE=m \ + CONFIG_GK20A=m \ + CONFIG_TEGRA_GK20A_NVHOST=y \ + CONFIG_TEGRA_GK20A_NVHOST_HOST1X=y \ + CONFIG_TEGRA_HWPM=n \ + srctree.nvgpu=/oot-src/nvgpu \ + srctree.nvidia=${NVIDIA_OOT} \ + srctree.nvconftest=/oot-src/out/nvidia-conftest \ + LLVM=1 \ + INSTALL_MOD_PATH=/rootfs/usr \ + INSTALL_MOD_DIR=extra/nvidia-tegra \ + INSTALL_MOD_STRIP=1 \ + CONFIG_MODULE_SIG_ALL=y \ + modules_install # Module load order: host1x → host1x-fence → nvhost-ctrl-shim → nvmap → nvgpu mkdir -p /rootfs/usr/lib/modprobe.d @@ -262,6 +339,12 @@ steps: echo "Installed modules:" find /rootfs -name "*.ko" | sort + test: + - | + # https://www.kernel.org/doc/html/v4.15/admin-guide/module-signing.html#signed-modules-and-stripping + find /rootfs/usr/lib/modules -name '*.ko' -exec grep -FL '~Module signature appended~' {} \+ + - | + fhs-validator /rootfs finalize: - from: /rootfs - to: /rootfs + to: / From fd4bb90a3af1039e87244fd80ec16ca5d7718729 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 13:40:22 +0200 Subject: [PATCH 06/18] fix: release device ref, guard devnode mode, drop duplicate clang-oot nvhost_ctrl_shim.c: - get_host1x(): call put_device(&pdev->dev) after platform_get_drvdata() to release the reference acquired by of_find_device_by_node(); fixes a device reference leak on every /dev/nvhost-ctrl open - nvhost_ctrl_devnode(): guard mode pointer before dereferencing (mode may be NULL per devnode callback contract); add comment explaining 0666 choice pkg.yaml: - remove duplicate clang-oot wrapper creation from the nvgpu build block; the wrapper is installed once in the preceding OOT build step and persists on disk across shell blocks Signed-off-by: Alexander Schwankner --- 
nvidia-tegra-nvgpu/nvhost_ctrl_shim.c | 9 ++++++++- nvidia-tegra-nvgpu/pkg.yaml | 4 +--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/nvidia-tegra-nvgpu/nvhost_ctrl_shim.c b/nvidia-tegra-nvgpu/nvhost_ctrl_shim.c index 839c2b046..14ab9ba10 100644 --- a/nvidia-tegra-nvgpu/nvhost_ctrl_shim.c +++ b/nvidia-tegra-nvgpu/nvhost_ctrl_shim.c @@ -193,6 +193,9 @@ static struct host1x *get_host1x(void) } drvdata = platform_get_drvdata(pdev); + /* Release ref acquired by of_find_device_by_node(); drvdata is stable + * as long as host1x.ko is loaded — pdev itself is not needed further. */ + put_device(&pdev->dev); if (!drvdata) { pr_err_ratelimited("nvhost-ctrl-shim: host1x drvdata is NULL\n"); return ERR_PTR(-EAGAIN); @@ -647,7 +650,11 @@ static long nvhost_ctrl_ioctl(struct file *file, unsigned int cmd, static char *nvhost_ctrl_devnode(const struct device *dev, umode_t *mode) { - *mode = 0666; + /* 0666: the CUDA runtime (libnvrm_host1x.so) opens this device as the + * container user; Talos runs a single-workload model so world read/write + * is acceptable. Guard mode for callers that pass NULL. */ + if (mode) + *mode = 0666; return NULL; } diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index 5a7c33df7..9e3a5dfc1 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -180,9 +180,7 @@ steps: > /oot-src/out/nvidia-linux-header/Module.symvers - | echo "Building nvgpu module (OE4T patches-r36.5)..."
- mkdir -p /usr/local/bin - printf '#!/bin/bash\nfiltered=()\nfor arg in "$@"; do\n case "$arg" in\n -fmin-function-alignment=*|-fconserve-stack) ;;\n -fsanitize=bounds-strict) filtered+=("-fsanitize=bounds") ;;\n -Wimplicit-fallthrough=*) filtered+=("-Wimplicit-fallthrough") ;;\n -Wno-maybe-uninitialized) filtered+=("-Wno-uninitialized") ;;\n -Wno-alloc-size-larger-than|-Wno-alloc-size-larger-than=*) ;;\n -fplugin=*|-fplugin-arg-*) ;;\n -pg|-mrecord-mcount|-mfentry|-fpatchable-function-entry=*) ;;\n *) filtered+=("$arg") ;;\n esac\ndone\nexec clang "${filtered[@]}" -Wno-unknown-warning-option -Wno-enum-enum-conversion -Wno-implicit-fallthrough -Wno-gnu-variable-sized-type-not-at-end\n' > /usr/local/bin/clang-oot - chmod +x /usr/local/bin/clang-oot + # clang-oot wrapper already installed by the earlier build step — reuse it. grep -rl "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" /oot-src/nvgpu/drivers/gpu/nvgpu/ \ | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if 1 /* force: kernel 6.3+ */|g" sed -i '1s|^|ccflags-y += -Wno-implicit-fallthrough -Wno-parentheses-equality -Wno-incompatible-function-pointer-types -Wno-sometimes-uninitialized\n|' \ From 335ee460a75e6afe9cdc2ffb3a486ec85af01afb Mon Sep 17 00:00:00 2001 From: Alexander Schwankner <1496765+schwankner@users.noreply.github.com> Date: Wed, 22 Apr 2026 21:57:38 +0200 Subject: [PATCH 07/18] fix: add post-check to awk nvhost_host1x syncpt id=0 patch The awk command that inserts the ERRATA_SYNCPT_INVALID_ID_0 id=0 skip block had no failure check: if the upstream file changes and the pattern is not matched, awk silently writes an unpatched file. Add an explicit grep post-check for the inserted marker; exit 1 if the marker is absent so the build fails loudly instead of shipping a silently broken nvgpu. 
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> Signed-off-by: Alexander Schwankner <1496765+schwankner@users.noreply.github.com> --- nvidia-tegra-nvgpu/pkg.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index 9e3a5dfc1..a0c49843b 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -192,6 +192,10 @@ steps: python3 /pkg/scripts/patch_nvhost_host1x.py NVHOST_H1X=/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c awk '/return host1x_syncpt_id\(sp\);/ { print "\t/* GA10b ERRATA_SYNCPT_INVALID_ID_0: skip syncpt id=0."; print "\t * host1x_syncpt_alloc may return id=0 when no GPU pool in DT."; print "\t * nvgpu rejects id=0 causing CUDA error 999. Hold it and re-alloc. */"; print "\tif (host1x_syncpt_id(sp) == 0U) {"; print "\t\tstruct host1x_syncpt *sp_skip = sp;"; print "\t\tsp = host1x_syncpt_alloc(host1x,"; print "\t\t\t\tHOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU,"; print "\t\t\t\tsyncpt_name);"; print "\t\thost1x_syncpt_put(sp_skip);"; print "\t\tif (!sp)"; print "\t\t\treturn 0;"; print "\t}"; print ""; print "\treturn host1x_syncpt_id(sp);"; next } { print }' "$NVHOST_H1X" > "$NVHOST_H1X.tmp" && mv "$NVHOST_H1X.tmp" "$NVHOST_H1X" + if ! 
grep -q 'GA10b ERRATA_SYNCPT_INVALID_ID_0' "$NVHOST_H1X"; then + echo "ERROR: failed to patch $NVHOST_H1X: expected syncpt errata marker not found" >&2 + exit 1 + fi NETLIST_PRIV=/oot-src/nvgpu/drivers/gpu/nvgpu/common/netlist/netlist_priv.h if [ -f "${NETLIST_PRIV}" ]; then sed -i 's/struct netlist_region regions\[1\]/struct netlist_region regions[]/' "${NETLIST_PRIV}" From 9e04f9cfb73dc25f6e3c4425ac9ad622ddbc34f2 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 22:06:17 +0200 Subject: [PATCH 08/18] chore: remove duplicate latent_entropy sed from nvgpu build block fixup.sh already strips CONFIG_GCC_PLUGIN_LATENT_ENTROPY from /src/include/config/auto.conf and /src/include/generated/autoconf.h in an earlier build step. The identical sed commands in the nvgpu block are a no-op; remove them to keep the fix single-sourced. Signed-off-by: Alexander Schwankner --- nvidia-tegra-nvgpu/pkg.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index a0c49843b..5f83e5dc9 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -200,8 +200,6 @@ steps: if [ -f "${NETLIST_PRIV}" ]; then sed -i 's/struct netlist_region regions\[1\]/struct netlist_region regions[]/' "${NETLIST_PRIV}" fi - sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/config/auto.conf 2>/dev/null || true - sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/generated/autoconf.h 2>/dev/null || true make -j $(nproc) ARCH=arm64 \ -C /src \ M=/oot-src/nvgpu/drivers/gpu/nvgpu \ From 6ea1e15c77bebc8bb70580c133ef34e23c4c50a1 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 22:06:27 +0200 Subject: [PATCH 09/18] chore: remove unused CONFTEST_OUT variable from fixup.sh CONFTEST_OUT was defined but never referenced in the script. The conftest path is injected via the srctree.nvconftest make variable, not through this shell variable. 
Remove it to comply with the set -euo pipefail hygiene. Signed-off-by: Alexander Schwankner --- nvidia-tegra-nvgpu/scripts/fixup.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/nvidia-tegra-nvgpu/scripts/fixup.sh b/nvidia-tegra-nvgpu/scripts/fixup.sh index 395b03cbd..5c2048947 100644 --- a/nvidia-tegra-nvgpu/scripts/fixup.sh +++ b/nvidia-tegra-nvgpu/scripts/fixup.sh @@ -4,7 +4,6 @@ set -euo pipefail NVIDIA_OOT=/oot-src/nvidia-oot -CONFTEST_OUT=/oot-src/out/nvidia-conftest # ── GCC plugin latent_entropy fix ──────────────────────────────────────────── # CONFIG_GCC_PLUGIN_LATENT_ENTROPY injects a global 'latent_entropy' variable From 311f1870f16ad4e509c8e1c2b55581a5344f683b Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 22:06:51 +0200 Subject: [PATCH 10/18] fix: add -pkg suffix to name, register in .kres.yaml targets All kernel-dependent packages use the -pkg suffix in their pkg.yaml name field (gasket-driver-pkg, hailort-pkg, zfs-pkg, etc.) and are listed in .kres.yaml so bldr picks them up in CI. Rename nvidia-tegra-nvgpu to nvidia-tegra-nvgpu-pkg and add the entry to .kres.yaml between nvidia-open-gpu-kernel-modules-production-pkg and px-fuse-pkg. 
Signed-off-by: Alexander Schwankner --- .kres.yaml | 1 + nvidia-tegra-nvgpu/pkg.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.kres.yaml b/.kres.yaml index 47ad44863..6311388b5 100644 --- a/.kres.yaml +++ b/.kres.yaml @@ -71,6 +71,7 @@ spec: - mellanox-mstflint-pkg - nvidia-open-gpu-kernel-modules-lts-pkg - nvidia-open-gpu-kernel-modules-production-pkg + - nvidia-tegra-nvgpu-pkg - px-fuse-pkg - tenstorrent-pkg - xdma-driver-pkg diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index 5f83e5dc9..e472ba49d 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -1,4 +1,4 @@ -name: nvidia-tegra-nvgpu +name: nvidia-tegra-nvgpu-pkg variant: scratch shell: /bin/bash dependencies: From 08a6cfdffc3d3c4afc7f8e4b5464228a1aeb48a3 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Wed, 22 Apr 2026 22:07:12 +0200 Subject: [PATCH 11/18] fix: copy modules.order and modules.builtin into rootfs All other kernel module packages (gasket-driver, zfs, hailort, xdma-driver, kmod-nvidia) copy modules.order, modules.builtin, and modules.builtin.modinfo from /src into the rootfs before running modules_install. Add the same block so downstream tooling has the expected kernel module metadata for the target release. Signed-off-by: Alexander Schwankner --- nvidia-tegra-nvgpu/pkg.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index e472ba49d..11f54fe48 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -224,6 +224,10 @@ steps: SHIM_DIR=/oot-src/nvhost-ctrl-shim echo "Installing kernel modules..." 
+ mkdir -p /rootfs/usr/lib/modules/$(cat /src/include/config/kernel.release)/ + cp /src/modules.order /src/modules.builtin /src/modules.builtin.modinfo \ + /rootfs/usr/lib/modules/$(cat /src/include/config/kernel.release)/ + # OOT host1x → kernel/drivers/gpu/host1x/ (shadows in-tree host1x.ko; # provides HOST1X_SYNCPT_GPU that nvgpu's CONFIG_TEGRA_GK20A_NVHOST=y requires) make -j$(nproc) ARCH=arm64 -C /src \ From 3f189157a01f6c248391a3a36d505fe60cf39566 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Thu, 23 Apr 2026 21:20:59 +0200 Subject: [PATCH 12/18] chore: switch oe4t_nv_oot to patches-r36.5 branch Replace wip-r36.5-take-2 snapshot (ccf7646) with the patches-r36.5 branch HEAD (ea32e7f, 2026-03-05) which is a more stable reference with the same kernel 6.18 compatibility fixes. Signed-off-by: Alexander Schwankner --- Pkgfile | 6 +++--- nvidia-tegra-nvgpu/pkg.yaml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Pkgfile b/Pkgfile index 1588241e5..97071f78c 100644 --- a/Pkgfile +++ b/Pkgfile @@ -295,9 +295,9 @@ vars: oe4t_nvgpu_sha256: adc5864edf76d986866e386803a9e628ee229e69ea34867b92b978a0b44f3d54 oe4t_nvgpu_sha512: a7c7f0b5d3174bf41abc77c77009f46182358f93936aedbe4993e63ff7fc94e21bfd83c3fa0b41af5836866b9c200427504d8f26685d567c11722e7a7bfd3ed9 # renovate: datasource=git-refs versioning=git depName=https://github.com/OE4T/linux-nv-oot.git - oe4t_nv_oot_commit: ccf7646c57462776fe1093af6643c54653f59861 - oe4t_nv_oot_sha256: d1957d2c4908a37b4c040aef3a7413a9fbb3adcc8575cc9347c58af1c8b95169 - oe4t_nv_oot_sha512: 66b8b4a0672a2c044d42e58914a6d0000fc3a01f1343fa39190794e5badbcb52cd3c2254bd3ca2b6be86286dadbbe60db5cb26c74264747b783937443e287a33 + oe4t_nv_oot_commit: ea32e7f2cb0a0f831b5deef61d8800d3ab1af1d7 + oe4t_nv_oot_sha256: 9d2d70a121a418be307e3d1cd3c74d9ae9398e7abc756304d614e998dfd6f342 + oe4t_nv_oot_sha512: 5645163e964bfb13d7aa2ee1749188fe40a1fe9012080f548548f7dc70e4397a762c161041d8d209d2cd969cbb4aab36ea5c560ef5967946eeb3f1dd16335b9c # 
renovate: datasource=git-refs versioning=git depName=https://github.com/OE4T/linux-hwpm.git oe4t_hwpm_commit: 4d8a6998760d85f98637dbf61597bfbb88158206 oe4t_hwpm_sha256: 96c7656bdad0bf330e7fd58981b8a4eec4717a76840cefbe84e720d88b46be55 diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index 11f54fe48..e1e9c8f40 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -12,7 +12,7 @@ steps: destination: nvgpu.tar.gz sha256: "{{ .oe4t_nvgpu_sha256 }}" sha512: "{{ .oe4t_nvgpu_sha512 }}" - # OE4T patched nvidia-oot - wip-r36.5-take-2 branch (kernel 6.18 compat: __assign_str, + # OE4T patched nvidia-oot - patches-r36.5 branch (kernel 6.18 compat: __assign_str, # f_count->f_ref, __alloc_pages_bulk 5-arg, and all earlier 6.x fixes) - url: https://github.com/OE4T/linux-nv-oot/archive/{{ .oe4t_nv_oot_commit }}.tar.gz destination: nvidia-oot.tar.gz From 6368e07f63280f1c504e1ac6eea1de8bde04c1a6 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Thu, 23 Apr 2026 21:21:22 +0200 Subject: [PATCH 13/18] chore: extract clang-oot wrapper to scripts/clang-oot Move the inline printf'd clang wrapper script into a dedicated scripts/clang-oot file and install it via cp, making it easier to review and maintain. Signed-off-by: Alexander Schwankner --- nvidia-tegra-nvgpu/pkg.yaml | 2 +- nvidia-tegra-nvgpu/scripts/clang-oot | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 nvidia-tegra-nvgpu/scripts/clang-oot diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index e1e9c8f40..3750109d0 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -64,7 +64,7 @@ steps: # ── Cross-compiler mismatch fix: Clang wrapper ──────────────────────────── # Strips GCC-only flags before passing to clang (source-independent fix). 
mkdir -p /usr/local/bin - printf '#!/bin/bash\nfiltered=()\nfor arg in "$@"; do\n case "$arg" in\n -fmin-function-alignment=*|-fconserve-stack) ;;\n -fsanitize=bounds-strict) filtered+=("-fsanitize=bounds") ;;\n -Wimplicit-fallthrough=*) filtered+=("-Wimplicit-fallthrough") ;;\n -Wno-maybe-uninitialized) filtered+=("-Wno-uninitialized") ;;\n -Wno-alloc-size-larger-than|-Wno-alloc-size-larger-than=*) ;;\n -fplugin=*|-fplugin-arg-*) ;;\n -pg|-mrecord-mcount|-mfentry|-fpatchable-function-entry=*) ;;\n *) filtered+=("$arg") ;;\n esac\ndone\nexec clang "${filtered[@]}" -Wno-unknown-warning-option -Wno-enum-enum-conversion -Wno-implicit-fallthrough -Wno-gnu-variable-sized-type-not-at-end\n' > /usr/local/bin/clang-oot + cp /pkg/scripts/clang-oot /usr/local/bin/clang-oot chmod +x /usr/local/bin/clang-oot echo "clang-oot wrapper installed" diff --git a/nvidia-tegra-nvgpu/scripts/clang-oot b/nvidia-tegra-nvgpu/scripts/clang-oot new file mode 100644 index 000000000..d7d09f05d --- /dev/null +++ b/nvidia-tegra-nvgpu/scripts/clang-oot @@ -0,0 +1,16 @@ +#!/bin/bash +# Cross-compiler mismatch fix: strips GCC-only flags before passing to clang. 
+filtered=() +for arg in "$@"; do + case "$arg" in + -fmin-function-alignment=*|-fconserve-stack) ;; + -fsanitize=bounds-strict) filtered+=("-fsanitize=bounds") ;; + -Wimplicit-fallthrough=*) filtered+=("-Wimplicit-fallthrough") ;; + -Wno-maybe-uninitialized) filtered+=("-Wno-uninitialized") ;; + -Wno-alloc-size-larger-than|-Wno-alloc-size-larger-than=*) ;; + -fplugin=*|-fplugin-arg-*) ;; + -pg|-mrecord-mcount|-mfentry|-fpatchable-function-entry=*) ;; + *) filtered+=("$arg") ;; + esac +done +exec clang "${filtered[@]}" -Wno-unknown-warning-option -Wno-enum-enum-conversion -Wno-implicit-fallthrough -Wno-gnu-variable-sized-type-not-at-end From 3624f1f4a53487c689590dceb781abe4a18e47fb Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Thu, 23 Apr 2026 21:21:32 +0200 Subject: [PATCH 14/18] chore: remove modprobe.d softdep and debug find from install step Module load ordering is handled by depmod in the Talos imager; the modprobe.d/nvidia-tegra.conf file is not needed here. Also remove the debug find /rootfs output. 
Signed-off-by: Alexander Schwankner --- nvidia-tegra-nvgpu/pkg.yaml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index 3750109d0..e1423d497 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -334,15 +334,6 @@ steps: CONFIG_MODULE_SIG_ALL=y \ modules_install - # Module load order: host1x → host1x-fence → nvhost-ctrl-shim → nvmap → nvgpu - mkdir -p /rootfs/usr/lib/modprobe.d - printf 'softdep nvhost-ctrl-shim pre: host1x host1x-fence\n' \ - > /rootfs/usr/lib/modprobe.d/nvidia-tegra.conf - printf 'softdep nvgpu pre: host1x nvmap host1x-fence nvhost-ctrl-shim mc-utils\n' \ - >> /rootfs/usr/lib/modprobe.d/nvidia-tegra.conf - - echo "Installed modules:" - find /rootfs -name "*.ko" | sort test: - | # https://www.kernel.org/doc/html/v4.15/admin-guide/module-signing.html#signed-modules-and-stripping From 559098f5ce1a1cb6cc87fecb4cf1c5ca53b84248 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner Date: Mon, 27 Apr 2026 22:28:07 +0200 Subject: [PATCH 15/18] refactor: replace patch scripts with standard .patch files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the Python/awk-based nvgpu source patches to standard unified diff files applied with the patch tool: - 0001: nvhost_host1x.c — retry loop + syncpt id=0 guard for GA10B ERRATA_SYNCPT_INVALID_ID_0 (combines sed/python/awk into one clean patch, adds #include <linux/delay.h>) - 0002: netlist_priv.h — flexible array fix (regions[1] → regions[]) Remove scripts/patch_nvhost_host1x.py (superseded by 0001 patch).
Signed-off-by: Alexander Schwankner --- .../0001-nvgpu-syncpt-retry-errata-id0.patch | 60 +++++++++++++++ .../0002-nvgpu-netlist-flexible-array.patch | 11 +++ nvidia-tegra-nvgpu/pkg.yaml | 18 ++--- .../scripts/patch_nvhost_host1x.py | 75 ------------------- 4 files changed, 76 insertions(+), 88 deletions(-) create mode 100644 nvidia-tegra-nvgpu/patches/0001-nvgpu-syncpt-retry-errata-id0.patch create mode 100644 nvidia-tegra-nvgpu/patches/0002-nvgpu-netlist-flexible-array.patch delete mode 100644 nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py diff --git a/nvidia-tegra-nvgpu/patches/0001-nvgpu-syncpt-retry-errata-id0.patch b/nvidia-tegra-nvgpu/patches/0001-nvgpu-syncpt-retry-errata-id0.patch new file mode 100644 index 000000000..07bdc3665 --- /dev/null +++ b/nvidia-tegra-nvgpu/patches/0001-nvgpu-syncpt-retry-errata-id0.patch @@ -0,0 +1,60 @@ +--- a/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c 2026-04-27 22:24:47 ++++ b/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c 2026-04-27 22:25:13 +@@ -14,6 +14,7 @@ + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + ++#include <linux/delay.h> + #include + #include + #include +@@ -237,17 +238,41 @@ + u32 nvgpu_nvhost_get_syncpt_client_managed(struct nvgpu_nvhost_dev *nvhost_dev, + const char *syncpt_name) + { +- struct host1x_syncpt *sp; ++ struct host1x_syncpt *sp = NULL; + struct host1x *host1x; ++ int retry; + +- host1x = platform_get_drvdata(nvhost_dev->host1x_pdev); +- if (!host1x) +- return 0; +- +- sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU, +- syncpt_name); +- if (!sp) ++ /* ++ * GA10B ERRATA_SYNCPT_INVALID_ID_0: nvgpu rejects syncpt id=0. ++ * During ~1-2ms after the first kernel submit, host1x_syncpt_alloc ++ * may return NULL or id=0, causing CUDA error 999 on ++ * cudaStreamSynchronize. Retry up to 5ms.
++ */ ++ for (retry = 0; retry < 5; retry++) { ++ host1x = platform_get_drvdata(nvhost_dev->host1x_pdev); ++ if (!host1x) { ++ pr_warn_ratelimited("nvgpu: host1x not ready, syncpt retry %d/5\n", ++ retry + 1); ++ msleep(1); ++ continue; ++ } ++ sp = host1x_syncpt_alloc(host1x, ++ HOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU, ++ syncpt_name); ++ if (sp && host1x_syncpt_id(sp) != 0U) ++ break; ++ if (sp) { ++ host1x_syncpt_put(sp); ++ sp = NULL; ++ } ++ pr_warn_ratelimited("nvgpu: syncpt_alloc returned id=0 or NULL, retry %d/5\n", ++ retry + 1); ++ msleep(1); ++ } ++ if (!sp) { ++ pr_err_ratelimited("nvgpu: get_syncpt_client_managed: failed after retries\n"); + return 0; ++ } + + return host1x_syncpt_id(sp); + } diff --git a/nvidia-tegra-nvgpu/patches/0002-nvgpu-netlist-flexible-array.patch b/nvidia-tegra-nvgpu/patches/0002-nvgpu-netlist-flexible-array.patch new file mode 100644 index 000000000..883d2810e --- /dev/null +++ b/nvidia-tegra-nvgpu/patches/0002-nvgpu-netlist-flexible-array.patch @@ -0,0 +1,11 @@ +--- a/drivers/gpu/nvgpu/common/netlist/netlist_priv.h 2026-04-27 22:24:52 ++++ b/drivers/gpu/nvgpu/common/netlist/netlist_priv.h 2026-04-27 22:25:25 +@@ -113,7 +113,7 @@ + + struct netlist_image { + struct netlist_image_header header; +- struct netlist_region regions[1]; ++ struct netlist_region regions[]; + }; + + struct netlist_gr_ucode { diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index e1423d497..55c48d1d6 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -187,19 +187,11 @@ steps: /oot-src/nvgpu/drivers/gpu/nvgpu/Makefile grep -rl "class_create(THIS_MODULE," /oot-src/nvgpu/drivers/gpu/nvgpu/ \ | xargs -r sed -i 's/class_create(THIS_MODULE, /class_create(/g' - # nvhost_host1x.c: retry loop for GA10B syncpt alloc during GR init window - sed -i '1s|^|#include \n|' /oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c - python3 /pkg/scripts/patch_nvhost_host1x.py - 
NVHOST_H1X=/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c - awk '/return host1x_syncpt_id\(sp\);/ { print "\t/* GA10b ERRATA_SYNCPT_INVALID_ID_0: skip syncpt id=0."; print "\t * host1x_syncpt_alloc may return id=0 when no GPU pool in DT."; print "\t * nvgpu rejects id=0 causing CUDA error 999. Hold it and re-alloc. */"; print "\tif (host1x_syncpt_id(sp) == 0U) {"; print "\t\tstruct host1x_syncpt *sp_skip = sp;"; print "\t\tsp = host1x_syncpt_alloc(host1x,"; print "\t\t\t\tHOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU,"; print "\t\t\t\tsyncpt_name);"; print "\t\thost1x_syncpt_put(sp_skip);"; print "\t\tif (!sp)"; print "\t\t\treturn 0;"; print "\t}"; print ""; print "\treturn host1x_syncpt_id(sp);"; next } { print }' "$NVHOST_H1X" > "$NVHOST_H1X.tmp" && mv "$NVHOST_H1X.tmp" "$NVHOST_H1X" - if ! grep -q 'GA10b ERRATA_SYNCPT_INVALID_ID_0' "$NVHOST_H1X"; then - echo "ERROR: failed to patch $NVHOST_H1X: expected syncpt errata marker not found" >&2 - exit 1 - fi - NETLIST_PRIV=/oot-src/nvgpu/drivers/gpu/nvgpu/common/netlist/netlist_priv.h - if [ -f "${NETLIST_PRIV}" ]; then - sed -i 's/struct netlist_region regions\[1\]/struct netlist_region regions[]/' "${NETLIST_PRIV}" - fi + # nvgpu source patches (applied via standard patch tool) + patch -p1 -d /oot-src/nvgpu \ + < /pkg/patches/0001-nvgpu-syncpt-retry-errata-id0.patch + patch -p1 -d /oot-src/nvgpu \ + < /pkg/patches/0002-nvgpu-netlist-flexible-array.patch make -j $(nproc) ARCH=arm64 \ -C /src \ M=/oot-src/nvgpu/drivers/gpu/nvgpu \ diff --git a/nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py b/nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py deleted file mode 100644 index 335f7759d..000000000 --- a/nvidia-tegra-nvgpu/scripts/patch_nvhost_host1x.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 -""" -Patch nvgpu_nvhost_get_syncpt_client_managed() in nvhost_host1x.c to add a -retry loop for GA10B syncpt allocation failures during GR init window. 
- -GA10B ERRATA_SYNCPT_INVALID_ID_0: nvgpu rejects syncpt id=0. During ~1-2ms -after the first kernel submit, host1x_syncpt_alloc may return NULL, causing -CUDA error 999 on cudaStreamSynchronize. The retry loop waits up to 5ms. -""" -import sys - -FNAME = "/oot-src/nvgpu/drivers/gpu/nvgpu/os/linux/nvhost_host1x.c" - -OLD = """\ -u32 nvgpu_nvhost_get_syncpt_client_managed(struct nvgpu_nvhost_dev *nvhost_dev, - const char *syncpt_name) -{ - struct host1x_syncpt *sp; - struct host1x *host1x; - - host1x = platform_get_drvdata(nvhost_dev->host1x_pdev); - if (!host1x) - return 0; - - sp = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU, - syncpt_name); - if (!sp) - return 0; - - return host1x_syncpt_id(sp); -}\ -""" - -NEW = """\ -u32 nvgpu_nvhost_get_syncpt_client_managed(struct nvgpu_nvhost_dev *nvhost_dev, - const char *syncpt_name) -{ - struct host1x_syncpt *sp = NULL; - struct host1x *host1x; - int retry; - - /* nvgpu 5.10.2: retry up to 5ms when host1x syncpt alloc fails during GR init window. - * GA10B ERRATA_SYNCPT_INVALID_ID_0 rejects id=0; during ~1-2ms after first kernel submit - * host1x_syncpt_alloc may return NULL, causing CUDA error 999 on cudaStreamSynchronize. 
*/ - for (retry = 0; retry < 5; retry++) { - host1x = platform_get_drvdata(nvhost_dev->host1x_pdev); - if (!host1x) { - pr_warn_ratelimited("nvgpu: host1x not ready, syncpt retry %d/5\\n", retry + 1); - msleep(1); - continue; - } - sp = host1x_syncpt_alloc(host1x, - HOST1X_SYNCPT_CLIENT_MANAGED | HOST1X_SYNCPT_GPU, - syncpt_name); - if (sp) - break; - pr_warn_ratelimited("nvgpu: syncpt_alloc NULL, retry %d/5\\n", retry + 1); - msleep(1); - } - if (!sp) { - pr_err_ratelimited("nvgpu: get_syncpt_client_managed: failed after retries\\n"); - return 0; - } - return host1x_syncpt_id(sp); -}\ -""" - -content = open(FNAME).read() -if OLD not in content: - print(f"ERROR: pattern not found in {FNAME}", file=sys.stderr) - sys.exit(1) - -content = content.replace(OLD, NEW, 1) -open(FNAME, "w").write(content) -print("Patched nvhost_host1x.c: retry loop in nvgpu_nvhost_get_syncpt_client_managed") From 2c4167ef455a9c4a9271dc95233aa539993969ff Mon Sep 17 00:00:00 2001 From: Alexander Schwankner <1496765+schwankner@users.noreply.github.com> Date: Sat, 2 May 2026 22:15:05 +0200 Subject: [PATCH 16/18] fix: correct oe4t_nv_oot_commit to patches-r36.5 HEAD (ea32e7f97dd0) --- Pkgfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pkgfile b/Pkgfile index a6516dac8..f185a1ac7 100644 --- a/Pkgfile +++ b/Pkgfile @@ -295,7 +295,7 @@ vars: oe4t_nvgpu_sha256: adc5864edf76d986866e386803a9e628ee229e69ea34867b92b978a0b44f3d54 oe4t_nvgpu_sha512: a7c7f0b5d3174bf41abc77c77009f46182358f93936aedbe4993e63ff7fc94e21bfd83c3fa0b41af5836866b9c200427504d8f26685d567c11722e7a7bfd3ed9 # renovate: datasource=git-refs versioning=git depName=https://github.com/OE4T/linux-nv-oot.git - oe4t_nv_oot_commit: ea32e7f2cb0a0f831b5deef61d8800d3ab1af1d7 + oe4t_nv_oot_commit: ea32e7f97dd04c3f75aadc22424dc63568387120 oe4t_nv_oot_sha256: 9d2d70a121a418be307e3d1cd3c74d9ae9398e7abc756304d614e998dfd6f342 oe4t_nv_oot_sha512: 
5645163e964bfb13d7aa2ee1749188fe40a1fe9012080f548548f7dc70e4397a762c161041d8d209d2cd969cbb4aab36ea5c560ef5967946eeb3f1dd16335b9c # renovate: datasource=git-refs versioning=git depName=https://github.com/OE4T/linux-hwpm.git From 754aee57fab4415f1fc3a47f3ab68a753068e241 Mon Sep 17 00:00:00 2001 From: Alexander Schwankner <1496765+schwankner@users.noreply.github.com> Date: Sun, 3 May 2026 10:42:55 +0200 Subject: [PATCH 17/18] refactor: remove dead code from fixup.sh (kref_init + GCC_PLUGIN_LATENT_ENTROPY) Both workarounds are no longer needed with patches-r36.5: - kref_init(&syncpt[0].ref): already present in OE4T patches-r36.5 (ea32e7f97dd0) - CONFIG_GCC_PLUGIN_LATENT_ENTROPY: Talos kernel is Clang-built (LLVM=1); GCC plugins are auto-disabled via CC_IS_CLANG, so this option never appears in kernel headers. The proper fix (remove CONFIG_HAVE_GCC_PLUGINS side-effects) is tracked as a separate siderolabs/pkgs PR. --- nvidia-tegra-nvgpu/scripts/fixup.sh | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/nvidia-tegra-nvgpu/scripts/fixup.sh b/nvidia-tegra-nvgpu/scripts/fixup.sh index 5c2048947..be502f90d 100644 --- a/nvidia-tegra-nvgpu/scripts/fixup.sh +++ b/nvidia-tegra-nvgpu/scripts/fixup.sh @@ -5,15 +5,6 @@ set -euo pipefail NVIDIA_OOT=/oot-src/nvidia-oot -# ── GCC plugin latent_entropy fix ──────────────────────────────────────────── -# CONFIG_GCC_PLUGIN_LATENT_ENTROPY injects a global 'latent_entropy' variable -# via a GCC plugin. Clang doesn't run the plugin → 'latent_entropy' is undeclared -# → linux/random.h:24 compile error for ALL OOT modules. -# Fix: strip the macro from auto.conf and autoconf.h before any OOT build. 
-sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/config/auto.conf 2>/dev/null || true -sed -i '/CONFIG_GCC_PLUGIN_LATENT_ENTROPY/d' /src/include/generated/autoconf.h 2>/dev/null || true -echo "Removed CONFIG_GCC_PLUGIN_LATENT_ENTROPY (Clang compat fix)" - # OOT host1x: add conftest + nvidia-oot includes (exports host1x_fence_extract) printf 'ccflags-y += -I$(srctree.nvconftest)\n' \ >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile @@ -37,24 +28,6 @@ grep -rl "NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG" ${NVIDIA_OOT}/drivers/gpu | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG)|#if 1 /* force: kernel 6.x+ */|g" echo "Patched OOT host1x: forced conftest macro code paths for kernel 6.18" -# host1x syncpt.c: permanently reserve syncpt id=0 so host1x_syncpt_alloc never returns it. -# GA10b has NVGPU_ERRATA_SYNCPT_INVALID_ID_0: nvgpu rejects id=0 → channel init fails → error 999. -# OOT host1x (ccf7646c) marks syncpt[0] with name="reserved" but does NOT set kref=1, so the -# alloc loop (which skips syncpts where kref_read(&sp->ref) != 0) still returns id=0. -# Fix: add kref_init(&syncpt[0].ref) before the name assignment, matching what newer OE4T -# commits already do (e.g. 6e071c0). Guard is idempotent — safe even if already present. -SYNCPT_C=${NVIDIA_OOT}/drivers/gpu/host1x/syncpt.c -if grep -q 'syncpt\[0\]\.name = kstrdup' "${SYNCPT_C}" 2>/dev/null; then - if ! 
grep -q 'kref_init.*syncpt\[0\]' "${SYNCPT_C}" 2>/dev/null; then - sed -i 's/\(syncpt\[0\]\.name = kstrdup("reserved", GFP_KERNEL);\)/kref_init(\&syncpt[0].ref);\n\t\t\1/' "${SYNCPT_C}" - echo "Patched host1x syncpt.c: added kref_init(&syncpt[0].ref) — id=0 permanently reserved" - else - echo "host1x syncpt.c: kref_init(&syncpt[0].ref) already present — no patch needed" - fi -else - echo "WARNING: host1x syncpt.c pattern not found — syncpt id=0 reservation patch skipped" -fi - # host1x-fence: remove -Werror, add conftest + nvidia-oot includes sed -i 's|ccflags-y += -Werror||g' \ ${NVIDIA_OOT}/drivers/gpu/host1x-fence/Makefile From eb7b15e32466eb0b2cc5497248cd548bda908bb6 Mon Sep 17 00:00:00 2001 From: schwankner Date: Mon, 4 May 2026 09:19:33 +0200 Subject: [PATCH 18/18] feat(nvidia-tegra-nvgpu): replace #if 1 guards with LINUX_VERSION_CODE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Debug run confirmed /src/include/generated/uapi/linux/version.h exists in the siderolabs build container. Standard /src/include/linux/version.h does not (only generated during full kernel build). Add -include $(srctree)/include/generated/uapi/linux/version.h to ccflags of host1x, nvmap and nvgpu Makefiles so LINUX_VERSION_CODE and KERNEL_VERSION() are available at OOT module compile time. 
Replace all '#if 1 /* force: kernel X+ */' guards in fixup.sh and pkg.yaml with proper LINUX_VERSION_CODE >= KERNEL_VERSION(x,y,z): host1x: NV_IOMMU_MAP_HAS_GFP_ARG (6.3), NV_IOMMU_PAGING_DOMAIN_ALLOC (6.11), NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID (6.11), NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG (6.11), NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG (6.3) nvmap: NV_GET_USER_PAGES_HAS_ARGS_FLAGS (6.5), NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT (6.2), NV_IOREMAP_PROT_HAS_PGPROT_T_ARG (6.15), NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS (6.3), NV___ASSIGN_STR_HAS_NO_SRC_ARG (6.10), NV__ALLOC_PAGES_BULK (6.14), NV_FILE_STRUCT_HAS_F_REF (6.13), NV_GET_FILE_RCU (6.7), NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID (6.11) nvgpu: NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS (6.3) NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT remains #if 1 — OE4T-internal function with no LINUX_VERSION_CODE mapping. Refs: siderolabs/pkgs#1518 (dsseng review) Signed-off-by: schwankner --- nvidia-tegra-nvgpu/pkg.yaml | 6 ++- nvidia-tegra-nvgpu/scripts/fixup.sh | 67 ++++++++++++++++++----------- 2 files changed, 48 insertions(+), 25 deletions(-) diff --git a/nvidia-tegra-nvgpu/pkg.yaml b/nvidia-tegra-nvgpu/pkg.yaml index 55c48d1d6..fed9cd98c 100644 --- a/nvidia-tegra-nvgpu/pkg.yaml +++ b/nvidia-tegra-nvgpu/pkg.yaml @@ -181,10 +181,14 @@ steps: - | echo "Building nvgpu module (OE4T patches-r36.5)..." # clang-oot wrapper already installed by the earlier build step — reuse it.
+ # vm_flags_set() added in 6.3 (commit d0e9fe1) grep -rl "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" /oot-src/nvgpu/drivers/gpu/nvgpu/ \ - | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if 1 /* force: kernel 6.3+ */|g" + | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,3,0)|g" sed -i '1s|^|ccflags-y += -Wno-implicit-fallthrough -Wno-parentheses-equality -Wno-incompatible-function-pointer-types -Wno-sometimes-uninitialized\n|' \ /oot-src/nvgpu/drivers/gpu/nvgpu/Makefile + # Force-include version.h for LINUX_VERSION_CODE guard above (vm_flags_set). + printf 'ccflags-y += -include $(srctree)/include/generated/uapi/linux/version.h\n' \ + >> /oot-src/nvgpu/drivers/gpu/nvgpu/Makefile grep -rl "class_create(THIS_MODULE," /oot-src/nvgpu/drivers/gpu/nvgpu/ \ | xargs -r sed -i 's/class_create(THIS_MODULE, /class_create(/g' # nvgpu source patches (applied via standard patch tool) diff --git a/nvidia-tegra-nvgpu/scripts/fixup.sh b/nvidia-tegra-nvgpu/scripts/fixup.sh index be502f90d..5e80c2472 100644 --- a/nvidia-tegra-nvgpu/scripts/fixup.sh +++ b/nvidia-tegra-nvgpu/scripts/fixup.sh @@ -12,21 +12,34 @@ printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/gpu/host1x/include\n' \ >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile +# Force-include version.h so LINUX_VERSION_CODE / KERNEL_VERSION() are available. +# /src/include/generated/uapi/linux/version.h exists in the siderolabs build container; +# standard /src/include/linux/version.h does not (only generated during full kernel build). +printf 'ccflags-y += -include $(srctree)/include/generated/uapi/linux/version.h\n' \ + >> ${NVIDIA_OOT}/drivers/gpu/host1x/Makefile -# Force conftest macros for OOT host1x on kernel 6.18 +# Replace conftest macro guards with LINUX_VERSION_CODE checks.
+# Root cause why conftest probes fail: NV_CONFTEST_CFLAGS hardcodes -Werror; +# Clang is stricter than GCC → probe fails → macro undefined → wrong code path. +# iommu_map() gained gfp_t arg in 6.3 (commit 66f70e7) grep -rl "NV_IOMMU_MAP_HAS_GFP_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_IOMMU_MAP_HAS_GFP_ARG)|#if 1 /* force: kernel 6.3+ */|g" + | xargs -r sed -i "s|#if defined(NV_IOMMU_MAP_HAS_GFP_ARG)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,3,0)|g" +# iommu_paging_domain_alloc() added in 6.11 (commit 2cf48a9) grep -rl "NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT)|#if 1 /* force: kernel 6.11+ */|g" + | xargs -r sed -i "s|#if defined(NV_IOMMU_PAGING_DOMAIN_ALLOC_PRESENT)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,11,0)|g" +# devm_tegra_core_dev_init_opp_table_common: OE4T-specific, always present in linux-nv-oot grep -rl "NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT)|#if 1 /* force: present */|g" + | xargs -r sed -i "s|#if defined(NV_DEVM_TEGRA_CORE_DEV_INIT_OPP_TABLE_COMMON_PRESENT)|#if 1 /* OE4T-specific: always present in linux-nv-oot */|g" +# platform_driver.remove changed to return void in 6.11 (commit 5c5a768) grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g" + | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,11,0)|g" +# bus_type.match gained const drv arg in 6.11 (commit d69d80484598) grep -rl "NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG)|#if 1 /* force:
kernel 6.x+ */|g" + | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_MATCH_HAS_CONST_DRV_ARG)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,11,0)|g" +# bus_type.uevent gained const dev arg in 6.3 (commit 2a81ada32f0e) grep -rl "NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG" ${NVIDIA_OOT}/drivers/gpu/host1x/ \ - | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG)|#if 1 /* force: kernel 6.x+ */|g" -echo "Patched OOT host1x: forced conftest macro code paths for kernel 6.18" + | xargs -r sed -i "s|#if defined(NV_BUS_TYPE_STRUCT_UEVENT_HAS_CONST_DEV_ARG)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,3,0)|g" +echo "Patched OOT host1x: LINUX_VERSION_CODE guards for kernel 6.18" # host1x-fence: remove -Werror, add conftest + nvidia-oot includes sed -i 's|ccflags-y += -Werror||g' \ @@ -52,11 +65,8 @@ printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile printf 'ccflags-y += -I$(srctree.nvidia-oot)/drivers/video/tegra/nvmap/include\n' \ >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile -printf 'ccflags-y += -DNV_GET_USER_PAGES_HAS_ARGS_FLAGS\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile -printf 'ccflags-y += -DNV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT\n' \ - >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile -printf 'ccflags-y += -DNV_IOREMAP_PROT_HAS_PGPROT_T_ARG\n' \ +# Force-include version.h for LINUX_VERSION_CODE guards in nvmap source patches below. +printf 'ccflags-y += -include $(srctree)/include/generated/uapi/linux/version.h\n' \ >> ${NVIDIA_OOT}/drivers/video/tegra/nvmap/Makefile # mc-utils: add nvidia-oot includes @@ -71,29 +81,38 @@ printf 'ccflags-y += -I$(srctree.nvidia-oot)/include\n' \ echo "Include paths patched into OOT module Makefiles." -# Force conftest macro paths in nvmap source for kernel 6.18 +# Replace conftest macro guards in nvmap source with LINUX_VERSION_CODE checks.
+# get_user_pages() flags arg dropped vmas in 6.5 (commit 54d0222) grep -rl "NV_GET_USER_PAGES_HAS_ARGS_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)|#if 1 /* force: kernel 6.5+ */|g" + | xargs -r sed -i "s|#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,5,0)|g" +# mm_struct.rss_stat became percpu_counter[] in 6.2 (commit a9b3eff) grep -rl "NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT)|#if 1 /* force: kernel 6.2+ */|g" + | xargs -r sed -i "s|#if defined(NV_MM_STRUCT_STRUCT_HAS_PERCPU_COUNTER_RSS_STAT)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,2,0)|g" +# ioremap_prot() takes pgprot_t directly since 6.15 (commit b3ce04a) grep -rl "NV_IOREMAP_PROT_HAS_PGPROT_T_ARG" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_IOREMAP_PROT_HAS_PGPROT_T_ARG)|#if 1 /* force: kernel 6.15+ */|g" + | xargs -r sed -i "s|#if defined(NV_IOREMAP_PROT_HAS_PGPROT_T_ARG)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,15,0)|g" +# vm_flags_set() added in 6.3 (commit d0e9fe1) grep -rl "NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS" ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if 1 /* force: kernel 6.3+ */|g" + | xargs -r sed -i "s|#if defined(NV_VM_AREA_STRUCT_HAS_CONST_VM_FLAGS)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,3,0)|g" +# __assign_str() dropped src arg in 6.10 (commit a43cee3) grep -rl "NV___ASSIGN_STR_HAS_NO_SRC_ARG" \ ${NVIDIA_OOT}/include/trace/events/ \ ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ 2>/dev/null \ - | xargs -r sed -i "s|#if defined(NV___ASSIGN_STR_HAS_NO_SRC_ARG)|#if 1 /* force: kernel 6.10+ */|g" + | xargs -r sed -i "s|#if defined(NV___ASSIGN_STR_HAS_NO_SRC_ARG)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,10,0)|g" +# __alloc_pages_bulk() 
dropped page_list in 6.14 (commit f34f088) grep -rl "NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG" \ ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG)|#if 1 /* force: kernel 6.14+ */|g" + | xargs -r sed -i "s|#if defined(NV__ALLOC_PAGES_BULK_HAS_NO_PAGE_LIST_ARG)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,14,0)|g" +# struct file.f_ref added in 6.13 (commit 08ef26ea9ab3) grep -rl "NV_FILE_STRUCT_HAS_F_REF" \ ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_FILE_STRUCT_HAS_F_REF)|#if 1 /* force: kernel 6.13+ */|g" + | xargs -r sed -i "s|#if defined(NV_FILE_STRUCT_HAS_F_REF)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,13,0)|g" +# get_file_rcu() takes **file in 6.7 (commit e4e5f98) grep -rl "NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG" \ ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG)|#if 1 /* force: kernel 6.7+ */|g" + | xargs -r sed -i "s|#if defined(NV_GET_FILE_RCU_HAS_DOUBLE_PTR_FILE_ARG)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,7,0)|g" +# platform_driver.remove → void in 6.11 (commit 5c5a768) grep -rl "NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID" \ ${NVIDIA_OOT}/drivers/video/tegra/nvmap/ \ - | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if 1 /* force: kernel 6.11+ */|g" -echo "Patched nvmap: forced conftest macro code paths for kernel 6.18" + | xargs -r sed -i "s|#if defined(NV_PLATFORM_DRIVER_STRUCT_REMOVE_RETURNS_VOID)|#if LINUX_VERSION_CODE >= KERNEL_VERSION(6,11,0)|g" +echo "Patched nvmap: LINUX_VERSION_CODE guards for kernel 6.18"