From 788453c165cb98544abaf1509323ba7bded5c308 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 27 May 2017 07:22:12 -0400 Subject: [PATCH 001/109] make DEFAULT_MMAP_MIN_ADDR match LSM_MMAP_MIN_ADDR Signed-off-by: Daniel Micay --- mm/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index befa8909ae29..c6ebe33570f8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -704,7 +704,8 @@ config KSM config DEFAULT_MMAP_MIN_ADDR int "Low address space to protect from user allocation" depends on MMU - default 4096 + default 32768 if ARM || (ARM64 && COMPAT) + default 65536 help This is the portion of low virtual memory which should be protected from userspace allocation. Keeping a user from writing to low pages From 4dc311cd5ad8bfee180529c5c9c7334105dfda13 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 06:17:41 -0400 Subject: [PATCH 002/109] enable HARDENED_USERCOPY by default Signed-off-by: Daniel Micay --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 86f8768c63d4..26ad690e0c80 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -225,6 +225,7 @@ config FORTIFY_SOURCE config HARDENED_USERCOPY bool "Harden memory copies between kernel and userspace" imply STRICT_DEVMEM + default y help This option checks for obviously wrong memory regions when copying memory to/from the kernel (via copy_to_user() and From 872496596116423c9c81f66ea7c407c15161fcc4 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:05:15 -0400 Subject: [PATCH 003/109] enable SECURITY_DMESG_RESTRICT by default Signed-off-by: Daniel Micay --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index 6a4393fce9a1..40f773bb33d8 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -9,7 +9,7 @@ source "security/keys/Kconfig" config 
SECURITY_DMESG_RESTRICT bool "Restrict unprivileged access to the kernel syslog" - default n + default y help This enforces restrictions on unprivileged users reading the kernel syslog via dmesg(8). From 7d2f35a62d2aa06c77b4b2d7dacde25fc51abc19 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:06:14 -0400 Subject: [PATCH 004/109] set kptr_restrict=2 by default Signed-off-by: Daniel Micay --- lib/vsprintf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 800b8ac49f53..e56d26c9f3ab 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -856,7 +856,7 @@ static char *default_pointer(char *buf, char *end, const void *ptr, return ptr_to_id(buf, end, ptr, spec); } -int kptr_restrict __read_mostly; +int kptr_restrict __read_mostly = 2; static noinline_for_stack char *restricted_pointer(char *buf, char *end, const void *ptr, From 7d6f131073b6e4b80a679a155a18a06819230a6e Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Tue, 19 Sep 2023 00:57:29 +0200 Subject: [PATCH 005/109] enable LIST_HARDENED by default Signed-off-by: Levente Polyak --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 26ad690e0c80..ed25c8ea8381 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -249,6 +249,7 @@ menu "Hardening of kernel data structures" config LIST_HARDENED bool "Check integrity of linked list manipulation" + default y help Minimal integrity checking in the linked-list manipulation routines to catch memory corruptions that are not guaranteed to result in an From d25caf67b7e4d9daa9824d2f694780919a38cb51 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 12:21:21 -0400 Subject: [PATCH 006/109] enable BUG_ON_DATA_CORRUPTION by default Signed-off-by: Daniel Micay --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening 
b/security/Kconfig.hardening index ed25c8ea8381..11bd5d6c5f7f 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -270,6 +270,7 @@ config RUST_BITMAP_HARDENED config BUG_ON_DATA_CORRUPTION bool "Trigger a BUG when data corruption is detected" select LIST_HARDENED + default y help Select this option if the kernel should BUG when it encounters data corruption in kernel memory structures when they get checked From 2d8ea1b068dc8760eeb7559eaf766db4baf760d7 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 01:39:32 -0500 Subject: [PATCH 007/109] enable ARM64_SW_TTBR0_PAN by default --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9ea19b74b6c3..4330577609cd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1702,6 +1702,7 @@ config MITIGATE_SPECTRE_BRANCH_HISTORY config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" depends on !KCSAN + default y help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved From f2c6e50cbdc7908aab463a60d62092009f0b7d63 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 01:33:48 -0500 Subject: [PATCH 008/109] arm64: enable RANDOMIZE_BASE by default --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4330577609cd..c33af2cb319c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2300,6 +2300,7 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" select RELOCATABLE + default y help Randomizes the virtual address at which the kernel image is loaded, as a security feature that deters exploit attempts From f5a02017c075b61bf35bb08cf1e4b2552185a780 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 19:43:38 -0400 Subject: [PATCH 009/109] enable SLAB_FREELIST_RANDOM by default 
Signed-off-by: Daniel Micay --- mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig b/mm/Kconfig index c6ebe33570f8..113669fd6168 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -203,6 +203,7 @@ config SLAB_MERGE_DEFAULT config SLAB_FREELIST_RANDOM bool "Randomize slab freelist" depends on !SLUB_TINY + default y help Randomizes the freelist order used on creating new pages. This security feature reduces the predictability of the kernel slab From b6ddefbebaa200fa7c4a6574c42f97fc49f12746 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 20 Aug 2017 15:39:25 -0400 Subject: [PATCH 010/109] enable SLAB_FREELIST_HARDENED by default --- mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig b/mm/Kconfig index 113669fd6168..e4d6b64c3a80 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -212,6 +212,7 @@ config SLAB_FREELIST_RANDOM config SLAB_FREELIST_HARDENED bool "Harden slab freelist metadata" depends on !SLUB_TINY + default y help Many kernel heap attacks try to target slab cache metadata and other infrastructure. This options makes minor performance From d90a23236d207f7bb315b4f260580bd8c19eaa7c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 8 Jul 2017 02:38:54 -0400 Subject: [PATCH 011/109] disable SLAB_MERGE_DEFAULT by default --- mm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index e4d6b64c3a80..b3209fd701b8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -188,7 +188,6 @@ config SLUB_TINY config SLAB_MERGE_DEFAULT bool "Allow slab caches to be merged" - default y help For reduced kernel memory fragmentation, slab caches can be merged when they share the same size and other characteristics. 
From 9ceb57889352a941d7a2121439557917a524447d Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 8 May 2017 12:51:54 -0400 Subject: [PATCH 012/109] enable FORTIFY_SOURCE by default Signed-off-by: Daniel Micay --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 11bd5d6c5f7f..e42e319485ae 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -218,6 +218,7 @@ config FORTIFY_SOURCE depends on ARCH_HAS_FORTIFY_SOURCE # https://github.com/llvm/llvm-project/issues/53645 depends on !X86_32 || !CC_IS_CLANG || CLANG_VERSION >= 160000 + default y help Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. From 07eed438589f56ad06270a36c74ef3913425046a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 12:09:17 -0400 Subject: [PATCH 013/109] enable PANIC_ON_OOPS by default Signed-off-by: Daniel Micay --- lib/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 93f356d2b3d9..be06909c5f31 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1095,6 +1095,7 @@ menu "Debug Oops, Lockups and Hangs" config PANIC_ON_OOPS bool "Panic on Oops" + default y help Say Y here to enable the kernel to panic when it oopses. This has the same effect as setting oops=panic on the kernel command @@ -1104,7 +1105,7 @@ config PANIC_ON_OOPS anything erroneous after an oops which could result in data corruption or other issues. - Say N if unsure. + Say Y if unsure. config PANIC_TIMEOUT int "panic timeout" From 45ee2ce829aff3a37976ea6df3fe6fb50b39047b Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 14 May 2017 22:39:34 -0400 Subject: [PATCH 014/109] stop hiding SLUB_DEBUG behind EXPERT It can make sense to disable this to reduce attack surface / complexity. 
--- mm/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 7638d75b27db..8e1bdc87e9a1 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -47,7 +47,7 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT config SLUB_DEBUG default y - bool "Enable SLUB debugging support" if EXPERT + bool "Enable SLUB debugging support" depends on SYSFS && !SLUB_TINY select STACKDEPOT if STACKTRACE_SUPPORT help From 4de6c58a393810cdbb559a0acc72fc7227a3b2fa Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:11:31 -0400 Subject: [PATCH 015/109] stop hiding X86_16BIT behind EXPERT --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fe106da41ae5..ee65871cdbd4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1242,7 +1242,7 @@ config VM86 default X86_LEGACY_VM86 config X86_16BIT - bool "Enable support for 16-bit segments" if EXPERT + bool "Enable support for 16-bit segments" default y depends on MODIFY_LDT_SYSCALL help From ce6f6568b5bc0881f7ec3339e64313bc2cb17df3 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:11:52 -0400 Subject: [PATCH 016/109] disable X86_16BIT by default Signed-off-by: Daniel Micay --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ee65871cdbd4..b7f5c1b617e2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1243,7 +1243,6 @@ config VM86 config X86_16BIT bool "Enable support for 16-bit segments" - default y depends on MODIFY_LDT_SYSCALL help This option is required by programs like Wine to run 16-bit From 5ebe372f5f690df0f287fd4f064dc84f28b34a84 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:15:52 -0400 Subject: [PATCH 017/109] stop hiding MODIFY_LDT_SYSCALL behind EXPERT --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 
b7f5c1b617e2..0b0638154acd 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2318,7 +2318,7 @@ config CMDLINE_OVERRIDE be set to 'N' under normal conditions. config MODIFY_LDT_SYSCALL - bool "Enable the LDT (local descriptor table)" if EXPERT + bool "Enable the LDT (local descriptor table)" default y help Linux can allow user programs to install a per-process x86 From b65cb1e06cd84ed7bfb4037fd39ccecbc6eb7665 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 18:16:16 -0400 Subject: [PATCH 018/109] disable MODIFY_LDT_SYSCALL by default Signed-off-by: Daniel Micay --- arch/x86/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0b0638154acd..8573ad7eb7e1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2319,7 +2319,6 @@ config CMDLINE_OVERRIDE config MODIFY_LDT_SYSCALL bool "Enable the LDT (local descriptor table)" - default y help Linux can allow user programs to install a per-process x86 Local Descriptor Table (LDT) using the modify_ldt(2) system From bfd873097c16ed9dc325f31eea2bc06d856e0d17 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 07:08:42 -0400 Subject: [PATCH 019/109] set LEGACY_VSYSCALL_NONE by default --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8573ad7eb7e1..b4cdce964a97 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2234,7 +2234,7 @@ config COMPAT_VDSO choice prompt "vsyscall table for legacy applications" depends on X86_64 - default LEGACY_VSYSCALL_XONLY + default LEGACY_VSYSCALL_NONE help Legacy user code that does not know how to find the vDSO expects to be able to issue three syscalls by calling fixed addresses in From e033d115c162c7c1523b26278e31d9ba8f5ac62f Mon Sep 17 00:00:00 2001 From: Bernhard40 <32568352+Bernhard40@users.noreply.github.com> Date: Fri, 6 Oct 2017 10:21:50 +0000 Subject: [PATCH 020/109] stop hiding AIO behind EXPERT --- init/Kconfig | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 7484cd703bc1..4b3ad67bc3cc 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1868,7 +1868,7 @@ config SHMEM which may be appropriate on small systems without swap. config AIO - bool "Enable AIO support" if EXPERT + bool "Enable AIO support" default y help This option enables POSIX asynchronous I/O which may by used From f718aa5822ac6ff980239a85612cb6586ef8ad6f Mon Sep 17 00:00:00 2001 From: Bernhard40 <32568352+Bernhard40@users.noreply.github.com> Date: Fri, 6 Oct 2017 10:24:10 +0000 Subject: [PATCH 021/109] disable AIO by default --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 4b3ad67bc3cc..e45da3b3a6a9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1869,7 +1869,6 @@ config SHMEM config AIO bool "Enable AIO support" - default y help This option enables POSIX asynchronous I/O which may by used by some high performance threaded applications. Disabling From 9540fb357780463b53585ea6838c74d097778a8a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:08:49 -0500 Subject: [PATCH 022/109] remove SYSVIPC from arm64/x86_64 defconfigs --- arch/arm64/configs/defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index b67d5b1fc45b..54c8002590fb 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ_IDLE=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7d7310cdf8b0..21ebfefe9c90 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,5 +1,4 @@ CONFIG_WERROR=y -CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_AUDIT=y CONFIG_NO_HZ=y From 852795dc069a70b7cdd3a292a5a9f9dcbea0e756 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 27 
May 2017 07:28:10 -0400 Subject: [PATCH 023/109] disable DEVPORT by default --- drivers/char/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 2a3a37b2cf3c..9bcfc2055f1b 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -317,7 +317,6 @@ config NVRAM config DEVPORT bool "/dev/port character device" depends on HAS_IOPORT - default y help Say Y here if you want to support the /dev/port device. The /dev/port device is similar to /dev/mem, but for I/O ports. From cb4228430ab03d39f68bc8484687168bf755e239 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 27 May 2017 07:29:45 -0400 Subject: [PATCH 024/109] disable PROC_VMCORE by default --- fs/proc/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 6ae966c561e7..27d78d669f95 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -41,7 +41,6 @@ config PROC_KCORE config PROC_VMCORE bool "/proc/vmcore support" depends on PROC_FS && CRASH_DUMP - default y help Exports the dump image of crashed kernel in ELF format. 
From d96ab821f035d002373a8c5bd922c72fc46c6a08 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 28 May 2017 03:03:46 -0400 Subject: [PATCH 025/109] disable NFS_DEBUG by default --- fs/nfs/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 6bb30543eff0..c6951309ff24 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -198,7 +198,6 @@ config NFS_USE_KERNEL_DNS config NFS_DEBUG bool depends on NFS_FS && SUNRPC_DEBUG - default y config NFS_DISABLE_UDP_SUPPORT bool "NFS: Disable NFS UDP protocol support" From a8ecd142fb3c1ced941140d0d3a341832a7ca725 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 12:11:11 -0400 Subject: [PATCH 026/109] enable DEBUG_WX by default Signed-off-by: Daniel Micay --- mm/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index 8e1bdc87e9a1..08ae2acd7b1b 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -189,6 +189,7 @@ config DEBUG_WX depends on ARCH_HAS_PTDUMP depends on MMU select PTDUMP + default y help Generate a warning if any W+X mappings are found at boot. From 9136c488fc1ab6f34e7fbc79ad24c3ac6a2cf4d5 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 5 Jan 2018 13:21:16 -0500 Subject: [PATCH 027/109] disable LEGACY_PTYS by default --- drivers/tty/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 149f3d53b760..3c51e08c1d8c 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -116,7 +116,6 @@ config UNIX98_PTYS config LEGACY_PTYS bool "Legacy (BSD) PTY support" - default y help A pseudo terminal (PTY) is a software device consisting of two halves: a master and a slave. 
The slave device behaves identical to From 243cb126fa0fd7e86fd779852fb4a076f30e2db4 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 5 Jan 2018 12:41:42 -0500 Subject: [PATCH 028/109] disable DEVMEM by default --- drivers/char/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 9bcfc2055f1b..8d669c63ad7c 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -284,7 +284,6 @@ config NSC_GPIO config DEVMEM bool "/dev/mem virtual device support" - default y help Say Y here if you want to support the /dev/mem device. The /dev/mem device is used to access areas of physical From fc3e6e0993c503b690059bb0b32f0511fb82a6cd Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 5 Jan 2018 12:43:49 -0500 Subject: [PATCH 029/109] enable IO_STRICT_DEVMEM by default --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index be06909c5f31..a5759774550d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1975,6 +1975,7 @@ config STRICT_DEVMEM config IO_STRICT_DEVMEM bool "Filter I/O access to /dev/mem" depends on STRICT_DEVMEM + default y help If this option is disabled, you allow userspace (root) access to all io-memory regardless of whether a driver is actively using that From 9e24bb958cd1e2d6fcc05a7a78001f32853dfc71 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 7 May 2017 18:28:33 -0400 Subject: [PATCH 030/109] disable COMPAT_BRK by default --- mm/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index b3209fd701b8..77c40dc2ad83 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -292,7 +292,6 @@ config SHUFFLE_PAGE_ALLOCATOR config COMPAT_BRK bool "Disable heap randomization" - default y help Randomizing heap placement makes heap exploits harder, but it also breaks ancient binaries (including anything libc5 based). 
From 62b2e67f75cc198befc5faff52adea16202f4a22 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 7 May 2017 16:16:39 -0400 Subject: [PATCH 031/109] use maximum supported mmap rnd entropy by default Signed-off-by: Daniel Micay --- arch/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index b5d41bb40672..6bf632241af9 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1216,7 +1216,7 @@ config ARCH_MMAP_RND_BITS int "Number of bits to use for ASLR of mmap base address" if EXPERT range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT - default ARCH_MMAP_RND_BITS_MIN + default ARCH_MMAP_RND_BITS_MAX depends on HAVE_ARCH_MMAP_RND_BITS help This value can be used to select the number of bits to use to @@ -1250,7 +1250,7 @@ config ARCH_MMAP_RND_COMPAT_BITS int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT - default ARCH_MMAP_RND_COMPAT_BITS_MIN + default ARCH_MMAP_RND_COMPAT_BITS_MAX depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS help This value can be used to select the number of bits to use to From aa2f3539226e3f499f628fb7b5e2fbba6377bc59 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 30 May 2017 10:47:23 -0400 Subject: [PATCH 032/109] enable protected_{symlinks,hardlinks} by default --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 9e5500dad14f..e3429055d39b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1196,8 +1196,8 @@ static inline void put_link(struct nameidata *nd) path_put(&last->link); } -static int sysctl_protected_symlinks __read_mostly; -static int sysctl_protected_hardlinks __read_mostly; +static int sysctl_protected_symlinks __read_mostly = 1; +static int sysctl_protected_hardlinks 
__read_mostly = 1; static int sysctl_protected_fifos __read_mostly; static int sysctl_protected_regular __read_mostly; From 8b5a78e2a84e68428042e04b6870b329db059cb2 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:13:48 -0500 Subject: [PATCH 033/109] enable SECURITY by default --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index 40f773bb33d8..ef22ceb46b57 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -76,6 +76,7 @@ config SECURITY bool "Enable different security models" depends on SYSFS depends on MULTIUSER + default y help This allows you to choose different security modules to be configured into your kernel. From 961e6eaca7bf4cd8c9f9a68c3e8ba42131b2152f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 29 May 2017 06:17:59 -0400 Subject: [PATCH 034/109] enable SECURITY_YAMA by default --- security/yama/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/yama/Kconfig b/security/yama/Kconfig index a810304123ca..b809050b25d2 100644 --- a/security/yama/Kconfig +++ b/security/yama/Kconfig @@ -2,7 +2,7 @@ config SECURITY_YAMA bool "Yama support" depends on SECURITY - default n + default y help This selects Yama, which extends DAC support with additional system-wide security settings beyond regular Linux discretionary From ca39f71418e339fc603cf48fafee1330cfd214f6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:14:02 -0500 Subject: [PATCH 035/109] enable SECURITY_NETWORK by default --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index ef22ceb46b57..c20928e74619 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -102,6 +102,7 @@ config SECURITYFS config SECURITY_NETWORK bool "Socket and Networking Security Hooks" depends on SECURITY + default y help This enables the socket and networking security hooks. 
If enabled, a security module can use these hooks to From 3e539280cababcbe1182a4cded810befdfd86c3a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:15:24 -0500 Subject: [PATCH 036/109] enable AUDIT by default --- init/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/init/Kconfig b/init/Kconfig index e45da3b3a6a9..470609276d59 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -527,6 +527,7 @@ config CROSS_MEMORY_ATTACH config AUDIT bool "Auditing support" depends on NET + default y help Enable auditing infrastructure that can be used with another kernel subsystem, such as SELinux (which requires this for From 802fd504d44c5abc68aff90d4b5ffa911fbfa6e3 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 02:16:49 -0500 Subject: [PATCH 037/109] enable SECURITY_SELINUX by default --- security/selinux/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 5588c4d573f6..567a33eae460 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -3,7 +3,7 @@ config SECURITY_SELINUX bool "SELinux Support" depends on SECURITY_NETWORK && AUDIT && NET && INET select NETWORK_SECMARK - default n + default y help This selects Security-Enhanced Linux (SELinux). You will also need a policy configuration and a labeled filesystem. From 87b1bced8948828ce9e511ec9996fefb95d8114b Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 6 Jan 2018 13:41:11 -0500 Subject: [PATCH 038/109] enable SYN_COOKIES by default --- net/ipv4/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index df922f9f5289..5ef3ea768d9f 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -267,6 +267,7 @@ config IP_PIMSM_V2 config SYN_COOKIES bool "IP: TCP syncookie support" + default y help Normal TCP/IP networking is open to an attack known as "SYN flooding". 
This denial-of-service attack prevents legitimate remote From 3aeeb846fd173ed50e9a76f4582b47ff9c870506 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Thu, 19 Sep 2019 19:02:23 +0200 Subject: [PATCH 039/109] enable INIT_ON_ALLOC_DEFAULT_ON by default --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index e42e319485ae..226193bf5ebc 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -158,6 +158,7 @@ config KSTACK_ERASE_RUNTIME_DISABLE config INIT_ON_ALLOC_DEFAULT_ON bool "Enable heap memory zeroing on allocation by default" + default y depends on !KMSAN help This has the effect of setting "init_on_alloc=1" on the kernel From f9a342fe4bdf538916acc8c453ece97a2b0c1bbe Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Thu, 19 Sep 2019 19:03:01 +0200 Subject: [PATCH 040/109] enable INIT_ON_FREE_DEFAULT_ON by default --- security/Kconfig.hardening | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 226193bf5ebc..07f9286f1443 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -172,6 +172,7 @@ config INIT_ON_ALLOC_DEFAULT_ON config INIT_ON_FREE_DEFAULT_ON bool "Enable heap memory zeroing on free by default" + default y depends on !KMSAN help This has the effect of setting "init_on_free=1" on the kernel From 2b8f8a43d3c219086a303265a8ab28eba55982a3 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Sun, 27 Sep 2020 00:43:48 +0200 Subject: [PATCH 041/109] kconfig: select DEBUG_FS_ALLOW_NONE by default if DEBUG_FS is enabled Signed-off-by: Levente Polyak --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a5759774550d..8d90402b0444 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -720,7 +720,7 @@ config DEBUG_FS choice prompt "Debugfs default access" depends on DEBUG_FS - default 
DEBUG_FS_ALLOW_ALL + default DEBUG_FS_ALLOW_NONE help This selects the default access restrictions for debugfs. It can be overridden with kernel command line option From d728b03bb0576d49e513ceea41986f0087323ab6 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Tue, 22 Dec 2020 23:40:09 +0100 Subject: [PATCH 042/109] stop hiding UID16 behind EXPERT --- init/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 470609276d59..e448f65e4404 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1681,7 +1681,7 @@ menuconfig EXPERT Only use this if you really know what you are doing. config UID16 - bool "Enable 16-bit UID system calls" if EXPERT + bool "Enable 16-bit UID system calls" depends on HAVE_UID16 && MULTIUSER default y help From 188f00267b0f26e1e03662bc8b983a45da61534b Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Tue, 22 Dec 2020 23:41:32 +0100 Subject: [PATCH 043/109] disable UID16 by default --- init/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index e448f65e4404..78479ebfcea2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1683,7 +1683,6 @@ menuconfig EXPERT config UID16 bool "Enable 16-bit UID system calls" depends on HAVE_UID16 && MULTIUSER - default y help This enables the legacy 16-bit UID syscall wrappers. 
From 756797e841fabe10eec771b71387e9e962772854 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Wed, 25 Aug 2021 22:24:10 +0200 Subject: [PATCH 044/109] kconfig: enable RANDOMIZE_KSTACK_OFFSET_DEFAULT by default --- arch/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/Kconfig b/arch/Kconfig index 6bf632241af9..7242e5a04be4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1553,6 +1553,7 @@ config RANDOMIZE_KSTACK_OFFSET config RANDOMIZE_KSTACK_OFFSET_DEFAULT bool "Default state of kernel stack offset randomization" depends on RANDOMIZE_KSTACK_OFFSET + default y help Kernel stack offset randomization is controlled by kernel boot param "randomize_kstack_offset=on/off", and this config chooses the default From ff9efa1c3b8856db97338a18eeb08397ffbe83a4 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Mon, 1 May 2023 23:20:43 +0200 Subject: [PATCH 045/109] kconfig: disable LEGACY_TIOCSTI by default --- drivers/tty/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig index 3c51e08c1d8c..a3e62d52ebcf 100644 --- a/drivers/tty/Kconfig +++ b/drivers/tty/Kconfig @@ -145,7 +145,6 @@ config LEGACY_PTY_COUNT config LEGACY_TIOCSTI bool "Allow legacy TIOCSTI usage" - default y help Historically the kernel has allowed TIOCSTI, which will push characters into a controlling TTY. 
This continues to be used From c532a8c49b092f5e2ac4312ee5af0747e5e5c04e Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Tue, 12 Dec 2023 18:35:05 +0100 Subject: [PATCH 046/109] mm/slab: enable RANDOM_KMALLOC_CACHES by default --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 77c40dc2ad83..9bcf4da8865a 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -249,7 +249,7 @@ config SLUB_STATS Try running: slabinfo -DA config RANDOM_KMALLOC_CACHES - default n + default y depends on !SLUB_TINY bool "Randomize slab caches for normal kmalloc" help From 7765d65fb22110c2a75b320adf94a41218e22fa0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 7 May 2017 00:28:23 -0400 Subject: [PATCH 047/109] add __read_only for non-init related usage --- include/linux/cache.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/cache.h b/include/linux/cache.h index e69768f50d53..432c30a1fc7e 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -60,6 +60,8 @@ #define __ro_after_init __section(".data..ro_after_init") #endif +#define __read_only __ro_after_init + #ifndef ____cacheline_aligned_in_smp #ifdef CONFIG_SMP #define ____cacheline_aligned_in_smp ____cacheline_aligned From bc84d41514713a15c84893f0943ebc32a9be382f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 12 May 2017 03:22:00 -0400 Subject: [PATCH 048/109] mark kernel_set_to_readonly as __ro_after_init This change was extracted from PaX where it's part of KERNEXEC. 
Signed-off-by: Daniel Micay --- arch/x86/mm/init_32.c | 5 ++--- arch/x86/mm/init_64.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0908c44d51e6..2734208acddc 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -718,7 +718,7 @@ void __init mem_init(void) test_wp_bit(); } -int kernel_set_to_readonly __read_mostly; +int kernel_set_to_readonly __ro_after_init; static void mark_nxdata_nx(void) { @@ -742,12 +742,11 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = (unsigned long)__end_rodata - start; + kernel_set_to_readonly = 1; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); - kernel_set_to_readonly = 1; - #ifdef CONFIG_CPA_DEBUG pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size); set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index df2261fa4f98..c18790a324a0 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1387,7 +1387,7 @@ void __init mem_init(void) preallocate_vmalloc_pages(); } -int kernel_set_to_readonly; +int kernel_set_to_readonly __ro_after_init; void mark_rodata_ro(void) { @@ -1400,9 +1400,8 @@ void mark_rodata_ro(void) printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); - set_memory_ro(start, (end - start) >> PAGE_SHIFT); - kernel_set_to_readonly = 1; + set_memory_ro(start, (end - start) >> PAGE_SHIFT); /* * The rodata/data/bss/brk section (but not the kernel text!) From fc9b3fab087fb010927fe0a2a7b074eada1c0abc Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Sun, 13 Jan 2019 21:42:45 +0100 Subject: [PATCH 049/109] Revert "mark kernel_set_to_readonly as __ro_after_init" This commit causes CPA conflicts, cf. https://github.com/anthraxx/linux-hardened/issues/4. 
Signed-off-by: Thibaut Sautereau --- arch/x86/mm/init_32.c | 5 +++-- arch/x86/mm/init_64.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2734208acddc..0908c44d51e6 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -718,7 +718,7 @@ void __init mem_init(void) test_wp_bit(); } -int kernel_set_to_readonly __ro_after_init; +int kernel_set_to_readonly __read_mostly; static void mark_nxdata_nx(void) { @@ -742,11 +742,12 @@ void mark_rodata_ro(void) unsigned long start = PFN_ALIGN(_text); unsigned long size = (unsigned long)__end_rodata - start; - kernel_set_to_readonly = 1; set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); pr_info("Write protecting kernel text and read-only data: %luk\n", size >> 10); + kernel_set_to_readonly = 1; + #ifdef CONFIG_CPA_DEBUG pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size); set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c18790a324a0..df2261fa4f98 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1387,7 +1387,7 @@ void __init mem_init(void) preallocate_vmalloc_pages(); } -int kernel_set_to_readonly __ro_after_init; +int kernel_set_to_readonly; void mark_rodata_ro(void) { @@ -1400,9 +1400,10 @@ void mark_rodata_ro(void) printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); - kernel_set_to_readonly = 1; set_memory_ro(start, (end - start) >> PAGE_SHIFT); + kernel_set_to_readonly = 1; + /* * The rodata/data/bss/brk section (but not the kernel text!) * should also be not-executable. 
From d66f52c56568e4cd36672f9008e2b9b5e1c82a55 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 14 May 2017 19:01:58 -0400 Subject: [PATCH 050/109] mark slub runtime configuration as __ro_after_init Signed-off-by: Daniel Micay --- mm/slub.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index e423afa27d1a..fb52dd654dfe 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -972,13 +972,13 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p) * Debug settings: */ #if defined(CONFIG_SLUB_DEBUG_ON) -static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS; +static slab_flags_t slub_debug __ro_after_init = DEBUG_DEFAULT_FLAGS; #else -static slab_flags_t slub_debug; +static slab_flags_t slub_debug __ro_after_init; #endif static const char *slub_debug_string __ro_after_init; -static int disable_higher_order_debug; +static int disable_higher_order_debug __ro_after_init; /* * Object debugging @@ -7317,10 +7317,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof); * and increases the number of allocations possible without having to * take the list_lock. */ -static unsigned int slub_min_order; -static unsigned int slub_max_order = +static unsigned int slub_min_order __ro_after_init; +static unsigned int slub_max_order __ro_after_init = IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER; -static unsigned int slub_min_objects; +static unsigned int slub_min_objects __ro_after_init; /* * Calculate the order of allocation given an slab object size. From 26f9b255a974f89301af87b9f7496b2700395c82 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 11:35:35 -0400 Subject: [PATCH 051/109] add __ro_after_init to slab_nomerge and slab_state This was extracted from the PaX patch where it's part of the KERNEXEC feature as __read_only. 
Signed-off-by: Daniel Micay --- mm/slab_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index d5a70a831a2a..36c45acbbbe6 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -37,7 +37,7 @@ #define CREATE_TRACE_POINTS #include -enum slab_state slab_state; +enum slab_state slab_state __ro_after_init; LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; @@ -57,7 +57,7 @@ struct kmem_cache *kmem_cache; /* * Merge control. If this is set then no merging of slab caches will occur. */ -static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); +static bool slab_nomerge __ro_after_init = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT); static int __init setup_slab_nomerge(char *str) { From b83f69b2f635ad76654ca2f26f4e7bc08af7ed5b Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 28 May 2017 18:51:30 -0400 Subject: [PATCH 052/109] mark kmem_cache as __ro_after_init --- mm/slab_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 36c45acbbbe6..51fc267d0f86 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -40,7 +40,7 @@ enum slab_state slab_state __ro_after_init; LIST_HEAD(slab_caches); DEFINE_MUTEX(slab_mutex); -struct kmem_cache *kmem_cache; +struct kmem_cache *kmem_cache __ro_after_init; /* * Set of flags that will prevent slab merging. From 585a5ded299bd9cb9f237a223af3a70351d62238 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Fri, 12 May 2017 00:06:16 -0400 Subject: [PATCH 053/109] mark __{supported,default_kernel}_pte_mask as __ro_after_init These changes were initially extracted from PaX where it was part of KERNEXEC as __read_only. Before this linux-hardened commit was rebased onto v5.5, a call to x86_configure_nx in cpu_init needed to be removed, and was not required anyway since already set up earlier. This call was finally removed upstream in 505b789996f64 ("x86/cpu: Unify cpu_init()"). 
Signed-off-by: Daniel Micay Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- arch/x86/mm/init_32.c | 4 ++-- arch/x86/mm/init_64.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0908c44d51e6..edf0b9ea159c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -499,9 +499,9 @@ static void __init pagetable_init(void) #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL) /* Bits supported by the hardware: */ -pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK; +pteval_t __supported_pte_mask __ro_after_init = DEFAULT_PTE_MASK; /* Bits allowed in normal kernel mappings: */ -pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK; +pteval_t __default_kernel_pte_mask __ro_after_init = DEFAULT_PTE_MASK; EXPORT_SYMBOL_GPL(__supported_pte_mask); /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ EXPORT_SYMBOL(__default_kernel_pte_mask); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index df2261fa4f98..b8bf29695e0d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -104,9 +104,9 @@ static inline pgprot_t prot_sethuge(pgprot_t prot) */ /* Bits supported by the hardware: */ -pteval_t __supported_pte_mask __read_mostly = ~0; +pteval_t __supported_pte_mask __ro_after_init = ~0; /* Bits allowed in normal kernel mappings: */ -pteval_t __default_kernel_pte_mask __read_mostly = ~0; +pteval_t __default_kernel_pte_mask __ro_after_init = ~0; EXPORT_SYMBOL_GPL(__supported_pte_mask); /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */ EXPORT_SYMBOL(__default_kernel_pte_mask); From 29042770454119257607fbe724080fc92c5d5d0f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 01:24:28 -0400 Subject: [PATCH 054/109] mark kobj_ns_type_register as only used for init This allows kobj_ns_ops_tbl to be __ro_after_init. Extracted from PaX. 
--- include/linux/kobject_ns.h | 2 +- lib/kobject.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h index 4f0990e09b93..94d775949d2b 100644 --- a/include/linux/kobject_ns.h +++ b/include/linux/kobject_ns.h @@ -46,7 +46,7 @@ struct kobj_ns_type_operations { void (*drop_ns)(struct ns_common *); }; -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops); +int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops); int kobj_ns_type_registered(enum kobj_ns_type type); const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent); const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj); diff --git a/lib/kobject.c b/lib/kobject.c index 9c9ff0f5175f..a63903740fab 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -1019,9 +1019,9 @@ EXPORT_SYMBOL_GPL(kset_create_and_add); static DEFINE_SPINLOCK(kobj_ns_type_lock); -static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; +static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES] __ro_after_init; -int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) +int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops) { enum kobj_ns_type type = ops->type; int error; From 24e9f52fd21bcc8fc8b188b4411a48ca10db7302 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 01:32:30 -0400 Subject: [PATCH 055/109] mark open_softirq as only used for init [nicolas.bouchinet@ssi.gouv.fr: Adapt to commit 75e340ce106fa] --- include/linux/interrupt.h | 2 +- kernel/softirq.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 6cd26ffb0505..479fb0cacbd1 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -604,7 +604,7 @@ static inline void do_softirq_post_smp_call_flush(unsigned int unused) } #endif -extern void 
open_softirq(int nr, void (*action)(void)); +extern void __init open_softirq(int nr, void (*action)(void)); extern void softirq_init(void); extern void __raise_softirq_irqoff(unsigned int nr); diff --git a/kernel/softirq.c b/kernel/softirq.c index 77198911b8dd..c83209f8458b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -790,7 +790,7 @@ void __raise_softirq_irqoff(unsigned int nr) or_softirq_pending(1UL << nr); } -void open_softirq(int nr, void (*action)(void)) +void __init open_softirq(int nr, void (*action)(void)) { softirq_vec[nr].action = action; } From ce7cceafa049e2955c5834d8e0638d577e246b93 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 4 Jul 2017 01:42:33 -0400 Subject: [PATCH 056/109] mark softirq_vec as __ro_after_init Note: __cacheline_aligned_in_smp conflicts with __ro_after_init on x86. Extracted from PaX. --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index c83209f8458b..1c508ce336ae 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -57,7 +57,7 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); EXPORT_PER_CPU_SYMBOL(irq_stat); #endif -static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; +static struct softirq_action softirq_vec[NR_SOFTIRQS] __ro_after_init __aligned(PAGE_SIZE); DEFINE_PER_CPU(struct task_struct *, ksoftirqd); From b58da37c44da9fd1ad76a90fd5551fef8199fed2 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 17 Sep 2019 18:00:54 +0200 Subject: [PATCH 057/109] mm: slab: BUG on page type confusion under BUG_ON_DATA_CORRUPTION This change was extracted from PaX. 
Signed-off-by: Daniel Micay Signed-off-by: Levente Polyak Signed-off-by: Thibaut Sautereau [nicolas.bouchinet@ssi.gouv.fr: memcg related functions moved from mm/slab.h to mm/slub.c (see 0bedcc66d2a43a50a)] Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index fb52dd654dfe..546820cb248a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6252,10 +6252,14 @@ static noinline void warn_free_bad_obj(struct kmem_cache *s, void *obj) struct slab *slab; slab = virt_to_slab(obj); +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG_ON(!slab); +#else if (WARN_ONCE(!slab, "kmem_cache_free(%s, %p): object is not in a slab page\n", s->name, obj)) return; +#endif cachep = slab->slab_cache; From 34c99184cdce5da53a8ef77f8ac7370f65f20379 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 11:50:53 -0400 Subject: [PATCH 058/109] bug on kmem_cache_free with the wrong cache At least when CONFIG_BUG_ON_DATA_CORRUPTION is enabled. Signed-off-by: Daniel Micay Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak [nicolas.bouchinet@ssi.gouv.fr: memcg related functions moved from mm/slab.h to mm/slub.c (see 0bedcc66d2a43a50a)] Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 546820cb248a..0e9b0512158b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6263,6 +6263,9 @@ static noinline void warn_free_bad_obj(struct kmem_cache *s, void *obj) cachep = slab->slab_cache; +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG_ON(cachep != s); +#else if (WARN_ONCE(cachep != s, "kmem_cache_free(%s, %p): object belongs to different cache %s\n", s->name, obj, cachep ? 
cachep->name : "(NULL)")) { @@ -6270,6 +6273,7 @@ static noinline void warn_free_bad_obj(struct kmem_cache *s, void *obj) print_tracking(cachep, obj); return; } +#endif } /** From d34242f1c2254b236e9400d47b8275793ed44875 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 21:54:56 -0400 Subject: [PATCH 059/109] mm: add support for verifying page sanitization Signed-off-by: Daniel Micay Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- include/linux/highmem.h | 7 +++++++ mm/page_alloc.c | 6 ++++++ security/Kconfig.hardening | 7 +++++++ 3 files changed, 20 insertions(+) diff --git a/include/linux/highmem.h b/include/linux/highmem.h index af03db851a1d..26885da1a943 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -355,6 +355,13 @@ static inline bool tag_clear_highpages(struct page *page, int numpages) #endif +static inline void verify_zero_highpage(struct page *page) +{ + void *kaddr = kmap_atomic(page); + BUG_ON(memchr_inv(kaddr, 0, PAGE_SIZE)); + kunmap_atomic(kaddr); +} + /* * If we pass in a base or tail page, we can zero up to PAGE_SIZE. * If we pass in a head page, we can zero up to the size of the compound page. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e92898ad51cd..bce29a3574fa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1864,6 +1864,12 @@ inline void post_alloc_hook(struct page *page, unsigned int order, */ kernel_unpoison_pages(page, 1 << order); + if (IS_ENABLED(CONFIG_PAGE_SANITIZE_VERIFY) && want_init_on_free()) { + int i; + for (i = 0; i < (1 << order); i++) + verify_zero_highpage(page + i); + } + /* * As memory initialization might be integrated into KASAN, * KASAN unpoisoning and memory initialization code must be diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index 07f9286f1443..b728851fec14 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -211,6 +211,13 @@ config ZERO_CALL_USED_REGS be evaluated for suitability. 
For example, x86_64 grows by less than 1%, and arm64 grows by about 5%. +config PAGE_SANITIZE_VERIFY + bool "Verify sanitized pages" + default y + help + When init_on_free is enabled, verify that newly allocated pages + are zeroed to detect write-after-free bugs. + endmenu menu "Bounds checking" From e6ec3bfca0e246803d0dfb5fc243cdd884351cbe Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 20 Sep 2019 14:02:42 +0200 Subject: [PATCH 060/109] slub: Extend init_on_free to slab caches with constructors This is the remaining non-upstream part of SLAB_SANITIZE, which was a partial port, from Daniel Micay, of the feature from PaX without the default fast mode based on passing SLAB_NO_SANITIZE in performance-critical cases that are not particularly security sensitive. Signed-off-by: Thibaut Sautereau [levente@leventepolyak.net: Adapt to kasan init_on_free with HW_TAGS changes] Signed-off-by: Levente Polyak --- mm/slab.h | 12 +++++++++--- mm/slub.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index e9ab292acd22..e9c4cf834edb 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -688,9 +688,15 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) static inline bool slab_want_init_on_free(struct kmem_cache *c) { if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, - &init_on_free)) - return !(c->ctor || - (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))); + &init_on_free)) { +#ifndef CONFIG_SLUB + if (c->ctor) + return false; +#endif + if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) + return false; + return true; + } return false; } diff --git a/mm/slub.c b/mm/slub.c index 0e9b0512158b..1c8f74afe2eb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2680,6 +2680,8 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init, */ set_orig_size(s, x, orig_size); + if (s->ctor) + s->ctor(x); } /* KASAN might put x into memory quarantine, delaying its reuse. 
*/ return !kasan_slab_free(s, x, init, still_accessible, false); @@ -2723,6 +2725,22 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail, * accordingly if object's reuse is delayed. */ --(*cnt); + + /* Objects that are put into quarantine by KASAN will + * still undergo free_consistency_checks(), which + * checks whether the freelist pointer is valid if it + * is located after the object (see check_object()). + * Since this is the case for slab caches with + * constructors, we need to fix the freelist pointer + * after init_on_free has overwritten it. + * + * Note that doing this for all caches (not just ctor + * ones) would cause a GPF due to KASAN poisoning and + * the way set_freepointer() eventually dereferences + * the freepointer. + */ + if (slab_want_init_on_free(s) && s->ctor) + set_freepointer(s, object, NULL); } } while (object != old_tail); From 1ddbcbb97e46fc22f066dbbf1aa2290ca43879b4 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 15:58:57 -0400 Subject: [PATCH 061/109] slub: Add support for verifying slab sanitization This is an extension to the sanitization feature in PaX for when sacrificing more performance for security is acceptable. The initial version from Daniel Micay was relying on PAGE_SANITIZE. It now relies on upstream's init_on_free.
Signed-off-by: Daniel Micay Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak [nicolas.bouchinet@ssi.gouv.fr: Should not conflict with commit 520a688a2edfddba9] Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 45 ++++++++++++++++++++++++++++++++++---- security/Kconfig.hardening | 8 +++++++ 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 1c8f74afe2eb..f942d20efc65 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -232,6 +232,12 @@ static inline bool kmem_cache_debug(struct kmem_cache *s) return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS); } +static inline bool has_sanitize_verify(struct kmem_cache *s) +{ + return IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && + slab_want_init_on_free(s); +} + void *fixup_red_left(struct kmem_cache *s, void *p) { if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) @@ -2680,7 +2686,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init, */ set_orig_size(s, x, orig_size); - if (s->ctor) + if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) s->ctor(x); } /* KASAN might put x into memory quarantine, delaying its reuse. */ @@ -2751,7 +2757,7 @@ static void *setup_object(struct kmem_cache *s, void *object) { setup_object_debug(s, object); object = kasan_init_slab_obj(s, object); - if (unlikely(s->ctor)) { + if (unlikely(s->ctor) && !has_sanitize_verify(s)) { kasan_unpoison_new_object(s, object); s->ctor(object); kasan_poison_new_object(s, object); @@ -4872,7 +4878,19 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); maybe_wipe_obj_freeptr(s, object); - init = slab_want_init_on_alloc(gfpflags, s); + + if (has_sanitize_verify(s) && object) { + /* KASAN hasn't unpoisoned the object yet (this is done in the + * post-alloc hook), so let's do it temporarily. 
+ */ + kasan_unpoison_new_object(s, object); + BUG_ON(memchr_inv(object, 0, s->object_size)); + if (s->ctor) + s->ctor(object); + kasan_poison_new_object(s, object); + } else { + init = slab_want_init_on_alloc(gfpflags, s); + } out: /* @@ -7245,6 +7263,21 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, stat_add(s, ALLOC_SLOWPATH, i); } + if (has_sanitize_verify(s)) { + int j; + + for (j = 0; j < i; j++) { + /* KASAN hasn't unpoisoned the object yet (this is done in the + * post-alloc hook), so let's do it temporarily. + */ + kasan_unpoison_new_object(s, p[j]); + BUG_ON(memchr_inv(p[j], 0, s->object_size)); + if (s->ctor) + s->ctor(p[j]); + kasan_poison_new_object(s, p[j]); + } + } + return i; error: @@ -7262,6 +7295,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, { unsigned int i = 0; void *kfence_obj; + bool init = false; if (!size) return 0; @@ -7315,8 +7349,11 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, * memcg and kmem_cache debug support and memory initialization. * Done outside of the IRQ disabled fastpath loop. */ + if (!has_sanitize_verify(s)) { + init = slab_want_init_on_alloc(flags, s); + } if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p, - slab_want_init_on_alloc(flags, s), s->object_size))) { + init, s->object_size))) { return 0; } diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index b728851fec14..0068460db967 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -218,6 +218,14 @@ config PAGE_SANITIZE_VERIFY When init_on_free is enabled, verify that newly allocated pages are zeroed to detect write-after-free bugs. +config SLAB_SANITIZE_VERIFY + bool "Verify sanitized SLAB allocations" + default y + depends on !KASAN + help + When init_on_free is enabled, verify that newly allocated slab + objects are zeroed to detect write-after-free bugs. 
+ endmenu menu "Bounds checking" From 403257b7ce6341bef82d4689f9d4ca4da7482d40 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 16:16:58 -0400 Subject: [PATCH 062/109] slub: add multi-purpose random canaries Place canaries at the end of kernel slab allocations, sacrificing some performance and memory usage for security. Canaries can detect some forms of heap corruption when allocations are freed and as part of the HARDENED_USERCOPY feature. It provides basic use-after-free detection for HARDENED_USERCOPY. Canaries absorb small overflows (rendering them harmless), mitigate non-NUL terminated C string overflows on 64-bit via a guaranteed zero byte and provide basic double-free detection. Signed-off-by: Daniel Micay [levente@leventepolyak.net: make canaries work without SLUB_DEBUG] [levente@leventepolyak.net: fix compatibility with KFENCE] Signed-off-by: Levente Polyak [nicolas.bouchinet@ssi.gouv.fr: Fix conflicts with commit 782f8906f8057efc7] [nicolas.bouchinet@ssi.gouv.fr: Take slab canary in account for object size] Signed-off-by: Nicolas Bouchinet --- mm/Kconfig | 17 ++++++++ mm/slab.h | 5 +++ mm/slub.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 131 insertions(+), 12 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index 9bcf4da8865a..9830506ebb5b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -235,6 +235,23 @@ config SLAB_BUCKETS If unsure, say Y. +config SLAB_CANARY + depends on SLUB + depends on !SLAB_MERGE_DEFAULT + bool "SLAB canaries" + default y + help + Place canaries at the end of kernel slab allocations, sacrificing + some performance and memory usage for security. + + Canaries can detect some forms of heap corruption when allocations + are freed and as part of the HARDENED_USERCOPY feature. It provides + basic use-after-free detection for HARDENED_USERCOPY. 
+ + Canaries absorb small overflows (rendering them harmless), mitigate + non-NUL terminated C string overflows on 64-bit via a guaranteed zero + byte and provide basic double-free detection. + config SLUB_STATS default n bool "Enable performance statistics" diff --git a/mm/slab.h b/mm/slab.h index e9c4cf834edb..075165fb0cf8 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -223,6 +223,11 @@ struct kmem_cache { unsigned long random; #endif +#ifdef CONFIG_SLAB_CANARY + unsigned long random_active; + unsigned long random_inactive; +#endif + #ifdef CONFIG_NUMA /* * Defragmentation by allocating from a remote node. diff --git a/mm/slub.c b/mm/slub.c index f942d20efc65..69e51c9ac55d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -742,6 +743,8 @@ static inline void set_orig_size(struct kmem_cache *s, return; p += get_info_end(s); + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + p = (void *)p + sizeof(void *); p += sizeof(struct track) * 2; *(unsigned long *)p = orig_size; @@ -758,6 +761,8 @@ static inline unsigned long get_orig_size(struct kmem_cache *s, void *object) return s->object_size; p += get_info_end(s); + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + p = (void *)p + sizeof(void *); p += sizeof(struct track) * 2; return *(unsigned long *)p; @@ -897,6 +902,33 @@ static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s) } #endif +#ifdef CONFIG_SLAB_CANARY +static inline unsigned long *get_canary(struct kmem_cache *s, void *object) +{ + return object + get_info_end(s); +} + +static inline unsigned long get_canary_value(const void *canary, unsigned long value) +{ + return (value ^ (unsigned long)canary) & CANARY_MASK; +} + +static inline void set_canary(struct kmem_cache *s, void *object, unsigned long value) +{ + unsigned long *canary = get_canary(s, object); + *canary = get_canary_value(canary, value); +} + +static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value) 
+{ + unsigned long *canary = get_canary(s, object); + BUG_ON(*canary != get_canary_value(canary, value)); +} +#else +#define set_canary(s, object, value) +#define check_canary(s, object, value) +#endif + #ifdef CONFIG_SLUB_DEBUG /* @@ -1026,6 +1058,9 @@ static struct track *get_track(struct kmem_cache *s, void *object, p = object + get_info_end(s); + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + p = (void *)p + sizeof(void *); + return kasan_reset_tag(p + alloc); } @@ -1187,6 +1222,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p) off = get_info_end(s); + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + off += sizeof(void *); + if (s->flags & SLAB_STORE_USER) off += 2 * sizeof(struct track); @@ -1355,10 +1393,11 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab, * * [Metadata starts at object + s->inuse] * - A. freelist pointer (if freeptr_outside_object) - * - B. alloc tracking (SLAB_STORE_USER) - * - C. free tracking (SLAB_STORE_USER) - * - D. original request size (SLAB_KMALLOC && SLAB_STORE_USER) - * - E. KASAN metadata (if enabled) + * - B. Canary for SLAB_CANARY + * - C. alloc tracking (SLAB_STORE_USER) + * - D. free tracking (SLAB_STORE_USER) + * - E. original request size (SLAB_KMALLOC && SLAB_STORE_USER) + * - F. 
KASAN metadata (if enabled) * * [Mandatory padding] (if CONFIG_SLUB_DEBUG && SLAB_RED_ZONE) * - One mandatory debug word to guarantee a minimum poisoned gap @@ -1390,6 +1429,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p) { unsigned long off = get_info_end(s); /* The end of info */ + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + off += sizeof(void *); + if (s->flags & SLAB_STORE_USER) { /* We also have user information there */ off += 2 * sizeof(struct track); @@ -2608,11 +2650,19 @@ struct rcu_delayed_free { */ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x, bool init, - bool after_rcu_delay) + bool after_rcu_delay, bool canary) { /* Are the object contents still accessible? */ bool still_accessible = (s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay; + /* + * Postpone setting the inactive canary until the metadata + * has potentially been cleared at the end of this function. + */ + if (canary) { + check_canary(s, x, s->random_active); + } + kmemleak_free_recursive(x, s->flags); kmsan_slab_free(s, x); @@ -2678,8 +2728,14 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init, if (!kasan_has_integrated_init()) memset(kasan_reset_tag(x), 0, orig_size); rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0; + +#ifdef CONFIG_SLAB_CANARY + memset((char *)kasan_reset_tag(x) + inuse + sizeof(void *), 0, + s->size - inuse - sizeof(void *) - rsize); +#else memset((char *)kasan_reset_tag(x) + inuse, 0, s->size - inuse - rsize); +#endif /* * Restore orig_size, otherwise kmalloc redzone overwritten * would be reported @@ -2689,6 +2745,11 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init, if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor) s->ctor(x); } + + if (canary) { + set_canary(s, x, s->random_inactive); + } + /* KASAN might put x into memory quarantine, delaying its reuse. 
*/ return !kasan_slab_free(s, x, init, still_accessible, false); } @@ -2704,7 +2765,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail, bool init; if (is_kfence_address(next)) { - slab_free_hook(s, next, false, false); + slab_free_hook(s, next, false, false, false); return false; } @@ -2719,7 +2780,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail, next = get_freepointer(s, object); /* If object's reuse doesn't have to be delayed */ - if (likely(slab_free_hook(s, object, init, false))) { + if (likely(slab_free_hook(s, object, init, false, true))) { /* Move object to the new freelist */ set_freepointer(s, object, *head); *head = object; @@ -2756,6 +2817,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail, static void *setup_object(struct kmem_cache *s, void *object) { setup_object_debug(s, object); + set_canary(s, object, s->random_inactive); object = kasan_init_slab_obj(s, object); if (unlikely(s->ctor) && !has_sanitize_verify(s)) { kasan_unpoison_new_object(s, object); @@ -4892,6 +4954,11 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list init = slab_want_init_on_alloc(gfpflags, s); } + if (object) { + check_canary(s, object, s->random_inactive); + set_canary(s, object, s->random_active); + } + out: /* * When init equals 'true', like for kzalloc() family, only @@ -6199,10 +6266,16 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab, void *object, unsigned long addr) { + bool canary = true; + memcg_slab_free_hook(s, slab, &object, 1); alloc_tagging_slab_free_hook(s, slab, &object, 1); - if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false))) + /* Make sure canaries are not used on kfence objects. 
*/ + if (is_kfence_address(object)) + canary = false; + + if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false, canary))) return; if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()) @@ -6220,11 +6293,16 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, static noinline void memcg_alloc_abort_single(struct kmem_cache *s, void *object) { + bool canary = true; struct slab *slab = virt_to_slab(object); alloc_tagging_slab_free_hook(s, slab, &object, 1); - if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false))) + /* Make sure canaries are not used on kfence objects. */ + if (is_kfence_address(object)) + canary = false; + + if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false, canary))) __slab_free(s, slab, object, object, 1, _RET_IP_); } #endif @@ -6267,7 +6345,7 @@ static void slab_free_after_rcu_debug(struct rcu_head *rcu_head) return; /* resume freeing */ - if (slab_free_hook(s, object, slab_want_init_on_free(s), true)) { + if (slab_free_hook(s, object, slab_want_init_on_free(s), true, true)) { __slab_free(s, slab, object, object, 1, _THIS_IP_); stat(s, FREE_SLOWPATH); } @@ -6363,7 +6441,7 @@ static inline size_t slab_ksize(struct slab *slab) * or any other metadata back there then we can * only use the space before that information. 
*/ - if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) + if ((s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) || IS_ENABLED(CONFIG_SLAB_CANARY)) return s->inuse; else if (obj_exts_in_object(s, slab)) return s->inuse; @@ -7244,7 +7322,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { - int i; + int i, k; if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { for (i = 0; i < size; i++) { @@ -7278,6 +7356,13 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } } + for (k = 0; k < i; k++) { + if (!is_kfence_address(p[k])) { + check_canary(s, p[k], s->random_inactive); + set_canary(s, p[k], s->random_active); + } + } + return i; error: @@ -7598,6 +7683,7 @@ static void early_kmem_cache_node_alloc(int node) #ifdef CONFIG_SLUB_DEBUG init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); #endif + set_canary(kmem_cache_node, n, kmem_cache_node->random_active); n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false); slab->freelist = get_freepointer(kmem_cache_node, n); slab->inuse = 1; @@ -7802,6 +7888,9 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s) s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *)); } + if (IS_ENABLED(CONFIG_SLAB_CANARY)) + size += sizeof(void *); + #ifdef CONFIG_SLUB_DEBUG if (flags & SLAB_STORE_USER) { /* @@ -8139,6 +8228,10 @@ void __check_heap_object(const void *ptr, unsigned long n, offset -= s->red_left_pad; } + if (!is_kfence) { + check_canary(s, (void *)ptr - offset, s->random_active); + } + /* Allow address range falling entirely within usercopy region. 
*/ if (offset >= s->useroffset && offset - s->useroffset <= s->usersize && @@ -8505,6 +8598,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name, s->flags = kmem_cache_flags(flags, s->name); #ifdef CONFIG_SLAB_FREELIST_HARDENED s->random = get_random_long(); +#endif +#ifdef CONFIG_SLAB_CANARY + s->random_active = get_random_long(); + s->random_inactive = get_random_long(); #endif s->align = args->align; s->ctor = args->ctor; From 6864a9e584a757fcef24d48f535baac9e5cf1566 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 11 Jan 2016 15:23:55 +0000 Subject: [PATCH 063/109] security,perf: Allow further restriction of perf_event_open When kernel.perf_event_open is set to 3 (or greater), disallow all access to performance events by users without CAP_SYS_ADMIN or CAP_PERFMON. Add a Kconfig symbol CONFIG_SECURITY_PERF_EVENTS_RESTRICT that makes this value the default. This is based on a similar feature in grsecurity (CONFIG_GRKERNSEC_PERF_HARDEN). This version doesn't include making the variable read-only. It also allows enabling further restriction at run-time regardless of whether the default is changed. As part of the v5.5 linux-hardened rebase, this commit was adapted to work with the new perf_event LSM hooks, introduced in da97e18458fb42 ("perf_event: Add support for LSM and SELinux checks"). As part of the v5.8 linux-hardened rebase, this commit was adapted to work with the new CAP_PERFMON capability. 
Signed-off-by: Ben Hutchings [levente@leventepolyak.net: Adapt to work with the new perf_event LSM hooks] Signed-off-by: Levente Polyak [thibaut.sautereau@ssi.gouv.fr: Adapt to work with the new CAP_PERFMON capability] Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- Documentation/admin-guide/sysctl/kernel.rst | 2 ++ include/linux/perf_event.h | 8 ++++++++ kernel/events/core.c | 7 ++++++- security/Kconfig | 9 +++++++++ tools/perf/Documentation/security.txt | 1 + tools/perf/util/evsel.c | 1 + 6 files changed, 27 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 9aed74e65cf4..0a0384d2376e 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1014,6 +1014,8 @@ with respect to CAP_PERFMON use cases. >=1 Disallow CPU event access by users without ``CAP_PERFMON``. >=2 Disallow kernel profiling by users without ``CAP_PERFMON``. + +>=3 Disallow use of any event by users without ``CAP_PERFMON``. 
=== ================================================================== diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 48d851fbd8ea..b82bab945bf5 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1792,6 +1792,14 @@ static inline int perf_is_paranoid(void) extern int perf_allow_kernel(void); +static inline int perf_allow_open(void) +{ + if (sysctl_perf_event_paranoid > 2 && !perfmon_capable()) + return -EACCES; + + return security_perf_event_open(PERF_SECURITY_OPEN); +} + static inline int perf_allow_cpu(void) { if (sysctl_perf_event_paranoid > 0 && !perfmon_capable()) diff --git a/kernel/events/core.c b/kernel/events/core.c index 89b40e439717..b9205d90e6dd 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -491,8 +491,13 @@ static __always_inline bool is_guest_mediated_pmu_loaded(void) * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv + * 3 - disallow all unpriv perf event use */ +#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT +int sysctl_perf_event_paranoid __read_mostly = 3; +#else int sysctl_perf_event_paranoid __read_mostly = 2; +#endif /* Minimum for 512 kiB + 1 user control page. 'free' kiB per user. */ static int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); @@ -13829,7 +13834,7 @@ SYSCALL_DEFINE5(perf_event_open, return err; /* Do we allow access to perf_event_open(2) ? 
*/ - err = security_perf_event_open(PERF_SECURITY_OPEN); + err = perf_allow_open(); if (err) return err; diff --git a/security/Kconfig b/security/Kconfig index c20928e74619..a99fb7c51d47 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -72,6 +72,15 @@ config MSEAL_SYSTEM_MAPPINGS For complete descriptions of memory sealing, please see Documentation/userspace-api/mseal.rst +config SECURITY_PERF_EVENTS_RESTRICT + bool "Restrict unprivileged use of performance events" + depends on PERF_EVENTS + help + If you say Y here, the kernel.perf_event_paranoid sysctl + will be set to 3 by default, and no unprivileged use of the + perf_event_open syscall will be permitted unless it is + changed. + config SECURITY bool "Enable different security models" depends on SYSFS diff --git a/tools/perf/Documentation/security.txt b/tools/perf/Documentation/security.txt index 4fe3b8b1958f..a7d88cc23a70 100644 --- a/tools/perf/Documentation/security.txt +++ b/tools/perf/Documentation/security.txt @@ -148,6 +148,7 @@ Perf tool provides a message similar to the one below: >= 0: Disallow raw and ftrace function tracepoint access >= 1: Disallow CPU event access >= 2: Disallow kernel profiling + >= 3: Disallow use of any event To make the adjusted perf_event_paranoid setting permanent preserve it in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f59228c1a39e..d687678a45ea 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -4014,6 +4014,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, ">= 0: Disallow raw and ftrace function tracepoint access\n" ">= 1: Disallow CPU event access\n" ">= 2: Disallow kernel profiling\n" + ">= 3: Disallow use of any event\n" "To make the adjusted perf_event_paranoid setting permanent preserve it\n" "in /etc/sysctl.conf (e.g.
kernel.perf_event_paranoid = <setting>)", perf_event_paranoid()); From 596735619ce2f11948cfb28510b31c3a0f679331 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 4 May 2017 14:45:59 -0400 Subject: [PATCH 064/109] enable SECURITY_PERF_EVENTS_RESTRICT by default Signed-off-by: Daniel Micay --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index a99fb7c51d47..06c66ff55ba6 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -75,6 +75,7 @@ config MSEAL_SYSTEM_MAPPINGS config SECURITY_PERF_EVENTS_RESTRICT bool "Restrict unprivileged use of performance events" depends on PERF_EVENTS + default y help If you say Y here, the kernel.perf_event_paranoid sysctl will be set to 3 by default, and no unprivileged use of the From 077cf75598b17ebfc9495a6a2224cf983e794760 Mon Sep 17 00:00:00 2001 From: Serge Hallyn Date: Fri, 31 May 2013 19:12:12 +0100 Subject: [PATCH 065/109] userns: add sysctl to disallow unprivileged CLONE_NEWUSER by default Signed-off-by: Serge Hallyn [bwh: Remove unneeded binary sysctl bits] Signed-off-by: Daniel Micay [thibaut.sautereau@ssi.gouv.fr: Adapt to sysctl code refactoring] [nicolas.bouchinet@ssi.gouv.fr: Adapt proc_handler with an allowed range value between 0 and 1] Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- include/linux/user_namespace.h | 4 ++++ kernel/fork.c | 11 +++++++++++ kernel/sysctl.c | 15 +++++++++++++++ kernel/user_namespace.c | 3 +++ 4 files changed, 33 insertions(+) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 9c3be157397e..bb05d4a07c46 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -173,6 +173,8 @@ static inline struct user_namespace *to_user_ns(struct ns_common *ns) #ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) @@ -206,6 +208,8 @@ extern bool current_in_userns(const struct
user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else +#define unprivileged_userns_clone 0 + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; diff --git a/kernel/fork.c b/kernel/fork.c index 73622ad0665a..18a0936e6a12 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #include #include @@ -1987,6 +1988,10 @@ __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -3151,6 +3156,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c9efb17cc255..57613459d7d0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -22,6 +22,10 @@ #include #include +#ifdef CONFIG_USER_NS +#include +#endif + /* shared constants to be used in various sysctls */ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; EXPORT_SYMBOL(sysctl_vals); @@ -1372,6 +1376,17 @@ int proc_do_static_key(const struct ctl_table *table, int dir, } static const struct ctl_table sysctl_subsys_table[] = { +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif #ifdef CONFIG_PROC_SYSCTL 
{ .procname = "sysctl_writes_strict", diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0bed462e9b2a..c84977d660be 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -23,6 +23,9 @@ #include #include +/* sysctl */ +int unprivileged_userns_clone; + static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); From f673179348bd86ae747c624add1f1f6c19417c4b Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Wed, 31 Jul 2019 20:50:48 +0100 Subject: [PATCH 066/109] userns: add kconfig to set default for unprivileged CLONE_NEWUSER When disabled, unprivileged users will not be able to create new namespaces. Allowing users to create their own namespaces has been part of several recent local privilege escalation exploits, so if you need user namespaces but are paranoid^Wsecurity-conscious you want to disable this. By default unprivileged user namespaces are disabled. Co-authored-by: Jan Alexander Steffens (heftig) Signed-off-by: Jan Alexander Steffens (heftig) Co-authored-by: Levente Polyak Signed-off-by: Levente Polyak --- init/Kconfig | 16 ++++++++++++++++ kernel/user_namespace.c | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/init/Kconfig b/init/Kconfig index 78479ebfcea2..4bd07806e65b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1415,6 +1415,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + depends on USER_NS + default n + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say N. 
+ config PID_NS bool "PID Namespaces" default y diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index c84977d660be..b54a9a25d1c3 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -24,7 +24,11 @@ #include /* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else int unprivileged_userns_clone; +#endif static struct kmem_cache *user_ns_cachep __ro_after_init; static DEFINE_MUTEX(userns_state_mutex); From 0ea2e04a27d977b1768d6a4a28f5a974b91090f3 Mon Sep 17 00:00:00 2001 From: Emese Revfy Date: Tue, 31 May 2016 01:34:02 +0200 Subject: [PATCH 067/109] Add the extra_latent_entropy kernel parameter When extra_latent_entropy is passed on the kernel command line, entropy will be extracted from up to the first 4GB of RAM while the runtime memory allocator is being initialized. Based on work created by the PaX Team. Signed-off-by: Emese Revfy Signed-off-by: Daniel Micay Signed-off-by: Levente Polyak --- .../admin-guide/kernel-parameters.txt | 5 ++++ mm/page_alloc.c | 24 +++++++++++++++++++ scripts/gcc-plugins/Kconfig | 5 ++++ 3 files changed, 34 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 03a550630644..4cebb97fe82f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5055,6 +5055,11 @@ Kernel parameters the specified number of seconds. This is to be used if your oopses keep scrolling off the screen. + extra_latent_entropy + Enable a very simple form of latent entropy extraction + from the first 4GB of memory as the bootmem allocator + passes the memory pages to the buddy allocator. + pcbit= [HW,ISDN] pci=option[,option...] [PCI,EARLY] various PCI subsystem options. 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index bce29a3574fa..99579c0673ea 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -213,6 +213,15 @@ EXPORT_PER_CPU_SYMBOL(_numa_mem_); static DEFINE_MUTEX(pcpu_drain_mutex); +bool __meminitdata extra_latent_entropy; + +static int __init setup_extra_latent_entropy(char *str) +{ + extra_latent_entropy = true; + return 0; +} +early_param("extra_latent_entropy", setup_extra_latent_entropy); + #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY volatile unsigned long latent_entropy __latent_entropy; EXPORT_SYMBOL(latent_entropy); @@ -1653,6 +1662,21 @@ void __meminit __free_pages_core(struct page *page, unsigned int order, set_page_count(p, 0); } + if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { + unsigned long hash = 0; + size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; + const unsigned long *data = lowmem_page_address(page); + + for (index = 0; index < end; index++) + hash ^= hash + data[index]; +#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY + latent_entropy ^= hash; + add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); +#else + add_device_randomness((const void *)&hash, sizeof(hash)); +#endif + } + /* memblock adjusts totalram_pages() manually. */ atomic_long_add(nr_pages, &page_zone(page)->managed_pages); } diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index 6b34ba19358d..d83e715c9d40 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig @@ -29,6 +29,11 @@ config GCC_PLUGIN_LATENT_ENTROPY is some slowdown of the boot process (about 0.5%) and fork and irq processing. + When extra_latent_entropy is passed on the kernel command line, + entropy will be extracted from up to the first 4GB of RAM while the + runtime memory allocator is being initialized. This costs even more + slowdown of the boot process. + Note that entropy extracted this way is not cryptographically secure! 
From 0aa9fcd887f2262ba14b08957b45f6d370650064 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 15 May 2017 23:45:34 -0400 Subject: [PATCH 068/109] ata: avoid null pointer dereference on bug Extracted from PaX. [nicolas.bouchinet@ssi.gouv.fr: BUG_ON NULL ptr deref removed in 5bb52d926598a0] Signed-off-by: Nicolas Bouchinet Signed-off-by: Daniel Micay --- drivers/ata/libata-core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 374993031895..ef51e7adf101 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4929,6 +4929,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc) struct ata_port *ap; struct ata_link *link; + BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ if (WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE))) return; From 6e385681278d9cf50db08c6bf4a985b9aa6b97b0 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 15 May 2017 23:51:12 -0400 Subject: [PATCH 069/109] sanity check for negative length in nla_memcpy Extracted from PaX. 
Signed-off-by: Daniel Micay --- lib/nlattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/nlattr.c b/lib/nlattr.c index be9c576b6e2d..484d839bcf5e 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -837,6 +837,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) { int minlen = min_t(int, count, nla_len(src)); + BUG_ON(minlen < 0); + memcpy(dest, nla_data(src), minlen); if (count > minlen) memset(dest + minlen, 0, count - minlen); From 9787b97285b3cbd447d947ed7829ddf077967566 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 16 May 2017 00:59:48 -0400 Subject: [PATCH 070/109] PaX shadow cr4 sanity check (essentially a revert) Signed-off-by: Daniel Micay [levente@leventepolyak.net: Adapt to cpu_tlbstate moved out-of-line] Signed-off-by: Levente Polyak [thibaut.sautereau@ssi.gouv.fr: Move BUG_ON from native_flush_tlb_global() to new __native_tlb_flush_global() helper] Signed-off-by: Thibaut Sautereau --- arch/x86/include/asm/tlbflush.h | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/process.c | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 5a3cdc439e38..c11396cc1a44 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -484,6 +484,7 @@ static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask) static inline void __native_tlb_flush_global(unsigned long cr4) { + BUG_ON(cr4 != __read_cr4()); native_write_cr4(cr4 ^ X86_CR4_PGE); native_write_cr4(cr4); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ec0670114efa..0f7c2a75ba1a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -494,6 +494,7 @@ EXPORT_SYMBOL_GPL(native_write_cr4); void cr4_update_irqsoff(unsigned long set, unsigned long clear) { unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); lockdep_assert_irqs_disabled(); diff --git 
a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4c718f8adc59..ea45acfe4f03 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -706,6 +706,7 @@ void speculation_ctrl_update_current(void) static inline void cr4_toggle_bits_irqsoff(unsigned long mask) { unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4); + BUG_ON(cr4 != __read_cr4()); newval = cr4 ^ mask; if (newval != cr4) { From 4d51c545f2e355a22af75288ee685fd4653bd65c Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 9 Jul 2017 17:53:23 -0400 Subject: [PATCH 071/109] add writable function pointer detection Taken from the public PaX patches. Signed-off-by: Daniel Micay Signed-off-by: Levente Polyak --- scripts/mod/modpost.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index c3bc801d8b2d..89fe6f630793 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -49,6 +49,7 @@ static bool sec_mismatch_warn_only = true; /* Trim EXPORT_SYMBOLs that are unused by in-tree modules */ static bool trim_unused_exports; +static int writable_fptr_count = 0; /* ignore missing files */ static bool ignore_missing_files; /* If set to 1, only warn (instead of error) about missing ns imports */ @@ -814,6 +815,7 @@ enum mismatch { ANY_INIT_TO_ANY_EXIT, ANY_EXIT_TO_ANY_INIT, EXTABLE_TO_NON_TEXT, + DATA_TO_TEXT }; /** @@ -870,6 +872,12 @@ static const struct sectioncheck sectioncheck[] = { .bad_tosec = { ".altinstr_replacement", NULL }, .good_tosec = {ALL_TEXT_SECTIONS , NULL}, .mismatch = EXTABLE_TO_NON_TEXT, +}, +/* Do not reference code from writable data */ +{ + .fromsec = { DATA_SECTIONS, NULL }, + .bad_tosec = { ALL_TEXT_SECTIONS, NULL }, + .mismatch = DATA_TO_TEXT } }; @@ -1035,7 +1043,10 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf, if (!secref_whitelist(fromsec, fromsym, tosec, tosym)) return; - sec_mismatch_count++; + if (mismatch->mismatch == 
DATA_TO_TEXT) + writable_fptr_count++; + else + sec_mismatch_count++; if (!tosym[0]) snprintf(taddr_str, sizeof(taddr_str), "0x%x", (unsigned int)taddr); @@ -1069,6 +1080,11 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf, else error("%s+0x%lx references non-executable section '%s'\n", fromsec, (long)faddr, tosec); + } else if (mismatch->mismatch == DATA_TO_TEXT) { + fprintf(stderr, + "The %s:%s references\n" + "the %s:%s\n", + fromsec, fromsym, tosec, tosym); } } @@ -2387,5 +2403,9 @@ int main(int argc, char **argv) warn("suppressed %u unresolved symbol warnings because there were too many)\n", nr_unresolved - MAX_UNRESOLVED_REPORTS); + if (writable_fptr_count) + warn("modpost: Found %d writable function pointer(s).\n", + writable_fptr_count); + return error_occurred ? 1 : 0; } From 26a2762b46109dbf947d3865efd88f57db1182ff Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 9 Jul 2017 17:20:29 -0400 Subject: [PATCH 072/109] support overriding early audit kernel cmdline --- kernel/audit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/audit.c b/kernel/audit.c index d3a8268998d7..485d169d8dc4 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1786,6 +1786,9 @@ static int __init audit_enable(char *str) if (audit_default == AUDIT_OFF) audit_initialized = AUDIT_DISABLED; + else if (!audit_ever_enabled) + audit_initialized = AUDIT_UNINITIALIZED; + if (audit_set_enabled(audit_default)) pr_err("audit: error setting audit state (%d)\n", audit_default); From 63093edc4f756367642a1d52b050e2b7a1767b33 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sat, 26 Aug 2017 20:16:03 -0400 Subject: [PATCH 073/109] Revert "mm: revert x86_64 and arm64 ELF_ET_DYN_BASE base changes" This reverts commit aab425db4279aeb83b7911693f0cccbd3644c9fd. 
--- arch/arm64/include/asm/elf.h | 8 ++------ arch/x86/include/asm/elf.h | 4 ++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index d2779d604c7b..c2fcacca8361 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -124,14 +124,10 @@ /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is above 4GB to leave the entire 32-bit address + * 64-bit, this is raised to 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ -#ifdef CONFIG_ARM64_FORCE_52BIT -#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3) -#else -#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3) -#endif /* CONFIG_ARM64_FORCE_52BIT */ +#define ELF_ET_DYN_BASE 0x100000000UL #ifndef __ASSEMBLER__ diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 2ba5f166e58f..c251278ba009 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -229,11 +229,11 @@ extern int force_personality32; /* * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is above 4GB to leave the entire 32-bit address + * 64-bit, this is raised to 4GB to leave the entire 32-bit address * space open for things that want to use the area for 32-bit pointers. */ #define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \ - (DEFAULT_MAP_WINDOW / 3 * 2)) + 0x100000000UL) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. This could be done in user space, From 60d1ea047630914232c5dacfa8b397b06b6d80c3 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 21 May 2017 20:30:44 -0400 Subject: [PATCH 074/109] x86: determine stack entropy based on mmap entropy Stack mapping entropy is currently hard-wired to 11 bits of entropy on 32-bit and 22 bits of entropy on 64-bit. 
The stack itself gains an extra 8 bits of entropy from lower bit randomization within 16 byte alignment constraints. The argument block could have all lower bits randomized but it currently only gets the mapping randomization. Rather than hard-wiring values this switches to using the mmap entropy configuration like the mmap base and executable base, resulting in a range of 8 to 16 bits on 32-bit and 28 to 32 bits on 64-bit depending on kernel configuration and overridable via the sysctl entries. It's worth noting that since these kernel configuration options default to the minimum supported entropy value, the entropy on 32-bit will drop from 11 to 8 bits for builds using the defaults. However, following the configuration seems like the right thing to do regardless. At the very least, changing the defaults for COMPAT (32-bit processes on 64-bit) should be considered due to the larger address space compared to real 32-bit. Signed-off-by: Daniel Micay --- arch/x86/include/asm/elf.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index c251278ba009..8189290a13a5 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -315,8 +315,8 @@ extern unsigned long get_sigframe_size(void); #ifdef CONFIG_X86_32 -#define __STACK_RND_MASK(is32bit) (0x7ff) -#define STACK_RND_MASK (0x7ff) +#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) +#define STACK_RND_MASK ((1UL << mmap_rnd_bits) - 1) #define ARCH_DLINFO ARCH_DLINFO_IA32 @@ -325,7 +325,11 @@ extern unsigned long get_sigframe_size(void); #else /* CONFIG_X86_32 */ /* 1GB for 64bit, 8MB for 32bit */ -#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff) +#ifdef CONFIG_COMPAT +#define __STACK_RND_MASK(is32bit) ((is32bit) ? 
(1UL << mmap_rnd_compat_bits) - 1 : (1UL << mmap_rnd_bits) - 1) +#else +#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1) +#endif #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32()) #define ARCH_DLINFO \ From ade5d137f7861519c2c51a4c336289d9cd870a9a Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Mon, 22 May 2017 05:06:20 -0400 Subject: [PATCH 075/109] arm64: determine stack entropy based on mmap entropy Stack mapping entropy is currently hard-wired to 11 bits of entropy on 32-bit and 18 bits of entropy on 64-bit. The stack itself gains an extra 8 bits of entropy from lower bit randomization within 16 byte alignment constraints. The argument block could have all lower bits randomized but it currently only gets the mapping randomization. Rather than hard-wiring values this switches to using the mmap entropy configuration like the mmap base and executable base, resulting in a range of 8 to 16 bits on 32-bit and 18 to 24 bits on 64-bit (with 4k pages and 3 level page tables) depending on kernel configuration and overridable via the sysctl entries. It's worth noting that since these kernel configuration options default to the minimum supported entropy value, the entropy on 32-bit will drop from 11 to 8 bits for builds using the defaults. However, following the configuration seems like the right thing to do regardless. At the very least, changing the defaults for COMPAT (32-bit processes on 64-bit) should be considered due to the larger address space compared to real 32-bit. Signed-off-by: Daniel Micay --- arch/arm64/include/asm/elf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index c2fcacca8361..ed0d158955e2 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -185,10 +185,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, /* 1GB of VA */ #ifdef CONFIG_COMPAT #define STACK_RND_MASK (test_thread_flag(TIF_32BIT) ? 
\ - 0x7ff >> (PAGE_SHIFT - 12) : \ - 0x3ffff >> (PAGE_SHIFT - 12)) + ((1UL << mmap_rnd_compat_bits) - 1) >> (PAGE_SHIFT - 12) : \ + ((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) #else -#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) +#define STACK_RND_MASK (((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12)) #endif #ifdef __AARCH64EB__ From 779a7c10613256f5f4615da6f1a88f97563f5b98 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 11 May 2017 16:02:49 -0400 Subject: [PATCH 076/109] randomize lower bits of the argument block This was based on the PaX RANDUSTACK feature in grsecurity, where all of the lower bits are randomized. PaX keeps 16-byte alignment. Signed-off-by: Daniel Micay [levente@leventepolyak.net: do not randomize with ADDR_NO_RANDOMIZE personality] [levente@leventepolyak.net: adjust for mm: abstract initial stack setup to mm subsystem] Signed-off-by: Levente Polyak [nicolas.bouchinet@oss.cyber.gouv.fr: mm initialisation has moved to mm/vma_exec.c] Signed-off-by: Nicolas Bouchinet --- mm/vma_exec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/vma_exec.c b/mm/vma_exec.c index 8134e1afca68..4e747ea55e52 100644 --- a/mm/vma_exec.c +++ b/mm/vma_exec.c @@ -7,6 +7,7 @@ #include "vma_internal.h" #include "vma.h" +#include /* * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between @@ -151,6 +152,8 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, mmap_write_unlock(mm); *vmap = vma; *top_mem_p = vma->vm_end - sizeof(void *); + if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + *top_mem_p ^= get_random_u32() & ~PAGE_MASK; return 0; err: From 40c1dd89598b627d4fa0501d3aa6279caac26633 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 30 May 2017 18:03:30 -0400 Subject: [PATCH 077/109] support randomizing the lower bits of brk This adds support for arch_randomize_brk implementations not performing page alignment in order to randomize the lower bits of the brk heap. 
This idea is taken from PaX but the approach is different. This reuses the existing code and avoids forcing early creation of the heap mapping, avoiding mapping it if it's not used which is the case with many modern allocators based solely on mmap. The malloc implementation can be relied upon to align this as needed to the requirements it has, so using 16 byte alignment here is unnecessary. Signed-off-by: Daniel Micay --- mm/mmap.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 843160946aa5..dd6a759ec059 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -153,6 +153,13 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) newbrk = PAGE_ALIGN(brk); oldbrk = PAGE_ALIGN(mm->brk); + /* properly handle unaligned min_brk as an empty heap */ + if (min_brk & ~PAGE_MASK) { + if (brk == min_brk) + newbrk -= PAGE_SIZE; + if (mm->brk == min_brk) + oldbrk -= PAGE_SIZE; + } if (oldbrk == newbrk) { mm->brk = brk; goto success; From 6a7a1c8ebe52cb8aa4da5e1d59445439fba19ea1 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:22:38 -0400 Subject: [PATCH 078/109] mm: randomize lower bits of brk Per PaX, but for this alternate brk randomization approach. As part of the v5.4 linux-hardened rebase, this commit was adapted from the arm64 specific brk randomization to all arches that use the generic topdown mmap layout functions, introduced in e7142bf5d231 ("arm64, mm: make randomization selected by generic topdown mmap layout"). Signed-off-by: Daniel Micay Signed-off-by: Levente Polyak --- mm/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index a14de66c9458..28d7931944c2 100644 --- a/mm/util.c +++ b/mm/util.c @@ -391,9 +391,9 @@ unsigned long __weak arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? 
*/ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) - return randomize_page(mm->brk, SZ_32M); + return mm->brk + get_random_long() % SZ_32M; - return randomize_page(mm->brk, SZ_1G); + return mm->brk + get_random_long() % SZ_1G; } unsigned long arch_mmap_rnd(void) From 7a918f563af8bdf18227283c8decb04ac39b680f Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:23:06 -0400 Subject: [PATCH 079/109] x86: randomize lower bits of brk Per PaX, but for this alternate brk randomization approach. Signed-off-by: Daniel Micay --- arch/x86/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ea45acfe4f03..b92b3474470a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1027,9 +1027,9 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { if (mmap_is_ia32()) - return randomize_page(mm->brk, SZ_32M); + return mm->brk + get_random_long() % SZ_32M; - return randomize_page(mm->brk, SZ_1G); + return mm->brk + get_random_long() % SZ_1G; } /* From e5ce446baf60a94c17d525f2e616afe4f9ffb3f7 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:23:39 -0400 Subject: [PATCH 080/109] mm: guarantee brk gap is at least one page Per PaX, but for this alternate brk randomization approach. As part of the v5.4 linux-hardened rebase, this commit was adapted from the arm64 specific brk randomization to all arches that use the generic topdown mmap layout functions, introduced in e7142bf5d231 ("arm64, mm: make randomization selected by generic topdown mmap layout"). 
Signed-off-by: Daniel Micay Signed-off-by: Levente Polyak --- mm/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/util.c b/mm/util.c index 28d7931944c2..e39fe7b338c9 100644 --- a/mm/util.c +++ b/mm/util.c @@ -391,9 +391,9 @@ unsigned long __weak arch_randomize_brk(struct mm_struct *mm) { /* Is the current task 32bit ? */ if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task()) - return mm->brk + get_random_long() % SZ_32M; + return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; - return mm->brk + get_random_long() % SZ_1G; + return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; } unsigned long arch_mmap_rnd(void) From 7d63dcdd1fdd813369efcdec7dc8bea565cafa5e Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Thu, 1 Jun 2017 03:23:48 -0400 Subject: [PATCH 081/109] x86: guarantee brk gap is at least one page Per PaX, but for this alternate brk randomization approach. Signed-off-by: Daniel Micay --- arch/x86/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b92b3474470a..63c96edc60a5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1027,9 +1027,9 @@ unsigned long arch_align_stack(unsigned long sp) unsigned long arch_randomize_brk(struct mm_struct *mm) { if (mmap_is_ia32()) - return mm->brk + get_random_long() % SZ_32M; + return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE; - return mm->brk + get_random_long() % SZ_1G; + return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE; } /* From b7d36ecd61762f5b9d8fbe4a875c30132d86e3a5 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 16 May 2017 18:26:10 -0400 Subject: [PATCH 082/109] restrict device timing side channels Based on the public grsecurity patches. 
Signed-off-by: Thibaut Sautereau [levente@leventepolyak.net: move sysctl from kernel into fs] Signed-off-by: Levente Polyak Signed-off-by: Nicolas Bouchinet --- fs/inode.c | 13 +++++++++++++ fs/stat.c | 23 ++++++++++++++++++++--- include/linux/capability.h | 5 +++++ include/linux/fs.h | 11 +++++++++++ include/linux/fsnotify.h | 3 +++ kernel/capability.c | 6 ++++++ 6 files changed, 58 insertions(+), 3 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index cc12b68e021b..9209fa27b417 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -168,6 +168,10 @@ late_initcall(mg_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +/* sysctl */ +int device_sidechannel_restrict __read_mostly = 1; +EXPORT_SYMBOL(device_sidechannel_restrict); + /* * Handle nr_inode sysctl */ @@ -200,6 +204,15 @@ static const struct ctl_table inodes_sysctls[] = { .mode = 0444, .proc_handler = proc_nr_inodes, }, + { + .procname = "device_sidechannel_restrict", + .data = &device_sidechannel_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __init init_fs_inode_sysctls(void) diff --git a/fs/stat.c b/fs/stat.c index 89909746bed1..92642e583e8c 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -52,7 +52,10 @@ void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) return; } - stat->mtime = inode_get_mtime(inode); + if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) + stat->mtime = inode_get_ctime(inode); + else + stat->mtime = inode_get_mtime(inode); stat->ctime.tv_sec = inode->i_ctime_sec; stat->ctime.tv_nsec = (u32)atomic_read(pcn); if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED)) @@ -84,6 +87,7 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, { vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); + bool sidechannel_device = false; stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; @@ -93,13 +97,22 @@ 
void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, stat->gid = vfsgid_into_kgid(vfsgid); stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); - stat->atime = inode_get_atime(inode); + + if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) + sidechannel_device = true; + if (sidechannel_device) + stat->atime = inode_get_ctime(inode); + else + stat->atime = inode_get_atime(inode); if (is_mgtime(inode)) { fill_mg_cmtime(stat, request_mask, inode); } else { stat->ctime = inode_get_ctime(inode); - stat->mtime = inode_get_mtime(inode); + if (sidechannel_device) + stat->mtime = inode_get_ctime(inode); + else + stat->mtime = inode_get_mtime(inode); } stat->blksize = i_blocksize(inode); @@ -212,6 +225,10 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, ret = inode->i_op->getattr(idmap, path, stat, request_mask, query_flags); + if (!ret && is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) { + stat->atime = stat->ctime; + stat->mtime = stat->ctime; + } if (ret) return ret; } else { diff --git a/include/linux/capability.h b/include/linux/capability.h index 37db92b3d6f8..873416ba884c 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -145,6 +145,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool has_ns_capability_noaudit(struct task_struct *t, struct user_namespace *ns, int cap); extern bool capable(int cap); +extern bool capable_noaudit(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool ns_capable_noaudit(struct user_namespace *ns, int cap); extern bool ns_capable_setid(struct user_namespace *ns, int cap); @@ -167,6 +168,10 @@ static inline bool capable(int cap) { return true; } +static inline bool capable_noaudit(int cap) +{ + return true; +} static inline bool ns_capable(struct user_namespace *ns, int cap) { return true; diff --git a/include/linux/fs.h b/include/linux/fs.h index ef17f9e211e4..ba776fdcbee5 100644 --- 
a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3656,4 +3656,15 @@ static inline bool extensible_ioctl_valid(unsigned int cmd_a, return true; } +extern int device_sidechannel_restrict; + +static inline bool is_sidechannel_device(const struct inode *inode) +{ + umode_t mode; + if (!device_sidechannel_restrict) + return false; + mode = inode->i_mode; + return ((S_ISCHR(mode) || S_ISBLK(mode)) && (mode & (S_IROTH | S_IWOTH))); +} + #endif /* _LINUX_FS_H */ diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 079c18bcdbde..eb8a9e769394 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -124,6 +124,9 @@ static inline int fsnotify_file(struct file *file, __u32 mask) if (FMODE_FSNOTIFY_NONE(file->f_mode)) return 0; + if (mask & (FS_ACCESS | FS_MODIFY) && is_sidechannel_device(file_inode(file))) + return 0; + return fsnotify_path(&file->f_path, mask); } diff --git a/kernel/capability.c b/kernel/capability.c index 829f49ae07b9..5bb7ee4028ad 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -416,6 +416,12 @@ bool capable(int cap) return ns_capable(&init_user_ns, cap); } EXPORT_SYMBOL(capable); + +bool capable_noaudit(int cap) +{ + return ns_capable_noaudit(&init_user_ns, cap); +} +EXPORT_SYMBOL(capable_noaudit); #endif /* CONFIG_MULTIUSER */ /** From 2e7c1ab544e2181a511a6f3b4a11cc6bfe636b51 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Sun, 6 Sep 2020 20:28:32 +0200 Subject: [PATCH 083/109] sysctl: expose proc_dointvec_minmax_sysadmin as API function Orthogonal to the other sysctl proc functions expose the variant that is checking CAP_SYS_ADMIN on write for consumption in external subsystem's sysctl tables. 
Signed-off-by: Levente Polyak [nicolas.bouchinet@ssi.gouv.fr: Constify the ctl_table argument as in commit 78eb4ea25cd5fd] Signed-off-by: Nicolas Bouchinet --- include/linux/sysctl.h | 2 ++ kernel/printk/sysctl.c | 9 --------- kernel/sysctl.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 2886fbceb5d6..3e0b05485321 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -84,6 +84,8 @@ int proc_dobool(const struct ctl_table *table, int write, void *buffer, int proc_dointvec(const struct ctl_table *, int, void *, size_t *, loff_t *); int proc_dointvec_minmax(const struct ctl_table *table, int dir, void *buffer, size_t *lenp, loff_t *ppos); +int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos); int proc_dointvec_conv(const struct ctl_table *table, int dir, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(bool *negp, unsigned long *u_ptr, int *k_ptr, diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c index f15732e93c2e..1b8a2a652b19 100644 --- a/kernel/printk/sysctl.c +++ b/kernel/printk/sysctl.c @@ -10,15 +10,6 @@ static const int ten_thousand = 10000; -static int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - if (write && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} - static const struct ctl_table printk_sysctls[] = { { .procname = "printk", diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 57613459d7d0..6a9bc5747a76 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -867,6 +867,35 @@ int proc_douintvec(const struct ctl_table *table, int dir, void *buffer, do_proc_uint_conv); } +/** + * proc_dointvec_minmax_sysadmin - read a vector of integers with min/max values + * checking CAP_SYS_ADMIN on write + * @table: the sysctl 
table + * @dir: %TRUE if this is a write to the sysctl file + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer + * values from/to the user buffer, treated as an ASCII string. + * + * This routine will ensure the values are within the range specified by + * table->extra1 (min) and table->extra2 (max). + * + * Writing is only allowed when the current task has CAP_SYS_ADMIN. + * + * Returns 0 on success, -EPERM on permission failure or -EINVAL on write + * when the range check fails. + */ +int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos) +{ + if (dir && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return proc_dointvec_minmax(table, dir, buffer, lenp, ppos); +} + /** * proc_dointvec_minmax - read a vector of integers with min/max values * @table: the sysctl table @@ -1321,6 +1350,12 @@ int proc_doulongvec_minmax(const struct ctl_table *table, int dir, return -ENOSYS; } +int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int dir, + void *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + int proc_doulongvec_minmax_conv(const struct ctl_table *table, int dir, void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) @@ -1448,6 +1483,7 @@ EXPORT_SYMBOL(proc_dointvec); EXPORT_SYMBOL(proc_douintvec); EXPORT_SYMBOL(proc_dointvec_minmax); EXPORT_SYMBOL_GPL(proc_douintvec_minmax); +EXPORT_SYMBOL(proc_dointvec_minmax_sysadmin); EXPORT_SYMBOL(proc_dostring); EXPORT_SYMBOL(proc_doulongvec_minmax); EXPORT_SYMBOL(proc_do_large_bitmap); From 34c2c2f8f222d32c014b37f75f8ead62cc381769 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Tue, 16 May 2017 17:51:48 -0400 Subject: [PATCH 084/109] usb: add toggle for disabling newly added USB devices Based on the public grsecurity patches. 
[thibaut.sautereau@ssi.gouv.fr: Adapt to sysctl code refactoring] Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak [thibaut.sautereau@ssi.gouv.fr: Adapt to sysctl code refactoring] Signed-off-by: Nicolas Bouchinet --- drivers/usb/core/hub.c | 9 +++++++++ include/linux/usb.h | 3 +++ kernel/sysctl.c | 14 ++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 24960ba9caa9..d032245ac3b7 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5387,6 +5387,9 @@ static int descriptors_changed(struct usb_device *udev, return changed; } +/* sysctl */ +int deny_new_usb __read_mostly = 0; + static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) { @@ -5448,6 +5451,12 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, goto done; return; } + + if (deny_new_usb) { + dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1); + goto done; + } + if (hub_is_superspeed(hub->hdev)) unit_load = 150; else diff --git a/include/linux/usb.h b/include/linux/usb.h index 60bd4a8e919a..6504184e4c15 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2110,6 +2110,9 @@ extern void usb_led_activity(enum usb_led_event ev); static inline void usb_led_activity(enum usb_led_event ev) {} #endif +/* sysctl */ +extern int deny_new_usb; + #endif /* __KERNEL__ */ #endif diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6a9bc5747a76..c0cd4cbb33f2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -25,6 +25,9 @@ #ifdef CONFIG_USER_NS #include #endif +#if IS_ENABLED(CONFIG_USB) +#include +#endif /* shared constants to be used in various sysctls */ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; @@ -1432,6 +1435,17 @@ static const struct ctl_table sysctl_subsys_table[] = { .extra1 = SYSCTL_NEG_ONE, .extra2 = SYSCTL_ONE, }, +#endif +#if IS_ENABLED(CONFIG_USB) + { + .procname = 
"deny_new_usb", + .data = &deny_new_usb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif { .procname = "ngroups_max", From 15314a7f627640bf88a2a21892e2892009063f8e Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Sun, 6 Sep 2020 21:08:16 +0200 Subject: [PATCH 085/109] usb: implement dedicated subsystem sysctl tables This moves the usb related sysctl knobs to an own usb local sysctl table in order to clean up the global sysctl as well as allow the knob to be exported and referenced appropriately when building the usb components as dedicated modules. Signed-off-by: Levente Polyak --- drivers/usb/core/Makefile | 1 + drivers/usb/core/hub.c | 3 --- drivers/usb/core/sysctl.c | 35 +++++++++++++++++++++++++++++++++++ drivers/usb/core/usb.c | 9 +++++++++ include/linux/usb.h | 10 +++++++++- kernel/sysctl.c | 14 -------------- 6 files changed, 54 insertions(+), 18 deletions(-) create mode 100644 drivers/usb/core/sysctl.c diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile index 60ea76160122..cb5ed42e12c2 100644 --- a/drivers/usb/core/Makefile +++ b/drivers/usb/core/Makefile @@ -15,6 +15,7 @@ usbcore-$(CONFIG_OF) += of.o usbcore-$(CONFIG_USB_XHCI_SIDEBAND) += offload.o usbcore-$(CONFIG_USB_PCI) += hcd-pci.o usbcore-$(CONFIG_ACPI) += usb-acpi.o +usbcore-$(CONFIG_SYSCTL) += sysctl.o ifdef CONFIG_USB_ONBOARD_DEV usbcore-y += ../misc/onboard_usb_dev_pdevs.o diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d032245ac3b7..457d28bc990f 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5387,9 +5387,6 @@ static int descriptors_changed(struct usb_device *udev, return changed; } -/* sysctl */ -int deny_new_usb __read_mostly = 0; - static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) { diff --git a/drivers/usb/core/sysctl.c b/drivers/usb/core/sysctl.c new file mode 100644 index 
000000000000..813db3f0b1cb --- /dev/null +++ b/drivers/usb/core/sysctl.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include + +static struct ctl_table usb_sysctls[] = { + { + .procname = "deny_new_usb", + .data = &deny_new_usb, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +}; + +static struct ctl_table_header *usb_sysctl_table; + +int usb_register_sysctl(void) +{ + usb_sysctl_table = register_sysctl("kernel", usb_sysctls); + if (!usb_sysctl_table) { + pr_warn("usb: sysctl registration failed\n"); + return -ENOMEM; + } + return 0; +} + +void usb_unregister_sysctl(void) +{ + unregister_sysctl_table(usb_sysctl_table); + usb_sysctl_table = NULL; +} diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index df166cafe106..e32de22b9aad 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -73,6 +73,9 @@ MODULE_PARM_DESC(autosuspend, "default autosuspend delay"); #define usb_autosuspend_delay 0 #endif +int deny_new_usb __read_mostly = 0; +EXPORT_SYMBOL(deny_new_usb); + static bool match_endpoint(struct usb_endpoint_descriptor *epd, struct usb_endpoint_descriptor **bulk_in, struct usb_endpoint_descriptor **bulk_out, @@ -1220,6 +1223,9 @@ static int __init usb_init(void) usb_debugfs_init(); usb_acpi_register(); + retval = usb_register_sysctl(); + if (retval) + goto sysctl_init_failed; retval = bus_register(&usb_bus_type); if (retval) goto bus_register_failed; @@ -1259,6 +1265,8 @@ static int __init usb_init(void) bus_notifier_failed: bus_unregister(&usb_bus_type); bus_register_failed: + usb_unregister_sysctl(); +sysctl_init_failed: usb_acpi_unregister(); usb_debugfs_cleanup(); out: @@ -1283,6 +1291,7 @@ static void __exit usb_exit(void) class_unregister(&usbmisc_class); bus_unregister_notifier(&usb_bus_type, &usb_bus_nb); bus_unregister(&usb_bus_type); + usb_unregister_sysctl(); usb_acpi_unregister(); usb_debugfs_cleanup(); 
idr_destroy(&usb_bus_idr); diff --git a/include/linux/usb.h b/include/linux/usb.h index 6504184e4c15..9ef7409ee97c 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -2110,8 +2110,16 @@ extern void usb_led_activity(enum usb_led_event ev); static inline void usb_led_activity(enum usb_led_event ev) {} #endif -/* sysctl */ +/* sysctl.c */ extern int deny_new_usb; +#ifdef CONFIG_SYSCTL +extern int usb_register_sysctl(void); +extern void usb_unregister_sysctl(void); +#else +static inline int usb_register_sysctl(void) { return 0; } +static inline void usb_unregister_sysctl(void) { } +#endif /* CONFIG_SYSCTL */ + #endif /* __KERNEL__ */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0cd4cbb33f2..6a9bc5747a76 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -25,9 +25,6 @@ #ifdef CONFIG_USER_NS #include #endif -#if IS_ENABLED(CONFIG_USB) -#include -#endif /* shared constants to be used in various sysctls */ const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 }; @@ -1435,17 +1432,6 @@ static const struct ctl_table sysctl_subsys_table[] = { .extra1 = SYSCTL_NEG_ONE, .extra2 = SYSCTL_ONE, }, -#endif -#if IS_ENABLED(CONFIG_USB) - { - .procname = "deny_new_usb", - .data = &deny_new_usb, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax_sysadmin, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { .procname = "ngroups_max", From 64c81e4440548b98ee1c6b58565d3736438dd8a3 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Sun, 25 Feb 2018 03:26:45 -0500 Subject: [PATCH 086/109] hard-wire legacy checkreqprot option to 0 The userspace API is left intact for compatibility. 
Signed-off-by: Levente Polyak --- Documentation/admin-guide/kernel-parameters.txt | 11 ----------- security/selinux/hooks.c | 12 ------------ 2 files changed, 23 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 4cebb97fe82f..897c093061cc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -778,17 +778,6 @@ Kernel parameters Format: { "0" | "1" } Default: 0 (1 if CONFIG_DEBUG_VM is set) - checkreqprot= [SELINUX] Set initial checkreqprot flag value. - Format: { "0" | "1" } - See security/selinux/Kconfig help text. - 0 -- check protection applied by kernel (includes - any implied execute protection). - 1 -- check protection requested by application. - Default value is set via a kernel config option. - Value can be changed at runtime via - /sys/fs/selinux/checkreqprot. - Setting checkreqprot to 1 is deprecated. - cio_ignore= [S390] See Documentation/arch/s390/common_io.rst for details. diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 6c154a4d94b9..8b1f3501f289 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -141,18 +141,6 @@ static int __init selinux_enabled_setup(char *str) __setup("selinux=", selinux_enabled_setup); #endif -static int __init checkreqprot_setup(char *str) -{ - unsigned long checkreqprot; - - if (!kstrtoul(str, 0, &checkreqprot)) { - if (checkreqprot) - pr_err("SELinux: checkreqprot set to 1 via kernel parameter. This is no longer supported.\n"); - } - return 1; -} -__setup("checkreqprot=", checkreqprot_setup); - /** * selinux_secmark_enabled - Check to see if SECMARK is currently enabled * From 80f8704beae3b9e99f62f4b98c93d43d08456a2e Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Mon, 29 May 2017 17:37:59 -0400 Subject: [PATCH 087/109] security: tty: Add owner user namespace to tty_struct This patch adds struct user_namespace *owner_user_ns to the tty_struct. 
Then it is set to current_user_ns() in the alloc_tty_struct function. This is done to facilitate capability checks against the original user namespace that allocated the tty. E.g. ns_capable(tty->owner_user_ns,CAP_SYS_ADMIN) This combined with the use of user namespace's will allow hardening protections to be built to mitigate container escapes that utilize TTY ioctls such as TIOCSTI. See: https://bugzilla.redhat.com/show_bug.cgi?id=1411256 Acked-by: Serge Hallyn Reviewed-by: Kees Cook Signed-off-by: Matt Brown --- drivers/tty/tty_io.c | 2 ++ include/linux/tty.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a5d0457e0e28..5659bb2ad472 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -171,6 +171,7 @@ static void free_tty_struct(struct tty_struct *tty) tty_ldisc_deinit(tty); put_device(tty->dev); kvfree(tty->write_buf); + put_user_ns(tty->owner_user_ns); kfree(tty); } @@ -3130,6 +3131,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx) tty->index = idx; tty_line_name(driver, idx, tty->name); tty->dev = tty_get_device(tty); + tty->owner_user_ns = get_user_ns(current_user_ns()); return tty; } diff --git a/include/linux/tty.h b/include/linux/tty.h index 0a46e4054dec..99c733852fb2 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -14,6 +14,7 @@ #include #include #include +#include /* @@ -240,6 +241,7 @@ struct tty_struct { struct list_head tty_files; struct work_struct SAK_work; + struct user_namespace *owner_user_ns; } __randomize_layout; /* Each of a tty's open files has private_data pointing to tty_file_private */ From 63b5eb7b48f79951580bb52063d82bfefd627275 Mon Sep 17 00:00:00 2001 From: Matt Brown Date: Mon, 29 May 2017 17:38:00 -0400 Subject: [PATCH 088/109] security: tty: make TIOCSTI ioctl require CAP_SYS_ADMIN This introduces the tiocsti_restrict sysctl, whose default is controlled via CONFIG_SECURITY_TIOCSTI_RESTRICT. 
When activated, this control restricts all TIOCSTI ioctl calls from non-CAP_SYS_ADMIN users. This patch depends on patch 1/2. This patch was inspired by GRKERNSEC_HARDEN_TTY. This patch would have prevented https://bugzilla.redhat.com/show_bug.cgi?id=1411256 under the following conditions: * non-privileged container * container run inside new user namespace Possible effects on userland: There could be a few user programs that would be affected by this change. See: notable programs are: agetty, csh, xemacs and tcsh However, I still believe that this change is worth it given that the Kconfig defaults to n. This will be a feature that is turned on for the same reason that people activate it when using grsecurity. Users of this opt-in feature will realize that they are choosing security over some OS features like unprivileged TIOCSTI ioctls, as should be clear in the Kconfig help message. Threat Model/Patch Rationale: >From grsecurity's config for GRKERNSEC_HARDEN_TTY. | There are very few legitimate uses for this functionality and it | has made vulnerabilities in several 'su'-like programs possible in | the past. Even without these vulnerabilities, it provides an | attacker with an easy mechanism to move laterally among other | processes within the same user's compromised session. So if one process within a tty session becomes compromised it can follow that additional processes, that are thought to be in different security boundaries, can be compromised as a result. When using a program like su or sudo, these additional processes could be in a tty session where TTY file descriptors are indeed shared over privilege boundaries. This is also an excellent writeup about the issue: When user namespaces are in use, the check for the capability CAP_SYS_ADMIN is done against the user namespace that originally opened the tty.
Acked-by: Serge Hallyn Reviewed-by: Kees Cook Signed-off-by: Matt Brown Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- Documentation/admin-guide/sysctl/kernel.rst | 20 ++++++++++++++++++++ drivers/tty/tty_io.c | 16 ++++++++++++++++ security/Kconfig | 13 +++++++++++++ 3 files changed, 49 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 0a0384d2376e..2b001d43b137 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1598,6 +1598,26 @@ allow them to remain in low power states longer. Default is set (1). +tiocsti_restrict +================ + +This toggle indicates whether unprivileged users are prevented from using the +``TIOCSTI`` ioctl to inject commands into other processes which share a tty +session. + += ============================================================================ +0 No restriction, except the default one of only being able to inject commands + into one's own tty. +1 Users must have ``CAP_SYS_ADMIN`` to use the ``TIOCSTI`` ioctl. += ============================================================================ + +When user namespaces are in use, the check for ``CAP_SYS_ADMIN`` is done +against the user namespace that originally opened the tty. + +The kernel config option ``CONFIG_SECURITY_TIOCSTI_RESTRICT`` sets the default +value of ``tiocsti_restrict``. 
+ + traceoff_on_warning =================== diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 5659bb2ad472..dc32c90a4424 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2257,6 +2257,7 @@ static int tty_fasync(int fd, struct file *filp, int on) } static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); +static int tty_tiocsti_restrict __read_mostly = IS_ENABLED(CONFIG_SECURITY_TIOCSTI_RESTRICT); /** * tiocsti - fake input character * @tty: tty to fake input into @@ -2278,6 +2279,12 @@ static int tiocsti(struct tty_struct *tty, u8 __user *p) if (!tty_legacy_tiocsti && !capable(CAP_SYS_ADMIN)) return -EIO; + if (tty_tiocsti_restrict && + !ns_capable(tty->owner_user_ns, CAP_SYS_ADMIN)) { + dev_warn_ratelimited(tty->dev, + "Denied TIOCSTI ioctl for non-privileged process\n"); + return -EPERM; + } if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(ch, p)) @@ -3619,6 +3626,15 @@ static const struct ctl_table tty_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "tiocsti_restrict", + .data = &tty_tiocsti_restrict, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_sysadmin, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; /* diff --git a/security/Kconfig b/security/Kconfig index 06c66ff55ba6..72e488f36469 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -82,6 +82,19 @@ config SECURITY_PERF_EVENTS_RESTRICT perf_event_open syscall will be permitted unless it is changed. +config SECURITY_TIOCSTI_RESTRICT + bool "Restrict unprivileged use of tiocsti command injection" + default n + help + This enforces restrictions on unprivileged users injecting commands + into other processes which share a tty session using the TIOCSTI + ioctl. This option makes TIOCSTI use require CAP_SYS_ADMIN. + + If this option is not selected, no restrictions will be enforced + unless the tiocsti_restrict sysctl is explicitly set to (1). 
+ + If you are unsure how to answer this question, answer N. + config SECURITY bool "Enable different security models" depends on SYSFS From 61c856c0a8cbccee3bae74b41d83b868ec3230e6 Mon Sep 17 00:00:00 2001 From: Daniel Micay Date: Wed, 3 May 2017 23:36:14 -0400 Subject: [PATCH 089/109] enable SECURITY_TIOCSTI_RESTRICT by default Signed-off-by: Daniel Micay --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index 72e488f36469..4a9e016fa16d 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -84,7 +84,7 @@ config SECURITY_PERF_EVENTS_RESTRICT config SECURITY_TIOCSTI_RESTRICT bool "Restrict unprivileged use of tiocsti command injection" - default n + default y help This enforces restrictions on unprivileged users injecting commands into other processes which share a tty session using the TIOCSTI From bfc8a686589e98757125ac120155fec8ad0235a5 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Mon, 7 May 2018 20:37:55 +0200 Subject: [PATCH 090/109] enable BPF JIT hardening by default (if available) --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 048d275accae..0a614753c68c 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -547,7 +547,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) /* All BPF JIT sysctl knobs here. 
*/ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); -int bpf_jit_harden __read_mostly; +int bpf_jit_harden __read_mostly = 2; long bpf_jit_limit __read_mostly; long bpf_jit_limit_max __read_mostly; From bd09444017274bdc7b479d9b40f75fb9afec9082 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Sun, 4 Nov 2018 18:48:53 +0100 Subject: [PATCH 091/109] enable protected_{fifos,regular} by default --- fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index e3429055d39b..1c7cc151c721 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1198,8 +1198,8 @@ static inline void put_link(struct nameidata *nd) static int sysctl_protected_symlinks __read_mostly = 1; static int sysctl_protected_hardlinks __read_mostly = 1; -static int sysctl_protected_fifos __read_mostly; -static int sysctl_protected_regular __read_mostly; +static int sysctl_protected_fifos __read_mostly = 2; +static int sysctl_protected_regular __read_mostly = 2; #ifdef CONFIG_SYSCTL static const struct ctl_table namei_sysctls[] = { From 9163adab33d57004b83c27e161bbbfa2349209c1 Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Mon, 6 May 2019 17:07:11 +0200 Subject: [PATCH 092/109] modpost: Add CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE With 46c7dd56d541 ("modpost: always show verbose warning for section mismatch"), sec_mismatch_verbose was removed which would have printed errors for all writable function pointers during compilation if it hadn't been "#if 0"ed out for quite some time now. Let's introduce a new DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE Kconfig option to cleanly control this linux-hardened functionality. 
Signed-off-by: Thibaut Sautereau Signed-off-by: Levente Polyak --- lib/Kconfig.debug | 3 +++ scripts/Makefile.modpost | 1 + scripts/mod/modpost.c | 21 +++++++++++++++------ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8d90402b0444..f58bc6893512 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -526,6 +526,9 @@ config SECTION_MISMATCH_WARN_ONLY If unsure, say Y. +config DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE + bool "Enable verbose reporting of writable function pointers" + config DEBUG_FORCE_FUNCTION_ALIGN_64B bool "Force all function address 64B aligned" depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC || RISCV || S390) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index d7d45067d08b..b501130c534c 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -47,6 +47,7 @@ modpost-args = \ $(if $(CONFIG_EXTENDED_MODVERSIONS),-x) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ $(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \ + $(if $(CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE),-f) \ $(if $(KBUILD_MODPOST_WARN),-w) \ $(if $(KBUILD_NSDEPS),-d modules.nsdeps) \ $(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N) \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 89fe6f630793..b9b1f2e607b4 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -50,6 +50,7 @@ static bool sec_mismatch_warn_only = true; static bool trim_unused_exports; static int writable_fptr_count = 0; +static int writable_fptr_verbose = false; /* ignore missing files */ static bool ignore_missing_files; /* If set to 1, only warn (instead of error) about missing ns imports */ @@ -1043,10 +1044,13 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf, if (!secref_whitelist(fromsec, fromsym, tosec, tosym)) return; - if (mismatch->mismatch == DATA_TO_TEXT) + if (mismatch->mismatch == DATA_TO_TEXT) { 
writable_fptr_count++; - else + if (!writable_fptr_verbose) + return; + } else { sec_mismatch_count++; + } if (!tosym[0]) snprintf(taddr_str, sizeof(taddr_str), "0x%x", (unsigned int)taddr); @@ -2293,7 +2297,7 @@ int main(int argc, char **argv) LIST_HEAD(dump_lists); struct dump_list *dl, *dl2; - while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:xb")) != -1) { + while ((opt = getopt(argc, argv, "ei:fMmnT:to:au:WwENd:xb")) != -1) { switch (opt) { case 'e': external_module = true; @@ -2303,6 +2307,9 @@ int main(int argc, char **argv) dl->file = optarg; list_add_tail(&dl->list, &dump_lists); break; + case 'f': + writable_fptr_verbose = true; + break; case 'M': module_enabled = true; break; @@ -2403,9 +2410,11 @@ int main(int argc, char **argv) warn("suppressed %u unresolved symbol warnings because there were too many)\n", nr_unresolved - MAX_UNRESOLVED_REPORTS); - if (writable_fptr_count) - warn("modpost: Found %d writable function pointer(s).\n", - writable_fptr_count); + if (writable_fptr_count && !writable_fptr_verbose) + warn("modpost: Found %d writable function pointer%s.\n" + "To see full details build your kernel with:\n" + "'make CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE=y'\n", + writable_fptr_count, (writable_fptr_count == 1 ? "" : "s")); return error_occurred ? 1 : 0; } From 2bb5226171597b48a1fd2b15a1c052f0311199ea Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Tue, 7 May 2019 11:46:21 +0200 Subject: [PATCH 093/109] mm: Fix extra_latent_entropy Commit a9cd410a3d29 ("mm/page_alloc.c: memory hotplug: free pages as higher order") changed `static void __init __free_pages_boot_core()` into `void __free_pages_core()`, causing the following section mismatch warning at compile time: WARNING: vmlinux.o(.text+0x180fe4): Section mismatch in reference from the function __free_pages_core() to the variable .meminit.data:extra_latent_entropy The function __free_pages_core() references the variable __meminitdata extra_latent_entropy. 
This is often because __free_pages_core lacks a __meminitdata annotation or the annotation of extra_latent_entropy is wrong. This commit is an attempt at fixing this issue. I'm not sure it's OK as we are accessing pages that are still managed by the memblock allocator. The prefetching part is not an issue as it only affects struct pages. Signed-off-by: Thibaut Sautereau [levente@leventepolyak.net: most of core MM initialization moved to mm/mm_init.c] Signed-off-by: Levente Polyak [nicolas.bouchinet@ssi.gouv.fr: MAX_ORDER has been renamed to MAX_PAGE_ORDER (see 5e0a760b44417f7ca)] Signed-off-by: Nicolas Bouchinet --- mm/internal.h | 3 +++ mm/mm_init.c | 3 +++ mm/page_alloc.c | 34 +++++++++++++++++++--------------- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index e1e64b875885..104e88b73525 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -856,6 +856,9 @@ static inline struct folio *page_rmappable_folio(struct page *page) return folio; } +extern void __init __gather_extra_latent_entropy(struct page *page, + unsigned int nr_pages); + static inline void prep_compound_head(struct page *page, unsigned int order) { struct folio *folio = (struct folio *)page; diff --git a/mm/mm_init.c b/mm/mm_init.c index df34797691bd..30f3ee68e3fd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1999,6 +1999,7 @@ static void __init deferred_free_pages(unsigned long pfn, for (i = 0; i < nr_pages; i += pageblock_nr_pages) init_pageblock_migratetype(page + i, MIGRATE_MOVABLE, false); + __gather_extra_latent_entropy(page, 1 << MAX_PAGE_ORDER); __free_pages_core(page, MAX_PAGE_ORDER, MEMINIT_EARLY); return; } @@ -2010,6 +2011,7 @@ static void __init deferred_free_pages(unsigned long pfn, if (pageblock_aligned(pfn)) init_pageblock_migratetype(page, MIGRATE_MOVABLE, false); + __gather_extra_latent_entropy(page, 1); __free_pages_core(page, 0, MEMINIT_EARLY); } } @@ -2496,6 +2498,7 @@ void __init memblock_free_pages(unsigned long pfn, unsigned int 
order) /* pages were reserved and not allocated */ clear_page_tag_ref(page); + __gather_extra_latent_entropy(page, 1 << order); __free_pages_core(page, order, MEMINIT_EARLY); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 99579c0673ea..8a5d0d3b96bb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1632,6 +1632,25 @@ static void __free_pages_ok(struct page *page, unsigned int order, free_one_page(zone, page, pfn, order, fpi_flags); } +void __init __gather_extra_latent_entropy(struct page *page, + unsigned int nr_pages) +{ + if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { + unsigned long hash = 0; + size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; + const unsigned long *data = lowmem_page_address(page); + + for (index = 0; index < end; index++) + hash ^= hash + data[index]; +#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY + latent_entropy ^= hash; + add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); +#else + add_device_randomness((const void *)&hash, sizeof(hash)); +#endif + } +} + void __meminit __free_pages_core(struct page *page, unsigned int order, enum meminit_context context) { @@ -1662,21 +1681,6 @@ void __meminit __free_pages_core(struct page *page, unsigned int order, set_page_count(p, 0); } - if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) { - unsigned long hash = 0; - size_t index, end = PAGE_SIZE * nr_pages / sizeof hash; - const unsigned long *data = lowmem_page_address(page); - - for (index = 0; index < end; index++) - hash ^= hash + data[index]; -#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY - latent_entropy ^= hash; - add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy)); -#else - add_device_randomness((const void *)&hash, sizeof(hash)); -#endif - } - /* memblock adjusts totalram_pages() manually. 
*/ atomic_long_add(nr_pages, &page_zone(page)->managed_pages); } From 8d04d5306b2b553317ee4b4b0c21ca648b7e836a Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 29 Nov 2019 16:27:14 +0100 Subject: [PATCH 094/109] slub: Extend init_on_alloc to slab caches with constructors This has required some rework during the port to 5.13, due to da844b787245 ("kasan, mm: integrate slab init_on_alloc with HW_TAGS"), and the patch is actually quite simpler now since we do not need to unpoison objects anymore. Signed-off-by: Levente Polyak Signed-off-by: Thibaut Sautereau [nicolas.bouchinet@ssi.gouv.fr: pre/post-alloc hooks moved from mm/slab.h to mm/slub.c (see 6011be59910fb12b7)] Signed-off-by: Nicolas Bouchinet --- mm/slab.h | 2 ++ mm/slub.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mm/slab.h b/mm/slab.h index 075165fb0cf8..0732b3596615 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -681,8 +681,10 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c) { if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, &init_on_alloc)) { +#ifndef CONFIG_SLUB if (c->ctor) return false; +#endif if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) return flags & __GFP_ZERO; return true; diff --git a/mm/slub.c b/mm/slub.c index 69e51c9ac55d..a645103d235e 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4625,6 +4625,8 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru, if (p[i] && init && (!kasan_init || !kasan_has_integrated_init())) memset(p[i], 0, zero_size); + if (p[i] && init && s->ctor) + s->ctor(p[i]); if (gfpflags_allow_spinning(flags)) kmemleak_alloc_recursive(p[i], s->object_size, 1, s->flags, init_flags); From d9cc0fcc536360bb41ee67124474012969decca5 Mon Sep 17 00:00:00 2001 From: madaidan <50278627+madaidan@users.noreply.github.com> Date: Sun, 9 Feb 2020 00:03:41 +0000 Subject: [PATCH 095/109] net: tcp: add option to disable TCP simultaneous connect This is modified from Brad Spengler/PaX Team's code in the last public patch 
of grsecurity/PaX based on my understanding of the code. Changes or omissions from the original code are mine and don't reflect the original grsecurity/PaX code. TCP simultaneous connect adds a weakness in Linux's implementation of TCP that allows two clients to connect to each other without either entering a listening state. The weakness allows an attacker to easily prevent a client from connecting to a known server provided the source port for the connection is guessed correctly. As the weakness could be used to prevent an antivirus or IPS from fetching updates, or prevent an SSL gateway from fetching a CRL, it should be eliminated. This creates a net.ipv4.tcp_simult_connect sysctl that when disabled, disables TCP simultaneous connect. Reviewed-by: Thibaut Sautereau Reviewed-by: Levente Polyak Signed-off-by: Levente Polyak --- Documentation/networking/ip-sysctl.rst | 18 ++++++++++++++++++ include/net/tcp.h | 1 + net/ipv4/Kconfig | 23 +++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ net/ipv4/tcp_input.c | 3 ++- 5 files changed, 53 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 6921d8594b84..3dcd0f39cf84 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -909,6 +909,24 @@ tcp_backlog_ack_defer - BOOLEAN Default: 1 (enabled) +tcp_simult_connect - BOOLEAN + Enable TCP simultaneous connect that adds a weakness in Linux's strict + implementation of TCP that allows two clients to connect to each other + without either entering a listening state. The weakness allows an attacker + to easily prevent a client from connecting to a known server provided the + source port for the connection is guessed correctly. + + As the weakness could be used to prevent an antivirus or IPS from fetching + updates, or prevent an SSL gateway from fetching a CRL, it should be + eliminated by disabling this option. 
Though Linux is one of few operating + systems supporting simultaneous connect, it has no legitimate use in + practice and is rarely supported by firewalls. + + Disabling this may break TCP STUNT which is used by some applications for + NAT traversal. + + Default: Value of CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON + tcp_slow_start_after_idle - BOOLEAN If enabled, provide RFC2861 behavior and time out the congestion window after an idle period. An idle period is defined at diff --git a/include/net/tcp.h b/include/net/tcp.h index ebc72dce4134..955c1b71c6b4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -287,6 +287,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX); /* sysctl variables for tcp */ extern int sysctl_tcp_max_orphans; extern long sysctl_tcp_mem[3]; +extern int sysctl_tcp_simult_connect; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5ef3ea768d9f..845b67882e64 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -770,3 +770,26 @@ config TCP_MD5SIG on the Internet. If unsure, say N. + +config TCP_SIMULT_CONNECT_DEFAULT_ON + bool "Enable TCP simultaneous connect" + help + Enable TCP simultaneous connect that adds a weakness in Linux's strict + implementation of TCP that allows two clients to connect to each other + without either entering a listening state. The weakness allows an + attacker to easily prevent a client from connecting to a known server + provided the source port for the connection is guessed correctly. + + As the weakness could be used to prevent an antivirus or IPS from + fetching updates, or prevent an SSL gateway from fetching a CRL, it + should be eliminated by disabling this option. Though Linux is one of + few operating systems supporting simultaneous connect, it has no + legitimate use in practice and is rarely supported by firewalls. 
+ + Disabling this may break TCP STUNT which is used by some applications + for NAT traversal. + + This setting can be overridden at runtime via the + net.ipv4.tcp_simult_connect sysctl. + + If unsure, say N. diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5654cc9c8a0b..687dc0058b43 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -622,6 +622,15 @@ static struct ctl_table ipv4_table[] = { .extra1 = &sysctl_fib_sync_mem_min, .extra2 = &sysctl_fib_sync_mem_max, }, + { + .procname = "tcp_simult_connect", + .data = &sysctl_tcp_simult_connect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static struct ctl_table ipv4_net_table[] = { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cb4bcc5a8578..2c9875ef79f3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -85,6 +85,7 @@ #include int sysctl_tcp_max_orphans __read_mostly = NR_FILE; +int sysctl_tcp_simult_connect __read_mostly = IS_ENABLED(CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON); #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -7046,7 +7047,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, SKB_DR_SET(reason, TCP_RFC7323_PAWS); goto discard_and_undo; } - if (th->syn) { + if (th->syn && sysctl_tcp_simult_connect) { /* We see SYN without ACK. It is attempt of * simultaneous connect with crossed SYNs. * Particularly, it can be connect to self. From bf6a3b741213847d454ef04a64f80600605c6093 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Thu, 11 Mar 2021 23:09:50 +0100 Subject: [PATCH 096/109] ovl: add config to disable unprivileged user namespace mounts When disabled, unprivileged users will not be able to create new overlayfs mounts. 
This cuts the attack surface if no unprivileged user namespace mounts are required like for running rootless containers. Signed-off-by: Levente Polyak --- fs/overlayfs/Kconfig | 16 ++++++++++++++++ fs/overlayfs/super.c | 2 ++ 2 files changed, 18 insertions(+) diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 2ac67e04a6fb..3340e13c959c 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -134,3 +134,19 @@ config OVERLAY_FS_DEBUG Say Y here to enable extra debugging checks in overlayfs. If unsure, say N. + +config OVERLAY_FS_UNPRIVILEGED + bool "Overlayfs: turn on unprivileged user namespace mounts" + default n + depends on OVERLAY_FS + help + When disabled, unprivileged users will not be able to create + new overlayfs mounts. This cuts the attack surface if no + unprivileged user namespace mounts are required like for + running rootless containers. + + Overlayfs has been part of several recent local privilege + escalation exploits, so if you are security-conscious + you want to disable this. + + If unsure, say N. diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 0822987cfb51..b2faf437dc36 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1577,7 +1577,9 @@ struct file_system_type ovl_fs_type = { .name = "overlay", .init_fs_context = ovl_init_fs_context, .parameters = ovl_parameter_spec, +#ifdef CONFIG_OVERLAY_FS_UNPRIVILEGED .fs_flags = FS_USERNS_MOUNT, +#endif .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("overlay"); From b00764c50139938f338db2c359169b3faf743e75 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Tue, 25 May 2021 21:04:47 +0200 Subject: [PATCH 097/109] mm, kfence: bug on data corruption after error report Trigger BUG when kfence encounters data corruption of kfence managed objects. This allows a finer-grained control instead of globally enabling panic_on_warn. 
Signed-off-by: Levente Polyak --- lib/Kconfig.kfence | 9 +++++++++ mm/kfence/report.c | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence index 6fbbebec683a..e494618f7193 100644 --- a/lib/Kconfig.kfence +++ b/lib/Kconfig.kfence @@ -96,4 +96,13 @@ config KFENCE_KUNIT_TEST during boot; say M if you want the test to build as a module; say N if you are unsure. +config KFENCE_BUG_ON_DATA_CORRUPTION + bool "Trigger a BUG when data corruption is detected" + default y + help + Select this option if the kernel should BUG when kfence encounters + data corruption of kfence managed objects after error report. + + If unsure, say Y. + endif # KFENCE diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 787e87c26926..4d5099c5dc10 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -278,6 +279,10 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r lockdep_on(); +#ifdef CONFIG_KFENCE_BUG_ON_DATA_CORRUPTION + BUG(); +#endif + check_panic_on_warn("KFENCE"); /* We encountered a memory safety error, taint the kernel! */ From c9872e25d862d249cfd2b14b71ec6708b73174cd Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Thu, 16 Dec 2021 10:55:13 +0100 Subject: [PATCH 098/109] slub: Bug on free of non-slab objects Before commit d0fe47c64152 ("slub: add back check for free nonslab objects"), freeing a non-slab object used to trigger a BUG if CONFIG_DEBUG_VM was enabled. Now it only warns, which I think is not enough for such a memory corruption. Let's restore the previous behaviour, but tie it to CONFIG_BUG_ON_DATA_CORRUPTION as suggested by Levente. 
After page folios were introduced in v5.17, this patch was adapted to trigger a bug when the order of the folio is zero instead of when the page is not a compound page, which is not equivalent but respects the semantics of the conversion to page folios and follows the change made to the WARN_ON_ONCE beneath. Suggested-by: Levente Polyak Signed-off-by: Thibaut Sautereau [nicolas.bouchinet@ssi.gouv.fr: kfree moved from mm/slab_common.c to mm/slub.c (see b774d3e326d30fc8e)] Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index a645103d235e..a40b90e9b37b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -6523,8 +6523,12 @@ static void free_large_kmalloc(struct page *page, void *object) return; } +#ifdef CONFIG_BUG_ON_DATA_CORRUPTION + BUG_ON(order == 0); +#else if (WARN_ON_ONCE(order == 0)) pr_warn_once("object pointer: 0x%p\n", object); +#endif kmemleak_free(object); kasan_kfree_large(object); From 132ae923e4463f05bc5a8cc0770f91d4e2835312 Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Fri, 8 Dec 2023 11:53:31 +0100 Subject: [PATCH 099/109] io_uring: set io_uring_disabled sysctl to 1 by default This forces processes to either have `CAP_SYS_ADMIN` or be in the io_uring_group in order to use io_uring. The patch alters the sysctl value range so that the value can only be raised to "2" and, once set, can't be lowered again. The io_uring_group sysctl option is set to -1 by default; users should define a proper group and set the sysctl accordingly if they want it configured.
Signed-off-by: Nicolas Bouchinet --- io_uring/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 97260bca67e7..4f4b7dbb563c 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -124,7 +124,7 @@ static __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(io_key_has_sqarray, HZ); struct kmem_cache *req_cachep; static struct workqueue_struct *iou_wq __ro_after_init; -static int __read_mostly sysctl_io_uring_disabled; +static int __read_mostly sysctl_io_uring_disabled = 1; static int __read_mostly sysctl_io_uring_group = -1; #ifdef CONFIG_SYSCTL @@ -134,8 +134,9 @@ static const struct ctl_table kernel_io_uring_disabled_table[] = { .data = &sysctl_io_uring_disabled, .maxlen = sizeof(sysctl_io_uring_disabled), .mode = 0644, + /* only handle a transition from default "1" to "2" */ .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, + .extra1 = SYSCTL_TWO, .extra2 = SYSCTL_TWO, }, { From 8e7ee3c056ebf1e472c95dc1e14c261235075efa Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Thu, 17 Oct 2024 17:02:29 +0200 Subject: [PATCH 100/109] sysctl: Add proc_dointvec_minmax_sysadmin sanity check Since we expose proc_dointvec_minmax_sysadmin, add it to sanity checking functions. 
Signed-off-by: Nicolas Bouchinet --- fs/proc/proc_sysctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 49ab74e0bfde..4be54d32a60a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1154,6 +1154,7 @@ static int sysctl_check_table(const char *path, struct ctl_table_header *header) (entry->proc_handler == proc_douintvec) || (entry->proc_handler == proc_douintvec_minmax) || (entry->proc_handler == proc_dointvec_minmax) || + (entry->proc_handler == proc_dointvec_minmax_sysadmin) || (entry->proc_handler == proc_dou8vec_minmax) || (entry->proc_handler == proc_dointvec_jiffies) || (entry->proc_handler == proc_dointvec_userhz_jiffies) || From 95fec49d86cf8f6bd123f38e107a004ee396636c Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Wed, 14 May 2025 21:00:09 +0200 Subject: [PATCH 101/109] kconfig: enable MSEAL_SYSTEM_MAPPINGS by default Signed-off-by: Levente Polyak --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index 4a9e016fa16d..d95435fb7851 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -56,6 +56,7 @@ config MSEAL_SYSTEM_MAPPINGS depends on 64BIT depends on ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS depends on !CHECKPOINT_RESTORE + default y help Apply mseal on system mappings. The system mappings includes vdso, vvar, vvar_vclock, From e90199f9b37adf5c3473006f7569a81d9f37493a Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Tue, 14 Oct 2025 15:19:31 +0200 Subject: [PATCH 102/109] mm/slub: Only instrument slab allocation with canaries With the introduction of barns and sheaves, slab objects are used to prefill or refill sheaves, which are caches of small objects taking the form of an array of pointers to slab objects. Sheaves are then used for quick allocation and free, which consist of shrinking and growing the array index. Thus, there are two views of the allocation state of those objects.
While they are seen as allocated by the slab allocator, the sheaf allocator sees them as free and then allocates them. We thus need to adapt the slab canary patch in order to avoid sanitizing object allocations and frees that go through this array. A later patch will add a per-sheaf random canary value, which should allow better tracking of object overflows. Signed-off-by: Levente Polyak Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 81 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 53 insertions(+), 28 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a40b90e9b37b..0a6c88e345bb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -924,9 +924,21 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon unsigned long *canary = get_canary(s, object); BUG_ON(*canary != get_canary_value(canary, value)); } + +static inline void check_set_canary_bulk(struct kmem_cache *s, unsigned int size, void **objects, unsigned long check_value, unsigned long set_value) +{ + for (int i = 0; i < size; i++) { + if (!is_kfence_address(objects[i])) { + check_canary(s, objects[i], check_value); + set_canary(s, objects[i], set_value); + } + } +} + #else #define set_canary(s, object, value) #define check_canary(s, object, value) +#define check_set_canary_bulk(s, size, objects, check_value, set_value) #endif #ifdef CONFIG_SLUB_DEBUG @@ -2908,7 +2920,7 @@ static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf, return 0; } -static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf); +static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf, bool canary); static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) { @@ -2918,7 +2930,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp) return NULL; if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) { - sheaf_flush_unused(s, sheaf); + sheaf_flush_unused(s, sheaf, true); free_empty_sheaf(s, sheaf);
return NULL; } @@ -2966,6 +2978,7 @@ static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s) local_unlock(&s->cpu_sheaves->lock); + check_set_canary_bulk(s, batch, &objects[0], s->random_active, s->random_inactive); __kmem_cache_free_bulk(s, batch, &objects[0]); stat_add(s, SHEAF_FLUSH, batch); @@ -3011,20 +3024,24 @@ static bool sheaf_try_flush_main(struct kmem_cache *s) * necessary when flushing cpu's sheaves (both spare and main) during cpu * hotremove as the cpu is not executing anymore. */ -static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf) +static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf, bool canary) { if (!sheaf->size) return; stat_add(s, SHEAF_FLUSH, sheaf->size); + if (canary) { + check_set_canary_bulk(s, sheaf->size, &sheaf->objects[0], s->random_active, s->random_inactive); + } __kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]); sheaf->size = 0; } static bool __rcu_free_sheaf_prepare(struct kmem_cache *s, - struct slab_sheaf *sheaf) + struct slab_sheaf *sheaf, + bool canary) { bool init = slab_want_init_on_free(s); void **p = &sheaf->objects[0]; @@ -3037,7 +3054,7 @@ static bool __rcu_free_sheaf_prepare(struct kmem_cache *s, memcg_slab_free_hook(s, slab, p + i, 1); alloc_tagging_slab_free_hook(s, slab, p + i, 1); - if (unlikely(!slab_free_hook(s, p[i], init, true))) { + if (unlikely(!slab_free_hook(s, p[i], init, true, canary))) { p[i] = p[--sheaf->size]; continue; } @@ -3059,9 +3076,9 @@ static void rcu_free_sheaf_nobarn(struct rcu_head *head) sheaf = container_of(head, struct slab_sheaf, rcu_head); s = sheaf->cache; - __rcu_free_sheaf_prepare(s, sheaf); + __rcu_free_sheaf_prepare(s, sheaf, true); - sheaf_flush_unused(s, sheaf); + sheaf_flush_unused(s, sheaf, false); free_empty_sheaf(s, sheaf); } @@ -3092,7 +3109,7 @@ static void pcs_flush_all(struct kmem_cache *s) local_unlock(&s->cpu_sheaves->lock); if (spare) { - sheaf_flush_unused(s, spare); + sheaf_flush_unused(s, spare, 
true); free_empty_sheaf(s, spare); } @@ -3109,9 +3126,9 @@ static void __pcs_flush_all_cpu(struct kmem_cache *s, unsigned int cpu) pcs = per_cpu_ptr(s->cpu_sheaves, cpu); /* The cpu is not executing anymore so we don't need pcs->lock */ - sheaf_flush_unused(s, pcs->main); + sheaf_flush_unused(s, pcs->main, true); if (pcs->spare) { - sheaf_flush_unused(s, pcs->spare); + sheaf_flush_unused(s, pcs->spare, true); free_empty_sheaf(s, pcs->spare); pcs->spare = NULL; } @@ -3350,7 +3367,7 @@ static void barn_shrink(struct kmem_cache *s, struct node_barn *barn) spin_unlock_irqrestore(&barn->lock, flags); list_for_each_entry_safe(sheaf, sheaf2, &full_list, barn_list) { - sheaf_flush_unused(s, sheaf); + sheaf_flush_unused(s, sheaf, true); free_empty_sheaf(s, sheaf); } @@ -4707,7 +4724,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, * we must be very low on memory so don't bother * with the barn */ - sheaf_flush_unused(s, empty); + sheaf_flush_unused(s, empty, true); free_empty_sheaf(s, empty); } } else { @@ -4927,6 +4944,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list { void *object; bool init = false; + bool from_pcs = false; s = slab_pre_alloc_hook(s, gfpflags); if (unlikely(!s)) @@ -4937,6 +4955,8 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list goto out; object = alloc_from_pcs(s, gfpflags, node); + if (object) + from_pcs = true; if (!object) object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); @@ -4956,7 +4976,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list init = slab_want_init_on_alloc(gfpflags, s); } - if (object) { + if (object && !from_pcs) { check_canary(s, object, s->random_inactive); set_canary(s, object, s->random_active); } @@ -5128,7 +5148,7 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) if (sheaf->size < size && __prefill_sheaf_pfmemalloc(s, sheaf, gfp)) { - 
sheaf_flush_unused(s, sheaf); + sheaf_flush_unused(s, sheaf, true); free_empty_sheaf(s, sheaf); sheaf = NULL; } @@ -5155,7 +5175,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, if (unlikely((sheaf->capacity != s->sheaf_capacity) || sheaf->pfmemalloc)) { - sheaf_flush_unused(s, sheaf); + sheaf_flush_unused(s, sheaf, true); kfree(sheaf); return; } @@ -5183,7 +5203,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, */ if (!barn || data_race(barn->nr_full) >= MAX_FULL_SHEAVES || refill_sheaf(s, sheaf, gfp)) { - sheaf_flush_unused(s, sheaf); + sheaf_flush_unused(s, sheaf, true); free_empty_sheaf(s, sheaf); return; } @@ -5804,7 +5824,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs, pcs->spare = NULL; local_unlock(&s->cpu_sheaves->lock); - sheaf_flush_unused(s, to_flush); + sheaf_flush_unused(s, to_flush, true); empty = to_flush; goto got_empty; } @@ -5917,7 +5937,7 @@ static void rcu_free_sheaf(struct rcu_head *head) * If it returns true, there was at least one object from pfmemalloc * slab so simply flush everything. 
*/ - if (__rcu_free_sheaf_prepare(s, sheaf)) + if (__rcu_free_sheaf_prepare(s, sheaf, false)) goto flush; n = get_node(s, sheaf->node); @@ -5944,7 +5964,7 @@ static void rcu_free_sheaf(struct rcu_head *head) flush: stat(s, BARN_PUT_FAIL); - sheaf_flush_unused(s, sheaf); + sheaf_flush_unused(s, sheaf, true); empty: if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) { @@ -6092,7 +6112,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) memcg_slab_free_hook(s, slab, p + i, 1); alloc_tagging_slab_free_hook(s, slab, p + i, 1); - if (unlikely(!slab_free_hook(s, p[i], init, false))) { + if (unlikely(!slab_free_hook(s, p[i], init, false, false))) { p[i] = p[--size]; continue; } @@ -6179,11 +6199,13 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) * many full sheaves, free the rest to slab pages */ fallback: + check_set_canary_bulk(s, size, p, s->random_active, s->random_inactive); __kmem_cache_free_bulk(s, size, p); stat_add(s, FREE_SLOWPATH, size); flush_remote: if (remote_nr) { + check_set_canary_bulk(s, remote_nr, &remote_objects[0], s->random_active, s->random_inactive); __kmem_cache_free_bulk(s, remote_nr, &remote_objects[0]); stat_add(s, FREE_SLOWPATH, remote_nr); if (i < size) { @@ -6277,6 +6299,12 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, if (is_kfence_address(object)) canary = false; + /* Do not check or set canary if the object is freed back to pcs. 
*/ + if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) || + slab_nid(slab) == numa_mem_id())) { + canary = false; + } + if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false, canary))) return; @@ -7328,7 +7356,7 @@ static inline int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void **p) { - int i, k; + int i; if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) { for (i = 0; i < size; i++) { @@ -7362,12 +7390,7 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, } } - for (k = 0; k < i; k++) { - if (!is_kfence_address(p[k])) { - check_canary(s, p[k], s->random_inactive); - set_canary(s, p[k], s->random_active); - } - } + check_set_canary_bulk(s, i, p, s->random_inactive, s->random_active); return i; @@ -7417,8 +7440,10 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size, * the percpu sheaves, we have bigger problems. */ if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) { - if (i > 0) + if (i > 0) { + check_set_canary_bulk(s, i, p, s->random_active, s->random_inactive); __kmem_cache_free_bulk(s, i, p); + } if (kfence_obj) __kfence_free(kfence_obj); return 0; From 2a8e1c2cc1176579ceb79d91a36f91e2c65afdbb Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Thu, 15 Jan 2026 15:31:34 +0100 Subject: [PATCH 103/109] mm/slub: Add canary on sheaf alloc and free Sheaf allocation is an allocation cache that uses pre-allocated slab objects for faster free and allocation from a sheaf array. This patch adds a sheaf canary in order to detect small overflows and double-free of sheaf objects. 
Signed-off-by: Levente Polyak Signed-off-by: Nicolas Bouchinet --- mm/slab.h | 1 + mm/slub.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 0732b3596615..4db3f9091b1c 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -226,6 +226,7 @@ struct kmem_cache { #ifdef CONFIG_SLAB_CANARY unsigned long random_active; unsigned long random_inactive; + unsigned long sheaf_random_active; #endif #ifdef CONFIG_NUMA diff --git a/mm/slub.c b/mm/slub.c index 0a6c88e345bb..84fa0360d9bb 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4841,6 +4841,11 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node) pcs->main->size--; + if (!is_kfence_address(object)) { + check_canary(s, object, s->random_active); + set_canary(s, object, s->sheaf_random_active); + } + local_unlock(&s->cpu_sheaves->lock); stat(s, ALLOC_FASTPATH); @@ -4913,6 +4918,8 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, gfp_t gfp, size_t size, main->size -= batch; memcpy(p, main->objects + main->size, batch * sizeof(void *)); + check_set_canary_bulk(s, batch, p, s->random_active, s->sheaf_random_active); + local_unlock(&s->cpu_sheaves->lock); stat_add(s, ALLOC_FASTPATH, batch); @@ -4945,6 +4952,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list void *object; bool init = false; bool from_pcs = false; + bool from_pcs_failed = false; s = slab_pre_alloc_hook(s, gfpflags); if (unlikely(!s)) @@ -4954,12 +4962,16 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list if (unlikely(object)) goto out; - object = alloc_from_pcs(s, gfpflags, node); - if (object) + if (s->cpu_sheaves) { + object = alloc_from_pcs(s, gfpflags, node); from_pcs = true; + } - if (!object) + if (!object) { object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); + if (from_pcs) + from_pcs_failed = true; + } maybe_wipe_obj_freeptr(s, object); @@ -4976,9 +4988,18 @@ static 
__fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list init = slab_want_init_on_alloc(gfpflags, s); } + /* + * linux-hardened: In the scenario where an object is intended to be allocated + * from a sheaf but it's allocation failed, it is instead directly allocated from the + * slab allocator but will later be freed back to a sheaf. We thus need to + * set the canary to a sheaf_random_active. + */ if (object && !from_pcs) { check_canary(s, object, s->random_inactive); set_canary(s, object, s->random_active); + } else if (object && from_pcs_failed) { + check_canary(s, object, s->random_inactive); + set_canary(s, object, s->sheaf_random_active); } out: @@ -5296,6 +5317,11 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp, /* add __GFP_NOFAIL to force successful memcg charging */ slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, init, s->object_size); + + if (!is_kfence_address(ret)) { + check_canary(s, ret, s->random_active); + set_canary(s, ret, s->sheaf_random_active); + } out: trace_kmem_cache_alloc(_RET_IP_, ret, s, gfp, NUMA_NO_NODE); @@ -5906,6 +5932,10 @@ bool free_to_pcs(struct kmem_cache *s, void *object, bool allow_spin) return false; } + if (!is_kfence_address(object)) { + check_canary(s, object, s->sheaf_random_active); + set_canary(s, object, s->random_active); + } pcs->main->objects[pcs->main->size++] = object; local_unlock(&s->cpu_sheaves->lock); @@ -6061,6 +6091,11 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) * Since we flush immediately when size reaches capacity, we never reach * this with size already at capacity, so no OOB write is possible. 
*/ + + if (!is_kfence_address(obj)) { + check_canary(s, obj, s->sheaf_random_active); + set_canary(s, obj, s->random_active); + } rcu_sheaf->objects[rcu_sheaf->size++] = obj; if (likely(rcu_sheaf->size < s->sheaf_capacity)) { @@ -6117,6 +6152,11 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) continue; } + if (!is_kfence_address(p[i])) { + check_canary(s, p[i], s->sheaf_random_active); + set_canary(s, p[i], s->random_active); + } + if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node) || slab_test_pfmemalloc(slab))) { remote_objects[remote_nr] = p[i]; @@ -6291,6 +6331,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, unsigned long addr) { bool canary = true; + bool to_sheaf = false; memcg_slab_free_hook(s, slab, &object, 1); alloc_tagging_slab_free_hook(s, slab, &object, 1); @@ -6299,7 +6340,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, if (is_kfence_address(object)) canary = false; - /* Do not check or set canary if the object is freed back to pcs. */ + /* Defer canary checking if the object is freed back to pcs. */ if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())) { canary = false; @@ -6310,10 +6351,21 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()) && likely(!slab_test_pfmemalloc(slab))) { + to_sheaf = true; if (likely(free_to_pcs(s, object, true))) return; } + /* + * linux-hardened: In this scenario, the object was intended to be freed to a + * sheaf but it failed. The object will thus be freed back to the slab allocator, + * the canary thus need to be checked as a sheaf one and set back to a slab inactive one. 
+ */ + if (to_sheaf && canary) { + check_canary(s, object, s->sheaf_random_active); + set_canary(s, object, s->random_inactive); + } + __slab_free(s, slab, object, object, 1, addr); stat(s, FREE_SLOWPATH); } @@ -8633,6 +8685,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name, #ifdef CONFIG_SLAB_CANARY s->random_active = get_random_long(); s->random_inactive = get_random_long(); + if (__slub_debug_enabled()) + s->sheaf_random_active = s->random_active; + else + s->sheaf_random_active = get_random_long(); #endif s->align = args->align; s->ctor = args->ctor; From 156b91c7cc8e9fdb3c3de6e05e9bd4c4a3eb413c Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Fri, 6 Mar 2026 09:42:17 +0100 Subject: [PATCH 104/109] 7.0 canary adaptation - Invert canary logic: we now only track objects in their inactive state; allocated objects are always tagged as random_active. Free objects are tagged as sheaf_random_inactive or random_inactive depending on whether they are in a sheaf or in a slab freelist. The logic inversion should make the patch way more stable. - Fixes slab_debug canary crash in early allocation state when the bootstrap sheaf is in use. - Fixes slabobj_ext offset computation when stored in objects. - Always instrument sheaf_canary, even when slab_debug is active. - Fixes canary mismatch in some free paths. - Adapt canary to new alloc/free paths. - Fixes kmem_cache_refill_sheaf instrumentation. 
Signed-off-by: Levente Polyak Signed-off-by: Nicolas Bouchinet --- mm/slab.h | 2 +- mm/slub.c | 121 ++++++++++++++++++++++-------------------------------- 2 files changed, 50 insertions(+), 73 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 4db3f9091b1c..7ea5a143ca21 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -226,7 +226,7 @@ struct kmem_cache { #ifdef CONFIG_SLAB_CANARY unsigned long random_active; unsigned long random_inactive; - unsigned long sheaf_random_active; + unsigned long sheaf_random_inactive; #endif #ifdef CONFIG_NUMA diff --git a/mm/slub.c b/mm/slub.c index 84fa0360d9bb..63efc6059d2a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -886,6 +886,10 @@ static unsigned int obj_exts_offset_in_object(struct kmem_cache *s) if (slub_debug_orig_size(s)) offset += sizeof(unsigned long); +#ifdef CONFIG_SLAB_CANARY + offset += sizeof(void *); +#endif + offset += kasan_metadata_size(s, false); return offset; @@ -925,19 +929,24 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon BUG_ON(*canary != get_canary_value(canary, value)); } -static inline void check_set_canary_bulk(struct kmem_cache *s, unsigned int size, void **objects, unsigned long check_value, unsigned long set_value) +static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value) { - for (int i = 0; i < size; i++) { - if (!is_kfence_address(objects[i])) { - check_canary(s, objects[i], check_value); - set_canary(s, objects[i], set_value); - } + if (!is_kfence_address(object)) { + check_canary(s, object, check_value); + set_canary(s, object, set_value); } } +static inline void check_set_canary_bulk(struct kmem_cache *s, unsigned int size, void **objects, unsigned long check_value, unsigned long set_value) +{ + for (int i = 0; i < size; i++) + check_set_canary(s, objects[i], check_value, set_value); +} + #else #define set_canary(s, object, value) #define check_canary(s, object, value) +#define check_set_canary(s, 
object, check_value, set_value) #define check_set_canary_bulk(s, size, objects, check_value, set_value) #endif @@ -2910,6 +2919,12 @@ static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf, filled = refill_objects(s, &sheaf->objects[sheaf->size], gfp, to_fill, to_fill); + /* + * linux-hardened: refill_objects directly picks objects from slab freelist, + * we thus need to manually instrument them here for sheaf. + */ + check_set_canary_bulk(s, filled, &sheaf->objects[sheaf->size], s->random_inactive, s->sheaf_random_inactive); + sheaf->size += filled; stat_add(s, SHEAF_REFILL, filled); @@ -2978,7 +2993,7 @@ static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s) local_unlock(&s->cpu_sheaves->lock); - check_set_canary_bulk(s, batch, &objects[0], s->random_active, s->random_inactive); + check_set_canary_bulk(s, batch, &objects[0], s->sheaf_random_inactive, s->random_inactive); __kmem_cache_free_bulk(s, batch, &objects[0]); stat_add(s, SHEAF_FLUSH, batch); @@ -3032,7 +3047,7 @@ static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf, b stat_add(s, SHEAF_FLUSH, sheaf->size); if (canary) { - check_set_canary_bulk(s, sheaf->size, &sheaf->objects[0], s->random_active, s->random_inactive); + check_set_canary_bulk(s, sheaf->size, &sheaf->objects[0], s->sheaf_random_inactive, s->random_inactive); } __kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]); @@ -4840,11 +4855,7 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node) } pcs->main->size--; - - if (!is_kfence_address(object)) { - check_canary(s, object, s->random_active); - set_canary(s, object, s->sheaf_random_active); - } + check_set_canary(s, object, s->sheaf_random_inactive, s->random_active); local_unlock(&s->cpu_sheaves->lock); @@ -4918,7 +4929,7 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, gfp_t gfp, size_t size, main->size -= batch; memcpy(p, main->objects + main->size, batch * sizeof(void *)); - check_set_canary_bulk(s, batch, p, 
s->random_active, s->sheaf_random_active); + check_set_canary_bulk(s, batch, p, s->sheaf_random_inactive, s->random_active); local_unlock(&s->cpu_sheaves->lock); @@ -4951,8 +4962,6 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list { void *object; bool init = false; - bool from_pcs = false; - bool from_pcs_failed = false; s = slab_pre_alloc_hook(s, gfpflags); if (unlikely(!s)) @@ -4962,15 +4971,11 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list if (unlikely(object)) goto out; - if (s->cpu_sheaves) { - object = alloc_from_pcs(s, gfpflags, node); - from_pcs = true; - } + object = alloc_from_pcs(s, gfpflags, node); if (!object) { object = __slab_alloc_node(s, gfpflags, node, addr, orig_size); - if (from_pcs) - from_pcs_failed = true; + check_set_canary(s, object, s->random_inactive, s->random_active); } maybe_wipe_obj_freeptr(s, object); @@ -4988,20 +4993,6 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list init = slab_want_init_on_alloc(gfpflags, s); } - /* - * linux-hardened: In the scenario where an object is intended to be allocated - * from a sheaf but it's allocation failed, it is instead directly allocated from the - * slab allocator but will later be freed back to a sheaf. We thus need to - * set the canary to a sheaf_random_active. - */ - if (object && !from_pcs) { - check_canary(s, object, s->random_inactive); - set_canary(s, object, s->random_active); - } else if (object && from_pcs_failed) { - check_canary(s, object, s->random_inactive); - set_canary(s, object, s->sheaf_random_active); - } - out: /* * When init equals 'true', like for kzalloc() family, only @@ -5133,6 +5124,9 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size) return NULL; } + /* linux-hardened: We are prefilling a sheaf, so the objects need to be instrumented to sheaf_random_inactive. 
*/ + check_set_canary_bulk(s, size, &sheaf->objects[0], s->random_active, s->sheaf_random_inactive); + sheaf->size = size; return sheaf; @@ -5267,6 +5261,8 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, &sheaf->objects[sheaf->size])) { return -ENOMEM; } + + check_set_canary_bulk(s, sheaf->capacity - sheaf->size, &sheaf->objects[sheaf->size], s->random_active, s->sheaf_random_inactive); sheaf->size = sheaf->capacity; return 0; @@ -5313,15 +5309,12 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp, if (likely(!ret)) ret = sheaf->objects[--sheaf->size]; + check_set_canary(s, ret, s->sheaf_random_inactive, s->random_active); + init = slab_want_init_on_alloc(gfp, s); /* add __GFP_NOFAIL to force successful memcg charging */ slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, init, s->object_size); - - if (!is_kfence_address(ret)) { - check_canary(s, ret, s->random_active); - set_canary(s, ret, s->sheaf_random_active); - } out: trace_kmem_cache_alloc(_RET_IP_, ret, s, gfp, NUMA_NO_NODE); @@ -5508,6 +5501,7 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node) } success: + check_set_canary(s, ret, s->random_inactive, s->random_active); maybe_wipe_obj_freeptr(s, ret); slab_post_alloc_hook(s, NULL, alloc_gfp, 1, &ret, slab_want_init_on_alloc(alloc_gfp, s), size); @@ -5932,10 +5926,7 @@ bool free_to_pcs(struct kmem_cache *s, void *object, bool allow_spin) return false; } - if (!is_kfence_address(object)) { - check_canary(s, object, s->sheaf_random_active); - set_canary(s, object, s->random_active); - } + check_set_canary(s, object, s->random_active, s->sheaf_random_inactive); pcs->main->objects[pcs->main->size++] = object; local_unlock(&s->cpu_sheaves->lock); @@ -6092,10 +6083,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj) * this with size already at capacity, so no OOB write is possible. 
*/ - if (!is_kfence_address(obj)) { - check_canary(s, obj, s->sheaf_random_active); - set_canary(s, obj, s->random_active); - } + check_set_canary(s, obj, s->random_active, s->sheaf_random_inactive); rcu_sheaf->objects[rcu_sheaf->size++] = obj; if (likely(rcu_sheaf->size < s->sheaf_capacity)) { @@ -6152,11 +6140,6 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) continue; } - if (!is_kfence_address(p[i])) { - check_canary(s, p[i], s->sheaf_random_active); - set_canary(s, p[i], s->random_active); - } - if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node) || slab_test_pfmemalloc(slab))) { remote_objects[remote_nr] = p[i]; @@ -6213,6 +6196,8 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p) main = pcs->main; batch = min(size, s->sheaf_capacity - main->size); + check_set_canary_bulk(s, batch, p, s->random_active, s->sheaf_random_inactive); + memcpy(main->objects + main->size, p, batch * sizeof(void *)); main->size += batch; @@ -6300,6 +6285,7 @@ static void free_deferred_objects(struct irq_work *work) */ set_freepointer(s, x, NULL); + check_set_canary(s, x, s->random_active, s->random_inactive); __slab_free(s, slab, x, x, 1, _THIS_IP_); stat(s, FREE_SLOWPATH); } @@ -6331,40 +6317,34 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object, unsigned long addr) { bool canary = true; - bool to_sheaf = false; memcg_slab_free_hook(s, slab, &object, 1); alloc_tagging_slab_free_hook(s, slab, &object, 1); /* Make sure canaries are not used on kfence objects. */ - if (is_kfence_address(object)) - canary = false; - /* Defer canary checking if the object is freed back to pcs. 
*/ - if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) || - slab_nid(slab) == numa_mem_id())) { + if (is_kfence_address(object) || cache_has_sheaves(s)) canary = false; - } if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false, canary))) return; if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id()) && likely(!slab_test_pfmemalloc(slab))) { - to_sheaf = true; if (likely(free_to_pcs(s, object, true))) return; } /* * linux-hardened: In this scenario, the object was intended to be freed to a - * sheaf but it failed. The object will thus be freed back to the slab allocator, - * the canary thus need to be checked as a sheaf one and set back to a slab inactive one. + * sheaf but it failed. The object will thus be freed back to the slab allocator + * without instrumentation, the canary thus needs to be checked and set back to a + * slab inactive one. + * + * We only instrument objects that have not already been instrumented in `slab_free_hook()`. */ - if (to_sheaf && canary) { - check_canary(s, object, s->sheaf_random_active); - set_canary(s, object, s->random_inactive); - } + if (unlikely(!canary)) + check_set_canary(s, object, s->random_active, s->random_inactive); __slab_free(s, slab, object, object, 1, addr); stat(s, FREE_SLOWPATH); @@ -8685,10 +8665,7 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name, #ifdef CONFIG_SLAB_CANARY s->random_active = get_random_long(); s->random_inactive = get_random_long(); - if (__slub_debug_enabled()) - s->sheaf_random_active = s->random_active; - else - s->sheaf_random_active = get_random_long(); + s->sheaf_random_inactive = get_random_long(); #endif s->align = args->align; s->ctor = args->ctor; From dbc0283a5a2d68121da9020df0b2ba12a9fda659 Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Mon, 23 Mar 2026 09:47:13 +0100 Subject: [PATCH 105/109] Add canary_debug kernel parameter With canary_debug, a canary mismatch will print supposed canary values and the one that has been 
encountered. Signed-off-by: Levente Polyak Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 63efc6059d2a..58e72892ffe4 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -923,10 +923,45 @@ static inline void set_canary(struct kmem_cache *s, void *object, unsigned long *canary = get_canary_value(canary, value); } +static inline void print_canary_value(struct kmem_cache *s, void * object, unsigned long value) +{ + unsigned long *canary = get_canary(s, object); + + early_printk("check_canary: canary mismatch on cache (%s) " + "for object %p:\n" + "\tchecked canary value = %lx\n" + "\tobject canary value = %lx\n" + "\tpossible canary values for the cache :\n" + "\trandom_active = %lx\n" + "\trandom_inactive = %lx\n" + "\tsheaf_random_inactive = %lx\n", + s->name, + object, + get_canary_value(canary, value), + *canary, + get_canary_value(canary, s->random_active), + get_canary_value(canary, s->random_inactive), + get_canary_value(canary, s->sheaf_random_inactive)); +} + +static bool canary_debug __ro_after_init = false; +static int __init setup_canary_debug(char *str) +{ + canary_debug = true; + return 1; +} +__setup_param("canary_debug", canary_debug, setup_canary_debug, 0); +__setup("canary_debug", setup_canary_debug); + static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value) { unsigned long *canary = get_canary(s, object); - BUG_ON(*canary != get_canary_value(canary, value)); + + if (*canary != get_canary_value(canary, value)) { + if (unlikely(canary_debug)) + print_canary_value(s, object, value); + BUG_ON(1); + } } static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value) From c9ade82d1bebda30365f4c0d34793b2c806ebadb Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Mon, 20 Apr 2026 10:30:32 +0200 Subject: [PATCH 106/109] mm/slub: Avoid 
check_canary on null objects Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slub.c b/mm/slub.c index 58e72892ffe4..56819ec51838 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -966,7 +966,7 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value) { - if (!is_kfence_address(object)) { + if (object && !is_kfence_address(object)) { check_canary(s, object, check_value); set_canary(s, object, set_value); } From 73c6ad8572c1b3f9cd0861090126141f17b0a649 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Fri, 1 May 2026 22:06:17 +0200 Subject: [PATCH 107/109] gcc-plugins: Replace CONST_CAST with const_cast<>. Explicitly define CONST_CAST_TREE For gcc-16, this was removed in gcc trunk see commits c3d96ff9e916c02584aa081f03ab999292efbb50 458c7926d48959abcb2c1adaa22458e27459a551 Link: https://www.spinics.net/lists/kernel/msg6111050.html --- scripts/gcc-plugins/gcc-common.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 8f1b3500f8e2..0c69ec2b24e0 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -309,7 +309,12 @@ typedef const gimple *const_gimple_ptr; #define gimple gimple_ptr #define const_gimple const_gimple_ptr #undef CONST_CAST_GIMPLE +#if BUILDING_GCC_VERSION >= 16000 +#define CONST_CAST_GIMPLE(X) const_cast<gimple>((X)) +#define CONST_CAST_TREE(X) const_cast<tree>((X)) +#else #define CONST_CAST_GIMPLE(X) CONST_CAST(gimple, (X)) +#endif /* gimple related */ static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree lhs, tree op1, tree op2 MEM_STAT_DECL) From a06e870c3c37365a236533c67722886cd8b0bc58 Mon Sep 17 00:00:00 2001 From: Levente Polyak Date: Wed, 27 May 2026 02:57:58 +0200 Subject: [PATCH 108/109] Linux hardened 
v7.0.10-hardened1 Signed-off-by: Levente Polyak --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a95f0b3d26bf..83746b79b2a3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 7 PATCHLEVEL = 0 SUBLEVEL = 10 -EXTRAVERSION = +EXTRAVERSION = -hardened1 NAME = Baby Opossum Posse # *DOCUMENTATION* From 46523197c312316a40f3bda055caf8005007c123 Mon Sep 17 00:00:00 2001 From: Nicolas Bouchinet Date: Tue, 2 Jun 2026 09:57:33 +0200 Subject: [PATCH 109/109] mm/slub: Add disable_canary kernel cmdline Signed-off-by: Nicolas Bouchinet --- mm/slub.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 56819ec51838..b30682083290 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -944,6 +944,15 @@ static inline void print_canary_value(struct kmem_cache *s, void * object, unsig get_canary_value(canary, s->sheaf_random_inactive)); } +static bool disable_canary __ro_after_init = false; +static int __init setup_disable_canary(char *str) +{ + disable_canary = true; + return 1; +} +__setup_param("disable_canary", disable_canary, setup_disable_canary, 0); +__setup("disable_canary", setup_disable_canary); + static bool canary_debug __ro_after_init = false; static int __init setup_canary_debug(char *str) { @@ -966,7 +975,7 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value) { - if (object && !is_kfence_address(object)) { + if (likely(!disable_canary) && object && !is_kfence_address(object)) { check_canary(s, object, check_value); set_canary(s, object, set_value); } @@ -2715,7 +2724,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init, * Postpone setting the inactive canary until the metadata * has potentially been cleared at the end of this function. 
*/ - if (canary) { + if (likely(!disable_canary) && canary) { check_canary(s, x, s->random_active); } @@ -2802,7 +2811,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init, s->ctor(x); } - if (canary) { + if (likely(!disable_canary) && canary) { set_canary(s, x, s->random_inactive); } @@ -2873,7 +2882,9 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail, static void *setup_object(struct kmem_cache *s, void *object) { setup_object_debug(s, object); - set_canary(s, object, s->random_inactive); + if (likely(!disable_canary) && object) { + set_canary(s, object, s->random_inactive); + } object = kasan_init_slab_obj(s, object); if (unlikely(s->ctor) && !has_sanitize_verify(s)) { kasan_unpoison_new_object(s, object); @@ -8326,7 +8337,7 @@ void __check_heap_object(const void *ptr, unsigned long n, offset -= s->red_left_pad; } - if (!is_kfence) { + if (likely(!disable_canary) && !is_kfence) { check_canary(s, (void *)ptr - offset, s->random_active); }