From 788453c165cb98544abaf1509323ba7bded5c308 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sat, 27 May 2017 07:22:12 -0400
Subject: [PATCH 001/109] make DEFAULT_MMAP_MIN_ADDR match LSM_MMAP_MIN_ADDR

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 mm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index befa8909ae29..c6ebe33570f8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -704,7 +704,8 @@ config KSM
 config DEFAULT_MMAP_MIN_ADDR
 	int "Low address space to protect from user allocation"
 	depends on MMU
-	default 4096
+	default 32768 if ARM || (ARM64 && COMPAT)
+	default 65536
 	help
 	  This is the portion of low virtual memory which should be protected
 	  from userspace allocation.  Keeping a user from writing to low pages

From 4dc311cd5ad8bfee180529c5c9c7334105dfda13 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 29 May 2017 06:17:41 -0400
Subject: [PATCH 002/109] enable HARDENED_USERCOPY by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 security/Kconfig.hardening | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 86f8768c63d4..26ad690e0c80 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -225,6 +225,7 @@ config FORTIFY_SOURCE
 config HARDENED_USERCOPY
 	bool "Harden memory copies between kernel and userspace"
 	imply STRICT_DEVMEM
+	default y
 	help
 	  This option checks for obviously wrong memory regions when
 	  copying memory to/from the kernel (via copy_to_user() and

From 872496596116423c9c81f66ea7c407c15161fcc4 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 12:05:15 -0400
Subject: [PATCH 003/109] enable SECURITY_DMESG_RESTRICT by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 security/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/Kconfig b/security/Kconfig
index 6a4393fce9a1..40f773bb33d8 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -9,7 +9,7 @@ source "security/keys/Kconfig"
 
 config SECURITY_DMESG_RESTRICT
 	bool "Restrict unprivileged access to the kernel syslog"
-	default n
+	default y
 	help
 	  This enforces restrictions on unprivileged users reading the kernel
 	  syslog via dmesg(8).

From 7d2f35a62d2aa06c77b4b2d7dacde25fc51abc19 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 12:06:14 -0400
Subject: [PATCH 004/109] set kptr_restrict=2 by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 lib/vsprintf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 800b8ac49f53..e56d26c9f3ab 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -856,7 +856,7 @@ static char *default_pointer(char *buf, char *end, const void *ptr,
 	return ptr_to_id(buf, end, ptr, spec);
 }
 
-int kptr_restrict __read_mostly;
+int kptr_restrict __read_mostly = 2;
 
 static noinline_for_stack
 char *restricted_pointer(char *buf, char *end, const void *ptr,

From 7d6f131073b6e4b80a679a155a18a06819230a6e Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Tue, 19 Sep 2023 00:57:29 +0200
Subject: [PATCH 005/109] enable LIST_HARDENED by default

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 security/Kconfig.hardening | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 26ad690e0c80..ed25c8ea8381 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -249,6 +249,7 @@ menu "Hardening of kernel data structures"
 
 config LIST_HARDENED
 	bool "Check integrity of linked list manipulation"
+	default y
 	help
 	  Minimal integrity checking in the linked-list manipulation routines
 	  to catch memory corruptions that are not guaranteed to result in an

From d25caf67b7e4d9daa9824d2f694780919a38cb51 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 29 May 2017 12:21:21 -0400
Subject: [PATCH 006/109] enable BUG_ON_DATA_CORRUPTION by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 security/Kconfig.hardening | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index ed25c8ea8381..11bd5d6c5f7f 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -270,6 +270,7 @@ config RUST_BITMAP_HARDENED
 config BUG_ON_DATA_CORRUPTION
 	bool "Trigger a BUG when data corruption is detected"
 	select LIST_HARDENED
+	default y
 	help
 	  Select this option if the kernel should BUG when it encounters
 	  data corruption in kernel memory structures when they get checked

From 2d8ea1b068dc8760eeb7559eaf766db4baf760d7 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 01:39:32 -0500
Subject: [PATCH 007/109] enable ARM64_SW_TTBR0_PAN by default

---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 9ea19b74b6c3..4330577609cd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1702,6 +1702,7 @@ config MITIGATE_SPECTRE_BRANCH_HISTORY
 config ARM64_SW_TTBR0_PAN
 	bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
 	depends on !KCSAN
+	default y
 	help
 	  Enabling this option prevents the kernel from accessing
 	  user-space memory directly by pointing TTBR0_EL1 to a reserved

From f2c6e50cbdc7908aab463a60d62092009f0b7d63 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 01:33:48 -0500
Subject: [PATCH 008/109] arm64: enable RANDOMIZE_BASE by default

---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 4330577609cd..c33af2cb319c 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2300,6 +2300,7 @@ config RELOCATABLE
 config RANDOMIZE_BASE
 	bool "Randomize the address of the kernel image"
 	select RELOCATABLE
+	default y
 	help
 	  Randomizes the virtual address at which the kernel image is
 	  loaded, as a security feature that deters exploit attempts

From f5a02017c075b61bf35bb08cf1e4b2552185a780 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 19:43:38 -0400
Subject: [PATCH 009/109] enable SLAB_FREELIST_RANDOM by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/Kconfig b/mm/Kconfig
index c6ebe33570f8..113669fd6168 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -203,6 +203,7 @@ config SLAB_MERGE_DEFAULT
 config SLAB_FREELIST_RANDOM
 	bool "Randomize slab freelist"
 	depends on !SLUB_TINY
+	default y
 	help
 	  Randomizes the freelist order used on creating new pages. This
 	  security feature reduces the predictability of the kernel slab

From b6ddefbebaa200fa7c4a6574c42f97fc49f12746 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 20 Aug 2017 15:39:25 -0400
Subject: [PATCH 010/109] enable SLAB_FREELIST_HARDENED by default

---
 mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/Kconfig b/mm/Kconfig
index 113669fd6168..e4d6b64c3a80 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -212,6 +212,7 @@ config SLAB_FREELIST_RANDOM
 config SLAB_FREELIST_HARDENED
 	bool "Harden slab freelist metadata"
 	depends on !SLUB_TINY
+	default y
 	help
 	  Many kernel heap attacks try to target slab cache metadata and
 	  other infrastructure. This options makes minor performance

From d90a23236d207f7bb315b4f260580bd8c19eaa7c Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sat, 8 Jul 2017 02:38:54 -0400
Subject: [PATCH 011/109] disable SLAB_MERGE_DEFAULT by default

---
 mm/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index e4d6b64c3a80..b3209fd701b8 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -188,7 +188,6 @@ config SLUB_TINY
 
 config SLAB_MERGE_DEFAULT
 	bool "Allow slab caches to be merged"
-	default y
 	help
 	  For reduced kernel memory fragmentation, slab caches can be
 	  merged when they share the same size and other characteristics.

From 9ceb57889352a941d7a2121439557917a524447d Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 8 May 2017 12:51:54 -0400
Subject: [PATCH 012/109] enable FORTIFY_SOURCE by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 security/Kconfig.hardening | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 11bd5d6c5f7f..e42e319485ae 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -218,6 +218,7 @@ config FORTIFY_SOURCE
 	depends on ARCH_HAS_FORTIFY_SOURCE
 	# https://github.com/llvm/llvm-project/issues/53645
 	depends on !X86_32 || !CC_IS_CLANG || CLANG_VERSION >= 160000
+	default y
 	help
 	  Detect overflows of buffers in common string and memory functions
 	  where the compiler can determine and validate the buffer sizes.

From 07eed438589f56ad06270a36c74ef3913425046a Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 12:09:17 -0400
Subject: [PATCH 013/109] enable PANIC_ON_OOPS by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 lib/Kconfig.debug | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 93f356d2b3d9..be06909c5f31 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1095,6 +1095,7 @@ menu "Debug Oops, Lockups and Hangs"
 
 config PANIC_ON_OOPS
 	bool "Panic on Oops"
+	default y
 	help
 	  Say Y here to enable the kernel to panic when it oopses. This
 	  has the same effect as setting oops=panic on the kernel command
@@ -1104,7 +1105,7 @@ config PANIC_ON_OOPS
 	  anything erroneous after an oops which could result in data
 	  corruption or other issues.
 
-	  Say N if unsure.
+	  Say Y if unsure.
 
 config PANIC_TIMEOUT
 	int "panic timeout"

From 45ee2ce829aff3a37976ea6df3fe6fb50b39047b Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 14 May 2017 22:39:34 -0400
Subject: [PATCH 014/109] stop hiding SLUB_DEBUG behind EXPERT

It can make sense to disable this to reduce attack surface / complexity.
---
 mm/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 7638d75b27db..8e1bdc87e9a1 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -47,7 +47,7 @@ config DEBUG_PAGEALLOC_ENABLE_DEFAULT
 
 config SLUB_DEBUG
 	default y
-	bool "Enable SLUB debugging support" if EXPERT
+	bool "Enable SLUB debugging support"
 	depends on SYSFS && !SLUB_TINY
 	select STACKDEPOT if STACKTRACE_SUPPORT
 	help

From 4de6c58a393810cdbb559a0acc72fc7227a3b2fa Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 4 May 2017 18:11:31 -0400
Subject: [PATCH 015/109] stop hiding X86_16BIT behind EXPERT

---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fe106da41ae5..ee65871cdbd4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1242,7 +1242,7 @@ config VM86
 	default X86_LEGACY_VM86
 
 config X86_16BIT
-	bool "Enable support for 16-bit segments" if EXPERT
+	bool "Enable support for 16-bit segments"
 	default y
 	depends on MODIFY_LDT_SYSCALL
 	help

From ce6f6568b5bc0881f7ec3339e64313bc2cb17df3 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 4 May 2017 18:11:52 -0400
Subject: [PATCH 016/109] disable X86_16BIT by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/x86/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ee65871cdbd4..b7f5c1b617e2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1243,7 +1243,6 @@ config VM86
 
 config X86_16BIT
 	bool "Enable support for 16-bit segments"
-	default y
 	depends on MODIFY_LDT_SYSCALL
 	help
 	  This option is required by programs like Wine to run 16-bit

From 5ebe372f5f690df0f287fd4f064dc84f28b34a84 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 4 May 2017 18:15:52 -0400
Subject: [PATCH 017/109] stop hiding MODIFY_LDT_SYSCALL behind EXPERT

---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b7f5c1b617e2..0b0638154acd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2318,7 +2318,7 @@ config CMDLINE_OVERRIDE
 	  be set to 'N' under normal conditions.
 
 config MODIFY_LDT_SYSCALL
-	bool "Enable the LDT (local descriptor table)" if EXPERT
+	bool "Enable the LDT (local descriptor table)"
 	default y
 	help
 	  Linux can allow user programs to install a per-process x86

From b65cb1e06cd84ed7bfb4037fd39ccecbc6eb7665 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 4 May 2017 18:16:16 -0400
Subject: [PATCH 018/109] disable MODIFY_LDT_SYSCALL by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/x86/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0b0638154acd..8573ad7eb7e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2319,7 +2319,6 @@ config CMDLINE_OVERRIDE
 
 config MODIFY_LDT_SYSCALL
 	bool "Enable the LDT (local descriptor table)"
-	default y
 	help
 	  Linux can allow user programs to install a per-process x86
 	  Local Descriptor Table (LDT) using the modify_ldt(2) system

From bfd873097c16ed9dc325f31eea2bc06d856e0d17 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 29 May 2017 07:08:42 -0400
Subject: [PATCH 019/109] set LEGACY_VSYSCALL_NONE by default

---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8573ad7eb7e1..b4cdce964a97 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2234,7 +2234,7 @@ config COMPAT_VDSO
 choice
 	prompt "vsyscall table for legacy applications"
 	depends on X86_64
-	default LEGACY_VSYSCALL_XONLY
+	default LEGACY_VSYSCALL_NONE
 	help
 	  Legacy user code that does not know how to find the vDSO expects
 	  to be able to issue three syscalls by calling fixed addresses in

From e033d115c162c7c1523b26278e31d9ba8f5ac62f Mon Sep 17 00:00:00 2001
From: Bernhard40 <32568352+Bernhard40@users.noreply.github.com>
Date: Fri, 6 Oct 2017 10:21:50 +0000
Subject: [PATCH 020/109] stop hiding AIO behind EXPERT

---
 init/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 7484cd703bc1..4b3ad67bc3cc 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1868,7 +1868,7 @@ config SHMEM
 	  which may be appropriate on small systems without swap.
 
 config AIO
-	bool "Enable AIO support" if EXPERT
+	bool "Enable AIO support"
 	default y
 	help
 	  This option enables POSIX asynchronous I/O which may by used

From f718aa5822ac6ff980239a85612cb6586ef8ad6f Mon Sep 17 00:00:00 2001
From: Bernhard40 <32568352+Bernhard40@users.noreply.github.com>
Date: Fri, 6 Oct 2017 10:24:10 +0000
Subject: [PATCH 021/109] disable AIO by default

---
 init/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 4b3ad67bc3cc..e45da3b3a6a9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1869,7 +1869,6 @@ config SHMEM
 
 config AIO
 	bool "Enable AIO support"
-	default y
 	help
 	  This option enables POSIX asynchronous I/O which may by used
 	  by some high performance threaded applications. Disabling

From 9540fb357780463b53585ea6838c74d097778a8a Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 02:08:49 -0500
Subject: [PATCH 022/109] remove SYSVIPC from arm64/x86_64 defconfigs

---
 arch/arm64/configs/defconfig      | 1 -
 arch/x86/configs/x86_64_defconfig | 1 -
 2 files changed, 2 deletions(-)

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index b67d5b1fc45b..54c8002590fb 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1,4 +1,3 @@
-CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y
 CONFIG_NO_HZ_IDLE=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7d7310cdf8b0..21ebfefe9c90 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1,5 +1,4 @@
 CONFIG_WERROR=y
-CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_AUDIT=y
 CONFIG_NO_HZ=y

From 852795dc069a70b7cdd3a292a5a9f9dcbea0e756 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sat, 27 May 2017 07:28:10 -0400
Subject: [PATCH 023/109] disable DEVPORT by default

---
 drivers/char/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 2a3a37b2cf3c..9bcfc2055f1b 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -317,7 +317,6 @@ config NVRAM
 config DEVPORT
 	bool "/dev/port character device"
 	depends on HAS_IOPORT
-	default y
 	help
 	  Say Y here if you want to support the /dev/port device. The /dev/port
 	  device is similar to /dev/mem, but for I/O ports.

From cb4228430ab03d39f68bc8484687168bf755e239 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sat, 27 May 2017 07:29:45 -0400
Subject: [PATCH 024/109] disable PROC_VMCORE by default

---
 fs/proc/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 6ae966c561e7..27d78d669f95 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -41,7 +41,6 @@ config PROC_KCORE
 config PROC_VMCORE
 	bool "/proc/vmcore support"
 	depends on PROC_FS && CRASH_DUMP
-	default y
 	help
 	  Exports the dump image of crashed kernel in ELF format.
 

From d96ab821f035d002373a8c5bd922c72fc46c6a08 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 28 May 2017 03:03:46 -0400
Subject: [PATCH 025/109] disable NFS_DEBUG by default

---
 fs/nfs/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 6bb30543eff0..c6951309ff24 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -198,7 +198,6 @@ config NFS_USE_KERNEL_DNS
 config NFS_DEBUG
 	bool
 	depends on NFS_FS && SUNRPC_DEBUG
-	default y
 
 config NFS_DISABLE_UDP_SUPPORT
        bool "NFS: Disable NFS UDP protocol support"

From a8ecd142fb3c1ced941140d0d3a341832a7ca725 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 29 May 2017 12:11:11 -0400
Subject: [PATCH 026/109] enable DEBUG_WX by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 mm/Kconfig.debug | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 8e1bdc87e9a1..08ae2acd7b1b 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -189,6 +189,7 @@ config DEBUG_WX
 	depends on ARCH_HAS_PTDUMP
 	depends on MMU
 	select PTDUMP
+	default y
 	help
 	  Generate a warning if any W+X mappings are found at boot.
 

From 9136c488fc1ab6f34e7fbc79ad24c3ac6a2cf4d5 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Fri, 5 Jan 2018 13:21:16 -0500
Subject: [PATCH 027/109] disable LEGACY_PTYS by default

---
 drivers/tty/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 149f3d53b760..3c51e08c1d8c 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -116,7 +116,6 @@ config UNIX98_PTYS
 
 config LEGACY_PTYS
 	bool "Legacy (BSD) PTY support"
-	default y
 	help
 	  A pseudo terminal (PTY) is a software device consisting of two
 	  halves: a master and a slave. The slave device behaves identical to

From 243cb126fa0fd7e86fd779852fb4a076f30e2db4 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Fri, 5 Jan 2018 12:41:42 -0500
Subject: [PATCH 028/109] disable DEVMEM by default

---
 drivers/char/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 9bcfc2055f1b..8d669c63ad7c 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -284,7 +284,6 @@ config NSC_GPIO
 
 config DEVMEM
 	bool "/dev/mem virtual device support"
-	default y
 	help
 	  Say Y here if you want to support the /dev/mem device.
 	  The /dev/mem device is used to access areas of physical

From fc3e6e0993c503b690059bb0b32f0511fb82a6cd Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Fri, 5 Jan 2018 12:43:49 -0500
Subject: [PATCH 029/109] enable IO_STRICT_DEVMEM by default

---
 lib/Kconfig.debug | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index be06909c5f31..a5759774550d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1975,6 +1975,7 @@ config STRICT_DEVMEM
 config IO_STRICT_DEVMEM
 	bool "Filter I/O access to /dev/mem"
 	depends on STRICT_DEVMEM
+	default y
 	help
 	  If this option is disabled, you allow userspace (root) access to all
 	  io-memory regardless of whether a driver is actively using that

From 9e24bb958cd1e2d6fcc05a7a78001f32853dfc71 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 7 May 2017 18:28:33 -0400
Subject: [PATCH 030/109] disable COMPAT_BRK by default

---
 mm/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index b3209fd701b8..77c40dc2ad83 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -292,7 +292,6 @@ config SHUFFLE_PAGE_ALLOCATOR
 
 config COMPAT_BRK
 	bool "Disable heap randomization"
-	default y
 	help
 	  Randomizing heap placement makes heap exploits harder, but it
 	  also breaks ancient binaries (including anything libc5 based).

From 62b2e67f75cc198befc5faff52adea16202f4a22 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 7 May 2017 16:16:39 -0400
Subject: [PATCH 031/109] use maximum supported mmap rnd entropy by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index b5d41bb40672..6bf632241af9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1216,7 +1216,7 @@ config ARCH_MMAP_RND_BITS
 	int "Number of bits to use for ASLR of mmap base address" if EXPERT
 	range ARCH_MMAP_RND_BITS_MIN ARCH_MMAP_RND_BITS_MAX
 	default ARCH_MMAP_RND_BITS_DEFAULT if ARCH_MMAP_RND_BITS_DEFAULT
-	default ARCH_MMAP_RND_BITS_MIN
+	default ARCH_MMAP_RND_BITS_MAX
 	depends on HAVE_ARCH_MMAP_RND_BITS
 	help
 	  This value can be used to select the number of bits to use to
@@ -1250,7 +1250,7 @@ config ARCH_MMAP_RND_COMPAT_BITS
 	int "Number of bits to use for ASLR of mmap base address for compatible applications" if EXPERT
 	range ARCH_MMAP_RND_COMPAT_BITS_MIN ARCH_MMAP_RND_COMPAT_BITS_MAX
 	default ARCH_MMAP_RND_COMPAT_BITS_DEFAULT if ARCH_MMAP_RND_COMPAT_BITS_DEFAULT
-	default ARCH_MMAP_RND_COMPAT_BITS_MIN
+	default ARCH_MMAP_RND_COMPAT_BITS_MAX
 	depends on HAVE_ARCH_MMAP_RND_COMPAT_BITS
 	help
 	  This value can be used to select the number of bits to use to

From aa2f3539226e3f499f628fb7b5e2fbba6377bc59 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 30 May 2017 10:47:23 -0400
Subject: [PATCH 032/109] enable protected_{symlinks,hardlinks} by default

---
 fs/namei.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index 9e5500dad14f..e3429055d39b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1196,8 +1196,8 @@ static inline void put_link(struct nameidata *nd)
 		path_put(&last->link);
 }
 
-static int sysctl_protected_symlinks __read_mostly;
-static int sysctl_protected_hardlinks __read_mostly;
+static int sysctl_protected_symlinks __read_mostly = 1;
+static int sysctl_protected_hardlinks __read_mostly = 1;
 static int sysctl_protected_fifos __read_mostly;
 static int sysctl_protected_regular __read_mostly;
 

From 8b5a78e2a84e68428042e04b6870b329db059cb2 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 02:13:48 -0500
Subject: [PATCH 033/109] enable SECURITY by default

---
 security/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig b/security/Kconfig
index 40f773bb33d8..ef22ceb46b57 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -76,6 +76,7 @@ config SECURITY
 	bool "Enable different security models"
 	depends on SYSFS
 	depends on MULTIUSER
+	default y
 	help
 	  This allows you to choose different security modules to be
 	  configured into your kernel.

From 961e6eaca7bf4cd8c9f9a68c3e8ba42131b2152f Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 29 May 2017 06:17:59 -0400
Subject: [PATCH 034/109] enable SECURITY_YAMA by default

---
 security/yama/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/yama/Kconfig b/security/yama/Kconfig
index a810304123ca..b809050b25d2 100644
--- a/security/yama/Kconfig
+++ b/security/yama/Kconfig
@@ -2,7 +2,7 @@
 config SECURITY_YAMA
 	bool "Yama support"
 	depends on SECURITY
-	default n
+	default y
 	help
 	  This selects Yama, which extends DAC support with additional
 	  system-wide security settings beyond regular Linux discretionary

From ca39f71418e339fc603cf48fafee1330cfd214f6 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 02:14:02 -0500
Subject: [PATCH 035/109] enable SECURITY_NETWORK by default

---
 security/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig b/security/Kconfig
index ef22ceb46b57..c20928e74619 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -102,6 +102,7 @@ config SECURITYFS
 config SECURITY_NETWORK
 	bool "Socket and Networking Security Hooks"
 	depends on SECURITY
+	default y
 	help
 	  This enables the socket and networking security hooks.
 	  If enabled, a security module can use these hooks to

From 3e539280cababcbe1182a4cded810befdfd86c3a Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 02:15:24 -0500
Subject: [PATCH 036/109] enable AUDIT by default

---
 init/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/init/Kconfig b/init/Kconfig
index e45da3b3a6a9..470609276d59 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -527,6 +527,7 @@ config CROSS_MEMORY_ATTACH
 config AUDIT
 	bool "Auditing support"
 	depends on NET
+	default y
 	help
 	  Enable auditing infrastructure that can be used with another
 	  kernel subsystem, such as SELinux (which requires this for

From 802fd504d44c5abc68aff90d4b5ffa911fbfa6e3 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 02:16:49 -0500
Subject: [PATCH 037/109] enable SECURITY_SELINUX by default

---
 security/selinux/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig
index 5588c4d573f6..567a33eae460 100644
--- a/security/selinux/Kconfig
+++ b/security/selinux/Kconfig
@@ -3,7 +3,7 @@ config SECURITY_SELINUX
 	bool "SELinux Support"
 	depends on SECURITY_NETWORK && AUDIT && NET && INET
 	select NETWORK_SECMARK
-	default n
+	default y
 	help
 	  This selects Security-Enhanced Linux (SELinux).
 	  You will also need a policy configuration and a labeled filesystem.

From 87b1bced8948828ce9e511ec9996fefb95d8114b Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sat, 6 Jan 2018 13:41:11 -0500
Subject: [PATCH 038/109] enable SYN_COOKIES by default

---
 net/ipv4/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index df922f9f5289..5ef3ea768d9f 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -267,6 +267,7 @@ config IP_PIMSM_V2
 
 config SYN_COOKIES
 	bool "IP: TCP syncookie support"
+	default y
 	help
 	  Normal TCP/IP networking is open to an attack known as "SYN
 	  flooding". This denial-of-service attack prevents legitimate remote

From 3aeeb846fd173ed50e9a76f4582b47ff9c870506 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Thu, 19 Sep 2019 19:02:23 +0200
Subject: [PATCH 039/109] enable INIT_ON_ALLOC_DEFAULT_ON by default

---
 security/Kconfig.hardening | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index e42e319485ae..226193bf5ebc 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -158,6 +158,7 @@ config KSTACK_ERASE_RUNTIME_DISABLE
 
 config INIT_ON_ALLOC_DEFAULT_ON
 	bool "Enable heap memory zeroing on allocation by default"
+	default y
 	depends on !KMSAN
 	help
 	  This has the effect of setting "init_on_alloc=1" on the kernel

From f9a342fe4bdf538916acc8c453ece97a2b0c1bbe Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Thu, 19 Sep 2019 19:03:01 +0200
Subject: [PATCH 040/109] enable INIT_ON_FREE_DEFAULT_ON by default

---
 security/Kconfig.hardening | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 226193bf5ebc..07f9286f1443 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -172,6 +172,7 @@ config INIT_ON_ALLOC_DEFAULT_ON
 
 config INIT_ON_FREE_DEFAULT_ON
 	bool "Enable heap memory zeroing on free by default"
+	default y
 	depends on !KMSAN
 	help
 	  This has the effect of setting "init_on_free=1" on the kernel

From 2b8f8a43d3c219086a303265a8ab28eba55982a3 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Sun, 27 Sep 2020 00:43:48 +0200
Subject: [PATCH 041/109] kconfig: select DEBUG_FS_ALLOW_NONE by default if
 DEBUG_FS is enabled

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a5759774550d..8d90402b0444 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -720,7 +720,7 @@ config DEBUG_FS
 choice
 	prompt "Debugfs default access"
 	depends on DEBUG_FS
-	default DEBUG_FS_ALLOW_ALL
+	default DEBUG_FS_ALLOW_NONE
 	help
 	  This selects the default access restrictions for debugfs.
 	  It can be overridden with kernel command line option

From d728b03bb0576d49e513ceea41986f0087323ab6 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Tue, 22 Dec 2020 23:40:09 +0100
Subject: [PATCH 042/109] stop hiding UID16 behind EXPERT

---
 init/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 470609276d59..e448f65e4404 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1681,7 +1681,7 @@ menuconfig EXPERT
 	  Only use this if you really know what you are doing.
 
 config UID16
-	bool "Enable 16-bit UID system calls" if EXPERT
+	bool "Enable 16-bit UID system calls"
 	depends on HAVE_UID16 && MULTIUSER
 	default y
 	help

From 188f00267b0f26e1e03662bc8b983a45da61534b Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Tue, 22 Dec 2020 23:41:32 +0100
Subject: [PATCH 043/109] disable UID16 by default

---
 init/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index e448f65e4404..78479ebfcea2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1683,7 +1683,6 @@ menuconfig EXPERT
 config UID16
 	bool "Enable 16-bit UID system calls"
 	depends on HAVE_UID16 && MULTIUSER
-	default y
 	help
 	  This enables the legacy 16-bit UID syscall wrappers.
 

From 756797e841fabe10eec771b71387e9e962772854 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Wed, 25 Aug 2021 22:24:10 +0200
Subject: [PATCH 044/109] kconfig: enable RANDOMIZE_KSTACK_OFFSET_DEFAULT by
 default

---
 arch/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 6bf632241af9..7242e5a04be4 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1553,6 +1553,7 @@ config RANDOMIZE_KSTACK_OFFSET
 config RANDOMIZE_KSTACK_OFFSET_DEFAULT
 	bool "Default state of kernel stack offset randomization"
 	depends on RANDOMIZE_KSTACK_OFFSET
+	default y
 	help
 	  Kernel stack offset randomization is controlled by kernel boot param
 	  "randomize_kstack_offset=on/off", and this config chooses the default

From ff9efa1c3b8856db97338a18eeb08397ffbe83a4 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Mon, 1 May 2023 23:20:43 +0200
Subject: [PATCH 045/109] kconfig: disable LEGACY_TIOCSTI by default

---
 drivers/tty/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 3c51e08c1d8c..a3e62d52ebcf 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -145,7 +145,6 @@ config LEGACY_PTY_COUNT
 
 config LEGACY_TIOCSTI
 	bool "Allow legacy TIOCSTI usage"
-	default y
 	help
 	  Historically the kernel has allowed TIOCSTI, which will push
 	  characters into a controlling TTY. This continues to be used

From c532a8c49b092f5e2ac4312ee5af0747e5e5c04e Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Tue, 12 Dec 2023 18:35:05 +0100
Subject: [PATCH 046/109] mm/slab: enable RANDOM_KMALLOC_CACHES by default

---
 mm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index 77c40dc2ad83..9bcf4da8865a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -249,7 +249,7 @@ config SLUB_STATS
 	  Try running: slabinfo -DA
 
 config RANDOM_KMALLOC_CACHES
-	default n
+	default y
 	depends on !SLUB_TINY
 	bool "Randomize slab caches for normal kmalloc"
 	help

From 7765d65fb22110c2a75b320adf94a41218e22fa0 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 7 May 2017 00:28:23 -0400
Subject: [PATCH 047/109] add __read_only for non-init related usage

---
 include/linux/cache.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/linux/cache.h b/include/linux/cache.h
index e69768f50d53..432c30a1fc7e 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -60,6 +60,8 @@
 #define __ro_after_init __section(".data..ro_after_init")
 #endif
 
+#define __read_only __ro_after_init
+
 #ifndef ____cacheline_aligned_in_smp
 #ifdef CONFIG_SMP
 #define ____cacheline_aligned_in_smp ____cacheline_aligned

From bc84d41514713a15c84893f0943ebc32a9be382f Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Fri, 12 May 2017 03:22:00 -0400
Subject: [PATCH 048/109] mark kernel_set_to_readonly as __ro_after_init

This change was extracted from PaX where it's part of KERNEXEC.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/x86/mm/init_32.c | 5 ++---
 arch/x86/mm/init_64.c | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0908c44d51e6..2734208acddc 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -718,7 +718,7 @@ void __init mem_init(void)
 	test_wp_bit();
 }
 
-int kernel_set_to_readonly __read_mostly;
+int kernel_set_to_readonly __ro_after_init;
 
 static void mark_nxdata_nx(void)
 {
@@ -742,12 +742,11 @@ void mark_rodata_ro(void)
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long size = (unsigned long)__end_rodata - start;
 
+	kernel_set_to_readonly = 1;
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	pr_info("Write protecting kernel text and read-only data: %luk\n",
 		size >> 10);
 
-	kernel_set_to_readonly = 1;
-
 #ifdef CONFIG_CPA_DEBUG
 	pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size);
 	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df2261fa4f98..c18790a324a0 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1387,7 +1387,7 @@ void __init mem_init(void)
 	preallocate_vmalloc_pages();
 }
 
-int kernel_set_to_readonly;
+int kernel_set_to_readonly __ro_after_init;
 
 void mark_rodata_ro(void)
 {
@@ -1400,9 +1400,8 @@ void mark_rodata_ro(void)
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
-	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
-
 	kernel_set_to_readonly = 1;
+	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
 
 	/*
 	 * The rodata/data/bss/brk section (but not the kernel text!)

From fc9b3fab087fb010927fe0a2a7b074eada1c0abc Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Sun, 13 Jan 2019 21:42:45 +0100
Subject: [PATCH 049/109] Revert "mark kernel_set_to_readonly as
 __ro_after_init"

    This commit causes CPA conflicts, cf.
    https://github.com/anthraxx/linux-hardened/issues/4.

    Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
---
 arch/x86/mm/init_32.c | 5 +++--
 arch/x86/mm/init_64.c | 5 +++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2734208acddc..0908c44d51e6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -718,7 +718,7 @@ void __init mem_init(void)
 	test_wp_bit();
 }
 
-int kernel_set_to_readonly __ro_after_init;
+int kernel_set_to_readonly __read_mostly;
 
 static void mark_nxdata_nx(void)
 {
@@ -742,11 +742,12 @@ void mark_rodata_ro(void)
 	unsigned long start = PFN_ALIGN(_text);
 	unsigned long size = (unsigned long)__end_rodata - start;
 
-	kernel_set_to_readonly = 1;
 	set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
 	pr_info("Write protecting kernel text and read-only data: %luk\n",
 		size >> 10);
 
+	kernel_set_to_readonly = 1;
+
 #ifdef CONFIG_CPA_DEBUG
 	pr_info("Testing CPA: Reverting %lx-%lx\n", start, start + size);
 	set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index c18790a324a0..df2261fa4f98 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1387,7 +1387,7 @@ void __init mem_init(void)
 	preallocate_vmalloc_pages();
 }
 
-int kernel_set_to_readonly __ro_after_init;
+int kernel_set_to_readonly;
 
 void mark_rodata_ro(void)
 {
@@ -1400,9 +1400,10 @@ void mark_rodata_ro(void)
 
 	printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
 	       (end - start) >> 10);
-	kernel_set_to_readonly = 1;
 	set_memory_ro(start, (end - start) >> PAGE_SHIFT);
 
+	kernel_set_to_readonly = 1;
+
 	/*
 	 * The rodata/data/bss/brk section (but not the kernel text!)
 	 * should also be not-executable.

From d66f52c56568e4cd36672f9008e2b9b5e1c82a55 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 14 May 2017 19:01:58 -0400
Subject: [PATCH 050/109] mark slub runtime configuration as __ro_after_init

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 mm/slub.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index e423afa27d1a..fb52dd654dfe 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -972,13 +972,13 @@ static inline void *restore_red_left(struct kmem_cache *s, void *p)
  * Debug settings:
  */
 #if defined(CONFIG_SLUB_DEBUG_ON)
-static slab_flags_t slub_debug = DEBUG_DEFAULT_FLAGS;
+static slab_flags_t slub_debug __ro_after_init = DEBUG_DEFAULT_FLAGS;
 #else
-static slab_flags_t slub_debug;
+static slab_flags_t slub_debug __ro_after_init;
 #endif
 
 static const char *slub_debug_string __ro_after_init;
-static int disable_higher_order_debug;
+static int disable_higher_order_debug __ro_after_init;
 
 /*
  * Object debugging
@@ -7317,10 +7317,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_bulk_noprof);
  * and increases the number of allocations possible without having to
  * take the list_lock.
  */
-static unsigned int slub_min_order;
-static unsigned int slub_max_order =
+static unsigned int slub_min_order __ro_after_init;
+static unsigned int slub_max_order __ro_after_init =
 	IS_ENABLED(CONFIG_SLUB_TINY) ? 1 : PAGE_ALLOC_COSTLY_ORDER;
-static unsigned int slub_min_objects;
+static unsigned int slub_min_objects __ro_after_init;
 
 /*
  * Calculate the order of allocation given an slab object size.

From 26f9b255a974f89301af87b9f7496b2700395c82 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 11:35:35 -0400
Subject: [PATCH 051/109] add __ro_after_init to slab_nomerge and slab_state

This was extracted from the PaX patch where it's part of the KERNEXEC
feature as __read_only.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 mm/slab_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index d5a70a831a2a..36c45acbbbe6 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -37,7 +37,7 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/kmem.h>
 
-enum slab_state slab_state;
+enum slab_state slab_state __ro_after_init;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
@@ -57,7 +57,7 @@ struct kmem_cache *kmem_cache;
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
  */
-static bool slab_nomerge = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);
+static bool slab_nomerge __ro_after_init = !IS_ENABLED(CONFIG_SLAB_MERGE_DEFAULT);
 
 static int __init setup_slab_nomerge(char *str)
 {

From b83f69b2f635ad76654ca2f26f4e7bc08af7ed5b Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 28 May 2017 18:51:30 -0400
Subject: [PATCH 052/109] mark kmem_cache as __ro_after_init

---
 mm/slab_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slab_common.c b/mm/slab_common.c
index 36c45acbbbe6..51fc267d0f86 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -40,7 +40,7 @@
 enum slab_state slab_state __ro_after_init;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
-struct kmem_cache *kmem_cache;
+struct kmem_cache *kmem_cache __ro_after_init;
 
 /*
  * Set of flags that will prevent slab merging.

From 585a5ded299bd9cb9f237a223af3a70351d62238 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Fri, 12 May 2017 00:06:16 -0400
Subject: [PATCH 053/109] mark __{supported,default_kernel}_pte_mask as
 __ro_after_init

These changes were initially extracted from PaX where it was part of
KERNEXEC as __read_only.

Before this linux-hardened commit was rebased onto v5.5, a call to
x86_configure_nx in cpu_init needed to be removed; it was not required
anyway since NX had already been set up earlier. This call was finally
removed upstream in 505b789996f64 ("x86/cpu: Unify cpu_init()").

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 arch/x86/mm/init_32.c | 4 ++--
 arch/x86/mm/init_64.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0908c44d51e6..edf0b9ea159c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -499,9 +499,9 @@ static void __init pagetable_init(void)
 
 #define DEFAULT_PTE_MASK ~(_PAGE_NX | _PAGE_GLOBAL)
 /* Bits supported by the hardware: */
-pteval_t __supported_pte_mask __read_mostly = DEFAULT_PTE_MASK;
+pteval_t __supported_pte_mask __ro_after_init = DEFAULT_PTE_MASK;
 /* Bits allowed in normal kernel mappings: */
-pteval_t __default_kernel_pte_mask __read_mostly = DEFAULT_PTE_MASK;
+pteval_t __default_kernel_pte_mask __ro_after_init = DEFAULT_PTE_MASK;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
 EXPORT_SYMBOL(__default_kernel_pte_mask);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index df2261fa4f98..b8bf29695e0d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -104,9 +104,9 @@ static inline pgprot_t prot_sethuge(pgprot_t prot)
  */
 
 /* Bits supported by the hardware: */
-pteval_t __supported_pte_mask __read_mostly = ~0;
+pteval_t __supported_pte_mask __ro_after_init = ~0;
 /* Bits allowed in normal kernel mappings: */
-pteval_t __default_kernel_pte_mask __read_mostly = ~0;
+pteval_t __default_kernel_pte_mask __ro_after_init = ~0;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 /* Used in PAGE_KERNEL_* macros which are reasonably used out-of-tree: */
 EXPORT_SYMBOL(__default_kernel_pte_mask);

From 29042770454119257607fbe724080fc92c5d5d0f Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 4 Jul 2017 01:24:28 -0400
Subject: [PATCH 054/109] mark kobj_ns_type_register as only used for init

This allows kobj_ns_ops_tbl to be __ro_after_init.

Extracted from PaX.
---
 include/linux/kobject_ns.h | 2 +-
 lib/kobject.c              | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
index 4f0990e09b93..94d775949d2b 100644
--- a/include/linux/kobject_ns.h
+++ b/include/linux/kobject_ns.h
@@ -46,7 +46,7 @@ struct kobj_ns_type_operations {
 	void (*drop_ns)(struct ns_common *);
 };
 
-int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
+int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
 int kobj_ns_type_registered(enum kobj_ns_type type);
 const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent);
 const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj);
diff --git a/lib/kobject.c b/lib/kobject.c
index 9c9ff0f5175f..a63903740fab 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -1019,9 +1019,9 @@ EXPORT_SYMBOL_GPL(kset_create_and_add);
 
 
 static DEFINE_SPINLOCK(kobj_ns_type_lock);
-static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES] __ro_after_init;
 
-int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
+int __init kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
 {
 	enum kobj_ns_type type = ops->type;
 	int error;

From 24e9f52fd21bcc8fc8b188b4411a48ca10db7302 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 4 Jul 2017 01:32:30 -0400
Subject: [PATCH 055/109] mark open_softirq as only used for init

[nicolas.bouchinet@ssi.gouv.fr: Adapt to commit 75e340ce106fa]
---
 include/linux/interrupt.h | 2 +-
 kernel/softirq.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 6cd26ffb0505..479fb0cacbd1 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -604,7 +604,7 @@ static inline void do_softirq_post_smp_call_flush(unsigned int unused)
 }
 #endif
 
-extern void open_softirq(int nr, void (*action)(void));
+extern void __init open_softirq(int nr, void (*action)(void));
 extern void softirq_init(void);
 extern void __raise_softirq_irqoff(unsigned int nr);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 77198911b8dd..c83209f8458b 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -790,7 +790,7 @@ void __raise_softirq_irqoff(unsigned int nr)
 	or_softirq_pending(1UL << nr);
 }
 
-void open_softirq(int nr, void (*action)(void))
+void __init open_softirq(int nr, void (*action)(void))
 {
 	softirq_vec[nr].action = action;
 }

From ce7cceafa049e2955c5834d8e0638d577e246b93 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 4 Jul 2017 01:42:33 -0400
Subject: [PATCH 056/109] mark softirq_vec as __ro_after_init

Note: __cacheline_aligned_in_smp conflicts with __ro_after_init on x86.

Extracted from PaX.
---
 kernel/softirq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index c83209f8458b..1c508ce336ae 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -57,7 +57,7 @@ DEFINE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 #endif
 
-static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
+static struct softirq_action softirq_vec[NR_SOFTIRQS] __ro_after_init __aligned(PAGE_SIZE);
 
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 

From b58da37c44da9fd1ad76a90fd5551fef8199fed2 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 17 Sep 2019 18:00:54 +0200
Subject: [PATCH 057/109] mm: slab: BUG on page type confusion under
 BUG_ON_DATA_CORRUPTION

This change was extracted from PaX.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
[nicolas.bouchinet@ssi.gouv.fr: memcg related functions moved from mm/slab.h to mm/slub.c (see 0bedcc66d2a43a50a)]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index fb52dd654dfe..546820cb248a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6252,10 +6252,14 @@ static noinline void warn_free_bad_obj(struct kmem_cache *s, void *obj)
 	struct slab *slab;
 
 	slab = virt_to_slab(obj);
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	BUG_ON(!slab);
+#else
 	if (WARN_ONCE(!slab,
 			"kmem_cache_free(%s, %p): object is not in a slab page\n",
 			s->name, obj))
 		return;
+#endif
 
 	cachep = slab->slab_cache;
 

From 34c99184cdce5da53a8ef77f8ac7370f65f20379 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 11:50:53 -0400
Subject: [PATCH 058/109] bug on kmem_cache_free with the wrong cache

At least when CONFIG_BUG_ON_DATA_CORRUPTION is enabled.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[nicolas.bouchinet@ssi.gouv.fr: memcg related functions moved from mm/slab.h to mm/slub.c (see 0bedcc66d2a43a50a)]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index 546820cb248a..0e9b0512158b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6263,6 +6263,9 @@ static noinline void warn_free_bad_obj(struct kmem_cache *s, void *obj)
 
 	cachep = slab->slab_cache;
 
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	BUG_ON(cachep != s);
+#else
 	if (WARN_ONCE(cachep != s,
 			"kmem_cache_free(%s, %p): object belongs to different cache %s\n",
 			s->name, obj, cachep ? cachep->name : "(NULL)")) {
@@ -6270,6 +6273,7 @@ static noinline void warn_free_bad_obj(struct kmem_cache *s, void *obj)
 			print_tracking(cachep, obj);
 		return;
 	}
+#endif
 }
 
 /**

From d34242f1c2254b236e9400d47b8275793ed44875 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 21:54:56 -0400
Subject: [PATCH 059/109] mm: add support for verifying page sanitization

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 include/linux/highmem.h    | 7 +++++++
 mm/page_alloc.c            | 6 ++++++
 security/Kconfig.hardening | 7 +++++++
 3 files changed, 20 insertions(+)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index af03db851a1d..26885da1a943 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -355,6 +355,13 @@ static inline bool tag_clear_highpages(struct page *page, int numpages)
 
 #endif
 
+static inline void verify_zero_highpage(struct page *page)
+{
+	void *kaddr = kmap_atomic(page);
+	BUG_ON(memchr_inv(kaddr, 0, PAGE_SIZE));
+	kunmap_atomic(kaddr);
+}
+
 /*
  * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
  * If we pass in a head page, we can zero up to the size of the compound page.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e92898ad51cd..bce29a3574fa 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1864,6 +1864,12 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
 	 */
 	kernel_unpoison_pages(page, 1 << order);
 
+	if (IS_ENABLED(CONFIG_PAGE_SANITIZE_VERIFY) && want_init_on_free()) {
+		int i;
+		for (i = 0; i < (1 << order); i++)
+			verify_zero_highpage(page + i);
+	}
+
 	/*
 	 * As memory initialization might be integrated into KASAN,
 	 * KASAN unpoisoning and memory initialization code must be
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index 07f9286f1443..b728851fec14 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -211,6 +211,13 @@ config ZERO_CALL_USED_REGS
 	  be evaluated for suitability. For example, x86_64 grows by less
 	  than 1%, and arm64 grows by about 5%.
 
+config PAGE_SANITIZE_VERIFY
+	bool "Verify sanitized pages"
+	default y
+	help
+	  When init_on_free is enabled, verify that newly allocated pages
+	  are zeroed to detect write-after-free bugs.
+
 endmenu
 
 menu "Bounds checking"

From e6ec3bfca0e246803d0dfb5fc243cdd884351cbe Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Date: Fri, 20 Sep 2019 14:02:42 +0200
Subject: [PATCH 060/109] slub: Extend init_on_free to slab caches with
 constructors

This is the remaining non-upstream part of SLAB_SANITIZE, which was a
partial port, from Daniel Micay, of the feature from PaX without the
default fast mode based on passing SLAB_NO_SANITIZE in
performance-critical cases that are not particularly security sensitive.

Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
[levente@leventepolyak.net: Adapt to kasan init_on_free with HW_TAGS changes]
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 mm/slab.h | 12 +++++++++---
 mm/slub.c | 18 ++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index e9ab292acd22..e9c4cf834edb 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -688,9 +688,15 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
 static inline bool slab_want_init_on_free(struct kmem_cache *c)
 {
 	if (static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
-				&init_on_free))
-		return !(c->ctor ||
-			 (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)));
+				&init_on_free)) {
+#ifndef CONFIG_SLUB
+		if (c->ctor)
+			return false;
+#endif
+		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
+			return false;
+		return true;
+	}
 	return false;
 }
 
diff --git a/mm/slub.c b/mm/slub.c
index 0e9b0512158b..1c8f74afe2eb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2680,6 +2680,8 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
 		 */
 		set_orig_size(s, x, orig_size);
 
+		if (s->ctor)
+			s->ctor(x);
 	}
 	/* KASAN might put x into memory quarantine, delaying its reuse. */
 	return !kasan_slab_free(s, x, init, still_accessible, false);
@@ -2723,6 +2725,22 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
 			 * accordingly if object's reuse is delayed.
 			 */
 			--(*cnt);
+
+			/* Objects that are put into quarantine by KASAN will
+			 * still undergo free_consistency_checks(), which
+			 * checks whether the freelist pointer is valid if it
+			 * is located after the object (see check_object()).
+			 * Since this is the case for slab caches with
+			 * constructors, we need to fix the freelist pointer
+			 * after init_on_free has overwritten it.
+			 *
+			 * Note that doing this for all caches (not just ctor
+			 * ones) would cause a GPF due to KASAN poisoning and
+			 * the way set_freepointer() eventually dereferences
+			 * the freepointer.
+			 */
+			if (slab_want_init_on_free(s) && s->ctor)
+				set_freepointer(s, object, NULL);
 		}
 	} while (object != old_tail);
 

From 1ddbcbb97e46fc22f066dbbf1aa2290ca43879b4 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 4 May 2017 15:58:57 -0400
Subject: [PATCH 061/109] slub: Add support for verifying slab sanitization

This is an extension to the sanitization feature in PaX for when
sacrificing more performance for security is acceptable.

The initial version from Daniel Micay was relying on PAGE_SANITIZE. It
now relies on upstream's init_on_free.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[nicolas.bouchinet@ssi.gouv.fr: Should not conflict with commit 520a688a2edfddba9]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c                  | 45 ++++++++++++++++++++++++++++++++++----
 security/Kconfig.hardening |  8 +++++++
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 1c8f74afe2eb..f942d20efc65 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -232,6 +232,12 @@ static inline bool kmem_cache_debug(struct kmem_cache *s)
 	return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS);
 }
 
+static inline bool has_sanitize_verify(struct kmem_cache *s)
+{
+	return IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) &&
+	       slab_want_init_on_free(s);
+}
+
 void *fixup_red_left(struct kmem_cache *s, void *p)
 {
 	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
@@ -2680,7 +2686,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
 		 */
 		set_orig_size(s, x, orig_size);
 
-		if (s->ctor)
+		if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor)
 			s->ctor(x);
 	}
 	/* KASAN might put x into memory quarantine, delaying its reuse. */
@@ -2751,7 +2757,7 @@ static void *setup_object(struct kmem_cache *s, void *object)
 {
 	setup_object_debug(s, object);
 	object = kasan_init_slab_obj(s, object);
-	if (unlikely(s->ctor)) {
+	if (unlikely(s->ctor) && !has_sanitize_verify(s)) {
 		kasan_unpoison_new_object(s, object);
 		s->ctor(object);
 		kasan_poison_new_object(s, object);
@@ -4872,7 +4878,19 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
 
 	maybe_wipe_obj_freeptr(s, object);
-	init = slab_want_init_on_alloc(gfpflags, s);
+
+	if (has_sanitize_verify(s) && object) {
+		/* KASAN hasn't unpoisoned the object yet (this is done in the
+		 * post-alloc hook), so let's do it temporarily.
+		 */
+		kasan_unpoison_new_object(s, object);
+		BUG_ON(memchr_inv(object, 0, s->object_size));
+		if (s->ctor)
+			s->ctor(object);
+		kasan_poison_new_object(s, object);
+	} else {
+		init = slab_want_init_on_alloc(gfpflags, s);
+	}
 
 out:
 	/*
@@ -7245,6 +7263,21 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		stat_add(s, ALLOC_SLOWPATH, i);
 	}
 
+	if (has_sanitize_verify(s)) {
+		int j;
+
+		for (j = 0; j < i; j++) {
+			/* KASAN hasn't unpoisoned the object yet (this is done in the
+			 * post-alloc hook), so let's do it temporarily.
+			 */
+			kasan_unpoison_new_object(s, p[j]);
+			BUG_ON(memchr_inv(p[j], 0, s->object_size));
+			if (s->ctor)
+				s->ctor(p[j]);
+			kasan_poison_new_object(s, p[j]);
+		}
+	}
+
 	return i;
 
 error:
@@ -7262,6 +7295,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
 {
 	unsigned int i = 0;
 	void *kfence_obj;
+	bool init = false;
 
 	if (!size)
 		return 0;
@@ -7315,8 +7349,11 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
 	 * memcg and kmem_cache debug support and memory initialization.
 	 * Done outside of the IRQ disabled fastpath loop.
 	 */
+	if (!has_sanitize_verify(s)) {
+		init = slab_want_init_on_alloc(flags, s);
+	}
 	if (unlikely(!slab_post_alloc_hook(s, NULL, flags, size, p,
-		    slab_want_init_on_alloc(flags, s), s->object_size))) {
+		    init, s->object_size))) {
 		return 0;
 	}
 
diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening
index b728851fec14..0068460db967 100644
--- a/security/Kconfig.hardening
+++ b/security/Kconfig.hardening
@@ -218,6 +218,14 @@ config PAGE_SANITIZE_VERIFY
 	  When init_on_free is enabled, verify that newly allocated pages
 	  are zeroed to detect write-after-free bugs.
 
+config SLAB_SANITIZE_VERIFY
+	bool "Verify sanitized SLAB allocations"
+	default y
+	depends on !KASAN
+	help
+	  When init_on_free is enabled, verify that newly allocated slab
+	  objects are zeroed to detect write-after-free bugs.
+
 endmenu
 
 menu "Bounds checking"

From 403257b7ce6341bef82d4689f9d4ca4da7482d40 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 16:16:58 -0400
Subject: [PATCH 062/109] slub: add multi-purpose random canaries

Place canaries at the end of kernel slab allocations, sacrificing
some performance and memory usage for security.

Canaries can detect some forms of heap corruption when allocations
are freed and as part of the HARDENED_USERCOPY feature. It provides
basic use-after-free detection for HARDENED_USERCOPY.

Canaries absorb small overflows (rendering them harmless), mitigate
non-NUL terminated C string overflows on 64-bit via a guaranteed zero
byte and provide basic double-free detection.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
[levente@leventepolyak.net: make canaries work without SLUB_DEBUG]
[levente@leventepolyak.net: fix compatibility with KFENCE]
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[nicolas.bouchinet@ssi.gouv.fr: Fix conflicts with commit 782f8906f8057efc7]
[nicolas.bouchinet@ssi.gouv.fr: Take slab canary into account for object size]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/Kconfig |  17 ++++++++
 mm/slab.h  |   5 +++
 mm/slub.c  | 121 +++++++++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 131 insertions(+), 12 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index 9bcf4da8865a..9830506ebb5b 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -235,6 +235,23 @@ config SLAB_BUCKETS
 
 	  If unsure, say Y.
 
+config SLAB_CANARY
+	depends on SLUB
+	depends on !SLAB_MERGE_DEFAULT
+	bool "SLAB canaries"
+	default y
+	help
+	  Place canaries at the end of kernel slab allocations, sacrificing
+	  some performance and memory usage for security.
+
+	  Canaries can detect some forms of heap corruption when allocations
+	  are freed and as part of the HARDENED_USERCOPY feature. It provides
+	  basic use-after-free detection for HARDENED_USERCOPY.
+
+	  Canaries absorb small overflows (rendering them harmless), mitigate
+	  non-NUL terminated C string overflows on 64-bit via a guaranteed zero
+	  byte and provide basic double-free detection.
+
 config SLUB_STATS
 	default n
 	bool "Enable performance statistics"
diff --git a/mm/slab.h b/mm/slab.h
index e9c4cf834edb..075165fb0cf8 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -223,6 +223,11 @@ struct kmem_cache {
 	unsigned long random;
 #endif
 
+#ifdef CONFIG_SLAB_CANARY
+	unsigned long random_active;
+	unsigned long random_inactive;
+#endif
+
 #ifdef CONFIG_NUMA
 	/*
 	 * Defragmentation by allocating from a remote node.
diff --git a/mm/slub.c b/mm/slub.c
index f942d20efc65..69e51c9ac55d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -44,6 +44,7 @@
 #include <linux/memcontrol.h>
 #include <linux/random.h>
 #include <linux/prandom.h>
+#include <linux/stackprotector.h>
 #include <kunit/test.h>
 #include <kunit/test-bug.h>
 #include <linux/sort.h>
@@ -742,6 +743,8 @@ static inline void set_orig_size(struct kmem_cache *s,
 		return;
 
 	p += get_info_end(s);
+	if (IS_ENABLED(CONFIG_SLAB_CANARY))
+		p = (void *)p + sizeof(void *);
 	p += sizeof(struct track) * 2;
 
 	*(unsigned long *)p = orig_size;
@@ -758,6 +761,8 @@ static inline unsigned long get_orig_size(struct kmem_cache *s, void *object)
 		return s->object_size;
 
 	p += get_info_end(s);
+	if (IS_ENABLED(CONFIG_SLAB_CANARY))
+		p = (void *)p + sizeof(void *);
 	p += sizeof(struct track) * 2;
 
 	return *(unsigned long *)p;
@@ -897,6 +902,33 @@ static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
 }
 #endif
 
+#ifdef CONFIG_SLAB_CANARY
+static inline unsigned long *get_canary(struct kmem_cache *s, void *object)
+{
+	return object + get_info_end(s);
+}
+
+static inline unsigned long get_canary_value(const void *canary, unsigned long value)
+{
+	return (value ^ (unsigned long)canary) & CANARY_MASK;
+}
+
+static inline void set_canary(struct kmem_cache *s, void *object, unsigned long value)
+{
+	unsigned long *canary = get_canary(s, object);
+	*canary = get_canary_value(canary, value);
+}
+
+static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value)
+{
+	unsigned long *canary = get_canary(s, object);
+	BUG_ON(*canary != get_canary_value(canary, value));
+}
+#else
+#define set_canary(s, object, value)
+#define check_canary(s, object, value)
+#endif
+
 #ifdef CONFIG_SLUB_DEBUG
 
 /*
@@ -1026,6 +1058,9 @@ static struct track *get_track(struct kmem_cache *s, void *object,
 
 	p = object + get_info_end(s);
 
+	if (IS_ENABLED(CONFIG_SLAB_CANARY))
+		p = (void *)p + sizeof(void *);
+
 	return kasan_reset_tag(p + alloc);
 }
 
@@ -1187,6 +1222,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
 
 	off = get_info_end(s);
 
+	if (IS_ENABLED(CONFIG_SLAB_CANARY))
+		off += sizeof(void *);
+
 	if (s->flags & SLAB_STORE_USER)
 		off += 2 * sizeof(struct track);
 
@@ -1355,10 +1393,11 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
  *
  * [Metadata starts at object + s->inuse]
  *   - A. freelist pointer (if freeptr_outside_object)
- *   - B. alloc tracking (SLAB_STORE_USER)
- *   - C. free tracking (SLAB_STORE_USER)
- *   - D. original request size (SLAB_KMALLOC && SLAB_STORE_USER)
- *   - E. KASAN metadata (if enabled)
+ *   - B. Canary for SLAB_CANARY
+ *   - C. alloc tracking (SLAB_STORE_USER)
+ *   - D. free tracking (SLAB_STORE_USER)
+ *   - E. original request size (SLAB_KMALLOC && SLAB_STORE_USER)
+ *   - F. KASAN metadata (if enabled)
  *
  * [Mandatory padding] (if CONFIG_SLUB_DEBUG && SLAB_RED_ZONE)
  *   - One mandatory debug word to guarantee a minimum poisoned gap
@@ -1390,6 +1429,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 {
 	unsigned long off = get_info_end(s);	/* The end of info */
 
+	if (IS_ENABLED(CONFIG_SLAB_CANARY))
+		off += sizeof(void *);
+
 	if (s->flags & SLAB_STORE_USER) {
 		/* We also have user information there */
 		off += 2 * sizeof(struct track);
@@ -2608,11 +2650,19 @@ struct rcu_delayed_free {
  */
 static __always_inline
 bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
-		    bool after_rcu_delay)
+		    bool after_rcu_delay, bool canary)
 {
 	/* Are the object contents still accessible? */
 	bool still_accessible = (s->flags & SLAB_TYPESAFE_BY_RCU) && !after_rcu_delay;
 
+	/*
+	 * Postpone setting the inactive canary until the metadata
+	 * has potentially been cleared at the end of this function.
+	 */
+	if (canary) {
+		check_canary(s, x, s->random_active);
+	}
+
 	kmemleak_free_recursive(x, s->flags);
 	kmsan_slab_free(s, x);
 
@@ -2678,8 +2728,14 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
 		if (!kasan_has_integrated_init())
 			memset(kasan_reset_tag(x), 0, orig_size);
 		rsize = (s->flags & SLAB_RED_ZONE) ? s->red_left_pad : 0;
+
+#ifdef CONFIG_SLAB_CANARY
+		memset((char *)kasan_reset_tag(x) + inuse + sizeof(void *), 0,
+		       s->size - inuse - sizeof(void *) - rsize);
+#else
 		memset((char *)kasan_reset_tag(x) + inuse, 0,
 		       s->size - inuse - rsize);
+#endif
 		/*
 		 * Restore orig_size, otherwise kmalloc redzone overwritten
 		 * would be reported
@@ -2689,6 +2745,11 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
 		if (!IS_ENABLED(CONFIG_SLAB_SANITIZE_VERIFY) && s->ctor)
 			s->ctor(x);
 	}
+
+	if (canary) {
+		set_canary(s, x, s->random_inactive);
+	}
+
 	/* KASAN might put x into memory quarantine, delaying its reuse. */
 	return !kasan_slab_free(s, x, init, still_accessible, false);
 }
@@ -2704,7 +2765,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
 	bool init;
 
 	if (is_kfence_address(next)) {
-		slab_free_hook(s, next, false, false);
+		slab_free_hook(s, next, false, false, false);
 		return false;
 	}
 
@@ -2719,7 +2780,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
 		next = get_freepointer(s, object);
 
 		/* If object's reuse doesn't have to be delayed */
-		if (likely(slab_free_hook(s, object, init, false))) {
+		if (likely(slab_free_hook(s, object, init, false, true))) {
 			/* Move object to the new freelist */
 			set_freepointer(s, object, *head);
 			*head = object;
@@ -2756,6 +2817,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
 static void *setup_object(struct kmem_cache *s, void *object)
 {
 	setup_object_debug(s, object);
+	set_canary(s, object, s->random_inactive);
 	object = kasan_init_slab_obj(s, object);
 	if (unlikely(s->ctor) && !has_sanitize_verify(s)) {
 		kasan_unpoison_new_object(s, object);
@@ -4892,6 +4954,11 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 		init = slab_want_init_on_alloc(gfpflags, s);
 	}
 
+	if (object) {
+		check_canary(s, object, s->random_inactive);
+		set_canary(s, object, s->random_active);
+	}
+
 out:
 	/*
 	 * When init equals 'true', like for kzalloc() family, only
@@ -6199,10 +6266,16 @@ static __fastpath_inline
 void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	       unsigned long addr)
 {
+	bool canary = true;
+
 	memcg_slab_free_hook(s, slab, &object, 1);
 	alloc_tagging_slab_free_hook(s, slab, &object, 1);
 
-	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
+	/* Make sure canaries are not used on kfence objects. */
+	if (is_kfence_address(object))
+		canary = false;
+
+	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false, canary)))
 		return;
 
 	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
@@ -6220,11 +6293,16 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 static noinline
 void memcg_alloc_abort_single(struct kmem_cache *s, void *object)
 {
+	bool canary = true;
 	struct slab *slab = virt_to_slab(object);
 
 	alloc_tagging_slab_free_hook(s, slab, &object, 1);
 
-	if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false)))
+	/* Make sure canaries are not used on kfence objects. */
+	if (is_kfence_address(object))
+		canary = false;
+
+	if (likely(slab_free_hook(s, object, slab_want_init_on_free(s), false, canary)))
 		__slab_free(s, slab, object, object, 1, _RET_IP_);
 }
 #endif
@@ -6267,7 +6345,7 @@ static void slab_free_after_rcu_debug(struct rcu_head *rcu_head)
 		return;
 
 	/* resume freeing */
-	if (slab_free_hook(s, object, slab_want_init_on_free(s), true)) {
+	if (slab_free_hook(s, object, slab_want_init_on_free(s), true, true)) {
 		__slab_free(s, slab, object, object, 1, _THIS_IP_);
 		stat(s, FREE_SLOWPATH);
 	}
@@ -6363,7 +6441,7 @@ static inline size_t slab_ksize(struct slab *slab)
 	 * or any other metadata back there then we can
 	 * only use the space before that information.
 	 */
-	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER))
+	if ((s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) || IS_ENABLED(CONFIG_SLAB_CANARY))
 		return s->inuse;
 	else if (obj_exts_in_object(s, slab))
 		return s->inuse;
@@ -7244,7 +7322,7 @@ static inline
 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			    void **p)
 {
-	int i;
+	int i, k;
 
 	if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
 		for (i = 0; i < size; i++) {
@@ -7278,6 +7356,13 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		}
 	}
 
+	for (k = 0; k < i; k++) {
+		if (!is_kfence_address(p[k])) {
+			check_canary(s, p[k], s->random_inactive);
+			set_canary(s, p[k], s->random_active);
+		}
+	}
+
 	return i;
 
 error:
@@ -7598,6 +7683,7 @@ static void early_kmem_cache_node_alloc(int node)
 #ifdef CONFIG_SLUB_DEBUG
 	init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
 #endif
+	set_canary(kmem_cache_node, n, kmem_cache_node->random_active);
 	n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
 	slab->freelist = get_freepointer(kmem_cache_node, n);
 	slab->inuse = 1;
@@ -7802,6 +7888,9 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 		s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *));
 	}
 
+	if (IS_ENABLED(CONFIG_SLAB_CANARY))
+		size += sizeof(void *);
+
 #ifdef CONFIG_SLUB_DEBUG
 	if (flags & SLAB_STORE_USER) {
 		/*
@@ -8139,6 +8228,10 @@ void __check_heap_object(const void *ptr, unsigned long n,
 		offset -= s->red_left_pad;
 	}
 
+	if (!is_kfence) {
+		check_canary(s, (void *)ptr - offset, s->random_active);
+	}
+
 	/* Allow address range falling entirely within usercopy region. */
 	if (offset >= s->useroffset &&
 	    offset - s->useroffset <= s->usersize &&
@@ -8505,6 +8598,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 	s->flags = kmem_cache_flags(flags, s->name);
 #ifdef CONFIG_SLAB_FREELIST_HARDENED
 	s->random = get_random_long();
+#endif
+#ifdef CONFIG_SLAB_CANARY
+	s->random_active = get_random_long();
+	s->random_inactive = get_random_long();
 #endif
 	s->align = args->align;
 	s->ctor = args->ctor;

From 6864a9e584a757fcef24d48f535baac9e5cf1566 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Mon, 11 Jan 2016 15:23:55 +0000
Subject: [PATCH 063/109] security,perf: Allow further restriction of
 perf_event_open

When kernel.perf_event_paranoid is set to 3 (or greater), disallow all
access to performance events by users without CAP_SYS_ADMIN or
CAP_PERFMON.
Add a Kconfig symbol CONFIG_SECURITY_PERF_EVENTS_RESTRICT that
makes this value the default.

This is based on a similar feature in grsecurity
(CONFIG_GRKERNSEC_PERF_HARDEN).  This version doesn't include making
the variable read-only.  It also allows enabling further restriction
at run-time regardless of whether the default is changed.

As part of the v5.5 linux-hardened rebase, this commit was adapted to
work with the new perf_event LSM hooks, introduced in da97e18458fb42
("perf_event: Add support for LSM and SELinux checks").

As part of the v5.8 linux-hardened rebase, this commit was adapted to
work with the new CAP_PERFMON capability.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
[levente@leventepolyak.net: Adapt to work with the new perf_event LSM hooks]
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[thibaut.sautereau@ssi.gouv.fr: Adapt to work with the new CAP_PERFMON capability]
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 Documentation/admin-guide/sysctl/kernel.rst | 2 ++
 include/linux/perf_event.h                  | 8 ++++++++
 kernel/events/core.c                        | 7 ++++++-
 security/Kconfig                            | 9 +++++++++
 tools/perf/Documentation/security.txt       | 1 +
 tools/perf/util/evsel.c                     | 1 +
 6 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 9aed74e65cf4..0a0384d2376e 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1014,6 +1014,8 @@ with respect to CAP_PERFMON use cases.
 >=1  Disallow CPU event access by users without ``CAP_PERFMON``.
 
 >=2  Disallow kernel profiling by users without ``CAP_PERFMON``.
+
+>=3  Disallow use of any event by users without ``CAP_PERFMON``.
 ===  ==================================================================
 
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 48d851fbd8ea..b82bab945bf5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1792,6 +1792,14 @@ static inline int perf_is_paranoid(void)
 
 extern int perf_allow_kernel(void);
 
+static inline int perf_allow_open(void)
+{
+	if (sysctl_perf_event_paranoid > 2 && !perfmon_capable())
+		return -EACCES;
+
+	return security_perf_event_open(PERF_SECURITY_OPEN);
+}
+
 static inline int perf_allow_cpu(void)
 {
 	if (sysctl_perf_event_paranoid > 0 && !perfmon_capable())
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 89b40e439717..b9205d90e6dd 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -491,8 +491,13 @@ static __always_inline bool is_guest_mediated_pmu_loaded(void)
  *   0 - disallow raw tracepoint access for unpriv
  *   1 - disallow cpu events for unpriv
  *   2 - disallow kernel profiling for unpriv
+ *   3 - disallow all unpriv perf event use
  */
+#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT
+int sysctl_perf_event_paranoid __read_mostly = 3;
+#else
 int sysctl_perf_event_paranoid __read_mostly = 2;
+#endif
 
 /* Minimum for 512 kiB + 1 user control page. 'free' kiB per user. */
 static int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
@@ -13829,7 +13834,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		return err;
 
 	/* Do we allow access to perf_event_open(2) ? */
-	err = security_perf_event_open(PERF_SECURITY_OPEN);
+	err = perf_allow_open();
 	if (err)
 		return err;
 
diff --git a/security/Kconfig b/security/Kconfig
index c20928e74619..a99fb7c51d47 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -72,6 +72,15 @@ config MSEAL_SYSTEM_MAPPINGS
 	  For complete descriptions of memory sealing, please see
 	  Documentation/userspace-api/mseal.rst
 
+config SECURITY_PERF_EVENTS_RESTRICT
+	bool "Restrict unprivileged use of performance events"
+	depends on PERF_EVENTS
+	help
+	  If you say Y here, the kernel.perf_event_paranoid sysctl
+	  will be set to 3 by default, and no unprivileged use of the
+	  perf_event_open syscall will be permitted unless it is
+	  changed.
+
 config SECURITY
 	bool "Enable different security models"
 	depends on SYSFS
diff --git a/tools/perf/Documentation/security.txt b/tools/perf/Documentation/security.txt
index 4fe3b8b1958f..a7d88cc23a70 100644
--- a/tools/perf/Documentation/security.txt
+++ b/tools/perf/Documentation/security.txt
@@ -148,6 +148,7 @@ Perf tool provides a message similar to the one below:
    >= 0: Disallow raw and ftrace function tracepoint access
    >= 1: Disallow CPU event access
    >= 2: Disallow kernel profiling
+   >= 3: Disallow use of any event
    To make the adjusted perf_event_paranoid setting permanent preserve it
    in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f59228c1a39e..d687678a45ea 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -4014,6 +4014,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
 		 ">= 0: Disallow raw and ftrace function tracepoint access\n"
 		 ">= 1: Disallow CPU event access\n"
 		 ">= 2: Disallow kernel profiling\n"
+		 ">= 3: Disallow use of any event\n"
 		 "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
 		 "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
 		 perf_event_paranoid());

From 596735619ce2f11948cfb28510b31c3a0f679331 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 4 May 2017 14:45:59 -0400
Subject: [PATCH 064/109] enable SECURITY_PERF_EVENTS_RESTRICT by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 security/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig b/security/Kconfig
index a99fb7c51d47..06c66ff55ba6 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -75,6 +75,7 @@ config MSEAL_SYSTEM_MAPPINGS
 config SECURITY_PERF_EVENTS_RESTRICT
 	bool "Restrict unprivileged use of performance events"
 	depends on PERF_EVENTS
+	default y
 	help
 	  If you say Y here, the kernel.perf_event_paranoid sysctl
 	  will be set to 3 by default, and no unprivileged use of the

From 077cf75598b17ebfc9495a6a2224cf983e794760 Mon Sep 17 00:00:00 2001
From: Serge Hallyn <serge.hallyn@canonical.com>
Date: Fri, 31 May 2013 19:12:12 +0100
Subject: [PATCH 065/109] userns: add sysctl to disallow unprivileged
 CLONE_NEWUSER by default

Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com>
[bwh: Remove unneeded binary sysctl bits]
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
[thibaut.sautereau@ssi.gouv.fr: Adapt to sysctl code refactoring]
[nicolas.bouchinet@ssi.gouv.fr: Adapt proc_handler with an allowed range value between 0 and 1]
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 include/linux/user_namespace.h |  4 ++++
 kernel/fork.c                  | 11 +++++++++++
 kernel/sysctl.c                | 15 +++++++++++++++
 kernel/user_namespace.c        |  3 +++
 4 files changed, 33 insertions(+)

diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 9c3be157397e..bb05d4a07c46 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -173,6 +173,8 @@ static inline struct user_namespace *to_user_ns(struct ns_common *ns)
 
 #ifdef CONFIG_USER_NS
 
+extern int unprivileged_userns_clone;
+
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
 	if (ns)
@@ -206,6 +208,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns);
 struct ns_common *ns_get_owner(struct ns_common *ns);
 #else
 
+#define unprivileged_userns_clone 0
+
 static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
 {
 	return &init_user_ns;
diff --git a/kernel/fork.c b/kernel/fork.c
index 73622ad0665a..18a0936e6a12 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -83,6 +83,7 @@
 #include <linux/perf_event.h>
 #include <linux/posix-timers.h>
 #include <linux/user-return-notifier.h>
+#include <linux/user_namespace.h>
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
 #include <linux/signalfd.h>
@@ -1987,6 +1988,10 @@ __latent_entropy struct task_struct *copy_process(
 	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
 		return ERR_PTR(-EINVAL);
 
+	if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone)
+		if (!capable(CAP_SYS_ADMIN))
+			return ERR_PTR(-EPERM);
+
 	/*
 	 * Thread groups must share signals as well, and detached threads
 	 * can only be started up within the thread group.
@@ -3151,6 +3156,12 @@ int ksys_unshare(unsigned long unshare_flags)
 	if (unshare_flags & CLONE_NEWNS)
 		unshare_flags |= CLONE_FS;
 
+	if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) {
+		err = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto bad_unshare_out;
+	}
+
 	err = check_unshare_flags(unshare_flags);
 	if (err)
 		goto bad_unshare_out;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c9efb17cc255..57613459d7d0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -22,6 +22,10 @@
 #include <linux/uaccess.h>
 #include <asm/processor.h>
 
+#ifdef CONFIG_USER_NS
+#include <linux/user_namespace.h>
+#endif
+
 /* shared constants to be used in various sysctls */
 const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
 EXPORT_SYMBOL(sysctl_vals);
@@ -1372,6 +1376,17 @@ int proc_do_static_key(const struct ctl_table *table, int dir,
 }
 
 static const struct ctl_table sysctl_subsys_table[] = {
+#ifdef CONFIG_USER_NS
+	{
+		.procname	= "unprivileged_userns_clone",
+		.data		= &unprivileged_userns_clone,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1         = SYSCTL_ZERO,
+		.extra2         = SYSCTL_ONE,
+	},
+#endif
 #ifdef CONFIG_PROC_SYSCTL
 	{
 		.procname	= "sysctl_writes_strict",
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 0bed462e9b2a..c84977d660be 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -23,6 +23,9 @@
 #include <linux/sort.h>
 #include <linux/nstree.h>
 
+/* sysctl */
+int unprivileged_userns_clone;
+
 static struct kmem_cache *user_ns_cachep __ro_after_init;
 static DEFINE_MUTEX(userns_state_mutex);
 

From f673179348bd86ae747c624add1f1f6c19417c4b Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Wed, 31 Jul 2019 20:50:48 +0100
Subject: [PATCH 066/109] userns: add kconfig to set default for unprivileged
 CLONE_NEWUSER

When disabled, unprivileged users will not be able to create
new namespaces. Allowing users to create their own namespaces
has been part of several recent local privilege escalation
exploits, so if you need user namespaces but are
paranoid^Wsecurity-conscious you want to disable this.

By default unprivileged user namespaces are disabled.

Co-authored-by: Jan Alexander Steffens (heftig) <jan.steffens@gmail.com>
Signed-off-by: Jan Alexander Steffens (heftig) <jan.steffens@gmail.com>
Co-authored-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 init/Kconfig            | 16 ++++++++++++++++
 kernel/user_namespace.c |  4 ++++
 2 files changed, 20 insertions(+)

diff --git a/init/Kconfig b/init/Kconfig
index 78479ebfcea2..4bd07806e65b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1415,6 +1415,22 @@ config USER_NS
 
 	  If unsure, say N.
 
+config USER_NS_UNPRIVILEGED
+	bool "Allow unprivileged users to create namespaces"
+	depends on USER_NS
+	default n
+	help
+	  When disabled, unprivileged users will not be able to create
+	  new namespaces. Allowing users to create their own namespaces
+	  has been part of several recent local privilege escalation
+	  exploits, so if you need user namespaces but are
+	  paranoid^Wsecurity-conscious you want to disable this.
+
+	  This setting can be overridden at runtime via the
+	  kernel.unprivileged_userns_clone sysctl.
+
+	  If unsure, say N.
+
 config PID_NS
 	bool "PID Namespaces"
 	default y
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index c84977d660be..b54a9a25d1c3 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -24,7 +24,11 @@
 #include <linux/nstree.h>
 
 /* sysctl */
+#ifdef CONFIG_USER_NS_UNPRIVILEGED
+int unprivileged_userns_clone = 1;
+#else
 int unprivileged_userns_clone;
+#endif
 
 static struct kmem_cache *user_ns_cachep __ro_after_init;
 static DEFINE_MUTEX(userns_state_mutex);

From 0ea2e04a27d977b1768d6a4a28f5a974b91090f3 Mon Sep 17 00:00:00 2001
From: Emese Revfy <re.emese@gmail.com>
Date: Tue, 31 May 2016 01:34:02 +0200
Subject: [PATCH 067/109] Add the extra_latent_entropy kernel parameter

When extra_latent_entropy is passed on the kernel command line,
entropy will be extracted from up to the first 4GB of RAM while the
runtime memory allocator is being initialized.

Based on work created by the PaX Team.

Signed-off-by: Emese Revfy <re.emese@gmail.com>
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 .../admin-guide/kernel-parameters.txt         |  5 ++++
 mm/page_alloc.c                               | 24 +++++++++++++++++++
 scripts/gcc-plugins/Kconfig                   |  5 ++++
 3 files changed, 34 insertions(+)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 03a550630644..4cebb97fe82f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5055,6 +5055,11 @@ Kernel parameters
 			the specified number of seconds.  This is to be used if
 			your oopses keep scrolling off the screen.
 
+	extra_latent_entropy
+			Enable a very simple form of latent entropy extraction
+			from the first 4GB of memory as the bootmem allocator
+			passes the memory pages to the buddy allocator.
+
 	pcbit=		[HW,ISDN]
 
 	pci=option[,option...]	[PCI,EARLY] various PCI subsystem options.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bce29a3574fa..99579c0673ea 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -213,6 +213,15 @@ EXPORT_PER_CPU_SYMBOL(_numa_mem_);
 
 static DEFINE_MUTEX(pcpu_drain_mutex);
 
+bool __meminitdata extra_latent_entropy;
+
+static int __init setup_extra_latent_entropy(char *str)
+{
+	extra_latent_entropy = true;
+	return 0;
+}
+early_param("extra_latent_entropy", setup_extra_latent_entropy);
+
 #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
 volatile unsigned long latent_entropy __latent_entropy;
 EXPORT_SYMBOL(latent_entropy);
@@ -1653,6 +1662,21 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
 			set_page_count(p, 0);
 		}
 
+		if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) {
+			unsigned long hash = 0;
+			size_t index, end = PAGE_SIZE * nr_pages / sizeof hash;
+			const unsigned long *data = lowmem_page_address(page);
+
+			for (index = 0; index < end; index++)
+				hash ^= hash + data[index];
+#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
+			latent_entropy ^= hash;
+			add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy));
+#else
+			add_device_randomness((const void *)&hash, sizeof(hash));
+#endif
+		}
+
 		/* memblock adjusts totalram_pages() manually. */
 		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
 	}
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index 6b34ba19358d..d83e715c9d40 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -29,6 +29,11 @@ config GCC_PLUGIN_LATENT_ENTROPY
 	  is some slowdown of the boot process (about 0.5%) and fork and
 	  irq processing.
 
+	  When extra_latent_entropy is passed on the kernel command line,
+	  entropy will be extracted from up to the first 4GB of RAM while the
+	  runtime memory allocator is being initialized.  This costs even more
+	  slowdown of the boot process.
+
 	  Note that entropy extracted this way is not cryptographically
 	  secure!
 

From 0aa9fcd887f2262ba14b08957b45f6d370650064 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 15 May 2017 23:45:34 -0400
Subject: [PATCH 068/109] ata: avoid null pointer dereference on bug

Extracted from PaX.

[nicolas.bouchinet@ssi.gouv.fr: BUG_ON NULL ptr deref removed in 5bb52d926598a0]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 drivers/ata/libata-core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 374993031895..ef51e7adf101 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4929,6 +4929,7 @@ void __ata_qc_complete(struct ata_queued_cmd *qc)
 	struct ata_port *ap;
 	struct ata_link *link;
 
+	BUG_ON(qc == NULL); /* ata_qc_from_tag _might_ return NULL */
 	if (WARN_ON_ONCE(!(qc->flags & ATA_QCFLAG_ACTIVE)))
 		return;
 

From 6e385681278d9cf50db08c6bf4a985b9aa6b97b0 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 15 May 2017 23:51:12 -0400
Subject: [PATCH 069/109] sanity check for negative length in nla_memcpy

Extracted from PaX.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 lib/nlattr.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/nlattr.c b/lib/nlattr.c
index be9c576b6e2d..484d839bcf5e 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -837,6 +837,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count)
 {
 	int minlen = min_t(int, count, nla_len(src));
 
+	BUG_ON(minlen < 0);
+
 	memcpy(dest, nla_data(src), minlen);
 	if (count > minlen)
 		memset(dest + minlen, 0, count - minlen);

From 9787b97285b3cbd447d947ed7829ddf077967566 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 16 May 2017 00:59:48 -0400
Subject: [PATCH 070/109] PaX shadow cr4 sanity check (essentially a revert)

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
[levente@leventepolyak.net: Adapt to cpu_tlbstate moved out-of-line]
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[thibaut.sautereau@ssi.gouv.fr: Move BUG_ON from native_flush_tlb_global() to
				new __native_tlb_flush_global() helper]
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
---
 arch/x86/include/asm/tlbflush.h | 1 +
 arch/x86/kernel/cpu/common.c    | 1 +
 arch/x86/kernel/process.c       | 1 +
 3 files changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 5a3cdc439e38..c11396cc1a44 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -484,6 +484,7 @@ static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
 
 static inline void __native_tlb_flush_global(unsigned long cr4)
 {
+	BUG_ON(cr4 != __read_cr4());
 	native_write_cr4(cr4 ^ X86_CR4_PGE);
 	native_write_cr4(cr4);
 }
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ec0670114efa..0f7c2a75ba1a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -494,6 +494,7 @@ EXPORT_SYMBOL_GPL(native_write_cr4);
 void cr4_update_irqsoff(unsigned long set, unsigned long clear)
 {
 	unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	BUG_ON(cr4 != __read_cr4());
 
 	lockdep_assert_irqs_disabled();
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4c718f8adc59..ea45acfe4f03 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -706,6 +706,7 @@ void speculation_ctrl_update_current(void)
 static inline void cr4_toggle_bits_irqsoff(unsigned long mask)
 {
 	unsigned long newval, cr4 = this_cpu_read(cpu_tlbstate.cr4);
+	BUG_ON(cr4 != __read_cr4());
 
 	newval = cr4 ^ mask;
 	if (newval != cr4) {

From 4d51c545f2e355a22af75288ee685fd4653bd65c Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 9 Jul 2017 17:53:23 -0400
Subject: [PATCH 071/109] add writable function pointer detection

Taken from the public PaX patches.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 scripts/mod/modpost.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index c3bc801d8b2d..89fe6f630793 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -49,6 +49,7 @@ static bool sec_mismatch_warn_only = true;
 /* Trim EXPORT_SYMBOLs that are unused by in-tree modules */
 static bool trim_unused_exports;
 
+static int writable_fptr_count = 0;
 /* ignore missing files */
 static bool ignore_missing_files;
 /* If set to 1, only warn (instead of error) about missing ns imports */
@@ -814,6 +815,7 @@ enum mismatch {
 	ANY_INIT_TO_ANY_EXIT,
 	ANY_EXIT_TO_ANY_INIT,
 	EXTABLE_TO_NON_TEXT,
+	DATA_TO_TEXT
 };
 
 /**
@@ -870,6 +872,12 @@ static const struct sectioncheck sectioncheck[] = {
 	.bad_tosec = { ".altinstr_replacement", NULL },
 	.good_tosec = {ALL_TEXT_SECTIONS , NULL},
 	.mismatch = EXTABLE_TO_NON_TEXT,
+},
+/* Do not reference code from writable data */
+{
+	.fromsec = { DATA_SECTIONS, NULL },
+	.bad_tosec = { ALL_TEXT_SECTIONS, NULL },
+	.mismatch = DATA_TO_TEXT
 }
 };
 
@@ -1035,7 +1043,10 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf,
 	if (!secref_whitelist(fromsec, fromsym, tosec, tosym))
 		return;
 
-	sec_mismatch_count++;
+	if (mismatch->mismatch == DATA_TO_TEXT)
+		writable_fptr_count++;
+	else
+		sec_mismatch_count++;
 
 	if (!tosym[0])
 		snprintf(taddr_str, sizeof(taddr_str), "0x%x", (unsigned int)taddr);
@@ -1069,6 +1080,11 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf,
 		else
 			error("%s+0x%lx references non-executable section '%s'\n",
 			      fromsec, (long)faddr, tosec);
+	} else if (mismatch->mismatch == DATA_TO_TEXT) {
+		fprintf(stderr,
+		"The %s:%s references\n"
+		"the %s:%s\n",
+		fromsec, fromsym, tosec, tosym);
 	}
 }
 
@@ -2387,5 +2403,9 @@ int main(int argc, char **argv)
 		warn("suppressed %u unresolved symbol warnings because there were too many)\n",
 		     nr_unresolved - MAX_UNRESOLVED_REPORTS);
 
+	if (writable_fptr_count) /* no "modpost: " here: warn() adds it */
+		warn("Found %d writable function pointer(s).\n",
+		     writable_fptr_count);
+
 	return error_occurred ? 1 : 0;
 }

From 26a2762b46109dbf947d3865efd88f57db1182ff Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 9 Jul 2017 17:20:29 -0400
Subject: [PATCH 072/109] support overriding early audit kernel cmdline

---
 kernel/audit.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/audit.c b/kernel/audit.c
index d3a8268998d7..485d169d8dc4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1786,6 +1786,9 @@ static int __init audit_enable(char *str)
 
 	if (audit_default == AUDIT_OFF)
 		audit_initialized = AUDIT_DISABLED;
+	else if (!audit_ever_enabled)
+		audit_initialized = AUDIT_UNINITIALIZED;
+
 	if (audit_set_enabled(audit_default))
 		pr_err("audit: error setting audit state (%d)\n",
 		       audit_default);

From 63093edc4f756367642a1d52b050e2b7a1767b33 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sat, 26 Aug 2017 20:16:03 -0400
Subject: [PATCH 073/109] Revert "mm: revert x86_64 and arm64 ELF_ET_DYN_BASE
 base changes"

This reverts commit aab425db4279aeb83b7911693f0cccbd3644c9fd.
---
 arch/arm64/include/asm/elf.h | 8 ++------
 arch/x86/include/asm/elf.h   | 4 ++--
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index d2779d604c7b..c2fcacca8361 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -124,14 +124,10 @@
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is above 4GB to leave the entire 32-bit address
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
-#ifdef CONFIG_ARM64_FORCE_52BIT
-#define ELF_ET_DYN_BASE		(2 * TASK_SIZE_64 / 3)
-#else
-#define ELF_ET_DYN_BASE		(2 * DEFAULT_MAP_WINDOW_64 / 3)
-#endif /* CONFIG_ARM64_FORCE_52BIT */
+#define ELF_ET_DYN_BASE		0x100000000UL
 
 #ifndef __ASSEMBLER__
 
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 2ba5f166e58f..c251278ba009 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -229,11 +229,11 @@ extern int force_personality32;
 
 /*
  * This is the base location for PIE (ET_DYN with INTERP) loads. On
- * 64-bit, this is above 4GB to leave the entire 32-bit address
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
  * space open for things that want to use the area for 32-bit pointers.
  */
 #define ELF_ET_DYN_BASE		(mmap_is_ia32() ? 0x000400000UL : \
-						  (DEFAULT_MAP_WINDOW / 3 * 2))
+						  0x100000000UL)
 
 /* This yields a mask that user programs can use to figure out what
    instruction set this CPU supports.  This could be done in user space,

From 60d1ea047630914232c5dacfa8b397b06b6d80c3 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 21 May 2017 20:30:44 -0400
Subject: [PATCH 074/109] x86: determine stack entropy based on mmap entropy

Stack mapping entropy is currently hard-wired to 11 bits of entropy on
32-bit and 22 bits of entropy on 64-bit. The stack itself gains an extra
8 bits of entropy from lower bit randomization within 16 byte alignment
constraints. The argument block could have all lower bits randomized but
it currently only gets the mapping randomization.

Rather than hard-wiring values this switches to using the mmap entropy
configuration like the mmap base and executable base, resulting in a
range of 8 to 16 bits on 32-bit and 28 to 32 bits on 64-bit depending on
kernel configuration and overridable via the sysctl entries.

It's worth noting that since these kernel configuration options default
to the minimum supported entropy value, the entropy on 32-bit will drop
from 11 to 8 bits for builds using the defaults. However, following the
configuration seems like the right thing to do regardless. At the very
least, changing the defaults for COMPAT (32-bit processes on 64-bit)
should be considered due to the larger address space compared to real
32-bit.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/x86/include/asm/elf.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index c251278ba009..8189290a13a5 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -315,8 +315,8 @@ extern unsigned long get_sigframe_size(void);
 
 #ifdef CONFIG_X86_32
 
-#define __STACK_RND_MASK(is32bit) (0x7ff)
-#define STACK_RND_MASK (0x7ff)
+#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1)
+#define STACK_RND_MASK ((1UL << mmap_rnd_bits) - 1)
 
 #define ARCH_DLINFO		ARCH_DLINFO_IA32
 
@@ -325,7 +325,11 @@ extern unsigned long get_sigframe_size(void);
 #else /* CONFIG_X86_32 */
 
 /* 1GB for 64bit, 8MB for 32bit */
-#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+#ifdef CONFIG_COMPAT
+#define __STACK_RND_MASK(is32bit) ((is32bit) ? (1UL << mmap_rnd_compat_bits) - 1 : (1UL << mmap_rnd_bits) - 1)
+#else
+#define __STACK_RND_MASK(is32bit) ((1UL << mmap_rnd_bits) - 1)
+#endif
 #define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
 
 #define ARCH_DLINFO							\

From ade5d137f7861519c2c51a4c336289d9cd870a9a Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Mon, 22 May 2017 05:06:20 -0400
Subject: [PATCH 075/109] arm64: determine stack entropy based on mmap entropy

Stack mapping entropy is currently hard-wired to 11 bits of entropy on
32-bit and 18 bits of entropy on 64-bit. The stack itself gains an extra
8 bits of entropy from lower bit randomization within 16 byte alignment
constraints. The argument block could have all lower bits randomized but
it currently only gets the mapping randomization.

Rather than hard-wiring values this switches to using the mmap entropy
configuration like the mmap base and executable base, resulting in a
range of 8 to 16 bits on 32-bit and 18 to 24 bits on 64-bit (with 4k
pages and 3 level page tables) depending on kernel configuration and
overridable via the sysctl entries.

It's worth noting that since these kernel configuration options default
to the minimum supported entropy value, the entropy on 32-bit will drop
from 11 to 8 bits for builds using the defaults. However, following the
configuration seems like the right thing to do regardless. At the very
least, changing the defaults for COMPAT (32-bit processes on 64-bit)
should be considered due to the larger address space compared to real
32-bit.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/arm64/include/asm/elf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index c2fcacca8361..ed0d158955e2 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -185,10 +185,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
 /* 1GB of VA */
 #ifdef CONFIG_COMPAT
 #define STACK_RND_MASK			(test_thread_flag(TIF_32BIT) ? \
-						0x7ff >> (PAGE_SHIFT - 12) : \
-						0x3ffff >> (PAGE_SHIFT - 12))
+						((1UL << mmap_rnd_compat_bits) - 1) >> (PAGE_SHIFT - 12) : \
+						((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12))
 #else
-#define STACK_RND_MASK			(0x3ffff >> (PAGE_SHIFT - 12))
+#define STACK_RND_MASK			(((1UL << mmap_rnd_bits) - 1) >> (PAGE_SHIFT - 12))
 #endif
 
 #ifdef __AARCH64EB__

From 779a7c10613256f5f4615da6f1a88f97563f5b98 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 11 May 2017 16:02:49 -0400
Subject: [PATCH 076/109] randomize lower bits of the argument block

This was based on the PaX RANDUSTACK feature in grsecurity, where all of
the lower bits are randomized. PaX keeps 16-byte alignment.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
[levente@leventepolyak.net: do not randomize with ADDR_NO_RANDOMIZE personality]
[levente@leventepolyak.net: adjust for mm: abstract initial stack setup to mm subsystem]
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[nicolas.bouchinet@oss.cyber.gouv.fr: mm initialisation has moved to mm/vma_exec.c]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@oss.cyber.gouv.fr>
---
 mm/vma_exec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/vma_exec.c b/mm/vma_exec.c
index 8134e1afca68..4e747ea55e52 100644
--- a/mm/vma_exec.c
+++ b/mm/vma_exec.c
@@ -7,6 +7,7 @@
 
 #include "vma_internal.h"
 #include "vma.h"
+#include <linux/random.h>
 
 /*
  * Relocate a VMA downwards by shift bytes. There cannot be any VMAs between
@@ -151,6 +152,8 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
 	mmap_write_unlock(mm);
 	*vmap = vma;
 	*top_mem_p = vma->vm_end - sizeof(void *);
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		*top_mem_p ^= get_random_u32() & ~PAGE_MASK;
 	return 0;
 
 err:

From 40c1dd89598b627d4fa0501d3aa6279caac26633 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 30 May 2017 18:03:30 -0400
Subject: [PATCH 077/109] support randomizing the lower bits of brk

This adds support for arch_randomize_brk implementations not performing
page alignment in order to randomize the lower bits of the brk heap.

This idea is taken from PaX, but the approach is different. It reuses
the existing code and does not force early creation of the heap mapping,
so the heap is never mapped if it is unused, which is the case with many
modern allocators based solely on mmap.

The malloc implementation can be relied upon to align this as needed to
the requirements it has, so using 16 byte alignment here is unnecessary.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 mm/mmap.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/mm/mmap.c b/mm/mmap.c
index 843160946aa5..dd6a759ec059 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -153,6 +153,13 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 	newbrk = PAGE_ALIGN(brk);
 	oldbrk = PAGE_ALIGN(mm->brk);
+	/* properly handle unaligned min_brk as an empty heap */
+	if (min_brk & ~PAGE_MASK) {
+		if (brk == min_brk)
+			newbrk -= PAGE_SIZE;
+		if (mm->brk == min_brk)
+			oldbrk -= PAGE_SIZE;
+	}
 	if (oldbrk == newbrk) {
 		mm->brk = brk;
 		goto success;

From 6a7a1c8ebe52cb8aa4da5e1d59445439fba19ea1 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 1 Jun 2017 03:22:38 -0400
Subject: [PATCH 078/109] mm: randomize lower bits of brk

Per PaX, but for this alternate brk randomization approach.

As part of the v5.4 linux-hardened rebase, this commit was adapted from
the arm64 specific brk randomization to all arches that use the generic
topdown mmap layout functions, introduced in e7142bf5d231 ("arm64, mm:
make randomization selected by generic topdown mmap layout").

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 mm/util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/util.c b/mm/util.c
index a14de66c9458..28d7931944c2 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -391,9 +391,9 @@ unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
 {
 	/* Is the current task 32bit ? */
 	if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
-		return randomize_page(mm->brk, SZ_32M);
+		return mm->brk + get_random_long() % SZ_32M;
 
-	return randomize_page(mm->brk, SZ_1G);
+	return mm->brk + get_random_long() % SZ_1G;
 }
 
 unsigned long arch_mmap_rnd(void)

From 7a918f563af8bdf18227283c8decb04ac39b680f Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 1 Jun 2017 03:23:06 -0400
Subject: [PATCH 079/109] x86: randomize lower bits of brk

Per PaX, but for this alternate brk randomization approach.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/x86/kernel/process.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ea45acfe4f03..b92b3474470a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1027,9 +1027,9 @@ unsigned long arch_align_stack(unsigned long sp)
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
 	if (mmap_is_ia32())
-		return randomize_page(mm->brk, SZ_32M);
+		return mm->brk + get_random_long() % SZ_32M;
 
-	return randomize_page(mm->brk, SZ_1G);
+	return mm->brk + get_random_long() % SZ_1G;
 }
 
 /*

From e5ce446baf60a94c17d525f2e616afe4f9ffb3f7 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 1 Jun 2017 03:23:39 -0400
Subject: [PATCH 080/109] mm: guarantee brk gap is at least one page

Per PaX, but for this alternate brk randomization approach.

As part of the v5.4 linux-hardened rebase, this commit was adapted from
the arm64 specific brk randomization to all arches that use the generic
topdown mmap layout functions, introduced in e7142bf5d231 ("arm64, mm:
make randomization selected by generic topdown mmap layout").

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 mm/util.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/util.c b/mm/util.c
index 28d7931944c2..e39fe7b338c9 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -391,9 +391,9 @@ unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
 {
 	/* Is the current task 32bit ? */
 	if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
-		return mm->brk + get_random_long() % SZ_32M;
+		return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE;
 
-	return mm->brk + get_random_long() % SZ_1G;
+	return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE;
 }
 
 unsigned long arch_mmap_rnd(void)

From 7d63dcdd1fdd813369efcdec7dc8bea565cafa5e Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Thu, 1 Jun 2017 03:23:48 -0400
Subject: [PATCH 081/109] x86: guarantee brk gap is at least one page

Per PaX, but for this alternate brk randomization approach.

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 arch/x86/kernel/process.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b92b3474470a..63c96edc60a5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1027,9 +1027,9 @@ unsigned long arch_align_stack(unsigned long sp)
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
 	if (mmap_is_ia32())
-		return mm->brk + get_random_long() % SZ_32M;
+		return mm->brk + get_random_long() % SZ_32M + PAGE_SIZE;
 
-	return mm->brk + get_random_long() % SZ_1G;
+	return mm->brk + get_random_long() % SZ_1G + PAGE_SIZE;
 }
 
 /*

From b7d36ecd61762f5b9d8fbe4a875c30132d86e3a5 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 16 May 2017 18:26:10 -0400
Subject: [PATCH 082/109] restrict device timing side channels

Based on the public grsecurity patches.

Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
[levente@leventepolyak.net: move sysctl from kernel into fs]
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 fs/inode.c                 | 13 +++++++++++++
 fs/stat.c                  | 23 ++++++++++++++++++++---
 include/linux/capability.h |  5 +++++
 include/linux/fs.h         | 11 +++++++++++
 include/linux/fsnotify.h   |  3 +++
 kernel/capability.c        |  6 ++++++
 6 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index cc12b68e021b..9209fa27b417 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -168,6 +168,10 @@ late_initcall(mg_debugfs_init);
 
 #endif /* CONFIG_DEBUG_FS */
 
+/* sysctl */
+int device_sidechannel_restrict __read_mostly = 1;
+EXPORT_SYMBOL(device_sidechannel_restrict);
+
 /*
  * Handle nr_inode sysctl
  */
@@ -200,6 +204,15 @@ static const struct ctl_table inodes_sysctls[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_nr_inodes,
 	},
+	{
+		.procname	= "device_sidechannel_restrict",
+		.data		= &device_sidechannel_restrict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 };
 
 static int __init init_fs_inode_sysctls(void)
diff --git a/fs/stat.c b/fs/stat.c
index 89909746bed1..92642e583e8c 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -52,7 +52,10 @@ void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode)
 		return;
 	}
 
-	stat->mtime = inode_get_mtime(inode);
+	if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD))
+		stat->mtime = inode_get_ctime(inode);
+	else
+		stat->mtime = inode_get_mtime(inode);
 	stat->ctime.tv_sec = inode->i_ctime_sec;
 	stat->ctime.tv_nsec = (u32)atomic_read(pcn);
 	if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED))
@@ -84,6 +87,7 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
 {
 	vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
 	vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
+	bool sidechannel_device = false;
 
 	stat->dev = inode->i_sb->s_dev;
 	stat->ino = inode->i_ino;
@@ -93,13 +97,22 @@ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
 	stat->gid = vfsgid_into_kgid(vfsgid);
 	stat->rdev = inode->i_rdev;
 	stat->size = i_size_read(inode);
-	stat->atime = inode_get_atime(inode);
+
+	if (is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD))
+		sidechannel_device = true;
+	if (sidechannel_device)
+		stat->atime = inode_get_ctime(inode);
+	else
+		stat->atime = inode_get_atime(inode);
 
 	if (is_mgtime(inode)) {
 		fill_mg_cmtime(stat, request_mask, inode);
 	} else {
 		stat->ctime = inode_get_ctime(inode);
-		stat->mtime = inode_get_mtime(inode);
+		if (sidechannel_device)
+			stat->mtime = inode_get_ctime(inode);
+		else
+			stat->mtime = inode_get_mtime(inode);
 	}
 
 	stat->blksize = i_blocksize(inode);
@@ -212,6 +225,10 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
 
 		ret = inode->i_op->getattr(idmap, path, stat, request_mask,
 				query_flags);
+		if (!ret && is_sidechannel_device(inode) && !capable_noaudit(CAP_MKNOD)) {
+			stat->atime = stat->ctime;
+			stat->mtime = stat->ctime;
+		}
 		if (ret)
 			return ret;
 	} else {
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 37db92b3d6f8..873416ba884c 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -145,6 +145,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap);
 extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
+extern bool capable_noaudit(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
 extern bool ns_capable_noaudit(struct user_namespace *ns, int cap);
 extern bool ns_capable_setid(struct user_namespace *ns, int cap);
@@ -167,6 +168,10 @@ static inline bool capable(int cap)
 {
 	return true;
 }
+static inline bool capable_noaudit(int cap)
+{
+	return true;
+}
 static inline bool ns_capable(struct user_namespace *ns, int cap)
 {
 	return true;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ef17f9e211e4..ba776fdcbee5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3656,4 +3656,15 @@ static inline bool extensible_ioctl_valid(unsigned int cmd_a,
 	return true;
 }
 
+extern int device_sidechannel_restrict;
+
+static inline bool is_sidechannel_device(const struct inode *inode)
+{
+	umode_t mode;
+	if (!device_sidechannel_restrict)
+		return false;
+	mode = inode->i_mode;
+	return ((S_ISCHR(mode) || S_ISBLK(mode)) && (mode & (S_IROTH | S_IWOTH)));
+}
+
 #endif /* _LINUX_FS_H */
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 079c18bcdbde..eb8a9e769394 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -124,6 +124,9 @@ static inline int fsnotify_file(struct file *file, __u32 mask)
 	if (FMODE_FSNOTIFY_NONE(file->f_mode))
 		return 0;
 
+	if (mask & (FS_ACCESS | FS_MODIFY) && is_sidechannel_device(file_inode(file)))
+		return 0;
+
 	return fsnotify_path(&file->f_path, mask);
 }
 
diff --git a/kernel/capability.c b/kernel/capability.c
index 829f49ae07b9..5bb7ee4028ad 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -416,6 +416,12 @@ bool capable(int cap)
 	return ns_capable(&init_user_ns, cap);
 }
 EXPORT_SYMBOL(capable);
+
+bool capable_noaudit(int cap)
+{
+	return ns_capable_noaudit(&init_user_ns, cap);
+}
+EXPORT_SYMBOL(capable_noaudit);
 #endif /* CONFIG_MULTIUSER */
 
 /**

From 2e7c1ab544e2181a511a6f3b4a11cc6bfe636b51 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Sun, 6 Sep 2020 20:28:32 +0200
Subject: [PATCH 083/109] sysctl: expose proc_dointvec_minmax_sysadmin as API
 function

Like the other sysctl proc handlers, expose the variant that checks
CAP_SYS_ADMIN on write so that it can be used in the sysctl tables of
other subsystems.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[nicolas.bouchinet@ssi.gouv.fr: Constify the ctl_table argument as in commit 78eb4ea25cd5fd]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 include/linux/sysctl.h |  2 ++
 kernel/printk/sysctl.c |  9 ---------
 kernel/sysctl.c        | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 2886fbceb5d6..3e0b05485321 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -84,6 +84,8 @@ int proc_dobool(const struct ctl_table *table, int write, void *buffer,
 int proc_dointvec(const struct ctl_table *, int, void *, size_t *, loff_t *);
 int proc_dointvec_minmax(const struct ctl_table *table, int dir, void *buffer,
 			 size_t *lenp, loff_t *ppos);
+int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int dir,
+				  void *buffer, size_t *lenp, loff_t *ppos);
 int proc_dointvec_conv(const struct ctl_table *table, int dir, void *buffer,
 		       size_t *lenp, loff_t *ppos,
 		       int (*conv)(bool *negp, unsigned long *u_ptr, int *k_ptr,
diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
index f15732e93c2e..1b8a2a652b19 100644
--- a/kernel/printk/sysctl.c
+++ b/kernel/printk/sysctl.c
@@ -10,15 +10,6 @@
 
 static const int ten_thousand = 10000;
 
-static int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int write,
-				void *buffer, size_t *lenp, loff_t *ppos)
-{
-	if (write && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-
 static const struct ctl_table printk_sysctls[] = {
 	{
 		.procname	= "printk",
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 57613459d7d0..6a9bc5747a76 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -867,6 +867,35 @@ int proc_douintvec(const struct ctl_table *table, int dir, void *buffer,
 				 do_proc_uint_conv);
 }
 
+/**
+ * proc_dointvec_minmax_sysadmin - read a vector of integers with min/max values
+ * checking CAP_SYS_ADMIN on write
+ * @table: the sysctl table
+ * @dir: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ * values from/to the user buffer, treated as an ASCII string.
+ *
+ * This routine will ensure the values are within the range specified by
+ * table->extra1 (min) and table->extra2 (max).
+ *
+ * Writing is only allowed when the current task has CAP_SYS_ADMIN.
+ *
+ * Returns 0 on success, -EPERM on permission failure or -EINVAL on write
+ * when the range check fails.
+ */
+int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int dir,
+				  void *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (dir && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return proc_dointvec_minmax(table, dir, buffer, lenp, ppos);
+}
+
 /**
  * proc_dointvec_minmax - read a vector of integers with min/max values
  * @table: the sysctl table
@@ -1321,6 +1350,12 @@ int proc_doulongvec_minmax(const struct ctl_table *table, int dir,
 	return -ENOSYS;
 }
 
+int proc_dointvec_minmax_sysadmin(const struct ctl_table *table, int dir,
+				  void *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
 int proc_doulongvec_minmax_conv(const struct ctl_table *table, int dir,
 				void *buffer, size_t *lenp, loff_t *ppos,
 				unsigned long convmul, unsigned long convdiv)
@@ -1448,6 +1483,7 @@ EXPORT_SYMBOL(proc_dointvec);
 EXPORT_SYMBOL(proc_douintvec);
 EXPORT_SYMBOL(proc_dointvec_minmax);
 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
+EXPORT_SYMBOL(proc_dointvec_minmax_sysadmin);
 EXPORT_SYMBOL(proc_dostring);
 EXPORT_SYMBOL(proc_doulongvec_minmax);
 EXPORT_SYMBOL(proc_do_large_bitmap);

From 34c2c2f8f222d32c014b37f75f8ead62cc381769 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Tue, 16 May 2017 17:51:48 -0400
Subject: [PATCH 084/109] usb: add toggle for disabling newly added USB devices

Based on the public grsecurity patches.

[thibaut.sautereau@ssi.gouv.fr: Adapt to sysctl code refactoring]
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[thibaut.sautereau@ssi.gouv.fr: Adapt to sysctl code refactoring]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@oss.cyber.gouv.fr>
---
 drivers/usb/core/hub.c |  9 +++++++++
 include/linux/usb.h    |  3 +++
 kernel/sysctl.c        | 14 ++++++++++++++
 3 files changed, 26 insertions(+)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 24960ba9caa9..d032245ac3b7 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5387,6 +5387,9 @@ static int descriptors_changed(struct usb_device *udev,
 	return changed;
 }
 
+/* sysctl */
+int deny_new_usb __read_mostly = 0;
+
 static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 		u16 portchange)
 {
@@ -5448,6 +5451,12 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 			goto done;
 		return;
 	}
+
+	if (deny_new_usb) {
+		dev_err(&port_dev->dev, "denied insert of USB device on port %d\n", port1);
+		goto done;
+	}
+
 	if (hub_is_superspeed(hub->hdev))
 		unit_load = 150;
 	else
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 60bd4a8e919a..6504184e4c15 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -2110,6 +2110,9 @@ extern void usb_led_activity(enum usb_led_event ev);
 static inline void usb_led_activity(enum usb_led_event ev) {}
 #endif
 
+/* sysctl */
+extern int deny_new_usb;
+
 #endif  /* __KERNEL__ */
 
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6a9bc5747a76..c0cd4cbb33f2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -25,6 +25,9 @@
 #ifdef CONFIG_USER_NS
 #include <linux/user_namespace.h>
 #endif
+#if IS_ENABLED(CONFIG_USB)
+#include <linux/usb.h>
+#endif
 
 /* shared constants to be used in various sysctls */
 const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1432,6 +1435,17 @@ static const struct ctl_table sysctl_subsys_table[] = {
 		.extra1		= SYSCTL_NEG_ONE,
 		.extra2		= SYSCTL_ONE,
 	},
+#endif
+#if IS_ENABLED(CONFIG_USB)
+	{
+		.procname	= "deny_new_usb",
+		.data		= &deny_new_usb,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 #endif
 	{
 		.procname	= "ngroups_max",

From 15314a7f627640bf88a2a21892e2892009063f8e Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Sun, 6 Sep 2020 21:08:16 +0200
Subject: [PATCH 085/109] usb: implement dedicated subsystem sysctl tables

This moves the USB-related sysctl knob into a dedicated sysctl table
local to the USB core. That cleans up the global sysctl table and lets
the knob be exported and referenced properly when the USB components
are built as modules.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 drivers/usb/core/Makefile |  1 +
 drivers/usb/core/hub.c    |  3 ---
 drivers/usb/core/sysctl.c | 35 +++++++++++++++++++++++++++++++++++
 drivers/usb/core/usb.c    |  9 +++++++++
 include/linux/usb.h       | 10 +++++++++-
 kernel/sysctl.c           | 14 --------------
 6 files changed, 54 insertions(+), 18 deletions(-)
 create mode 100644 drivers/usb/core/sysctl.c

diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile
index 60ea76160122..cb5ed42e12c2 100644
--- a/drivers/usb/core/Makefile
+++ b/drivers/usb/core/Makefile
@@ -15,6 +15,7 @@ usbcore-$(CONFIG_OF)		+= of.o
 usbcore-$(CONFIG_USB_XHCI_SIDEBAND)	+= offload.o
 usbcore-$(CONFIG_USB_PCI)		+= hcd-pci.o
 usbcore-$(CONFIG_ACPI)		+= usb-acpi.o
+usbcore-$(CONFIG_SYSCTL)		+= sysctl.o
 
 ifdef CONFIG_USB_ONBOARD_DEV
 usbcore-y			+= ../misc/onboard_usb_dev_pdevs.o
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index d032245ac3b7..457d28bc990f 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -5387,9 +5387,6 @@ static int descriptors_changed(struct usb_device *udev,
 	return changed;
 }
 
-/* sysctl */
-int deny_new_usb __read_mostly = 0;
-
 static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 		u16 portchange)
 {
diff --git a/drivers/usb/core/sysctl.c b/drivers/usb/core/sysctl.c
new file mode 100644
index 000000000000..813db3f0b1cb
--- /dev/null
+++ b/drivers/usb/core/sysctl.c
@@ -0,0 +1,35 @@
+#include <linux/errno.h>
+#include <linux/printk.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/usb.h>
+
+static const struct ctl_table usb_sysctls[] = {
+	{
+		.procname	= "deny_new_usb",
+		.data		= &deny_new_usb,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,	/* writes also need CAP_SYS_ADMIN */
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,	/* accepted range is 0..1 */
+	},
+};
+
+static struct ctl_table_header *usb_sysctl_table;
+
+int usb_register_sysctl(void)
+{
+	usb_sysctl_table = register_sysctl("kernel", usb_sysctls);
+	if (!usb_sysctl_table) {
+		pr_warn("usb: sysctl registration failed\n");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void usb_unregister_sysctl(void)
+{
+	unregister_sysctl_table(usb_sysctl_table);
+	usb_sysctl_table = NULL;
+}
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index df166cafe106..e32de22b9aad 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -73,6 +73,9 @@ MODULE_PARM_DESC(autosuspend, "default autosuspend delay");
 #define usb_autosuspend_delay		0
 #endif
 
+int deny_new_usb __read_mostly = 0;
+EXPORT_SYMBOL(deny_new_usb);
+
 static bool match_endpoint(struct usb_endpoint_descriptor *epd,
 		struct usb_endpoint_descriptor **bulk_in,
 		struct usb_endpoint_descriptor **bulk_out,
@@ -1220,6 +1223,9 @@ static int __init usb_init(void)
 	usb_debugfs_init();
 
 	usb_acpi_register();
+	retval = usb_register_sysctl();
+	if (retval)
+		goto sysctl_init_failed;
 	retval = bus_register(&usb_bus_type);
 	if (retval)
 		goto bus_register_failed;
@@ -1259,6 +1265,8 @@ static int __init usb_init(void)
 bus_notifier_failed:
 	bus_unregister(&usb_bus_type);
 bus_register_failed:
+	usb_unregister_sysctl();
+sysctl_init_failed:
 	usb_acpi_unregister();
 	usb_debugfs_cleanup();
 out:
@@ -1283,6 +1291,7 @@ static void __exit usb_exit(void)
 	class_unregister(&usbmisc_class);
 	bus_unregister_notifier(&usb_bus_type, &usb_bus_nb);
 	bus_unregister(&usb_bus_type);
+	usb_unregister_sysctl();
 	usb_acpi_unregister();
 	usb_debugfs_cleanup();
 	idr_destroy(&usb_bus_idr);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 6504184e4c15..9ef7409ee97c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -2110,8 +2110,16 @@ extern void usb_led_activity(enum usb_led_event ev);
 static inline void usb_led_activity(enum usb_led_event ev) {}
 #endif
 
-/* sysctl */
+/* sysctl.c */
 extern int deny_new_usb;
+#ifdef CONFIG_SYSCTL
+extern int usb_register_sysctl(void);
+extern void usb_unregister_sysctl(void);
+#else
+static inline int usb_register_sysctl(void) { return 0; }
+static inline void usb_unregister_sysctl(void) { }
+#endif /* CONFIG_SYSCTL */
+
 
 #endif  /* __KERNEL__ */
 
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c0cd4cbb33f2..6a9bc5747a76 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -25,9 +25,6 @@
 #ifdef CONFIG_USER_NS
 #include <linux/user_namespace.h>
 #endif
-#if IS_ENABLED(CONFIG_USB)
-#include <linux/usb.h>
-#endif
 
 /* shared constants to be used in various sysctls */
 const int sysctl_vals[] = { 0, 1, 2, 3, 4, 100, 200, 1000, 3000, INT_MAX, 65535, -1 };
@@ -1435,17 +1432,6 @@ static const struct ctl_table sysctl_subsys_table[] = {
 		.extra1		= SYSCTL_NEG_ONE,
 		.extra2		= SYSCTL_ONE,
 	},
-#endif
-#if IS_ENABLED(CONFIG_USB)
-	{
-		.procname	= "deny_new_usb",
-		.data		= &deny_new_usb,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_sysadmin,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
 #endif
 	{
 		.procname	= "ngroups_max",

From 64c81e4440548b98ee1c6b58565d3736438dd8a3 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Sun, 25 Feb 2018 03:26:45 -0500
Subject: [PATCH 086/109] hard-wire legacy checkreqprot option to 0

The userspace API is left intact for compatibility.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 Documentation/admin-guide/kernel-parameters.txt | 11 -----------
 security/selinux/hooks.c                        | 12 ------------
 2 files changed, 23 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4cebb97fe82f..897c093061cc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -778,17 +778,6 @@ Kernel parameters
 			Format: { "0" | "1" }
 			Default: 0 (1 if CONFIG_DEBUG_VM is set)
 
-	checkreqprot=	[SELINUX] Set initial checkreqprot flag value.
-			Format: { "0" | "1" }
-			See security/selinux/Kconfig help text.
-			0 -- check protection applied by kernel (includes
-				any implied execute protection).
-			1 -- check protection requested by application.
-			Default value is set via a kernel config option.
-			Value can be changed at runtime via
-				/sys/fs/selinux/checkreqprot.
-			Setting checkreqprot to 1 is deprecated.
-
 	cio_ignore=	[S390]
 			See Documentation/arch/s390/common_io.rst for details.
 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 6c154a4d94b9..8b1f3501f289 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -141,18 +141,6 @@ static int __init selinux_enabled_setup(char *str)
 __setup("selinux=", selinux_enabled_setup);
 #endif
 
-static int __init checkreqprot_setup(char *str)
-{
-	unsigned long checkreqprot;
-
-	if (!kstrtoul(str, 0, &checkreqprot)) {
-		if (checkreqprot)
-			pr_err("SELinux: checkreqprot set to 1 via kernel parameter.  This is no longer supported.\n");
-	}
-	return 1;
-}
-__setup("checkreqprot=", checkreqprot_setup);
-
 /**
  * selinux_secmark_enabled - Check to see if SECMARK is currently enabled
  *

From 80f8704beae3b9e99f62f4b98c93d43d08456a2e Mon Sep 17 00:00:00 2001
From: Matt Brown <matt@nmatt.com>
Date: Mon, 29 May 2017 17:37:59 -0400
Subject: [PATCH 087/109] security: tty: Add owner user namespace to tty_struct

This patch adds struct user_namespace *owner_user_ns to the tty_struct.
Then it is set to current_user_ns() in the alloc_tty_struct function.

This is done to facilitate capability checks against the original user
namespace that allocated the tty.

E.g. ns_capable(tty->owner_user_ns,CAP_SYS_ADMIN)

This combined with the use of user namespaces will allow hardening
protections to be built to mitigate container escapes that utilize TTY
ioctls such as TIOCSTI.

See: https://bugzilla.redhat.com/show_bug.cgi?id=1411256

Acked-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Matt Brown <matt@nmatt.com>
---
 drivers/tty/tty_io.c | 2 ++
 include/linux/tty.h  | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index a5d0457e0e28..5659bb2ad472 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -171,6 +171,7 @@ static void free_tty_struct(struct tty_struct *tty)
 	tty_ldisc_deinit(tty);
 	put_device(tty->dev);
 	kvfree(tty->write_buf);
+	put_user_ns(tty->owner_user_ns);
 	kfree(tty);
 }
 
@@ -3130,6 +3131,7 @@ struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx)
 	tty->index = idx;
 	tty_line_name(driver, idx, tty->name);
 	tty->dev = tty_get_device(tty);
+	tty->owner_user_ns = get_user_ns(current_user_ns());
 
 	return tty;
 }
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 0a46e4054dec..99c733852fb2 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -14,6 +14,7 @@
 #include <uapi/linux/tty.h>
 #include <linux/rwsem.h>
 #include <linux/llist.h>
+#include <linux/user_namespace.h>
 
 
 /*
@@ -240,6 +241,7 @@ struct tty_struct {
 	struct list_head tty_files;
 
 	struct work_struct SAK_work;
+	struct user_namespace *owner_user_ns;
 } __randomize_layout;
 
 /* Each of a tty's open files has private_data pointing to tty_file_private */

From 63b5eb7b48f79951580bb52063d82bfefd627275 Mon Sep 17 00:00:00 2001
From: Matt Brown <matt@nmatt.com>
Date: Mon, 29 May 2017 17:38:00 -0400
Subject: [PATCH 088/109] security: tty: make TIOCSTI ioctl require
 CAP_SYS_ADMIN

This introduces the tiocsti_restrict sysctl, whose default is controlled
via CONFIG_SECURITY_TIOCSTI_RESTRICT. When activated, this control
restricts all TIOCSTI ioctl calls from non CAP_SYS_ADMIN users.

This patch depends on patch 1/2

This patch was inspired from GRKERNSEC_HARDEN_TTY.

This patch would have prevented
https://bugzilla.redhat.com/show_bug.cgi?id=1411256 under the following
conditions:
* non-privileged container
* container run inside new user namespace

Possible effects on userland:

There could be a few user programs that would be affected by this
change.
See: <https://codesearch.debian.net/search?q=ioctl%5C%28.*TIOCSTI>
notable programs are: agetty, csh, xemacs and tcsh

However, I still believe that this change is worth it given that the
Kconfig defaults to n. This will be a feature that is turned on for the
same reason that people activate it when using grsecurity. Users of this
opt-in feature will realize that they are choosing security over some OS
features like unprivileged TIOCSTI ioctls, as should be clear in the
Kconfig help message.

Threat Model/Patch Rationale:

>From grsecurity's config for GRKERNSEC_HARDEN_TTY.

 | There are very few legitimate uses for this functionality and it
 | has made vulnerabilities in several 'su'-like programs possible in
 | the past.  Even without these vulnerabilities, it provides an
 | attacker with an easy mechanism to move laterally among other
 | processes within the same user's compromised session.

So if one process within a tty session becomes compromised, it follows
that additional processes that are thought to be in different security
boundaries can be compromised as a result. When using a program like su
or sudo, these additional processes could be in a tty session where TTY
file descriptors are indeed shared over privilege boundaries.

This is also an excellent writeup about the issue:
<http://www.halfdog.net/Security/2012/TtyPushbackPrivilegeEscalation/>

When user namespaces are in use, the check for the capability
CAP_SYS_ADMIN is done against the user namespace that originally opened
the tty.

Acked-by: Serge Hallyn <serge@hallyn.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Matt Brown <matt@nmatt.com>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 Documentation/admin-guide/sysctl/kernel.rst | 20 ++++++++++++++++++++
 drivers/tty/tty_io.c                        | 16 ++++++++++++++++
 security/Kconfig                            | 13 +++++++++++++
 3 files changed, 49 insertions(+)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 0a0384d2376e..2b001d43b137 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1598,6 +1598,26 @@ allow them to remain in low power states longer.
 
 Default is set (1).
 
+tiocsti_restrict
+================
+
+This toggle indicates whether unprivileged users are prevented from using the
+``TIOCSTI`` ioctl to inject commands into other processes which share a tty
+session.
+
+= ============================================================================
+0 No restriction, except the default one of only being able to inject commands
+  into one's own tty.
+1 Users must have ``CAP_SYS_ADMIN`` to use the ``TIOCSTI`` ioctl.
+= ============================================================================
+
+When user namespaces are in use, the check for ``CAP_SYS_ADMIN`` is done
+against the user namespace that originally opened the tty.
+
+The kernel config option ``CONFIG_SECURITY_TIOCSTI_RESTRICT`` sets the default
+value of ``tiocsti_restrict``.
+
+
 traceoff_on_warning
 ===================
 
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 5659bb2ad472..dc32c90a4424 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2257,6 +2257,7 @@ static int tty_fasync(int fd, struct file *filp, int on)
 }
 
 static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI);
+static int tty_tiocsti_restrict __read_mostly = IS_ENABLED(CONFIG_SECURITY_TIOCSTI_RESTRICT);
 /**
  * tiocsti - fake input character
  * @tty: tty to fake input into
@@ -2278,6 +2279,12 @@ static int tiocsti(struct tty_struct *tty, u8 __user *p)
 	if (!tty_legacy_tiocsti && !capable(CAP_SYS_ADMIN))
 		return -EIO;
 
+	if (tty_tiocsti_restrict &&
+		!ns_capable(tty->owner_user_ns, CAP_SYS_ADMIN)) {
+		dev_warn_ratelimited(tty->dev,
+			"Denied TIOCSTI ioctl for non-privileged process\n");
+		return -EPERM;
+	}
 	if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (get_user(ch, p))
@@ -3619,6 +3626,15 @@ static const struct ctl_table tty_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+	{
+		.procname	= "tiocsti_restrict",
+		.data		= &tty_tiocsti_restrict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 };
 
 /*
diff --git a/security/Kconfig b/security/Kconfig
index 06c66ff55ba6..72e488f36469 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -82,6 +82,19 @@ config SECURITY_PERF_EVENTS_RESTRICT
 	  perf_event_open syscall will be permitted unless it is
 	  changed.
 
+config SECURITY_TIOCSTI_RESTRICT
+	bool "Restrict unprivileged use of tiocsti command injection"
+	default n
+	help
+	  This enforces restrictions on unprivileged users injecting commands
+	  into other processes which share a tty session using the TIOCSTI
+	  ioctl. This option makes TIOCSTI use require CAP_SYS_ADMIN.
+
+	  If this option is not selected, no restrictions will be enforced
+	  unless the tiocsti_restrict sysctl is explicitly set to (1).
+
+	  If you are unsure how to answer this question, answer N.
+
 config SECURITY
 	bool "Enable different security models"
 	depends on SYSFS

From 61c856c0a8cbccee3bae74b41d83b868ec3230e6 Mon Sep 17 00:00:00 2001
From: Daniel Micay <danielmicay@gmail.com>
Date: Wed, 3 May 2017 23:36:14 -0400
Subject: [PATCH 089/109] enable SECURITY_TIOCSTI_RESTRICT by default

Signed-off-by: Daniel Micay <danielmicay@gmail.com>
---
 security/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/Kconfig b/security/Kconfig
index 72e488f36469..4a9e016fa16d 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -84,7 +84,7 @@ config SECURITY_PERF_EVENTS_RESTRICT
 
 config SECURITY_TIOCSTI_RESTRICT
 	bool "Restrict unprivileged use of tiocsti command injection"
-	default n
+	default y
 	help
 	  This enforces restrictions on unprivileged users injecting commands
 	  into other processes which share a tty session using the TIOCSTI

From bfc8a686589e98757125ac120155fec8ad0235a5 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Mon, 7 May 2018 20:37:55 +0200
Subject: [PATCH 090/109] enable BPF JIT hardening by default (if available)

---
 kernel/bpf/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 048d275accae..0a614753c68c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -547,7 +547,7 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
 /* All BPF JIT sysctl knobs here. */
 int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
-int bpf_jit_harden   __read_mostly;
+int bpf_jit_harden   __read_mostly = 2;
 long bpf_jit_limit   __read_mostly;
 long bpf_jit_limit_max __read_mostly;
 

From bd09444017274bdc7b479d9b40f75fb9afec9082 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Sun, 4 Nov 2018 18:48:53 +0100
Subject: [PATCH 091/109] enable protected_{fifos,regular} by default

---
 fs/namei.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index e3429055d39b..1c7cc151c721 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1198,8 +1198,8 @@ static inline void put_link(struct nameidata *nd)
 
 static int sysctl_protected_symlinks __read_mostly = 1;
 static int sysctl_protected_hardlinks __read_mostly = 1;
-static int sysctl_protected_fifos __read_mostly;
-static int sysctl_protected_regular __read_mostly;
+static int sysctl_protected_fifos __read_mostly = 2;
+static int sysctl_protected_regular __read_mostly = 2;
 
 #ifdef CONFIG_SYSCTL
 static const struct ctl_table namei_sysctls[] = {

From 9163adab33d57004b83c27e161bbbfa2349209c1 Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Date: Mon, 6 May 2019 17:07:11 +0200
Subject: [PATCH 092/109] modpost: Add
 CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE

With 46c7dd56d541 ("modpost: always show verbose warning for section
mismatch"), sec_mismatch_verbose was removed which would have printed
errors for all writable function pointers during compilation if it
hadn't been "#if 0"ed out for quite some time now.

Let's introduce a new DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE Kconfig
option to cleanly control this linux-hardened functionality.

Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 lib/Kconfig.debug        |  3 +++
 scripts/Makefile.modpost |  1 +
 scripts/mod/modpost.c    | 21 +++++++++++++++------
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8d90402b0444..f58bc6893512 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -526,6 +526,9 @@ config SECTION_MISMATCH_WARN_ONLY
 
 	  If unsure, say Y.
 
+config DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE
+	bool "Enable verbose reporting of writable function pointers"
+
 config DEBUG_FORCE_FUNCTION_ALIGN_64B
 	bool "Force all function address 64B aligned"
 	depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC || RISCV || S390)
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index d7d45067d08b..b501130c534c 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -47,6 +47,7 @@ modpost-args =										\
 	$(if $(CONFIG_EXTENDED_MODVERSIONS),-x)						\
 	$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)					\
 	$(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E)					\
+	$(if $(CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE),-f)			\
 	$(if $(KBUILD_MODPOST_WARN),-w)							\
 	$(if $(KBUILD_NSDEPS),-d modules.nsdeps)					\
 	$(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N)	\
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 89fe6f630793..b9b1f2e607b4 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -50,6 +50,7 @@ static bool sec_mismatch_warn_only = true;
 static bool trim_unused_exports;
 
 static int writable_fptr_count = 0;
+static int writable_fptr_verbose = false;
 /* ignore missing files */
 static bool ignore_missing_files;
 /* If set to 1, only warn (instead of error) about missing ns imports */
@@ -1043,10 +1044,13 @@ static void default_mismatch_handler(const char *modname, struct elf_info *elf,
 	if (!secref_whitelist(fromsec, fromsym, tosec, tosym))
 		return;
 
-	if (mismatch->mismatch == DATA_TO_TEXT)
+	if (mismatch->mismatch == DATA_TO_TEXT) {
 		writable_fptr_count++;
-	else
+		if (!writable_fptr_verbose)
+			return;
+	} else {
 		sec_mismatch_count++;
+	}
 
 	if (!tosym[0])
 		snprintf(taddr_str, sizeof(taddr_str), "0x%x", (unsigned int)taddr);
@@ -2293,7 +2297,7 @@ int main(int argc, char **argv)
 	LIST_HEAD(dump_lists);
 	struct dump_list *dl, *dl2;
 
-	while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:xb")) != -1) {
+	while ((opt = getopt(argc, argv, "ei:fMmnT:to:au:WwENd:xb")) != -1) {
 		switch (opt) {
 		case 'e':
 			external_module = true;
@@ -2303,6 +2307,9 @@ int main(int argc, char **argv)
 			dl->file = optarg;
 			list_add_tail(&dl->list, &dump_lists);
 			break;
+		case 'f':
+			writable_fptr_verbose = true;
+			break;
 		case 'M':
 			module_enabled = true;
 			break;
@@ -2403,9 +2410,11 @@ int main(int argc, char **argv)
 		warn("suppressed %u unresolved symbol warnings because there were too many)\n",
 		     nr_unresolved - MAX_UNRESOLVED_REPORTS);
 
-	if (writable_fptr_count)
-		warn("modpost: Found %d writable function pointer(s).\n",
-				writable_fptr_count);
+	if (writable_fptr_count && !writable_fptr_verbose)
+		warn("modpost: Found %d writable function pointer%s.\n"
+		     "To see full details build your kernel with:\n"
+		     "'make CONFIG_DEBUG_WRITABLE_FUNCTION_POINTERS_VERBOSE=y'\n",
+		     writable_fptr_count, (writable_fptr_count == 1 ? "" : "s"));
 
 	return error_occurred ? 1 : 0;
 }

From 2bb5226171597b48a1fd2b15a1c052f0311199ea Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Date: Tue, 7 May 2019 11:46:21 +0200
Subject: [PATCH 093/109] mm: Fix extra_latent_entropy

Commit a9cd410a3d29 ("mm/page_alloc.c: memory hotplug: free pages as
higher order") changed `static void __init __free_pages_boot_core()`
into `void __free_pages_core()`, causing the following section mismatch
warning at compile time:

    WARNING: vmlinux.o(.text+0x180fe4): Section mismatch in reference from the function __free_pages_core() to the variable .meminit.data:extra_latent_entropy
    The function __free_pages_core() references the variable __meminitdata extra_latent_entropy.
    This is often because __free_pages_core lacks a __meminitdata annotation or the annotation of extra_latent_entropy is wrong.

This commit is an attempt at fixing this issue. I'm not sure it's OK as
we are accessing pages that are still managed by the memblock allocator.
The prefetching part is not an issue as it only affects struct pages.

Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
[levente@leventepolyak.net: most of core MM initialization moved to mm/mm_init.c]
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
[nicolas.bouchinet@ssi.gouv.fr: MAX_ORDER has been renamed to MAX_PAGE_ORDER (see 5e0a760b44417f7ca)]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/internal.h   |  3 +++
 mm/mm_init.c    |  3 +++
 mm/page_alloc.c | 34 +++++++++++++++++++---------------
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index e1e64b875885..104e88b73525 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -856,6 +856,9 @@ static inline struct folio *page_rmappable_folio(struct page *page)
 	return folio;
 }
 
+extern void __init __gather_extra_latent_entropy(struct page *page,
+						 unsigned int nr_pages);
+
 static inline void prep_compound_head(struct page *page, unsigned int order)
 {
 	struct folio *folio = (struct folio *)page;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index df34797691bd..30f3ee68e3fd 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1999,6 +1999,7 @@ static void __init deferred_free_pages(unsigned long pfn,
 		for (i = 0; i < nr_pages; i += pageblock_nr_pages)
 			init_pageblock_migratetype(page + i, MIGRATE_MOVABLE,
 					false);
+		__gather_extra_latent_entropy(page, 1 << MAX_PAGE_ORDER);
 		__free_pages_core(page, MAX_PAGE_ORDER, MEMINIT_EARLY);
 		return;
 	}
@@ -2010,6 +2011,7 @@ static void __init deferred_free_pages(unsigned long pfn,
 		if (pageblock_aligned(pfn))
 			init_pageblock_migratetype(page, MIGRATE_MOVABLE,
 					false);
+		__gather_extra_latent_entropy(page, 1);
 		__free_pages_core(page, 0, MEMINIT_EARLY);
 	}
 }
@@ -2496,6 +2498,7 @@ void __init memblock_free_pages(unsigned long pfn, unsigned int order)
 
 	/* pages were reserved and not allocated */
 	clear_page_tag_ref(page);
+	__gather_extra_latent_entropy(page, 1 << order);
 	__free_pages_core(page, order, MEMINIT_EARLY);
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 99579c0673ea..8a5d0d3b96bb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1632,6 +1632,25 @@ static void __free_pages_ok(struct page *page, unsigned int order,
 		free_one_page(zone, page, pfn, order, fpi_flags);
 }
 
+void __init __gather_extra_latent_entropy(struct page *page,
+					  unsigned int nr_pages)
+{
+	if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) {
+		unsigned long hash = 0;
+		size_t index, end = PAGE_SIZE * nr_pages / sizeof hash;
+		const unsigned long *data = lowmem_page_address(page);
+
+		for (index = 0; index < end; index++)
+			hash ^= hash + data[index];
+#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
+		latent_entropy ^= hash;
+		add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy));
+#else
+		add_device_randomness((const void *)&hash, sizeof(hash));
+#endif
+	}
+}
+
 void __meminit __free_pages_core(struct page *page, unsigned int order,
 		enum meminit_context context)
 {
@@ -1662,21 +1681,6 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
 			set_page_count(p, 0);
 		}
 
-		if (extra_latent_entropy && !PageHighMem(page) && page_to_pfn(page) < 0x100000) {
-			unsigned long hash = 0;
-			size_t index, end = PAGE_SIZE * nr_pages / sizeof hash;
-			const unsigned long *data = lowmem_page_address(page);
-
-			for (index = 0; index < end; index++)
-				hash ^= hash + data[index];
-#ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY
-			latent_entropy ^= hash;
-			add_device_randomness((const void *)&latent_entropy, sizeof(latent_entropy));
-#else
-			add_device_randomness((const void *)&hash, sizeof(hash));
-#endif
-		}
-
 		/* memblock adjusts totalram_pages() manually. */
 		atomic_long_add(nr_pages, &page_zone(page)->managed_pages);
 	}

From 8d04d5306b2b553317ee4b4b0c21ca648b7e836a Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Date: Fri, 29 Nov 2019 16:27:14 +0100
Subject: [PATCH 094/109] slub: Extend init_on_alloc to slab caches with
 constructors

This has required some rework during the port to 5.13, due to
da844b787245 ("kasan, mm: integrate slab init_on_alloc with HW_TAGS"),
and the patch is actually much simpler now since we do not need to
unpoison objects anymore.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
[nicolas.bouchinet@ssi.gouv.fr: pre/post-alloc hooks moved from mm/slab.h to mm/slub.c (see 6011be59910fb12b7)]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slab.h | 2 ++
 mm/slub.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/mm/slab.h b/mm/slab.h
index 075165fb0cf8..0732b3596615 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -681,8 +681,10 @@ static inline bool slab_want_init_on_alloc(gfp_t flags, struct kmem_cache *c)
 {
 	if (static_branch_maybe(CONFIG_INIT_ON_ALLOC_DEFAULT_ON,
 				&init_on_alloc)) {
+#ifndef CONFIG_SLUB
 		if (c->ctor)
 			return false;
+#endif
 		if (c->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON))
 			return flags & __GFP_ZERO;
 		return true;
diff --git a/mm/slub.c b/mm/slub.c
index 69e51c9ac55d..a645103d235e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4625,6 +4625,8 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 		if (p[i] && init && (!kasan_init ||
 				     !kasan_has_integrated_init()))
 			memset(p[i], 0, zero_size);
+		if (p[i] && init && s->ctor)
+			s->ctor(p[i]);
 		if (gfpflags_allow_spinning(flags))
 			kmemleak_alloc_recursive(p[i], s->object_size, 1,
 						 s->flags, init_flags);

From d9cc0fcc536360bb41ee67124474012969decca5 Mon Sep 17 00:00:00 2001
From: madaidan <50278627+madaidan@users.noreply.github.com>
Date: Sun, 9 Feb 2020 00:03:41 +0000
Subject: [PATCH 095/109] net: tcp: add option to disable TCP simultaneous
 connect

This is modified from Brad Spengler/PaX Team's code in the last public
patch of grsecurity/PaX based on my understanding of the code. Changes
or omissions from the original code are mine and don't reflect the
original grsecurity/PaX code.

TCP simultaneous connect adds a weakness in Linux's implementation of
TCP that allows two clients to connect to each other without either
entering a listening state. The weakness allows an attacker to easily
prevent a client from connecting to a known server provided the source
port for the connection is guessed correctly.

As the weakness could be used to prevent an antivirus or IPS from
fetching updates, or prevent an SSL gateway from fetching a CRL, it
should be eliminated.

This creates a net.ipv4.tcp_simult_connect sysctl that when disabled,
disables TCP simultaneous connect.

Reviewed-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Reviewed-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 Documentation/networking/ip-sysctl.rst | 18 ++++++++++++++++++
 include/net/tcp.h                      |  1 +
 net/ipv4/Kconfig                       | 23 +++++++++++++++++++++++
 net/ipv4/sysctl_net_ipv4.c             |  9 +++++++++
 net/ipv4/tcp_input.c                   |  3 ++-
 5 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 6921d8594b84..3dcd0f39cf84 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -909,6 +909,24 @@ tcp_backlog_ack_defer - BOOLEAN
 
 	Default: 1 (enabled)
 
+tcp_simult_connect - BOOLEAN
+	Enable TCP simultaneous connect that adds a weakness in Linux's strict
+	implementation of TCP that allows two clients to connect to each other
+	without either entering a listening state. The weakness allows an attacker
+	to easily prevent a client from connecting to a known server provided the
+	source port for the connection is guessed correctly.
+
+	As the weakness could be used to prevent an antivirus or IPS from fetching
+	updates, or prevent an SSL gateway from fetching a CRL, it should be
+	eliminated by disabling this option. Though Linux is one of few operating
+	systems supporting simultaneous connect, it has no legitimate use in
+	practice and is rarely supported by firewalls.
+
+	Disabling this may break TCP STUNT which is used by some applications for
+	NAT traversal.
+
+	Default: Value of CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON
+
 tcp_slow_start_after_idle - BOOLEAN
 	If enabled, provide RFC2861 behavior and time out the congestion
 	window after an idle period.  An idle period is defined at
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ebc72dce4134..955c1b71c6b4 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -287,6 +287,7 @@ static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
 /* sysctl variables for tcp */
 extern int sysctl_tcp_max_orphans;
 extern long sysctl_tcp_mem[3];
+extern int sysctl_tcp_simult_connect;
 
 #define TCP_RACK_LOSS_DETECTION  0x1 /* Use RACK to detect losses */
 #define TCP_RACK_STATIC_REO_WND  0x2 /* Use static RACK reo wnd */
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 5ef3ea768d9f..845b67882e64 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -770,3 +770,26 @@ config TCP_MD5SIG
 	  on the Internet.
 
 	  If unsure, say N.
+
+config TCP_SIMULT_CONNECT_DEFAULT_ON
+	bool "Enable TCP simultaneous connect"
+	help
+	  Enable TCP simultaneous connect that adds a weakness in Linux's strict
+	  implementation of TCP that allows two clients to connect to each other
+	  without either entering a listening state. The weakness allows an
+	  attacker to easily prevent a client from connecting to a known server
+	  provided the source port for the connection is guessed correctly.
+
+	  As the weakness could be used to prevent an antivirus or IPS from
+	  fetching updates, or prevent an SSL gateway from fetching a CRL, it
+	  should be eliminated by disabling this option. Though Linux is one of
+	  few operating systems supporting simultaneous connect, it has no
+	  legitimate use in practice and is rarely supported by firewalls.
+
+	  Disabling this may break TCP STUNT which is used by some applications
+	  for NAT traversal.
+
+	  This setting can be overridden at runtime via the
+	  net.ipv4.tcp_simult_connect sysctl.
+
+	  If unsure, say N.
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 5654cc9c8a0b..687dc0058b43 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -622,6 +622,15 @@ static struct ctl_table ipv4_table[] = {
 		.extra1		= &sysctl_fib_sync_mem_min,
 		.extra2		= &sysctl_fib_sync_mem_max,
 	},
+	{
+		.procname	= "tcp_simult_connect",
+		.data		= &sysctl_tcp_simult_connect,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 };
 
 static struct ctl_table ipv4_net_table[] = {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cb4bcc5a8578..2c9875ef79f3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -85,6 +85,7 @@
 #include <net/mptcp.h>
 
 int sysctl_tcp_max_orphans __read_mostly = NR_FILE;
+int sysctl_tcp_simult_connect __read_mostly = IS_ENABLED(CONFIG_TCP_SIMULT_CONNECT_DEFAULT_ON);
 
 #define FLAG_DATA		0x01 /* Incoming frame contained data.		*/
 #define FLAG_WIN_UPDATE		0x02 /* Incoming ACK was a window update.	*/
@@ -7046,7 +7047,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		SKB_DR_SET(reason, TCP_RFC7323_PAWS);
 		goto discard_and_undo;
 	}
-	if (th->syn) {
+	if (th->syn && sysctl_tcp_simult_connect) {
 		/* We see SYN without ACK. It is attempt of
 		 * simultaneous connect with crossed SYNs.
 		 * Particularly, it can be connect to self.

From bf6a3b741213847d454ef04a64f80600605c6093 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Thu, 11 Mar 2021 23:09:50 +0100
Subject: [PATCH 096/109] ovl: add config to disable unprivileged user
 namespace mounts

When disabled, unprivileged users will not be able to create
new overlayfs mounts. This cuts the attack surface when
unprivileged user namespace mounts (as used, for example, by
rootless containers) are not required.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 fs/overlayfs/Kconfig | 16 ++++++++++++++++
 fs/overlayfs/super.c |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index 2ac67e04a6fb..3340e13c959c 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -134,3 +134,19 @@ config OVERLAY_FS_DEBUG
 	  Say Y here to enable extra debugging checks in overlayfs.
 
 	  If unsure, say N.
+
+config OVERLAY_FS_UNPRIVILEGED
+	bool "Overlayfs: turn on unprivileged user namespace mounts"
+	default n
+	depends on OVERLAY_FS
+	help
+	  When disabled, unprivileged users will not be able to create
+	  new overlayfs mounts. This cuts the attack surface if no
+	  unprivileged user namespace mounts are required like for
+	  running rootless containers.
+
+	  Overlayfs has been part of several recent local privilege
+	  escalation exploits, so if you are security-conscious
+	  you want to disable this.
+
+	  If unsure, say N.
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 0822987cfb51..b2faf437dc36 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1577,7 +1577,9 @@ struct file_system_type ovl_fs_type = {
 	.name			= "overlay",
 	.init_fs_context	= ovl_init_fs_context,
 	.parameters		= ovl_parameter_spec,
+#ifdef CONFIG_OVERLAY_FS_UNPRIVILEGED
 	.fs_flags		= FS_USERNS_MOUNT,
+#endif
 	.kill_sb		= kill_anon_super,
 };
 MODULE_ALIAS_FS("overlay");

From b00764c50139938f338db2c359169b3faf743e75 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Tue, 25 May 2021 21:04:47 +0200
Subject: [PATCH 097/109] mm, kfence: bug on data corruption after error report

Trigger BUG when kfence encounters data corruption of kfence managed
objects. This allows a finer-grained control instead of globally
enabling panic_on_warn.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 lib/Kconfig.kfence | 9 +++++++++
 mm/kfence/report.c | 5 +++++
 2 files changed, 14 insertions(+)

diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence
index 6fbbebec683a..e494618f7193 100644
--- a/lib/Kconfig.kfence
+++ b/lib/Kconfig.kfence
@@ -96,4 +96,13 @@ config KFENCE_KUNIT_TEST
 	  during boot; say M if you want the test to build as a module; say N
 	  if you are unsure.
 
+config KFENCE_BUG_ON_DATA_CORRUPTION
+	bool "Trigger a BUG when data corruption is detected"
+	default y
+	help
+	  Select this option if the kernel should BUG when kfence encounters
+	  data corruption of kfence managed objects after error report.
+
+	  If unsure, say Y.
+
 endif # KFENCE
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index 787e87c26926..4d5099c5dc10 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -8,6 +8,7 @@
 #include <linux/stdarg.h>
 
 #include <linux/kernel.h>
+#include <linux/bug.h>
 #include <linux/lockdep.h>
 #include <linux/math.h>
 #include <linux/printk.h>
@@ -278,6 +279,10 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
 
 	lockdep_on();
 
+#ifdef CONFIG_KFENCE_BUG_ON_DATA_CORRUPTION
+	BUG();
+#endif
+
 	check_panic_on_warn("KFENCE");
 
 	/* We encountered a memory safety error, taint the kernel! */

From c9872e25d862d249cfd2b14b71ec6708b73174cd Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Date: Thu, 16 Dec 2021 10:55:13 +0100
Subject: [PATCH 098/109] slub: Bug on free of non-slab objects

Before commit d0fe47c64152 ("slub: add back check for free nonslab
objects"), freeing a non-slab object used to trigger a BUG if
CONFIG_DEBUG_VM was enabled. Now it only warns, which I think is not
enough for such a memory corruption. Let's restore the previous
behaviour, but tie it to CONFIG_BUG_ON_DATA_CORRUPTION as suggested by
Levente.

After page folios were introduced in v5.17, this patch was adapted to
trigger a bug when the order of the folio is zero instead of when the
page is not a compound page, which is not equivalent but respects the
semantics of the conversion to page folios and follows the change made
to the WARN_ON_ONCE beneath.

Suggested-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
[nicolas.bouchinet@ssi.gouv.fr: kfree moved from mm/slab_common.c to mm/slub.c (see b774d3e326d30fc8e)]
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index a645103d235e..a40b90e9b37b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6523,8 +6523,12 @@ static void free_large_kmalloc(struct page *page, void *object)
 		return;
 	}
 
+#ifdef CONFIG_BUG_ON_DATA_CORRUPTION
+	BUG_ON(order == 0);
+#else
 	if (WARN_ON_ONCE(order == 0))
 		pr_warn_once("object pointer: 0x%p\n", object);
+#endif
 
 	kmemleak_free(object);
 	kasan_kfree_large(object);

From 132ae923e4463f05bc5a8cc0770f91d4e2835312 Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Fri, 8 Dec 2023 11:53:31 +0100
Subject: [PATCH 099/109] io_uring: set io_uring_disabled sysctl to 1 by
 default

This forces processes to either have `CAP_SYS_ADMIN` or be in the
io_uring_group in order to use io_uring.

The patch alters the sysctl value range so that once it is set to "2" it
can't be lowered again.

The io_uring_group sysctl option is set to -1 by default; users should
define a proper group and set the sysctl accordingly if they want it
configured.

Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 io_uring/io_uring.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 97260bca67e7..4f4b7dbb563c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -124,7 +124,7 @@ static __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(io_key_has_sqarray, HZ);
 struct kmem_cache *req_cachep;
 static struct workqueue_struct *iou_wq __ro_after_init;
 
-static int __read_mostly sysctl_io_uring_disabled;
+static int __read_mostly sysctl_io_uring_disabled = 1;
 static int __read_mostly sysctl_io_uring_group = -1;
 
 #ifdef CONFIG_SYSCTL
@@ -134,8 +134,9 @@ static const struct ctl_table kernel_io_uring_disabled_table[] = {
 		.data		= &sysctl_io_uring_disabled,
 		.maxlen		= sizeof(sysctl_io_uring_disabled),
 		.mode		= 0644,
+		/* only handle a transition from default "1" to "2" */
 		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
+		.extra1		= SYSCTL_TWO,
 		.extra2		= SYSCTL_TWO,
 	},
 	{

From 8e7ee3c056ebf1e472c95dc1e14c261235075efa Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Thu, 17 Oct 2024 17:02:29 +0200
Subject: [PATCH 100/109] sysctl: Add proc_dointvec_minmax_sysadmin sanity
 check

Since we expose proc_dointvec_minmax_sysadmin, add it to sanity checking
functions.

Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 fs/proc/proc_sysctl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 49ab74e0bfde..4be54d32a60a 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1154,6 +1154,7 @@ static int sysctl_check_table(const char *path, struct ctl_table_header *header)
 		    (entry->proc_handler == proc_douintvec) ||
 		    (entry->proc_handler == proc_douintvec_minmax) ||
 		    (entry->proc_handler == proc_dointvec_minmax) ||
+		    (entry->proc_handler == proc_dointvec_minmax_sysadmin) ||
 		    (entry->proc_handler == proc_dou8vec_minmax) ||
 		    (entry->proc_handler == proc_dointvec_jiffies) ||
 		    (entry->proc_handler == proc_dointvec_userhz_jiffies) ||

From 95fec49d86cf8f6bd123f38e107a004ee396636c Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Wed, 14 May 2025 21:00:09 +0200
Subject: [PATCH 101/109] kconfig: enable MSEAL_SYSTEM_MAPPINGS by default

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 security/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/Kconfig b/security/Kconfig
index 4a9e016fa16d..d95435fb7851 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -56,6 +56,7 @@ config MSEAL_SYSTEM_MAPPINGS
 	depends on 64BIT
 	depends on ARCH_SUPPORTS_MSEAL_SYSTEM_MAPPINGS
 	depends on !CHECKPOINT_RESTORE
+	default y
 	help
 	  Apply mseal on system mappings.
 	  The system mappings includes vdso, vvar, vvar_vclock,

From e90199f9b37adf5c3473006f7569a81d9f37493a Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Tue, 14 Oct 2025 15:19:31 +0200
Subject: [PATCH 102/109] mm/slub: Only instrument slab allocation with
 canaries

With the introduction of barns and sheaves, slab objects are used to
prefill or refill sheaves, which are caches of small objects taking the
form of an array of pointers to slab objects.

Sheaves are then used for quick allocation and free, which consist of
shrinking and growing the array index.
Thus, there are two views of the allocation state of those objects: while
they are seen as allocated by the slab allocator, the sheaf allocator
sees them as free and then allocates them.

We thus need to adapt the slab canary patch in order to avoid sanitizing
object allocations and frees from this array.

A later patch will add a per-sheaf random canary value, which should lead
to better tracking of object overflows.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c | 81 ++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index a40b90e9b37b..0a6c88e345bb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -924,9 +924,21 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon
 	unsigned long *canary = get_canary(s, object);
 	BUG_ON(*canary != get_canary_value(canary, value));
 }
+
+static inline void check_set_canary_bulk(struct kmem_cache *s, unsigned int size, void **objects, unsigned long check_value, unsigned long set_value)
+{
+	for (int i = 0; i < size; i++) {
+		if (!is_kfence_address(objects[i])) {
+			check_canary(s, objects[i], check_value);
+			set_canary(s, objects[i], set_value);
+		}
+	}
+}
+
 #else
 #define set_canary(s, object, value)
 #define check_canary(s, object, value)
+#define check_set_canary_bulk(s, size, objects, check_value, set_value)
 #endif
 
 #ifdef CONFIG_SLUB_DEBUG
@@ -2908,7 +2920,7 @@ static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf,
 	return 0;
 }
 
-static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf);
+static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf, bool canary);
 
 static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp)
 {
@@ -2918,7 +2930,7 @@ static struct slab_sheaf *alloc_full_sheaf(struct kmem_cache *s, gfp_t gfp)
 		return NULL;
 
 	if (refill_sheaf(s, sheaf, gfp | __GFP_NOMEMALLOC | __GFP_NOWARN)) {
-		sheaf_flush_unused(s, sheaf);
+		sheaf_flush_unused(s, sheaf, true);
 		free_empty_sheaf(s, sheaf);
 		return NULL;
 	}
@@ -2966,6 +2978,7 @@ static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s)
 
 	local_unlock(&s->cpu_sheaves->lock);
 
+	check_set_canary_bulk(s, batch, &objects[0], s->random_active, s->random_inactive);
 	__kmem_cache_free_bulk(s, batch, &objects[0]);
 
 	stat_add(s, SHEAF_FLUSH, batch);
@@ -3011,20 +3024,24 @@ static bool sheaf_try_flush_main(struct kmem_cache *s)
  * necessary when flushing cpu's sheaves (both spare and main) during cpu
  * hotremove as the cpu is not executing anymore.
  */
-static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf)
+static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf, bool canary)
 {
 	if (!sheaf->size)
 		return;
 
 	stat_add(s, SHEAF_FLUSH, sheaf->size);
 
+	if (canary) {
+		check_set_canary_bulk(s, sheaf->size, &sheaf->objects[0], s->random_active, s->random_inactive);
+	}
 	__kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]);
 
 	sheaf->size = 0;
 }
 
 static bool __rcu_free_sheaf_prepare(struct kmem_cache *s,
-				     struct slab_sheaf *sheaf)
+				     struct slab_sheaf *sheaf,
+				     bool canary)
 {
 	bool init = slab_want_init_on_free(s);
 	void **p = &sheaf->objects[0];
@@ -3037,7 +3054,7 @@ static bool __rcu_free_sheaf_prepare(struct kmem_cache *s,
 		memcg_slab_free_hook(s, slab, p + i, 1);
 		alloc_tagging_slab_free_hook(s, slab, p + i, 1);
 
-		if (unlikely(!slab_free_hook(s, p[i], init, true))) {
+		if (unlikely(!slab_free_hook(s, p[i], init, true, canary))) {
 			p[i] = p[--sheaf->size];
 			continue;
 		}
@@ -3059,9 +3076,9 @@ static void rcu_free_sheaf_nobarn(struct rcu_head *head)
 	sheaf = container_of(head, struct slab_sheaf, rcu_head);
 	s = sheaf->cache;
 
-	__rcu_free_sheaf_prepare(s, sheaf);
+	__rcu_free_sheaf_prepare(s, sheaf, true);
 
-	sheaf_flush_unused(s, sheaf);
+	sheaf_flush_unused(s, sheaf, false);
 
 	free_empty_sheaf(s, sheaf);
 }
@@ -3092,7 +3109,7 @@ static void pcs_flush_all(struct kmem_cache *s)
 	local_unlock(&s->cpu_sheaves->lock);
 
 	if (spare) {
-		sheaf_flush_unused(s, spare);
+		sheaf_flush_unused(s, spare, true);
 		free_empty_sheaf(s, spare);
 	}
 
@@ -3109,9 +3126,9 @@ static void __pcs_flush_all_cpu(struct kmem_cache *s, unsigned int cpu)
 	pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
 
 	/* The cpu is not executing anymore so we don't need pcs->lock */
-	sheaf_flush_unused(s, pcs->main);
+	sheaf_flush_unused(s, pcs->main, true);
 	if (pcs->spare) {
-		sheaf_flush_unused(s, pcs->spare);
+		sheaf_flush_unused(s, pcs->spare, true);
 		free_empty_sheaf(s, pcs->spare);
 		pcs->spare = NULL;
 	}
@@ -3350,7 +3367,7 @@ static void barn_shrink(struct kmem_cache *s, struct node_barn *barn)
 	spin_unlock_irqrestore(&barn->lock, flags);
 
 	list_for_each_entry_safe(sheaf, sheaf2, &full_list, barn_list) {
-		sheaf_flush_unused(s, sheaf);
+		sheaf_flush_unused(s, sheaf, true);
 		free_empty_sheaf(s, sheaf);
 	}
 
@@ -4707,7 +4724,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 			 * we must be very low on memory so don't bother
 			 * with the barn
 			 */
-			sheaf_flush_unused(s, empty);
+			sheaf_flush_unused(s, empty, true);
 			free_empty_sheaf(s, empty);
 		}
 	} else {
@@ -4927,6 +4944,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 {
 	void *object;
 	bool init = false;
+	bool from_pcs = false;
 
 	s = slab_pre_alloc_hook(s, gfpflags);
 	if (unlikely(!s))
@@ -4937,6 +4955,8 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 		goto out;
 
 	object = alloc_from_pcs(s, gfpflags, node);
+	if (object)
+		from_pcs = true;
 
 	if (!object)
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
@@ -4956,7 +4976,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 		init = slab_want_init_on_alloc(gfpflags, s);
 	}
 
-	if (object) {
+	if (object && !from_pcs) {
 		check_canary(s, object, s->random_inactive);
 		set_canary(s, object, s->random_active);
 	}
@@ -5128,7 +5148,7 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 
 		if (sheaf->size < size &&
 		    __prefill_sheaf_pfmemalloc(s, sheaf, gfp)) {
-			sheaf_flush_unused(s, sheaf);
+			sheaf_flush_unused(s, sheaf, true);
 			free_empty_sheaf(s, sheaf);
 			sheaf = NULL;
 		}
@@ -5155,7 +5175,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
 
 	if (unlikely((sheaf->capacity != s->sheaf_capacity)
 		     || sheaf->pfmemalloc)) {
-		sheaf_flush_unused(s, sheaf);
+		sheaf_flush_unused(s, sheaf, true);
 		kfree(sheaf);
 		return;
 	}
@@ -5183,7 +5203,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
 	 */
 	if (!barn || data_race(barn->nr_full) >= MAX_FULL_SHEAVES ||
 	    refill_sheaf(s, sheaf, gfp)) {
-		sheaf_flush_unused(s, sheaf);
+		sheaf_flush_unused(s, sheaf, true);
 		free_empty_sheaf(s, sheaf);
 		return;
 	}
@@ -5804,7 +5824,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 		pcs->spare = NULL;
 		local_unlock(&s->cpu_sheaves->lock);
 
-		sheaf_flush_unused(s, to_flush);
+		sheaf_flush_unused(s, to_flush, true);
 		empty = to_flush;
 		goto got_empty;
 	}
@@ -5917,7 +5937,7 @@ static void rcu_free_sheaf(struct rcu_head *head)
 	 * If it returns true, there was at least one object from pfmemalloc
 	 * slab so simply flush everything.
 	 */
-	if (__rcu_free_sheaf_prepare(s, sheaf))
+	if (__rcu_free_sheaf_prepare(s, sheaf, false))
 		goto flush;
 
 	n = get_node(s, sheaf->node);
@@ -5944,7 +5964,7 @@ static void rcu_free_sheaf(struct rcu_head *head)
 
 flush:
 	stat(s, BARN_PUT_FAIL);
-	sheaf_flush_unused(s, sheaf);
+	sheaf_flush_unused(s, sheaf, true);
 
 empty:
 	if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) {
@@ -6092,7 +6112,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 		memcg_slab_free_hook(s, slab, p + i, 1);
 		alloc_tagging_slab_free_hook(s, slab, p + i, 1);
 
-		if (unlikely(!slab_free_hook(s, p[i], init, false))) {
+		if (unlikely(!slab_free_hook(s, p[i], init, false, false))) {
 			p[i] = p[--size];
 			continue;
 		}
@@ -6179,11 +6199,13 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 	 * many full sheaves, free the rest to slab pages
 	 */
 fallback:
+	check_set_canary_bulk(s, size, p, s->random_active, s->random_inactive);
 	__kmem_cache_free_bulk(s, size, p);
 	stat_add(s, FREE_SLOWPATH, size);
 
 flush_remote:
 	if (remote_nr) {
+		check_set_canary_bulk(s, remote_nr, &remote_objects[0], s->random_active, s->random_inactive);
 		__kmem_cache_free_bulk(s, remote_nr, &remote_objects[0]);
 		stat_add(s, FREE_SLOWPATH, remote_nr);
 		if (i < size) {
@@ -6277,6 +6299,12 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	if (is_kfence_address(object))
 		canary = false;
 
+	/* Do not check or set canary if the object is freed back to pcs. */
+	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
+				     slab_nid(slab) == numa_mem_id())) {
+		canary = false;
+	}
+
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false, canary)))
 		return;
 
@@ -7328,7 +7356,7 @@ static inline
 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			    void **p)
 {
-	int i, k;
+	int i;
 
 	if (IS_ENABLED(CONFIG_SLUB_TINY) || kmem_cache_debug(s)) {
 		for (i = 0; i < size; i++) {
@@ -7362,12 +7390,7 @@ int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		}
 	}
 
-	for (k = 0; k < i; k++) {
-		if (!is_kfence_address(p[k])) {
-			check_canary(s, p[k], s->random_inactive);
-			set_canary(s, p[k], s->random_active);
-		}
-	}
+	check_set_canary_bulk(s, i, p, s->random_inactive, s->random_active);
 
 	return i;
 
@@ -7417,8 +7440,10 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
 		 * the percpu sheaves, we have bigger problems.
 		 */
 		if (unlikely(__kmem_cache_alloc_bulk(s, flags, size - i, p + i) == 0)) {
-			if (i > 0)
+			if (i > 0) {
+				check_set_canary_bulk(s, i, p, s->random_active, s->random_inactive);
 				__kmem_cache_free_bulk(s, i, p);
+			}
 			if (kfence_obj)
 				__kfence_free(kfence_obj);
 			return 0;

From 2a8e1c2cc1176579ceb79d91a36f91e2c65afdbb Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Thu, 15 Jan 2026 15:31:34 +0100
Subject: [PATCH 103/109] mm/slub: Add canary on sheaf alloc and free

Sheaf allocation is an allocation cache that uses pre-allocated slab
objects for faster free and allocation from a sheaf array.
This patch adds a sheaf canary in order to detect small overflows and
double-frees of sheaf objects.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slab.h |  1 +
 mm/slub.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 0732b3596615..4db3f9091b1c 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -226,6 +226,7 @@ struct kmem_cache {
 #ifdef CONFIG_SLAB_CANARY
 	unsigned long random_active;
 	unsigned long random_inactive;
+	unsigned long sheaf_random_active;
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/mm/slub.c b/mm/slub.c
index 0a6c88e345bb..84fa0360d9bb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4841,6 +4841,11 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
 
 	pcs->main->size--;
 
+	if (!is_kfence_address(object)) {
+		check_canary(s, object, s->random_active);
+		set_canary(s, object, s->sheaf_random_active);
+	}
+
 	local_unlock(&s->cpu_sheaves->lock);
 
 	stat(s, ALLOC_FASTPATH);
@@ -4913,6 +4918,8 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, gfp_t gfp, size_t size,
 	main->size -= batch;
 	memcpy(p, main->objects + main->size, batch * sizeof(void *));
 
+	check_set_canary_bulk(s, batch, p, s->random_active, s->sheaf_random_active);
+
 	local_unlock(&s->cpu_sheaves->lock);
 
 	stat_add(s, ALLOC_FASTPATH, batch);
@@ -4945,6 +4952,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	void *object;
 	bool init = false;
 	bool from_pcs = false;
+	bool from_pcs_failed = false;
 
 	s = slab_pre_alloc_hook(s, gfpflags);
 	if (unlikely(!s))
@@ -4954,12 +4962,16 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	if (unlikely(object))
 		goto out;
 
-	object = alloc_from_pcs(s, gfpflags, node);
-	if (object)
+	if (s->cpu_sheaves) {
+		object = alloc_from_pcs(s, gfpflags, node);
 		from_pcs = true;
+	}
 
-	if (!object)
+	if (!object) {
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
+		if (from_pcs)
+			from_pcs_failed = true;
+	}
 
 	maybe_wipe_obj_freeptr(s, object);
 
@@ -4976,9 +4988,18 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 		init = slab_want_init_on_alloc(gfpflags, s);
 	}
 
+	/*
+	 * linux-hardened: In the scenario where an object is intended to be allocated
+	 * from a sheaf but it's allocation failed, it is instead directly allocated from the
+	 * slab allocator but will later be freed back to a sheaf. We thus need to
+	 * set the canary to a sheaf_random_active.
+	 */
 	if (object && !from_pcs) {
 		check_canary(s, object, s->random_inactive);
 		set_canary(s, object, s->random_active);
+	} else if (object && from_pcs_failed) {
+		check_canary(s, object, s->random_inactive);
+		set_canary(s, object, s->sheaf_random_active);
 	}
 
 out:
@@ -5296,6 +5317,11 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
 
 	/* add __GFP_NOFAIL to force successful memcg charging */
 	slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, init, s->object_size);
+
+	if (!is_kfence_address(ret)) {
+		check_canary(s, ret, s->random_active);
+		set_canary(s, ret, s->sheaf_random_active);
+	}
 out:
 	trace_kmem_cache_alloc(_RET_IP_, ret, s, gfp, NUMA_NO_NODE);
 
@@ -5906,6 +5932,10 @@ bool free_to_pcs(struct kmem_cache *s, void *object, bool allow_spin)
 			return false;
 	}
 
+	if (!is_kfence_address(object)) {
+		check_canary(s, object, s->sheaf_random_active);
+		set_canary(s, object, s->random_active);
+	}
 	pcs->main->objects[pcs->main->size++] = object;
 
 	local_unlock(&s->cpu_sheaves->lock);
@@ -6061,6 +6091,11 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
 	 * Since we flush immediately when size reaches capacity, we never reach
 	 * this with size already at capacity, so no OOB write is possible.
 	 */
+
+	if (!is_kfence_address(obj)) {
+		check_canary(s, obj, s->sheaf_random_active);
+		set_canary(s, obj, s->random_active);
+	}
 	rcu_sheaf->objects[rcu_sheaf->size++] = obj;
 
 	if (likely(rcu_sheaf->size < s->sheaf_capacity)) {
@@ -6117,6 +6152,11 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 			continue;
 		}
 
+		if (!is_kfence_address(p[i])) {
+			check_canary(s, p[i], s->sheaf_random_active);
+			set_canary(s, p[i], s->random_active);
+		}
+
 		if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)
 			     || slab_test_pfmemalloc(slab))) {
 			remote_objects[remote_nr] = p[i];
@@ -6291,6 +6331,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	       unsigned long addr)
 {
 	bool canary = true;
+	bool to_sheaf = false;
 
 	memcg_slab_free_hook(s, slab, &object, 1);
 	alloc_tagging_slab_free_hook(s, slab, &object, 1);
@@ -6299,7 +6340,7 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	if (is_kfence_address(object))
 		canary = false;
 
-	/* Do not check or set canary if the object is freed back to pcs. */
+	/* Defer canary checking if the object is freed back to pcs. */
 	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
 				     slab_nid(slab) == numa_mem_id())) {
 		canary = false;
@@ -6310,10 +6351,21 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 
 	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
 	    && likely(!slab_test_pfmemalloc(slab))) {
+		to_sheaf = true;
 		if (likely(free_to_pcs(s, object, true)))
 			return;
 	}
 
+	/*
+	 * linux-hardened: In this scenario, the object was intended to be freed to a
+	 * sheaf but it failed. The object will thus be freed back to the slab allocator,
+	 * the canary thus need to be checked as a sheaf one and set back to a slab inactive one.
+	 */
+	if (to_sheaf && canary) {
+		check_canary(s, object, s->sheaf_random_active);
+		set_canary(s, object, s->random_inactive);
+	}
+
 	__slab_free(s, slab, object, object, 1, addr);
 	stat(s, FREE_SLOWPATH);
 }
@@ -8633,6 +8685,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 #ifdef CONFIG_SLAB_CANARY
 	s->random_active = get_random_long();
 	s->random_inactive = get_random_long();
+	if (__slub_debug_enabled())
+		s->sheaf_random_active = s->random_active;
+	else
+		s->sheaf_random_active = get_random_long();
 #endif
 	s->align = args->align;
 	s->ctor = args->ctor;

From 156b91c7cc8e9fdb3c3de6e05e9bd4c4a3eb413c Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Fri, 6 Mar 2026 09:42:17 +0100
Subject: [PATCH 104/109] 7.0 canary adaptation

- Invert the canary logic: we now only track objects in their inactive
  state. Allocated objects are always tagged as random_active. Free
  objects are tagged as sheaf_random_inactive or random_inactive
  depending on whether they are in a sheaf or in a slab freelist.
  The logic inversion should make the patch far more stable.

- Fixes slab_debug canary crash in early allocation state when the
  bootstrap sheaf is in use.

- Fixes slabobj_ext offset computation when stored in objects.

- Always instrument sheaf_canary, even when slab_debug is active.

- Fixes canary mismatch in some free paths.

- Adapt canary to new alloc/free paths.

- Fixes kmem_cache_refill_sheaf instrumentation.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slab.h |   2 +-
 mm/slub.c | 121 ++++++++++++++++++++++--------------------------------
 2 files changed, 50 insertions(+), 73 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index 4db3f9091b1c..7ea5a143ca21 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -226,7 +226,7 @@ struct kmem_cache {
 #ifdef CONFIG_SLAB_CANARY
 	unsigned long random_active;
 	unsigned long random_inactive;
-	unsigned long sheaf_random_active;
+	unsigned long sheaf_random_inactive;
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/mm/slub.c b/mm/slub.c
index 84fa0360d9bb..63efc6059d2a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -886,6 +886,10 @@ static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
 	if (slub_debug_orig_size(s))
 		offset += sizeof(unsigned long);
 
+#ifdef CONFIG_SLAB_CANARY
+	offset += sizeof(void *);
+#endif
+
 	offset += kasan_metadata_size(s, false);
 
 	return offset;
@@ -925,19 +929,24 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon
 	BUG_ON(*canary != get_canary_value(canary, value));
 }
 
-static inline void check_set_canary_bulk(struct kmem_cache *s, unsigned int size, void **objects, unsigned long check_value, unsigned long set_value)
+static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value)
 {
-	for (int i = 0; i < size; i++) {
-		if (!is_kfence_address(objects[i])) {
-			check_canary(s, objects[i], check_value);
-			set_canary(s, objects[i], set_value);
-		}
+	if (!is_kfence_address(object)) {
+		check_canary(s, object, check_value);
+		set_canary(s, object, set_value);
 	}
 }
 
+static inline void check_set_canary_bulk(struct kmem_cache *s, unsigned int size, void **objects, unsigned long check_value, unsigned long set_value)
+{
+	for (int i = 0; i < size; i++)
+		check_set_canary(s, objects[i], check_value, set_value);
+}
+
 #else
 #define set_canary(s, object, value)
 #define check_canary(s, object, value)
+#define check_set_canary(s, object, check_value, set_value)
 #define check_set_canary_bulk(s, size, objects, check_value, set_value)
 #endif
 
@@ -2910,6 +2919,12 @@ static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf,
 	filled = refill_objects(s, &sheaf->objects[sheaf->size], gfp, to_fill,
 				to_fill);
 
+	/*
+	 * linux-hardened: refill_objects directly picks objects from slab freelist,
+	 * we thus need to manually instrument them here for sheaf.
+	 */
+	check_set_canary_bulk(s, filled, &sheaf->objects[sheaf->size], s->random_inactive, s->sheaf_random_inactive);
+
 	sheaf->size += filled;
 
 	stat_add(s, SHEAF_REFILL, filled);
@@ -2978,7 +2993,7 @@ static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s)
 
 	local_unlock(&s->cpu_sheaves->lock);
 
-	check_set_canary_bulk(s, batch, &objects[0], s->random_active, s->random_inactive);
+	check_set_canary_bulk(s, batch, &objects[0], s->sheaf_random_inactive, s->random_inactive);
 	__kmem_cache_free_bulk(s, batch, &objects[0]);
 
 	stat_add(s, SHEAF_FLUSH, batch);
@@ -3032,7 +3047,7 @@ static void sheaf_flush_unused(struct kmem_cache *s, struct slab_sheaf *sheaf, b
 	stat_add(s, SHEAF_FLUSH, sheaf->size);
 
 	if (canary) {
-		check_set_canary_bulk(s, sheaf->size, &sheaf->objects[0], s->random_active, s->random_inactive);
+		check_set_canary_bulk(s, sheaf->size, &sheaf->objects[0], s->sheaf_random_inactive, s->random_inactive);
 	}
 	__kmem_cache_free_bulk(s, sheaf->size, &sheaf->objects[0]);
 
@@ -4840,11 +4855,7 @@ void *alloc_from_pcs(struct kmem_cache *s, gfp_t gfp, int node)
 	}
 
 	pcs->main->size--;
-
-	if (!is_kfence_address(object)) {
-		check_canary(s, object, s->random_active);
-		set_canary(s, object, s->sheaf_random_active);
-	}
+	check_set_canary(s, object, s->sheaf_random_inactive, s->random_active);
 
 	local_unlock(&s->cpu_sheaves->lock);
 
@@ -4918,7 +4929,7 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, gfp_t gfp, size_t size,
 	main->size -= batch;
 	memcpy(p, main->objects + main->size, batch * sizeof(void *));
 
-	check_set_canary_bulk(s, batch, p, s->random_active, s->sheaf_random_active);
+	check_set_canary_bulk(s, batch, p, s->sheaf_random_inactive, s->random_active);
 
 	local_unlock(&s->cpu_sheaves->lock);
 
@@ -4951,8 +4962,6 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 {
 	void *object;
 	bool init = false;
-	bool from_pcs = false;
-	bool from_pcs_failed = false;
 
 	s = slab_pre_alloc_hook(s, gfpflags);
 	if (unlikely(!s))
@@ -4962,15 +4971,11 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	if (unlikely(object))
 		goto out;
 
-	if (s->cpu_sheaves) {
-		object = alloc_from_pcs(s, gfpflags, node);
-		from_pcs = true;
-	}
+	object = alloc_from_pcs(s, gfpflags, node);
 
 	if (!object) {
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
-		if (from_pcs)
-			from_pcs_failed = true;
+		check_set_canary(s, object, s->random_inactive, s->random_active);
 	}
 
 	maybe_wipe_obj_freeptr(s, object);
@@ -4988,20 +4993,6 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 		init = slab_want_init_on_alloc(gfpflags, s);
 	}
 
-	/*
-	 * linux-hardened: In the scenario where an object is intended to be allocated
-	 * from a sheaf but it's allocation failed, it is instead directly allocated from the
-	 * slab allocator but will later be freed back to a sheaf. We thus need to
-	 * set the canary to a sheaf_random_active.
-	 */
-	if (object && !from_pcs) {
-		check_canary(s, object, s->random_inactive);
-		set_canary(s, object, s->random_active);
-	} else if (object && from_pcs_failed) {
-		check_canary(s, object, s->random_inactive);
-		set_canary(s, object, s->sheaf_random_active);
-	}
-
 out:
 	/*
 	 * When init equals 'true', like for kzalloc() family, only
@@ -5133,6 +5124,9 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 			return NULL;
 		}
 
+		/* linux-hardened: We are prefilling a sheaf, the objects need to be instrumented with sheaf_random_inactive. */
+		check_set_canary_bulk(s, size, &sheaf->objects[0], s->random_active, s->sheaf_random_inactive);
+
 		sheaf->size = size;
 
 		return sheaf;
@@ -5267,6 +5261,8 @@ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
 					     &sheaf->objects[sheaf->size])) {
 			return -ENOMEM;
 		}
+
+		check_set_canary_bulk(s, sheaf->capacity - sheaf->size, &sheaf->objects[sheaf->size], s->random_active, s->sheaf_random_inactive);
 		sheaf->size = sheaf->capacity;
 
 		return 0;
@@ -5313,15 +5309,12 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache *s, gfp_t gfp,
 	if (likely(!ret))
 		ret = sheaf->objects[--sheaf->size];
 
+	check_set_canary(s, ret, s->sheaf_random_inactive, s->random_active);
+
 	init = slab_want_init_on_alloc(gfp, s);
 
 	/* add __GFP_NOFAIL to force successful memcg charging */
 	slab_post_alloc_hook(s, NULL, gfp | __GFP_NOFAIL, 1, &ret, init, s->object_size);
-
-	if (!is_kfence_address(ret)) {
-		check_canary(s, ret, s->random_active);
-		set_canary(s, ret, s->sheaf_random_active);
-	}
 out:
 	trace_kmem_cache_alloc(_RET_IP_, ret, s, gfp, NUMA_NO_NODE);
 
@@ -5508,6 +5501,7 @@ void *kmalloc_nolock_noprof(size_t size, gfp_t gfp_flags, int node)
 	}
 
 success:
+	check_set_canary(s, ret, s->random_inactive, s->random_active);
 	maybe_wipe_obj_freeptr(s, ret);
 	slab_post_alloc_hook(s, NULL, alloc_gfp, 1, &ret,
 			     slab_want_init_on_alloc(alloc_gfp, s), size);
@@ -5932,10 +5926,7 @@ bool free_to_pcs(struct kmem_cache *s, void *object, bool allow_spin)
 			return false;
 	}
 
-	if (!is_kfence_address(object)) {
-		check_canary(s, object, s->sheaf_random_active);
-		set_canary(s, object, s->random_active);
-	}
+	check_set_canary(s, object, s->random_active, s->sheaf_random_inactive);
 	pcs->main->objects[pcs->main->size++] = object;
 
 	local_unlock(&s->cpu_sheaves->lock);
@@ -6092,10 +6083,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
 	 * this with size already at capacity, so no OOB write is possible.
 	 */
 
-	if (!is_kfence_address(obj)) {
-		check_canary(s, obj, s->sheaf_random_active);
-		set_canary(s, obj, s->random_active);
-	}
+	check_set_canary(s, obj, s->random_active, s->sheaf_random_inactive);
 	rcu_sheaf->objects[rcu_sheaf->size++] = obj;
 
 	if (likely(rcu_sheaf->size < s->sheaf_capacity)) {
@@ -6152,11 +6140,6 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 			continue;
 		}
 
-		if (!is_kfence_address(p[i])) {
-			check_canary(s, p[i], s->sheaf_random_active);
-			set_canary(s, p[i], s->random_active);
-		}
-
 		if (unlikely((IS_ENABLED(CONFIG_NUMA) && slab_nid(slab) != node)
 			     || slab_test_pfmemalloc(slab))) {
 			remote_objects[remote_nr] = p[i];
@@ -6213,6 +6196,8 @@ static void free_to_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 	main = pcs->main;
 	batch = min(size, s->sheaf_capacity - main->size);
 
+	check_set_canary_bulk(s, batch, p, s->random_active, s->sheaf_random_inactive);
+
 	memcpy(main->objects + main->size, p, batch * sizeof(void *));
 	main->size += batch;
 
@@ -6300,6 +6285,7 @@ static void free_deferred_objects(struct irq_work *work)
 		 */
 		set_freepointer(s, x, NULL);
 
+		check_set_canary(s, x, s->random_active, s->random_inactive);
 		__slab_free(s, slab, x, x, 1, _THIS_IP_);
 		stat(s, FREE_SLOWPATH);
 	}
@@ -6331,40 +6317,34 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	       unsigned long addr)
 {
 	bool canary = true;
-	bool to_sheaf = false;
 
 	memcg_slab_free_hook(s, slab, &object, 1);
 	alloc_tagging_slab_free_hook(s, slab, &object, 1);
 
 	/* Make sure canaries are not used on kfence objects. */
-	if (is_kfence_address(object))
-		canary = false;
-
 	/* Defer canary checking if the object is freed back to pcs. */
-	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
-				     slab_nid(slab) == numa_mem_id())) {
+	if (is_kfence_address(object) || cache_has_sheaves(s))
 		canary = false;
-	}
 
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false, canary)))
 		return;
 
 	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
 	    && likely(!slab_test_pfmemalloc(slab))) {
-		to_sheaf = true;
 		if (likely(free_to_pcs(s, object, true)))
 			return;
 	}
 
 	/*
 	 * linux-hardened: In this scenario, the object was intended to be freed to a
-	 * sheaf but it failed. The object will thus be freed back to the slab allocator,
-	 * the canary thus need to be checked as a sheaf one and set back to a slab inactive one.
+	 * sheaf but it failed. The object will thus be freed back to the slab allocator
+	 * without instrumentation, the canary thus needs to be checked and set back to a
+	 * slab inactive one.
+	 *
+	 * We only instrument objects that have not already been instrumented in `slab_free_hook()`.
 	 */
-	if (to_sheaf && canary) {
-		check_canary(s, object, s->sheaf_random_active);
-		set_canary(s, object, s->random_inactive);
-	}
+	if (unlikely(!canary))
+		check_set_canary(s, object, s->random_active, s->random_inactive);
 
 	__slab_free(s, slab, object, object, 1, addr);
 	stat(s, FREE_SLOWPATH);
@@ -8685,10 +8665,7 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 #ifdef CONFIG_SLAB_CANARY
 	s->random_active = get_random_long();
 	s->random_inactive = get_random_long();
-	if (__slub_debug_enabled())
-		s->sheaf_random_active = s->random_active;
-	else
-		s->sheaf_random_active = get_random_long();
+	s->sheaf_random_inactive = get_random_long();
 #endif
 	s->align = args->align;
 	s->ctor = args->ctor;

From dbc0283a5a2d68121da9020df0b2ba12a9fda659 Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Mon, 23 Mar 2026 09:47:13 +0100
Subject: [PATCH 105/109] Add canary_debug kernel parameter

With canary_debug, a canary mismatch will print the expected canary values
and the value that was actually encountered.

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 63efc6059d2a..58e72892ffe4 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -923,10 +923,45 @@ static inline void set_canary(struct kmem_cache *s, void *object, unsigned long
 	*canary = get_canary_value(canary, value);
 }
 
+static inline void print_canary_value(struct kmem_cache *s, void * object, unsigned long value)
+{
+	unsigned long *canary = get_canary(s, object);
+
+	early_printk("check_canary: canary mismatch on cache (%s) "
+	      "for object %p:\n"
+	      "\tchecked canary value = %lx\n"
+	      "\tobject canary value = %lx\n"
+	      "\tpossible canary values for the cache :\n"
+	      "\trandom_active = %lx\n"
+	      "\trandom_inactive = %lx\n"
+	      "\tsheaf_random_inactive = %lx\n",
+	      s->name,
+	      object,
+	      get_canary_value(canary, value),
+	      *canary,
+	      get_canary_value(canary, s->random_active),
+	      get_canary_value(canary, s->random_inactive),
+	      get_canary_value(canary, s->sheaf_random_inactive));
+}
+
+static bool canary_debug __ro_after_init = false;
+static int __init setup_canary_debug(char *str)
+{
+	canary_debug = true;
+	return 1;
+}
+__setup_param("canary_debug", canary_debug, setup_canary_debug, 0);
+__setup("canary_debug", setup_canary_debug);
+
 static inline void check_canary(struct kmem_cache *s, void *object, unsigned long value)
 {
 	unsigned long *canary = get_canary(s, object);
-	BUG_ON(*canary != get_canary_value(canary, value));
+
+	if (*canary != get_canary_value(canary, value)) {
+		if (unlikely(canary_debug))
+			print_canary_value(s, object, value);
+		BUG_ON(1);
+	}
 }
 
 static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value)

From c9ade82d1bebda30365f4c0d34793b2c806ebadb Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Mon, 20 Apr 2026 10:30:32 +0200
Subject: [PATCH 106/109] mm/slub: Avoid check_canary on null objects

Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 58e72892ffe4..56819ec51838 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -966,7 +966,7 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon
 
 static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value)
 {
-	if (!is_kfence_address(object)) {
+	if (object && !is_kfence_address(object)) {
 		check_canary(s, object, check_value);
 		set_canary(s, object, set_value);
 	}

From 73c6ad8572c1b3f9cd0861090126141f17b0a649 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Fri, 1 May 2026 22:06:17 +0200
Subject: [PATCH 107/109] gcc-plugins: Replace CONST_CAST with const_cast<>.

Explicitly define CONST_CAST_TREE
For gcc-16, this was removed in gcc trunk

see commits
  c3d96ff9e916c02584aa081f03ab999292efbb50
  458c7926d48959abcb2c1adaa22458e27459a551

Link: https://www.spinics.net/lists/kernel/msg6111050.html
---
 scripts/gcc-plugins/gcc-common.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h
index 8f1b3500f8e2..0c69ec2b24e0 100644
--- a/scripts/gcc-plugins/gcc-common.h
+++ b/scripts/gcc-plugins/gcc-common.h
@@ -309,7 +309,12 @@ typedef const gimple *const_gimple_ptr;
 #define gimple gimple_ptr
 #define const_gimple const_gimple_ptr
 #undef CONST_CAST_GIMPLE
+#if BUILDING_GCC_VERSION >= 16000
+#define CONST_CAST_GIMPLE(X) const_cast<gimple>((X))
+#define CONST_CAST_TREE(X) const_cast<tree>((X))
+#else
 #define CONST_CAST_GIMPLE(X) CONST_CAST(gimple, (X))
+#endif
 
 /* gimple related */
 static inline gimple gimple_build_assign_with_ops(enum tree_code subcode, tree lhs, tree op1, tree op2 MEM_STAT_DECL)

From a06e870c3c37365a236533c67722886cd8b0bc58 Mon Sep 17 00:00:00 2001
From: Levente Polyak <levente@leventepolyak.net>
Date: Wed, 27 May 2026 02:57:58 +0200
Subject: [PATCH 108/109] Linux hardened v7.0.10-hardened1

Signed-off-by: Levente Polyak <levente@leventepolyak.net>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index a95f0b3d26bf..83746b79b2a3 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 7
 PATCHLEVEL = 0
 SUBLEVEL = 10
-EXTRAVERSION =
+EXTRAVERSION = -hardened1
 NAME = Baby Opossum Posse
 
 # *DOCUMENTATION*

From 46523197c312316a40f3bda055caf8005007c123 Mon Sep 17 00:00:00 2001
From: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
Date: Tue, 2 Jun 2026 09:57:33 +0200
Subject: [PATCH 109/109] mm/slub: Add disable_canary kernel cmdline

Signed-off-by: Nicolas Bouchinet <nicolas.bouchinet@ssi.gouv.fr>
---
 mm/slub.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 56819ec51838..b30682083290 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -944,6 +944,15 @@ static inline void print_canary_value(struct kmem_cache *s, void * object, unsig
 	      get_canary_value(canary, s->sheaf_random_inactive));
 }
 
+static bool disable_canary __ro_after_init = false;
+static int __init setup_disable_canary(char *str)
+{
+	disable_canary = true;
+	return 1;
+}
+__setup_param("disable_canary", disable_canary, setup_disable_canary, 0);
+__setup("disable_canary", setup_disable_canary);
+
 static bool canary_debug __ro_after_init = false;
 static int __init setup_canary_debug(char *str)
 {
@@ -966,7 +975,7 @@ static inline void check_canary(struct kmem_cache *s, void *object, unsigned lon
 
 static inline void check_set_canary(struct kmem_cache *s, void *object, unsigned long check_value, unsigned long set_value)
 {
-	if (object && !is_kfence_address(object)) {
+	if (likely(!disable_canary) && object && !is_kfence_address(object)) {
 		check_canary(s, object, check_value);
 		set_canary(s, object, set_value);
 	}
@@ -2715,7 +2724,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
 	 * Postpone setting the inactive canary until the metadata
 	 * has potentially been cleared at the end of this function.
 	 */
-	if (canary) {
+	if (likely(!disable_canary) && canary) {
 		check_canary(s, x, s->random_active);
 	}
 
@@ -2802,7 +2811,7 @@ bool slab_free_hook(struct kmem_cache *s, void *x, bool init,
 			s->ctor(x);
 	}
 
-	if (canary) {
+	if (likely(!disable_canary) && canary) {
 		set_canary(s, x, s->random_inactive);
 	}
 
@@ -2873,7 +2882,9 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
 static void *setup_object(struct kmem_cache *s, void *object)
 {
 	setup_object_debug(s, object);
-	set_canary(s, object, s->random_inactive);
+	if (likely(!disable_canary) && object) {
+		set_canary(s, object, s->random_inactive);
+	}
 	object = kasan_init_slab_obj(s, object);
 	if (unlikely(s->ctor) && !has_sanitize_verify(s)) {
 		kasan_unpoison_new_object(s, object);
@@ -8326,7 +8337,7 @@ void __check_heap_object(const void *ptr, unsigned long n,
 		offset -= s->red_left_pad;
 	}
 
-	if (!is_kfence) {
+	if (likely(!disable_canary) && !is_kfence) {
 		check_canary(s, (void *)ptr - offset, s->random_active);
 	}