diff --git a/pkg/sentry/fsimpl/proc/task.go b/pkg/sentry/fsimpl/proc/task.go index 5957ea4a09..6e8ef648ef 100644 --- a/pkg/sentry/fsimpl/proc/task.go +++ b/pkg/sentry/fsimpl/proc/task.go @@ -86,6 +86,7 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns "oom_score": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newStaticFile("0\n")), "oom_score_adj": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &oomScoreAdj{task: task}), "root": fs.newRootSymlink(ctx, task, fs.NextIno()), + "setgroups": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &setgroupsData{task: task}), "smaps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mmFile{task: task, ftype: smapsMMFile}), "stat": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}), "statm": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statmData{task: task}), diff --git a/pkg/sentry/fsimpl/proc/task_files.go b/pkg/sentry/fsimpl/proc/task_files.go index a1a1a7da4c..8c4cff84aa 100644 --- a/pkg/sentry/fsimpl/proc/task_files.go +++ b/pkg/sentry/fsimpl/proc/task_files.go @@ -414,6 +414,57 @@ func (d *idMapData) Write(ctx context.Context, _ *vfs.FileDescription, src userm return int64(srclen), nil } +// setgroupsData implements vfs.WritableDynamicBytesSource for +// /proc/[pid]/setgroups. +// +// +stateify savable +type setgroupsData struct { + kernfs.DynamicBytesFile + + task *kernel.Task +} + +var _ dynamicInode = (*setgroupsData)(nil) +var _ vfs.WritableDynamicBytesSource = (*setgroupsData)(nil) + +// Generate implements vfs.WritableDynamicBytesSource.Generate. It emits "allow\n" or "deny\n" according to SetgroupsAllowed on the task's user namespace. +func (d *setgroupsData) Generate(ctx context.Context, buf *bytes.Buffer) error { + if d.task.UserNamespace().SetgroupsAllowed() { + buf.WriteString("allow\n") + } else { + buf.WriteString("deny\n") + } + return nil +} + +// Write implements vfs.WritableDynamicBytesSource.Write. Only a sub-page write at offset 0 containing "allow" or "deny" is accepted; bytes from the first NUL on and trailing whitespace are ignored, any other input yields EINVAL, and errors from SetSetgroupsAllowed are returned as-is. 
+func (d *setgroupsData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) { + srclen := src.NumBytes() + if srclen >= hostarch.PageSize || offset != 0 { + return 0, linuxerr.EINVAL + } + b := make([]byte, srclen) + if _, err := src.CopyIn(ctx, b); err != nil { + return 0, err + } + if nul := bytes.IndexByte(b, 0); nul >= 0 { + b = b[:nul] + } + switch string(bytes.TrimRight(b, " \t\n\v\f\r")) { + case "allow": + if err := d.task.UserNamespace().SetSetgroupsAllowed(ctx, true); err != nil { + return 0, err + } + case "deny": + if err := d.task.UserNamespace().SetSetgroupsAllowed(ctx, false); err != nil { + return 0, err + } + default: + return 0, linuxerr.EINVAL + } + return int64(srclen), nil +} + var _ kernfs.Inode = (*memInode)(nil) // memInode implements kernfs.Inode for /proc/[pid]/mem. diff --git a/pkg/sentry/fsimpl/proc/tasks_test.go b/pkg/sentry/fsimpl/proc/tasks_test.go index ece77e5aec..fc96bb815f 100644 --- a/pkg/sentry/fsimpl/proc/tasks_test.go +++ b/pkg/sentry/fsimpl/proc/tasks_test.go @@ -94,6 +94,7 @@ var ( "oom_score": linux.DT_REG, "oom_score_adj": linux.DT_REG, "root": linux.DT_LNK, + "setgroups": linux.DT_REG, "smaps": linux.DT_REG, "stat": linux.DT_REG, "statm": linux.DT_REG, diff --git a/pkg/sentry/kernel/auth/id_map.go b/pkg/sentry/kernel/auth/id_map.go index 718a3a63c4..42af94c166 100644 --- a/pkg/sentry/kernel/auth/id_map.go +++ b/pkg/sentry/kernel/auth/id_map.go @@ -251,9 +251,10 @@ func (ns *UserNamespace) SetGIDMap(ctx context.Context, entries []IDMapEntry) er } // "In the case of gid_map, use of the setgroups(2) system call must // first be denied by writing "deny" to the /proc/[pid]/setgroups file - // (see below) before writing to gid_map." (This file isn't implemented - // in the version of Linux we're emulating; see comment in - // UserNamespace.) + // (see below) before writing to gid_map." 
+ if ns.setgroupsAllowed { + return linuxerr.EPERM + } } if err := ns.trySetGIDMap(entries); err != nil { ns.gidMapFromParent.RemoveAll() diff --git a/pkg/sentry/kernel/auth/user_namespace.go b/pkg/sentry/kernel/auth/user_namespace.go index cdf95b8b62..69fa7d7d46 100644 --- a/pkg/sentry/kernel/auth/user_namespace.go +++ b/pkg/sentry/kernel/auth/user_namespace.go @@ -18,6 +18,7 @@ import ( "math" "gvisor.dev/gvisor/pkg/abi/linux" + "gvisor.dev/gvisor/pkg/context" "gvisor.dev/gvisor/pkg/errors/linuxerr" ) @@ -57,7 +58,8 @@ type UserNamespace struct { // user_namespace.parent_could_setfcap in Linux. parentHadSetfcap bool - // TODO(b/27454212): Support disabling setgroups(2). + // setgroupsAllowed mirrors USERNS_SETGROUPS_ALLOWED in Linux. Protected by mu. + setgroupsAllowed bool } // NewRootUserNamespace returns a UserNamespace that is appropriate for a @@ -67,6 +69,7 @@ type UserNamespace struct { // namespace. func NewRootUserNamespace() *UserNamespace { var ns UserNamespace + ns.setgroupsAllowed = true // """ // The initial user namespace has no parent namespace, but, for // consistency, the kernel provides dummy user and group ID mapping files @@ -129,12 +132,51 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) { if !c.EffectiveKGID.In(c.UserNamespace).Ok() { return nil, linuxerr.EPERM } + c.UserNamespace.mu.Lock() + parentSetgroupsAllowed := c.UserNamespace.setgroupsAllowed + c.UserNamespace.mu.Unlock() return &UserNamespace{ parent: c.UserNamespace, owner: c.EffectiveKUID, parentHadSetfcap: c.HasSelfCapability(linux.CAP_SETFCAP), + setgroupsAllowed: parentSetgroupsAllowed, // "When a user namespace is created, it starts without a mapping of // user IDs (group IDs) to the parent user namespace." - // user_namespaces(7) }, nil } + +// SetgroupsAllowed returns ns's USERNS_SETGROUPS_ALLOWED bit, read while holding ns.mu. 
+func (ns *UserNamespace) SetgroupsAllowed() bool { + ns.mu.Lock() + defer ns.mu.Unlock() + return ns.setgroupsAllowed +} + +// MaySetgroups mirrors userns_may_setgroups in Linux. It returns true only if ns has a non-empty GID mapping from its parent and setgroups has not been denied in ns. +func (ns *UserNamespace) MaySetgroups() bool { + ns.mu.Lock() + defer ns.mu.Unlock() + return !ns.gidMapFromParent.IsEmpty() && ns.setgroupsAllowed +} + +// SetSetgroupsAllowed mirrors proc_setgroups_write in Linux. The credentials in ctx must hold CAP_SYS_ADMIN in ns. allow=true succeeds (changing nothing) only while setgroups is still allowed; allow=false clears the bit and fails once ns has a GID mapping. Every failure is EPERM. +func (ns *UserNamespace) SetSetgroupsAllowed(ctx context.Context, allow bool) error { + c := CredentialsFromContext(ctx) + if !c.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) { + return linuxerr.EPERM + } + ns.mu.Lock() + defer ns.mu.Unlock() + if allow { + if !ns.setgroupsAllowed { + return linuxerr.EPERM + } + return nil + } + if !ns.gidMapFromParent.IsEmpty() { + return linuxerr.EPERM + } + ns.setgroupsAllowed = false + return nil +} diff --git a/pkg/sentry/syscalls/linux/sys_identity.go b/pkg/sentry/syscalls/linux/sys_identity.go index fc49f573a7..b3af5cea1d 100644 --- a/pkg/sentry/syscalls/linux/sys_identity.go +++ b/pkg/sentry/syscalls/linux/sys_identity.go @@ -165,6 +165,9 @@ func Getgroups(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintp // Setgroups implements the Linux syscall setgroups. 
func Setgroups(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) { + if !t.UserNamespace().MaySetgroups() { + return 0, nil, linuxerr.EPERM + } size := args[0].Int() if size < 0 || size > maxNGroups { return 0, nil, linuxerr.EINVAL diff --git a/test/syscalls/linux/proc_pid_uid_gid_map.cc b/test/syscalls/linux/proc_pid_uid_gid_map.cc index 8d3bfa29f8..7c76db4508 100644 --- a/test/syscalls/linux/proc_pid_uid_gid_map.cc +++ b/test/syscalls/linux/proc_pid_uid_gid_map.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include <fcntl.h> +#include <grp.h> #include <sched.h> #include <sys/stat.h> #include <sys/types.h> @@ -116,6 +117,23 @@ void DenyPidSetgroups(pid_t pid) { DenySetgroupsByPath(absl::StrCat("/proc/", pid, "/setgroups").c_str()); } +// TEST_CHECK-fails on error, since this function is used in contexts that +// require async-signal-safety. +void WriteFileByPath(const char* path, const std::string& contents) { + int fd = open(path, O_WRONLY); + TEST_PCHECK(fd >= 0); + MaybeSave(); + TEST_PCHECK(write(fd, contents.data(), contents.size()) == + static_cast<ssize_t>(contents.size())); + MaybeSave(); + TEST_PCHECK(close(fd) == 0); +} + +void WriteSelfIDMaps(uint32_t uid, uint32_t gid) { + WriteFileByPath("/proc/self/uid_map", absl::StrCat(uid, " ", uid, " 1")); + WriteFileByPath("/proc/self/gid_map", absl::StrCat(gid, " ", gid, " 1")); +} + // Returns a valid UID/GID that isn't id. uint32_t another_id(uint32_t id) { return (id + 1) % 65535; } @@ -309,5 +327,133 @@ INSTANTIATE_TEST_SUITE_P(All, ProcPidUidGidMapTest, ::testing::ValuesIn(UidGidMapTestParams()), DescribeTestParam); +TEST(ProcSelfSetgroupsTest, ExistsAndInheritsParentState) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + auto parent_setgroups = GetContents("/proc/self/setgroups"); + SKIP_IF(parent_setgroups.error().errno_value() == ENOENT); + std::string expected = ASSERT_NO_ERRNO_AND_VALUE(parent_setgroups); + EXPECT_THAT(InNewUserNamespace([&] { + int fd = open("/proc/self/setgroups", O_RDONLY); + TEST_PCHECK(fd >= 0); + char buf[16] = {}; + ssize_t n = read(fd, buf, sizeof(buf) - 1); + TEST_PCHECK(n > 0); + TEST_CHECK(std::string(buf, n) == expected); + TEST_PCHECK(close(fd) == 0); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(ProcSelfSetgroupsTest, DenyTogglesReadback) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + EXPECT_THAT(InNewUserNamespace([] { + int wfd = open("/proc/self/setgroups", O_WRONLY); + TEST_PCHECK(wfd >= 0); + TEST_PCHECK(write(wfd, "deny", 4) == 4); + TEST_PCHECK(close(wfd) == 0); + int rfd = 
open("/proc/self/setgroups", O_RDONLY); + TEST_PCHECK(rfd >= 0); + char buf[16] = {}; + ssize_t n = read(rfd, buf, sizeof(buf) - 1); + TEST_PCHECK(n > 0); + TEST_CHECK(std::string(buf, n) == "deny\n"); + TEST_PCHECK(close(rfd) == 0); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(ProcSelfSetgroupsTest, AllowAfterDenyFails) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + // Linux only accepts writes at offset 0, so re-test with a fresh fd. + EXPECT_THAT(InNewUserNamespace([] { + int fd = open("/proc/self/setgroups", O_WRONLY); + TEST_PCHECK(fd >= 0); + TEST_PCHECK(write(fd, "deny", 4) == 4); + TEST_PCHECK(close(fd) == 0); + fd = open("/proc/self/setgroups", O_WRONLY); + TEST_PCHECK(fd >= 0); + TEST_PCHECK(write(fd, "allow", 5) < 0); + TEST_CHECK(errno == EPERM); + TEST_PCHECK(close(fd) == 0); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(ProcSelfSetgroupsTest, BadValueReturnsEINVAL) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + EXPECT_THAT(InNewUserNamespace([] { + int fd = open("/proc/self/setgroups", O_WRONLY); + TEST_PCHECK(fd >= 0); + TEST_PCHECK(write(fd, "maybe", 5) < 0); + TEST_CHECK(errno == EINVAL); + TEST_PCHECK(close(fd) == 0); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(ProcSelfSetgroupsTest, SetgroupsSyscallFailsAfterDeny) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + EXPECT_THAT(InNewUserNamespace([] { + DenySelfSetgroups(); + TEST_PCHECK(setgroups(0, nullptr) < 0); + TEST_CHECK(errno == EPERM); + gid_t one_gid = 0; + TEST_PCHECK(setgroups(1, &one_gid) < 0); + TEST_CHECK(errno == EPERM); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(ProcSelfSetgroupsTest, SetgroupsSyscallFailsBeforeGidMap) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + EXPECT_THAT(InNewUserNamespace([] { + TEST_PCHECK(setgroups(0, nullptr) < 0); + TEST_CHECK(errno == EPERM); + gid_t one_gid = 0; + TEST_PCHECK(setgroups(1, &one_gid) < 0); + TEST_CHECK(errno == EPERM); + }), + 
IsPosixErrorOkAndHolds(0)); +} + +TEST(ProcSelfSetgroupsTest, ChildUserNamespaceInheritsDeny) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + uint32_t uid = getuid(); + uint32_t gid = getgid(); + EXPECT_THAT(InNewUserNamespace([=] { + int wfd = open("/proc/self/setgroups", O_WRONLY); + TEST_PCHECK(wfd >= 0); + TEST_PCHECK(write(wfd, "deny", 4) == 4); + TEST_PCHECK(close(wfd) == 0); + WriteSelfIDMaps(uid, gid); + TEST_PCHECK(unshare(CLONE_NEWUSER) == 0); + int rfd = open("/proc/self/setgroups", O_RDONLY); + TEST_PCHECK(rfd >= 0); + char buf[16] = {}; + ssize_t n = read(rfd, buf, sizeof(buf) - 1); + TEST_PCHECK(n > 0); + TEST_CHECK(std::string(buf, n) == "deny\n"); + TEST_PCHECK(close(rfd) == 0); + }), + IsPosixErrorOkAndHolds(0)); +} + +TEST(ProcSelfSetgroupsTest, DenyAfterGidMapFails) { + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace())); + SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETGID))); + pid_t child_pid; + Cleanup cleanup_child; + std::tie(child_pid, cleanup_child) = + ASSERT_NO_ERRNO_AND_VALUE(CreateProcessInNewUserNamespace()); + std::string line = absl::StrCat(getgid(), " ", getgid(), " 1"); + auto map_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(absl::StrCat("/proc/", child_pid, "/gid_map"), O_RDWR)); + ASSERT_THAT(write(map_fd.get(), line.c_str(), line.size()), + SyscallSucceedsWithValue(line.size())); + auto sg_fd = ASSERT_NO_ERRNO_AND_VALUE( + Open(absl::StrCat("/proc/", child_pid, "/setgroups"), O_WRONLY)); + EXPECT_THAT(write(sg_fd.get(), "deny", 4), SyscallFailsWithErrno(EPERM)); +} + } // namespace testing } // namespace gvisor