Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/sentry/fsimpl/proc/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ func (fs *filesystem) newTaskInode(ctx context.Context, task *kernel.Task, pidns
"oom_score": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, newStaticFile("0\n")),
"oom_score_adj": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &oomScoreAdj{task: task}),
"root": fs.newRootSymlink(ctx, task, fs.NextIno()),
"setgroups": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0644, &setgroupsData{task: task}),
"smaps": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &mmFile{task: task, ftype: smapsMMFile}),
"stat": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &taskStatData{task: task, pidns: pidns, tgstats: isThreadGroup}),
"statm": fs.newTaskOwnedInode(ctx, task, fs.NextIno(), 0444, &statmData{task: task}),
Expand Down
51 changes: 51 additions & 0 deletions pkg/sentry/fsimpl/proc/task_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,57 @@ func (d *idMapData) Write(ctx context.Context, _ *vfs.FileDescription, src userm
return int64(srclen), nil
}

// setgroupsData implements vfs.WritableDynamicBytesSource for
// /proc/[pid]/setgroups.
//
// Reading the file reports whether setgroups(2) is currently allowed in the
// task's user namespace ("allow\n" or "deny\n"). Writing "allow" or "deny"
// forwards the request to that user namespace.
//
// +stateify savable
type setgroupsData struct {
	kernfs.DynamicBytesFile

	// task is the task whose user namespace is queried and updated through
	// this file.
	task *kernel.Task
}

// Compile-time assertions that *setgroupsData implements dynamicInode and
// vfs.WritableDynamicBytesSource.
var (
	_ dynamicInode                   = (*setgroupsData)(nil)
	_ vfs.WritableDynamicBytesSource = (*setgroupsData)(nil)
)

// Generate implements vfs.WritableDynamicBytesSource.Generate.
//
// It writes "allow\n" to buf when the task's user namespace currently permits
// setgroups, and "deny\n" otherwise. It always returns nil.
func (d *setgroupsData) Generate(ctx context.Context, buf *bytes.Buffer) error {
	state := "deny\n"
	if d.task.UserNamespace().SetgroupsAllowed() {
		state = "allow\n"
	}
	buf.WriteString(state)
	return nil
}

// Write implements vfs.WritableDynamicBytesSource.Write.
//
// Only a write at offset 0 whose payload is "allow" or "deny", optionally
// followed by whitespace, is accepted. Any other offset or payload, and any
// payload of 8 bytes or more, fails with EINVAL. Errors from copying in the
// payload or from UserNamespace.SetSetgroupsAllowed are returned unchanged.
// On success the full length of src is reported as written.
func (d *setgroupsData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) {
	// maxLen mirrors sizeof(kbuf) in Linux's proc_setgroups_write(), which
	// rejects any write of 8 or more bytes so that the payload plus a
	// terminating NUL always fits in its 8-byte buffer.
	const maxLen = 8

	srclen := src.NumBytes()
	if offset != 0 || srclen >= maxLen {
		return 0, linuxerr.EINVAL
	}
	b := make([]byte, srclen)
	if _, err := src.CopyIn(ctx, b); err != nil {
		return 0, err
	}
	// Treat the payload like a C string: ignore everything from the first NUL.
	if nul := bytes.IndexByte(b, 0); nul >= 0 {
		b = b[:nul]
	}

	// Trailing whitespace (the C isspace() set) is allowed after the keyword.
	var allow bool
	switch string(bytes.TrimRight(b, " \t\n\v\f\r")) {
	case "allow":
		allow = true
	case "deny":
		allow = false
	default:
		return 0, linuxerr.EINVAL
	}
	if err := d.task.UserNamespace().SetSetgroupsAllowed(ctx, allow); err != nil {
		return 0, err
	}
	return int64(srclen), nil
}

var _ kernfs.Inode = (*memInode)(nil)

// memInode implements kernfs.Inode for /proc/[pid]/mem.
Expand Down
1 change: 1 addition & 0 deletions pkg/sentry/fsimpl/proc/tasks_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ var (
"oom_score": linux.DT_REG,
"oom_score_adj": linux.DT_REG,
"root": linux.DT_LNK,
"setgroups": linux.DT_REG,
"smaps": linux.DT_REG,
"stat": linux.DT_REG,
"statm": linux.DT_REG,
Expand Down
7 changes: 4 additions & 3 deletions pkg/sentry/kernel/auth/id_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,9 +251,10 @@ func (ns *UserNamespace) SetGIDMap(ctx context.Context, entries []IDMapEntry) er
}
// "In the case of gid_map, use of the setgroups(2) system call must
// first be denied by writing "deny" to the /proc/[pid]/setgroups file
// (see below) before writing to gid_map." (This file isn't implemented
// in the version of Linux we're emulating; see comment in
// UserNamespace.)
// (see below) before writing to gid_map."
if ns.setgroupsAllowed {
return linuxerr.EPERM
}
}
if err := ns.trySetGIDMap(entries); err != nil {
ns.gidMapFromParent.RemoveAll()
Expand Down
44 changes: 43 additions & 1 deletion pkg/sentry/kernel/auth/user_namespace.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"math"

"gvisor.dev/gvisor/pkg/abi/linux"
"gvisor.dev/gvisor/pkg/context"
"gvisor.dev/gvisor/pkg/errors/linuxerr"
)

Expand Down Expand Up @@ -57,7 +58,8 @@ type UserNamespace struct {
// user_namespace.parent_could_setfcap in Linux.
parentHadSetfcap bool

// TODO(b/27454212): Support disabling setgroups(2).
// setgroupsAllowed mirrors USERNS_SETGROUPS_ALLOWED in Linux. Protected by mu.
setgroupsAllowed bool
}

// NewRootUserNamespace returns a UserNamespace that is appropriate for a
Expand All @@ -67,6 +69,7 @@ type UserNamespace struct {
// namespace.
func NewRootUserNamespace() *UserNamespace {
var ns UserNamespace
ns.setgroupsAllowed = true
// """
// The initial user namespace has no parent namespace, but, for
// consistency, the kernel provides dummy user and group ID mapping files
Expand Down Expand Up @@ -129,12 +132,51 @@ func (c *Credentials) NewChildUserNamespace() (*UserNamespace, error) {
if !c.EffectiveKGID.In(c.UserNamespace).Ok() {
return nil, linuxerr.EPERM
}
c.UserNamespace.mu.Lock()
parentSetgroupsAllowed := c.UserNamespace.setgroupsAllowed
c.UserNamespace.mu.Unlock()
return &UserNamespace{
parent: c.UserNamespace,
owner: c.EffectiveKUID,
parentHadSetfcap: c.HasSelfCapability(linux.CAP_SETFCAP),
setgroupsAllowed: parentSetgroupsAllowed,
// "When a user namespace is created, it starts without a mapping of
// user IDs (group IDs) to the parent user namespace." -
// user_namespaces(7)
}, nil
}

// SetgroupsAllowed reports whether ns's USERNS_SETGROUPS_ALLOWED bit is set.
// The flag is read while holding ns.mu.
func (ns *UserNamespace) SetgroupsAllowed() bool {
	ns.mu.Lock()
	allowed := ns.setgroupsAllowed
	ns.mu.Unlock()
	return allowed
}

// MaySetgroups mirrors userns_may_setgroups in Linux: it returns true only if
// ns has a non-empty GID mapping from its parent and setgroups has not been
// denied in ns.
func (ns *UserNamespace) MaySetgroups() bool {
	ns.mu.Lock()
	defer ns.mu.Unlock()
	if ns.gidMapFromParent.IsEmpty() {
		// No GID mapping has been established yet.
		return false
	}
	return ns.setgroupsAllowed
}

// SetSetgroupsAllowed mirrors proc_setgroups_write in Linux.
//
// The credentials in ctx must hold CAP_SYS_ADMIN in ns, otherwise EPERM is
// returned. With allow set, the call succeeds without changing anything if
// setgroups is still allowed, and fails with EPERM if it has been denied.
// With allow clear, the call fails with EPERM if ns already has a GID mapping
// from its parent; otherwise it clears the setgroups-allowed flag.
func (ns *UserNamespace) SetSetgroupsAllowed(ctx context.Context, allow bool) error {
	if creds := CredentialsFromContext(ctx); !creds.HasCapabilityIn(linux.CAP_SYS_ADMIN, ns) {
		return linuxerr.EPERM
	}

	ns.mu.Lock()
	defer ns.mu.Unlock()
	switch {
	case allow && ns.setgroupsAllowed:
		// Already allowed; nothing to change.
		return nil
	case allow:
		// setgroups cannot be re-enabled once it has been denied.
		return linuxerr.EPERM
	case !ns.gidMapFromParent.IsEmpty():
		// setgroups cannot be denied once a GID mapping exists.
		return linuxerr.EPERM
	}
	ns.setgroupsAllowed = false
	return nil
}
3 changes: 3 additions & 0 deletions pkg/sentry/syscalls/linux/sys_identity.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ func Getgroups(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintp

// Setgroups implements the Linux syscall setgroups.
func Setgroups(t *kernel.Task, sysno uintptr, args arch.SyscallArguments) (uintptr, *kernel.SyscallControl, error) {
if !t.UserNamespace().MaySetgroups() {
return 0, nil, linuxerr.EPERM
}
size := args[0].Int()
if size < 0 || size > maxNGroups {
return 0, nil, linuxerr.EINVAL
Expand Down
123 changes: 123 additions & 0 deletions test/syscalls/linux/proc_pid_uid_gid_map.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#include <fcntl.h>
#include <grp.h>
#include <sched.h>
#include <sys/stat.h>
#include <sys/types.h>
Expand Down Expand Up @@ -309,5 +310,127 @@ INSTANTIATE_TEST_SUITE_P(All, ProcPidUidGidMapTest,
::testing::ValuesIn(UidGidMapTestParams()),
DescribeTestParam);

// Checks that /proc/self/setgroups can be opened inside a new user namespace
// and that it reads back as "allow\n".
TEST(ProcSelfSetgroupsTest, ExistsAndAllowsByDefault) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  auto read_default_state = [] {
    const int setgroups_fd = open("/proc/self/setgroups", O_RDONLY);
    TEST_PCHECK(setgroups_fd >= 0);
    char contents[16] = {};
    // Read at most sizeof(contents) - 1 bytes so the buffer stays
    // NUL-terminated.
    const ssize_t bytes_read =
        read(setgroups_fd, contents, sizeof(contents) - 1);
    TEST_PCHECK(bytes_read > 0);
    TEST_CHECK(std::string(contents, bytes_read) == "allow\n");
    TEST_PCHECK(close(setgroups_fd) == 0);
  };
  EXPECT_THAT(InNewUserNamespace(read_default_state),
              IsPosixErrorOkAndHolds(0));
}

// Checks that writing "deny" to /proc/self/setgroups inside a new user
// namespace succeeds, and that re-opening the file then reads "deny\n".
TEST(ProcSelfSetgroupsTest, DenyTogglesReadback) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  auto deny_then_read = [] {
    // Deny setgroups through a write-only descriptor.
    const int writer = open("/proc/self/setgroups", O_WRONLY);
    TEST_PCHECK(writer >= 0);
    TEST_PCHECK(write(writer, "deny", 4) == 4);
    TEST_PCHECK(close(writer) == 0);

    // Read the state back through a separate read-only descriptor.
    const int reader = open("/proc/self/setgroups", O_RDONLY);
    TEST_PCHECK(reader >= 0);
    char contents[16] = {};
    const ssize_t bytes_read = read(reader, contents, sizeof(contents) - 1);
    TEST_PCHECK(bytes_read > 0);
    TEST_CHECK(std::string(contents, bytes_read) == "deny\n");
    TEST_PCHECK(close(reader) == 0);
  };
  EXPECT_THAT(InNewUserNamespace(deny_then_read), IsPosixErrorOkAndHolds(0));
}

// Checks that once "deny" has been written, a later attempt to write "allow"
// fails with EPERM.
TEST(ProcSelfSetgroupsTest, AllowAfterDenyFails) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  auto deny_then_allow = [] {
    const int deny_fd = open("/proc/self/setgroups", O_WRONLY);
    TEST_PCHECK(deny_fd >= 0);
    TEST_PCHECK(write(deny_fd, "deny", 4) == 4);
    TEST_PCHECK(close(deny_fd) == 0);

    // Linux only accepts writes at offset 0, so the second write goes through
    // a freshly opened descriptor.
    const int allow_fd = open("/proc/self/setgroups", O_WRONLY);
    TEST_PCHECK(allow_fd >= 0);
    TEST_PCHECK(write(allow_fd, "allow", 5) < 0);
    TEST_CHECK(errno == EPERM);
    TEST_PCHECK(close(allow_fd) == 0);
  };
  EXPECT_THAT(InNewUserNamespace(deny_then_allow), IsPosixErrorOkAndHolds(0));
}

// Checks that writing a string other than "allow" or "deny" to
// /proc/self/setgroups fails with EINVAL.
TEST(ProcSelfSetgroupsTest, BadValueReturnsEINVAL) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  auto write_bad_value = [] {
    const int setgroups_fd = open("/proc/self/setgroups", O_WRONLY);
    TEST_PCHECK(setgroups_fd >= 0);
    TEST_PCHECK(write(setgroups_fd, "maybe", 5) < 0);
    TEST_CHECK(errno == EINVAL);
    TEST_PCHECK(close(setgroups_fd) == 0);
  };
  EXPECT_THAT(InNewUserNamespace(write_bad_value), IsPosixErrorOkAndHolds(0));
}

// Checks that setgroups(2) fails with EPERM, for both an empty and a
// one-element group list, after DenySelfSetgroups() has run in a new user
// namespace.
//
// NOTE(review): DenySelfSetgroups() is not defined in the visible part of this
// file; presumably it writes "deny" to /proc/self/setgroups -- confirm.
TEST(ProcSelfSetgroupsTest, SetgroupsSyscallFailsAfterDeny) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  auto setgroups_after_deny = [] {
    DenySelfSetgroups();

    // Empty supplementary group list.
    TEST_PCHECK(setgroups(0, nullptr) < 0);
    TEST_CHECK(errno == EPERM);

    // Single-entry supplementary group list.
    gid_t single_gid = 0;
    TEST_PCHECK(setgroups(1, &single_gid) < 0);
    TEST_CHECK(errno == EPERM);
  };
  EXPECT_THAT(InNewUserNamespace(setgroups_after_deny),
              IsPosixErrorOkAndHolds(0));
}

// Checks that setgroups(2) fails with EPERM, for both an empty and a
// one-element group list, when called in a new user namespace without the
// test having written a gid_map first.
TEST(ProcSelfSetgroupsTest, SetgroupsSyscallFailsBeforeGidMap) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  auto setgroups_without_gid_map = [] {
    // Empty supplementary group list.
    TEST_PCHECK(setgroups(0, nullptr) < 0);
    TEST_CHECK(errno == EPERM);

    // Single-entry supplementary group list.
    gid_t single_gid = 0;
    TEST_PCHECK(setgroups(1, &single_gid) < 0);
    TEST_CHECK(errno == EPERM);
  };
  EXPECT_THAT(InNewUserNamespace(setgroups_without_gid_map),
              IsPosixErrorOkAndHolds(0));
}

// Checks that a user namespace created after "deny" was written in its parent
// also reads "deny\n" from /proc/self/setgroups.
//
// NOTE(review): unshare(2) documents EPERM for CLONE_NEWUSER when the caller's
// effective UID or GID has no mapping in the parent namespace. Confirm that
// InNewUserNamespace() leaves the process in a state where the nested
// unshare() below can succeed.
TEST(ProcSelfSetgroupsTest, ChildUserNamespaceInheritsDeny) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  auto deny_then_nest = [] {
    // Deny setgroups in the current (outer) user namespace.
    const int writer = open("/proc/self/setgroups", O_WRONLY);
    TEST_PCHECK(writer >= 0);
    TEST_PCHECK(write(writer, "deny", 4) == 4);
    TEST_PCHECK(close(writer) == 0);

    // Enter a nested user namespace and read the state back there.
    TEST_PCHECK(unshare(CLONE_NEWUSER) == 0);
    const int reader = open("/proc/self/setgroups", O_RDONLY);
    TEST_PCHECK(reader >= 0);
    char contents[16] = {};
    const ssize_t bytes_read = read(reader, contents, sizeof(contents) - 1);
    TEST_PCHECK(bytes_read > 0);
    TEST_CHECK(std::string(contents, bytes_read) == "deny\n");
    TEST_PCHECK(close(reader) == 0);
  };
  EXPECT_THAT(InNewUserNamespace(deny_then_nest), IsPosixErrorOkAndHolds(0));
}

// Checks that, once a gid_map has been written for a child process living in a
// new user namespace, writing "deny" to that child's setgroups file fails with
// EPERM. Skipped unless the caller has CAP_SETGID.
TEST(ProcSelfSetgroupsTest, DenyAfterGidMapFails) {
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(CanCreateUserNamespace()));
  SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_SETGID)));

  pid_t child_pid;
  Cleanup cleanup_child;
  std::tie(child_pid, cleanup_child) =
      ASSERT_NO_ERRNO_AND_VALUE(CreateProcessInNewUserNamespace());

  // Map the caller's GID onto the same GID inside the child's namespace.
  const std::string gid_map_line =
      absl::StrCat(getgid(), " ", getgid(), " 1");
  const std::string gid_map_path =
      absl::StrCat("/proc/", child_pid, "/gid_map");
  auto gid_map_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(gid_map_path, O_RDWR));
  ASSERT_THAT(
      write(gid_map_fd.get(), gid_map_line.c_str(), gid_map_line.size()),
      SyscallSucceedsWithValue(gid_map_line.size()));

  // With the mapping in place, denying setgroups must be refused.
  const std::string setgroups_path =
      absl::StrCat("/proc/", child_pid, "/setgroups");
  auto setgroups_fd = ASSERT_NO_ERRNO_AND_VALUE(Open(setgroups_path, O_WRONLY));
  EXPECT_THAT(write(setgroups_fd.get(), "deny", 4),
              SyscallFailsWithErrno(EPERM));
}

} // namespace testing
} // namespace gvisor