diff --git a/Makefile b/Makefile index 7191fba3f..3198b332e 100644 --- a/Makefile +++ b/Makefile @@ -83,7 +83,8 @@ PLUGINS ?= \ nri-memory-policy \ nri-memory-qos \ nri-memtierd \ - nri-sgx-epc + nri-sgx-epc \ + nri-resctrl-mon BINARIES ?= \ config-manager \ diff --git a/cmd/plugins/resctrl-mon/Dockerfile b/cmd/plugins/resctrl-mon/Dockerfile new file mode 100644 index 000000000..c833ca203 --- /dev/null +++ b/cmd/plugins/resctrl-mon/Dockerfile @@ -0,0 +1,40 @@ +ARG GO_VERSION=1.26 + +FROM golang:${GO_VERSION}-bookworm AS builder + +ARG IMAGE_VERSION +ARG BUILD_VERSION +ARG BUILD_BUILDID +ARG DEBUG=0 +ARG NORACE=0 +ARG SKIP_LICENSES=0 + +WORKDIR /go/builder + +# Fetch go dependencies in a separate layer for caching +COPY go.mod go.sum . +COPY pkg/topology/ pkg/topology/ +RUN --mount=type=cache,target=/go/pkg/mod/ go mod download + +# Build nri-resctrl-mon +COPY . . + +RUN --mount=type=cache,target=/go/pkg/mod/ \ + --mount=type=cache,target="/root/.cache/go-build" \ + make IMAGE_VERSION=${IMAGE_VERSION} \ + BUILD_VERSION=${BUILD_VERSION} \ + BUILD_BUILDID=${BUILD_BUILDID} \ + DEBUG=$DEBUG \ + NORACE=$NORACE \ + OTHER_IMAGE_TARGETS="" \ + BINARIES="" \ + PLUGINS=nri-resctrl-mon \ + clean install-go-licenses build-plugins-static licenses + +FROM gcr.io/distroless/static + +COPY --from=builder /go/builder/build/bin/nri-resctrl-mon /bin/nri-resctrl-mon +COPY --from=builder /go/builder/build/licenses/nri-resctrl-mon/ /licenses/nri-resctrl-mon/ +COPY --from=builder /go/builder/sample-configs/nri-resctrl-mon.yaml /etc/nri/resctrl-mon/config.yaml + +ENTRYPOINT ["/bin/nri-resctrl-mon", "-idx", "90", "-config", "/etc/nri/resctrl-mon/config.yaml"] diff --git a/cmd/plugins/resctrl-mon/main.go b/cmd/plugins/resctrl-mon/main.go new file mode 100644 index 000000000..eedf56599 --- /dev/null +++ b/cmd/plugins/resctrl-mon/main.go @@ -0,0 +1,90 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "flag" + "os" + + "github.com/containerd/nri/pkg/stub" + "github.com/sirupsen/logrus" +) + +var ( + log *logrus.Logger +) + +func main() { + var ( + pluginName string + pluginIdx string + configFile string + verbose bool + veryVerbose bool + err error + ) + + log = logrus.StandardLogger() + log.SetFormatter(&logrus.TextFormatter{ + PadLevelText: true, + }) + + flag.StringVar(&pluginName, "name", "", "plugin name to register to NRI") + flag.StringVar(&pluginIdx, "idx", "", "plugin index to register to NRI") + flag.StringVar(&configFile, "config", "", "configuration file name") + flag.BoolVar(&verbose, "v", false, "verbose output") + flag.BoolVar(&veryVerbose, "vv", false, "very verbose output") + flag.Parse() + + if verbose { + log.SetLevel(logrus.DebugLevel) + } + if veryVerbose { + log.SetLevel(logrus.TraceLevel) + } + + p := newPlugin() + + if configFile != "" { + log.Debugf("reading configuration from %q", configFile) + data, err := os.ReadFile(configFile) + if err != nil { + log.Fatalf("error reading configuration file %q: %s", configFile, err) + } + if err = p.setConfig(data); err != nil { + log.Fatalf("error applying configuration from file %q: %s", configFile, err) + } + } + + opts := []stub.Option{ + stub.WithOnClose(p.onClose), + } + if pluginName != "" { + opts = append(opts, stub.WithPluginName(pluginName)) + } + if pluginIdx != "" { + opts = append(opts, stub.WithPluginIdx(pluginIdx)) + } + + if p.stub, err = stub.New(p, opts...); err != nil { + log.Fatalf("failed to create plugin stub: %v", err) + } + + if err = p.stub.Run(context.Background()); err != nil { + log.Errorf("plugin exited (%v)", err) + os.Exit(1) + } +} diff --git a/cmd/plugins/resctrl-mon/plugin.go b/cmd/plugins/resctrl-mon/plugin.go new file mode 100644 index 000000000..3d171c1bc --- /dev/null +++ b/cmd/plugins/resctrl-mon/plugin.go @@ -0,0 +1,435 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "sigs.k8s.io/yaml" + + "github.com/containerd/nri/pkg/api" + "github.com/containerd/nri/pkg/stub" +) + +// plugin implements the NRI plugin interface for resctrl monitoring groups. +type plugin struct { + stub stub.Stub + config *pluginConfig + state *podState + rdt *resctrlOps + snapshots *snapshotStore +} + +const ( + defaultSnapshotDir = "/run/nri-resctrl-mon" + defaultSnapshotTTL = 5 * time.Minute +) + +// pluginConfig holds the runtime configuration for the plugin. +type pluginConfig struct { + // ResctrlPath is the mount point of the resctrl filesystem. + ResctrlPath string `json:"resctrlPath"` + + // Namespaces filters mon_group creation to pods in these namespaces. + // Empty list means all namespaces. + Namespaces []string `json:"namespaces"` + + // LabelSelector filters mon_group creation to pods matching these labels. + // Empty map means all pods. + LabelSelector map[string]string `json:"labelSelector"` + + // SnapshotDir is the directory for counter snapshot files. + // Default: /run/nri-resctrl-mon + SnapshotDir string `json:"snapshotDir"` + + // SnapshotTTL is how long completed snapshots are kept before pruning. + // Parsed as a Go duration string (e.g., "5m", "1h"). + // Default: 5m + SnapshotTTL string `json:"snapshotTTL"` +} + +func newPlugin() *plugin { + cfg := &pluginConfig{ + ResctrlPath: defaultResctrlPath, + SnapshotDir: defaultSnapshotDir, + SnapshotTTL: defaultSnapshotTTL.String(), + } + snap, err := newSnapshotStore(cfg.SnapshotDir, defaultSnapshotTTL) + if err != nil { + log.Warnf("failed to create snapshot store at %s: %v (snapshots disabled)", cfg.SnapshotDir, err) + } + return &plugin{ + config: cfg, + state: newPodState(), + rdt: newResctrlOps(cfg.ResctrlPath), + snapshots: snap, + } +} + +// Configure handles connecting to container runtime's NRI server. +func (p *plugin) Configure(ctx context.Context, config, runtime, version string) (stub.EventMask, error) { + log.Infof("Connected to %s %s...", runtime, version) + if config != "" { + log.Debugf("loading configuration from NRI server") + if err := p.setConfig([]byte(config)); err != nil { + return 0, err + } + } + return 0, nil +} + +// onClose handles losing connection to container runtime. +func (p *plugin) onClose() { + log.Infof("Connection to the runtime lost, exiting...") + os.Exit(0) +} + +// setConfig applies new plugin configuration. +func (p *plugin) setConfig(data []byte) error { + log.Tracef("setConfig: parsing\n---8<---\n%s\n--->8---", data) + cfg := pluginConfig{ + ResctrlPath: defaultResctrlPath, + SnapshotDir: defaultSnapshotDir, + SnapshotTTL: defaultSnapshotTTL.String(), + } + if err := yaml.Unmarshal(data, &cfg); err != nil { + return fmt.Errorf("setConfig: cannot parse configuration: %w", err) + } + resctrlPath := filepath.Clean(cfg.ResctrlPath) + if resctrlPath == "" || !filepath.IsAbs(resctrlPath) { + return fmt.Errorf("setConfig: resctrlPath must be an absolute path, got %q", cfg.ResctrlPath) + } + cfg.ResctrlPath = resctrlPath + + snapshotDir := filepath.Clean(cfg.SnapshotDir) + if snapshotDir == "" || !filepath.IsAbs(snapshotDir) { + return fmt.Errorf("setConfig: snapshotDir must be an absolute path, got %q", cfg.SnapshotDir) + } + // Prevent writing snapshots into the resctrl filesystem or sensitive system dirs. + for _, forbidden := range []string{resctrlPath, "/sys", "/proc", "/dev", "/etc"} { + if snapshotDir == forbidden || strings.HasPrefix(snapshotDir, forbidden+"/") { + return fmt.Errorf("setConfig: snapshotDir must not be under %s, got %q", forbidden, snapshotDir) + } + } + cfg.SnapshotDir = snapshotDir + + ttl, err := time.ParseDuration(cfg.SnapshotTTL) + if err != nil { + return fmt.Errorf("setConfig: invalid snapshotTTL %q: %w", cfg.SnapshotTTL, err) + } + + p.config = &cfg + p.rdt = newResctrlOps(cfg.ResctrlPath) + + snap, err := newSnapshotStore(cfg.SnapshotDir, ttl) + if err != nil { + log.Warnf("setConfig: failed to create snapshot store at %s: %v (snapshots disabled)", cfg.SnapshotDir, err) + p.snapshots = nil + } else { + p.snapshots = snap + } + + log.Debugf("configuration: resctrlPath=%s namespaces=%v labelSelector=%v snapshotDir=%s snapshotTTL=%s", + cfg.ResctrlPath, cfg.Namespaces, cfg.LabelSelector, cfg.SnapshotDir, cfg.SnapshotTTL) + return nil +} + +// Synchronize is called at plugin startup with the current set of pods and containers. +// It reconciles in-memory state with what exists on the resctrl filesystem. +func (p *plugin) Synchronize(ctx context.Context, pods []*api.PodSandbox, containers []*api.Container) ([]*api.ContainerUpdate, error) { + log.Infof("synchronizing state: %d pods, %d containers", len(pods), len(containers)) + + // Build a lookup from sandbox ID to pod (containers reference + // pods by sandbox ID, not by Kubernetes UID). + podBySandboxID := make(map[string]*api.PodSandbox, len(pods)) + for _, pod := range pods { + podBySandboxID[pod.GetId()] = pod + } + + // Create mon_groups for running containers that don't have one, + // and write their PIDs to ensure monitoring is active after restart. + for _, ctr := range containers { + pod, ok := podBySandboxID[ctr.GetPodSandboxId()] + if !ok { + log.Debugf("Synchronize: container %s has no matching pod, skipping", ctr.GetName()) + continue + } + if !p.shouldMonitorPod(pod) { + continue + } + podUID := pod.GetUid() + rdtClass := getRDTClass(ctr) + if err := p.ensureMonGroup(podUID, ctr.GetId(), rdtClass); err != nil { + log.Warnf("Synchronize: failed to create mon_group for pod %s: %v", podUID, err) + continue + } + pid := int(ctr.GetPid()) + if pid > 0 { + monGroupDir := p.state.getMonGroupDir(podUID) + if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { + log.Warnf("Synchronize: failed to write PID %d for pod %s: %v", pid, podUID, err) + } else { + log.Debugf("Synchronize: assigned pid %d for pod %s", pid, podUID) + p.takeInitialSnapshot(podUID, monGroupDir) + } + } + } + + // Remove orphaned mon_groups from a previous plugin instance. + p.rdt.cleanOrphanedMonGroups(p.state) + + log.Infof("synchronization complete: tracking %d pods", p.state.podCount()) + return nil, nil +} + +// PostCreateContainer is called after the container is created but before +// it starts executing. The container PID is NOT yet available (pid=0) because +// the init process has not been started. We create the mon_group here so it +// is ready for PID assignment in StartContainer. +func (p *plugin) PostCreateContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + + log.Debugf("PostCreateContainer %s: pid=%d (expected 0)", ctrName, ctr.GetPid()) + + if !p.shouldMonitorPod(pod) { + log.Debugf("PostCreateContainer %s: pod filtered out, skipping", ctrName) + return nil + } + + rdtClass := getRDTClass(ctr) + if err := p.ensureMonGroup(podUID, ctr.GetId(), rdtClass); err != nil { + log.Warnf("PostCreateContainer %s: failed to create mon_group: %v", ctrName, err) + return nil // non-fatal: don't block container creation + } + + log.Infof("PostCreateContainer %s: mon_group ready, PID will be assigned in StartContainer", ctrName) + return nil +} + +// StartContainer is called just before the container process starts executing. +// At this point the init process has been created (via runc create) and the PID +// is available, but the process is paused and has NOT forked any threads yet. +// This is the ideal moment to write the PID to the resctrl mon_group tasks +// file: the kernel assigns the RMID to this PID, and when the process starts +// and forks threads they all inherit the RMID automatically. +// +// If the PID is not available (should not happen at this stage), we fall back +// to PostStartContainer which will write PIDs after the process starts. +func (p *plugin) StartContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + pid := int(ctr.GetPid()) + + log.Debugf("StartContainer %s: pid=%d", ctrName, pid) + + if !p.shouldMonitorPod(pod) { + return nil + } + + monGroupDir := p.state.getMonGroupDir(podUID) + if monGroupDir == "" { + log.Debugf("StartContainer %s: no mon_group (pod not tracked), skipping", ctrName) + return nil + } + + if pid > 0 { + if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { + log.Warnf("StartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) + } else { + log.Infof("StartContainer %s: assigned pid %d to mon_group %s (pre-start, no threads yet)", ctrName, pid, monGroupDir) + p.takeInitialSnapshot(podUID, monGroupDir) + } + } else { + log.Warnf("StartContainer %s: PID not available at pre-start, will retry in PostStartContainer", ctrName) + } + + return nil +} + +// PostStartContainer is called after the container process has been started. +// This is a fallback: if StartContainer did not have the PID (which should +// not happen on containerd ≥ 2.x), we write the init PID here. The init +// PID is sufficient because all child threads inherit the RMID. +func (p *plugin) PostStartContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) error { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + pid := int(ctr.GetPid()) + + log.Debugf("PostStartContainer %s: pid=%d", ctrName, pid) + + if !p.shouldMonitorPod(pod) { + return nil + } + + monGroupDir := p.state.getMonGroupDir(podUID) + if monGroupDir == "" { + return nil + } + + // Fallback: write the init PID if StartContainer didn't. + if pid > 0 { + if err := p.rdt.writeTaskPID(monGroupDir, pid); err != nil { + log.Warnf("PostStartContainer %s: failed to write PID %d to tasks: %v", ctrName, pid, err) + } else { + log.Infof("PostStartContainer %s: fallback assigned pid %d to mon_group %s", ctrName, pid, monGroupDir) + p.takeInitialSnapshot(podUID, monGroupDir) + } + } else { + log.Warnf("PostStartContainer %s: PID still 0 after start, unexpected", ctrName) + } + + return nil +} + +// StopContainer is called when a container is being stopped. +func (p *plugin) StopContainer(ctx context.Context, pod *api.PodSandbox, ctr *api.Container) ([]*api.ContainerUpdate, error) { + podUID := pod.GetUid() + ctrName := pprintCtr(pod, ctr) + + log.Debugf("StopContainer %s", ctrName) + + monGroupDir := p.state.getMonGroupDir(podUID) + if monGroupDir == "" { + return nil, nil + } + + p.state.removeContainer(podUID, ctr.GetId()) + + if p.state.podHasNoContainers(podUID) { + log.Infof("StopContainer %s: last container, removing mon_group %s", ctrName, monGroupDir) + p.takeFinalSnapshot(podUID, monGroupDir) + if err := p.rdt.removeMonGroup(monGroupDir); err != nil { + log.Warnf("StopContainer %s: failed to remove mon_group, will retry on next sync: %v", ctrName, err) + return nil, nil + } + p.state.removePod(podUID) + } + + return nil, nil +} + +// ensureMonGroup creates the mon_group directory if it doesn't exist and registers +// the container in the in-memory state. +// +// Limitation: all containers in a pod share a single mon_group under the first +// container's RDT class. If an allocation plugin assigns different classes to +// containers in the same pod, subsequent containers use the first class. +func (p *plugin) ensureMonGroup(podUID, containerID, rdtClass string) error { + if !looksLikePodUID(podUID) { + return fmt.Errorf("invalid pod UID %q", podUID) + } + + if p.state.getMonGroupDir(podUID) != "" { + // Mon_group already exists for this pod. Just add the container. + p.state.addContainer(podUID, containerID) + return nil + } + + monGroupDir, err := p.rdt.createMonGroup(rdtClass, podUID) + if err != nil { + return err + } + + p.state.addPod(podUID, monGroupDir) + p.state.addContainer(podUID, containerID) + log.Infof("created mon_group %s for pod %s", monGroupDir, podUID) + return nil +} + +// shouldMonitorPod checks namespace and label filters. +func (p *plugin) shouldMonitorPod(pod *api.PodSandbox) bool { + if len(p.config.Namespaces) > 0 { + ns := pod.GetNamespace() + found := false + for _, allowed := range p.config.Namespaces { + if ns == allowed { + found = true + break + } + } + if !found { + return false + } + } + if len(p.config.LabelSelector) > 0 { + labels := pod.GetLabels() + for k, v := range p.config.LabelSelector { + if labels[k] != v { + return false + } + } + } + return true +} + +// getRDTClass extracts the RDT class from a container's Linux resources. +func getRDTClass(ctr *api.Container) string { + if linux := ctr.GetLinux(); linux != nil { + if res := linux.GetResources(); res != nil { + if rdt := res.GetRdtClass(); rdt != nil { + return rdt.GetValue() + } + } + } + return "" +} + +// pprintCtr returns a human-readable container identifier. +func pprintCtr(pod *api.PodSandbox, ctr *api.Container) string { + return fmt.Sprintf("%s/%s:%s", pod.GetNamespace(), pod.GetName(), ctr.GetName()) +} + +// takeInitialSnapshot records the initial counter values for a pod's mon_group. +// It is called once per pod, after the first successful PID write. +func (p *plugin) takeInitialSnapshot(podUID, monGroupDir string) { + if p.snapshots == nil || p.state.isInitialSnapshotDone(podUID) { + return + } + counters, err := p.rdt.readMonData(monGroupDir) + if err != nil { + log.Warnf("takeInitialSnapshot: failed to read mon_data for pod %s: %v", podUID, err) + return + } + if err := p.snapshots.writeInitial(podUID, monGroupDir, counters); err != nil { + log.Warnf("takeInitialSnapshot: failed to write snapshot for pod %s: %v", podUID, err) + return + } + p.state.setInitialSnapshotDone(podUID) + log.Infof("takeInitialSnapshot: recorded initial counters for pod %s", podUID) +} + +// takeFinalSnapshot records the final counter values before a mon_group is removed. +func (p *plugin) takeFinalSnapshot(podUID, monGroupDir string) { + if p.snapshots == nil { + return + } + counters, err := p.rdt.readMonData(monGroupDir) + if err != nil { + log.Warnf("takeFinalSnapshot: failed to read mon_data for pod %s: %v", podUID, err) + return + } + if err := p.snapshots.writeFinal(podUID, counters); err != nil { + log.Warnf("takeFinalSnapshot: failed to write snapshot for pod %s: %v", podUID, err) + return + } + log.Infof("takeFinalSnapshot: recorded final counters for pod %s", podUID) + p.snapshots.pruneCompleted() +} diff --git a/cmd/plugins/resctrl-mon/plugin_test.go b/cmd/plugins/resctrl-mon/plugin_test.go new file mode 100644 index 000000000..9863d1ed0 --- /dev/null +++ b/cmd/plugins/resctrl-mon/plugin_test.go @@ -0,0 +1,376 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "context" + "os" + "path/filepath" + "strconv" + "strings" + "testing" + "time" + + "github.com/containerd/nri/pkg/api" + "github.com/sirupsen/logrus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func init() { + log = logrus.StandardLogger() + log.SetLevel(logrus.TraceLevel) +} + +func newTestPlugin(resctrlPath string) *plugin { + cfg := &pluginConfig{ + ResctrlPath: resctrlPath, + SnapshotDir: filepath.Join(resctrlPath, "_snapshots"), + SnapshotTTL: "5m", + } + snap, _ := newSnapshotStore(cfg.SnapshotDir, 5*time.Minute) + return &plugin{ + config: cfg, + state: newPodState(), + rdt: newResctrlOps(resctrlPath), + snapshots: snap, + } +} + +func makePod(uid, namespace, name string) *api.PodSandbox { + return &api.PodSandbox{ + Id: "sandbox-" + uid, // CRI sandbox ID != K8s pod UID + Uid: uid, + Namespace: namespace, + Name: name, + Labels: map[string]string{}, + } +} + +func makeContainer(id, name, podSandboxID string, pid uint32, rdtClass string) *api.Container { + ctr := &api.Container{ + Id: id, + PodSandboxId: podSandboxID, + Name: name, + Pid: pid, + Linux: &api.LinuxContainer{ + Resources: &api.LinuxResources{}, + }, + } + if rdtClass != "" { + ctr.Linux.Resources.RdtClass = &api.OptionalString{Value: rdtClass} + } + return ctr +} + +func TestShouldMonitorPod_NoFilters(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + pod := makePod("uid-1", "default", "test-pod") + assert.True(t, p.shouldMonitorPod(pod)) +} + +func TestShouldMonitorPod_NamespaceFilter(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + p.config.Namespaces = []string{"production", "staging"} + + pod1 := makePod("uid-1", "production", "pod1") + assert.True(t, p.shouldMonitorPod(pod1)) + + pod2 := makePod("uid-2", "kube-system", "pod2") + assert.False(t, p.shouldMonitorPod(pod2)) +} + +func TestShouldMonitorPod_LabelFilter(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + p.config.LabelSelector = map[string]string{"monitor": "true"} + + pod1 := makePod("uid-1", "default", "pod1") + pod1.Labels = map[string]string{"monitor": "true", "app": "web"} + assert.True(t, p.shouldMonitorPod(pod1)) + + pod2 := makePod("uid-2", "default", "pod2") + pod2.Labels = map[string]string{"app": "web"} + assert.False(t, p.shouldMonitorPod(pod2)) +} + +func TestGetRDTClass(t *testing.T) { + ctr1 := makeContainer("c1", "container1", "uid-1", 1234, "BestEffort") + assert.Equal(t, "BestEffort", getRDTClass(ctr1)) + + ctr2 := makeContainer("c2", "container2", "uid-1", 1235, "") + assert.Equal(t, "", getRDTClass(ctr2)) + + ctr3 := &api.Container{ + Id: "c3", + Name: "container3", + } + assert.Equal(t, "", getRDTClass(ctr3)) +} + +func TestPprintCtr(t *testing.T) { + pod := makePod("uid-1", "default", "my-pod") + ctr := makeContainer("c1", "my-container", "uid-1", 1234, "") + assert.Equal(t, "default/my-pod:my-container", pprintCtr(pod, ctr)) +} + +func TestPostCreateContainer_FilteredPod(t *testing.T) { + p := newTestPlugin(t.TempDir()) + p.config.Namespaces = []string{"production"} + + pod := makePod("uid-1", "default", "test-pod") + ctr := makeContainer("c1", "container1", "uid-1", 1234, "") + + err := p.PostCreateContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Pod should not be tracked since it's not in the production namespace. + assert.Equal(t, 0, p.state.podCount()) +} + +func TestPostCreateContainer_CreatesMonGroup(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + + pod := makePod("a1b2c3d4-e5f6-7890-abcd-ef1234567890", "default", "test-pod") + ctr := makeContainer("c1", "container1", "a1b2c3d4-e5f6-7890-abcd-ef1234567890", 0, "") + + err := p.PostCreateContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Pod should be tracked. + assert.Equal(t, 1, p.state.podCount()) + monDir := p.state.getMonGroupDir("a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Contains(t, monDir, "mon_groups/a1b2c3d4-e5f6-7890-abcd-ef1234567890") +} + +func TestPostCreateContainer_WithRDTClass(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + + pod := makePod("a1b2c3d4-e5f6-7890-abcd-ef1234567890", "default", "test-pod") + ctr := makeContainer("c1", "container1", "a1b2c3d4-e5f6-7890-abcd-ef1234567890", 0, "BestEffort") + + err := p.PostCreateContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + monDir := p.state.getMonGroupDir("a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Contains(t, monDir, "BestEffort/mon_groups/a1b2c3d4-e5f6-7890-abcd-ef1234567890") +} + +func TestMultiContainerPod(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := makePod(podUID, "default", "multi-pod") + ctr1 := makeContainer("c1", "container1", podUID, 0, "") + ctr2 := makeContainer("c2", "container2", podUID, 0, "") + + // First container creates the mon_group. + err := p.PostCreateContainer(context.Background(), pod, ctr1) + require.NoError(t, err) + assert.Equal(t, 1, p.state.podCount()) + + // Second container reuses the same mon_group. + err = p.PostCreateContainer(context.Background(), pod, ctr2) + require.NoError(t, err) + assert.Equal(t, 1, p.state.podCount()) // still one pod + + // Stopping first container should not remove the mon_group. + _, err = p.StopContainer(context.Background(), pod, ctr1) + require.NoError(t, err) + assert.Equal(t, 1, p.state.podCount()) + assert.False(t, p.state.podHasNoContainers(podUID)) + + // Stopping second container should remove the mon_group. + _, err = p.StopContainer(context.Background(), pod, ctr2) + require.NoError(t, err) + assert.Equal(t, 0, p.state.podCount()) +} + +func TestStopContainer_UnknownPod(t *testing.T) { + p := newTestPlugin(t.TempDir()) + + pod := makePod("unknown-uid", "default", "unknown-pod") + ctr := makeContainer("c1", "container1", "unknown-uid", 1234, "") + + updates, err := p.StopContainer(context.Background(), pod, ctr) + require.NoError(t, err) + assert.Nil(t, updates) +} + +func TestSetConfig(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + + configYAML := []byte(` +resctrlPath: /tmp/test-resctrl +namespaces: + - production + - staging +labelSelector: + monitor: "true" +`) + + err := p.setConfig(configYAML) + require.NoError(t, err) + assert.Equal(t, "/tmp/test-resctrl", p.config.ResctrlPath) + assert.Equal(t, []string{"production", "staging"}, p.config.Namespaces) + assert.Equal(t, map[string]string{"monitor": "true"}, p.config.LabelSelector) +} + +func TestSetConfig_InvalidYAML(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + + err := p.setConfig([]byte(":::invalid yaml")) + assert.Error(t, err) +} + +func TestSetConfig_RelativePath(t *testing.T) { + p := newTestPlugin("/tmp/resctrl-test") + + err := p.setConfig([]byte("resctrlPath: relative/path")) + assert.Error(t, err) + assert.Contains(t, err.Error(), "absolute path") +} + +func TestSynchronize_UsesUIDNotSandboxID(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := makePod(podUID, "default", "sync-pod") + // Container references the pod by sandbox ID, not by UID. + ctr := makeContainer("c1", "container1", pod.GetId(), 0, "") + + _, err := p.Synchronize(context.Background(), []*api.PodSandbox{pod}, []*api.Container{ctr}) + require.NoError(t, err) + + // The mon_group should be keyed by the K8s pod UID, not the sandbox ID. + assert.Equal(t, 1, p.state.podCount()) + assert.True(t, p.state.hasPod(podUID)) + assert.False(t, p.state.hasPod(pod.GetId())) + + monDir := p.state.getMonGroupDir(podUID) + assert.Contains(t, monDir, podUID) +} + +func TestEnsureMonGroup_InvalidUID(t *testing.T) { + p := newTestPlugin(t.TempDir()) + + err := p.ensureMonGroup("", "c1", "") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid pod UID") + + err = p.ensureMonGroup("not-a-uuid", "c1", "") + assert.Error(t, err) + + assert.Equal(t, 0, p.state.podCount()) +} + +func TestStartContainer_CreatesSnapshot(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := makePod(podUID, "default", "snap-pod") + ctr := makeContainer("c1", "container1", pod.GetId(), 12345, "") + + // Create the mon_group and fake mon_data so readMonData succeeds. + err := p.ensureMonGroup(podUID, "c1", "") + require.NoError(t, err) + + monDir := p.state.getMonGroupDir(podUID) + monData := filepath.Join(monDir, "mon_data", "mon_PERF_PKG_00") + require.NoError(t, os.MkdirAll(monData, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "core_energy"), []byte("10.5\n"), 0644)) + // Create a tasks file so writeTaskPID succeeds in the test sandbox. + require.NoError(t, os.WriteFile(filepath.Join(monDir, "tasks"), nil, 0644)) + + // StartContainer triggers takeInitialSnapshot. + err = p.StartContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Verify snapshot directory with .begin files and symlink. + snapDir := filepath.Join(p.snapshots.dir, podUID) + beginFile := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin") + data, err := os.ReadFile(beginFile) + require.NoError(t, err) + val, err := strconv.ParseFloat(strings.TrimSpace(string(data)), 64) + require.NoError(t, err) + assert.Equal(t, 10.5, val) + + // Verify symlink exists. + linkPath := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy") + _, err = os.Readlink(linkPath) + assert.NoError(t, err) + + // Verify created_at exists, completed_at does not. + _, err = os.Stat(filepath.Join(snapDir, "created_at")) + assert.NoError(t, err) + _, err = os.Stat(filepath.Join(snapDir, "completed_at")) + assert.True(t, os.IsNotExist(err)) +} + +func TestStopContainer_WritesFinalSnapshot(t *testing.T) { + tmpDir := t.TempDir() + p := newTestPlugin(tmpDir) + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + pod := makePod(podUID, "default", "snap-pod") + ctr := makeContainer("c1", "container1", pod.GetId(), 12345, "") + + // Set up mon_group + fake mon_data. + err := p.ensureMonGroup(podUID, "c1", "") + require.NoError(t, err) + + monDir := p.state.getMonGroupDir(podUID) + monData := filepath.Join(monDir, "mon_data", "mon_PERF_PKG_00") + require.NoError(t, os.MkdirAll(monData, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "core_energy"), []byte("10.5\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monDir, "tasks"), nil, 0644)) + + // Take initial snapshot. + err = p.StartContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Update the counter value for final read. + require.NoError(t, os.WriteFile(filepath.Join(monData, "core_energy"), []byte("99.9\n"), 0644)) + + // StopContainer triggers takeFinalSnapshot. + _, err = p.StopContainer(context.Background(), pod, ctr) + require.NoError(t, err) + + // Verify .begin and .end files. + snapDir := filepath.Join(p.snapshots.dir, podUID) + beginFile := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy.begin") + endFile := filepath.Join(snapDir, "mon_data", "mon_PERF_PKG_00", "core_energy.end") + + beginData, err := os.ReadFile(beginFile) + require.NoError(t, err) + beginVal, err := strconv.ParseFloat(strings.TrimSpace(string(beginData)), 64) + require.NoError(t, err) + assert.Equal(t, 10.5, beginVal) + + endData, err := os.ReadFile(endFile) + require.NoError(t, err) + endVal, err := strconv.ParseFloat(strings.TrimSpace(string(endData)), 64) + require.NoError(t, err) + assert.Equal(t, 99.9, endVal) + + // Verify completed_at exists. + _, err = os.Stat(filepath.Join(snapDir, "completed_at")) + assert.NoError(t, err) +} diff --git a/cmd/plugins/resctrl-mon/resctrl.go b/cmd/plugins/resctrl-mon/resctrl.go new file mode 100644 index 000000000..985ff4fa0 --- /dev/null +++ b/cmd/plugins/resctrl-mon/resctrl.go @@ -0,0 +1,268 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "syscall" +) + +const ( + defaultResctrlPath = "/sys/fs/resctrl" + monGroupsDir = "mon_groups" +) + +// resctrlOps handles filesystem operations on the resctrl mount. +type resctrlOps struct { + resctrlPath string +} + +func newResctrlOps(resctrlPath string) *resctrlOps { + return &resctrlOps{ + resctrlPath: resctrlPath, + } +} + +// createMonGroup creates a mon_group directory under the appropriate ctrl_group +// and returns the full path. If rdtClass is empty, the mon_group is created +// under the root resctrl directory. +// +// The kernel assigns an RMID to the new mon_group on mkdir. If no RMIDs are +// available, mkdir returns ENOSPC. +func (r *resctrlOps) createMonGroup(rdtClass, podUID string) (string, error) { + parentDir := r.resctrlPath + if rdtClass != "" { + if !isValidRDTClass(rdtClass) { + return "", fmt.Errorf("invalid RDT class name %q", rdtClass) + } + parentDir = filepath.Join(r.resctrlPath, rdtClass) + } + + // When an RDT class is specified, the ctrl_group must already exist + // (created by an allocation plugin). Do not create it implicitly — + // that would make an unintended ctrl_group in the resctrl filesystem. + if rdtClass != "" { + info, err := os.Stat(parentDir) + if err != nil { + return "", fmt.Errorf("ctrl_group %s does not exist: %w", parentDir, err) + } + if !info.IsDir() { + return "", fmt.Errorf("ctrl_group %s is not a directory", parentDir) + } + } + + monGroupsPath := filepath.Join(parentDir, monGroupsDir) + monGroupDir := filepath.Join(monGroupsPath, podUID) + + // Ensure the mon_groups/ directory exists. On a real resctrl mount + // this is always present. For testing, create it if needed. + if err := os.MkdirAll(monGroupsPath, 0755); err != nil { + return "", fmt.Errorf("mon_groups dir not available at %s: %w", monGroupsPath, err) + } + + // Use Mkdir (not MkdirAll) for the final mon_group directory to + // avoid accidentally creating a ctrl_group if rdtClass is wrong. + if err := os.Mkdir(monGroupDir, 0755); err != nil { + if errors.Is(err, os.ErrExist) { + return monGroupDir, nil + } + if errors.Is(err, syscall.ENOSPC) { + return "", fmt.Errorf("no RMIDs available for pod %s: %w", podUID, err) + } + return "", fmt.Errorf("failed to create mon_group %s: %w", monGroupDir, err) + } + + return monGroupDir, nil +} + +// removeMonGroup removes a mon_group directory. The kernel releases the RMID. +func (r *resctrlOps) removeMonGroup(monGroupDir string) error { + err := os.Remove(monGroupDir) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("failed to remove mon_group %s: %w", monGroupDir, err) + } + return nil +} + +// writeTaskPID writes a PID to the mon_group's tasks file. The kernel assigns +// this PID (and all future child processes) to the mon_group's RMID. +func (r *resctrlOps) writeTaskPID(monGroupDir string, pid int) error { + tasksFile := filepath.Join(monGroupDir, "tasks") + f, err := os.OpenFile(tasksFile, os.O_WRONLY, 0) + if err != nil { + return fmt.Errorf("failed to open %s for pid %d: %w", tasksFile, pid, err) + } + defer f.Close() + data := []byte(strconv.Itoa(pid) + "\n") + if _, err := f.Write(data); err != nil { + return fmt.Errorf("failed to write pid %d to %s: %w", pid, tasksFile, err) + } + return nil +} + +// readMonData reads all monitoring event files from a mon_group's mon_data/ +// directory. It returns a nested map: domain → event → value. +// +// For example, a mon_group with PERF_PKG and L3 monitoring will return: +// +// { +// "mon_PERF_PKG_00": {"activity": 1234, "core_energy": 42.123}, +// "mon_L3_00": {"llc_occupancy": 12345, "mbm_local_bytes": 0, "mbm_total_bytes": 67890}, +// } +func (r *resctrlOps) readMonData(monGroupDir string) (map[string]map[string]float64, error) { + monDataDir := filepath.Join(monGroupDir, "mon_data") + domains, err := os.ReadDir(monDataDir) + if err != nil { + return nil, fmt.Errorf("failed to read mon_data directory %s: %w", monDataDir, err) + } + + result := make(map[string]map[string]float64) + for _, domain := range domains { + if !domain.IsDir() || !strings.HasPrefix(domain.Name(), "mon_") { + continue + } + domainDir := filepath.Join(monDataDir, domain.Name()) + events, err := os.ReadDir(domainDir) + if err != nil { + log.Warnf("readMonData: failed to read domain directory %s: %v", domainDir, err) + continue + } + domainMap := make(map[string]float64) + for _, event := range events { + if event.IsDir() || event.Name() == "tasks" { + continue + } + eventFile := filepath.Join(domainDir, event.Name()) + data, err := os.ReadFile(eventFile) + if err != nil { + log.Warnf("readMonData: failed to read %s: %v", eventFile, err) + continue + } + val, err := strconv.ParseFloat(strings.TrimSpace(string(data)), 64) + if err != nil { + log.Warnf("readMonData: failed to parse %s value %q: %v", eventFile, string(data), err) + continue + } + domainMap[event.Name()] = val + } + if len(domainMap) > 0 { + result[domain.Name()] = domainMap + } + } + return result, nil +} + +// cleanOrphanedMonGroups removes mon_group directories that are not tracked +// in the given state. This handles cleanup after a plugin crash/restart. +func (r *resctrlOps) cleanOrphanedMonGroups(state *podState) { + // Scan root-level mon_groups. + r.cleanOrphanedInDir(filepath.Join(r.resctrlPath, monGroupsDir), state) + + // Scan ctrl_group-level mon_groups. + entries, err := os.ReadDir(r.resctrlPath) + if err != nil { + log.Warnf("cleanOrphanedMonGroups: failed to read %s: %v", r.resctrlPath, err) + return + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + name := entry.Name() + // Skip non-ctrl_group entries. + if name == monGroupsDir || name == "info" || strings.HasPrefix(name, "mon_") { + continue + } + ctrlGroupMonDir := filepath.Join(r.resctrlPath, name, monGroupsDir) + r.cleanOrphanedInDir(ctrlGroupMonDir, state) + } +} + +// cleanOrphanedInDir removes mon_group directories in a specific mon_groups/ +// directory that look like pod UIDs but are not tracked in state. +func (r *resctrlOps) cleanOrphanedInDir(monGroupsPath string, state *podState) { + entries, err := os.ReadDir(monGroupsPath) + if err != nil { + if !errors.Is(err, os.ErrNotExist) { + log.Warnf("failed to read mon_groups directory %s: %v", monGroupsPath, err) + } + return + } + for _, entry := range entries { + if !entry.IsDir() { + continue + } + name := entry.Name() + // Only clean directories that look like pod UIDs (contain dashes like UUIDs). + if !looksLikePodUID(name) { + continue + } + orphanDir := filepath.Join(monGroupsPath, name) + trackedDir := state.getMonGroupDir(name) + if trackedDir == orphanDir { + // This is the active mon_group for this pod. + continue + } + log.Infof("removing orphaned mon_group %s", orphanDir) + if err := os.Remove(orphanDir); err != nil && !errors.Is(err, os.ErrNotExist) { + log.Warnf("failed to remove orphaned mon_group %s: %v", orphanDir, err) + } + } +} + +// looksLikePodUID returns true if the name looks like a Kubernetes pod UID +// (UUID format with dashes, e.g., a1b2c3d4-e5f6-7890-abcd-ef1234567890). +func looksLikePodUID(name string) bool { + if len(name) != 36 { + return false + } + // Check for UUID-like pattern: 8-4-4-4-12 hex chars. + parts := strings.Split(name, "-") + if len(parts) != 5 { + return false + } + expectedLens := []int{8, 4, 4, 4, 12} + for i, part := range parts { + if len(part) != expectedLens[i] { + return false + } + for _, c := range part { + if !((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { + return false + } + } + } + return true +} + +// isValidRDTClass returns true if the name is a safe resctrl ctrl_group name. +// It rejects path separators, dot-segments, and empty strings to prevent +// path traversal outside the resctrl mount. +func isValidRDTClass(name string) bool { + if name == "" || name == "." || name == ".." { + return false + } + for _, c := range name { + if c == '/' || c == 0 { + return false + } + } + return true +} diff --git a/cmd/plugins/resctrl-mon/resctrl_test.go b/cmd/plugins/resctrl-mon/resctrl_test.go new file mode 100644 index 000000000..760a6f39e --- /dev/null +++ b/cmd/plugins/resctrl-mon/resctrl_test.go @@ -0,0 +1,277 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCreateMonGroup_RootClass(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + assert.Equal(t, filepath.Join(tmpDir, "mon_groups", "pod-uid-1"), dir) + + // Directory should exist. + info, err := os.Stat(dir) + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestCreateMonGroup_WithRDTClass(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + + dir, err := r.createMonGroup("BestEffort", "pod-uid-2") + require.NoError(t, err) + assert.Equal(t, filepath.Join(tmpDir, "BestEffort", "mon_groups", "pod-uid-2"), dir) + + info, err := os.Stat(dir) + require.NoError(t, err) + assert.True(t, info.IsDir()) +} + +func TestCreateMonGroup_MissingCtrlGroup(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + // Attempt to create a mon_group under a non-existent ctrl_group. + _, err := r.createMonGroup("NoSuchClass", "pod-uid-3") + assert.Error(t, err) + assert.Contains(t, err.Error(), "ctrl_group") + + // Verify the ctrl_group was NOT created. + _, err = os.Stat(filepath.Join(tmpDir, "NoSuchClass")) + assert.True(t, os.IsNotExist(err)) +} + +func TestCreateMonGroup_Idempotent(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir1, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + dir2, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + assert.Equal(t, dir1, dir2) +} + +func TestRemoveMonGroup(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + err = r.removeMonGroup(dir) + require.NoError(t, err) + + _, err = os.Stat(dir) + assert.True(t, os.IsNotExist(err)) +} + +func TestRemoveMonGroup_NotExist(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + err := r.removeMonGroup(filepath.Join(tmpDir, "mon_groups", "nonexistent")) + assert.NoError(t, err) +} + +func TestWriteTaskPID(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + dir, err := r.createMonGroup("", "pod-uid-1") + require.NoError(t, err) + + // In real resctrl, the kernel creates the tasks file when the + // mon_group directory is created. Simulate that here. + tasksFile := filepath.Join(dir, "tasks") + require.NoError(t, os.WriteFile(tasksFile, nil, 0644)) + + err = r.writeTaskPID(dir, 12345) + require.NoError(t, err) + + data, err := os.ReadFile(tasksFile) + require.NoError(t, err) + assert.Equal(t, "12345\n", string(data)) +} + +func TestCleanOrphanedMonGroups(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + state := newPodState() + + // Create a mon_group that IS tracked. + trackedUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + dir, err := r.createMonGroup("", trackedUID) + require.NoError(t, err) + state.addPod(trackedUID, dir) + + // Create a mon_group that is NOT tracked (orphan). + orphanUID := "deadbeef-dead-beef-dead-beefdeadbeef" + _, err = r.createMonGroup("", orphanUID) + require.NoError(t, err) + + r.cleanOrphanedMonGroups(state) + + // Tracked should still exist. + _, err = os.Stat(filepath.Join(tmpDir, "mon_groups", trackedUID)) + assert.NoError(t, err) + + // Orphan should be removed. + _, err = os.Stat(filepath.Join(tmpDir, "mon_groups", orphanUID)) + assert.True(t, os.IsNotExist(err)) +} + +func TestCleanOrphanedMonGroups_CtrlGroup(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + state := newPodState() + + // Create orphan under a ctrl_group. + orphanUID := "deadbeef-dead-beef-dead-beefdeadbeef" + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + _, err := r.createMonGroup("BestEffort", orphanUID) + require.NoError(t, err) + + r.cleanOrphanedMonGroups(state) + + _, err = os.Stat(filepath.Join(tmpDir, "BestEffort", "mon_groups", orphanUID)) + assert.True(t, os.IsNotExist(err)) +} + +func TestCleanOrphanedMonGroups_StaleLocation(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + state := newPodState() + + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + + // Create a mon_group under BestEffort (simulates previous run). + require.NoError(t, os.Mkdir(filepath.Join(tmpDir, "BestEffort"), 0755)) + _, err := r.createMonGroup("BestEffort", podUID) + require.NoError(t, err) + + // Track the pod at the root class (simulates current run with different RDT class). + rootDir, err := r.createMonGroup("", podUID) + require.NoError(t, err) + state.addPod(podUID, rootDir) + + r.cleanOrphanedMonGroups(state) + + // Root mon_group (tracked) should still exist. + _, err = os.Stat(rootDir) + assert.NoError(t, err) + + // BestEffort mon_group (stale) should be removed. + _, err = os.Stat(filepath.Join(tmpDir, "BestEffort", "mon_groups", podUID)) + assert.True(t, os.IsNotExist(err)) +} + +func TestLooksLikePodUID(t *testing.T) { + assert.True(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890")) + assert.True(t, looksLikePodUID("DEADBEEF-DEAD-BEEF-DEAD-BEEFDEADBEEF")) + assert.True(t, looksLikePodUID("00000000-0000-0000-0000-000000000000")) + + assert.False(t, looksLikePodUID("short")) + assert.False(t, looksLikePodUID("not-a-uuid-at-all-nope-notthisone!")) + assert.False(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef123456789")) // too short last segment + assert.False(t, looksLikePodUID("g1b2c3d4-e5f6-7890-abcd-ef1234567890")) // 'g' is not hex + assert.False(t, looksLikePodUID("a1b2c3d4-e5f6-7890-abcd-ef1234567890x")) // too long +} + +func TestIsValidRDTClass(t *testing.T) { + assert.True(t, isValidRDTClass("BestEffort")) + assert.True(t, isValidRDTClass("Guaranteed")) + assert.True(t, isValidRDTClass("COS1")) + assert.True(t, isValidRDTClass("my-class_v2")) + + assert.False(t, isValidRDTClass("")) + assert.False(t, isValidRDTClass(".")) + assert.False(t, isValidRDTClass("..")) + assert.False(t, isValidRDTClass("../../etc")) + assert.False(t, isValidRDTClass("foo/bar")) + assert.False(t, isValidRDTClass("class\x00name")) +} + +func TestCreateMonGroup_PathTraversal(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + _, err := r.createMonGroup("../../etc", "a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid RDT class") + + _, err = r.createMonGroup("foo/bar", "a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid RDT class") + + _, err = r.createMonGroup("..", "a1b2c3d4-e5f6-7890-abcd-ef1234567890") + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid RDT class") +} + +func TestReadMonData(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + // Create a fake mon_group with mon_data directories. + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + monGroupDir, err := r.createMonGroup("", podUID) + require.NoError(t, err) + + monData := filepath.Join(monGroupDir, "mon_data") + require.NoError(t, os.MkdirAll(filepath.Join(monData, "mon_PERF_PKG_00"), 0755)) + require.NoError(t, os.MkdirAll(filepath.Join(monData, "mon_PERF_PKG_01"), 0755)) + require.NoError(t, os.MkdirAll(filepath.Join(monData, "mon_L3_00"), 0755)) + + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_PERF_PKG_00", "core_energy"), []byte("42.123456\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_PERF_PKG_01", "core_energy"), []byte("0.000000\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_L3_00", "llc_occupancy"), []byte("12345\n"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(monData, "mon_L3_00", "mbm_total_bytes"), []byte("67890\n"), 0644)) + + result, err := r.readMonData(monGroupDir) + require.NoError(t, err) + + assert.Equal(t, 42.123456, result["mon_PERF_PKG_00"]["core_energy"]) + assert.Equal(t, 0.0, result["mon_PERF_PKG_01"]["core_energy"]) + assert.Equal(t, 12345.0, result["mon_L3_00"]["llc_occupancy"]) + assert.Equal(t, 67890.0, result["mon_L3_00"]["mbm_total_bytes"]) +} + +func TestReadMonData_NoMonData(t *testing.T) { + tmpDir := t.TempDir() + r := newResctrlOps(tmpDir) + + // mon_group with no mon_data directory. + podUID := "a1b2c3d4-e5f6-7890-abcd-ef1234567890" + monGroupDir, err := r.createMonGroup("", podUID) + require.NoError(t, err) + + _, err = r.readMonData(monGroupDir) + assert.Error(t, err) +} diff --git a/cmd/plugins/resctrl-mon/snapshot.go b/cmd/plugins/resctrl-mon/snapshot.go new file mode 100644 index 000000000..a6ca93dde --- /dev/null +++ b/cmd/plugins/resctrl-mon/snapshot.go @@ -0,0 +1,201 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "fmt" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "time" +) + +// snapshotUIDRe validates that a pod UID is safe for use as a directory name. +var snapshotUIDRe = regexp.MustCompile(`^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$`) + +// snapshotStore manages a directory tree that mirrors the resctrl mon_data +// layout, augmenting it with .begin/.end counter files and symlinks to the +// live kernel counters. +// +// Layout for each pod: +// +//