62 changes: 61 additions & 1 deletion internal/k8s/cache.go
@@ -65,6 +65,36 @@ var deferredResources = map[string]bool{
"limitranges": true, // audit inheritance lookups, not first-render
}

// minimalFirstPaintSet is the subset of critical informers the home
// dashboard needs to feel coherent. Pods are included despite typically
// being the largest kind — without pods the topology graph and
// resource counts are empty. The patience window absorbs pod-sync
// latency on healthy clusters; on slow ones, the user sees a working
// home view sooner with a "still loading" hint for the rest.
var minimalFirstPaintSet = map[string]bool{
"pods": true,
"namespaces": true,
"nodes": true,
"services": true,
"deployments": true,
}

// firstPaintPatience is how long we wait for ALL critical informers before
// falling back to the minimal set. On most clusters the full critical set
// syncs well inside this window, so first paint is complete and there is
// no progressive fill-in. Slow clusters fall through to the minimal-set
// gate and render with whatever is ready then.
const firstPaintPatience = 8 * time.Second

// firstPaintBackstop is the hard upper bound on the critical-sync wait.
// If the minimal set still hasn't synced after this long, give up and
// render with whatever's available — the user gets the same partial-data
// experience they'd see today (zeros + "Still loading: …" hint) instead
// of being trapped on the connecting screen indefinitely. Picked to be
// much longer than a healthy cluster's sync time but short enough that
// a permanently-throttled API server doesn't make Radar feel broken.
const firstPaintBackstop = 5 * time.Minute

// ResourceChange is a type alias for the canonical definition in pkg/k8score.
type ResourceChange = k8score.ResourceChange

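The sync loop that consumes PatienceWindow, MinimalSet, and SyncTimeout is not part of this diff; a minimal sketch of how the three knobs could combine, assuming the loop polls informer HasSynced state and uses only time and context from the standard library (waitForFirstPaint, criticalSynced, and minimalSynced are hypothetical names):

    // Hypothetical wait loop: hold out for the full critical set until the
    // patience window lapses, then gate only on the minimal set, and bail
    // out entirely at the backstop so the connecting screen can't hang.
    func waitForFirstPaint(ctx context.Context, patience, backstop time.Duration,
        criticalSynced, minimalSynced func() bool) {
        patienceTimer := time.NewTimer(patience)
        backstopTimer := time.NewTimer(backstop)
        defer patienceTimer.Stop()
        defer backstopTimer.Stop()
        poll := time.NewTicker(250 * time.Millisecond)
        defer poll.Stop()
        patient := true // still hoping for the full critical set
        for {
            select {
            case <-ctx.Done():
                return
            case <-patienceTimer.C:
                patient = false // fall back to gating on the minimal set
            case <-backstopTimer.C:
                return // give up; render with whatever is available
            case <-poll.C:
                if criticalSynced() {
                    return // complete first paint, no progressive fill-in
                }
                if !patient && minimalSynced() {
                    return // partial first paint; the rest streams in later
                }
            }
        }
    }
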
@@ -139,7 +169,10 @@ func InitResourceCache(ctx context.Context) error {
Namespace: permResult.Namespace,
DebugEvents: DebugEvents,
TimingLogger: logTiming,
SyncTimeout: 60 * time.Second,
PatienceWindow: firstPaintPatience,
MinimalSet: minimalFirstPaintSet,
SyncTimeout: firstPaintBackstop,
SyncProgress: emitSyncProgress,
DeferredSyncTimeout: 3 * time.Minute,

OnReceived: func(kind string) {
@@ -264,6 +297,33 @@ func recordK8sEventToTimeline(obj any) {
}
}

// emitSyncProgress is the SyncProgress callback wired into the resource
// cache. It keeps the connection's progressMessage in step with the live
// informer-sync count so the connecting screen ticks up instead of
// sitting on a static message during a 30–60s sync. Once the cache
// returns and connection state flips to "connected", further progress
// lives in the home dashboard's deferred-loading indicator.
func emitSyncProgress(synced, total int, minimalReady bool) {
if total == 0 {
return
}
// Only update while we're still in the connecting phase. Once
// connected, the connecting screen is gone and the message is moot.
if GetConnectionStatus().State != StateConnecting {
return
}
var msg string
switch {
case synced == total:
msg = "Finalizing…"
case minimalReady:
msg = fmt.Sprintf("Loading cluster data… %d of %d ready (showing partial)", synced, total)
default:
msg = fmt.Sprintf("Loading cluster data… %d of %d ready", synced, total)
}
UpdateConnectionProgress(msg)
}

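How the cache drives this callback is not shown in the diff; one plausible shape, assuming a per-kind snapshot of informer HasSynced results (reportSyncProgress, hasSynced, and minimalSet are illustrative names, not the cache's actual fields):

    // Hypothetical reporting helper: count synced informers and whether the
    // minimal first-paint set is fully synced, then hand both to the callback.
    func reportSyncProgress(hasSynced, minimalSet map[string]bool,
        progress func(synced, total int, minimalReady bool)) {
        synced, minimalSynced := 0, 0
        for kind, ok := range hasSynced {
            if !ok {
                continue
            }
            synced++
            if minimalSet[kind] {
                minimalSynced++
            }
        }
        progress(synced, len(hasSynced), minimalSynced == len(minimalSet))
    }
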
// isNoisyResource returns true if this resource generates constant updates that aren't interesting
func isNoisyResource(kind, name, op string) bool {
if op != "update" {
10 changes: 6 additions & 4 deletions internal/server/dashboard.go
@@ -249,10 +249,12 @@ func (s *Server) handleDashboard(w http.ResponseWriter, r *http.Request) {
// may be incomplete because deferred informers are still syncing.
resp.DeferredLoading = !cache.IsDeferredSynced()

// If critical sync timed out, tell the frontend which resource kinds
// may be missing so it can show a banner.
if promoted := cache.PromotedKinds(); len(promoted) > 0 && resp.DeferredLoading {
resp.PartialData = promoted
// If critical informers were promoted at first paint, tell the
// frontend which kinds are STILL loading (live-filtered, not the
// snapshot from connect time) so the banner doesn't list kinds that
// have since populated.
if pending := cache.PendingPromotedKinds(); len(pending) > 0 {
resp.PartialData = pending
}

// --- Slow network calls: run in parallel ---
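PendingPromotedKinds itself is outside this diff; presumably it filters the connect-time promoted snapshot against informers that have since synced. A sketch under that assumption (pendingPromotedKinds and hasSynced are illustrative names):

    // Hypothetical live filter: keep only the promoted kinds whose informer
    // still hasn't synced, so the banner shrinks as data arrives.
    func pendingPromotedKinds(promoted []string, hasSynced func(kind string) bool) []string {
        var pending []string
        for _, kind := range promoted {
            if !hasSynced(kind) {
                pending = append(pending, kind)
            }
        }
        return pending
    }
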
12 changes: 8 additions & 4 deletions internal/server/sse.go
@@ -405,12 +405,16 @@ func (b *SSEBroadcaster) watchResourceChanges() {
}

// Debounce strategy:
// - During warmup (initial sync + CRD discovery): 3s to avoid constant
// topology rebuilds from dynamic informer syncs. The UI shows a connecting
// spinner anyway, so the delay is invisible.
// - During warmup: critical informers that didn't make the patience
// window (e.g. ingresses, jobs, replicasets) plus deferred informers
// plus dynamic CRD informers all stream in over the next 10–60s. We
// want the topology graph to settle in a few coherent paints, not
// jump on every arrival, so we coalesce into 5s windows. The UI is
// already on the home view by this point with a "loading more" hint;
// the slight delay is preferable to a fidgety graph.
// - After warmup: re-evaluate based on cluster size. Large clusters (>5000
// resources) use 5s; smaller clusters use 500ms.
const warmupDebounce = 3 * time.Second
const warmupDebounce = 5 * time.Second
debounceDuration := warmupDebounce
b.watchMu.Lock()
warmupCh := b.warmupDone // local copy under lock; nil-ed after firing to avoid closed-channel spin
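The post-warmup re-evaluation described in the comment is below the fold of this hunk; its stated behavior (>5000 resources use 5s, smaller clusters use 500ms) reduces to something like this illustrative helper (postWarmupDebounce is not necessarily the real name):

    // Illustrative post-warmup debounce choice, per the comment above:
    // large clusters coalesce changes for 5s, small ones react in 500ms.
    func postWarmupDebounce(resourceCount int) time.Duration {
        if resourceCount > 5000 {
            return 5 * time.Second
        }
        return 500 * time.Millisecond
    }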