62 changes: 61 additions & 1 deletion internal/k8s/cache.go
@@ -65,6 +65,36 @@ var deferredResources = map[string]bool{
"limitranges": true, // audit inheritance lookups, not first-render
}

// minimalFirstPaintSet is the subset of critical informers the home
// dashboard needs to feel coherent. Pods are included despite typically
// being the largest kind — without pods the topology graph and
// resource counts are empty. The patience window absorbs pod-sync
// latency on healthy clusters; on slow ones, the user sees a working
// home view sooner with a "still loading" hint for the rest.
var minimalFirstPaintSet = map[string]bool{
"pods": true,
"namespaces": true,
"nodes": true,
"services": true,
"deployments": true,
}

// firstPaintPatience is how long we wait for ALL critical informers before
// falling back to the minimal set. On most clusters the full critical set
// syncs well inside this window, so first paint is complete and there is
// no progressive fill-in. Slow clusters fall through to the minimal-set
// gate and render with whatever is ready then.
const firstPaintPatience = 8 * time.Second

// firstPaintBackstop is the hard upper bound on the critical-sync wait.
// If the minimal set still hasn't synced after this long, give up and
// render with whatever's available — the user gets the same partial-data
// experience they'd see today (zeros + "Still loading: …" hint) instead
// of being trapped on the connecting screen indefinitely. Picked to be
// much longer than a healthy cluster's sync time but short enough that
// a permanently-throttled API server doesn't make Radar feel broken.
const firstPaintBackstop = 5 * time.Minute

// ResourceChange is a type alias for the canonical definition in pkg/k8score.
type ResourceChange = k8score.ResourceChange

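The sync loop that consumes PatienceWindow, MinimalSet, and SyncTimeout is not part of this diff; a minimal sketch of how the three knobs could combine, assuming the loop polls informer HasSynced state and uses only time and context from the standard library (waitForFirstPaint, criticalSynced, and minimalSynced are hypothetical names):

    // Hypothetical wait loop: hold out for the full critical set until the
    // patience window lapses, then gate only on the minimal set, and bail
    // out entirely at the backstop so the connecting screen can't hang.
    func waitForFirstPaint(ctx context.Context, patience, backstop time.Duration,
        criticalSynced, minimalSynced func() bool) {
        patienceTimer := time.NewTimer(patience)
        backstopTimer := time.NewTimer(backstop)
        defer patienceTimer.Stop()
        defer backstopTimer.Stop()
        poll := time.NewTicker(250 * time.Millisecond)
        defer poll.Stop()
        patient := true // still hoping for the full critical set
        for {
            select {
            case <-ctx.Done():
                return
            case <-patienceTimer.C:
                patient = false // fall back to gating on the minimal set
            case <-backstopTimer.C:
                return // give up; render with whatever is available
            case <-poll.C:
                if criticalSynced() {
                    return // complete first paint, no progressive fill-in
                }
                if !patient && minimalSynced() {
                    return // partial first paint; the rest streams in later
                }
            }
        }
    }
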
@@ -139,7 +169,10 @@ func InitResourceCache(ctx context.Context) error {
Namespace: permResult.Namespace,
DebugEvents: DebugEvents,
TimingLogger: logTiming,
SyncTimeout: 60 * time.Second,
PatienceWindow: firstPaintPatience,
MinimalSet: minimalFirstPaintSet,
SyncTimeout: firstPaintBackstop,
SyncProgress: emitSyncProgress,
DeferredSyncTimeout: 3 * time.Minute,

OnReceived: func(kind string) {
@@ -264,6 +297,33 @@ func recordK8sEventToTimeline(obj any) {
}
}

// emitSyncProgress is the SyncProgress callback wired into the resource
// cache. It keeps the connection's progressMessage in step with the live
// informer-sync count so the connecting screen ticks up instead of
// sitting on a static message during a 30–60s sync. Once the cache
// returns and connection state flips to "connected", further progress
// lives in the home dashboard's deferred-loading indicator.
func emitSyncProgress(synced, total int, minimalReady bool) {
if total == 0 {
return
}
// Only update while we're still in the connecting phase. Once
// connected, the connecting screen is gone and the message is moot.
if GetConnectionStatus().State != StateConnecting {
return
}
var msg string
switch {
case synced == total:
msg = "Finalizing…"
case minimalReady:
msg = fmt.Sprintf("Loading cluster data… %d of %d ready (showing partial)", synced, total)
default:
msg = fmt.Sprintf("Loading cluster data… %d of %d ready", synced, total)
}
UpdateConnectionProgress(msg)
}

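How the cache drives this callback is not shown in the diff; one plausible shape, assuming a per-kind snapshot of informer HasSynced results (reportSyncProgress, hasSynced, and minimalSet are illustrative names, not the cache's actual fields):

    // Hypothetical reporting helper: count synced informers and whether the
    // minimal first-paint set is fully synced, then hand both to the callback.
    func reportSyncProgress(hasSynced, minimalSet map[string]bool,
        progress func(synced, total int, minimalReady bool)) {
        synced, minimalSynced := 0, 0
        for kind, ok := range hasSynced {
            if !ok {
                continue
            }
            synced++
            if minimalSet[kind] {
                minimalSynced++
            }
        }
        progress(synced, len(hasSynced), minimalSynced == len(minimalSet))
    }
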
// isNoisyResource returns true if this resource generates constant updates that aren't interesting
func isNoisyResource(kind, name, op string) bool {
if op != "update" {
10 changes: 6 additions & 4 deletions internal/server/dashboard.go
@@ -249,10 +249,12 @@ func (s *Server) handleDashboard(w http.ResponseWriter, r *http.Request) {
// may be incomplete because deferred informers are still syncing.
resp.DeferredLoading = !cache.IsDeferredSynced()

// If critical sync timed out, tell the frontend which resource kinds
// may be missing so it can show a banner.
if promoted := cache.PromotedKinds(); len(promoted) > 0 && resp.DeferredLoading {
resp.PartialData = promoted
// If critical informers were promoted at first paint, tell the
// frontend which kinds are STILL loading (live-filtered, not the
// snapshot from connect time) so the banner doesn't list kinds that
// have since populated.
if pending := cache.PendingPromotedKinds(); len(pending) > 0 {
resp.PartialData = pending
}

// --- Slow network calls: run in parallel ---
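PendingPromotedKinds itself is outside this diff; presumably it filters the connect-time promoted snapshot against informers that have since synced. A sketch under that assumption (pendingPromotedKinds and hasSynced are illustrative names):

    // Hypothetical live filter: keep only the promoted kinds whose informer
    // still hasn't synced, so the banner shrinks as data arrives.
    func pendingPromotedKinds(promoted []string, hasSynced func(kind string) bool) []string {
        var pending []string
        for _, kind := range promoted {
            if !hasSynced(kind) {
                pending = append(pending, kind)
            }
        }
        return pending
    }
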
12 changes: 8 additions & 4 deletions internal/server/sse.go
@@ -405,12 +405,16 @@ func (b *SSEBroadcaster) watchResourceChanges() {
}

// Debounce strategy:
// - During warmup (initial sync + CRD discovery): 3s to avoid constant
// topology rebuilds from dynamic informer syncs. The UI shows a connecting
// spinner anyway, so the delay is invisible.
// - During warmup: critical informers that didn't make the patience
// window (e.g. ingresses, jobs, replicasets) plus deferred informers
// plus dynamic CRD informers all stream in over the next 10–60s. We
// want the topology graph to settle in a few coherent paints, not
// jump on every arrival, so we coalesce into 5s windows. The UI is
// already on the home view by this point with a "loading more" hint;
// the slight delay is preferable to a fidgety graph.
// - After warmup: re-evaluate based on cluster size. Large clusters (>5000
// resources) use 5s; smaller clusters use 500ms.
const warmupDebounce = 3 * time.Second
const warmupDebounce = 5 * time.Second
debounceDuration := warmupDebounce
b.watchMu.Lock()
warmupCh := b.warmupDone // local copy under lock; nil-ed after firing to avoid closed-channel spin
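The post-warmup re-evaluation described in the comment is below the fold of this hunk; its stated behavior (>5000 resources use 5s, smaller clusters use 500ms) reduces to something like this illustrative helper (postWarmupDebounce is not necessarily the real name):

    // Illustrative post-warmup debounce choice, per the comment above:
    // large clusters coalesce changes for 5s, small ones react in 500ms.
    func postWarmupDebounce(resourceCount int) time.Duration {
        if resourceCount > 5000 {
            return 5 * time.Second
        }
        return 500 * time.Millisecond
    }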