authzed · ivanauth · Dec 17, 2025 · Mar 31, 2026 · Apr 7, 2026
@@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Reduced binary size (https://github.com/authzed/spicedb/pull/3005)
 
 ### Fixed
+- Re-enable async logging with proper shutdown flushing for improved write performance (https://github.com/authzed/spicedb/pull/2779)
 - On a Postgres setup with read replicas, some requests may silently swallow errors of sort "revision not found in replica" (https://github.com/authzed/spicedb/pull/2979)
 - Use cgroup-aware memory detection for cache and watch buffer sizing in containerized environments (https://github.com/authzed/spicedb/pull/3000)
 - Upgraded the spanner client, which changed the internal implementation to not use a session pool. This means that the `--datastore-spanner-max-sessions` and `--datastore-spanner-min-sessions` flags are now deprecated and no-op. We also strongly recommend using [Application Default Credentials](https://docs.cloud.google.com/docs/authentication/client-libraries#adc) in favor of a credentials file. (https://github.com/authzed/spicedb/pull/3038)

@@ -2,6 +2,7 @@
 
 import (
 	"errors"
+	"fmt"
 	"os"
 	"time"
 
@@ -19,12 +20,21 @@
 )
 
 func main() {
+	os.Exit(run())
+}
+
+func run() int {
 	memoryprotection.InitDefaultMemoryUsageProvider()
 
 	// Set up root logger
 	// This will typically be overwritten by the logging setup for a given command.
 	zerolog.TimeFieldFormat = time.RFC3339Nano
 	log.SetGlobalLogger(zerolog.New(os.Stderr).Level(zerolog.InfoLevel))
+	defer func() {
+		if err := log.Close(); err != nil {
+			fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
+		}
+	}()
 
 	// Enable Kubernetes gRPC resolver
 	kuberesolver.RegisterInCluster()
@@ -35,7 +45,8 @@
 	// Build the complete command structure
 	rootCmd, err := cmd.BuildRootCommand()
 	if err != nil {
-		log.Fatal().Err(err).Msg("failed to build root command")
+		log.Error().Err(err).Msg("failed to build root command")
+		return 1
 	}
 
 	if err := rootCmd.Execute(); err != nil {
@@ -44,8 +55,10 @@
 		}
 		var termErr spiceerrors.TerminationError
 		if errors.As(err, &termErr) {
-			os.Exit(termErr.ExitCode())
+			return termErr.ExitCode()
 		}
-		os.Exit(1)
+		return 1
 	}
+
+	return 0
 }
@@ -2,12 +2,127 @@
 
 import (
 	"context"
+	"fmt"
+	"io"
+	"os"
+	"sync"
+	"time"
 
 	"github.com/rs/zerolog"
 )
 
+// defaultCloseTimeout is the maximum time to wait for log flushing during shutdown.
+// If the underlying writer blocks longer than this, Close() will return with an error
+// rather than hanging indefinitely.
+const defaultCloseTimeout = 5 * time.Second
+
 var Logger zerolog.Logger
 
+type closerHolder struct {
+	mu     sync.Mutex
+	closer io.Closer // GUARDED_BY(mu)
+	// closed is permanent once set to true. This prevents double-close bugs where
+	// multiple shutdown paths might try to close the same closer. Once closed,
+	// any new closer passed to Set() is immediately closed to prevent resource leaks.
+	closed bool // GUARDED_BY(mu)
+}
+
+func (ch *closerHolder) Set(closer io.Closer) {
+	var toClose io.Closer
+
+	ch.mu.Lock()
+	if ch.closed {
+		toClose = closer
+		ch.mu.Unlock()
+		if toClose != nil {
+			_ = toClose.Close()
+		}
+		return
+	}
+
+	// Don't close if the same closer is being set again
+	if ch.closer != closer {
+		toClose = ch.closer
+	}
+	ch.closer = closer
+	ch.mu.Unlock()
+
+	if toClose != nil {
+		_ = toClose.Close()
+	}
+}
+
+func (ch *closerHolder) Close() error {
+	return ch.CloseWithTimeout(defaultCloseTimeout)
+}
+
+// CloseWithTimeout closes the held closer with a timeout to prevent shutdown hangs.
+// If the closer doesn't complete within the timeout, an error is returned but the
+// close operation may still complete in the background.
+//
+// NOTE: On timeout, the goroutine performing the close continues running until the
+// underlying Close() completes. This is intentional for shutdown scenarios where we
+// prefer to return promptly rather than block indefinitely. The goroutine will not
+// leak permanently—it will terminate when Close() eventually returns (or when the
+// process exits).
+func (ch *closerHolder) CloseWithTimeout(timeout time.Duration) error {
+	var toClose io.Closer
+
+	ch.mu.Lock()
+	if ch.closed {
+		ch.mu.Unlock()
+		return nil
+	}
+
+	ch.closed = true
+	toClose = ch.closer
+	ch.closer = nil
+	ch.mu.Unlock()
+
+	if toClose == nil {
+		return nil
+	}
+
+	// For zero or negative timeout, close synchronously
+	if timeout <= 0 {
+		err := toClose.Close()
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
+		}
+		return err
+	}
+
+	// Use a channel to wait for close with timeout
+	done := make(chan error, 1)
+	go func() {
+		done <- toClose.Close()
+	}()
+
+	select {
+	case err := <-done:
+		if err != nil {
+			// Write directly to stderr since the logger may not be functional
+			fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
+		}
+		return err
+	case <-time.After(timeout):
+		err := fmt.Errorf("timeout after %v waiting for log flush", timeout)
+		fmt.Fprintf(os.Stderr, "warning: %v\n", err)
+		return err
+	}
+}
+
+// reset clears the closer state without closing the current closer.
+// This is only for test use to restore state between test cases.
+func (ch *closerHolder) reset() {
+	ch.mu.Lock()
+	ch.closer = nil
+	ch.closed = false
+	ch.mu.Unlock()
+}
+
+var globalCloser closerHolder
+
 func init() {
 	SetGlobalLogger(zerolog.Nop())
 }
@@ -17,6 +132,34 @@
 	zerolog.DefaultContextLogger = &Logger
 }
 
+func SetGlobalLoggerWithCloser(logger zerolog.Logger, closer io.Closer) {
+	// Store the closer BEFORE activating the logger to avoid a race window where
+	// the logger is active but Close() wouldn't flush it (if called between the two operations).
+	globalCloser.Set(closer)
+	SetGlobalLogger(logger)
+}
+
+// Close flushes and releases resources owned by the globally configured logger.
+// It is safe to call multiple times.
+func Close() error { return globalCloser.Close() }
+
+// ResetCloserForTesting resets the global closer state.
+//
+// WARNING: This is for TEST USE ONLY. Production code must NEVER call this function.
+// Calling this in production will silently discard any pending log closer, potentially
+// losing buffered log messages.
+//
+// This function exists because Close() is designed to be idempotent and final (sets
+// closed=true permanently), but tests need to reset state between test cases to ensure
+// isolation. Without this, a test that calls Close() would cause subsequent tests'
+// SetGlobalLoggerWithCloser() calls to immediately close their new closers.
+//
+// Go's package visibility constraints prevent test files in other packages from
+// accessing the unexported globalCloser directly, necessitating this exported helper.
+func ResetCloserForTesting() {
+	globalCloser.reset()
+}
+
 func With() zerolog.Context { return Logger.With() }
 
 func Err(err error) *zerolog.Event { return Logger.Err(err) }