Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Reduced binary size (https://github.com/authzed/spicedb/pull/3005)

### Fixed
- Re-enable async logging with proper shutdown flushing for improved write performance (https://github.com/authzed/spicedb/pull/2779)
- On a Postgres setup with read replicas, some requests may silently swallow errors of sort "revision not found in replica" (https://github.com/authzed/spicedb/pull/2979)
- Use cgroup-aware memory detection for cache and watch buffer sizing in containerized environments (https://github.com/authzed/spicedb/pull/3000)
- Upgraded the spanner client, which changed the internal implementation to not use a session pool. This means that the `--datastore-spanner-max-sessions` and `--datastore-spanner-min-sessions` flags are now deprecated and no-op. We also strongly recommend using [Application Default Credentials](https://docs.cloud.google.com/docs/authentication/client-libraries#adc) in favor of a credentials file. (https://github.com/authzed/spicedb/pull/3038)
Expand Down
19 changes: 16 additions & 3 deletions cmd/spicedb/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import (
"errors"
"fmt"
"os"
"time"

Expand All @@ -19,12 +20,21 @@
)

func main() {
os.Exit(run())

Check warning on line 23 in cmd/spicedb/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/spicedb/main.go#L23

Added line #L23 was not covered by tests
}

func run() int {

Check warning on line 26 in cmd/spicedb/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/spicedb/main.go#L26

Added line #L26 was not covered by tests
memoryprotection.InitDefaultMemoryUsageProvider()

// Set up root logger
// This will typically be overwritten by the logging setup for a given command.
zerolog.TimeFieldFormat = time.RFC3339Nano
log.SetGlobalLogger(zerolog.New(os.Stderr).Level(zerolog.InfoLevel))
defer func() {
if err := log.Close(); err != nil {
fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
}

Check warning on line 36 in cmd/spicedb/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/spicedb/main.go#L33-L36

Added lines #L33 - L36 were not covered by tests
}()

// Enable Kubernetes gRPC resolver
kuberesolver.RegisterInCluster()
Expand All @@ -35,7 +45,8 @@
// Build the complete command structure
rootCmd, err := cmd.BuildRootCommand()
if err != nil {
log.Fatal().Err(err).Msg("failed to build root command")
log.Error().Err(err).Msg("failed to build root command")
return 1

Check warning on line 49 in cmd/spicedb/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/spicedb/main.go#L48-L49

Added lines #L48 - L49 were not covered by tests
}

if err := rootCmd.Execute(); err != nil {
Expand All @@ -44,8 +55,10 @@
}
var termErr spiceerrors.TerminationError
if errors.As(err, &termErr) {
os.Exit(termErr.ExitCode())
return termErr.ExitCode()

Check warning on line 58 in cmd/spicedb/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/spicedb/main.go#L58

Added line #L58 was not covered by tests
}
os.Exit(1)
return 1

Check warning on line 60 in cmd/spicedb/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/spicedb/main.go#L60

Added line #L60 was not covered by tests
}

return 0

Check warning on line 63 in cmd/spicedb/main.go

View check run for this annotation

Codecov / codecov/patch

cmd/spicedb/main.go#L63

Added line #L63 was not covered by tests
}
143 changes: 143 additions & 0 deletions internal/logging/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,127 @@

import (
"context"
"fmt"
"io"
"os"
"sync"
"time"

"github.com/rs/zerolog"
)

// defaultCloseTimeout is the maximum time to wait for log flushing during shutdown.
// If the underlying writer blocks longer than this, Close() will return with an error
// rather than hanging indefinitely.
const defaultCloseTimeout = 5 * time.Second

var Logger zerolog.Logger

type closerHolder struct {
mu sync.Mutex
closer io.Closer // GUARDED_BY(mu)
// closed is permanent once set to true. This prevents double-close bugs where
// multiple shutdown paths might try to close the same closer. Once closed,
// any new closer passed to Set() is immediately closed to prevent resource leaks.
closed bool // GUARDED_BY(mu)
}

func (ch *closerHolder) Set(closer io.Closer) {
var toClose io.Closer

ch.mu.Lock()
if ch.closed {
toClose = closer
ch.mu.Unlock()
if toClose != nil {
_ = toClose.Close()
}
return
}

// Don't close if the same closer is being set again
if ch.closer != closer {
toClose = ch.closer
}
ch.closer = closer
ch.mu.Unlock()

if toClose != nil {
_ = toClose.Close()
}
}

func (ch *closerHolder) Close() error {
return ch.CloseWithTimeout(defaultCloseTimeout)
}

// CloseWithTimeout closes the held closer with a timeout to prevent shutdown hangs.
// If the closer doesn't complete within the timeout, an error is returned but the
// close operation may still complete in the background.
//
// NOTE: On timeout, the goroutine performing the close continues running until the
// underlying Close() completes. This is intentional for shutdown scenarios where we
// prefer to return promptly rather than block indefinitely. The goroutine will not
// leak permanently—it will terminate when Close() eventually returns (or when the
// process exits).
func (ch *closerHolder) CloseWithTimeout(timeout time.Duration) error {
var toClose io.Closer

ch.mu.Lock()
if ch.closed {
ch.mu.Unlock()
return nil
}

ch.closed = true
toClose = ch.closer
ch.closer = nil
ch.mu.Unlock()

if toClose == nil {
return nil
}

// For zero or negative timeout, close synchronously
if timeout <= 0 {
err := toClose.Close()
if err != nil {
fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
}

Check warning on line 91 in internal/logging/logger.go

View check run for this annotation

Codecov / codecov/patch

internal/logging/logger.go#L90-L91

Added lines #L90 - L91 were not covered by tests
return err
}

// Use a channel to wait for close with timeout
done := make(chan error, 1)
go func() {
done <- toClose.Close()
}()

select {
case err := <-done:
if err != nil {
// Write directly to stderr since the logger may not be functional
fmt.Fprintf(os.Stderr, "warning: failed to flush logs: %v\n", err)
}
return err
case <-time.After(timeout):
err := fmt.Errorf("timeout after %v waiting for log flush", timeout)
fmt.Fprintf(os.Stderr, "warning: %v\n", err)
return err
}
}

// reset clears the closer state without closing the current closer.
// This is only for test use to restore state between test cases.
func (ch *closerHolder) reset() {
ch.mu.Lock()
ch.closer = nil
ch.closed = false
ch.mu.Unlock()
}

var globalCloser closerHolder

func init() {
SetGlobalLogger(zerolog.Nop())
}
Expand All @@ -17,6 +132,34 @@
zerolog.DefaultContextLogger = &Logger
}

func SetGlobalLoggerWithCloser(logger zerolog.Logger, closer io.Closer) {
// Store the closer BEFORE activating the logger to avoid a race window where
// the logger is active but Close() wouldn't flush it (if called between the two operations).
globalCloser.Set(closer)
SetGlobalLogger(logger)
}

// Close flushes and releases resources owned by the globally configured logger.
// It is safe to call multiple times.
func Close() error { return globalCloser.Close() }

// ResetCloserForTesting resets the global closer state.
//
// WARNING: This is for TEST USE ONLY. Production code must NEVER call this function.
// Calling this in production will silently discard any pending log closer, potentially
// losing buffered log messages.
//
// This function exists because Close() is designed to be idempotent and final (sets
// closed=true permanently), but tests need to reset state between test cases to ensure
// isolation. Without this, a test that calls Close() would cause subsequent tests'
// SetGlobalLoggerWithCloser() calls to immediately close their new closers.
//
// Go's package visibility constraints prevent test files in other packages from
// accessing the unexported globalCloser directly, necessitating this exported helper.
func ResetCloserForTesting() {
globalCloser.reset()
}

func With() zerolog.Context { return Logger.With() }

func Err(err error) *zerolog.Event { return Logger.Err(err) }
Expand Down
Loading
Loading