diff --git a/infra/feast-operator/cmd/main.go b/infra/feast-operator/cmd/main.go index ead6e93ce72..0e5565cce2b 100644 --- a/infra/feast-operator/cmd/main.go +++ b/infra/feast-operator/cmd/main.go @@ -49,6 +49,7 @@ import ( routev1 "github.com/openshift/api/route/v1" "github.com/feast-dev/feast/infra/feast-operator/internal/controller" + feastmetrics "github.com/feast-dev/feast/infra/feast-operator/internal/controller/metrics" "github.com/feast-dev/feast/infra/feast-operator/internal/controller/services" // +kubebuilder:scaffold:imports ) @@ -95,6 +96,7 @@ func main() { var probeAddr string var secureMetrics bool var enableHTTP2 bool + var featureStoreMetrics bool var tlsOpts []func(*tls.Config) flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ "Use :8443 for HTTPS or :8080 for HTTP, or leave as 0 to disable the metrics service.") @@ -106,6 +108,9 @@ func main() { "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") flag.BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers") + flag.BoolVar(&featureStoreMetrics, "feature-store-metrics", true, + "Enable Prometheus gauges exposing online/offline store and registry configuration per FeatureStore. 
"+ + "Disable with --feature-store-metrics=false.") opts := zap.Options{ Development: true, } @@ -206,9 +211,19 @@ func main() { services.SetIsOpenShift(mgr.GetConfig()) + var fsMetrics *feastmetrics.FeatureStoreMetrics + if featureStoreMetrics { + fsMetrics = feastmetrics.NewFeatureStoreMetrics() + fsMetrics.Register() + setupLog.Info("FeatureStore installation metrics enabled") + } else { + setupLog.Info("FeatureStore installation metrics disabled (--feature-store-metrics=false)") + } + if err = (&controller.FeatureStoreReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Metrics: fsMetrics, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "FeatureStore") os.Exit(1) diff --git a/infra/feast-operator/go.mod b/infra/feast-operator/go.mod index 72bdf42b6a2..a7dca2e3bc7 100644 --- a/infra/feast-operator/go.mod +++ b/infra/feast-operator/go.mod @@ -15,6 +15,7 @@ require ( require ( github.com/prometheus-operator/prometheus-operator/pkg/client v0.83.0 + github.com/prometheus/client_golang v1.22.0 k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 ) @@ -55,7 +56,6 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0 // indirect - github.com/prometheus/client_golang v1.22.0 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.62.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect diff --git a/infra/feast-operator/internal/controller/featurestore_controller.go b/infra/feast-operator/internal/controller/featurestore_controller.go index 32c8405ec2b..ae877447ddb 100644 --- a/infra/feast-operator/internal/controller/featurestore_controller.go +++ b/infra/feast-operator/internal/controller/featurestore_controller.go @@ -43,6 +43,7 @@ import ( 
feastdevv1 "github.com/feast-dev/feast/infra/feast-operator/api/v1" "github.com/feast-dev/feast/infra/feast-operator/internal/controller/authz" feasthandler "github.com/feast-dev/feast/infra/feast-operator/internal/controller/handler" + feastmetrics "github.com/feast-dev/feast/infra/feast-operator/internal/controller/metrics" "github.com/feast-dev/feast/infra/feast-operator/internal/controller/services" routev1 "github.com/openshift/api/route/v1" ) @@ -55,7 +56,8 @@ const ( // FeatureStoreReconciler reconciles a FeatureStore object type FeatureStoreReconciler struct { client.Client - Scheme *runtime.Scheme + Scheme *runtime.Scheme + Metrics *feastmetrics.FeatureStoreMetrics } // +kubebuilder:rbac:groups=feast.dev,resources=featurestores,verbs=get;list;watch;create;update;patch;delete @@ -87,6 +89,9 @@ func (r *FeatureStoreReconciler) Reconcile(ctx context.Context, req ctrl.Request if apierrors.IsNotFound(err) { // CR deleted since request queued, child objects getting GC'd, no requeue logger.V(1).Info("FeatureStore CR not found, has been deleted") + if r.Metrics != nil { + r.Metrics.DeleteFeatureStore(req.NamespacedName.Namespace, req.NamespacedName.Name) + } // Clean up namespace registry entry even if the CR is not found if err := r.cleanupNamespaceRegistry(ctx, &feastdevv1.FeatureStore{ ObjectMeta: metav1.ObjectMeta{ @@ -107,6 +112,9 @@ func (r *FeatureStoreReconciler) Reconcile(ctx context.Context, req ctrl.Request // Handle deletion - clean up namespace registry entry if cr.DeletionTimestamp != nil { logger.Info("FeatureStore is being deleted, cleaning up namespace registry entry") + if r.Metrics != nil { + r.Metrics.DeleteFeatureStore(cr.Namespace, cr.Name) + } if err := r.cleanupNamespaceRegistry(ctx, cr); err != nil { logger.Error(err, "Failed to clean up namespace registry entry") return ctrl.Result{}, err @@ -115,6 +123,9 @@ func (r *FeatureStoreReconciler) Reconcile(ctx context.Context, req ctrl.Request } result, recErr = r.deployFeast(ctx, cr) + if 
recErr == nil && r.Metrics != nil { + r.Metrics.RecordFeatureStore(cr) + } if cr.DeletionTimestamp == nil && !reflect.DeepEqual(currentStatus, cr.Status) { if err = r.Client.Status().Update(ctx, cr); err != nil { if apierrors.IsConflict(err) { diff --git a/infra/feast-operator/internal/controller/metrics/metrics.go b/infra/feast-operator/internal/controller/metrics/metrics.go new file mode 100644 index 00000000000..c85d894c4ce --- /dev/null +++ b/infra/feast-operator/internal/controller/metrics/metrics.go @@ -0,0 +1,133 @@ +/* +Copyright 2026 Feast Community. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package metrics provides a Prometheus info gauge that records the store +// types configured for each FeatureStore CR (online store, offline store, +// registry). These operator-level metrics are distinct from the Feast +// feature-server application metrics (feast_feature_server_*) and are useful +// for usage telemetry and assessing the impact of removing store type support. +package metrics + +import ( + feastdevv1 "github.com/feast-dev/feast/infra/feast-operator/api/v1" + "github.com/prometheus/client_golang/prometheus" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +const typeNone = "none" + +// FeatureStoreMetrics holds the Prometheus GaugeVec for feast-operator +// installation telemetry. 
+type FeatureStoreMetrics struct {
+	// FeatureStoreInfo is the info gauge; each recorded series is set to 1 and
+	// is labelled with namespace, name and the three store/registry types.
+	FeatureStoreInfo *prometheus.GaugeVec
+}
+
+// NewFeatureStoreMetrics builds a FeatureStoreMetrics whose gauge vector is
+// constructed but not registered; Register performs the registration.
+func NewFeatureStoreMetrics() *FeatureStoreMetrics {
+	opts := prometheus.GaugeOpts{
+		Name: "feast_operator_feature_store_info",
+		Help: "Information about a deployed FeatureStore. " +
+			"Value is always 1. Labels carry the configured store types: " +
+			"'online_store_type', 'offline_store_type', and 'registry_type' " +
+			"are set to the persistence type (e.g. redis, snowflake.offline, local) " +
+			"or 'none' when that component is not configured.",
+	}
+	labelNames := []string{"namespace", "name", "online_store_type", "offline_store_type", "registry_type"}
+
+	return &FeatureStoreMetrics{
+		FeatureStoreInfo: prometheus.NewGaugeVec(opts, labelNames),
+	}
+}
+
+// Register adds the info gauge to the controller-runtime metrics registry
+// (ctrlmetrics.Registry) via MustRegister.
+func (m *FeatureStoreMetrics) Register() {
+	collector := m.FeatureStoreInfo
+	ctrlmetrics.Registry.MustRegister(collector)
+}
+
+// RecordFeatureStore publishes the info series for fs. Label values are
+// derived from fs.Status.Applied.Services. Every series already stored under
+// the same namespace/name is dropped before the new one is set, so a change
+// of store type does not leave the old label combination behind.
+func (m *FeatureStoreMetrics) RecordFeatureStore(fs *feastdevv1.FeatureStore) {
+	applied := fs.Status.Applied.Services
+	identity := prometheus.Labels{"namespace": fs.Namespace, "name": fs.Name}
+
+	// Clear whatever label combination was recorded for this FeatureStore before.
+	m.FeatureStoreInfo.DeletePartialMatch(identity)
+
+	online := onlineStoreType(applied)
+	offline := offlineStoreType(applied)
+	registry := registryType(applied)
+
+	info := m.FeatureStoreInfo.WithLabelValues(fs.Namespace, fs.Name, online, offline, registry)
+	info.Set(1)
+}
+
+// DeleteFeatureStore removes the metric label set for the given FeatureStore.
+// Safe to call when the CR has already been deleted from the API server.
+func (m *FeatureStoreMetrics) DeleteFeatureStore(namespace, name string) {
+	// Match on identity only, so the series is dropped whatever store types it carried.
+	identity := prometheus.Labels{"namespace": namespace, "name": name}
+	m.FeatureStoreInfo.DeletePartialMatch(identity)
+}
+
+// onlineStoreType reports the label value for the online store: "none" when
+// no online store is configured, the DB persistence type when one is set,
+// and "file" in every other case.
+func onlineStoreType(svcs *feastdevv1.FeatureStoreServices) string {
+	if svcs == nil {
+		return typeNone
+	}
+	online := svcs.OnlineStore
+	if online == nil {
+		return typeNone
+	}
+	if online.Persistence == nil || online.Persistence.DBPersistence == nil {
+		return "file"
+	}
+	return online.Persistence.DBPersistence.Type
+}
+
+// offlineStoreType reports the label value for the offline store: "none"
+// when no offline store is configured, the DB persistence type when one is
+// set, otherwise the non-empty file persistence type, falling back to "file".
+func offlineStoreType(svcs *feastdevv1.FeatureStoreServices) string {
+	if svcs == nil {
+		return typeNone
+	}
+	offline := svcs.OfflineStore
+	if offline == nil {
+		return typeNone
+	}
+	persistence := offline.Persistence
+	if persistence == nil {
+		return "file"
+	}
+	// DB persistence takes precedence over file persistence.
+	if db := persistence.DBPersistence; db != nil {
+		return db.Type
+	}
+	if file := persistence.FilePersistence; file != nil && file.Type != "" {
+		return file.Type
+	}
+	return "file"
+}
+
+// registryType reports the label value for the registry: "local",
+// "remote_feastref" (remote registry referencing another FeatureStore),
+// "remote", or "none" when no registry variant is configured.
+func registryType(svcs *feastdevv1.FeatureStoreServices) string {
+	if svcs == nil || svcs.Registry == nil {
+		return typeNone
+	}
+	registry := svcs.Registry
+	// A local registry wins when both variants are populated.
+	if registry.Local != nil {
+		return "local"
+	}
+	if registry.Remote == nil {
+		return typeNone
+	}
+	if registry.Remote.FeastRef != nil {
+		return "remote_feastref"
+	}
+	return "remote"
+}
diff --git a/infra/feast-operator/internal/controller/metrics/metrics_test.go b/infra/feast-operator/internal/controller/metrics/metrics_test.go
new file mode 100644
index 00000000000..480a861560d
--- /dev/null
+++ b/infra/feast-operator/internal/controller/metrics/metrics_test.go
@@ -0,0 +1,275 @@
+/*
+Copyright 2026 Feast Community.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics_test
+
+import (
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	feastdevv1 "github.com/feast-dev/feast/infra/feast-operator/api/v1"
+	. "github.com/feast-dev/feast/infra/feast-operator/internal/controller/metrics"
+)
+
+const testNamespace = "test-ns"
+
+// infoLabelNames lists the label names of the info gauge in the order the
+// tests pass label values to gaugeValue.
+var infoLabelNames = []string{"namespace", "name", "online_store_type", "offline_store_type", "registry_type"}
+
+// gaugeValue returns the value of the series carrying exactly the given label
+// values (ordered as infoLabelNames), or -1 when no such series exists.
+//
+// The vector is inspected through Collect rather than
+// GetMetricWithLabelValues: the latter creates a missing child, so an absent
+// series would read as 0 and the probe itself would add a series to the
+// vector under test.
+func gaugeValue(gv *prometheus.GaugeVec, labels ...string) float64 {
+	if len(labels) != len(infoLabelNames) {
+		return -1
+	}
+
+	ch := make(chan prometheus.Metric)
+	go func() {
+		gv.Collect(ch)
+		close(ch)
+	}()
+
+	value := float64(-1)
+	// Always drain the channel so the collecting goroutine can finish.
+	for collected := range ch {
+		m := &dto.Metric{}
+		if err := collected.Write(m); err != nil {
+			continue
+		}
+		if hasLabelValues(m, infoLabelNames, labels) {
+			value = m.GetGauge().GetValue()
+		}
+	}
+	return value
+}
+
+// hasLabelValues reports whether m carries exactly the given label names,
+// each with the value at the same index in values.
+func hasLabelValues(m *dto.Metric, names, values []string) bool {
+	pairs := m.GetLabel()
+	if len(pairs) != len(names) {
+		return false
+	}
+	got := make(map[string]string, len(pairs))
+	for _, p := range pairs {
+		got[p.GetName()] = p.GetValue()
+	}
+	for i, name := range names {
+		if v, ok := got[name]; !ok || v != values[i] {
+			return false
+		}
+	}
+	return true
+}
+
+// featureStore builds a FeatureStore in testNamespace whose applied status
+// carries the given services, which is what RecordFeatureStore reads.
+func featureStore(name string, svcs *feastdevv1.FeatureStoreServices) *feastdevv1.FeatureStore {
+	fs := &feastdevv1.FeatureStore{
+		ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: name},
+	}
+	fs.Status.Applied.Services = svcs
+	return fs
+}
+
+// A FeatureStore without applied services is reported as "none" everywhere.
+func TestRecordFeatureStore_NoServices(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	m.RecordFeatureStore(featureStore("fs", nil))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "none", "none", "none"); v != 1 {
+		t.Errorf("expected 1 for all-absent store, got %v", v)
+	}
+}
+
+// An online store without persistence settings is reported as "file".
+func TestRecordFeatureStore_OnlineStore_File(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "file", "none", "none"); v != 1 {
+		t.Errorf("expected 1 for file online store, got %v", v)
+	}
+}
+
+// An online store with DB persistence is reported by its DB type.
+func TestRecordFeatureStore_OnlineStore_Redis(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{
+			Persistence: &feastdevv1.OnlineStorePersistence{
+				DBPersistence: &feastdevv1.OnlineStoreDBStorePersistence{Type: "redis"},
+			},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "redis", "none", "none"); v != 1 {
+		t.Errorf("expected 1 for redis online store, got %v", v)
+	}
+}
+
+// An offline store with DB persistence is reported by its DB type.
+func TestRecordFeatureStore_OfflineStore_DB(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		OfflineStore: &feastdevv1.OfflineStore{
+			Persistence: &feastdevv1.OfflineStorePersistence{
+				DBPersistence: &feastdevv1.OfflineStoreDBStorePersistence{Type: "snowflake.offline"},
+			},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "none", "snowflake.offline", "none"); v != 1 {
+		t.Errorf("expected 1 for snowflake offline store, got %v", v)
+	}
+}
+
+// An offline store with file persistence is reported by its file type.
+func TestRecordFeatureStore_OfflineStore_FilePersistenceType(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		OfflineStore: &feastdevv1.OfflineStore{
+			Persistence: &feastdevv1.OfflineStorePersistence{
+				FilePersistence: &feastdevv1.OfflineStoreFilePersistence{Type: "duckdb"},
+			},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "none", "duckdb", "none"); v != 1 {
+		t.Errorf("expected 1 for duckdb offline store, got %v", v)
+	}
+}
+
+// A local registry is reported as "local".
+func TestRecordFeatureStore_Registry_Local(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		Registry: &feastdevv1.Registry{
+			Local: &feastdevv1.LocalRegistryConfig{},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "none", "none", "local"); v != 1 {
+		t.Errorf("expected 1 for local registry, got %v", v)
+	}
+}
+
+// A remote registry addressed by hostname is reported as "remote".
+func TestRecordFeatureStore_Registry_RemoteHostname(t *testing.T) {
+	hostname := "registry.example.com:443"
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		Registry: &feastdevv1.Registry{
+			Remote: &feastdevv1.RemoteRegistryConfig{Hostname: &hostname},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "none", "none", "remote"); v != 1 {
+		t.Errorf("expected 1 for remote registry, got %v", v)
+	}
+}
+
+// A remote registry that references another FeatureStore is reported as
+// "remote_feastref".
+func TestRecordFeatureStore_Registry_RemoteFeastRef(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		Registry: &feastdevv1.Registry{
+			Remote: &feastdevv1.RemoteRegistryConfig{
+				FeastRef: &feastdevv1.FeatureStoreRef{Name: "other-fs", Namespace: "other-ns"},
+			},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "none", "none", "remote_feastref"); v != 1 {
+		t.Errorf("expected 1 for remote_feastref registry, got %v", v)
+	}
+}
+
+// All three components configured at once end up on a single series.
+func TestRecordFeatureStore_AllComponents(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{
+			Persistence: &feastdevv1.OnlineStorePersistence{
+				DBPersistence: &feastdevv1.OnlineStoreDBStorePersistence{Type: "redis"},
+			},
+		},
+		OfflineStore: &feastdevv1.OfflineStore{
+			Persistence: &feastdevv1.OfflineStorePersistence{
+				DBPersistence: &feastdevv1.OfflineStoreDBStorePersistence{Type: "snowflake.offline"},
+			},
+		},
+		Registry: &feastdevv1.Registry{
+			Local: &feastdevv1.LocalRegistryConfig{},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "redis", "snowflake.offline", "local"); v != 1 {
+		t.Errorf("expected 1 for full store config, got %v", v)
+	}
+}
+
+// Re-recording a FeatureStore with a different store type must drop the old
+// series (absent, i.e. -1) and publish the new one.
+func TestRecordFeatureStore_TypeChange(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs1 := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{
+			Persistence: &feastdevv1.OnlineStorePersistence{
+				DBPersistence: &feastdevv1.OnlineStoreDBStorePersistence{Type: "redis"},
+			},
+		},
+	}
+	svcs2 := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{
+			Persistence: &feastdevv1.OnlineStorePersistence{
+				DBPersistence: &feastdevv1.OnlineStoreDBStorePersistence{Type: "postgres"},
+			},
+		},
+	}
+
+	m.RecordFeatureStore(featureStore("fs", svcs1))
+	m.RecordFeatureStore(featureStore("fs", svcs2))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "redis", "none", "none"); v != -1 {
+		t.Errorf("old label set (redis) should be removed after type change, got %v", v)
+	}
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "postgres", "none", "none"); v != 1 {
+		t.Errorf("new label set (postgres) should be 1 after type change, got %v", v)
+	}
+}
+
+// DeleteFeatureStore must remove the series entirely (absent, i.e. -1).
+func TestDeleteFeatureStore_RemovesMetric(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+	svcs := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{
+			Persistence: &feastdevv1.OnlineStorePersistence{
+				DBPersistence: &feastdevv1.OnlineStoreDBStorePersistence{Type: "redis"},
+			},
+		},
+	}
+	m.RecordFeatureStore(featureStore("fs", svcs))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "redis", "none", "none"); v != 1 {
+		t.Fatalf("setup: expected 1 before delete, got %v", v)
+	}
+
+	m.DeleteFeatureStore(testNamespace, "fs")
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs", "redis", "none", "none"); v != -1 {
+		t.Errorf("expected no series (-1) after DeleteFeatureStore, got %v", v)
+	}
+}
+
+// Deleting one FeatureStore must leave the series of another untouched.
+func TestMultipleFeatureStores_IndependentLabelSets(t *testing.T) {
+	m := NewFeatureStoreMetrics()
+
+	svcs1 := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{
+			Persistence: &feastdevv1.OnlineStorePersistence{
+				DBPersistence: &feastdevv1.OnlineStoreDBStorePersistence{Type: "redis"},
+			},
+		},
+	}
+	svcs2 := &feastdevv1.FeatureStoreServices{
+		OnlineStore: &feastdevv1.OnlineStore{
+			Persistence: &feastdevv1.OnlineStorePersistence{
+				DBPersistence: &feastdevv1.OnlineStoreDBStorePersistence{Type: "postgres"},
+			},
+		},
+	}
+
+	m.RecordFeatureStore(featureStore("fs-1", svcs1))
+	m.RecordFeatureStore(featureStore("fs-2", svcs2))
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs-1", "redis", "none", "none"); v != 1 {
+		t.Errorf("fs-1: expected redis=1, got %v", v)
+	}
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs-2", "postgres", "none", "none"); v != 1 {
+		t.Errorf("fs-2: expected postgres=1, got %v", v)
+	}
+
+	m.DeleteFeatureStore(testNamespace, "fs-1")
+
+	if v := gaugeValue(m.FeatureStoreInfo, testNamespace, "fs-2", "postgres", "none", "none"); v != 1 {
+		t.Errorf("fs-2 should be unaffected after fs-1 deletion, got %v", v)
+	}
+}