diff --git a/content/changelog/2025-12-17-v2-metrics-and-observations-api.mdx b/content/changelog/2025-12-17-v2-metrics-and-observations-api.mdx index 6758bd2f5a..fd86cdee15 100644 --- a/content/changelog/2025-12-17-v2-metrics-and-observations-api.mdx +++ b/content/changelog/2025-12-17-v2-metrics-and-observations-api.mdx @@ -45,7 +45,7 @@ Built on an optimized data model, the v2 Metrics API delivers significantly fast **The `traces` view is no longer available in v2.** Instead, use the `observations` view which is both faster and more powerful. -**Row limit** - Default limit of 100 rows per query ensures consistent performance. Override with a custom `rowLimit` in your query. +**Row limit** - Default limit of 100 rows per query ensures consistent performance. Override with a custom `config.row_limit` in your query. **High cardinality dimensions** - Certain dimensions like `id`, `traceId`, `userId`, and `sessionId` can no longer be used for grouping in the v2 Metrics API. Grouping by these high cardinality fields is not performant and rarely useful in practice. These dimensions remain available for filtering. diff --git a/content/docs/metrics/features/metrics-api.mdx b/content/docs/metrics/features/metrics-api.mdx index 6383273614..a22fb5c8e6 100644 --- a/content/docs/metrics/features/metrics-api.mdx +++ b/content/docs/metrics/features/metrics-api.mdx @@ -49,12 +49,16 @@ The v2 Metrics API provides significant performance improvements through an opti ### Row Limit -The v2 Metrics API enforces a default `rowLimit` of 100 rows per query to ensure consistent performance. You can specify a custom `rowLimit` in your query to override this default. +The v2 Metrics API enforces a default `config.row_limit` of 100 rows per query to ensure consistent performance. You can specify a custom `config.row_limit` in your query to override this default. ### High Cardinality Dimensions Certain dimensions like `id`, `traceId`, `userId`, and `sessionId` cannot be used for grouping in the v2 Metrics API. Grouping by these high cardinality fields is extremely expensive and rarely useful in practice. These dimensions remain available for filtering. +### Ordering by metrics + +When ordering by an aggregated metric, use the returned metric field name in the format `{aggregation}_{measure}`, for example `sum_totalCost` for `{ "measure": "totalCost", "aggregation": "sum" }`. When ordering by the time dimension, use the returned field name `time_dimension`. + ### Example: Most expensive models used in observations ```bash @@ -68,8 +72,8 @@ curl \ "filters": [], "fromTimestamp": "2025-12-01T00:00:00Z", "toTimestamp": "2025-12-16T00:00:00Z", - "orderBy": [{"field": "totalCost_sum", "direction": "desc"}], - "rowLimit": 1000 + "orderBy": [{"field": "sum_totalCost", "direction": "desc"}], + "config": {"row_limit": 1000} }' \ https://cloud.langfuse.com/api/public/v2/metrics ``` @@ -89,7 +93,7 @@ When moving existing queries to v2: - Replace `GET /api/public/metrics` with `GET /api/public/v2/metrics`. - Replace `view: "traces"` with `view: "observations"` and use trace-level dimensions such as `traceName`, `traceRelease`, or `traceVersion` where supported. `userId` and `sessionId` remain available as filters, but cannot be used for grouping in v2. - Review your `metrics` array when migrating trace-view queries. In the v2 `observations` view, measures such as `count`, `latency`, `totalCost`, and `totalTokens` are calculated over observation rows. Use [Observations API v2](/docs/api-and-data-platform/features/observations-api#v2) and group by `traceId` client-side when you need trace-level counts or trace durations. -- Set `rowLimit` explicitly when migrating queries that should return more than the default 100 rows. +- Set `config.row_limit` explicitly when migrating queries that should return more than the default 100 rows. - Use [Observations API v2](/docs/api-and-data-platform/features/observations-api#v2) instead if you need row-level spans, generations, or events. The rest of this section documents the legacy v1 endpoint for existing integrations. diff --git a/content/guides/cookbook/example_metrics_api_v2.mdx b/content/guides/cookbook/example_metrics_api_v2.mdx index e3e3060e2c..cdfd7d97b1 100644 --- a/content/guides/cookbook/example_metrics_api_v2.mdx +++ b/content/guides/cookbook/example_metrics_api_v2.mdx @@ -77,10 +77,10 @@ def run_metrics_query(query: dict) -> pd.DataFrame: payload = response.json() df = pd.DataFrame(payload.get("data", [])) - numeric_suffixes = ("_count", "_sum", "_avg", "_p50", "_p75", "_p90", "_p95", "_p99", "_min", "_max") + numeric_prefixes = ("count_", "sum_", "avg_", "p50_", "p75_", "p90_", "p95_", "p99_", "min_", "max_") for col in df.columns: - if col.endswith(numeric_suffixes): + if col.startswith(numeric_prefixes): df[col] = pd.to_numeric(df[col], errors="coerce") return df @@ -107,8 +107,8 @@ cost_by_model_query = { "filters": [], "fromTimestamp": seven_days_ago.isoformat(), "toTimestamp": now.isoformat(), - "orderBy": [{"field": "totalCost_sum", "direction": "desc"}], - "rowLimit": 10, + "orderBy": [{"field": "sum_totalCost", "direction": "desc"}], + "config": {"row_limit": 10}, } cost_by_model_df = run_metrics_query(cost_by_model_query) @@ -134,8 +134,8 @@ volume_and_latency_query = { "timeDimension": {"granularity": "day"}, "fromTimestamp": seven_days_ago.isoformat(), "toTimestamp": now.isoformat(), - "orderBy": [{"field": "timeDimension", "direction": "asc"}], - "rowLimit": 100, + "orderBy": [{"field": "time_dimension", "direction": "asc"}], + "config": {"row_limit": 100}, } volume_and_latency_df = run_metrics_query(volume_and_latency_query) @@ -147,10 +147,10 @@ volume_and_latency_df ```python if not volume_and_latency_df.empty: plot_df = volume_and_latency_df.copy() - plot_df["timeDimension"] = pd.to_datetime(plot_df["timeDimension"]) - plot_df = plot_df.set_index("timeDimension") + plot_df["time_dimension"] = pd.to_datetime(plot_df["time_dimension"]) + plot_df = plot_df.set_index("time_dimension") - ax = plot_df[["count_count", "latency_p95"]].plot( + ax = plot_df[["count_count", "p95_latency"]].plot( subplots=True, figsize=(10, 6), title=["Daily request volume", "Daily p95 latency (ms)"], @@ -179,8 +179,8 @@ score_summary_query = { "filters": [], "fromTimestamp": seven_days_ago.isoformat(), "toTimestamp": now.isoformat(), - "orderBy": [{"field": "value_avg", "direction": "desc"}], - "rowLimit": 20, + "orderBy": [{"field": "avg_value", "direction": "desc"}], + "config": {"row_limit": 20}, } score_summary_df = run_metrics_query(score_summary_query) @@ -193,4 +193,3 @@ score_summary_df You can adapt the same helper for other v2 views such as `scores-categorical`, add filters on fields like environment or trace name, or export the resulting `DataFrame` for downstream reporting. To explore the full query schema and supported fields, see the [Metrics API documentation](https://langfuse.com/docs/metrics/features/metrics-api#v2) and the [API reference](https://api.reference.langfuse.com/#tag/metricsv2/GET/api/public/v2/metrics). - diff --git a/content/integrations/other/hermes.mdx b/content/integrations/other/hermes.mdx index df623f392e..b25122f294 100644 --- a/content/integrations/other/hermes.mdx +++ b/content/integrations/other/hermes.mdx @@ -20,7 +20,6 @@ The steps below follow Hermes' [official Langfuse plugin docs](https://hermes-ag ## Step 1: Install Dependencies - ```python %pip install git+https://github.com/NousResearch/hermes-agent.git langfuse -U ``` @@ -42,7 +41,6 @@ The plugin also accepts the standard SDK env vars (`LANGFUSE_PUBLIC_KEY`, `LANGF The cell below sets the same credentials inside this Python kernel so we can quickly verify them with the Langfuse SDK. **Note:** these `os.environ` values are scoped to the notebook process and will not be visible to a `hermes chat` command run in a separate terminal — use `~/.hermes/.env` for that. - ```python import os @@ -59,8 +57,6 @@ os.environ["LANGFUSE_BASE_URL"] = "https://cloud.langfuse.com" # 🇪🇺 EU reg With the environment variables set, initialize the Langfuse client to confirm your credentials work. Hermes uses its own internal client, so this step is purely a sanity check that your keys are valid. - - ```python from langfuse import get_client @@ -85,7 +81,6 @@ The plugin hooks into Hermes lifecycle events (`pre_api_request` / `post_api_req Session grouping uses the Hermes session ID (or task ID for sub-agents), so every turn within a `hermes chat` session lives under one Langfuse session. The plugin is also **fail-open**: missing SDK, missing credentials, or a transient Langfuse error all turn into a silent no-op — the agent loop is never impacted. - ```python # Enable the Langfuse plugin (run this in your terminal, not in a notebook) # hermes plugins enable observability/langfuse @@ -113,7 +108,6 @@ With the plugin enabled and credentials set, every Hermes conversation turn is a You can start a conversation from the CLI: - ```python # Send a one-off message (traces are sent automatically): # hermes chat -q "hello" @@ -126,13 +120,13 @@ You can start a conversation from the CLI: The Hermes Langfuse plugin supports several optional environment variables: -| Variable | Description | Default | -|---|---|---| -| `HERMES_LANGFUSE_ENV` | Environment tag (e.g. `production`, `staging`) | — | -| `HERMES_LANGFUSE_RELEASE` | Release/version tag | — | -| `HERMES_LANGFUSE_SAMPLE_RATE` | Sampling rate `0.0`–`1.0` | `1.0` | -| `HERMES_LANGFUSE_MAX_CHARS` | Max characters per traced field | `12000` | -| `HERMES_LANGFUSE_DEBUG` | Verbose plugin logging (`true`/`false`) | `false` | +| Variable | Description | Default | +| ----------------------------- | ---------------------------------------------- | ------- | +| `HERMES_LANGFUSE_ENV` | Environment tag (e.g. `production`, `staging`) | — | +| `HERMES_LANGFUSE_RELEASE` | Release/version tag | — | +| `HERMES_LANGFUSE_SAMPLE_RATE` | Sampling rate `0.0`–`1.0` | `1.0` | +| `HERMES_LANGFUSE_MAX_CHARS` | Max characters per traced field | `12000` | +| `HERMES_LANGFUSE_DEBUG` | Verbose plugin logging (`true`/`false`) | `false` | Set these in `~/.hermes/.env` or export them in your shell before starting Hermes. diff --git a/cookbook/example_metrics_api_v2.ipynb b/cookbook/example_metrics_api_v2.ipynb index 1407479580..2698bf734c 100644 --- a/cookbook/example_metrics_api_v2.ipynb +++ b/cookbook/example_metrics_api_v2.ipynb @@ -115,10 +115,10 @@ " payload = response.json()\n", " df = pd.DataFrame(payload.get(\"data\", []))\n", "\n", - " numeric_suffixes = (\"_count\", \"_sum\", \"_avg\", \"_p50\", \"_p75\", \"_p90\", \"_p95\", \"_p99\", \"_min\", \"_max\")\n", + " numeric_prefixes = (\"count_\", \"sum_\", \"avg_\", \"p50_\", \"p75_\", \"p90_\", \"p95_\", \"p99_\", \"min_\", \"max_\")\n", "\n", " for col in df.columns:\n", - " if col.endswith(numeric_suffixes):\n", + " if col.startswith(numeric_prefixes):\n", " df[col] = pd.to_numeric(df[col], errors=\"coerce\")\n", "\n", " return df\n", @@ -156,8 +156,8 @@ " \"filters\": [],\n", " \"fromTimestamp\": seven_days_ago.isoformat(),\n", " \"toTimestamp\": now.isoformat(),\n", - " \"orderBy\": [{\"field\": \"totalCost_sum\", \"direction\": \"desc\"}],\n", - " \"rowLimit\": 10,\n", + " \"orderBy\": [{\"field\": \"sum_totalCost\", \"direction\": \"desc\"}],\n", + " \"config\": {\"row_limit\": 10},\n", "}\n", "\n", "cost_by_model_df = run_metrics_query(cost_by_model_query)\n", @@ -194,8 +194,8 @@ " \"timeDimension\": {\"granularity\": \"day\"},\n", " \"fromTimestamp\": seven_days_ago.isoformat(),\n", " \"toTimestamp\": now.isoformat(),\n", - " \"orderBy\": [{\"field\": \"timeDimension\", \"direction\": \"asc\"}],\n", - " \"rowLimit\": 100,\n", + " \"orderBy\": [{\"field\": \"time_dimension\", \"direction\": \"asc\"}],\n", + " \"config\": {\"row_limit\": 100},\n", "}\n", "\n", "volume_and_latency_df = run_metrics_query(volume_and_latency_query)\n", @@ -210,10 +210,10 @@ "source": [ "if not volume_and_latency_df.empty:\n", " plot_df = volume_and_latency_df.copy()\n", - " plot_df[\"timeDimension\"] = pd.to_datetime(plot_df[\"timeDimension\"])\n", - " plot_df = plot_df.set_index(\"timeDimension\")\n", + " plot_df[\"time_dimension\"] = pd.to_datetime(plot_df[\"time_dimension\"])\n", + " plot_df = plot_df.set_index(\"time_dimension\")\n", "\n", - " ax = plot_df[[\"count_count\", \"latency_p95\"]].plot(\n", + " ax = plot_df[[\"count_count\", \"p95_latency\"]].plot(\n", " subplots=True,\n", " figsize=(10, 6),\n", " title=[\"Daily request volume\", \"Daily p95 latency (ms)\"],\n", @@ -253,8 +253,8 @@ " \"filters\": [],\n", " \"fromTimestamp\": seven_days_ago.isoformat(),\n", " \"toTimestamp\": now.isoformat(),\n", - " \"orderBy\": [{\"field\": \"value_avg\", \"direction\": \"desc\"}],\n", - " \"rowLimit\": 20,\n", + " \"orderBy\": [{\"field\": \"avg_value\", \"direction\": \"desc\"}],\n", + " \"config\": {\"row_limit\": 20},\n", "}\n", "\n", "score_summary_df = run_metrics_query(score_summary_query)\n",