Skip to content

Commit 050cfa7

Browse files
committed
feat(openmetrics): add 9 missing host RRD metrics
1 parent 8ecaa6d commit 050cfa7

File tree

5 files changed

+261
-23
lines changed

5 files changed

+261
-23
lines changed

CHANGELOG.unreleased.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
1010
### Enhancements
1111

12+
- [OpenMetrics] Add 9 missing host RRD metrics: `hostload`, `memory_reclaimed`, `memory_reclaimed_max`, `running_vcpus`, `pif_aggr_rx`, `pif_aggr_tx`, and the per-SR metrics `iops_total`, `io_throughput_total` and `latency` (PR [#XXXX](https://github.com/vatesfr/xen-orchestra/pull/XXXX))
13+
1214
> Users must be able to say: "Nice enhancement, I'm eager to test it"
1315
1416
### Bug fixes
@@ -36,5 +38,7 @@
3638
- @xen-orchestra/immutable-backups patch
3739
- @xen-orchestra/web patch
3840
- @xen-orchestra/web-core patch
41+
- xo-server minor
42+
- xo-server-openmetrics minor
3943

4044
<!--packages-end-->

docs/docs/advanced.md

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -437,25 +437,34 @@ Infrastructure metrics are prefixed with `xcp_` and XO management plane metrics
437437

438438
#### Host Metrics
439439

440-
| Metric | Type | Description |
441-
| --------------------------------------- | ------- | ---------------------------------------------------------------------- |
442-
| `xcp_host_load_average` | gauge | Host load average |
443-
| `xcp_host_memory_free_bytes` | gauge | Free memory in bytes |
444-
| `xcp_host_memory_total_bytes` | gauge | Total memory in bytes |
445-
| `xcp_host_cpu_average` | gauge | Average CPU usage (0-1) |
446-
| `xcp_host_cpu_core_usage` | gauge | Per-core CPU usage |
447-
| `xcp_host_network_receive_bytes_total` | counter | Network bytes received per interface |
448-
| `xcp_host_network_transmit_bytes_total` | counter | Network bytes transmitted per interface |
449-
| `xcp_host_disk_iops_read` | gauge | Disk read IOPS per SR |
450-
| `xcp_host_disk_iops_write` | gauge | Disk write IOPS per SR |
451-
| `xcp_host_disk_throughput_read_bytes` | gauge | Disk read throughput (bytes/s) |
452-
| `xcp_host_disk_throughput_write_bytes` | gauge | Disk write throughput (bytes/s) |
453-
| `xcp_host_disk_read_latency_seconds` | gauge | Disk read latency |
454-
| `xcp_host_disk_write_latency_seconds` | gauge | Disk write latency |
455-
| `xcp_host_disk_iowait` | gauge | Disk IO wait ratio |
456-
| `xcp_host_power_consumption_watts` | gauge | Power consumption in watts (DCMI) |
457-
| `xcp_host_uptime_seconds` | gauge | Host uptime in seconds since boot |
458-
| `xcp_host_status` | gauge | Host status (1 = current state, `power_state` and `enabled` in labels) |
440+
| Metric | Type | Description |
441+
| -------------------------------------------- | ------- | ---------------------------------------------------------------------- |
442+
| `xcp_host_cpu_average` | gauge | Average CPU usage (0-1) |
443+
| `xcp_host_cpu_core_usage` | gauge | Per-core CPU usage |
444+
| `xcp_host_disk_iops_read` | gauge | Disk read IOPS per SR |
445+
| `xcp_host_disk_iops_write` | gauge | Disk write IOPS per SR |
446+
| `xcp_host_disk_iops_total` | gauge | Total IOPS (read + write) per SR |
447+
| `xcp_host_disk_iowait` | gauge | Disk IO wait ratio |
448+
| `xcp_host_disk_latency_seconds` | gauge | Combined I/O latency per SR in seconds |
449+
| `xcp_host_disk_read_latency_seconds` | gauge | Disk read latency per SR in seconds |
450+
| `xcp_host_disk_write_latency_seconds` | gauge | Disk write latency per SR in seconds |
451+
| `xcp_host_disk_throughput_read_bytes` | gauge | Disk read throughput (bytes/s) |
452+
| `xcp_host_disk_throughput_write_bytes` | gauge | Disk write throughput (bytes/s) |
453+
| `xcp_host_disk_throughput_total_bytes` | gauge | Total I/O throughput per SR (bytes/s) |
454+
| `xcp_host_load` | gauge | Normalized host load |
455+
| `xcp_host_load_average` | gauge | Host load average |
456+
| `xcp_host_memory_free_bytes` | gauge | Free memory in bytes |
457+
| `xcp_host_memory_reclaimed_bytes` | gauge | Reclaimed host memory in bytes |
458+
| `xcp_host_memory_reclaimed_max_bytes` | gauge | Maximum reclaimable host memory in bytes |
459+
| `xcp_host_memory_total_bytes` | gauge | Total memory in bytes |
460+
| `xcp_host_network_aggregated_receive_bytes` | gauge | Received bytes per second, aggregated across all physical interfaces (PIFs) |
461+
| `xcp_host_network_aggregated_transmit_bytes` | gauge | Transmitted bytes per second, aggregated across all physical interfaces (PIFs) |
462+
| `xcp_host_network_receive_bytes_total` | counter | Network bytes received per interface |
463+
| `xcp_host_network_transmit_bytes_total` | counter | Network bytes transmitted per interface |
464+
| `xcp_host_power_consumption_watts` | gauge | Power consumption in watts (DCMI) |
465+
| `xcp_host_running_vcpus` | gauge | Total number of running vCPUs |
466+
| `xcp_host_status` | gauge | Host status (1 = current state, `power_state` and `enabled` in labels) |
467+
| `xcp_host_uptime_seconds` | gauge | Host uptime in seconds since boot |
459468

460469
#### VM Metrics
461470

@@ -578,7 +587,7 @@ rate(xcp_host_network_receive_bytes_total[5m]) / 1024 / 1024
578587
xcp_vm_disk_read_latency_seconds > 0.01
579588
580589
# Total IOPS per Storage Repository
581-
sum by (sr_name) (xcp_host_disk_iops_read + xcp_host_disk_iops_write)
590+
sum by (sr_name) (xcp_host_disk_iops_total)
582591
583592
# SR usage percentage
584593
(xcp_sr_physical_usage_bytes / xcp_sr_physical_size_bytes) * 100

packages/xo-server-openmetrics/src/openmetric-formatter.mts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,20 @@ export const HOST_METRICS: MetricDefinition[] = [
115115
extractLabels: matches => ({ core: matches[1]! }),
116116
},
117117

118+
// Aggregated network metrics (PIF)
119+
{
120+
test: 'pif_aggr_rx',
121+
openMetricName: 'host_network_aggregated_receive_bytes',
122+
type: 'gauge',
123+
help: 'Aggregated received bytes per second',
124+
},
125+
{
126+
test: 'pif_aggr_tx',
127+
openMetricName: 'host_network_aggregated_transmit_bytes',
128+
type: 'gauge',
129+
help: 'Aggregated transmitted bytes per second',
130+
},
131+
118132
// Network metrics (PIF)
119133
{
120134
test: /^pif_(.+)_rx$/,
@@ -199,6 +213,67 @@ export const HOST_METRICS: MetricDefinition[] = [
199213
type: 'gauge',
200214
help: 'Host power consumption in watts (DCMI)',
201215
},
216+
217+
// Normalized host load
218+
{
219+
test: 'hostload',
220+
openMetricName: 'host_load',
221+
type: 'gauge',
222+
help: 'Normalized host load',
223+
},
224+
225+
// Reclaimed memory metrics
226+
{
227+
test: 'memory_reclaimed',
228+
openMetricName: 'host_memory_reclaimed_bytes',
229+
type: 'gauge',
230+
help: 'Reclaimed host memory in bytes',
231+
transformValue: v => v * 1024, // KiB to bytes
232+
},
233+
{
234+
test: 'memory_reclaimed_max',
235+
openMetricName: 'host_memory_reclaimed_max_bytes',
236+
type: 'gauge',
237+
help: 'Maximum reclaimable host memory in bytes',
238+
transformValue: v => v * 1024, // KiB to bytes
239+
},
240+
241+
// Running vCPUs
242+
{
243+
test: 'running_vcpus',
244+
openMetricName: 'host_running_vcpus',
245+
type: 'gauge',
246+
help: 'Total number of running vCPUs',
247+
},
248+
249+
// Total disk IOPS per SR
250+
{
251+
test: /^iops_total_(.+)$/,
252+
openMetricName: 'host_disk_iops_total',
253+
type: 'gauge',
254+
help: 'Total IOPS (read + write) per SR',
255+
extractLabels: matches => ({ sr: matches[1]! }),
256+
},
257+
258+
// Total disk throughput per SR
259+
{
260+
test: /^io_throughput_total_(.+)$/,
261+
openMetricName: 'host_disk_throughput_total_bytes',
262+
type: 'gauge',
263+
help: 'Total I/O throughput per SR in bytes per second',
264+
transformValue: v => v * Math.pow(2, 20), // MiB to bytes
265+
extractLabels: matches => ({ sr: matches[1]! }),
266+
},
267+
268+
// Combined disk latency per SR
269+
{
270+
test: /^latency_(.+)$/,
271+
openMetricName: 'host_disk_latency_seconds',
272+
type: 'gauge',
273+
help: 'Combined I/O latency per SR in seconds',
274+
transformValue: v => v / 1e6, // µs to seconds
275+
extractLabels: matches => ({ sr: matches[1]! }),
276+
},
202277
]
203278

204279
/**

packages/xo-server-openmetrics/src/openmetric-formatter.test.mts

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,118 @@ describe('HOST_METRICS DCMI', () => {
188188
})
189189
})
190190

191+
describe('HOST_METRICS new metrics', () => {
192+
it('should include hostload metric', () => {
193+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_load')
194+
assert.ok(metric)
195+
assert.equal(metric.type, 'gauge')
196+
assert.equal(metric.test, 'hostload')
197+
assert.equal(metric.transformValue, undefined)
198+
})
199+
200+
it('should include memory_reclaimed metric with KiB to bytes transformation', () => {
201+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_memory_reclaimed_bytes')
202+
assert.ok(metric)
203+
assert.equal(metric.type, 'gauge')
204+
assert.equal(metric.test, 'memory_reclaimed')
205+
assert.ok(metric.transformValue)
206+
// 512 KiB * 1024 = 524288 bytes
207+
assert.equal(metric.transformValue!(512), 512 * 1024)
208+
})
209+
210+
it('should include memory_reclaimed_max metric with KiB to bytes transformation', () => {
211+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_memory_reclaimed_max_bytes')
212+
assert.ok(metric)
213+
assert.equal(metric.type, 'gauge')
214+
assert.equal(metric.test, 'memory_reclaimed_max')
215+
assert.ok(metric.transformValue)
216+
assert.equal(metric.transformValue!(1024), 1024 * 1024)
217+
})
218+
219+
it('should include running_vcpus metric', () => {
220+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_running_vcpus')
221+
assert.ok(metric)
222+
assert.equal(metric.type, 'gauge')
223+
assert.equal(metric.test, 'running_vcpus')
224+
assert.equal(metric.transformValue, undefined)
225+
})
226+
227+
it('should include pif_aggr_rx metric', () => {
228+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_network_aggregated_receive_bytes')
229+
assert.ok(metric)
230+
assert.equal(metric.type, 'gauge')
231+
assert.equal(metric.test, 'pif_aggr_rx')
232+
assert.equal(metric.transformValue, undefined)
233+
})
234+
235+
it('should include pif_aggr_tx metric', () => {
236+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_network_aggregated_transmit_bytes')
237+
assert.ok(metric)
238+
assert.equal(metric.type, 'gauge')
239+
assert.equal(metric.test, 'pif_aggr_tx')
240+
assert.equal(metric.transformValue, undefined)
241+
})
242+
243+
it('should match pif_aggr_rx before the generic PIF regex', () => {
244+
const result = findMetricDefinition('pif_aggr_rx', 'host')
245+
assert.ok(result)
246+
assert.equal(result.definition.openMetricName, 'host_network_aggregated_receive_bytes')
247+
})
248+
249+
it('should include iops_total per SR with label extraction', () => {
250+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_disk_iops_total')
251+
assert.ok(metric)
252+
assert.equal(metric.type, 'gauge')
253+
assert.ok(metric.extractLabels)
254+
255+
const regex = metric.test as RegExp
256+
const match = regex.exec('iops_total_abc12345')
257+
assert.ok(match)
258+
assert.deepEqual(metric.extractLabels!(match), { sr: 'abc12345' })
259+
})
260+
261+
it('should include io_throughput_total per SR with MiB to bytes transformation', () => {
262+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_disk_throughput_total_bytes')
263+
assert.ok(metric)
264+
assert.equal(metric.type, 'gauge')
265+
assert.ok(metric.transformValue)
266+
assert.ok(metric.extractLabels)
267+
268+
// 2 MiB/s = 2 * 2^20 bytes/s
269+
assert.equal(metric.transformValue!(2), 2 * 2 ** 20)
270+
271+
const regex = metric.test as RegExp
272+
const match = regex.exec('io_throughput_total_def-456-789')
273+
assert.ok(match)
274+
assert.deepEqual(metric.extractLabels!(match), { sr: 'def-456-789' })
275+
})
276+
277+
it('should include latency per SR with µs to seconds transformation', () => {
278+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_disk_latency_seconds')
279+
assert.ok(metric)
280+
assert.equal(metric.type, 'gauge')
281+
assert.ok(metric.transformValue)
282+
assert.ok(metric.extractLabels)
283+
284+
// 500 µs / 1e6 = 0.0005 seconds
285+
assert.equal(metric.transformValue!(500), 0.0005)
286+
287+
const regex = metric.test as RegExp
288+
const match = regex.exec('latency_abc-def-123')
289+
assert.ok(match)
290+
assert.deepEqual(metric.extractLabels!(match), { sr: 'abc-def-123' })
291+
})
292+
293+
it('should not match read_latency or write_latency with latency_<sr> regex', () => {
294+
const metric = HOST_METRICS.find(m => m.openMetricName === 'host_disk_latency_seconds')
295+
assert.ok(metric)
296+
const regex = metric.test as RegExp
297+
// ^latency_ anchor prevents matching read_latency_ and write_latency_
298+
assert.equal(regex.exec('read_latency_abc12345'), null)
299+
assert.equal(regex.exec('write_latency_abc12345'), null)
300+
})
301+
})
302+
191303
// ============================================================================
192304
// findMetricDefinition Tests
193305
// ============================================================================

packages/xo-server/src/xapi-stats.mjs

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ const STATS = {
156156
test: /^iops_write_(\w+)$/,
157157
getPath: matches => ['iops', 'w', matches[1]],
158158
},
159+
total: {
160+
test: /^iops_total_(\w+)$/,
161+
getPath: matches => ['iops', 'total', matches[1]],
162+
},
159163
},
160164
ioThroughput: {
161165
r: {
@@ -168,6 +172,11 @@ const STATS = {
168172
getPath: matches => ['ioThroughput', 'w', matches[1]],
169173
transformValue: value => value * 2 ** 20,
170174
},
175+
total: {
176+
test: /^io_throughput_total_(\w+)$/,
177+
getPath: matches => ['ioThroughput', 'total', matches[1]],
178+
transformValue: value => value * 2 ** 20, // MiB/s to bytes/s
179+
},
171180
},
172181
latency: {
173182
r: {
@@ -180,11 +189,40 @@ const STATS = {
180189
getPath: matches => ['latency', 'w', matches[1]],
181190
transformValue: value => value / 1e3,
182191
},
192+
combined: {
193+
test: /^latency_(\w+)$/,
194+
getPath: matches => ['latency', 'combined', matches[1]],
195+
transformValue: value => value / 1e3,
196+
},
183197
},
184198
iowait: {
185199
test: /^iowait_(\w+)$/,
186200
getPath: matches => ['iowait', matches[1]],
187201
},
202+
hostload: {
203+
test: 'hostload',
204+
},
205+
memoryReclaimed: {
206+
test: 'memory_reclaimed',
207+
transformValue: value => value * 1024, // KiB to bytes
208+
},
209+
memoryReclaimedMax: {
210+
test: 'memory_reclaimed_max',
211+
transformValue: value => value * 1024, // KiB to bytes
212+
},
213+
runningVcpus: {
214+
test: 'running_vcpus',
215+
},
216+
pifsAggr: {
217+
rx: {
218+
test: 'pif_aggr_rx',
219+
getPath: () => ['pifsAggr', 'rx'],
220+
},
221+
tx: {
222+
test: 'pif_aggr_tx',
223+
getPath: () => ['pifsAggr', 'tx'],
224+
},
225+
},
188226
},
189227
vm: {
190228
memoryFree: {
@@ -279,12 +317,12 @@ const STATS = {
279317
r: {
280318
test: /^vbd_xvd(.)_read_latency$/,
281319
getPath: matches => ['vbdLatency', 'r', matches[1]],
282-
transformValue: value => value / 1000,
320+
transformValue: value => value / 1e3, // µs to ms
283321
},
284322
w: {
285323
test: /^vbd_xvd(.)_write_latency$/,
286324
getPath: matches => ['vbdLatency', 'w', matches[1]],
287-
transformValue: value => value / 1000,
325+
transformValue: value => value / 1e3, // µs to ms
288326
},
289327
},
290328
ioThroughput: {
@@ -307,7 +345,7 @@ const STATS = {
307345
vbdAvgLatency: {
308346
test: /^vbd_xvd(.)_latency$/,
309347
getPath: matches => ['vbdAvgLatency', matches[1]],
310-
transformValue: value => value / 1e3,
348+
transformValue: value => value / 1e3, // µs to ms (for xo-web formatTime)
311349
},
312350
vbdIowait: {
313351
test: /^vbd_xvd(.)_iowait$/,

0 commit comments

Comments
 (0)