Skip to content

Commit 68d74e4

Browse files
F OpenNebula/engineering#716: Add opennebula-exporter funct (#4129)
FireEdge status, AR Capacity metrics, HA metrics Co-authored-by: Tino Vázquez <cvazquez@opennebula.io>
1 parent 1d89659 commit 68d74e4

File tree

4 files changed

+382
-3
lines changed

4 files changed

+382
-3
lines changed
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# -------------------------------------------------------------------------- #
2+
# Copyright 2002-2026, OpenNebula Project, OpenNebula Systems #
3+
# #
4+
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
5+
# not use this file except in compliance with the License. You may obtain #
6+
# a copy of the License at #
7+
# #
8+
# http://www.apache.org/licenses/LICENSE-2.0 #
9+
# #
10+
# Unless required by applicable law or agreed to in writing, software #
11+
# distributed under the License is distributed on an "AS IS" BASIS, #
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
13+
# See the License for the specific language governing permissions and #
14+
# limitations under the License. #
15+
#--------------------------------------------------------------------------- #
16+
17+
require 'opennebula'
18+
require 'rexml/document'
19+
20+
# OpenNebula Address Range collector.
#
# Walks every Virtual Network returned by the pool query and publishes one
# gauge sample per Address Range (AR) with the fraction of leases still free.
#
# Metrics exposed:
#   - opennebula_vnet_free_lease_ratio{vnet_id="...",vnet_name="...",ar_id="..."} 0.00-1.00
class OpenNebulaARCollector

    LABELS = [:vnet_id, :vnet_name, :ar_id].freeze

    METRIC_NAME = 'vnet_free_lease_ratio'

    # registry::  object answering #gauge(name, :docstring =>, :labels =>)
    # client::    client handed to OpenNebula::VirtualNetworkPool
    # namespace:: prefix prepended to METRIC_NAME
    def initialize(registry, client, namespace)
        @client = client

        @metric = registry.gauge(
            "#{namespace}_#{METRIC_NAME}".to_sym,
            :docstring => 'Fraction of free leases in each VNet Address Range',
            :labels    => LABELS
        )
    end

    # Refreshes the gauge for every AR of every VNet.
    #
    # Never raises StandardError: a failure on one VNet is logged and the
    # remaining VNets are still processed; any other failure is logged and
    # nil is returned.
    def collect
        pool = fetch_vnet_pool
        return if pool.nil?

        pool.each do |vnet|
            process_vnet(vnet)
        rescue StandardError => e
            warn("[OpenNebulaARCollector] VNet error: #{e.class}: #{e.message}")
        end
    rescue StandardError => e
        warn("[OpenNebulaARCollector] #{e.class}: #{e.message}")
        nil
    end

    private

    # Returns the loaded VNet pool, or nil (after logging) when the
    # info_all call reports an error.
    def fetch_vnet_pool
        pool = OpenNebula::VirtualNetworkPool.new(@client)
        rc   = pool.info_all

        if OpenNebula.is_error?(rc)
            warn("[OpenNebulaARCollector] pool.info_all failed: #{rc.message}")
            return
        end

        pool
    end

    # Parses the VNet XML and emits one sample per VNET/AR_POOL/AR element.
    def process_vnet(vnet)
        doc = REXML::Document.new(vnet.to_xml)

        vnet_id   = text(doc, 'VNET/ID')
        vnet_name = text(doc, 'VNET/NAME')

        doc.elements.each('VNET/AR_POOL/AR') do |ar_el|
            process_ar(ar_el, vnet_id, vnet_name)
        end
    end

    # Sets the gauge for a single AR. ARs with a missing, zero or negative
    # SIZE are skipped, which also keeps compute_ratio away from a division
    # by zero.
    def process_ar(ar_el, vnet_id, vnet_name)
        size = ar_size(ar_el)
        return if size <= 0

        free_ratio = compute_ratio(size, used_leases(ar_el))

        @metric.set(
            free_ratio,
            :labels => metric_labels(vnet_id, vnet_name, ar_el)
        )
    end

    # AR/SIZE as an Integer; 0 when the element is absent or not numeric.
    def ar_size(ar_el)
        ar_el.elements['SIZE']&.text.to_i
    end

    # Label set for one AR; every value is coerced to String.
    def metric_labels(vnet_id, vnet_name, ar_el)
        {
            :vnet_id   => vnet_id.to_s,
            :vnet_name => vnet_name.to_s,
            :ar_id     => ar_el.elements['AR_ID']&.text.to_s
        }
    end

    # Free fraction (size - used) / size, limited to 0.0..1.0 and rounded to
    # two decimals. The caller guarantees size > 0.
    def compute_ratio(size, used)
        ((size - used).to_f / size).clamp(0.0, 1.0).round(2)
    end

    # Text of the first element matching xpath, or '' when nothing matches.
    def text(doc, xpath)
        el = doc.elements[xpath]
        el ? el.text.to_s : ''
    end

    # Prefer AR/USED_LEASES if present.
    # Fallback to AR/ALLOCATED parsing (pairs: INDEX ENCODED_VMID).
    #
    # A USED_LEASES element holding only whitespace counts as absent, so it
    # cannot hide the ALLOCATED data behind a bogus 0.
    def used_leases(ar_el)
        reported = ar_el.elements['USED_LEASES']&.text.to_s.strip
        return reported.to_i unless reported.empty?

        # Two whitespace-separated tokens describe one lease.
        ar_el.elements['ALLOCATED']&.text.to_s.split.size / 2
    end

end

src/oneprometheus/opennebula-exporter/src/opennebula_collector.rb

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -------------------------------------------------------------------------- #
2-
# Copyright 2002-2025, OpenNebula Project, OpenNebula Systems #
2+
# Copyright 2002-2026, OpenNebula Project, OpenNebula Systems #
33
# #
44
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
55
# not use this file except in compliance with the License. You may obtain #
@@ -18,9 +18,11 @@
1818
require 'prometheus/client'
1919

2020
require_relative 'opennebula_server_collector'
21+
require_relative 'opennebula_ha_collector'
2122
require_relative 'opennebula_host_collector'
2223
require_relative 'opennebula_datastore_collector'
2324
require_relative 'opennebula_vm_collector'
25+
require_relative 'opennebula_ar_collector'
2426

2527

2628
module Prometheus
@@ -83,6 +85,10 @@ def initialize(app, options = {})
8385
@registry, @client, NAMESPACE)
8486
@collectors << OpenNebulaVMCollector.new(
8587
@registry, @client, NAMESPACE)
88+
@collectors << OpenNebulaARCollector.new(
89+
@registry, @client, NAMESPACE)
90+
@collectors << OpenNebulaHACollector.new(
91+
@registry, @client, NAMESPACE)
8692
end
8793

8894
def call(env)
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
# -------------------------------------------------------------------------- #
2+
# Copyright 2002-2026, OpenNebula Project, OpenNebula Systems #
3+
# #
4+
# Licensed under the Apache License, Version 2.0 (the "License"); you may #
5+
# not use this file except in compliance with the License. You may obtain #
6+
# a copy of the License at #
7+
# #
8+
# http://www.apache.org/licenses/LICENSE-2.0 #
9+
# #
10+
# Unless required by applicable law or agreed to in writing, software #
11+
# distributed under the License is distributed on an "AS IS" BASIS, #
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
13+
# See the License for the specific language governing permissions and #
14+
# limitations under the License. #
15+
#--------------------------------------------------------------------------- #
16+
17+
require 'socket'
18+
require 'timeout'
19+
require 'xmlrpc/client'
20+
require 'rexml/document'
21+
require 'opennebula'
22+
23+
# OpenNebula HA collector.
#
# Lists the servers of every zone in the zone pool, asks each one for its
# RAFT status over XML-RPC and publishes the aggregated result, labelled with
# the FQDN of the host running the exporter.
#
# Metrics exposed:
#   - opennebula_ha_is_leader{one_server_fqdn="..."} 0|1
#   - opennebula_ha_error_members{one_server_fqdn="..."} N
#   - opennebula_ha_members{one_server_fqdn="..."} N
class OpenNebulaHACollector

    LABELS = [:one_server_fqdn].freeze

    STATE_FOLLOWER = 2
    STATE_LEADER   = 3

    # Seconds allowed per RAFT status call when ONEZONE_TIMEOUT is unset or
    # does not hold a positive integer.
    DEFAULT_TIMEOUT = 5

    METRICS = {
        'ha_is_leader' => {
            :type   => :gauge,
            :docstr => 'This server is RAFT leader (1) or not (0)',
            :labels => LABELS
        },
        'ha_error_members' => {
            :type   => :gauge,
            :docstr => 'Number of HA members with failed or unexpected RAFT state',
            :labels => LABELS
        },
        'ha_members' => {
            :type   => :gauge,
            :docstr => 'Total number of HA members reported by zone pool',
            :labels => LABELS
        }
    }.freeze

    # registry::  object answering the metric constructors named in METRICS
    # _client::   ignored; this collector builds its own client because it
    #             also needs the raw auth string for direct XML-RPC calls
    # namespace:: prefix prepended to every metric name
    #
    # Raises whatever File.read raises when the auth file cannot be read.
    def initialize(registry, _client, namespace)
        @metrics         = build_metrics(registry, namespace)
        @auth            = load_auth
        @xmlrpc_endpoint = load_xmlrpc_endpoint
        @timeout         = load_timeout
        @fqdn            = resolve_fqdn
        @one_client      = build_one_client
    end

    # Refreshes the three HA gauges. Nothing is written when the zone pool
    # lists no servers. Any StandardError is logged and nil is returned.
    def collect
        labels   = metric_labels
        statuses = cluster_statuses
        return if statuses.empty?

        write_metrics(statuses, labels)
    rescue StandardError => e
        warn("[OpenNebulaHACollector] #{e.class}: #{e.message}")
        nil
    end

    private

    # Registers every entry of METRICS and returns them keyed by short name.
    def build_metrics(registry, namespace)
        metrics = {}

        METRICS.each do |name, conf|
            metrics[name] = registry.public_send(
                conf[:type],
                "#{namespace}_#{name}".to_sym,
                :docstring => conf[:docstr],
                :labels    => conf[:labels]
            )
        end

        metrics
    end

    # Contents of the auth file with surrounding whitespace removed. The
    # path comes from ONE_AUTH; the ~/.one/one_auth default is only expanded
    # when that variable is unset.
    def load_auth
        path = ENV.fetch('ONE_AUTH') { File.expand_path('~/.one/one_auth') }
        File.read(path).strip
    end

    def load_xmlrpc_endpoint
        ENV.fetch('ONE_XMLRPC', 'http://localhost:2633/RPC2')
    end

    # Per-call timeout in seconds, read from ONEZONE_TIMEOUT.
    #
    # Timeout.timeout treats 0 as "no limit", so an unset, non-numeric, zero
    # or negative value falls back to DEFAULT_TIMEOUT instead of silently
    # letting a scrape block on an unreachable server.
    def load_timeout
        seconds = ENV['ONEZONE_TIMEOUT'].to_i
        seconds.positive? ? seconds : DEFAULT_TIMEOUT
    end

    def build_one_client
        OpenNebula::Client.new(@auth, @xmlrpc_endpoint)
    end

    def metric_labels
        { :one_server_fqdn => @fqdn }
    end

    # One status hash per server found in the zone pool ([] when none).
    def cluster_statuses
        servers = fetch_zone_servers
        return [] if servers.empty?

        servers.map {|srv| fetch_server_status(srv) }
    end

    def write_metrics(statuses, labels)
        @metrics['ha_members'].set(member_count(statuses), :labels => labels)
        @metrics['ha_error_members'].set(error_member_count(statuses), :labels => labels)
        @metrics['ha_is_leader'].set(local_leader_value, :labels => labels)
    end

    def member_count(statuses)
        statuses.size
    end

    def error_member_count(statuses)
        statuses.count {|status| problem_state?(status) }
    end

    # 1 when the configured local endpoint reports STATE_LEADER, else 0
    # (including when the local status call fails).
    def local_leader_value
        local_status = fetch_local_status
        local_status[:state_num] == STATE_LEADER ? 1 : 0
    end

    # Loads the zone pool and returns its servers; raises with the error
    # message when the pool query fails.
    def fetch_zone_servers
        pool = OpenNebula::ZonePool.new(@one_client)
        rc   = pool.info

        raise rc.message if OpenNebula.is_error?(rc)

        extract_servers(pool)
    end

    # Flattens SERVER_POOL/SERVER of every zone in the pool into one list.
    def extract_servers(pool)
        servers = []

        pool.each do |zone|
            zone.each('SERVER_POOL/SERVER') do |srv|
                servers << zone_server_hash(srv)
            end
        end

        servers
    end

    def zone_server_hash(server)
        {
            :id       => server['ID'].to_i,
            :name     => server['NAME'].to_s,
            :endpoint => server['ENDPOINT'].to_s
        }
    end

    # RAFT status of the configured endpoint; error_status on any failure.
    def fetch_local_status
        fetch_raft(@xmlrpc_endpoint)
    rescue StandardError
        error_status
    end

    # RAFT status of one zone server, tagged with its id and name. A failed
    # call yields error_status (tagged the same way) rather than raising.
    def fetch_server_status(server)
        status = fetch_raft(server[:endpoint])
        merge_server_status(status, server)
    rescue StandardError
        merge_server_status(error_status, server)
    end

    def merge_server_status(status, server)
        status.merge(
            :id   => server[:id],
            :name => server[:name]
        )
    end

    # Placeholder status for a server whose RAFT status could not be read.
    def error_status
        {
            :state_num => nil,
            :rpc_error => true
        }
    end

    def fetch_raft(endpoint)
        xml = rpc_call(endpoint, 'one.zone.raftstatus')
        parse_raft(xml)
    end

    # Calls method on endpoint with the auth string as the only argument and
    # returns the payload. Raises when the call reports failure or does not
    # finish within @timeout seconds.
    def rpc_call(endpoint, method)
        Timeout.timeout(@timeout) do
            client = XMLRPC::Client.new2(endpoint)
            ok, payload, error_code = client.call(method, @auth)

            raise "#{method} failed: #{error_code}" unless ok

            payload
        end
    end

    # Extracts the numeric RAFT state from a raftstatus document. state_num
    # is nil when the RAFT element has no STATE text. Raises when no RAFT
    # element exists.
    def parse_raft(xml)
        doc  = REXML::Document.new(xml)
        raft = REXML::XPath.first(doc, '//RAFT')

        raise 'RAFT not found' unless raft

        {
            :state_num => text(raft, 'STATE')&.to_i,
            :rpc_error => false
        }
    end

    # Text of the first node matching name under node, or nil.
    def text(node, name)
        el = REXML::XPath.first(node, name)
        el&.text
    end

    # True for an RPC failure or any state other than leader/follower.
    def problem_state?(status)
        return true if status[:rpc_error]

        state = status[:state_num]
        state != STATE_LEADER && state != STATE_FOLLOWER
    end

    # Name obtained from a reverse lookup of the first address of this host;
    # falls back to the plain hostname when resolution fails.
    def resolve_fqdn
        host = Socket.gethostname
        Addrinfo.getaddrinfo(host, nil).first.getnameinfo.first
    rescue StandardError
        Socket.gethostname
    end

end

0 commit comments

Comments
 (0)