Skip to content

Commit 0dec96f

Browse files
F #7494: Improve message when scheduler fails
Include more specific reasons when a VM cannot be scheduled. Increased scheduling matching and dispatch errors to DDDEBUG category. Co-authored-by: MarioRobres <mrobres@opennebula.io>
1 parent 3b34fcc commit 0dec96f

File tree

6 files changed

+409
-84
lines changed

6 files changed

+409
-84
lines changed

src/schedm_mad/remotes/rank/include/HostXML.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "ObjectXML.h"
2424
#include "HostShare.h"
2525
#include "PoolObjectAuth.h"
26+
#include "SchedulerFailure.h"
2627

2728
/**
2829
* This class represents the needed information HostShare for a Host to
@@ -38,9 +39,11 @@ class HostShareXML
3839
/**
3940
* Tests whether a new VM can be hosted by the host or not
4041
* @param sr the share request including CPU, memory, PCI and NUMA nodes
42+
* @param error error reason
43+
* @param ft output: failure type, set to the reason (CPU, memory, NUMA or PCI) when a capacity check fails
4144
* @return true if the share can host the VM
4245
*/
43-
bool test_capacity(HostShareCapacity& sr, std::string & error);
46+
bool test_capacity(HostShareCapacity& sr, std::string & error, SchedulerFailure::FailureType & ft);
4447

4548
/**
4649
* Adds a new VM to the given share by incrementing the cpu,mem and disk
@@ -178,11 +181,12 @@ class HostXML : public ObjectXML
178181
* @param sr, the host share capacity request including cpu, mem, pci
179182
* devices and numa topology
180183
* @param error error message
184+
* @param ft output: failure type, forwarded to and filled in by the host share's capacity test
181185
* @return true if the share can host the VM
182186
*/
183-
bool test_capacity(HostShareCapacity &sr, std::string & error)
187+
bool test_capacity(HostShareCapacity &sr, std::string & error, SchedulerFailure::FailureType & ft)
184188
{
185-
return share.test_capacity(sr, error);
189+
return share.test_capacity(sr, error, ft);
186190
}
187191

188192
/**
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/* -------------------------------------------------------------------------- */
2+
/* Copyright 2002-2025, OpenNebula Project, OpenNebula Systems */
3+
/* */
4+
/* Licensed under the Apache License, Version 2.0 (the "License"); you may */
5+
/* not use this file except in compliance with the License. You may obtain */
6+
/* a copy of the License at */
7+
/* */
8+
/* http://www.apache.org/licenses/LICENSE-2.0 */
9+
/* */
10+
/* Unless required by applicable law or agreed to in writing, software */
11+
/* distributed under the License is distributed on an "AS IS" BASIS, */
12+
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
13+
/* See the License for the specific language governing permissions and */
14+
/* limitations under the License. */
15+
/* -------------------------------------------------------------------------- */
16+
17+
#ifndef SCHEDULER_FAILURE_H_
18+
#define SCHEDULER_FAILURE_H_
19+
20+
#include <string>
21+
#include <map>
22+
#include <set>
23+
#include <iostream>
24+
25+
class SchedulerFailure
26+
{
27+
public:
28+
/**
29+
* Type of failure
30+
*/
31+
enum FailureType
32+
{
33+
NONE = 0,
34+
HOST_NULL = 1,
35+
HOST_REQUIREMENTS = 2,
36+
HOST_CPU = 3,
37+
HOST_MEMORY = 4,
38+
HOST_NUMA = 5,
39+
HOST_PCI = 6,
40+
HOST_DISPATCH = 7,
41+
HOST_AFFINITY = 8,
42+
DS_NULL = 9,
43+
DS_CLUSTER = 10,
44+
DS_CAPACITY = 11,
45+
DS_NONE = 12,
46+
DS_MONITOR = 13,
47+
NET_NULL = 14,
48+
NET_CLUSTER = 15,
49+
NET_LEASES = 16,
50+
NET_ROLLBACK = 17
51+
};
52+
53+
/**
54+
* Return the string representation of a FailureType
55+
* @param ft the failure
56+
* @return the string
57+
*/
58+
static std::string failure_to_string(FailureType ft);
59+
60+
/**
61+
* Return the FailureType represented by a string
62+
* @param str_type string representing the FailureType
63+
* @return the FailureType (defaults to NONE)
64+
*/
65+
static FailureType str_to_failure_type(std::string& str_type);
66+
67+
/**
68+
* Logs all the failures associated to a VM that has failed to be scheduled
69+
* @param host_failures map from each failure type to the set of integer IDs (presumably host IDs) that failed for that reason
70+
* @return the log
71+
*/
72+
static std::ostringstream log_failures(std::map<FailureType, std::set<int>> &host_failures);
73+
};
74+
75+
#endif // SCHEDULER_FAILURE_H_

src/schedm_mad/remotes/rank/src/pool/HostXML.cc

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,9 @@ void HostShareXML::init_attributes(ObjectXML * host)
8484
/* -------------------------------------------------------------------------- */
8585
/* -------------------------------------------------------------------------- */
8686

87-
bool HostShareXML::test_capacity(HostShareCapacity &sr, string & error)
87+
bool HostShareXML::test_capacity(HostShareCapacity &sr,
88+
string & error,
89+
SchedulerFailure::FailureType & ft)
8890
{
8991
bool pci_fit = pci.test(sr.pci);
9092
bool numa_fit = numa.test(sr);
@@ -102,18 +104,22 @@ bool HostShareXML::test_capacity(HostShareCapacity &sr, string & error)
102104

103105
if (!cpu_fit)
104106
{
107+
ft = SchedulerFailure::HOST_CPU;
105108
oss << "Not enough CPU capacity: " << sr.cpu << "/" << max_cpu - cpu_usage;
106109
}
107110
else if (!mem_fit)
108111
{
112+
ft = SchedulerFailure::HOST_MEMORY;
109113
oss << "Not enough memory: " << sr.mem << "/" << max_mem - mem_usage;
110114
}
111115
else if (!numa_fit)
112116
{
117+
ft = SchedulerFailure::HOST_NUMA;
113118
oss << "Cannot allocate NUMA topology";
114119
}
115120
else if (!pci_fit)
116121
{
122+
ft = SchedulerFailure::HOST_PCI;
117123
oss << "Unavailable PCI device.";
118124
}
119125

src/schedm_mad/remotes/rank/src/sched/SConstruct

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import os
2121

2222
lib_name='scheduler_sched'
2323

24-
source_files=['Scheduler.cc' , 'SchedulerTemplate.cc']
24+
source_files=['Scheduler.cc', 'SchedulerFailure.cc', 'SchedulerTemplate.cc']
2525

2626
# Build library
2727
sched_env.StaticLibrary(lib_name, source_files)

0 commit comments

Comments
 (0)