Skip to content

Commit 71018af

Browse files
committed
Add User Cancel Event to Journal, Server cancel replaced with Abort
1 parent 89153b9 commit 71018af

File tree

24 files changed

+347
-84
lines changed

24 files changed

+347
-84
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
## v0.26.0
2+
3+
### New features
4+
* Distinguished cancellation of a task
5+
* Aborted state: Task canceled by HQ due to failure of a dependency or reaching fail limit of a job
6+
* Canceled state: Task canceled by user
7+
18
## Unreleased
29

310
### Breaking change

crates/hyperqueue/src/client/commands/journal/output.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ fn format_payload(event: EventPayload) -> serde_json::Value {
103103
"job": task_id.job_id(),
104104
"task": task_id.job_task_id(),
105105
}),
106+
EventPayload::TasksAborted { task_ids } => json!({
107+
"type": "task-aborted",
108+
"tasks": task_ids,
109+
}),
106110
EventPayload::TasksCanceled { task_ids } => json!({
107111
"type": "task-canceled",
108112
"tasks": task_ids,
@@ -156,6 +160,16 @@ fn format_payload(event: EventPayload) -> serde_json::Value {
156160
"job_id": job_id
157161
})
158162
}
163+
EventPayload::JobCancel {
164+
job_id,
165+
cancel_reason,
166+
} => {
167+
json!({
168+
"type": "job-cancel",
169+
"job_id": job_id,
170+
"reason": cancel_reason
171+
})
172+
}
159173
EventPayload::TaskNotify(notify) => {
160174
json!({
161175
"task_id": notify.task_id,

crates/hyperqueue/src/client/commands/journal/report.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ struct JournalStats {
150150
enum TaskStartStop {
151151
Start,
152152
Stop { start_time: TimeDelta, fail: bool },
153+
Abort,
153154
Cancel,
154155
}
155156

@@ -234,6 +235,7 @@ impl JournalStats {
234235
EventPayload::JobCompleted(_) => {}
235236
EventPayload::JobOpen(_, _) => {}
236237
EventPayload::JobClose(_) => {}
238+
EventPayload::JobCancel { .. } => {}
237239
EventPayload::TaskStarted {
238240
task_id,
239241
worker_ids,
@@ -277,6 +279,19 @@ impl JournalStats {
277279
);
278280
}
279281
}
282+
EventPayload::TasksAborted { task_ids, .. } => {
283+
for task_id in task_ids {
284+
if let Some((_, workers, rv_id)) = jstats.running_tasks.remove(&task_id) {
285+
jstats.task_start_stop(
286+
time,
287+
task_id,
288+
rv_id,
289+
&workers,
290+
TaskStartStop::Abort,
291+
);
292+
}
293+
}
294+
}
280295
EventPayload::TasksCanceled { task_ids, .. } => {
281296
for task_id in task_ids {
282297
if let Some((_, workers, rv_id)) = jstats.running_tasks.remove(&task_id) {

crates/hyperqueue/src/client/commands/wait.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ use std::os::unix::ffi::OsStrExt;
44
use std::process::Command;
55

66
use crate::client::output::cli::{
7-
TASK_COLOR_CANCELED, TASK_COLOR_FAILED, TASK_COLOR_FINISHED, TASK_COLOR_RUNNING,
8-
job_progress_bar,
7+
TASK_COLOR_ABORTED, TASK_COLOR_CANCELED, TASK_COLOR_FAILED, TASK_COLOR_FINISHED,
8+
TASK_COLOR_RUNNING, job_progress_bar,
99
};
1010
use crate::client::status::is_terminated;
1111
use crate::common::utils::str::pluralize;
@@ -142,6 +142,7 @@ pub async fn wait_for_jobs_with_progress(
142142
add_count(counters.n_finished_tasks, "FINISHED", TASK_COLOR_FINISHED);
143143
add_count(counters.n_failed_tasks, "FAILED", TASK_COLOR_FAILED);
144144
add_count(counters.n_canceled_tasks, "CANCELED", TASK_COLOR_CANCELED);
145+
add_count(counters.n_aborted_tasks, "ABORTED", TASK_COLOR_ABORTED);
145146

146147
// \x1b[2K clears the line
147148
print!(
@@ -188,7 +189,30 @@ pub async fn wait_for_jobs_with_progress(
188189
counters.n_canceled_tasks += 1;
189190
}
190191
}
191-
_ => {}
192+
EventPayload::TasksAborted { task_ids } => {
193+
for task_id in task_ids {
194+
if running_tasks.remove(task_id) {
195+
counters.n_running_tasks -= 1;
196+
}
197+
counters.n_aborted_tasks += 1;
198+
}
199+
}
200+
EventPayload::WorkerConnected(..)
201+
| EventPayload::WorkerLost(..)
202+
| EventPayload::WorkerOverviewReceived(..)
203+
| EventPayload::Submit { .. }
204+
| EventPayload::JobOpen(..)
205+
| EventPayload::JobClose(..)
206+
| EventPayload::JobIdle(..)
207+
| EventPayload::JobCancel { .. }
208+
| EventPayload::AllocationQueueCreated(..)
209+
| EventPayload::AllocationQueueRemoved(_)
210+
| EventPayload::AllocationQueued { .. }
211+
| EventPayload::AllocationStarted(_, _)
212+
| EventPayload::AllocationFinished(_, _)
213+
| EventPayload::ServerStart { .. }
214+
| EventPayload::ServerStop
215+
| EventPayload::TaskNotify(..) => {}
192216
},
193217
_ => {
194218
log::warn!("Unexpected message from server: {:?}", &msg);

crates/hyperqueue/src/client/output/cli.rs

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ use tako::{Map, format_comma_delimited};
5252

5353
pub const TASK_COLOR_CANCELED: Colorization = Colorization::Magenta;
5454
pub const TASK_COLOR_FAILED: Colorization = Colorization::Red;
55+
pub const TASK_COLOR_ABORTED: Colorization = Colorization::BrightRed;
5556
pub const TASK_COLOR_FINISHED: Colorization = Colorization::Green;
5657
pub const TASK_COLOR_RUNNING: Colorization = Colorization::Yellow;
5758
pub const TASK_COLOR_INVALID: Colorization = Colorization::BrightRed;
@@ -1278,32 +1279,38 @@ fn job_status_to_cell(info: &JobInfo) -> String {
12781279
&mut result,
12791280
"RUNNING",
12801281
info.counters.n_running_tasks,
1281-
colored::Color::Yellow,
1282+
TASK_COLOR_RUNNING,
12821283
);
12831284
row(
12841285
&mut result,
12851286
"FAILED",
12861287
info.counters.n_failed_tasks,
1287-
colored::Color::Red,
1288+
TASK_COLOR_FAILED,
12881289
);
12891290
row(
12901291
&mut result,
12911292
"FINISHED",
12921293
info.counters.n_finished_tasks,
1293-
colored::Color::Green,
1294+
TASK_COLOR_FINISHED,
12941295
);
12951296
row(
12961297
&mut result,
12971298
"CANCELED",
12981299
info.counters.n_canceled_tasks,
1299-
colored::Color::Magenta,
1300+
TASK_COLOR_CANCELED,
13001301
);
13011302
row(
13021303
&mut result,
13031304
"WAITING",
13041305
info.counters.n_waiting_tasks(info.n_tasks),
13051306
colored::Color::Cyan,
13061307
);
1308+
row(
1309+
&mut result,
1310+
"ABORTED",
1311+
info.counters.n_aborted_tasks,
1312+
TASK_COLOR_ABORTED,
1313+
);
13071314
result
13081315
}
13091316

@@ -1343,6 +1350,7 @@ pub fn job_progress_bar(counters: JobTaskCounters, n_tasks: JobTaskCount, width:
13431350
(counters.n_failed_tasks, TASK_COLOR_FAILED),
13441351
(counters.n_finished_tasks, TASK_COLOR_FINISHED),
13451352
(counters.n_running_tasks, TASK_COLOR_RUNNING),
1353+
(counters.n_aborted_tasks, TASK_COLOR_ABORTED),
13461354
];
13471355

13481356
let chars = |count: JobTaskCount| {
@@ -1432,6 +1440,7 @@ fn status_to_cell(status: &Status) -> CellStruct {
14321440
Status::Failed => "FAILED".cell().foreground_color(Some(Color::Red)),
14331441
Status::Running => "RUNNING".cell().foreground_color(Some(Color::Yellow)),
14341442
Status::Canceled => "CANCELED".cell().foreground_color(Some(Color::Magenta)),
1443+
Status::Aborted => "ABORTED".cell().foreground_color(Some(Color::Blue)),
14351444
}
14361445
}
14371446

@@ -1537,6 +1546,10 @@ fn get_task_time(state: &JobTaskState) -> (Option<DateTime<Utc>>, Option<DateTim
15371546
JobTaskState::Canceled {
15381547
started_data: Some(started_data),
15391548
cancelled_date,
1549+
}
1550+
| JobTaskState::Aborted {
1551+
started_data: Some(started_data),
1552+
cancelled_date,
15401553
} => (Some(started_data.start_date), Some(*cancelled_date)),
15411554
JobTaskState::Running { started_data, .. } => (Some(started_data.start_date), None),
15421555
JobTaskState::Finished {
@@ -1553,6 +1566,10 @@ fn get_task_time(state: &JobTaskState) -> (Option<DateTime<Utc>>, Option<DateTim
15531566
started_data: None,
15541567
cancelled_date: _,
15551568
}
1569+
| JobTaskState::Aborted {
1570+
started_data: None,
1571+
cancelled_date: _,
1572+
}
15561573
| JobTaskState::Failed {
15571574
started_data: None, ..
15581575
}

crates/hyperqueue/src/client/output/common.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,11 +115,12 @@ impl From<VerbosityFlag> for Verbosity {
115115
}
116116
}
117117

118-
pub const JOB_SUMMARY_STATUS_ORDER: [Status; 5] = [
118+
pub const JOB_SUMMARY_STATUS_ORDER: [Status; 6] = [
119119
Status::Running,
120120
Status::Waiting,
121121
Status::Finished,
122122
Status::Failed,
123+
Status::Aborted,
123124
Status::Canceled,
124125
];
125126

crates/hyperqueue/src/client/output/json.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ fn format_job_info(info: &JobInfo) -> Value {
408408
"finished": counters.n_finished_tasks,
409409
"failed": counters.n_failed_tasks,
410410
"canceled": counters.n_canceled_tasks,
411+
"aborted": counters.n_aborted_tasks,
411412
"waiting": counters.n_waiting_tasks(*n_tasks)
412413
}),
413414
"cancel_reason": cancel_reason,
@@ -424,6 +425,7 @@ fn format_tasks(tasks: &[(JobTaskId, JobTaskInfo)], map: TaskToPathsMap) -> Valu
424425
JobTaskState::Finished { .. } => "finished",
425426
JobTaskState::Failed { .. } => "failed",
426427
JobTaskState::Canceled { .. } => "canceled",
428+
JobTaskState::Aborted { .. } => "aborted",
427429
};
428430
let mut data = json!({
429431
"id": *task_id,

crates/hyperqueue/src/client/output/quiet.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ impl Output for Quiet {
9393
Status::Failed => "FAILED",
9494
Status::Canceled => "CANCELED",
9595
Status::Opened => "OPENED",
96+
Status::Aborted => "ABORTED",
9697
};
9798

9899
println!("{status} {count}");
@@ -174,5 +175,6 @@ fn format_status(status: &Status) -> &str {
174175
Status::Failed => "FAILED",
175176
Status::Canceled => "CANCELED",
176177
Status::Opened => "OPENED",
178+
Status::Aborted => "ABORTED",
177179
}
178180
}

crates/hyperqueue/src/client/status.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ pub enum Status {
1111
Finished,
1212
Failed,
1313
Canceled,
14+
Aborted,
1415
Opened,
1516
}
1617

@@ -23,6 +24,8 @@ pub fn job_status(info: &JobInfo) -> Status {
2324
Status::Waiting
2425
} else if info.counters.n_failed_tasks > 0 {
2526
Status::Failed
27+
} else if info.counters.n_aborted_tasks > 0 {
28+
Status::Aborted
2629
} else if info.counters.n_canceled_tasks > 0 {
2730
Status::Canceled
2831
} else {
@@ -47,5 +50,6 @@ pub fn get_task_status(status: &JobTaskState) -> Status {
4750
JobTaskState::Finished { .. } => Status::Finished,
4851
JobTaskState::Failed { .. } => Status::Failed,
4952
JobTaskState::Canceled { .. } => Status::Canceled,
53+
JobTaskState::Aborted { .. } => Status::Aborted,
5054
}
5155
}

crates/hyperqueue/src/dashboard/data/timelines/job_timeline.rs

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ pub enum DashboardTaskState {
3131
Running,
3232
Finished,
3333
Failed,
34+
Aborted,
3435
Canceled,
3536
}
3637

@@ -115,6 +116,16 @@ impl JobTimeline {
115116
event.time,
116117
);
117118
}
119+
EventPayload::TasksAborted { task_ids } => {
120+
for task_id in task_ids {
121+
update_task_status(
122+
&mut self.job_timeline,
123+
*task_id,
124+
DashboardTaskState::Aborted,
125+
event.time,
126+
);
127+
}
128+
}
118129
EventPayload::TasksCanceled { task_ids } => {
119130
for task_id in task_ids {
120131
update_task_status(
@@ -125,20 +136,21 @@ impl JobTimeline {
125136
);
126137
}
127138
}
128-
EventPayload::WorkerConnected(_, _) => {}
129-
EventPayload::WorkerLost(_, _) => {}
130-
EventPayload::WorkerOverviewReceived(_) => {}
131-
EventPayload::JobOpen(_, _) => {}
132-
EventPayload::JobClose(_) => {}
133-
EventPayload::JobIdle(_) => {}
134-
EventPayload::AllocationQueueCreated(_, _) => {}
135-
EventPayload::AllocationQueueRemoved(_) => {}
136-
EventPayload::AllocationQueued { .. } => {}
137-
EventPayload::AllocationStarted(_, _) => {}
138-
EventPayload::AllocationFinished(_, _) => {}
139-
EventPayload::ServerStart { .. } => {}
140-
EventPayload::ServerStop => {}
141-
EventPayload::TaskNotify(_) => {}
139+
EventPayload::WorkerConnected(_, _)
140+
| EventPayload::WorkerLost(_, _)
141+
| EventPayload::WorkerOverviewReceived(_)
142+
| EventPayload::JobOpen(_, _)
143+
| EventPayload::JobClose(_)
144+
| EventPayload::JobIdle(_)
145+
| EventPayload::JobCancel { .. }
146+
| EventPayload::AllocationQueueCreated(_, _)
147+
| EventPayload::AllocationQueueRemoved(_)
148+
| EventPayload::AllocationQueued { .. }
149+
| EventPayload::AllocationStarted(_, _)
150+
| EventPayload::AllocationFinished(_, _)
151+
| EventPayload::ServerStart { .. }
152+
| EventPayload::ServerStop
153+
| EventPayload::TaskNotify(_) => {}
142154
}
143155
}
144156
}

0 commit comments

Comments
 (0)