Skip to content

Commit d67bb6f

Browse files
spirali, Kobzol
authored and committed
Fix mn tasks defined in job file
1 parent 267e143 commit d67bb6f

File tree

4 files changed

+26
-3
lines changed

4 files changed

+26
-3
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
## Unreleased
2+
3+
### Fixes
4+
* Fixes multi-node tasks defined in a job file.
5+
16
## v0.25.0
27

38
### Breaking change

crates/hyperqueue/src/client/commands/submit/command.rs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -695,8 +695,6 @@ pub async fn submit_computation(
695695
.unwrap_or_else(|| "job".to_string())
696696
};
697697

698-
// Force task_dir for multi node tasks (for a place where to create node file)
699-
let task_dir = task_dir | (resources.n_nodes > 0);
700698
let resources = ResourceRequestVariants::new(smallvec![resources]);
701699

702700
let args: Vec<BString> = commands.into_iter().map(|arg| arg.into()).collect();

crates/hyperqueue/src/worker/start/program.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ pub(super) fn build_program_task(
8686

8787
pin_program(&mut program, build_ctx.allocation(), pin_mode, &build_ctx)?;
8888

89-
let task_dir = if task_dir {
89+
let task_dir = if task_dir || !build_ctx.node_list().is_empty() {
9090
let task_dir = TempDir::with_prefix_in("t", &build_ctx.worker_configuration().work_dir)
9191
.map_err(|error| {
9292
format!(

tests/test_jobfile.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,3 +440,23 @@ def test_job_file_stream(hq_env: HqEnv, tmp_path):
440440
wait_for_job_state(hq_env, 1, "FINISHED")
441441
result = hq_env.command(["output-log", "output", "cat", "1", "stdout"])
442442
assert result == "Hello\n"
443+
444+
445+
def test_job_file_multinode(hq_env: HqEnv, tmp_path):
446+
hq_env.start_server()
447+
hq_env.start_worker()
448+
hq_env.start_worker()
449+
tmp_path.joinpath("job.toml").write_text(
450+
"""
451+
[[task]]
452+
id = 0
453+
command = ["bash", "-c", "echo ${HQ_NUM_NODES}; cat ${HQ_NODE_FILE}"]
454+
[[task.request]]
455+
n_nodes = 2
456+
"""
457+
)
458+
hq_env.command(["job", "submit-file", "job.toml"])
459+
wait_for_job_state(hq_env, 1, "FINISHED")
460+
with open(default_task_output(1)) as f:
461+
lines = sorted(f.read().rstrip().split("\n"))
462+
assert lines == ["2", "worker1", "worker2"]

0 commit comments

Comments
 (0)