diff --git a/climanet/dataset.py b/climanet/dataset.py index 9ee93b2..82663d8 100644 --- a/climanet/dataset.py +++ b/climanet/dataset.py @@ -29,6 +29,23 @@ def __init__( sh_embed_dim: int = 96, # sh_embed_dim should <= (sh_order_L + 1)**2 sh_order_L: int = 10, ): + """Initialize the dataset with daily and monthly data, land mask, and patching parameters. + + Parameters + ---------- + daily_da : xr.DataArray + Daily data array. + monthly_da : xr.DataArray + Monthly data array. + land_mask : xr.DataArray, optional + Land mask array, by default None + time_dim : str, optional + Name of the time dimension, by default "time" + spatial_dims : Tuple[str, str], optional + Names of the spatial dimensions, by default ("lat", "lon") + patch_size : Tuple[int, int], optional + Size of the patches, by default (16, 16) + """ self.spatial_dims = spatial_dims self.patch_size = patch_size self.daily_da = daily_da diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000..44dde10 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,84 @@ +# Example of training a SpatioTemporalModel on HPC + +## Folder structure + +- example_training.py: example training script +- example.slurm: example SLURM script to execute the training script on a SLURM system +- eso4clima_24438134_subset.out: example SLURM job output file of an execution on a subset of the global dataset. The dataset has two years of data (2020-2021) and the spatial coverage is from 30S to 30N and from 30W to 30E. +- eso4clima_24449471_full.out: example SLURM job output file of an execution on the full dataset, two years of data (2020-2021) and almost global coverage (from 80S to 80N and from 179.99W to 179.99E). The training only ran for 1 hour and was cut off by the SLURM time limit. + +## Execute training tasks on SLURM system + +1. Make a working directory + +```sh +mkdir training +cd training +``` + +2. Clone this repo +```sh +git clone git@github.com:ESMValGroup/ClimaNet.git +``` + +3. 
Install uv for dependency management. See the [uv doc](https://docs.astral.sh/uv/getting-started/installation/). + +4. Create a venv and install Python dependencies using uv +```sh +cd ClimaNet +``` + +``` +uv sync +``` + +A `.venv` dir will appear + +5. Go back to the working dir (`cd ..`) and copy the python script and slurm script into it: + +```sh +cp ClimaNet/scripts/example* . +``` + +6. Configure `example.slurm`: in the `source ...` line, make sure the venv just created is activated. + Note that the account is the ESO4CLIMA project account, which is shared by multiple users. + +7. Configure `example_training.py`: make sure the paths of the input data and land mask data are correct. + +8. Execute the SLURM job +```sh +sbatch example.slurm +``` + +## Check the efficiency of resource usage + +In the SLURM job output, you can find lines like these: + +``` +==== Slurm accounting summary 23743544 ==== +JobID|NTasks|AveCPU|AveRSS|MaxRSS|MaxVMSize|TRESUsageInAve|TRESUsageInMax +23743544.extern|1|00:00:00|856K|3752K|641376K|cpu=00:00:00,energy=0,fs/disk=2332,mem=856K,pages=2,vmem=217160K|cpu=00:00:00,energy=0,fs/disk=2332,mem=3752K,pages=2,vmem=641376K +23743544.batch|1|04:21:01|11964K|4102096K|37743716K|cpu=04:21:01,energy=0,fs/disk=22293117907,mem=11964K,pages=19,vmem=356724K|cpu=04:21:01,energy=0,fs/disk=22293117907,mem=4102096K,pages=7711,vmem=37743716K +``` + +These give some information about the resource usage at the end of the job. + +To have a better understanding of the efficiency of resource usage, you can run the following command after the job is finished (replace `<JOBID>` with the ID of your job): + +```sh +sacct -j <JOBID> \ + --format=JobID,JobName%30,Partition,AllocCPUS,Elapsed,TotalCPU,MaxRSS,State,ExitCode \ + --parsable2 >> "eso4clima_<JOBID>.out" + +``` + +This will output the resource usage information and add it to the slurm job output file. 
After running this, you can find lines like these in the output file: + +``` +JobID|JobName|Partition|AllocCPUS|Elapsed|TotalCPU|MaxRSS|State|ExitCode +23743544|eso4clima|compute|256|00:02:44|04:21:01||COMPLETED|0:0 +23743544.batch|batch||256|00:02:44|04:21:01|4102096K|COMPLETED|0:0 +23743544.extern|extern||256|00:02:44|00:00.001|3752K|COMPLETED|0:0 +``` + +The efficiency of resource usage can be calculated as `TotalCPU / (AllocCPUS * Elapsed)`. In the example above, the total CPU time is `04:21:01` (15661 s), the number of allocated CPUs is `256`, and the elapsed time is `00:02:44` (164 s), so the efficiency is `15661 / (256 * 164) ≈ 0.37`. \ No newline at end of file diff --git a/scripts/eso4clima_24438134_subset.out b/scripts/eso4clima_24438134_subset.out new file mode 100644 index 0000000..377cb42 --- /dev/null +++ b/scripts/eso4clima_24438134_subset.out @@ -0,0 +1,263 @@ +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +2026-04-23 12:05:45,831 - INFO - Creating the model... +2026-04-23 12:05:46,026 - INFO - Creating the dataset... +2026-04-23 12:06:16,938 - INFO - Starting training... 
+Epoch 0: best_loss = 3.379575 +Epoch 20: best_loss = 2.331017 +Epoch 40: best_loss = 2.142740 +Epoch 60: best_loss = 1.710370 +Epoch 80: best_loss = 1.189252 +Epoch 100: best_loss = 0.894883 +Epoch 120: best_loss = 0.688254 +Epoch 140: best_loss = 0.571773 +Epoch 160: best_loss = 0.505098 +Epoch 180: best_loss = 0.445357 +Epoch 200: best_loss = 0.412255 +Epoch 220: best_loss = 0.381430 +Epoch 240: best_loss = 0.361015 +Epoch 260: best_loss = 0.346520 +Epoch 280: best_loss = 0.325091 +Epoch 300: best_loss = 0.317928 +Epoch 320: best_loss = 0.310767 +Epoch 340: best_loss = 0.303774 +Epoch 360: best_loss = 0.296891 +Epoch 380: best_loss = 0.290118 +Epoch 400: best_loss = 0.285634 +Epoch 420: best_loss = 0.281635 +Epoch 440: best_loss = 0.278483 +Epoch 460: best_loss = 0.275278 +Epoch 480: best_loss = 0.272091 +Epoch 500: best_loss = 0.268902 +Training complete. Best loss: 0.268902 +Model saved to runs/best_model.pth +==== Slurm accounting summary 24438134 ==== +JobID|NTasks|AveCPU|AveRSS|MaxRSS|MaxVMSize|TRESUsageInAve|TRESUsageInMax +24438134.extern|1|00:00:00|856K|3752K|575840K|cpu=00:00:00,energy=0,fs/disk=2332,mem=856K,pages=2,vmem=217160K|cpu=00:00:00,energy=0,fs/disk=2332,mem=3752K,pages=2,vmem=575840K +24438134.batch|1|6-04:57:46|11960K|21279432K|53545480K|cpu=6-04:57:46,energy=0,fs/disk=6648661618,mem=11960K,pages=19,vmem=356728K|cpu=6-04:57:46,energy=0,fs/disk=6648661618,mem=21279432K,pages=8555,vmem=53545480K + +******************************************************************************** +* * +* This is the automated job summary provided by DKRZ. * +* If you encounter problems, need assistance or have any suggestion, please * +* write an email to * +* * +* -- support@dkrz.de -- * +* * +* We hope you enjoyed the DKRZ supercomputer LEVANTE ... 
* +* +* JobID : 24438134 +* JobName : eso4clima +* Account : bd0854 +* User : b383704 (202985), bd0854 (1473) +* Partition : compute +* QOS : normal +* Nodelist : l40346 (1) +* Submit date : 2026-04-23T12:03:09 +* Start time : 2026-04-23T12:04:45 +* End time : 2026-04-23T14:04:54 +* Elapsed time : 02:00:09 (Timelimit=04:00:00) +* Command : /home/b/b383704/eso4clima/train_twoyears/ +* example_subset.slurm +* WorkDir : /home/b/b383704/eso4clima/train_twoyears +* +* StepID | JobName NodeHours MaxRSS [Byte] (@task) +* ------------------------------------------------------------------------------ +* batch | batch 2.0 +* extern | extern 2.0 3752K (0) +* ------------------------------------------------------------------------------ + +JobID|JobName|Partition|AllocCPUS|Elapsed|TotalCPU|MaxRSS|State|ExitCode +24438134|eso4clima|compute|256|02:00:09|6-04:57:47||COMPLETED|0:0 +24438134.batch|batch||256|02:00:09|6-04:57:47|21279432K|COMPLETED|0:0 +24438134.extern|extern||256|02:00:09|00:00.001|3752K|COMPLETED|0:0 diff --git a/scripts/eso4clima_24449471_full.out b/scripts/eso4clima_24449471_full.out new file mode 100644 index 0000000..c5b28bb --- /dev/null +++ b/scripts/eso4clima_24449471_full.out @@ -0,0 +1,232 @@ +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. 
This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:39: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + daily_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. 
+ monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lat" starting at index 240. This could degrade performance. 
Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +/home/b/b383704/eso4clima/train_twoyears/example_training_subset.py:52: UserWarning: The specified chunks separate the stored chunks along dimension "lon" starting at index 240. This could degrade performance. Instead, consider rechunking after loading. + monthly_data = xr.open_mfdataset( +2026-04-23 17:37:53,532 - INFO - Creating the model... +2026-04-23 17:37:53,750 - INFO - Creating the dataset... +/home/b/b383704/eso4clima/ClimaNet/climanet/dataset.py:112: UserWarning: Patch size (120, 120) does not evenly divide image dimensions (H=720, W=640). Uncovered pixels: 0 in height, 40 in width. Consider adjusting patch_size or image dimensions for full coverage. + warnings.warn( +2026-04-23 17:38:52,233 - INFO - Starting training... +Epoch 0: best_loss = 13.395518 +Epoch 20: best_loss = 5.020292 +slurmstepd: error: *** JOB 24449471 ON l10543 CANCELLED AT 2026-04-23T18:37:47 DUE TO TIME LIMIT *** + +******************************************************************************** +* * +* This is the automated job summary provided by DKRZ. * +* If you encounter problems, need assistance or have any suggestion, please * +* write an email to * +* * +* -- support@dkrz.de -- * +* * +* We hope you enjoyed the DKRZ supercomputer LEVANTE ... 
* +* +* JobID : 24449471 +* JobName : eso4clima +* Account : bd0854 +* User : b383704 (202985), bd0854 (1473) +* Partition : compute +* QOS : normal +* Nodelist : l10543 (1) +* Submit date : 2026-04-23T17:37:01 +* Start time : 2026-04-23T17:37:37 +* End time : 2026-04-23T18:37:47 +* Elapsed time : 01:00:10 (Timelimit=01:00:00) +* Command : /home/b/b383704/eso4clima/train_twoyears/ +* example_subset.slurm +* WorkDir : /home/b/b383704/eso4clima/train_twoyears +* +* StepID | JobName NodeHours MaxRSS [Byte] (@task) +* ------------------------------------------------------------------------------ +* batch | batch 1.0 +* extern | extern 1.0 3812K (0) +* ------------------------------------------------------------------------------ + diff --git a/scripts/example.slurm b/scripts/example.slurm new file mode 100644 index 0000000..01b3d96 --- /dev/null +++ b/scripts/example.slurm @@ -0,0 +1,18 @@ +#!/bin/bash +#SBATCH --job-name=eso4clima +#SBATCH --partition=compute +#SBATCH --nodes=1 +#SBATCH --ntasks-per-node=128 +#SBATCH --time=04:00:00 +#SBATCH --account=bd0854 +#SBATCH --output=eso4clima_%j.out + +source /home/b/b383704/eso4clima/ClimaNet/.venv/bin/activate + +# Run the training script +python -u /home/b/b383704/eso4clima/train_twoyears/example_training.py + +echo "==== Slurm accounting summary ${SLURM_JOB_ID} ====" +sstat --allsteps -j "$SLURM_JOB_ID" \ + --format=JobID,NTasks,AveCPU,AveRSS,MaxRSS,MaxVMSize,TresUsageInAve,TresUsageInMax \ + --parsable2 \ No newline at end of file diff --git a/scripts/example_training.py b/scripts/example_training.py new file mode 100644 index 0000000..0c32d92 --- /dev/null +++ b/scripts/example_training.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +from pathlib import Path +import xarray as xr +import torch +import torch.nn.functional +from climanet.st_encoder_decoder import SpatioTemporalModel +from climanet.utils import ( + set_seed, + configure_compute_resources, +) +from climanet.train import train_monthly_model +from climanet 
import STDataset

from torch.utils.data import random_split


def main():
    """Train a SpatioTemporalModel on daily and monthly data (example setup).

    The function takes no arguments; every setting is a local constant below.
    It performs the following steps in order:

    1. Collect the daily and monthly NetCDF files under ``data_folder``.
    2. Open them lazily with ``xr.open_mfdataset`` and open the land-sea mask.
    3. Build the ``SpatioTemporalModel`` and the ``STDataset``.
    4. Split the dataset into train / validation / test subsets
       (60 % / 30 % / remainder) with a fixed random seed.
    5. Configure compute resources and call ``train_monthly_model``.

    Raises
    ------
    FileNotFoundError
        If no daily or no monthly input file matches under ``data_folder``.
    """
    # ------------------------------------------------------------------
    # Data settings
    # ------------------------------------------------------------------
    # Data folder. The path must not start with whitespace: a leading space
    # turns it into a relative path and the file search then finds nothing.
    data_folder = Path("/work/bd0854/b380103/eso4clima/output/concatenated/")
    # Path to land-sea mask file (need to setup in the experiment directory)
    lsm_file = "/home/b/b383704/eso4clima/data/era5_lsm_bool.nc"

    # ------------------------------------------------------------------
    # Training settings
    # ------------------------------------------------------------------
    patch_size_model = (1, 4, 4)  # Size of model encoder (time, lat, lon).
    num_patches = (60, 60)  # Number of patches in spatial dimensions
    # Spatial dimensions of the input data. Must be divisible by the model
    # patch size, which holds by construction here (model patch size times
    # number of patches). Default input data has 720x1440 spatial dimensions.
    spatial_patch_size = (
        patch_size_model[1] * num_patches[0],
        patch_size_model[2] * num_patches[1],
    )
    stride = (spatial_patch_size[0] // 5, spatial_patch_size[1] // 5)
    overlap = 2  # Overlap between patches (in pixels).
    num_months = 24  # Number of months to predict (model output channels)
    embed_dim = 64
    dropout = 0.2
    hidden = 64
    batch_size = 10  # Number of samples per batch in training
    num_epoch = 501  # Maximum number of epochs to train
    accumulation_steps = 2  # Number of batches to accumulate gradients over
    # Plain ints (not one-element tuples): STDataset annotates both as int,
    # and notes that sh_embed_dim should be <= (sh_order_L + 1)**2.
    sh_embed_dim = 96
    sh_order_L = 10
    run_dir = "./runs"  # Directory to save logs and model checkpoints

    # Get list of daily and monthly files, sort by time
    daily_files = sorted(data_folder.rglob("20*day_ERA5dc_masked_tos.nc"))
    monthly_files = sorted(data_folder.rglob("20*mon_ERA5dc_full_tos.nc"))
    # Fail early with a clear message instead of an opaque error from xarray.
    if not daily_files or not monthly_files:
        raise FileNotFoundError(
            f"No input files found under {data_folder!s} "
            f"(daily: {len(daily_files)}, monthly: {len(monthly_files)})"
        )

    # Set seed for reproducibility
    set_seed()

    def open_lazy(files):
        """Open *files* as one dataset with the chunking used for training."""
        # The chunk sizes are chosen as twice the sample patch size
        return xr.open_mfdataset(
            files,
            combine="by_coords",
            chunks={
                "time": 1,
                "lat": spatial_patch_size[0] * 2,
                "lon": spatial_patch_size[1] * 2,
            },
            data_vars="minimal",
            coords="minimal",
            compat="override",
            parallel=False,
        )

    # Open datasets with chunks
    daily_data = open_lazy(daily_files)
    monthly_data = open_lazy(monthly_files)
    lsm_mask = xr.open_dataset(lsm_file)

    # create the model
    print("Creating the model...")
    model = SpatioTemporalModel(
        patch_size=patch_size_model,
        overlap=overlap,
        num_months=num_months,
        embed_dim=embed_dim,
        dropout=dropout,
        hidden=hidden,
    )

    # Make a dataset
    # NOTE(review): the input files are named "*_tos.nc" but the variable
    # selected here is "ts" -- confirm this matches the data files.
    print("Creating the dataset...")
    dataset = STDataset(
        daily_da=daily_data["ts"],
        monthly_da=monthly_data["ts"],
        land_mask=lsm_mask["lsm"],
        patch_size=spatial_patch_size,  # based on the patch_size in model
        stride=stride,
        sh_embed_dim=sh_embed_dim,
        sh_order_L=sh_order_L,
    )
    print(f"Total length training dataset: {len(dataset)}")

    # create train test data
    # The test split takes the remainder so the three sizes always sum to
    # len(dataset), as random_split requires for integer lengths.
    generator = torch.Generator().manual_seed(42)
    train_size = int(0.6 * len(dataset))
    validation_size = int(0.3 * len(dataset))
    test_size = len(dataset) - train_size - validation_size
    train_dataset, validation_dataset, test_dataset = random_split(
        dataset, [train_size, validation_size, test_size], generator=generator
    )
    print(
        f"Train dataset length: {len(train_dataset)}, "
        f"Validation dataset length: {len(validation_dataset)}, "
        f"Test dataset length: {len(test_dataset)}"
    )

    # Device and resources
    # NOTE(review): 96 + 32 = 128, presumably sized to the 128 tasks per node
    # requested in example.slurm -- confirm before changing either number.
    model = configure_compute_resources(
        model, device="cpu", compute_threads=96, dataloader_num_workers=32
    )

    # Train the model
    # Results will be saved to runs/best_model.pth
    print("Starting training...")
    _ = train_monthly_model(
        model,
        train_dataset,
        validation_dataset=validation_dataset,
        batch_size=batch_size,
        num_epoch=num_epoch,
        accumulation_steps=accumulation_steps,
        run_dir=run_dir,
    )


if __name__ == "__main__":
    main()