From 6abf8e8f2f879b0e0dfbad05bd9b7bf722662a08 Mon Sep 17 00:00:00 2001 From: "u.sinha@fz-juelich.de" Date: Fri, 8 May 2020 14:31:43 +0200 Subject: [PATCH 001/106] Modified moving window --- epoch2d/src/boundary.F90 | 55 ++++++++++ epoch2d/src/epoch2d.F90 | 20 ++++ epoch2d/src/housekeeping/window.F90 | 149 +++++++++++++++++++++++----- 3 files changed, 199 insertions(+), 25 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 2aaf15dce..d1e135e73 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -314,7 +314,62 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local) END SUBROUTINE do_field_mpi_with_lengths + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, & + ng, nx_local, ny_local) + INTEGER, INTENT(IN) :: ng, nx_local, ny_local + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + !INTEGER :: sendrequest1, recvrequest1 + INTEGER :: i, j, n, xlength + INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status + REAL(num), DIMENSION(:), ALLOCATABLE :: x_temp, tempx + + + xlength = 3 * ng * (ny_local + 2 * ng) + 4 + + ALLOCATE(x_temp(xlength)) + ALLOCATE(tempx(xlength)) + + n = 0 + + DO j = 1-ng, ny_local + ng + DO i = 1, ng + x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) + x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) + x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) + n = n + 2 + END DO + n = 0 + END DO + + CALL MPI_SENDRECV(x_temp, xlength, MPI_REAL, proc_x_min, & + tag, tempx, xlength, MPI_REAL, proc_x_max, tag, comm, status, errcode) + + !CALL MPI_ISEND(tempx, xlength, MPI_REAL, proc_x_min, tag, comm, & + ! sendrequest1, errcode) + !CALL MPI_IRECV(x_temp, xlength, MPI_REAL, proc_x_max, tag, comm, & + ! recvrequest1, errcode) + + !CALL MPI_WAIT(sendrequest1, status, errcode) + !CALL MPI_WAIT(recvrequest1, status, errcode) + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, ny_local + ng + DO i = nx_local + 1, nx_local + ng + fieldx(i, j) = tempx(i - nx_local + n + (j - 1 + ng) * 3 * ng) + fieldy(i, j) = tempx(i - nx_local + n + 1 + (j - 1 + ng) * 3 * ng) + fieldz(i, j) = tempx(i - nx_local + n + 2 + (j - 1 + ng) * 3 * ng) + n = n + 2 + END DO + n = 0 + END DO + END IF + + DEALLOCATE(tempx) + DEALLOCATE(x_temp) + + END SUBROUTINE moving_window_field_bc SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 4daa8c1ec..d76967e4f 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -13,6 +13,10 @@ ! You should have received a copy of the GNU General Public License ! along with this program. If not, see . +#ifdef SCOREP_USER_ENABLE +#include "scorep/SCOREP_User.inc" +#endif + PROGRAM pic ! EPOCH2D is a Birdsall and Langdon type PIC code derived from the PSC @@ -67,6 +71,13 @@ PROGRAM pic CHARACTER(LEN=64) :: timestring REAL(num) :: runtime, dt_store +#ifdef SCOREP_USER_ENABLE + SCOREP_USER_REGION_DEFINE( main_loop ) + INTEGER, PARAMETER :: reg_type = SCOREP_USER_REGION_TYPE_LOOP + & + SCOREP_USER_REGION_TYPE_DYNAMIC +#endif + + step = 0 time = 0.0_num @@ -186,6 +197,10 @@ PROGRAM pic IF (timer_collect) CALL timer_start(c_timer_step) DO +#ifdef SCOREP_USER_ENABLE +SCOREP_USER_REGION_BEGIN( main_loop, "main_loop", reg_type ) +#endif #SCOREP_USER_ENABLE + IF (timer_collect) THEN CALL timer_stop(c_timer_step) CALL timer_reset @@ -255,6 +270,11 @@ PROGRAM pic CALL update_eb_fields_final CALL moving_window + +#ifdef SCOREP_USER_ENABLE +SCOREP_USER_REGION_END(main_loop) +#endif #SCOREP_USER_ENABLE + END DO IF (rank == 0) runtime = MPI_WTIME() - walltime_started diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index d907916b8..0aded22f3 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -67,32 +67,108 @@ SUBROUTINE shift_window(window_shift_cells) ! Shift the window round one cell at a time. ! Inefficient, but it works - DO iwindow = 1, window_shift_cells - CALL insert_particles + !DO iwindow = 1, window_shift_cells + CALL insert_particles - ! Shift the box around - x_grid_min = x_global(1) + dx - xb_min = xb_global(1) + dx - x_min = xb_min + dx * cpml_thickness + ! Shift the box around + ! x_grid_min = x_global(1) + dx + ! xb_min = xb_global(1) + dx + ! x_min = xb_min + dx * cpml_thickness + + x_grid_min = x_global(1) + ng * dx + xb_min = xb_global(1) + ng * dx + x_min = xb_min + ng * dx * cpml_thickness ! Setup global grid - DO ix = 1-ng, nx_global + ng - x_global(ix) = x_grid_min + (ix - 1) * dx - xb_global(ix) = xb_min + (ix - 1) * dx - END DO - x_grid_max = x_global(nx_global) - x_max = xb_global(nx_global+1) - dx * cpml_thickness + DO ix = 1-ng, nx_global + ng + x_global(ix) = x_grid_min + (ix - 1) * dx + xb_global(ix) = xb_min + (ix - 1) * dx + END DO + x_grid_max = x_global(nx_global) + x_max = xb_global(nx_global+1) - dx * cpml_thickness - CALL setup_grid_x + CALL setup_grid_x - CALL remove_particles + CALL remove_particles - ! Shift fields around - CALL shift_fields - END DO + ! Shift fields around + !CALL shift_fields + CALL moving_window_shift_fields + !END DO END SUBROUTINE shift_window + SUBROUTINE moving_window_shift_fields + + INTEGER :: j, xlength, jxlength + !REAL(num), DIMENSION(:), ALLOCATABLE :: tempex, tempbx, tempjx + + xlength = 3 * ng * (ny + 2 * ng) + jxlength = 3 * jng * (ny + 2 * jng) + + !ALLOCATE(tempex(xlength)) + !ALLOCATE(tempbx(xlength)) + !ALLOCATE(tempjx(jxlength)) + + CALL moving_window_shift_field(ex, ey, ez, ng) + CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) + CALL moving_window_shift_field(bx, by, bz, ng) + CALL moving_window_field_bc(bx, by, bz, ng, nx, ny) + CALL moving_window_shift_field(jx, jy, jz, jng) + CALL moving_window_field_bc(jx, jy, jz, jng, nx, ny) + + IF (cpml_boundaries) THEN + CALL shift_field(cpml_psi_eyx, ng) + CALL shift_field(cpml_psi_ezx, ng) + CALL shift_field(cpml_psi_byx, ng) + CALL shift_field(cpml_psi_bzx, ng) + + CALL shift_field(cpml_psi_exy, ng) + CALL shift_field(cpml_psi_ezy, ng) + CALL shift_field(cpml_psi_bxy, ng) + CALL shift_field(cpml_psi_bzy, ng) + END IF + + IF (x_max_boundary) THEN + DO j = 1-ng, ny+ng + ! Fix incoming field cell. + ex(nx,j) = ex_x_max(j) + ex(nx+1,j) = ex_x_max(j) + ey(nx+1,j) = ey_x_max(j) + ez(nx+1,j) = ez_x_max(j) + ex(nx-1,j) = 0.5_num * (ex(nx-2,j) + ex(nx,j)) + ey(nx,j) = 0.5_num * (ey(nx-1,j) + ey(nx+1,j)) + ez(nx,j) = 0.5_num * (ez(nx-1,j) + ez(nx+1,j)) + bx(nx+1,j) = bx_x_max(j) + by(nx,j) = by_x_max(j) + bz(nx,j) = bz_x_max(j) + bx(nx,j) = 0.5_num * (bx(nx-1,j) + bx(nx+1,j)) + by(nx-1,j) = 0.5_num * (by(nx-2,j) + by(nx,j)) + bz(nx-1,j) = 0.5_num * (bz(nx-2,j) + bz(nx,j)) + END DO + + IF (cpml_boundaries) THEN + DO j = 1-ng, ny+ng + cpml_psi_eyx(nx:nx+1,j) = cpml_psi_eyx(nx,j) + cpml_psi_ezx(nx:nx+1,j) = cpml_psi_ezx(nx,j) + cpml_psi_byx(nx:nx+1,j) = cpml_psi_byx(nx,j) + cpml_psi_bzx(nx:nx+1,j) = cpml_psi_bzx(nx,j) + + cpml_psi_exy(nx:nx+1,j) = cpml_psi_exy(nx,j) + cpml_psi_ezy(nx:nx+1,j) = cpml_psi_ezy(nx,j) + cpml_psi_bxy(nx:nx+1,j) = cpml_psi_bxy(nx,j) + cpml_psi_bzy(nx:nx+1,j) = cpml_psi_bzy(nx,j) + END DO + + END IF + END IF + + !DEALLOCATE(tempex) + !DEALLOCATE(tempbx) + !DEALLOCATE(tempjx) + + END SUBROUTINE moving_window_shift_fields + SUBROUTINE shift_fields @@ -159,7 +235,6 @@ SUBROUTINE shift_fields END SUBROUTINE shift_fields - SUBROUTINE shift_field(field, ng) INTEGER, INTENT(IN) :: ng @@ -167,16 +242,37 @@ SUBROUTINE shift_field(field, ng) INTEGER :: i, j ! Shift field to the left by one cell + ! Begin changes by U. Sinha + ! Shift field to the left by ng cells DO j = 1-ng, ny+ng - DO i = 1-ng, nx+ng-1 - field(i,j) = field(i+1,j) + !DO i = 1-ng, nx+ng-1 + DO i = 1-ng, nx + !field(i,j) = field(i+1,j) + field(i,j) = field(i+ng, j) END DO END DO - + ! End changes by U. Sinha CALL field_bc(field, ng) END SUBROUTINE shift_field + SUBROUTINE moving_window_shift_field(fieldx, fieldy, fieldz, ng) + + INTEGER :: i, j + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + + DO j = 1- ng, ny + ng + DO i = 1 - ng, nx + fieldx(i, j) = fieldx(i + ng, j) + fieldy(i, j) = fieldy(i + ng, j) + fieldz(i, j) = fieldz(i + ng, j) + END DO + END DO + + END SUBROUTINE moving_window_shift_field + + SUBROUTINE insert_particles @@ -250,10 +346,12 @@ SUBROUTINE insert_particles wdata = dx * dy / (npart_per_cell + n_frac) - DO ipart = 1, npart_per_cell + n_frac + DO ipart = 1, ng * (npart_per_cell + n_frac) CALL create_particle(current) cell_frac_y = 0.5_num - random() - current%part_pos(1) = x0 + random() * dx + !current%part_pos(1) = x0 + random() * dx + !current%part_pos(2) = y(iy) - cell_frac_y * dy + current%part_pos(1) = x0 + random() * ng * dx current%part_pos(2) = y(iy) - cell_frac_y * dy ! Always use the triangle particle weighting for simplicity @@ -375,9 +473,10 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells == ng) THEN window_shift_real = REAL(window_shift_cells, num) - window_offset = window_offset + window_shift_real * dx + !window_offset = window_offset + window_shift_real * dx + window_offset = window_offset + window_shift_real * ng * dx CALL shift_window(window_shift_cells) CALL setup_bc_lists CALL particle_bcs From 9381c45db0c66d7a7784f57466d201b0f2acf576 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 8 May 2020 16:47:19 +0200 Subject: [PATCH 002/106] SendRecv for all field components in one go --- epoch2d/src/boundary.F90 | 184 +++++++++++++++++++- epoch2d/src/housekeeping/current_smooth.F90 | 7 +- 2 files changed, 182 insertions(+), 9 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index d1e135e73..2eeb86c07 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -371,6 +371,175 @@ SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, & END SUBROUTINE moving_window_field_bc + SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & + ny_local) + INTEGER, INTENT(IN) :: ng, nx_local, ny_local + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + REAL(num), DIMENSION(:), ALLOCATABLE :: left_x_temp, right_x_temp + REAL(num), DIMENSION(:), ALLOCATABLE :: temp_left_x, temp_right_x + REAL(num), DIMENSION(:), ALLOCATABLE :: bottom_temp_y, top_temp_y + REAL(num), DIMENSION(:), ALLOCATABLE :: temp_top_y, temp_bottom_y + INTEGER :: i, j, k, n, xlength, ylength + INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status + + xlength = 3 * ng * (ny_local + 2 * ng) + 4 + ylength = 3 * (nx_local + 2 * ng) * ng + 4 + + ALLOCATE(left_x_temp(xlength)) + ALLOCATE(right_x_temp(xlength)) + ALLOCATE(temp_left_x(xlength)) + ALLOCATE(temp_right_x(xlength)) + + + ALLOCATE(bottom_temp_y(ylength)) + ALLOCATE(top_temp_y(ylength)) + ALLOCATE(temp_top_y(ylength)) + ALLOCATE(temp_bottom_y(ylength)) + + n = 0 + + DO j = 1-ng, ny_local + ng + DO i = 1, ng + left_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) + left_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) + left_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) + n = n + 2 + END DO + n = 0 + END DO + + n = 0 + + DO j = 1-ng, ny_local + ng + DO i = 1, ng + k = nx_local - ng + i + right_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(k,j) + right_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(k,j) + right_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(k,j) + n = n + 2 + END DO + n = 0 + END DO + + CALL MPI_SENDRECV(left_x_temp, xlength, MPI_REAL, proc_x_min, & + tag, temp_right_x, xlength, MPI_REAL, proc_x_max, tag, comm, status, & + errcode ) + CALL MPI_SENDRECV(right_x_temp, xlength, MPI_REAL, proc_x_max, & + tag, temp_left_x, xlength, MPI_REAL, proc_x_min, tag, comm, status, & + errcode) + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, ny_local + ng + DO i = nx_local + 1, nx_local + ng + fieldx(i, j) = temp_right_x(i - nx_local + n + (j - 1 + ng) * 3 * ng) + fieldy(i, j) = temp_right_x(i - nx_local + & + n + 1 + (j - 1 + ng) * 3 * ng) + fieldz(i, j) = temp_right_x(i - nx_local + & + n + 2 + (j - 1 + ng) * 3 * ng) + n = n + 2 + END DO + n = 0 + END DO + END IF + + IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, ny_local + ng + DO i = 1-ng, 0 + fieldx(i, j) = temp_left_x(i + ng + n + (j - 1 + ng) * 3 * ng) + fieldy(i, j) = temp_left_x(i + ng + n + 1 + (j - 1 + ng) * 3 * ng) + fieldz(i, j) = temp_left_x(i + ng + n + 2 + (j - 1 + ng) * 3 * ng) + n = n + 2 + END DO + n = 0 + END DO + END IF + + n = 0 + DO j = 1, ng + DO i = 1-ng, nx_local + ng + bottom_temp_y(i + n + (j - 1) * 3 * (nx_local + 2 * ng)) & + = fieldx(i,j) + bottom_temp_y(i + n + 1 + (j - 1) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,j) + bottom_temp_y(i + n + 2 + (j - 1) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,j) + n = n + 2 + END DO + n = 0 + END DO + + n = 0 + DO j = 1, ng + DO i = 1-ng, nx_local + ng + k = ny_local - ng + j + top_temp_y(i + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & + = fieldx(i,k) + top_temp_y(i + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,k) + top_temp_y(i + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,k) + n = n + 2 + END DO + n = 0 + END DO + + CALL MPI_SENDRECV(bottom_temp_y, ylength, MPI_REAL, proc_y_min, & + tag, temp_top_y, ylength, MPI_REAL, proc_y_max, tag, comm, status, & + errcode ) + + CALL MPI_SENDRECV(top_temp_y, ylength, MPI_REAL, proc_y_max, & + tag, temp_bottom_y, ylength, MPI_REAL, proc_y_min, tag, comm, status, & + errcode ) + + IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max) == c_bc_periodic) THEN + n = 0 + DO j = ny_local+1, ny_local + ng + DO i = 1-ng, nx_local + ng + fieldx(i,j) = & + temp_top_y(i + ng + n + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) + fieldy(i,j) = & + temp_top_y(i + ng + n + 1 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) + fieldz(i,j) = & + temp_top_y(i + ng + n + 2 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) + n = n + 2 + END DO + n = 0 + END DO + END IF + + IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, 0 + DO i = 1-ng, nx_local + ng + fieldx(i,j) = & + temp_bottom_y(i + ng + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) + fieldy(i,j) = & + temp_bottom_y(i + ng + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) + fieldz(i,j) = & + temp_bottom_y(i + ng + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) + n = n + 2 + END DO + n = 0 + END DO + END IF + + DEALLOCATE(left_x_temp) + DEALLOCATE(right_x_temp) + DEALLOCATE(temp_left_x) + DEALLOCATE(temp_right_x) + + + DEALLOCATE(bottom_temp_y) + DEALLOCATE(top_temp_y) + DEALLOCATE(temp_top_y) + DEALLOCATE(temp_bottom_y) + + END SUBROUTINE all_comp_field_bc + + + SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) INTEGER, INTENT(IN) :: ng @@ -865,9 +1034,10 @@ SUBROUTINE efield_bcs INTEGER :: i ! These are the MPI boundaries - CALL field_bc(ex, ng) - CALL field_bc(ey, ng) - CALL field_bc(ez, ng) + !CALL field_bc(ex, ng) + !CALL field_bc(ey, ng) + !CALL field_bc(ez, ng) + CALL all_comp_field_bc(ex, ey, ez, ng, nx, ny) ! Perfectly conducting boundaries DO i = c_bd_x_min, c_bd_x_max, c_bd_x_max - c_bd_x_min @@ -916,9 +1086,11 @@ SUBROUTINE bfield_bcs(mpi_only) INTEGER :: i ! These are the MPI boundaries - CALL field_bc(bx, ng) - CALL field_bc(by, ng) - CALL field_bc(bz, ng) + !CALL field_bc(bx, ng) + !CALL field_bc(by, ng) + !CALL field_bc(bz, ng) + CALL all_comp_field_bc(bx, by, bz, ng, nx, ny) + IF (mpi_only) RETURN diff --git a/epoch2d/src/housekeeping/current_smooth.F90 b/epoch2d/src/housekeeping/current_smooth.F90 index 3a8ea7cf4..66054bd54 100644 --- a/epoch2d/src/housekeeping/current_smooth.F90 +++ b/epoch2d/src/housekeeping/current_smooth.F90 @@ -30,9 +30,10 @@ SUBROUTINE current_finish CALL current_bcs - CALL field_bc(jx, jng) - CALL field_bc(jy, jng) - CALL field_bc(jz, jng) + !CALL field_bc(jx, jng) + !CALL field_bc(jy, jng) + !CALL field_bc(jz, jng) + CALL all_comp_field_bc(jx, jy, jz, jng, nx, ny) IF (smooth_currents) CALL smooth_current From e1cb65a99e72b12e4f9c4e00e6c871df8c6a455d Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 26 May 2020 22:33:06 +0200 Subject: [PATCH 003/106] modified all_comp_field_bc() --- epoch2d/src/boundary.F90 | 320 ++++++++++++++++++++++----------------- 1 file changed, 181 insertions(+), 139 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 2eeb86c07..9f6aaed90 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -373,168 +373,210 @@ END SUBROUTINE moving_window_field_bc SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) - INTEGER, INTENT(IN) :: ng, nx_local, ny_local - REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz - REAL(num), DIMENSION(:), ALLOCATABLE :: left_x_temp, right_x_temp - REAL(num), DIMENSION(:), ALLOCATABLE :: temp_left_x, temp_right_x - REAL(num), DIMENSION(:), ALLOCATABLE :: bottom_temp_y, top_temp_y - REAL(num), DIMENSION(:), ALLOCATABLE :: temp_top_y, temp_bottom_y - INTEGER :: i, j, k, n, xlength, ylength - INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status - xlength = 3 * ng * (ny_local + 2 * ng) + 4 - ylength = 3 * (nx_local + 2 * ng) * ng + 4 - ALLOCATE(left_x_temp(xlength)) - ALLOCATE(right_x_temp(xlength)) - ALLOCATE(temp_left_x(xlength)) - ALLOCATE(temp_right_x(xlength)) + ! | field_top | + !____________|____________________|____________ + ! | | + ! field_left | | field_right + !____________|____________________|____________ + ! | | + ! | field_bottom | - - ALLOCATE(bottom_temp_y(ylength)) - ALLOCATE(top_temp_y(ylength)) - ALLOCATE(temp_top_y(ylength)) - ALLOCATE(temp_bottom_y(ylength)) + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + INTEGER, INTENT(IN) :: nx_local, ny_local + INTEGER, DIMENSION(c_ndims) :: sizes, subsizes + INTEGER :: basetype, sz, szmax, i, j, k, n + REAL(num), ALLOCATABLE :: field_left(:), field_right(:) + REAL(num), ALLOCATABLE :: field_top(:), field_bottom(:) + REAL(num), ALLOCATABLE :: temp(:) - n = 0 + basetype = mpireal - DO j = 1-ng, ny_local + ng - DO i = 1, ng - left_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) - left_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) - left_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) - n = n + 2 + sizes(1) = nx_local + 2 * ng + sizes(2) = ny_local + 2 * ng + starts = 1 + + szmax = 3 * sizes(1) * ng + sz = 3 * sizes(2) * ng + IF (sz > szmax) szmax = sz + + ALLOCATE(temp(szmax)) + + subsizes(1) = ng + subsizes(2) = sizes(2) + + sz = 3 * subsizes(1) * subsizes(2) + + + ALLOCATE(field_left(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1,ng + select case(k) + case(1) + field_left(n) = fieldx(i,j) + case(2) + field_left(n) = fieldy(i,j) + case(3) + field_left(n) = fieldz(i,j) + end select + n = n + 1 + END DO END DO - n = 0 END DO - n = 0 - - DO j = 1-ng, ny_local + ng - DO i = 1, ng - k = nx_local - ng + i - right_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(k,j) - right_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(k,j) - right_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(k,j) - n = n + 2 + ALLOCATE(field_right(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = nx_local-ng+1, nx_local + select case(k) + case(1) + field_right(n) = fieldx(i,j) + case(2) + field_right(n) = fieldy(i,j) + case(3) + field_right(n) = fieldz(i,j) + end select + n = n + 1 + END DO END DO - n = 0 END DO - CALL MPI_SENDRECV(left_x_temp, xlength, MPI_REAL, proc_x_min, & - tag, temp_right_x, xlength, MPI_REAL, proc_x_max, tag, comm, status, & - errcode ) - CALL MPI_SENDRECV(right_x_temp, xlength, MPI_REAL, proc_x_max, & - tag, temp_left_x, xlength, MPI_REAL, proc_x_min, tag, comm, status, & - errcode) + CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) - IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, ny_local + ng - DO i = nx_local + 1, nx_local + ng - fieldx(i, j) = temp_right_x(i - nx_local + n + (j - 1 + ng) * 3 * ng) - fieldy(i, j) = temp_right_x(i - nx_local + & - n + 1 + (j - 1 + ng) * 3 * ng) - fieldz(i, j) = temp_right_x(i - nx_local + & - n + 2 + (j - 1 + ng) * 3 * ng) - n = n + 2 - END DO - n = 0 - END DO - END IF - - IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, ny_local + ng - DO i = 1-ng, 0 - fieldx(i, j) = temp_left_x(i + ng + n + (j - 1 + ng) * 3 * ng) - fieldy(i, j) = temp_left_x(i + ng + n + 1 + (j - 1 + ng) * 3 * ng) - fieldz(i, j) = temp_left_x(i + ng + n + 2 + (j - 1 + ng) * 3 * ng) - n = n + 2 + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = nx_local+1, subsizes(1)+nx_local + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO END DO - n = 0 END DO END IF - - n = 0 - DO j = 1, ng - DO i = 1-ng, nx_local + ng - bottom_temp_y(i + n + (j - 1) * 3 * (nx_local + 2 * ng)) & - = fieldx(i,j) - bottom_temp_y(i + n + 1 + (j - 1) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,j) - bottom_temp_y(i + n + 2 + (j - 1) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,j) - n = n + 2 + + CALL MPI_SENDRECV(field_right, sz, basetype, proc_x_max, & + tag, temp, sz, basetype, proc_x_min, tag, comm, status, errcode) + + IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END DO + END IF + + subsizes(1) = sizes(1) + subsizes(2) = ng + sz = 3 * subsizes(1) * subsizes(2) + + ALLOCATE(field_top(szmax)) + n = 1 + DO k = 1, 3 + DO j = ny_local-subsizes(2)+1, ny_local + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + field_top(n) = fieldx(i,j) + case(2) + field_top(n) = fieldy(i,j) + case(3) + field_top(n) = fieldz(i,j) + end select + n = n + 1 + END DO END DO - n = 0 END DO - n = 0 - DO j = 1, ng - DO i = 1-ng, nx_local + ng - k = ny_local - ng + j - top_temp_y(i + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & - = fieldx(i,k) - top_temp_y(i + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,k) - top_temp_y(i + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,k) - n = n + 2 + ALLOCATE(field_bottom(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1, subsizes(2) + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + field_bottom(n) = fieldx(i,j) + case(2) + field_bottom(n) = fieldy(i,j) + case(3) + field_bottom(n) = fieldz(i,j) + end select + n = n + 1 END DO - n = 0 END DO - CALL MPI_SENDRECV(bottom_temp_y, ylength, MPI_REAL, proc_y_min, & - tag, temp_top_y, ylength, MPI_REAL, proc_y_max, tag, comm, status, & - errcode ) + CALL MPI_SENDRECV(field_bottom, sz, basetype, proc_y_min, & + tag, temp, sz, basetype, proc_y_max, tag, comm, status, errcode) - CALL MPI_SENDRECV(top_temp_y, ylength, MPI_REAL, proc_y_max, & - tag, temp_bottom_y, ylength, MPI_REAL, proc_y_min, tag, comm, status, & - errcode ) + IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = ny_local+1, subsizes(2)+ny_local + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END DO + END IF - IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max) == c_bc_periodic) THEN - n = 0 - DO j = ny_local+1, ny_local + ng - DO i = 1-ng, nx_local + ng - fieldx(i,j) = & - temp_top_y(i + ng + n + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) - fieldy(i,j) = & - temp_top_y(i + ng + n + 1 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) - fieldz(i,j) = & - temp_top_y(i + ng + n + 2 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) - n = n + 2 - END DO - n = 0 - END DO - END IF - - IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, 0 - DO i = 1-ng, nx_local + ng - fieldx(i,j) = & - temp_bottom_y(i + ng + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) - fieldy(i,j) = & - temp_bottom_y(i + ng + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) - fieldz(i,j) = & - temp_bottom_y(i + ng + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) - n = n + 2 - END DO - n = 0 - END DO - END IF - - DEALLOCATE(left_x_temp) - DEALLOCATE(right_x_temp) - DEALLOCATE(temp_left_x) - DEALLOCATE(temp_right_x) - - - DEALLOCATE(bottom_temp_y) - DEALLOCATE(top_temp_y) - DEALLOCATE(temp_top_y) - DEALLOCATE(temp_bottom_y) + CALL MPI_SENDRECV(field_top, sz, basetype, proc_y_max, & + tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) + + IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END IF + + DEALLOCATE(field_left) + DEALLOCATE(field_right) + DEALLOCATE(field_top) + DEALLOCATE(field_bottom) + DEALLOCATE(temp) END SUBROUTINE all_comp_field_bc From 82bfc4c17f0731ace80d2efcf01607dbe9c04c4a Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 28 May 2020 15:12:25 +0200 Subject: [PATCH 004/106] reorganized moving window --- epoch2d/src/boundary.F90 | 57 ------ epoch2d/src/housekeeping/window.F90 | 262 ++++++++++++++-------------- 2 files changed, 129 insertions(+), 190 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 9f6aaed90..fb0cf4942 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -314,63 +314,6 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local) END SUBROUTINE do_field_mpi_with_lengths - SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, & - ng, nx_local, ny_local) - - INTEGER, INTENT(IN) :: ng, nx_local, ny_local - REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz - !INTEGER :: sendrequest1, recvrequest1 - INTEGER :: i, j, n, xlength - INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status - REAL(num), DIMENSION(:), ALLOCATABLE :: x_temp, tempx - - - xlength = 3 * ng * (ny_local + 2 * ng) + 4 - - ALLOCATE(x_temp(xlength)) - ALLOCATE(tempx(xlength)) - - n = 0 - - DO j = 1-ng, ny_local + ng - DO i = 1, ng - x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) - x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) - x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) - n = n + 2 - END DO - n = 0 - END DO - - CALL MPI_SENDRECV(x_temp, xlength, MPI_REAL, proc_x_min, & - tag, tempx, xlength, MPI_REAL, proc_x_max, tag, comm, status, errcode) - - !CALL MPI_ISEND(tempx, xlength, MPI_REAL, proc_x_min, tag, comm, & - ! sendrequest1, errcode) - !CALL MPI_IRECV(x_temp, xlength, MPI_REAL, proc_x_max, tag, comm, & - ! recvrequest1, errcode) - - !CALL MPI_WAIT(sendrequest1, status, errcode) - !CALL MPI_WAIT(recvrequest1, status, errcode) - - IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, ny_local + ng - DO i = nx_local + 1, nx_local + ng - fieldx(i, j) = tempx(i - nx_local + n + (j - 1 + ng) * 3 * ng) - fieldy(i, j) = tempx(i - nx_local + n + 1 + (j - 1 + ng) * 3 * ng) - fieldz(i, j) = tempx(i - nx_local + n + 2 + (j - 1 + ng) * 3 * ng) - n = n + 2 - END DO - n = 0 - END DO - END IF - - DEALLOCATE(tempx) - DEALLOCATE(x_temp) - - END SUBROUTINE moving_window_field_bc - SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 0aded22f3..dc60cbe20 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -65,21 +65,14 @@ SUBROUTINE shift_window(window_shift_cells) INTEGER :: iwindow, ix REAL(num) :: xb_min - ! Shift the window round one cell at a time. - ! Inefficient, but it works - !DO iwindow = 1, window_shift_cells - CALL insert_particles + ! Shift the window round # of window shift cells at a time. + CALL insert_particles(window_shift_cells) - ! Shift the box around - ! x_grid_min = x_global(1) + dx - ! xb_min = xb_global(1) + dx - ! x_min = xb_min + dx * cpml_thickness + x_grid_min = x_global(1) + window_shift_cells * dx + xb_min = xb_global(1) + window_shift_cells * dx + x_min = xb_min + window_shift_cells * dx * cpml_thickness - x_grid_min = x_global(1) + ng * dx - xb_min = xb_global(1) + ng * dx - x_min = xb_min + ng * dx * cpml_thickness - - ! Setup global grid + ! Setup global grid DO ix = 1-ng, nx_global + ng x_global(ix) = x_grid_min + (ix - 1) * dx xb_global(ix) = xb_min + (ix - 1) * dx @@ -92,111 +85,63 @@ SUBROUTINE shift_window(window_shift_cells) CALL remove_particles ! Shift fields around - !CALL shift_fields - CALL moving_window_shift_fields - !END DO + CALL shift_fields(window_shift_cells) END SUBROUTINE shift_window - SUBROUTINE moving_window_shift_fields - INTEGER :: j, xlength, jxlength - !REAL(num), DIMENSION(:), ALLOCATABLE :: tempex, tempbx, tempjx + SUBROUTINE shift_fields(window_shift_cells) - xlength = 3 * ng * (ny + 2 * ng) - jxlength = 3 * jng * (ny + 2 * jng) + INTEGER :: j + INTEGER, INTENT(IN) :: window_shift_cells - !ALLOCATE(tempex(xlength)) - !ALLOCATE(tempbx(xlength)) - !ALLOCATE(tempjx(jxlength)) + CALL shift_field(ex, ng, window_shift_cells) + CALL shift_field(ey, ng, window_shift_cells) + CALL shift_field(ez, ng, window_shift_cells) - CALL moving_window_shift_field(ex, ey, ez, ng) CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) - CALL moving_window_shift_field(bx, by, bz, ng) - CALL moving_window_field_bc(bx, by, bz, ng, nx, ny) - CALL moving_window_shift_field(jx, jy, jz, jng) - CALL moving_window_field_bc(jx, jy, jz, jng, nx, ny) - - IF (cpml_boundaries) THEN - CALL shift_field(cpml_psi_eyx, ng) - CALL shift_field(cpml_psi_ezx, ng) - CALL shift_field(cpml_psi_byx, ng) - CALL shift_field(cpml_psi_bzx, ng) - - CALL shift_field(cpml_psi_exy, ng) - CALL shift_field(cpml_psi_ezy, ng) - CALL shift_field(cpml_psi_bxy, ng) - CALL shift_field(cpml_psi_bzy, ng) - END IF - - IF (x_max_boundary) THEN - DO j = 1-ng, ny+ng - ! Fix incoming field cell. - ex(nx,j) = ex_x_max(j) - ex(nx+1,j) = ex_x_max(j) - ey(nx+1,j) = ey_x_max(j) - ez(nx+1,j) = ez_x_max(j) - ex(nx-1,j) = 0.5_num * (ex(nx-2,j) + ex(nx,j)) - ey(nx,j) = 0.5_num * (ey(nx-1,j) + ey(nx+1,j)) - ez(nx,j) = 0.5_num * (ez(nx-1,j) + ez(nx+1,j)) - bx(nx+1,j) = bx_x_max(j) - by(nx,j) = by_x_max(j) - bz(nx,j) = bz_x_max(j) - bx(nx,j) = 0.5_num * (bx(nx-1,j) + bx(nx+1,j)) - by(nx-1,j) = 0.5_num * (by(nx-2,j) + by(nx,j)) - bz(nx-1,j) = 0.5_num * (bz(nx-2,j) + bz(nx,j)) - END DO - - IF (cpml_boundaries) THEN - DO j = 1-ng, ny+ng - cpml_psi_eyx(nx:nx+1,j) = cpml_psi_eyx(nx,j) - cpml_psi_ezx(nx:nx+1,j) = cpml_psi_ezx(nx,j) - cpml_psi_byx(nx:nx+1,j) = cpml_psi_byx(nx,j) - cpml_psi_bzx(nx:nx+1,j) = cpml_psi_bzx(nx,j) - - cpml_psi_exy(nx:nx+1,j) = cpml_psi_exy(nx,j) - cpml_psi_ezy(nx:nx+1,j) = cpml_psi_ezy(nx,j) - cpml_psi_bxy(nx:nx+1,j) = cpml_psi_bxy(nx,j) - cpml_psi_bzy(nx:nx+1,j) = cpml_psi_bzy(nx,j) - END DO - - END IF - END IF - - !DEALLOCATE(tempex) - !DEALLOCATE(tempbx) - !DEALLOCATE(tempjx) - END SUBROUTINE moving_window_shift_fields + CALL shift_field(bx, ng, window_shift_cells) + CALL shift_field(by, ng, window_shift_cells) + CALL shift_field(bz, ng, window_shift_cells) + CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny) + CALL shift_field(jx, jng, window_shift_cells) + CALL shift_field(jy, jng, window_shift_cells) + CALL shift_field(jz, jng, window_shift_cells) - SUBROUTINE shift_fields - - INTEGER :: j - - CALL shift_field(ex, ng) - CALL shift_field(ey, ng) - CALL shift_field(ez, ng) - - CALL shift_field(bx, ng) - CALL shift_field(by, ng) - CALL shift_field(bz, ng) - - CALL shift_field(jx, jng) - CALL shift_field(jy, jng) - CALL shift_field(jz, jng) + CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny) IF (cpml_boundaries) THEN - CALL shift_field(cpml_psi_eyx, ng) - CALL shift_field(cpml_psi_ezx, ng) - CALL shift_field(cpml_psi_byx, ng) - CALL shift_field(cpml_psi_bzx, ng) - - CALL shift_field(cpml_psi_exy, ng) - CALL shift_field(cpml_psi_ezy, ng) - CALL shift_field(cpml_psi_bxy, ng) + CALL shift_field(cpml_psi_eyx, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_ezx, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_byx, ng, & + window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_eyx, & + cpml_psi_ezx, cpml_psi_byx, & + ng, nx, ny) + + CALL shift_field(cpml_psi_bzx, ng, & + window_shift_cells) + + CALL field_bc(cpml_psi_bzx, ng) + + CALL shift_field(cpml_psi_exy, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_ezy, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_bxy, ng, & + window_shift_cells) + CALL moving_window_field_bc(cpml_psi_exy, & + cpml_psi_ezy, cpml_psi_ezy, & + ng, nx, ny) + CALL shift_field(cpml_psi_bzy, ng) + CALL field_bc(cpml_psi_bzy, ng) END IF IF (x_max_boundary) THEN @@ -235,52 +180,106 @@ SUBROUTINE shift_fields END SUBROUTINE shift_fields - SUBROUTINE shift_field(field, ng) + SUBROUTINE shift_field(field, ng, window_shift_cells) - INTEGER, INTENT(IN) :: ng + INTEGER, INTENT(IN) :: ng, window_shift_cells REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: field INTEGER :: i, j - ! Shift field to the left by one cell - ! Begin changes by U. Sinha - ! Shift field to the left by ng cells + ! Shift field to the left by window_shift_cells DO j = 1-ng, ny+ng - !DO i = 1-ng, nx+ng-1 - DO i = 1-ng, nx - !field(i,j) = field(i+1,j) - field(i,j) = field(i+ng, j) + DO i = 1-ng, nx+ng-window_shift_cells + field(i,j) = field(i+window_shift_cells, j) END DO END DO - ! End changes by U. Sinha - CALL field_bc(field, ng) + !CALL field_bc(field, ng) END SUBROUTINE shift_field - SUBROUTINE moving_window_shift_field(fieldx, fieldy, fieldz, ng) - INTEGER :: i, j - INTEGER, INTENT(IN) :: ng - REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & + ny_local) - DO j = 1- ng, ny + ng - DO i = 1 - ng, nx - fieldx(i, j) = fieldx(i + ng, j) - fieldy(i, j) = fieldy(i + ng, j) - fieldz(i, j) = fieldz(i + ng, j) - END DO + + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + INTEGER, INTENT(IN) :: nx_local, ny_local + INTEGER, DIMENSION(c_ndims) :: sizes, subsizes + INTEGER :: basetype, sz, szmax, i, j, k, n + REAL(num), ALLOCATABLE :: field_left(:) + REAL(num), ALLOCATABLE :: temp(:) + + basetype = mpireal + + sizes(1) = nx_local + 2 * ng + sizes(2) = ny_local + 2 * ng + starts = 1 + + szmax = 3 * sizes(1) * ng + sz = 3 * sizes(2) * ng + IF (sz > szmax) szmax = sz + + ALLOCATE(temp(szmax)) + + subsizes(1) = ng + subsizes(2) = sizes(2) + + sz = 3 * subsizes(1) * subsizes(2) + + + ALLOCATE(field_left(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1,ng + select case(k) + case(1) + field_left(n) = fieldx(i,j) + case(2) + field_left(n) = fieldy(i,j) + case(3) + field_left(n) = fieldz(i,j) + end select + n = n + 1 + END DO + END DO END DO - END SUBROUTINE moving_window_shift_field + CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = nx_local+1, subsizes(1)+nx_local + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END DO + END IF + DEALLOCATE(field_left) + DEALLOCATE(temp) + END SUBROUTINE moving_window_field_bc - SUBROUTINE insert_particles + SUBROUTINE insert_particles(window_shift_cells) TYPE(particle), POINTER :: current TYPE(particle_list) :: append_list INTEGER :: ispecies, i, iy, isuby, errcode INTEGER(i8) :: ipart, npart_per_cell, n_frac + INTEGER, INTENT(IN) :: window_shift_cells REAL(num) :: cell_frac_y, cy2 REAL(num), DIMENSION(-1:1) :: gy REAL(num), DIMENSION(c_ndirs) :: temp_local, drift_local @@ -346,12 +345,10 @@ SUBROUTINE insert_particles wdata = dx * dy / (npart_per_cell + n_frac) - DO ipart = 1, ng * (npart_per_cell + n_frac) + DO ipart = 1, window_shift_cells * (npart_per_cell + n_frac) CALL create_particle(current) cell_frac_y = 0.5_num - random() - !current%part_pos(1) = x0 + random() * dx - !current%part_pos(2) = y(iy) - cell_frac_y * dy - current%part_pos(1) = x0 + random() * ng * dx + current%part_pos(1) = x0 + random() * window_shift_cells * dx current%part_pos(2) = y(iy) - cell_frac_y * dy ! Always use the triangle particle weighting for simplicity @@ -473,10 +470,9 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells == ng) THEN + IF (window_shift_cells > 0) THEN window_shift_real = REAL(window_shift_cells, num) - !window_offset = window_offset + window_shift_real * dx - window_offset = window_offset + window_shift_real * ng * dx + window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) CALL setup_bc_lists CALL particle_bcs From 49f733b5f3bc92f4a23c0b690eba39b279575ecc Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Jun 2020 10:41:07 +0200 Subject: [PATCH 005/106] fixed bugs in boundary.F90 and window.F90 --- epoch2d/src/boundary.F90 | 3 ++- epoch2d/src/housekeeping/window.F90 | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index fb0cf4942..38c95297c 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -339,7 +339,6 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & sizes(1) = nx_local + 2 * ng sizes(2) = ny_local + 2 * ng - starts = 1 szmax = 3 * sizes(1) * ng sz = 3 * sizes(2) * ng @@ -471,6 +470,7 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & n = n + 1 END DO END DO + END DO CALL MPI_SENDRECV(field_bottom, sz, basetype, proc_y_min, & tag, temp, sz, basetype, proc_y_max, tag, comm, status, errcode) @@ -513,6 +513,7 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & n = n + 1 END DO END DO + END DO END IF DEALLOCATE(field_left) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index dc60cbe20..c6a093644 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -140,7 +140,8 @@ SUBROUTINE shift_fields(window_shift_cells) cpml_psi_ezy, cpml_psi_ezy, & ng, nx, ny) - CALL shift_field(cpml_psi_bzy, ng) + CALL shift_field(cpml_psi_bzy, ng, & + window_shift_cells) CALL field_bc(cpml_psi_bzy, ng) END IF @@ -213,7 +214,6 @@ SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & sizes(1) = nx_local + 2 * ng sizes(2) = ny_local + 2 * ng - starts = 1 szmax = 3 * sizes(1) * ng sz = 3 * sizes(2) * ng From ffe035766a4581cbbd4a16e40e21a899245b4949 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Sun, 7 Jun 2020 19:28:02 +0200 Subject: [PATCH 006/106] re-introduced window_shift_cell > ng - 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index dc60cbe20..e4a78953c 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -470,7 +470,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 7bd3ce6de9adc126da7c4ce083c8c87b793c4d53 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 12:33:13 +0200 Subject: [PATCH 007/106] testing for window_shift_cells > 0 --- epoch2d/src/housekeeping/window.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 00c523542..417ec516d 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -99,19 +99,19 @@ SUBROUTINE shift_fields(window_shift_cells) CALL shift_field(ey, ng, window_shift_cells) CALL shift_field(ez, ng, window_shift_cells) - CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) + CALL all_comp_field_bc(ex, ey, ez, ng, nx, ny) CALL shift_field(bx, ng, window_shift_cells) CALL shift_field(by, ng, window_shift_cells) CALL shift_field(bz, ng, window_shift_cells) - CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny) + CALL all_comp_field_bc(bx, by, bz, ng ,nx, ny) CALL shift_field(jx, jng, window_shift_cells) CALL shift_field(jy, jng, window_shift_cells) CALL shift_field(jz, jng, window_shift_cells) - CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny) + CALL all_comp_field_bc(jx, jy, jz, ng, nx, ny) IF (cpml_boundaries) THEN CALL shift_field(cpml_psi_eyx, ng, & @@ -470,7 +470,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > ng - 1) THEN + IF (window_shift_cells > 0) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 5ecae0e3cdd8acb676d0c9a3965ac5074e640b06 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 13:23:19 +0200 Subject: [PATCH 008/106] fixed bug in moving_window_field_bc --- epoch2d/src/housekeeping/window.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 417ec516d..c6a093644 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -99,19 +99,19 @@ SUBROUTINE shift_fields(window_shift_cells) CALL shift_field(ey, ng, window_shift_cells) CALL shift_field(ez, ng, window_shift_cells) - CALL all_comp_field_bc(ex, ey, ez, ng, nx, ny) + CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) CALL shift_field(bx, ng, window_shift_cells) CALL shift_field(by, ng, window_shift_cells) CALL shift_field(bz, ng, window_shift_cells) - CALL all_comp_field_bc(bx, by, bz, ng ,nx, ny) + CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny) CALL shift_field(jx, jng, window_shift_cells) CALL shift_field(jy, jng, window_shift_cells) CALL shift_field(jz, jng, window_shift_cells) - CALL all_comp_field_bc(jx, jy, jz, ng, nx, ny) + CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny) IF (cpml_boundaries) THEN CALL shift_field(cpml_psi_eyx, ng, & From 5db10a0876d52a216d1e702fbf1c7fccd6aea35e Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 13:33:27 +0200 Subject: [PATCH 009/106] back to window_shift_cells > ng - 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index c6a093644..00c523542 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -470,7 +470,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 916688b75c1495bcf58679c17aab712cbf299b78 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 16:23:45 +0200 Subject: [PATCH 010/106] window_shift_cells > 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 00c523542..e453c622b 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -470,7 +470,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > ng - 1) THEN + IF (window_shift_cells > 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 984b53259fb056fa4e6eea328414c3648b4263c1 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 15 Jul 2020 16:39:12 +0200 Subject: [PATCH 011/106] introduced do loop for insert particles in window.f90 --- epoch2d/src/housekeeping/window.F90 | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index e453c622b..bbc10120b 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -66,11 +66,12 @@ SUBROUTINE shift_window(window_shift_cells) REAL(num) :: xb_min ! Shift the window round # of window shift cells at a time. + DO iwindow = 1, window_shift_cells CALL insert_particles(window_shift_cells) - x_grid_min = x_global(1) + window_shift_cells * dx - xb_min = xb_global(1) + window_shift_cells * dx - x_min = xb_min + window_shift_cells * dx * cpml_thickness + x_grid_min = x_global(1) + dx + xb_min = xb_global(1) + dx + x_min = xb_min + dx * cpml_thickness ! Setup global grid DO ix = 1-ng, nx_global + ng @@ -79,6 +80,7 @@ SUBROUTINE shift_window(window_shift_cells) END DO x_grid_max = x_global(nx_global) x_max = xb_global(nx_global+1) - dx * cpml_thickness + END DO CALL setup_grid_x @@ -345,10 +347,10 @@ SUBROUTINE insert_particles(window_shift_cells) wdata = dx * dy / (npart_per_cell + n_frac) - DO ipart = 1, window_shift_cells * (npart_per_cell + n_frac) + DO ipart = 1, npart_per_cell + n_frac CALL create_particle(current) cell_frac_y = 0.5_num - random() - current%part_pos(1) = x0 + random() * window_shift_cells * dx + current%part_pos(1) = x0 + random() * dx current%part_pos(2) = y(iy) - cell_frac_y * dy ! Always use the triangle particle weighting for simplicity From 366b385003538c0ae6b0eef9c0cd195003ca2035 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 15 Jul 2020 16:51:07 +0200 Subject: [PATCH 012/106] back to window_shift_cells > ng - 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index bbc10120b..deb39e85c 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -472,7 +472,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 1) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 462ccb992f4dede2a187a56d160c4e4ada912453 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 20 Jul 2020 11:25:49 +0200 Subject: [PATCH 013/106] Shifting window in chuncks of size ng --- epoch2d/src/housekeeping/window.F90 | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index deb39e85c..adcfd2b99 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -445,8 +445,9 @@ END SUBROUTINE remove_particles SUBROUTINE moving_window #ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real + REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nchunks, nremainder #endif IF (.NOT. move_window) RETURN @@ -475,7 +476,15 @@ SUBROUTINE moving_window IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx - CALL shift_window(window_shift_cells) + window_shift_steps = window_shift_cells / ng + nchunks = FLOOR(window_shift_steps) + nremainder = MOD(window_shift_cells, ng) + DO i = 1, nchunks + CALL shift_window(ng) + END DO + IF(remainder > 0) THEN + CALL shift_window(nremainder) + END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real From 6b327a6d6742ac71b14eb1c1b683e3a91f961c8b Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 20 Jul 2020 11:39:55 +0200 Subject: [PATCH 014/106] minor bug fix --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index adcfd2b99..de9b19728 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -482,7 +482,7 @@ SUBROUTINE moving_window DO i = 1, nchunks CALL shift_window(ng) END DO - IF(remainder > 0) THEN + IF(nremainder > 0) THEN CALL shift_window(nremainder) END IF CALL setup_bc_lists From 5434759f23447faf8572286ead9a7e106042c17e Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 20 Jul 2020 14:37:03 +0200 Subject: [PATCH 015/106] Improved copy-in/out of MPI buffers --- epoch2d/src/boundary.F90 | 284 ++++++++++++++-------------- epoch2d/src/housekeeping/window.F90 | 70 ++++--- 2 files changed, 170 insertions(+), 184 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 38c95297c..19bf26dbe 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -331,9 +331,9 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & INTEGER, INTENT(IN) :: nx_local, ny_local INTEGER, DIMENSION(c_ndims) :: sizes, subsizes INTEGER :: basetype, sz, szmax, i, j, k, n - REAL(num), ALLOCATABLE :: field_left(:), field_right(:) - REAL(num), ALLOCATABLE :: field_top(:), field_bottom(:) + REAL(num), ALLOCATABLE :: field(:) REAL(num), ALLOCATABLE :: temp(:) + INTEGER :: xmin, xmax, ymin, ymax, offset0, offset1, offset2 basetype = mpireal @@ -345,186 +345,178 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (sz > szmax) szmax = sz ALLOCATE(temp(szmax)) + ALLOCATE(field(szmax)) subsizes(1) = ng subsizes(2) = sizes(2) sz = 3 * subsizes(1) * subsizes(2) - - ALLOCATE(field_left(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1,ng - select case(k) - case(1) - field_left(n) = fieldx(i,j) - case(2) - field_left(n) = fieldy(i,j) - case(3) - field_left(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) + offset2 = 2 * offset1 - ALLOCATE(field_right(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = nx_local-ng+1, nx_local - select case(k) - case(1) - field_right(n) = fieldx(i,j) - case(2) - field_right(n) = fieldy(i,j) - case(3) - field_right(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + xmin = 1 + xmax = ng + ymin = 1-ng + ymax = subsizes(2)-ng - CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + xmin = nx_local + 1 + xmax = subsizes(1) + nx_local + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = nx_local+1, subsizes(1)+nx_local - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + END IF - CALL MPI_SENDRECV(field_right, sz, basetype, proc_x_max, & + xmin = nx_local - ng + 1 + xmax = nx_local + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_max, & tag, temp, sz, basetype, proc_x_min, tag, comm, status, errcode) + xmin = 1-ng + xmax = subsizes(1)-ng + IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + END IF subsizes(1) = sizes(1) subsizes(2) = ng sz = 3 * subsizes(1) * subsizes(2) - ALLOCATE(field_top(szmax)) - n = 1 - DO k = 1, 3 - DO j = ny_local-subsizes(2)+1, ny_local - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - field_top(n) = fieldx(i,j) - case(2) - field_top(n) = fieldy(i,j) - case(3) - field_top(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) + offset2 = 2 * offset1 + + xmin = 1 - ng + xmax = subsizes(1) - ng + ymin = ny_local - subsizes(2) + 1 + ymax = ny_local + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_y_max, & + tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) - ALLOCATE(field_bottom(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1, subsizes(2) - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - field_bottom(n) = fieldx(i,j) - case(2) - field_bottom(n) = fieldy(i,j) - case(3) - field_bottom(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + ymin = 1 - ng + ymax = subsizes(2) - ng + + IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + + END IF + + ymin = 1 + ymax = subsizes(2) + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) - CALL MPI_SENDRECV(field_bottom, sz, basetype, proc_y_min, & + ymin = ny_local + 1 + ymax = subsizes(2) + ny_local + + CALL MPI_SENDRECV(field, sz, basetype, proc_y_min, & tag, temp, sz, basetype, proc_y_max, tag, comm, status, errcode) IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = ny_local+1, subsizes(2)+ny_local - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO - END IF - CALL MPI_SENDRECV(field_top, sz, basetype, proc_y_max, & - tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) - IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO END IF - DEALLOCATE(field_left) - DEALLOCATE(field_right) - DEALLOCATE(field_top) - DEALLOCATE(field_bottom) + + DEALLOCATE(field) DEALLOCATE(temp) END SUBROUTINE all_comp_field_bc + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & + xmin, xmax, ymin, ymax, offset) + + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, offset + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i, j, n + + n = 1 + DO j = ymin, ymax + DO i = xmin, xmax + buffer(n + offset) = field(i,j) + n = n + 1 + END DO + END DO + + END SUBROUTINE load_field_boundaries_to_buffer + + SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & + xmin, xmax, ymin, ymax, offset) + + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, offset + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i, j, n + + n = 1 + DO j = ymin, ymax + DO i = xmin, xmax + field(i,j) = buffer(n + offset) + n = n + 1 + END DO + END DO + END SUBROUTINE unload_field_boundaries_from_buffer SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index de9b19728..275c68a0a 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -203,14 +203,14 @@ END SUBROUTINE shift_field SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) - INTEGER, INTENT(IN) :: ng REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz INTEGER, INTENT(IN) :: nx_local, ny_local INTEGER, DIMENSION(c_ndims) :: sizes, subsizes INTEGER :: basetype, sz, szmax, i, j, k, n - REAL(num), ALLOCATABLE :: field_left(:) + REAL(num), ALLOCATABLE :: field(:) REAL(num), ALLOCATABLE :: temp(:) + INTEGER :: xmin, xmax, ymin, ymax, offset0, offset1, offset2 basetype = mpireal @@ -222,54 +222,48 @@ SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (sz > szmax) szmax = sz ALLOCATE(temp(szmax)) + ALLOCATE(field(szmax)) subsizes(1) = ng subsizes(2) = sizes(2) sz = 3 * subsizes(1) * subsizes(2) - - ALLOCATE(field_left(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1,ng - select case(k) - case(1) - field_left(n) = fieldx(i,j) - case(2) - field_left(n) = fieldy(i,j) - case(3) - field_left(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) + offset2 = 2 * offset1 - CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + xmin = 1 + xmax = ng + ymin = 1-ng + ymax = subsizes(2)-ng + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + xmin = nx_local + 1 + xmax = subsizes(1) + nx_local + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = nx_local+1, subsizes(1)+nx_local - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + END IF - DEALLOCATE(field_left) + + DEALLOCATE(field) DEALLOCATE(temp) END SUBROUTINE moving_window_field_bc From d4bc4f0c7be15d0e83bd473032f3a6ef9bc08785 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 24 Jul 2020 13:46:55 +0200 Subject: [PATCH 016/106] introduced do loop with jumps of ng in moving window --- epoch2d/src/housekeeping/window.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 275c68a0a..dae111660 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -470,10 +470,10 @@ SUBROUTINE moving_window IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx - window_shift_steps = window_shift_cells / ng - nchunks = FLOOR(window_shift_steps) +! window_shift_steps = window_shift_cells / ng +! nchunks = FLOOR(window_shift_steps) nremainder = MOD(window_shift_cells, ng) - DO i = 1, nchunks + DO i = ng, window_shift_cells, ng CALL shift_window(ng) END DO IF(nremainder > 0) THEN From 2303e0085967a11295326c62c4c7dd5a94f9541e Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 27 Jul 2020 15:12:30 +0200 Subject: [PATCH 017/106] corrected for nremainder in window.F90 and offset in boundary.F90 --- epoch2d/src/boundary.F90 | 66 +++++++++++++++-------------- epoch2d/src/housekeeping/window.F90 | 8 +--- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 19bf26dbe..267684ad7 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -333,7 +333,7 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & INTEGER :: basetype, sz, szmax, i, j, k, n REAL(num), ALLOCATABLE :: field(:) REAL(num), ALLOCATABLE :: temp(:) - INTEGER :: xmin, xmax, ymin, ymax, offset0, offset1, offset2 + INTEGER :: xmin, xmax, ymin, ymax, offset basetype = mpireal @@ -352,9 +352,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & sz = 3 * subsizes(1) * subsizes(2) - offset0 = 0 - offset1 = subsizes(1) * subsizes(2) - offset2 = 2 * offset1 +! offset0 = 0 +! offset1 = subsizes(1) * subsizes(2) +! offset2 = 2 * offset1 + + offset = subsizes(1) * subsizes(2) xmin = 1 xmax = ng @@ -362,11 +364,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ymax = subsizes(2)-ng CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) @@ -377,11 +379,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF @@ -389,11 +391,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & xmax = nx_local CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) CALL MPI_SENDRECV(field, sz, basetype, proc_x_max, & tag, temp, sz, basetype, proc_x_min, tag, comm, status, errcode) @@ -404,11 +406,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF @@ -416,9 +418,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & subsizes(2) = ng sz = 3 * subsizes(1) * subsizes(2) - offset0 = 0 - offset1 = subsizes(1) * subsizes(2) - offset2 = 2 * offset1 +! offset0 = 0 +! offset1 = subsizes(1) * subsizes(2) +! offset2 = 2 * offset1 + + offset = subsizes(1) * subsizes(2) xmin = 1 - ng xmax = subsizes(1) - ng @@ -426,11 +430,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ymax = ny_local CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) CALL MPI_SENDRECV(field, sz, basetype, proc_y_max, & tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) @@ -441,11 +445,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF @@ -453,11 +457,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ymax = subsizes(2) CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) ymin = ny_local + 1 ymax = subsizes(2) + ny_local @@ -468,11 +472,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index dae111660..0bc6dc1af 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -470,18 +470,14 @@ SUBROUTINE moving_window IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx -! window_shift_steps = window_shift_cells / ng -! nchunks = FLOOR(window_shift_steps) nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng CALL shift_window(ng) END DO - IF(nremainder > 0) THEN - CALL shift_window(nremainder) - END IF CALL setup_bc_lists CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) END IF END IF #else From b2315ae4cdc327265783ba2ef3bf2df76763fddc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20Br=C3=B6mmel?= Date: Thu, 30 Jul 2020 10:41:09 +0000 Subject: [PATCH 018/106] Attempt to align moving_window with I/O Requires checks that alignment w.r.t. steps indeed works. Has not been compiled. force_ump needs checking as well. --- epoch2d/src/epoch2d.F90 | 2 +- epoch2d/src/housekeeping/window.F90 | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index d76967e4f..f3a16c459 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -269,7 +269,7 @@ PROGRAM pic CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step + 1, force_dump) #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 0bc6dc1af..4a7086c55 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -436,7 +436,13 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window + SUBROUTINE moving_window(future_step, force) + USE diagnostics + + integer, intent(in) :: future_step + logical, intent(in) :: force_dump + logical :: print_arrays(1:SIZE(file_prefixes)) + integer :: i #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps @@ -466,14 +472,24 @@ SUBROUTINE moving_window IF (window_v_x <= 0.0_num) RETURN window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) + ! CHECK FOR IO TAKING PLACE IN NEXT STEP... + print_arrays = .false. + DO i = 1,SIZE(file_prefixes) + CALL io_test(i, future_step, print_arrays(1), force_dump, prefix_first_call) + END DO + ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > ng - 1) THEN + IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) - DO i = ng, window_shift_cells, ng + DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) END DO + IF (ANY(print_arrays)) then + CALL shift_window(nremainder) + nremainder = 0 + END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From d44496e4fb40da5bfa460d311a73779736ec276c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20Br=C3=B6mmel?= Date: Tue, 4 Aug 2020 10:52:43 +0200 Subject: [PATCH 019/106] Fixing circular module dependency The dependency between `window` <-> `diagnostics` is resolved using a F08 SUBMODULE. This works, but is not nice on several levels. See comments within `window.F90`. One problem being that a newer Fortran standard is required. A cleaner solution would be to move the one dependent variable. --- epoch2d/Makefile | 7 +- epoch2d/src/epoch2d.F90 | 2 +- epoch2d/src/housekeeping/window.F90 | 87 ++++------------------- epoch2d/src/housekeeping/window_sub.F90 | 91 +++++++++++++++++++++++++ epoch2d/src/io/diagnostics.F90 | 2 + 5 files changed, 113 insertions(+), 76 deletions(-) create mode 100644 epoch2d/src/housekeeping/window_sub.F90 diff --git a/epoch2d/Makefile b/epoch2d/Makefile index 8ab2ba5e1..df438284d 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -50,7 +50,7 @@ endif # Intel # ===== ifeq ($(strip $(COMPILER)),intel) - FFLAGS = -O3 -g -stand f03 + FFLAGS = -O3 -g #-stand f03 ifeq ($(strip $(CONS)),1) FLTCONS = -pc64 -fltconsistency endif @@ -69,7 +69,7 @@ endif # gfortran # ======== ifeq ($(strip $(COMPILER)),gfortran) - FFLAGS = -O3 -g -std=f2003 + FFLAGS = -O3 -g #-std=f2003 GNUVER := $(shell gfortran -dumpversion | head -1 \ | sed 's/[^0-9\.]*\([0-9\.]\+\).*/\1/') @@ -321,7 +321,7 @@ SRCFILES = antennae.f90 balance.F90 boundary.F90 bremsstrahlung.F90 \ redblack_module.f90 setup.F90 shape_functions.F90 shared_data.F90 shunt.F90 \ simple_io.F90 split_particle.F90 stack.f90 strings.f90 strings_advanced.f90 \ terminal_controls.F90 timer.f90 tokenizer_blocks.f90 utilities.f90 \ - version_data.F90 welcome.F90 window.F90 + version_data.F90 welcome.F90 window.F90 window_sub.F90 OBJFILES := $(SRCFILES:.f90=.o) OBJFILES := $(OBJFILES:.F90=.o) @@ -554,3 +554,4 @@ utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o window.o: window.F90 boundary.o evaluate.o partlist.o +window_sub.o: window_sub.F90 window.o diagnostics.o diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index f3a16c459..8a89cfc29 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -163,7 +163,7 @@ PROGRAM pic ELSE time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step + 1, force_dump) END IF ELSE dt_store = dt diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 4a7086c55..74413fc07 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -24,6 +24,21 @@ MODULE window REAL(num), ALLOCATABLE :: density(:), temperature(:,:), drift(:,:) REAL(num), SAVE :: window_shift_fraction + ! In order to check for steps that do I/O, we call 'io_test' from the + ! 'diagnostics' module. This would create a circular module dependency between + ! 'diagnostics' and 'window'. A submodule is currently used to solve the + ! circular dependency. This is not nice, but works. + ! Modules 'setup' and 'diagnostics' both require a single variable from this + ! module: 'window_shift_fraction'. If this variable were to be more 'global', + ! the submodule could be removed. An alternative is a separate module for only + ! this single variable that 'window', 'diagnostics' and 'setup' then use. + INTERFACE + MODULE SUBROUTINE moving_window(future_step, force_dump) + integer, intent(in) :: future_step + logical, intent(in) :: force_dump + END SUBROUTINE moving_window + END INTERFACE + CONTAINS SUBROUTINE initialise_window @@ -434,76 +449,4 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif - - - SUBROUTINE moving_window(future_step, force) - USE diagnostics - - integer, intent(in) :: future_step - logical, intent(in) :: force_dump - logical :: print_arrays(1:SIZE(file_prefixes)) - integer :: i - -#ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real, window_shift_steps - INTEGER :: window_shift_cells, errcode = 0 - INTEGER :: i, nchunks, nremainder -#endif - - IF (.NOT. move_window) RETURN - -#ifndef PER_SPECIES_WEIGHT - IF (.NOT. window_started) THEN - IF (time >= window_start_time .AND. time < window_stop_time) THEN - bc_field(c_bd_x_min) = bc_x_min_after_move - bc_field(c_bd_x_max) = bc_x_max_after_move - bc_field(c_bd_y_min) = bc_y_min_after_move - bc_field(c_bd_y_max) = bc_y_max_after_move - CALL setup_boundaries - IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num - window_started = .TRUE. - END IF - END IF - - ! If we have a moving window then update the window position - IF (window_started) THEN - IF (time >= window_stop_time) RETURN - IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) - IF (window_v_x <= 0.0_num) RETURN - window_shift_fraction = window_shift_fraction + dt * window_v_x / dx - window_shift_cells = FLOOR(window_shift_fraction) - ! CHECK FOR IO TAKING PLACE IN NEXT STEP... - print_arrays = .false. - DO i = 1,SIZE(file_prefixes) - CALL io_test(i, future_step, print_arrays(1), force_dump, prefix_first_call) - END DO - - ! Allow for posibility of having jumped two cells at once - IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN - window_shift_real = REAL(window_shift_cells, num) - window_offset = window_offset + window_shift_real * dx - nremainder = MOD(window_shift_cells, ng) - DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng - CALL shift_window(ng) - END DO - IF (ANY(print_arrays)) then - CALL shift_window(nremainder) - nremainder = 0 - END IF - CALL setup_bc_lists - CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real & - + REAL(nremainder, num) - END IF - END IF -#else - IF (rank == 0) THEN - WRITE(*,*) 'moving windows only available when using', & - ' per particle weighting' - END IF - CALL abort_code(c_err_pp_options_missing) -#endif - - END SUBROUTINE moving_window - END MODULE window diff --git a/epoch2d/src/housekeeping/window_sub.F90 b/epoch2d/src/housekeeping/window_sub.F90 new file mode 100644 index 000000000..d6cf8a331 --- /dev/null +++ b/epoch2d/src/housekeeping/window_sub.F90 @@ -0,0 +1,91 @@ +! Copyright (C) 2009-2019 University of Warwick +! Copyright (C) 2020 Juelich Supercomputing Center +! Forschungszentrum Juelich GmbH +! +! This program is free software: you can redistribute it and/or modify +! it under the terms of the GNU General Public License as published by +! the Free Software Foundation, either version 3 of the License, or +! (at your option) any later version. +! +! This program is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +! GNU General Public License for more details. +! +! You should have received a copy of the GNU General Public License +! along with this program. If not, see . + +SUBMODULE (window) window_sub + ! This SUBMODULE is used to circumvent the circular module dependency between + ! diagnostics and window (see comments therein) + USE diagnostics + +CONTAINS + + MODULE PROCEDURE moving_window + USE diagnostics + +#ifndef PER_SPECIES_WEIGHT + REAL(num) :: window_shift_real, window_shift_steps + INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nchunks, nremainder + logical :: print_arrays(1:SIZE(file_prefixes)) +#endif + + IF (.NOT. move_window) RETURN + +#ifndef PER_SPECIES_WEIGHT + IF (.NOT. window_started) THEN + IF (time >= window_start_time .AND. time < window_stop_time) THEN + bc_field(c_bd_x_min) = bc_x_min_after_move + bc_field(c_bd_x_max) = bc_x_max_after_move + bc_field(c_bd_y_min) = bc_y_min_after_move + bc_field(c_bd_y_max) = bc_y_max_after_move + CALL setup_boundaries + IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num + window_started = .TRUE. + END IF + END IF + + ! If we have a moving window then update the window position + IF (window_started) THEN + IF (time >= window_stop_time) RETURN + IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) + IF (window_v_x <= 0.0_num) RETURN + window_shift_fraction = window_shift_fraction + dt * window_v_x / dx + window_shift_cells = FLOOR(window_shift_fraction) + ! CHECK FOR IO TAKING PLACE IN NEXT STEP... + print_arrays = .false. + DO i = 1, SIZE(file_prefixes) + CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) + END DO + + ! Allow for posibility of having jumped two cells at once + IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) + DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng + CALL shift_window(ng) + END DO + IF (ANY(print_arrays)) then + CALL shift_window(nremainder) + nremainder = 0 + END IF + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + END IF + END IF +#else + IF (rank == 0) THEN + WRITE(*,*) 'moving windows only available when using', & + ' per particle weighting' + END IF + CALL abort_code(c_err_pp_options_missing) +#endif + + END PROCEDURE moving_window + +END SUBMODULE window_sub diff --git a/epoch2d/src/io/diagnostics.F90 b/epoch2d/src/io/diagnostics.F90 index 64971903c..efbb9c69d 100644 --- a/epoch2d/src/io/diagnostics.F90 +++ b/epoch2d/src/io/diagnostics.F90 @@ -40,6 +40,8 @@ MODULE diagnostics PUBLIC :: cleanup_stop_files, check_for_stop_condition PUBLIC :: deallocate_file_list, count_n_zeros PUBLIC :: build_persistent_subsets + ! Needed for window_sub only + PUBLIC :: io_test, prefix_first_call CHARACTER(LEN=*), PARAMETER :: stop_file = 'STOP' CHARACTER(LEN=*), PARAMETER :: stop_file_nodump = 'STOP_NODUMP' From 9a8f2ab4f2a80e7f842f87941d9a8f6a6abc818a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20Br=C3=B6mmel?= Date: Tue, 4 Aug 2020 11:40:36 +0200 Subject: [PATCH 020/106] Simpler fix for circular dependencies This is much simpler and more in line with EPOCH's guidelines and other patterns. The single dependent variable has been moved to `shared_data`. --- epoch2d/Makefile | 13 ++-- epoch2d/src/housekeeping/setup.F90 | 1 - epoch2d/src/housekeeping/window.F90 | 86 ++++++++++++++++++----- epoch2d/src/housekeeping/window_sub.F90 | 91 ------------------------- epoch2d/src/io/diagnostics.F90 | 1 - epoch2d/src/shared_data.F90 | 1 + 6 files changed, 77 insertions(+), 116 deletions(-) delete mode 100644 epoch2d/src/housekeeping/window_sub.F90 diff --git a/epoch2d/Makefile b/epoch2d/Makefile index df438284d..547459187 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -50,7 +50,7 @@ endif # Intel # ===== ifeq ($(strip $(COMPILER)),intel) - FFLAGS = -O3 -g #-stand f03 + FFLAGS = -O3 -g -stand f03 ifeq ($(strip $(CONS)),1) FLTCONS = -pc64 -fltconsistency endif @@ -69,7 +69,7 @@ endif # gfortran # ======== ifeq ($(strip $(COMPILER)),gfortran) - FFLAGS = -O3 -g #-std=f2003 + FFLAGS = -O3 -g -std=f2003 GNUVER := $(shell gfortran -dumpversion | head -1 \ | sed 's/[^0-9\.]*\([0-9\.]\+\).*/\1/') @@ -321,7 +321,7 @@ SRCFILES = antennae.f90 balance.F90 boundary.F90 bremsstrahlung.F90 \ redblack_module.f90 setup.F90 shape_functions.F90 shared_data.F90 shunt.F90 \ simple_io.F90 split_particle.F90 stack.f90 strings.f90 strings_advanced.f90 \ terminal_controls.F90 timer.f90 tokenizer_blocks.f90 utilities.f90 \ - version_data.F90 welcome.F90 window.F90 window_sub.F90 + version_data.F90 welcome.F90 window.F90 OBJFILES := $(SRCFILES:.f90=.o) OBJFILES := $(OBJFILES:.F90=.o) @@ -496,7 +496,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch2d.o: epoch2d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -538,7 +538,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - window.o $(SDFMOD) + $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -553,5 +553,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o -window_sub.o: window_sub.F90 window.o diagnostics.o +window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o diff --git a/epoch2d/src/housekeeping/setup.F90 b/epoch2d/src/housekeeping/setup.F90 index a7c79d101..c8baaa0a1 100644 --- a/epoch2d/src/housekeeping/setup.F90 +++ b/epoch2d/src/housekeeping/setup.F90 @@ -23,7 +23,6 @@ MODULE setup USE shunt USE laser USE injectors - USE window USE timer USE helper USE balance diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 74413fc07..37d796bd7 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -22,22 +22,6 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:), temperature(:,:), drift(:,:) - REAL(num), SAVE :: window_shift_fraction - - ! In order to check for steps that do I/O, we call 'io_test' from the - ! 'diagnostics' module. This would create a circular module dependency between - ! 'diagnostics' and 'window'. A submodule is currently used to solve the - ! circular dependency. This is not nice, but works. - ! Modules 'setup' and 'diagnostics' both require a single variable from this - ! module: 'window_shift_fraction'. If this variable were to be more 'global', - ! the submodule could be removed. An alternative is a separate module for only - ! this single variable that 'window', 'diagnostics' and 'setup' then use. - INTERFACE - MODULE SUBROUTINE moving_window(future_step, force_dump) - integer, intent(in) :: future_step - logical, intent(in) :: force_dump - END SUBROUTINE moving_window - END INTERFACE CONTAINS @@ -449,4 +433,74 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + + + SUBROUTINE moving_window(future_step, force_dump) + USE diagnostics + + integer, intent(in) :: future_step + logical, intent(in) :: force_dump +#ifndef PER_SPECIES_WEIGHT + REAL(num) :: window_shift_real, window_shift_steps + INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nchunks, nremainder + logical :: print_arrays(1:SIZE(file_prefixes)) +#endif + + IF (.NOT. move_window) RETURN + +#ifndef PER_SPECIES_WEIGHT + IF (.NOT. window_started) THEN + IF (time >= window_start_time .AND. time < window_stop_time) THEN + bc_field(c_bd_x_min) = bc_x_min_after_move + bc_field(c_bd_x_max) = bc_x_max_after_move + bc_field(c_bd_y_min) = bc_y_min_after_move + bc_field(c_bd_y_max) = bc_y_max_after_move + CALL setup_boundaries + IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num + window_started = .TRUE. + END IF + END IF + + ! If we have a moving window then update the window position + IF (window_started) THEN + IF (time >= window_stop_time) RETURN + IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) + IF (window_v_x <= 0.0_num) RETURN + window_shift_fraction = window_shift_fraction + dt * window_v_x / dx + window_shift_cells = FLOOR(window_shift_fraction) + ! CHECK FOR IO TAKING PLACE IN NEXT STEP... + print_arrays = .false. + DO i = 1, SIZE(file_prefixes) + CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) + END DO + + ! Allow for posibility of having jumped two cells at once + IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) + DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng + CALL shift_window(ng) + END DO + IF (ANY(print_arrays)) then + CALL shift_window(nremainder) + nremainder = 0 + END IF + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + END IF + END IF +#else + IF (rank == 0) THEN + WRITE(*,*) 'moving windows only available when using', & + ' per particle weighting' + END IF + CALL abort_code(c_err_pp_options_missing) +#endif + + END SUBROUTINE moving_window + END MODULE window diff --git a/epoch2d/src/housekeeping/window_sub.F90 b/epoch2d/src/housekeeping/window_sub.F90 deleted file mode 100644 index d6cf8a331..000000000 --- a/epoch2d/src/housekeeping/window_sub.F90 +++ /dev/null @@ -1,91 +0,0 @@ -! Copyright (C) 2009-2019 University of Warwick -! Copyright (C) 2020 Juelich Supercomputing Center -! Forschungszentrum Juelich GmbH -! -! This program is free software: you can redistribute it and/or modify -! it under the terms of the GNU General Public License as published by -! the Free Software Foundation, either version 3 of the License, or -! (at your option) any later version. -! -! This program is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -! GNU General Public License for more details. -! -! You should have received a copy of the GNU General Public License -! along with this program. If not, see . - -SUBMODULE (window) window_sub - ! This SUBMODULE is used to circumvent the circular module dependency between - ! diagnostics and window (see comments therein) - USE diagnostics - -CONTAINS - - MODULE PROCEDURE moving_window - USE diagnostics - -#ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real, window_shift_steps - INTEGER :: window_shift_cells, errcode = 0 - INTEGER :: i, nchunks, nremainder - logical :: print_arrays(1:SIZE(file_prefixes)) -#endif - - IF (.NOT. move_window) RETURN - -#ifndef PER_SPECIES_WEIGHT - IF (.NOT. window_started) THEN - IF (time >= window_start_time .AND. time < window_stop_time) THEN - bc_field(c_bd_x_min) = bc_x_min_after_move - bc_field(c_bd_x_max) = bc_x_max_after_move - bc_field(c_bd_y_min) = bc_y_min_after_move - bc_field(c_bd_y_max) = bc_y_max_after_move - CALL setup_boundaries - IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num - window_started = .TRUE. - END IF - END IF - - ! If we have a moving window then update the window position - IF (window_started) THEN - IF (time >= window_stop_time) RETURN - IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) - IF (window_v_x <= 0.0_num) RETURN - window_shift_fraction = window_shift_fraction + dt * window_v_x / dx - window_shift_cells = FLOOR(window_shift_fraction) - ! CHECK FOR IO TAKING PLACE IN NEXT STEP... - print_arrays = .false. - DO i = 1, SIZE(file_prefixes) - CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) - END DO - - ! Allow for posibility of having jumped two cells at once - IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN - window_shift_real = REAL(window_shift_cells, num) - window_offset = window_offset + window_shift_real * dx - nremainder = MOD(window_shift_cells, ng) - DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng - CALL shift_window(ng) - END DO - IF (ANY(print_arrays)) then - CALL shift_window(nremainder) - nremainder = 0 - END IF - CALL setup_bc_lists - CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real & - + REAL(nremainder, num) - END IF - END IF -#else - IF (rank == 0) THEN - WRITE(*,*) 'moving windows only available when using', & - ' per particle weighting' - END IF - CALL abort_code(c_err_pp_options_missing) -#endif - - END PROCEDURE moving_window - -END SUBMODULE window_sub diff --git a/epoch2d/src/io/diagnostics.F90 b/epoch2d/src/io/diagnostics.F90 index efbb9c69d..aa6b4ab03 100644 --- a/epoch2d/src/io/diagnostics.F90 +++ b/epoch2d/src/io/diagnostics.F90 @@ -27,7 +27,6 @@ MODULE diagnostics USE setup USE deck_io_block USE strings - USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch2d/src/shared_data.F90 b/epoch2d/src/shared_data.F90 index c7d8d9ac4..be1cfa42c 100644 --- a/epoch2d/src/shared_data.F90 +++ b/epoch2d/src/shared_data.F90 @@ -568,6 +568,7 @@ MODULE shared_data INTEGER :: bc_y_min_after_move = c_bc_null INTEGER :: bc_y_max_after_move = c_bc_null REAL(num) :: window_offset + REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- From 92d1eea464766d68a20e12e09820efa4128fcf4c Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 24 Sep 2020 17:40:31 +0200 Subject: [PATCH 021/106] Incorporated changes in moving window of epoch3d --- epoch3d/src/boundary.F90 | 38 ++++++ epoch3d/src/housekeeping/window.F90 | 180 ++++++++++++++++++++++------ 2 files changed, 183 insertions(+), 35 deletions(-) diff --git a/epoch3d/src/boundary.F90 b/epoch3d/src/boundary.F90 index 67b57cfd3..184ed007b 100644 --- a/epoch3d/src/boundary.F90 +++ b/epoch3d/src/boundary.F90 @@ -467,7 +467,45 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local, & END SUBROUTINE do_field_mpi_with_lengths + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & + xmin, xmax, ymin, ymax, zmin, zmax, offset) + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, zmin, zmax, offset + REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i,j,k,n + + n = 1 + DO k = zmin, zmax + DO j = ymin, ymax + DO i = xmin, xmax + buffer(n+offset) = field(i,j,k) + n = n+1 + END DO + END DO + END DO + + END SUBROUTINE load_field_boundaries_to_buffer + + SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & + xmin, xmax, ymin, ymax, zmin, zmax, offset) + + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, zmin, zmax, offset + REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i,j,k,n + + n = 1 + DO k = zmin, zmax + DO j = ymin, ymax + DO i = xmin, xmax + field(i,j,k) = buffer(n+offset) + n = n+1 + END DO + END DO + END DO + + END SUBROUTINE unload_field_boundaries_from_buffer SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local, & nz_local) diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index ca94f614f..676f685ae 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -82,50 +82,81 @@ SUBROUTINE shift_window(window_shift_cells) END DO x_grid_max = x_global(nx_global) x_max = xb_global(nx_global+1) - dx * cpml_thickness + END DO CALL setup_grid_x CALL remove_particles ! Shift fields around - CALL shift_fields - END DO + CALL shift_fields(window_shift_cells) END SUBROUTINE shift_window - SUBROUTINE shift_fields + SUBROUTINE shift_fields(window_shift_cells) INTEGER :: j, k + INTEGER, INTENT(IN) :: window_shift_cells + + CALL shift_field(ex, ng, window_shift_cells) + CALL shift_field(ey, ng, window_shift_cells) + CALL shift_field(ez, ng, window_shift_cells) + + CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny, nz) - CALL shift_field(ex, ng) - CALL shift_field(ey, ng) - CALL shift_field(ez, ng) + CALL shift_field(bx, ng, window_shift_cells) + CALL shift_field(by, ng, window_shift_cells) + CALL shift_field(bz, ng, window_shift_cells) - CALL shift_field(bx, ng) - CALL shift_field(by, ng) - CALL shift_field(bz, ng) + CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny, nz) - CALL shift_field(jx, jng) - CALL shift_field(jy, jng) - CALL shift_field(jz, jng) + CALL shift_field(jx, jng, window_shift_cells) + CALL shift_field(jy, jng, window_shift_cells) + CALL shift_field(jz, jng, window_shift_cells) + + CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny, nz) IF (cpml_boundaries) THEN - CALL shift_field(cpml_psi_eyx, ng) - CALL shift_field(cpml_psi_ezx, ng) - CALL shift_field(cpml_psi_byx, ng) - CALL shift_field(cpml_psi_bzx, ng) - - CALL shift_field(cpml_psi_exy, ng) - CALL shift_field(cpml_psi_ezy, ng) - CALL shift_field(cpml_psi_bxy, ng) - CALL shift_field(cpml_psi_bzy, ng) - - CALL shift_field(cpml_psi_exz, ng) - CALL shift_field(cpml_psi_eyz, ng) - CALL shift_field(cpml_psi_bxz, ng) - CALL shift_field(cpml_psi_byz, ng) + CALL shift_field(cpml_psi_eyx, ng, window_shift_cells) + CALL shift_field(cpml_psi_ezx, ng, window_shift_cells) + CALL shift_field(cpml_psi_byx, ng, window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_eyx, & + cpml_psi_ezx, cpml_psi_byx, & + ng, nx, ny, nz) + + + CALL shift_field(cpml_psi_bzx, ng, window_shift_cells) + + CALL field_bc(cpml_psi_bzx, ng) + + CALL shift_field(cpml_psi_exy, ng, window_shift_cells) + CALL shift_field(cpml_psi_ezy, ng, window_shift_cells) + CALL shift_field(cpml_psi_bxy, ng, window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_exy, & + cpml_psi_ezy, cpml_psi_ezy, & + ng, nx, ny, nz) + + + CALL shift_field(cpml_psi_bzy, ng, window_shift_cells) + + CALL field_bc(cpml_psi_bzy, ng) + + + CALL shift_field(cpml_psi_exz, ng, window_shift_cells) + CALL shift_field(cpml_psi_eyz, ng, window_shift_cells) + CALL shift_field(cpml_psi_bxz, ng, window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_exz, & + cpml_psi_eyz, cpml_psi_bxz, & + ng, nx, ny, nz) + + CALL shift_field(cpml_psi_byz, ng, window_shift_cells) + + CALL field_bc(cpml_psi_byz, ng) END IF IF (x_max_boundary) THEN @@ -174,25 +205,99 @@ END SUBROUTINE shift_fields - SUBROUTINE shift_field(field, ng) + SUBROUTINE shift_field(field, ng, window_shift_cells) - INTEGER, INTENT(IN) :: ng + INTEGER, INTENT(IN) :: ng, window_shift_cells REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: field INTEGER :: i, j, k - ! Shift field to the left by one cell + ! Shift field to the left by window_shift_cells cells DO k = 1-ng, nz+ng DO j = 1-ng, ny+ng - DO i = 1-ng, nx+ng-1 - field(i,j,k) = field(i+1,j,k) + DO i = 1-ng, nx+ng-window_shift_cells + field(i,j,k) = field(i+ window_shift_cells,j,k) END DO END DO END DO - CALL field_bc(field, ng) + !CALL field_bc(field, ng) END SUBROUTINE shift_field + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & + ny_local, nz_local) + + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + INTEGER, INTENT(IN) :: nx_local, ny_local, nz_local + INTEGER, DIMENSION(c_ndims) :: sizes, subsizes + INTEGER :: basetype, sz, szmax, i, j, k, n + REAL(num), ALLOCATABLE :: field(:) + REAL(num), ALLOCATABLE :: temp(:) + INTEGER :: xmin, xmax, ymin, ymax, zmin, zmax, offset0, offset1, offset2 + + basetype = mpireal + + sizes(1) = nx_local + 2 * ng + sizes(2) = ny_local + 2 * ng + sizes(3) = nz_local + 2 * ng + + szmax = 3 * sizes(1) * sizes(2) * ng + sz = 3 * sizes(1) * sizes(3) * ng + IF (sz > szmax) szmax = sz + sz = 3 * sizes(2) * sizes(3) * ng + IF (sz > szmax) szmax = sz + + ALLOCATE(temp(szmax)) + ALLOCATE(field(szmax)) + + subsizes(1) = ng + subsizes(2) = sizes(2) + subsizes(3) = sizes(3) + + sz = 3 * subsizes(1) * subsizes(2) * subsizes(3) + + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) * subsizes(3) + offset2 = 2 * offset1 + + xmin = 1 + xmax = ng + ymin = 1-ng + ymax = subsizes(2)-ng + zmin = 1-ng + zmax = subsizes(3)-ng + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, zmin, zmax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, zmin, zmax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, zmin, zmax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & + tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + + xmin = nx_local + 1 + xmax = subsizes(1) + nx_local + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, zmin, zmax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, zmin, zmax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, zmin, zmax, offset2) + + END IF + + + DEALLOCATE(field) + DEALLOCATE(temp) + + END SUBROUTINE moving_window_field_bc + SUBROUTINE insert_particles @@ -388,6 +493,7 @@ SUBROUTINE moving_window #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nremainder #endif IF (.NOT. move_window) RETURN @@ -415,13 +521,17 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx - CALL shift_window(window_shift_cells) + nremainder = MOD(window_shift_cells, ng) + DO i = ng, window_shift_cells, ng + CALL shift_window(ng) + END DO CALL setup_bc_lists CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) END IF END IF #else From b92633a1ac343e32b9f13e45fe7af61f53d90954 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 7 Oct 2020 12:49:15 +0000 Subject: [PATCH 022/106] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index f15a7f650..c30c23852 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ +## EPOCH development + +This version of EPOCH is being developed under the project PiCKeX- Particle Kinetic codes for Exascale plasma simulation which is a part of Work Package 8 (WP8) of PRACE-6IP. The main production version of EPOCH code is maintained by the University of Warwick. The repository has been cloned on the Jülich Supercomputing Centre (JSC) GitLab server. This development version has the full functionality of the main EPOCH branch, but includes verified refactoring measures. + +Current developments include improving the efficiency of the moving window book-keeping, reducing the number MPI calls for the field boundary conditions and the particle boundary conditions. + +NB: This is not the official repository for the EPOCH code. + # *** PLEASE READ THIS NOTE *** If you are obtaining this code from the gitlab server *DO NOT* use the From ff74d947dd6b0f11d39099cf023c60517c2237e6 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 7 Oct 2020 12:53:51 +0000 Subject: [PATCH 023/106] Update README.md --- README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c30c23852..14ce4451d 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,19 @@ ## EPOCH development -This version of EPOCH is being developed under the project PiCKeX- Particle Kinetic codes for Exascale plasma simulation which is a part of Work Package 8 (WP8) of PRACE-6IP. The main production version of EPOCH code is maintained by the University of Warwick. The repository has been cloned on the Jülich Supercomputing Centre (JSC) GitLab server. This development version has the full functionality of the main EPOCH branch, but includes verified refactoring measures. +**This is not the official repository for the EPOCH code.** + +This version of EPOCH is being developed within the project PiCKeX- +Particle Kinetic codes for Exascale plasma simulation, which is a +part of Work Package 8 (WP8) of PRACE-6IP. This repository has been +cloned to the Jülich Supercomputing Centre (JSC) GitLab server to +perform and publish this work. The main production version of EPOCH is +maintained by the University of Warwick. The development version found +here might not have the full functionality of the production version of +EPOCH, but includes a performance improved, refactored branch that will +be merged with the official version by the end of the project. Current developments include improving the efficiency of the moving window book-keeping, reducing the number MPI calls for the field boundary conditions and the particle boundary conditions. -NB: This is not the official repository for the EPOCH code. - # *** PLEASE READ THIS NOTE *** If you are obtaining this code from the gitlab server *DO NOT* use the From 061a246ad0da08473b445da3a58b4b8363c60237 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 12 Oct 2020 06:13:08 +0200 Subject: [PATCH 024/106] back to non-aligned I/O for moving window --- epoch2d/src/epoch2d.F90 | 4 ++-- epoch2d/src/housekeeping/window.F90 | 16 ++-------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 8a89cfc29..d76967e4f 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -163,7 +163,7 @@ PROGRAM pic ELSE time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window(step + 1, force_dump) + CALL moving_window END IF ELSE dt_store = dt @@ -269,7 +269,7 @@ PROGRAM pic CALL update_eb_fields_final - CALL moving_window(step + 1, force_dump) + CALL moving_window #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 37d796bd7..59ece96d4 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -435,16 +435,13 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window(future_step, force_dump) + SUBROUTINE moving_window USE diagnostics - integer, intent(in) :: future_step - logical, intent(in) :: force_dump #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder - logical :: print_arrays(1:SIZE(file_prefixes)) #endif IF (.NOT. move_window) RETURN @@ -469,24 +466,15 @@ SUBROUTINE moving_window(future_step, force_dump) IF (window_v_x <= 0.0_num) RETURN window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) - ! CHECK FOR IO TAKING PLACE IN NEXT STEP... - print_arrays = .false. - DO i = 1, SIZE(file_prefixes) - CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) - END DO ! Allow for posibility of having jumped two cells at once - IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) END DO - IF (ANY(print_arrays)) then - CALL shift_window(nremainder) - nremainder = 0 - END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From bc377adbea13c33ffa6ac6673bad16ba2d11b3ae Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 13 Oct 2020 07:16:45 +0200 Subject: [PATCH 025/106] remove_particles not needed in moving_window --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 59ece96d4..3c25e23f4 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -83,7 +83,7 @@ SUBROUTINE shift_window(window_shift_cells) CALL setup_grid_x - CALL remove_particles + !CALL remove_particles ! Shift fields around CALL shift_fields(window_shift_cells) From bf85928b52680351ed70f3f9ea658fc0e234b2f3 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 22 Oct 2020 08:04:10 +0200 Subject: [PATCH 026/106] Commented remove_particles_() moving window for epoch3d --- epoch3d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 676f685ae..1f755b6c7 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -86,7 +86,7 @@ SUBROUTINE shift_window(window_shift_cells) CALL setup_grid_x - CALL remove_particles + !CALL remove_particles ! Shift fields around CALL shift_fields(window_shift_cells) From f78a0cd00ad23b35cac5a78440468fc27bfba002 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 29 Oct 2020 18:59:10 +0100 Subject: [PATCH 027/106] Aligning moving window with I/O --- epoch2d/src/epoch2d.F90 | 7 +++--- epoch2d/src/housekeeping/window.F90 | 34 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index d76967e4f..3684d886d 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -163,7 +163,7 @@ PROGRAM pic ELSE time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step) END IF ELSE dt_store = dt @@ -264,12 +264,13 @@ PROGRAM pic IF ((step >= nsteps .AND. nsteps >= 0) & .OR. (time >= t_end) .OR. halt) EXIT - CALL output_routines(step) + !CALL output_routines(step) time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step) + CALL output_routines(step) #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 3c25e23f4..8fd1b63c3 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -435,13 +435,19 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window + SUBROUTINE moving_window(step, force_write) USE diagnostics #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder + INTEGER, INTENT(INOUT) :: step + LOGICAL, INTENT(IN), OPTIONAL :: force_write + INTEGER, SAVE :: nstep_prev = -1 + INTEGER, SAVE :: last_step = -1 + LOGICAL :: force, writeout, print_arrays + INTEGER :: iprefix #endif IF (.NOT. move_window) RETURN @@ -467,6 +473,25 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) + ! Check if an I/O is performed + writeout = .FALSE. + force = .FALSE. + IF (PRESENT(force_write)) force = force_write + + WRITE(*,'("Checkpoint 1")') + + IF (step == nstep_prev .AND. .NOT.force) THEN + writeout = .FALSE. + ELSE + DO iprefix = 1,SIZE(file_prefixes) + WRITE(*,'("Checkpoint 2")') + CALL io_test(iprefix, step, print_arrays, force, prefix_first_call) + IF (.NOT.print_arrays) CYCLE + writeout = .TRUE. + WRITE(*,'("Checkpoint 3")') + END DO + END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) @@ -475,6 +500,13 @@ SUBROUTINE moving_window DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) END DO + WRITE(*,'("Checkpoint 4")') + IF (writeout) THEN + WRITE(*,'("Checkpoint 5")') + CALL shift_window(nremainder) + nremainder = 0 + END IF + CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From aab0e877477a16018ebaed9699219f45b3577f06 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 30 Oct 2020 08:01:08 +0100 Subject: [PATCH 028/106] Aligning moving window with I/O and checkpoints --- epoch2d/src/epoch2d.F90 | 4 +- epoch2d/src/housekeeping/window.F90 | 80 +++++++++++++++++++---------- 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 3684d886d..bb7351dbc 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -264,13 +264,13 @@ PROGRAM pic IF ((step >= nsteps .AND. nsteps >= 0) & .OR. (time >= t_end) .OR. halt) EXIT - !CALL output_routines(step) + CALL output_routines(step) time = time + dt / 2.0_num CALL update_eb_fields_final CALL moving_window(step) - CALL output_routines(step) + !CALL output_routines(step) #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 8fd1b63c3..cbcf4348f 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -435,14 +435,14 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window(step, force_write) + SUBROUTINE moving_window(future_step, force_write) USE diagnostics #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder - INTEGER, INTENT(INOUT) :: step + INTEGER, INTENT(IN) :: future_step LOGICAL, INTENT(IN), OPTIONAL :: force_write INTEGER, SAVE :: nstep_prev = -1 INTEGER, SAVE :: last_step = -1 @@ -473,40 +473,68 @@ SUBROUTINE moving_window(step, force_write) window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) - ! Check if an I/O is performed - writeout = .FALSE. - force = .FALSE. - IF (PRESENT(force_write)) force = force_write - - WRITE(*,'("Checkpoint 1")') - - IF (step == nstep_prev .AND. .NOT.force) THEN - writeout = .FALSE. - ELSE - DO iprefix = 1,SIZE(file_prefixes) - WRITE(*,'("Checkpoint 2")') - CALL io_test(iprefix, step, print_arrays, force, prefix_first_call) - IF (.NOT.print_arrays) CYCLE - writeout = .TRUE. - WRITE(*,'("Checkpoint 3")') - END DO - END IF + ! Check if an I/O is performed + writeout = .FALSE. + force = .FALSE. + IF (PRESENT(force_write)) force = force_write + + IF (rank == 0) THEN + WRITE(*,'("Initial mw check")') + END IF + + IF (future_step == nstep_prev .AND. .NOT.force) THEN + writeout = .FALSE. + ELSE + DO iprefix = 1,SIZE(file_prefixes) + IF (rank == 0) THEN + WRITE(*,'("Checking I/O")') + END IF + CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) + IF (.NOT.print_arrays) CYCLE + writeout = .TRUE. + END DO + END IF + + IF(writeout) THEN + IF (rank == 0) THEN + WRITE(*,'("Alignment required")') + END IF + END IF + + nstep_prev = future_step + +! IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN +! +! window_shift_real = REAL(window_shift_cells, num) +! window_offset = window_offset + window_shift_real * dx +! nremainder = MOD(window_shift_cells, ng) +! CALL shift_window(nremainder) +! nremainder = 0 +! CALL setup_bc_lists +! CALL particle_bcs +! window_shift_fraction = window_shift_fraction - window_shift_real & +! + REAL(nremainder, num) +! IF (rank == 0) THEN +! WRITE(*,'("Performing alignment")') +! END IF +! END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN + window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) + IF (rank == 0) THEN + WRITE(*, '("Chunk shift of window")') + END IF END DO - WRITE(*,'("Checkpoint 4")') - IF (writeout) THEN - WRITE(*,'("Checkpoint 5")') - CALL shift_window(nremainder) - nremainder = 0 + IF (rank == 0 .AND. writeout) THEN + WRITE(*,'("Auto aligned output")') END IF - CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From 1f9d6c8655bd3e9b68527203c749ca26bddbcdff Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 9 Nov 2020 13:30:25 +0100 Subject: [PATCH 029/106] Aligned moving window I/O with checkpoints --- epoch2d/src/deck/deck_io_block.F90 | 2 + epoch2d/src/housekeeping/setup.F90 | 4 + epoch2d/src/housekeeping/window.F90 | 217 ++++++++++++++++++++++++---- epoch2d/src/shared_data.F90 | 4 +- 4 files changed, 196 insertions(+), 31 deletions(-) diff --git a/epoch2d/src/deck/deck_io_block.F90 b/epoch2d/src/deck/deck_io_block.F90 index 0579c280e..30d1b3464 100644 --- a/epoch2d/src/deck/deck_io_block.F90 +++ b/epoch2d/src/deck/deck_io_block.F90 @@ -993,6 +993,7 @@ SUBROUTINE init_io_block(io_block) io_block%name = '' io_block%dt_snapshot = -1.0_num io_block%time_prev = 0.0_num + io_block%buffer_time_prev = 0.0_num io_block%time_first = 0.0_num io_block%dt_average = -1.0_num io_block%dt_min_average = -1.0_num @@ -1000,6 +1001,7 @@ SUBROUTINE init_io_block(io_block) io_block%average_time_start = -1.0_num io_block%nstep_snapshot = -1 io_block%nstep_prev = 0 + io_block%buffer_nstep_prev = 0 io_block%nstep_first = 0 io_block%nstep_average = -1 io_block%restart = .FALSE. diff --git a/epoch2d/src/housekeeping/setup.F90 b/epoch2d/src/housekeeping/setup.F90 index c8baaa0a1..27f20c28f 100644 --- a/epoch2d/src/housekeeping/setup.F90 +++ b/epoch2d/src/housekeeping/setup.F90 @@ -989,13 +989,17 @@ SUBROUTINE restart_data(step) END IF IF (io_block_list(i)%dt_snapshot > 0.0_num) THEN io_block_list(i)%time_prev = time + io_block_list(i)%buffer_time_prev = time ELSE io_block_list(i)%time_prev = 0.0_num + io_block_list(i)%buffer_time_prev = 0.0_num END IF IF (io_block_list(i)%nstep_snapshot > 0) THEN io_block_list(i)%nstep_prev = step + io_block_list(i)%buffer_nstep_prev = step ELSE io_block_list(i)%nstep_prev = 0 + io_block_list(i)%buffer_nstep_prev = 0 END IF io_block_list(i)%walltime_prev = time IF (ALLOCATED(io_block_list(i)%dump_at_nsteps)) THEN diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index cbcf4348f..a3325141f 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -435,19 +435,23 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window(future_step, force_write) + SUBROUTINE moving_window(step, force_write) USE diagnostics + USE deck_io_block #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder - INTEGER, INTENT(IN) :: future_step + INTEGER, INTENT(IN) :: step + INTEGER :: future_step LOGICAL, INTENT(IN), OPTIONAL :: force_write INTEGER, SAVE :: nstep_prev = -1 INTEGER, SAVE :: last_step = -1 - LOGICAL :: force, writeout, print_arrays + LOGICAL :: force, writeout, print_arrays, dump INTEGER :: iprefix + INTEGER :: io, is, nstep_next = 0 + REAL(num) :: time0, time1, time_first #endif IF (.NOT. move_window) RETURN @@ -476,49 +480,204 @@ SUBROUTINE moving_window(future_step, force_write) ! Check if an I/O is performed writeout = .FALSE. force = .FALSE. + future_step = step + 1 IF (PRESENT(force_write)) force = force_write IF (rank == 0) THEN WRITE(*,'("Initial mw check")') END IF - IF (future_step == nstep_prev .AND. .NOT.force) THEN - writeout = .FALSE. - ELSE - DO iprefix = 1,SIZE(file_prefixes) - IF (rank == 0) THEN - WRITE(*,'("Checking I/O")') + ! Work out the time that the next dump will occur based on the + ! current timestep +! DO io = 1, n_io_blocks +! +! time0 = HUGE(1.0_num) +! time1 = HUGE(1.0_num) +! IF (io_block_list(io)%dt_snapshot >= 0.0_num) & +! time0 = io_block_list(io)%time_prev + io_block_list(io)%dt_snapshot +! IF (io_block_list(io)%nstep_snapshot >= 0) THEN +! nstep_next = io_block_list(io)%nstep_prev & +! + io_block_list(io)%nstep_snapshot +! time1 = time + dt * (nstep_next - (step + 1)) +! END IF +! +! IF (time0 < time1) THEN +! ! Next I/O dump based on dt_snapshot +! time_first = time0 +! IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN +! ! Store the most recent output time that qualifies +! writeout = .TRUE. +! END IF +! ELSE +! ! Next I/O dump based on nstep_snapshot +! time_first = time1 +! IF (io_block_list(io)%nstep_snapshot > 0 & +! .AND. (step + 1 ) >= nstep_next) THEN +! ! Store the most recent output step that qualifies +! writeout = .TRUE. +! END IF +! END IF +! +! END DO + + DO io = 1, n_io_blocks + io_block_list(io)%dump = .FALSE. + + + time0 = io_block_list(io)%walltime_interval + IF (time0 > 0.0_num) THEN + IF (elapsed_time - io_block_list(io)%walltime_prev >= time0) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%walltime_prev = elapsed_time + END IF END IF - CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) - IF (.NOT.print_arrays) CYCLE - writeout = .TRUE. - END DO + + IF (ALLOCATED(io_block_list(io)%dump_at_nsteps)) THEN + DO is = 1, SIZE(io_block_list(io)%dump_at_nsteps) + IF ((step + 1)>= io_block_list(io)%dump_at_nsteps(is)) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%dump_at_nsteps(is) = HUGE(1) + END IF + END DO END IF - IF(writeout) THEN + IF (ALLOCATED(io_block_list(io)%dump_at_times)) THEN + DO is = 1, SIZE(io_block_list(io)%dump_at_times) + IF (time >= io_block_list(io)%dump_at_times(is)) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%dump_at_times(is) = HUGE(1.0_num) + END IF + END DO + END IF + + IF (ALLOCATED(io_block_list(io)%dump_at_walltimes)) THEN + DO is = 1, SIZE(io_block_list(io)%dump_at_walltimes) + IF (elapsed_time >= io_block_list(io)%dump_at_walltimes(is)) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%dump_at_walltimes(is) = HUGE(1.0_num) + END IF + END DO + END IF + + ! Work out the time that the next dump will occur based on the + ! current timestep + time0 = HUGE(1.0_num) + time1 = HUGE(1.0_num) + IF (io_block_list(io)%dt_snapshot >= 0.0_num) & + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (io_block_list(io)%nstep_snapshot >= 0) THEN + nstep_next = io_block_list(io)%buffer_nstep_prev + 1 & + + io_block_list(io)%nstep_snapshot + time1 = time + dt * (nstep_next - step) + END IF + + IF (time0 < time1) THEN + ! Next I/O dump based on dt_snapshot + time_first = time0 IF (rank == 0) THEN - WRITE(*,'("Alignment required")') + print *, "time = ", time, "and time0 = ", time0 + END IF + IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN + ! Store the most recent output time that qualifies + DO + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (time0 > time) EXIT + io_block_list(io)%buffer_time_prev = time0 + END DO + IF (rank == 0) THEN + print *, "dt_snapshot condition for aligned output true" + END IF + dump = .TRUE. + IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. + IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. + IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. + IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. + IF (dump .AND. time < time_start) dump = .FALSE. + IF (dump .AND. time > time_stop) dump = .FALSE. + IF (dump .AND. (step) < nstep_start) dump = .FALSE. + IF (dump .AND. (step) > nstep_stop) dump = .FALSE. + IF (dump) writeout = .TRUE. +! IF (rank == 0) THEN +! print *, "dt_snapshot condition for aligned output true" +! END IF + END IF + ELSE + ! Next I/O dump based on nstep_snapshot + time_first = time1 + IF (io_block_list(io)%nstep_snapshot > 0 & + .AND. (step) >= nstep_next) THEN + ! Store the most recent output step that qualifies + DO + nstep_next = io_block_list(io)%buffer_nstep_prev & + + io_block_list(io)%nstep_snapshot + IF (nstep_next > step) EXIT + io_block_list(io)%buffer_nstep_prev = nstep_next + END DO + dump = .TRUE. + IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. + IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. + IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. + IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. + IF (dump .AND. time < time_start) dump = .FALSE. + IF (dump .AND. time > time_stop) dump = .FALSE. + IF (dump .AND. (step) < nstep_start) dump = .FALSE. + IF (dump .AND. (step) > nstep_stop) dump = .FALSE. + IF (dump) writeout = .TRUE. + IF (rank == 0) THEN + print *, "nstep_snapshot condition for aligned output true" + END IF END IF END IF - nstep_prev = future_step + END DO -! IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN -! -! window_shift_real = REAL(window_shift_cells, num) -! window_offset = window_offset + window_shift_real * dx -! nremainder = MOD(window_shift_cells, ng) -! CALL shift_window(nremainder) -! nremainder = 0 -! CALL setup_bc_lists -! CALL particle_bcs -! window_shift_fraction = window_shift_fraction - window_shift_real & -! + REAL(nremainder, num) -! IF (rank == 0) THEN -! WRITE(*,'("Performing alignment")') + + + +! DO io = 1, n_io_blocks +! IF(io_block_list(io)%dump) THEN +! writeout = .TRUE. ! END IF +! END DO + + +! IF (future_step == nstep_prev .AND. .NOT.force) THEN +! writeout = .FALSE. +! ELSE +! DO iprefix = 1,SIZE(file_prefixes) +! IF (rank == 0) THEN +! WRITE(*,'("Checking I/O")') +! END IF +! CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) +! IF (.NOT.print_arrays) CYCLE +! writeout = .TRUE. +! END DO ! END IF + IF(writeout) THEN + IF (rank == 0) THEN + WRITE(*,'("Alignment required")') + END IF + END IF + +! nstep_prev = future_step + + IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN + + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) + CALL shift_window(nremainder) + nremainder = 0 + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + IF (rank == 0) THEN + WRITE(*,'("Performing alignment")') + END IF + END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN diff --git a/epoch2d/src/shared_data.F90 b/epoch2d/src/shared_data.F90 index be1cfa42c..9052ba8da 100644 --- a/epoch2d/src/shared_data.F90 +++ b/epoch2d/src/shared_data.F90 @@ -306,7 +306,7 @@ MODULE shared_data TYPE io_block_type CHARACTER(LEN=string_length) :: name - REAL(num) :: dt_snapshot, time_prev, time_first + REAL(num) :: dt_snapshot, time_prev, time_first, buffer_time_prev REAL(num) :: dt_average, dt_min_average, average_time, average_time_start REAL(num) :: time_start, time_stop REAL(num) :: walltime_interval, walltime_prev @@ -314,7 +314,7 @@ MODULE shared_data REAL(num), ALLOCATABLE :: dump_at_times(:) REAL(num), ALLOCATABLE :: dump_at_walltimes(:) INTEGER, ALLOCATABLE :: dump_at_nsteps(:) - INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average + INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average, buffer_nstep_prev INTEGER :: nstep_start, nstep_stop, dump_cycle, prefix_index INTEGER :: dump_cycle_first_index LOGICAL :: restart, dump, any_average, dump_first, dump_last From 0597bec798ad13c1fc72f2817ae70ef34ace536a Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 9 Nov 2020 14:20:56 +0100 Subject: [PATCH 030/106] Bugfix for aligned I/O with checkpoints --- epoch2d/src/housekeeping/window.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index a3325141f..e834259fc 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -566,7 +566,7 @@ SUBROUTINE moving_window(step, force_write) IF (io_block_list(io)%dt_snapshot >= 0.0_num) & time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot IF (io_block_list(io)%nstep_snapshot >= 0) THEN - nstep_next = io_block_list(io)%buffer_nstep_prev + 1 & + nstep_next = io_block_list(io)%buffer_nstep_prev & + io_block_list(io)%nstep_snapshot time1 = time + dt * (nstep_next - step) END IF @@ -577,7 +577,7 @@ SUBROUTINE moving_window(step, force_write) IF (rank == 0) THEN print *, "time = ", time, "and time0 = ", time0 END IF - IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN + IF (io_block_list(io)%dt_snapshot > 0 .AND. (time + dt) >= time0) THEN ! Store the most recent output time that qualifies DO time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot @@ -605,7 +605,7 @@ SUBROUTINE moving_window(step, force_write) ! Next I/O dump based on nstep_snapshot time_first = time1 IF (io_block_list(io)%nstep_snapshot > 0 & - .AND. (step) >= nstep_next) THEN + .AND. (step + 1) >= nstep_next) THEN ! Store the most recent output step that qualifies DO nstep_next = io_block_list(io)%buffer_nstep_prev & From 8482eafbac6dd90a4d600c5cf062546f004ffbb2 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 9 Nov 2020 15:14:07 +0100 Subject: [PATCH 031/106] Cleaned version: Aligned moving window with I/O --- epoch2d/src/housekeeping/window.F90 | 235 ++++++---------------------- 1 file changed, 51 insertions(+), 184 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index e834259fc..a00280ed0 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -433,131 +433,19 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + SUBROUTINE mw_io_test(step, dump) + USE diagnostics + USE deck_io_block - - SUBROUTINE moving_window(step, force_write) - USE diagnostics - USE deck_io_block - -#ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real, window_shift_steps - INTEGER :: window_shift_cells, errcode = 0 - INTEGER :: i, nchunks, nremainder INTEGER, INTENT(IN) :: step - INTEGER :: future_step - LOGICAL, INTENT(IN), OPTIONAL :: force_write - INTEGER, SAVE :: nstep_prev = -1 - INTEGER, SAVE :: last_step = -1 - LOGICAL :: force, writeout, print_arrays, dump - INTEGER :: iprefix + LOGICAL, INTENT(OUT) :: dump INTEGER :: io, is, nstep_next = 0 REAL(num) :: time0, time1, time_first -#endif - - IF (.NOT. move_window) RETURN - -#ifndef PER_SPECIES_WEIGHT - IF (.NOT. window_started) THEN - IF (time >= window_start_time .AND. time < window_stop_time) THEN - bc_field(c_bd_x_min) = bc_x_min_after_move - bc_field(c_bd_x_max) = bc_x_max_after_move - bc_field(c_bd_y_min) = bc_y_min_after_move - bc_field(c_bd_y_max) = bc_y_max_after_move - CALL setup_boundaries - IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num - window_started = .TRUE. - END IF - END IF - - ! If we have a moving window then update the window position - IF (window_started) THEN - IF (time >= window_stop_time) RETURN - IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) - IF (window_v_x <= 0.0_num) RETURN - window_shift_fraction = window_shift_fraction + dt * window_v_x / dx - window_shift_cells = FLOOR(window_shift_fraction) - - ! Check if an I/O is performed - writeout = .FALSE. - force = .FALSE. - future_step = step + 1 - IF (PRESENT(force_write)) force = force_write - - IF (rank == 0) THEN - WRITE(*,'("Initial mw check")') - END IF - - ! Work out the time that the next dump will occur based on the - ! current timestep -! DO io = 1, n_io_blocks -! -! time0 = HUGE(1.0_num) -! time1 = HUGE(1.0_num) -! IF (io_block_list(io)%dt_snapshot >= 0.0_num) & -! time0 = io_block_list(io)%time_prev + io_block_list(io)%dt_snapshot -! IF (io_block_list(io)%nstep_snapshot >= 0) THEN -! nstep_next = io_block_list(io)%nstep_prev & -! + io_block_list(io)%nstep_snapshot -! time1 = time + dt * (nstep_next - (step + 1)) -! END IF -! -! IF (time0 < time1) THEN -! ! Next I/O dump based on dt_snapshot -! time_first = time0 -! IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN -! ! Store the most recent output time that qualifies -! writeout = .TRUE. -! END IF -! ELSE -! ! Next I/O dump based on nstep_snapshot -! time_first = time1 -! IF (io_block_list(io)%nstep_snapshot > 0 & -! .AND. (step + 1 ) >= nstep_next) THEN -! ! Store the most recent output step that qualifies -! writeout = .TRUE. -! END IF -! END IF -! -! END DO + dump = .FALSE. DO io = 1, n_io_blocks - io_block_list(io)%dump = .FALSE. - time0 = io_block_list(io)%walltime_interval - IF (time0 > 0.0_num) THEN - IF (elapsed_time - io_block_list(io)%walltime_prev >= time0) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%walltime_prev = elapsed_time - END IF - END IF - - IF (ALLOCATED(io_block_list(io)%dump_at_nsteps)) THEN - DO is = 1, SIZE(io_block_list(io)%dump_at_nsteps) - IF ((step + 1)>= io_block_list(io)%dump_at_nsteps(is)) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%dump_at_nsteps(is) = HUGE(1) - END IF - END DO - END IF - - IF (ALLOCATED(io_block_list(io)%dump_at_times)) THEN - DO is = 1, SIZE(io_block_list(io)%dump_at_times) - IF (time >= io_block_list(io)%dump_at_times(is)) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%dump_at_times(is) = HUGE(1.0_num) - END IF - END DO - END IF - - IF (ALLOCATED(io_block_list(io)%dump_at_walltimes)) THEN - DO is = 1, SIZE(io_block_list(io)%dump_at_walltimes) - IF (elapsed_time >= io_block_list(io)%dump_at_walltimes(is)) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%dump_at_walltimes(is) = HUGE(1.0_num) - END IF - END DO - END IF ! Work out the time that the next dump will occur based on the ! current timestep @@ -566,17 +454,14 @@ SUBROUTINE moving_window(step, force_write) IF (io_block_list(io)%dt_snapshot >= 0.0_num) & time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot IF (io_block_list(io)%nstep_snapshot >= 0) THEN - nstep_next = io_block_list(io)%buffer_nstep_prev & + nstep_next = io_block_list(io)%buffer_nstep_prev & + io_block_list(io)%nstep_snapshot - time1 = time + dt * (nstep_next - step) + time1 = time + dt * (nstep_next - step) END IF IF (time0 < time1) THEN ! Next I/O dump based on dt_snapshot time_first = time0 - IF (rank == 0) THEN - print *, "time = ", time, "and time0 = ", time0 - END IF IF (io_block_list(io)%dt_snapshot > 0 .AND. (time + dt) >= time0) THEN ! Store the most recent output time that qualifies DO @@ -584,22 +469,7 @@ SUBROUTINE moving_window(step, force_write) IF (time0 > time) EXIT io_block_list(io)%buffer_time_prev = time0 END DO - IF (rank == 0) THEN - print *, "dt_snapshot condition for aligned output true" - END IF dump = .TRUE. - IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. - IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. - IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. - IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. - IF (dump .AND. time < time_start) dump = .FALSE. - IF (dump .AND. time > time_stop) dump = .FALSE. - IF (dump .AND. (step) < nstep_start) dump = .FALSE. - IF (dump .AND. (step) > nstep_stop) dump = .FALSE. - IF (dump) writeout = .TRUE. -! IF (rank == 0) THEN -! print *, "dt_snapshot condition for aligned output true" -! END IF END IF ELSE ! Next I/O dump based on nstep_snapshot @@ -614,68 +484,68 @@ SUBROUTINE moving_window(step, force_write) io_block_list(io)%buffer_nstep_prev = nstep_next END DO dump = .TRUE. - IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. - IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. - IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. - IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. - IF (dump .AND. time < time_start) dump = .FALSE. - IF (dump .AND. time > time_stop) dump = .FALSE. - IF (dump .AND. (step) < nstep_start) dump = .FALSE. - IF (dump .AND. (step) > nstep_stop) dump = .FALSE. - IF (dump) writeout = .TRUE. - IF (rank == 0) THEN - print *, "nstep_snapshot condition for aligned output true" - END IF END IF END IF END DO + END SUBROUTINE mw_io_test + + SUBROUTINE moving_window(step) + USE diagnostics + USE deck_io_block + +#ifndef PER_SPECIES_WEIGHT + REAL(num) :: window_shift_real, window_shift_steps + INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nremainder + INTEGER, INTENT(IN) :: step + LOGICAL :: dump +! INTEGER :: io, is, nstep_next = 0 +! REAL(num) :: time0, time1, time_first +#endif + IF (.NOT. move_window) RETURN -! DO io = 1, n_io_blocks -! IF(io_block_list(io)%dump) THEN -! writeout = .TRUE. -! END IF -! END DO - - -! IF (future_step == nstep_prev .AND. .NOT.force) THEN -! writeout = .FALSE. -! ELSE -! DO iprefix = 1,SIZE(file_prefixes) -! IF (rank == 0) THEN -! WRITE(*,'("Checking I/O")') -! END IF -! CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) -! IF (.NOT.print_arrays) CYCLE -! writeout = .TRUE. -! END DO -! END IF - - IF(writeout) THEN - IF (rank == 0) THEN - WRITE(*,'("Alignment required")') - END IF +#ifndef PER_SPECIES_WEIGHT + IF (.NOT. window_started) THEN + IF (time >= window_start_time .AND. time < window_stop_time) THEN + bc_field(c_bd_x_min) = bc_x_min_after_move + bc_field(c_bd_x_max) = bc_x_max_after_move + bc_field(c_bd_y_min) = bc_y_min_after_move + bc_field(c_bd_y_max) = bc_y_max_after_move + CALL setup_boundaries + IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num + window_started = .TRUE. END IF + END IF + + ! If we have a moving window then update the window position + IF (window_started) THEN + IF (time >= window_stop_time) RETURN + IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) + IF (window_v_x <= 0.0_num) RETURN + window_shift_fraction = window_shift_fraction + dt * window_v_x / dx + window_shift_cells = FLOOR(window_shift_fraction) -! nstep_prev = future_step + CALL mw_io_test(step, dump) - IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN + + IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. dump) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) +! IF(rank == 0) THEN +! print *, "Performing alignment" +! END IF CALL shift_window(nremainder) nremainder = 0 CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & + REAL(nremainder, num) - IF (rank == 0) THEN - WRITE(*,'("Performing alignment")') - END IF END IF @@ -687,13 +557,10 @@ SUBROUTINE moving_window(step, force_write) nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) - IF (rank == 0) THEN - WRITE(*, '("Chunk shift of window")') - END IF END DO - IF (rank == 0 .AND. writeout) THEN - WRITE(*,'("Auto aligned output")') - END IF +! IF(rank == 0 .AND. dump) THEN +! print *, "Auto-aligned output" +! END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From fd00fc1e25240f2cf36f3c654f76a9bf67e80b9a Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 13 Nov 2020 14:25:07 +0100 Subject: [PATCH 032/106] Aligned moving window with I/O in epoch3d --- epoch3d/Makefile | 6 +- epoch3d/src/deck/deck_io_block.F90 | 2 + epoch3d/src/housekeeping/setup.F90 | 6 +- epoch3d/src/housekeeping/window.F90 | 87 ++++++++++++++++++++++++++++- epoch3d/src/io/diagnostics.F90 | 2 +- epoch3d/src/shared_data.F90 | 5 +- 6 files changed, 99 insertions(+), 9 deletions(-) diff --git a/epoch3d/Makefile b/epoch3d/Makefile index eb801be70..770052e2c 100644 --- a/epoch3d/Makefile +++ b/epoch3d/Makefile @@ -496,7 +496,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch3d.o: epoch3d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -538,7 +538,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - window.o $(SDFMOD) + $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -553,4 +553,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o +window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o diff --git a/epoch3d/src/deck/deck_io_block.F90 b/epoch3d/src/deck/deck_io_block.F90 index dae7e80d5..3d20c57c4 100644 --- a/epoch3d/src/deck/deck_io_block.F90 +++ b/epoch3d/src/deck/deck_io_block.F90 @@ -1004,6 +1004,7 @@ SUBROUTINE init_io_block(io_block) io_block%name = '' io_block%dt_snapshot = -1.0_num io_block%time_prev = 0.0_num + io_block%buffer_time_prev = 0.0_num io_block%time_first = 0.0_num io_block%dt_average = -1.0_num io_block%dt_min_average = -1.0_num @@ -1011,6 +1012,7 @@ SUBROUTINE init_io_block(io_block) io_block%average_time_start = -1.0_num io_block%nstep_snapshot = -1 io_block%nstep_prev = 0 + io_block%buffer_nstep_prev = 0 io_block%nstep_first = 0 io_block%nstep_average = -1 io_block%restart = .FALSE. diff --git a/epoch3d/src/housekeeping/setup.F90 b/epoch3d/src/housekeeping/setup.F90 index b0e499d6f..7b5015c66 100644 --- a/epoch3d/src/housekeeping/setup.F90 +++ b/epoch3d/src/housekeeping/setup.F90 @@ -23,7 +23,7 @@ MODULE setup USE shunt USE laser USE injectors - USE window +! USE window USE timer USE helper USE balance @@ -1060,13 +1060,17 @@ SUBROUTINE restart_data(step) END IF IF (io_block_list(i)%dt_snapshot > 0.0_num) THEN io_block_list(i)%time_prev = time + io_block_list(i)%buffer_time_prev = time ELSE io_block_list(i)%time_prev = 0.0_num + io_block_list(i)%buffer_time_prev = 0.0_num END IF IF (io_block_list(i)%nstep_snapshot > 0) THEN io_block_list(i)%nstep_prev = step + io_block_list(i)%buffer_nstep_prev = step ELSE io_block_list(i)%nstep_prev = 0 + io_block_list(i)%buffer_nstep_prev = 0 END IF io_block_list(i)%walltime_prev = time IF (ALLOCATED(io_block_list(i)%dump_at_nsteps)) THEN diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 1f755b6c7..7e858dfc6 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -22,7 +22,7 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:,:), temperature(:,:,:), drift(:,:,:) - REAL(num), SAVE :: window_shift_fraction +! REAL(num), SAVE :: window_shift_fraction CONTAINS @@ -486,14 +486,77 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + SUBROUTINE mw_io_test(step, dump) +! USE diagnostics + USE deck_io_block + + INTEGER, INTENT(IN) :: step + LOGICAL, INTENT(OUT) :: dump + INTEGER :: io, is, nstep_next = 0 + REAL(num) :: time0, time1, time_first + + dump = .FALSE. + DO io = 1, n_io_blocks + + time0 = io_block_list(io)%walltime_interval + + ! Work out the time that the next dump will occur based on the + ! current timestep + time0 = HUGE(1.0_num) + time1 = HUGE(1.0_num) + IF (io_block_list(io)%dt_snapshot >= 0.0_num) & + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (io_block_list(io)%nstep_snapshot >= 0) THEN + nstep_next = io_block_list(io)%buffer_nstep_prev & + + io_block_list(io)%nstep_snapshot + time1 = time + dt * (nstep_next - step) + END IF + + IF (time0 < time1) THEN + ! Next I/O dump based on dt_snapshot + time_first = time0 + IF (io_block_list(io)%dt_snapshot > 0 .AND. (time + dt) >= time0) THEN + ! Store the most recent output time that qualifies + DO + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (time0 > time) EXIT + io_block_list(io)%buffer_time_prev = time0 + END DO + dump = .TRUE. + END IF + ELSE + ! Next I/O dump based on nstep_snapshot + time_first = time1 + IF (io_block_list(io)%nstep_snapshot > 0 & + .AND. (step + 1) >= nstep_next) THEN + ! Store the most recent output step that qualifies + DO + nstep_next = io_block_list(io)%buffer_nstep_prev & + + io_block_list(io)%nstep_snapshot + IF (nstep_next > step) EXIT + io_block_list(io)%buffer_nstep_prev = nstep_next + END DO + dump = .TRUE. + END IF + END IF + + END DO + + END SUBROUTINE mw_io_test + - SUBROUTINE moving_window + + SUBROUTINE moving_window(step) +! USE diagnostics + USE deck_io_block #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nremainder + INTEGER, INTENT(IN) :: step + LOGICAL :: dump #endif IF (.NOT. move_window) RETURN @@ -520,6 +583,26 @@ SUBROUTINE moving_window IF (window_v_x <= 0.0_num) RETURN window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) + + CALL mw_io_test(step, dump) + + + IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. dump) THEN + + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) +! IF(rank == 0) THEN +! print *, "Performing alignment" +! END IF + CALL shift_window(nremainder) + nremainder = 0 + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) diff --git a/epoch3d/src/io/diagnostics.F90 b/epoch3d/src/io/diagnostics.F90 index 770694b20..7a5787dc1 100644 --- a/epoch3d/src/io/diagnostics.F90 +++ b/epoch3d/src/io/diagnostics.F90 @@ -27,7 +27,7 @@ MODULE diagnostics USE setup USE deck_io_block USE strings - USE window +! USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch3d/src/shared_data.F90 b/epoch3d/src/shared_data.F90 index 418426673..4b6bd4229 100644 --- a/epoch3d/src/shared_data.F90 +++ b/epoch3d/src/shared_data.F90 @@ -307,7 +307,7 @@ MODULE shared_data TYPE io_block_type CHARACTER(LEN=string_length) :: name - REAL(num) :: dt_snapshot, time_prev, time_first + REAL(num) :: dt_snapshot, time_prev, time_first, buffer_time_prev REAL(num) :: dt_average, dt_min_average, average_time, average_time_start REAL(num) :: time_start, time_stop REAL(num) :: walltime_interval, walltime_prev @@ -315,7 +315,7 @@ MODULE shared_data REAL(num), ALLOCATABLE :: dump_at_times(:) REAL(num), ALLOCATABLE :: dump_at_walltimes(:) INTEGER, ALLOCATABLE :: dump_at_nsteps(:) - INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average + INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average, buffer_nstep_prev INTEGER :: nstep_start, nstep_stop, dump_cycle, prefix_index INTEGER :: dump_cycle_first_index LOGICAL :: restart, dump, any_average, dump_first, dump_last @@ -591,6 +591,7 @@ MODULE shared_data INTEGER :: bc_z_min_after_move = c_bc_null INTEGER :: bc_z_max_after_move = c_bc_null REAL(num) :: window_offset + REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- From 60df12d2bdaa512f98b6b46089926fde2360c9c4 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 13 Nov 2020 14:44:35 +0100 Subject: [PATCH 033/106] Cleaned dependencies --- epoch2d/Makefile | 6 +++--- epoch2d/src/housekeeping/setup.F90 | 1 + epoch2d/src/housekeeping/window.F90 | 5 +++-- epoch2d/src/io/diagnostics.F90 | 1 + epoch2d/src/shared_data.F90 | 2 +- epoch3d/Makefile | 6 +++--- epoch3d/src/housekeeping/setup.F90 | 2 +- epoch3d/src/housekeeping/window.F90 | 2 +- epoch3d/src/io/diagnostics.F90 | 2 +- epoch3d/src/shared_data.F90 | 2 +- 10 files changed, 16 insertions(+), 13 deletions(-) diff --git a/epoch2d/Makefile b/epoch2d/Makefile index 547459187..8ab2ba5e1 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -496,7 +496,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch2d.o: epoch2d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -538,7 +538,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - $(SDFMOD) + window.o $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -553,4 +553,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o diff --git a/epoch2d/src/housekeeping/setup.F90 b/epoch2d/src/housekeeping/setup.F90 index 27f20c28f..8b792585b 100644 --- a/epoch2d/src/housekeeping/setup.F90 +++ b/epoch2d/src/housekeeping/setup.F90 @@ -23,6 +23,7 @@ MODULE setup USE shunt USE laser USE injectors + USE window USE timer USE helper USE balance diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index a00280ed0..9a198257c 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -22,6 +22,7 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:), temperature(:,:), drift(:,:) + REAL(num), SAVE :: window_shift_fraction CONTAINS @@ -434,7 +435,7 @@ END SUBROUTINE remove_particles #endif SUBROUTINE mw_io_test(step, dump) - USE diagnostics +! USE diagnostics USE deck_io_block INTEGER, INTENT(IN) :: step @@ -493,7 +494,7 @@ END SUBROUTINE mw_io_test SUBROUTINE moving_window(step) - USE diagnostics +! USE diagnostics USE deck_io_block #ifndef PER_SPECIES_WEIGHT diff --git a/epoch2d/src/io/diagnostics.F90 b/epoch2d/src/io/diagnostics.F90 index aa6b4ab03..efbb9c69d 100644 --- a/epoch2d/src/io/diagnostics.F90 +++ b/epoch2d/src/io/diagnostics.F90 @@ -27,6 +27,7 @@ MODULE diagnostics USE setup USE deck_io_block USE strings + USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch2d/src/shared_data.F90 b/epoch2d/src/shared_data.F90 index 9052ba8da..7ba66f608 100644 --- a/epoch2d/src/shared_data.F90 +++ b/epoch2d/src/shared_data.F90 @@ -568,7 +568,7 @@ MODULE shared_data INTEGER :: bc_y_min_after_move = c_bc_null INTEGER :: bc_y_max_after_move = c_bc_null REAL(num) :: window_offset - REAL(num) :: window_shift_fraction +! REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- diff --git a/epoch3d/Makefile b/epoch3d/Makefile index 770052e2c..eb801be70 100644 --- a/epoch3d/Makefile +++ b/epoch3d/Makefile @@ -496,7 +496,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch3d.o: epoch3d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -538,7 +538,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - $(SDFMOD) + window.o $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -553,4 +553,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o diff --git a/epoch3d/src/housekeeping/setup.F90 b/epoch3d/src/housekeeping/setup.F90 index 7b5015c66..2a6988cff 100644 --- a/epoch3d/src/housekeeping/setup.F90 +++ b/epoch3d/src/housekeeping/setup.F90 @@ -23,7 +23,7 @@ MODULE setup USE shunt USE laser USE injectors -! USE window + USE window USE timer USE helper USE balance diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 7e858dfc6..114b3a3f1 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -22,7 +22,7 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:,:), temperature(:,:,:), drift(:,:,:) -! REAL(num), SAVE :: window_shift_fraction + REAL(num), SAVE :: window_shift_fraction CONTAINS diff --git a/epoch3d/src/io/diagnostics.F90 b/epoch3d/src/io/diagnostics.F90 index 7a5787dc1..770694b20 100644 --- a/epoch3d/src/io/diagnostics.F90 +++ b/epoch3d/src/io/diagnostics.F90 @@ -27,7 +27,7 @@ MODULE diagnostics USE setup USE deck_io_block USE strings -! USE window + USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch3d/src/shared_data.F90 b/epoch3d/src/shared_data.F90 index 4b6bd4229..c068a52ef 100644 --- a/epoch3d/src/shared_data.F90 +++ b/epoch3d/src/shared_data.F90 @@ -591,7 +591,7 @@ MODULE shared_data INTEGER :: bc_z_min_after_move = c_bc_null INTEGER :: bc_z_max_after_move = c_bc_null REAL(num) :: window_offset - REAL(num) :: window_shift_fraction +! REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- From 1cd1cd85be402cfe08390dc80e0f1f7e13557880 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Sun, 29 Nov 2020 17:03:21 +0100 Subject: [PATCH 034/106] Initial steps towards current vectorization --- epoch2d/src/current_deposition.F90 | 284 +++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 epoch2d/src/current_deposition.F90 diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 new file mode 100644 index 000000000..a46d4ff07 --- /dev/null +++ b/epoch2d/src/current_deposition.F90 @@ -0,0 +1,284 @@ + + + SUBROUTINE get_particle(part_data, n, part) + TYPE(particle_data), POINTER, INTENT(IN) :: part_data + INTEGER(i8), INTENT(IN) :: n + TYPE(particle), INTENT(INOUT) :: part + + IF (n >= 1 .AND. n <= part_data%count) THEN + part%pos = part_data%pos(n) + part%mom = part_data%mom(n) + part%mass = part_data%mass(n) + part%weight = part_data%weight(n) + part%charge = part_data%charge(n) + ENDIF + + END SUBROUTINE get_particle + + SUBROUTINE particle_sort + + ! This routine sorts the particles such that + ! the memory access to the particle list + ! is contigious + + END SUBROUTINE particle sort + + + SUBROUTINE current_deposition_VB_triangle + +#ifdef INTEL_VECTORISATION +!dir$ attributes align:64 :: gx +!dir$ attributes align:64 :: gy +!dir$ attributes align:64 :: gz +#endif + +#ifdef INTEL_VECTORISATION +!dir$ attributes align:64 :: hx +!dir$ attributes align:64 :: hy +!dir$ attributes align:64 :: hz +#endif + +#ifdef IBM_VECTORISATION +!IBM* ALIGN(64, gx, gy, gz) +#endif + +#ifdef IBM_VECTORISATION +!IBM* ALIGN(64, hx, hy, hz) +#endif + +! Similarly all other arrays need to be aligned + + DO ispec = 1, n_species + species => species_list(ispec) + + IF (species%immobile) CYCLE + + part_data => species%part_data + npart = part_data%count + + CALL particle_sort ! I guess this routine should be called in the Boris pusher + ! such that both gx and hx are in sync + + DO np = 1, npart, LVEC + + !$OMP SIMD + DO n = 1, MIN(LVEC, npart - np + 1) + + CALL get_particle(part_data, n, part) + +#ifndef PER_SPECIES_WEIGHT + part_weight(n) = part(n)%weight + fcx(n) = idty * part_weight(n) + fcy(n) = idtx * part_weight(n) + fcz(n) = idxy * part_weight(n) +#endif +#ifndef NO_PARTICLE_PROBES + init_part_x(n) = part(n)%part_pos(1) + init_part_y(n) = part(n)%part_pos(2) +#endif +#ifdef PER_PARTICLE_CHARGE_MASS + part_q(n) = part(n)%charge + part_m(n) = part(n)%mass + part_mc(n) = c * part(n)%mass + ipart_mc(n) = 1.0_num / part_mc(n) + cmratio(n) = part_q(n) * dtfac * ipart_mc(n) + ccmratio(n) = c * cmratio(n) +#ifndef NO_PARTICLE_PROBES + part_mc2 = c * part_mc +#endif +#endif + + !Copy the particle properties out for speed + part_x(n) = part(n)%part_pos(1) - x_grid_min_local + part_y(n) = part(n)%part_pos(2) - y_grid_min_local + part_ux(n) = part(n)%part_p(1) * ipart_mc(n) + part_uy(n) = part(n)%part_p(2) * ipart_mc(n) + part_uz(n) = part(n)%part_p(3) * ipart_mc(n) + + !Now advance to t+1.5dt to calculate current + !For efficient vectorization, I would prefer + !part_x(n) = part(n)%part_pos(1) - x_grid_min_local + delta_x + !part_y(n) = part(n)%part_pos(2) - x_grid_min_local + delta_x + !This eliminates the dependency on previous step + part_x(n) = part_x(n) + delta_x(n) + part_y(n) = part_y(n) + delta_y(n) + + !Delta-f calculation: subtract background from calculated current +#ifdef DELTAF_METHOD + weight_back(n) = part(n)%pvol * f0(ispecies, part_mc(n) / c, & + part(n)%part_p) + fcx(n) = idty * (part_weight(n) - weight_back(n)) + fcy(n) = idtx * (part_weight(n) - weight_back(n)) + fcz(n) = idxy * (part_weight(n) - weight_back(n)) +#endif + + cell_x_r(n) = part_x(n) * idx + cell_y_r(n) = part_y(n) * idy + + cell_x3(n) = FLOOR(cell_x_r(n) + 0.5_num) + cell_y3(n) = FLOOR(cell_y_r(n) + 0.5_num) + + cell_frac_x(n) = REAL(cell_x3(n), num) - cell_x_r(n) + cell_frac_y(n) = REAL(cell_y3(n), num) - cell_y_r(n) + + fjx(n) = fcx(n) * part_q(n) + fjy(n) = fcy(n) * part_q(n) + fjz(n) = fcz(n) * part_q(n) * part_vz(n) + + hx = 0.0_num + hy = 0.0_num + + dcellx(n) = cell_x3(n) - cell_x1(n) + dcelly(n) = cell_y3(n) - cell_y1(n) + + xmin(n) = sf_min + (dcellx(n) - 1) / 2 + ymin(n) = sf_max + (dcelly(n) - 1) / 2 + + + hx(xmin(n)) = 0.25_num + cell_frac_x(n)**2 + cell_frac_x(n) + hx(xmin(n) + 1) = 1.5_num - 2.0_num * cell_frac_x(n)**2 + hx(xmin(n) + 2) = 0.25_num + cell_frac_x(n)**2 - cell_frac_x(n) + + hy(ymin(n)) = 0.25_num + cell_frac_y(n)**2 + cell_frac_y(n) + hy(ymin(n) + 1) = 1.5_num - 2.0_num * cell_frac_y(n)**2 + hy(ymin(n) + 2) = 0.25_num + cell_frac_y(n)**2 - cell_frac_y(n) + + yfac10(n) = gy(ymin(n)) + 0.5_num * hy(ymin(n)) + yfac11(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 1) + yfac12(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 2) + + yfac20(n) = third * hy(ymin(n)) + 0.5 * gy(ymin(n)) + yfac21(n) = third * hy(ymin(n) + 1) + 0.5 * gy(ymin(n) + 1) + yfac22(n) = third * hy(ymin(n) + 2) + 0.5 * gy(ymin(n) + 2) + + xfac10(n) = gx(xmin(n)) + 0.5_num * hx(xmin(n)) + xfac11(n) = gx(xmin(n) + 1) + 0.5_num * hx(xmin(n) + 1) + xfac12(n) = gx(xmin(n) + 2) + 0.5_num * hx(xmin(n) + 2) + + wx(n,1) = hx(xmin(n)) * yfac10(n) + wx(n,2) = hx(xmin(n) + 1) * yfac10(n) + wx(n,3) = hx(xmin(n) + 2) * yfac10(n) + wx(n,4) = hx(xmin(n)) * yfac11(n) + wx(n,5) = hx(xmin(n) + 1) * yfac11(n) + wx(n,6) = hx(xmin(n) + 2) * yfac11(n) + wx(n,7) = hx(xmin(n)) * yfac12(n) + wx(n,8) = hx(xmin(n) + 1) * yfac12(n) + wx(n,9) = hx(xmin(n) + 2) * yfac12(n) + + wy(n,1) = hy(ymin(n)) * xfac10(n) + wy(n,2) = hy(ymin(n)) * xfac11(n) + wy(n,3) = hy(ymin(n)) * xfac12(n) + wy(n,4) = hy(ymin(n) + 1) * xfac10(n) + wy(n,5) = hy(ymin(n) + 1) * xfac11(n) + wy(n,6) = hy(ymin(n) + 1) * xfac12(n) + wy(n,7) = hy(ymin(n) + 2) * xfac10(n) + wy(n,8) = hy(ymin(n) + 2) * xfac11(n) + wy(n,9) = hy(ymin(n) + 2) * xfac12(n) + + wz(n,1) = gx(xmin(n)) * yfac10(n) + hx(xmin(n)) * yfac20(n) + wz(n,2) = gx(xmin(n) + 1) * yfac10(n) + hx(xmin(n) + 1) * yfac20(n) + wz(n,3) = gx(xmin(n) + 2) * yfac10(n) + hx(xmin(n) + 2) * yfac20(n) + wz(n,4) = gx(xmin(n)) * yfac11(n) + hx(xmin(n)) * yfac21(n) + wz(n,5) = gx(xmin(n) + 1) * yfac11(n) + hx(xmin(n) + 1) * yfac21(n) + wz(n,6) = gx(xmin(n) + 2) * yfac11(n) + hx(xmin(n) + 2) * yfac21(n) + wz(n,7) = gx(xmin(n)) * yfac12(n) + hx(xmin(n)) * yfac22(n) + wz(n,8) = gx(xmin(n) + 1) * yfac12(n) + hx(xmin(n) + 1) * yfac22(n) + wz(n,9) = gx(xmin(n) + 2) * yfac12(n) + hx(xmin(n) + 2) * yfac22(n) + + cx(n) = cell_x1(n) + xmin(n) + cy(n) = cell_y1(n) + ymin(n) + cell(n) = cx(n) + (cy(n) - 1) * nx + + jxh(n,cell(n)) = -fjx(n) * wx(n,1) + jxh(n,cell(n) + 1) = -fjx(n) * (wx(n,1) + wx(n,2)) + jxh(n,cell(n) + 2) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) + jxh(n,cell(n) + 3) = -fjx(n) * wx(n,4) + jxh(n,cell(n) + 4) = -fjx(n) * (wx(n,4) + wx(n,5)) + jxh(n,cell(n) + 5) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) + jxh(n,cell(n) + 6) = -fjx(n) * wx(n,7) + jxh(n,cell(n) + 7) = -fjx(n) * (wx(n,7) + wx(n,8)) + jxh(n,cell(n) + 8) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) + + jyh(n,cell(n)) = -fjy(n) * wy(n,1) + jyh(n,cell(n) + 1) = -fjy(n) * wy(n,2) + jyh(n,cell(n) + 2) = -fjy(n) * wy(n,3) + jyh(n,cell(n) + 3) = -fjy(n) * (wy(n,1) + wy(n,4)) + jyh(n,cell(n) + 4) = -fjy(n) * (wy(n,2) + wy(n,5)) + jyh(n,cell(n) + 5) = -fjy(n) * (wy(n,3) + wy(n,6)) + jyh(n,cell(n) + 6) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) + jyh(n,cell(n) + 7) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) + jyh(n,cell(n) + 8) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) + + jzh(n,cell(n)) = fjz(n) * wz(n,1) + jzh(n,cell(n) + 1) = fjz(n) * wz(n,2) + jzh(n,cell(n) + 2) = fjz(n) * wz(n,3) + jzh(n,cell(n) + 3) = fjz(n) * wz(n,4) + jzh(n,cell(n) + 4) = fjz(n) * wz(n,5) + jzh(n,cell(n) + 5) = fjz(n) * wz(n,6) + jzh(n,cell(n) + 6) = fjz(n) * wz(n,7) + jzh(n,cell(n) + 7) = fjz(n) * wz(n,8) + jzh(n,cell(n) + 8) = fjz(n) * wz(n,9) + + END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) + + !$OMP END SIMD + + !$OMP SIMD + + DO n = 1, MIN(LVEC, npart - np + 1) + + jx(cx(n),cy(n)) = jx(cx(n),cy(n)) + jxh(n,1) + jx(cx(n) + 1,cy(n)) = jx(cx(n) + 1,cy(n)) + jxh(n,2) + jx(cx(n) + 2,cy(n)) = jx(cx(n) + 2,cy(n)) + jxh(n,3) + jx(cx(n), cy(n) + 1) = jx(cx(n),cy(n) + 1) + jxh(n,4) + jx(cx(n) + 1, cy(n) + 1) = jx(cx(n) + 1,cy(n) + 1) + jxh(n,5) + jx(cx(n) + 2, cy(n) + 1) = jx(cx(n) + 2,cy(n) + 1) + jxh(n,6) + jx(cx(n), cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,7) + jx(cx(n) + 1, cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,8) + jx(cx(n) + 2, cy(n) + 2) = jx(cx(n) + 2,cy(n) + 2) + jxh(n,9) + + jy(cx(n),cy(n)) = jy(cx(n),cy(n)) + jyh(n,1) + jy(cx(n) + 1,cy(n)) = jy(cx(n) + 1,cy(n)) + jyh(n,2) + jy(cx(n) + 2,cy(n)) = jy(cx(n) + 2,cy(n)) + jyh(n,3) + jy(cx(n), cy(n) + 1) = jy(cx(n),cy(n) + 1) + jyh(n,4) + jy(cx(n) + 1, cy(n) + 1) = jy(cx(n) + 1,cy(n) + 1) + jyh(n,5) + jy(cx(n) + 2, cy(n) + 1) = jy(cx(n) + 2,cy(n) + 1) + jyh(n,6) + jy(cx(n), cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,7) + jy(cx(n) + 1, cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,8) + jy(cx(n) + 2, cy(n) + 2) = jy(cx(n) + 2,cy(n) + 2) + jyh(n,9) + + jz(cx(n),cy(n)) = jz(cx(n),cy(n)) + jzh(n,1) + jz(cx(n) + 1,cy(n)) = jz(cx(n) + 1,cy(n)) + jzh(n,2) + jz(cx(n) + 2,cy(n)) = jz(cx(n) + 2,cy(n)) + jzh(n,3) + jz(cx(n), cy(n) + 1) = jz(cx(n),cy(n) + 1) + jzh(n,4) + jz(cx(n) + 1, cy(n) + 1) = jz(cx(n) + 1,cy(n) + 1) + jzh(n,5) + jz(cx(n) + 2, cy(n) + 1) = jz(cx(n) + 2,cy(n) + 1) + jzh(n,6) + jz(cx(n), cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,7) + jz(cx(n) + 1, cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,8) + jz(cx(n) + 2, cy(n) + 2) = jz(cx(n) + 2,cy(n) + 2) + jzh(n,9) + + END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) + !$OMP END SIMD + + END DO !END LOOP n = 1, npart + END DO !END LOOP ispec = 1, nspecies + + END current_deposition_VB_triangle + + + + + + + + + + + + + + + + + + From c57d9193c0c3c2a95f6bfa53e99dd218b241795d Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Sun, 29 Nov 2020 17:15:18 +0100 Subject: [PATCH 035/106] Fixing incides in jxh, jyh, jzh --- epoch2d/src/current_deposition.F90 | 58 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index a46d4ff07..da0d02b97 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -189,35 +189,35 @@ SUBROUTINE current_deposition_VB_triangle cy(n) = cell_y1(n) + ymin(n) cell(n) = cx(n) + (cy(n) - 1) * nx - jxh(n,cell(n)) = -fjx(n) * wx(n,1) - jxh(n,cell(n) + 1) = -fjx(n) * (wx(n,1) + wx(n,2)) - jxh(n,cell(n) + 2) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) - jxh(n,cell(n) + 3) = -fjx(n) * wx(n,4) - jxh(n,cell(n) + 4) = -fjx(n) * (wx(n,4) + wx(n,5)) - jxh(n,cell(n) + 5) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) - jxh(n,cell(n) + 6) = -fjx(n) * wx(n,7) - jxh(n,cell(n) + 7) = -fjx(n) * (wx(n,7) + wx(n,8)) - jxh(n,cell(n) + 8) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) - - jyh(n,cell(n)) = -fjy(n) * wy(n,1) - jyh(n,cell(n) + 1) = -fjy(n) * wy(n,2) - jyh(n,cell(n) + 2) = -fjy(n) * wy(n,3) - jyh(n,cell(n) + 3) = -fjy(n) * (wy(n,1) + wy(n,4)) - jyh(n,cell(n) + 4) = -fjy(n) * (wy(n,2) + wy(n,5)) - jyh(n,cell(n) + 5) = -fjy(n) * (wy(n,3) + wy(n,6)) - jyh(n,cell(n) + 6) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) - jyh(n,cell(n) + 7) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) - jyh(n,cell(n) + 8) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) - - jzh(n,cell(n)) = fjz(n) * wz(n,1) - jzh(n,cell(n) + 1) = fjz(n) * wz(n,2) - jzh(n,cell(n) + 2) = fjz(n) * wz(n,3) - jzh(n,cell(n) + 3) = fjz(n) * wz(n,4) - jzh(n,cell(n) + 4) = fjz(n) * wz(n,5) - jzh(n,cell(n) + 5) = fjz(n) * wz(n,6) - jzh(n,cell(n) + 6) = fjz(n) * wz(n,7) - jzh(n,cell(n) + 7) = fjz(n) * wz(n,8) - jzh(n,cell(n) + 8) = fjz(n) * wz(n,9) + jxh(n,1) = -fjx(n) * wx(n,1) + jxh(n,2) = -fjx(n) * (wx(n,1) + wx(n,2)) + jxh(n,3) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) + jxh(n,4) = -fjx(n) * wx(n,4) + jxh(n,5) = -fjx(n) * (wx(n,4) + wx(n,5)) + jxh(n,6) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) + jxh(n,7) = -fjx(n) * wx(n,7) + jxh(n,8) = -fjx(n) * (wx(n,7) + wx(n,8)) + jxh(n,9) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) + + jyh(n,1) = -fjy(n) * wy(n,1) + jyh(n,2) = -fjy(n) * wy(n,2) + jyh(n,3) = -fjy(n) * wy(n,3) + jyh(n,4) = -fjy(n) * (wy(n,1) + wy(n,4)) + jyh(n,5) = -fjy(n) * (wy(n,2) + wy(n,5)) + jyh(n,6) = -fjy(n) * (wy(n,3) + wy(n,6)) + jyh(n,7) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) + jyh(n,8) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) + jyh(n,9) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) + + jzh(n,1) = fjz(n) * wz(n,1) + jzh(n,2) = fjz(n) * wz(n,2) + jzh(n,3) = fjz(n) * wz(n,3) + jzh(n,4) = fjz(n) * wz(n,4) + jzh(n,5) = fjz(n) * wz(n,5) + jzh(n,6) = fjz(n) * wz(n,6) + jzh(n,7) = fjz(n) * wz(n,7) + jzh(n,8) = fjz(n) * wz(n,8) + jzh(n,9) = fjz(n) * wz(n,9) END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) From 4962a1d395ac791029cca7fb2f3a48d3ae2eacc3 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 30 Dec 2020 19:47:05 +0100 Subject: [PATCH 036/106] Added the deck_io_block dependency in Makefile --- epoch2d/Makefile | 2 +- epoch3d/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/epoch2d/Makefile b/epoch2d/Makefile index 8ab2ba5e1..48fd99c3a 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -553,4 +553,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o deck_io_block.o diff --git a/epoch3d/Makefile b/epoch3d/Makefile index eb801be70..e2a49089d 100644 --- a/epoch3d/Makefile +++ b/epoch3d/Makefile @@ -553,4 +553,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o deck_io_block.o From 18267b430099866c6e66c3689ee3bc6db6cfcc74 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 4 Jan 2021 15:39:36 +0100 Subject: [PATCH 037/106] Fix for moving window 3D and ADJUSTL() --- epoch2d/src/deck/strings.f90 | 7 ++++--- epoch3d/src/deck/strings.f90 | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/epoch2d/src/deck/strings.f90 b/epoch2d/src/deck/strings.f90 index 0e595abb7..cf115f347 100644 --- a/epoch2d/src/deck/strings.f90 +++ b/epoch2d/src/deck/strings.f90 @@ -323,12 +323,13 @@ END FUNCTION lowercase - FUNCTION trim_string(string) + FUNCTION trim_string(string_in) CHARACTER(LEN=c_max_string_length) :: trim_string - CHARACTER(LEN=*) :: string + CHARACTER(LEN=*) :: string_in + CHARACTER(LEN=LEN_TRIM(string_in)) - string = ADJUSTL(string) + string = ADJUSTL(string_in) IF (LEN_TRIM(string) > c_max_string_length) THEN trim_string = string(1:c_max_string_length) ELSE diff --git a/epoch3d/src/deck/strings.f90 b/epoch3d/src/deck/strings.f90 index a376395b0..e42b55ded 100644 --- a/epoch3d/src/deck/strings.f90 +++ b/epoch3d/src/deck/strings.f90 @@ -327,12 +327,13 @@ END FUNCTION lowercase - FUNCTION trim_string(string) + FUNCTION trim_string(string_in) CHARACTER(LEN=c_max_string_length) :: trim_string - CHARACTER(LEN=*) :: string + CHARACTER(LEN=*) :: string_in + CHARACTER(LEN=LEN_TRIM(string_in)) :: string - string = ADJUSTL(string) + string = ADJUSTL(string_in) IF (LEN_TRIM(string) > c_max_string_length) THEN trim_string = string(1:c_max_string_length) ELSE From 6f9163a35aeb6203bbfd707855dd0ee358e63020 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 5 Jan 2021 12:19:57 +0100 Subject: [PATCH 038/106] Minor fix in strings.f90 --- epoch2d/src/deck/strings.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/deck/strings.f90 b/epoch2d/src/deck/strings.f90 index cf115f347..674763659 100644 --- a/epoch2d/src/deck/strings.f90 +++ b/epoch2d/src/deck/strings.f90 @@ -327,7 +327,7 @@ FUNCTION trim_string(string_in) CHARACTER(LEN=c_max_string_length) :: trim_string CHARACTER(LEN=*) :: string_in - CHARACTER(LEN=LEN_TRIM(string_in)) + CHARACTER(LEN=LEN_TRIM(string_in)) :: string string = ADJUSTL(string_in) IF (LEN_TRIM(string) > c_max_string_length) THEN From a8bc20f43eda2765d5d539a32d5c74ec7dd03543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20Br=C3=B6mmel?= Date: Fri, 5 Feb 2021 14:47:46 +0000 Subject: [PATCH 039/106] Update .gitmodules --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 88592be0b..2fd47b8d0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "SDF"] path = SDF - url = https://cfsa-pmw.warwick.ac.uk/SDF/SDF.git + url = https://gitlab.version.fz-juelich.de/SLPP/sdf/SDF.git From 0d4894f88dedba99359465a09e1f04c5a7121109 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Mar 2021 08:33:51 +0100 Subject: [PATCH 040/106] First step towards particle sorting, and vec. pusher and current --- epoch2d/src/current_deposition.F90 | 930 ++++++++++++++++++++++------- 1 file changed, 698 insertions(+), 232 deletions(-) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index da0d02b97..2e105e620 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -1,272 +1,738 @@ +SUBROUTINE particle_sorting() + dx_bin = 5 * dx ! Number of cells in bin along x + dy_bin = 5 * dy ! Number of cells in bin along y + + idx_bin = 1./dx_bin ! Inverse of dx_bin + idy_bin = 1./dy_bin ! Inverse of dy_bin - SUBROUTINE get_particle(part_data, n, part) - TYPE(particle_data), POINTER, INTENT(IN) :: part_data - INTEGER(i8), INTENT(IN) :: n - TYPE(particle), INTENT(INOUT) :: part + nx_bin = ceiling((x_grid_max_local - x_grid_min_local) * idx_bin) ! Number of bins along x + ny_bin = ceiling((y_grid_max_local - y_grid_min_local) * idy_bin) ! Number of bins along y - IF (n >= 1 .AND. n <= part_data%count) THEN - part%pos = part_data%pos(n) - part%mom = part_data%mom(n) - part%mass = part_data%mass(n) - part%weight = part_data%weight(n) - part%charge = part_data%charge(n) - ENDIF + n_bins = nx_bin * ny_bin ! Total number of bins - END SUBROUTINE get_particle + ! Calculate particle positions in terms of the bin co-ordinates - SUBROUTINE particle_sort + DO ipart = 1, species_list(ispecies)%attached_list%count + next => current%next + part_x = (current%part_pos(1) - x_grid_min_local) * idx_bin + part_y = (current%part_pos(2) - y_grid_min_local) * idy_bin + + ix = floor(part_x) ! x-coordinate of the particle bin + iy = floor(part_y) ! y-coordinate of the particle bin - ! This routine sorts the particles such that - ! the memory access to the particle list - ! is contigious + tile_id(ipart) = iy * nx_bin + ix + 1 ! 1-D coordinate of the bins - END SUBROUTINE particle sort + num(tile_id(ipart)) = num(tile_id(ipart)) + 1 ! Number of particles in each bin + current => next - SUBROUTINE current_deposition_VB_triangle + END DO ! End do-loop for particle position in terms of bin co-ordinates -#ifdef INTEL_VECTORISATION -!dir$ attributes align:64 :: gx -!dir$ attributes align:64 :: gy -!dir$ attributes align:64 :: gz + k = 0 + + ! Determine the stride of particle indices in bins + + DO i = 1, n_bins + g_indx(i) = k ! Starting particle index for a particular bin + k = k + num(i) + END DO ! End do-loop for the stride of particle indices in bins + + ! Particle sorting in 1-D bins + + DO ipart = 1, species_list(ispecies)%attached_list%count + next => current%next + k = tile_id(ipart) + g_índx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins + +#ifndef PER_SPECIES_WEIGHT + w(g_indx(k)) = current%weight + fcx(g_indx(k)) = idty * w(g_indx(k)) + fcy(g_indx(k)) = idtx * w(g_indx(k)) + fcz(g_indx(k)) = idtxy * w(g_indx(k)) #endif -#ifdef INTEL_VECTORISATION -!dir$ attributes align:64 :: hx -!dir$ attributes align:64 :: hy -!dir$ attributes align:64 :: hz +#ifndef NO_PARTICLE_PROBES + init_x(g_indx(k)) = current%part_pos(1) + init_y(g_indx(k)) = current%part_pos(2) #endif -#ifdef IBM_VECTORISATION -!IBM* ALIGN(64, gx, gy, gz) +#ifdef PER_PARTICLE_CHARGE_MASS + q(g_indx(k)) = current%charge + m(g_indx(k)) = current%mass + mc(g_indx(k)) = c * current%mass + i_mc(g_indx(k)) = 1.0_num / mc + cmratio(g_indx(k)) = q(g_indx(k)) * dtfac * i_mc(g_indx(k)) + ccmratio(g_indx(k)) = c * cmratio(g_indx(k)) +#ifndef NO_PARTICLE_PROBES + mc2(g_indx(k)) = c * mc(g_indx(k)) +#endif #endif -#ifdef IBM_VECTORISATION -!IBM* ALIGN(64, hx, hy, hz) + ! Copy the particle properties out for sorting + x(g_indx(k)) = current%part_pos(1) - x_grid_min_local + y(g_indx(k)) = current%part_pos(2) - y_grid_min_local + px(g_indx(k)) = current%part_p(1) * ipart_mc + py(g_indx(k)) = current%part_p(2) * ipart_mc + pz(g_indx(k)) = current%part_p(3) * ipart_mc + pvol(g_indx(k)) = current%pvol + gamma_rel(g_indx(k)) = SQRT(px(g_indx(k))**2 + py(g_indx(k))**2 + pz(g_indx(k))**2 + 1.0_num) + root(g_indx(k)) = dtco2 / gamma_rel(g_indx(k)) + current => next + END DO ! End do-loop for particle sorting + +END SUBROUTINE particle_sorting + +SUBROUTINE particle_pusher() + + DO i = 1, species_list(ispecies)%attached_list%count, LVEC + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + jj = i + j - 1 + x(jj) = x(jj) + px(jj) * root(jj) + y(jj) = y(jj) + py(jj) * root(jj) + +#ifdef WORK_DONE_INTEGRATED + ! This is the actual total work done by the fields: Results correspond + ! with the electron's gamma factor + + root(jj) = cmratio(jj) / gamma_rel(jj) + + tmp_x(j) = px(jj) * root(jj) + tmp_y(j) = py(jj) * root(jj) + tmp_z(j) = pz(jj) * root(jj) #endif -! Similarly all other arrays need to be aligned + END DO ! End do-loop for j + + ! Calculate fields at particle positions + ! Grid cell position as a fraction + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_x_r(j) = x(jj) * idx + cell_y_r(j) = y(jj) * idy + + END DO ! End do-loop for grid cell position as fraction + + ! Round cell position to nearest cell + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_x1(jj) = FLOOR(cell_x_r(j) + 0.5_num) + cell_y1(jj) = FLOOR(cell_y_r(j) + 0.5_num) - DO ispec = 1, n_species - species => species_list(ispec) + cell_x2(j) = FLOOR(cell_x_r(j)) + cell_y2(j) = FLOOR(cell_y_r(j)) + + END DO ! End do-loop for nearest cell position + + ! Calculate fraction of cell between nearest cell boundary and particle + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_frac_x(j) = REAL(cell_x1(jj), num) - cell_x_r(j) + cell_frac_y(j) = REAL(cell_y1(jj), num) - cell_y_r(j) + + END DO ! End do-loop for grid cell position fraction + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cfx2(j) = cell_frac_x(j)**2 + cfy2(j) = cell_frac_y(j)**2 + + END DO + + DO j = 1, 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + jj = i + j - 1 + cell_x1(jj) = cell_x1(jj) + 1 + cell_y1(jj) = cell_y1(jj) + 1 + + ! Particle weight factors as described in Page 25 of the PSC manual + ! These weight grid properties onto particles + ! Also used to weight particle properties onto grid, used later to calculate J + ! NOTE: These weights require an additional multiplication factor + + ! This weighing is for triangle shaped particles - IF (species%immobile) CYCLE + gxx(-1,j) = 0.25_num + cfx2(j) + cell_frac_x(j) + gxx( 0,j) = 1.5_num - 2.0_num * cfx2(j) + gxx( 1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) + + gyy(-1,j) = 0.25_num - cfy2(j) + cell_frac_y(j) + gyy( 0,j) = 1.5_num - 2.0_num * cfy2(j) + gyy( 1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) + + ! Now redo shifted by half a cell due to grid stagger + ! Use shifted version for ex in X, ey in Y, ez in Z + ! And in Y&Z for bx, X&Z for by, X&Y for bz + + END DO ! End do-loop with gxx + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cell_frac_x(j) = REAL(cell_x2(j), num) - cell_x_r(j) + 0.5_num + cell_frac_y(j) = REAL(cell_y2(j), num) - cell_y_r(j) + 0.5_num + + cell_x2(j) = cell_x2(j) + 1 + cell_y2(j) = cell_y2(j) + 1 + + END DO ! End do-loop for re-doing cell_frac_(x,y) + + dcellx = 0 + dcelly = 0 + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cfx2(j) = cell_frac_x(j)**2 + cfy2(j) = cell_frac_y(j)**2 + + END DO + + ! Calculating hxx + ! NOTE: These weights require an additional multiplication factor + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) + hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) + hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) + + hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) + hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) + hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) + + END DO ! End do-loop for hxx + + ! These are the electric and magnetic fields interpolated to the + ! particle position. They have been checked and are correct. + ! Actually checking this is messy + + ! Calculate e-fields at particle position for triangle particles + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + ex_part(j) = & + gyy(-1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)-1) & + + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)-1) & + + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)-1)) & + + gyy( 0,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j) ) & + + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j) ) & + + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j) )) & + + gyy( 1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)+1) & + + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)+1) & + + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)+1)) + + ey_part(j) = & + hyy(-1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)-1) & + + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)-1) & + + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)-1)) & + + hyy( 0,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j) ) & + + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j) ) & + + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j) )) & + + hyy( 1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)+1) & + + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)+1) & + + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)+1)) + + ez_part(j) = & + gyy(-1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)-1) & + + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)-1) & + + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)-1)) & + + gyy( 0,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j) ) & + + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j) ) & + + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j) )) & + + gyy( 1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)+1) & + + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)+1) & + + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)+1)) + + END DO ! End do-loop for e-fields at particle position + + ! Calculate b-fields at particle position for triangle particles + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + bx_part(j) = & + hyy(-1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)-1) & + + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)-1) & + + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)-1)) & + + hyy( 0,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j) ) & + + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j) ) & + + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j) )) & + + hyy( 1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)+1) & + + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)+1) & + + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)+1)) + + by_part(j) = & + gyy(-1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)-1) & + + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)-1) & + + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)-1)) & + + gyy( 0,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j) ) & + + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j) ) & + + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j) )) & + + gyy( 1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)+1) & + + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)+1) & + + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)+1)) + + bz_part(j) = & + hyy(-1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)-1) & + + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)-1) & + + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)-1)) & + + gyy( 0,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j) ) & + + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j) ) & + + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j) )) & + + gyy( 1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)+1) & + + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)+1) & + + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)+1)) + + END DO ! End do-loop for b-fields at particle position + + + ! Update particle momenta using weighted fields + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j -1 + + uxm(j) = px(jj) + cmratio(jj) * ex_part(j) + uym(j) = py(jj) + cmratio(jj) * ey_part(j) + uzm(j) = pz(jj) + cmratio(jj) * ez_part(j) + + END DO + +#ifdef HC_PUSH + + ! Half timestep, then use Higuera-Cary push + ! See https://aip.scitation.org/doi/10.1063/1.4979989 + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + gamma_rel(jj) = uxm(j)**2 + uym(j)**2 + uzm(j)**2 + 1.0_num + alpha(j) = 0.5_num * q(jj) * dt / m(jj) - part_data => species%part_data - npart = part_data%count + END DO - CALL particle_sort ! I guess this routine should be called in the Boris pusher - ! such that both gx and hx are in sync + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - DO np = 1, npart, LVEC + beta_x(j) = alpha(j) * bx_part(j) + beta_y(j) = alpha(j) * by_part(j) + beta_z(j) = alpha(j) * bz_part(j) - !$OMP SIMD - DO n = 1, MIN(LVEC, npart - np + 1) + END DO - CALL get_particle(part_data, n, part) + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + beta2(j) = beta_x(j)**2 + beta_y(j)**2 + beta_z(j)**2 + beta_dot_u(j) = beta_x(j) * uxm(j) + beta_y(j) * uym(j) + beta_z(j) * uzm(j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j -1 + gamma_rel(jj) = SQRT(0.5_num & + * (gamma_rel(jj) - beta2(j) & + + SQRT((gamma_rel(jj) - beta2(j))**2 & + + 4.0_num * (beta2(j) + beta_dot_u(j)**2)))) + + END DO + +#else + + ! Half timestep, then use Boris1970 rotation, see Birdsall and Langdon + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + gamma_rel(jj) = SQRT(uxm(j)**2 + uym(j)**2 + uzm(j)**2) + + END DO -#ifndef PER_SPECIES_WEIGHT - part_weight(n) = part(n)%weight - fcx(n) = idty * part_weight(n) - fcy(n) = idtx * part_weight(n) - fcz(n) = idxy * part_weight(n) #endif -#ifndef NO_PARTICLE_PROBES - init_part_x(n) = part(n)%part_pos(1) - init_part_y(n) = part(n)%part_pos(2) + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + root(jj) = ccmratio(jj) / gamma_rel(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + taux(j) = bx_part(j) * root(jj) + tauy(j) = by_part(j) * root(jj) + tauz(j) = bz_part(j) * root(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + taux2(j) = taux(j)**2 + tauy2(j) = taux(j)**2 + tauz2(j) = tauz(j)**2 + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + tau(j) = 1.0_num / (1.0_num + taux2(j) + tauy2(j) + tauz2(j)) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + uxp(j) = ((1.0_num + taux2(j) - tauy2(j) - tauz2(j)) * uxm(j) & + + 2.0_num * ((taux(j) * tauy(j) + tauz(j)) * uym(j) & + + (taux(j) * tauz(j) - tauy(j)) * uzm(j))) * tau(j) + uyp(j) = ((1.0_num - taux2(j) + tauy2(j) - tauz2(j)) * uym(j) & + + 2.0_num * ((tauy(j) * tauz(j) + taux(j)) * uzm(j) & + + (tauy(j) * taux(j) - tauz(j)) * uxm(j))) * tau(j) + uzp(j) = ((1.0_num - taux2(j) - tauy2(j) + tauz2(j)) * uzm(j) & + + 2.0_num * ((tauz(j) * taux(j) + tauy(j)) * uxm(j) & + + (tauz(j) * tauy(j) - taux(j)) * uym(j))) * tau(j) + + END DO + + ! Rotation over, go to full timestep + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + px(jj) = uxp(j) + cmratio(jj) * ex_part(j) + py(jj) = uyp(j) + cmratio(jj) * ey_part(j) + pz(jj) = uzp(j) + cmratio(jj) * ez_part(j) + + END DO + + ! Calculate particle velocity from particle momentum + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + gamma_rel(jj) = SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) + igamma(jj) = 1.0_num / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) + root(jj) = dtco2 / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + delta_x(jj) = px(jj) * root(jj) + delta_y(jj) = py(jj) * root(jj) + vz(jj) = pz(jj) * c * igamma(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + x(jj) = x(jj) + delta_x(j) + y(jj) = y(jj) + delta_y(j) + + END DO + + ! Particle has now finished move to end of timestep, so copy back + ! into particle array + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + next => current + current%part_pos = (/ x(jj) + x_grid_min_local, & + y(jj) + y_grid_min_local /) + current%part_p = mc(jj) * (/ px(jj), py(jj), pz(jj) /) + + ! Add particle to boundary candidate list + IF (current%part_pos(1) < bnd_x_min & + .OR. current%part_pos(1) > bnd_x_max & + .OR. current%part_pos(2) < bnd_y_min & + .OR. current%part_pos(2) > bnd_y_max ) THEN + ALLOCATE(bnd_part_next) + bnd_part_next%particle => current + bnd_part_last%next => bnd_part_next + bnd_part_last => bnd_part_next + END IF + +#ifdef WORK_DONE_INTEGRATED + ! This is the actual total work done by the fields: Results correspond + ! with the electron's gamma factor + + root(jj) = cmratio(jj) / gamma_rel(jj) + + work_x = ex_part(j) * (tmp_x(j) + px(jj) * root(jj)) + work_y = ex_part(j) * (tmp_y(j) + py(jj) * root(jj)) + work_z = ex_part(j) * (tmp_z(j) + pz(jj) * root(jj)) + + current%work_x = work_x + current%work_y = work_y + current%work_z = work_z + + current%work_x_total = current%work_x_total + work_x + current%work_y_total = current%work_y_total + work_y + current%work_z_total = current%work_z_total + work_z #endif -#ifdef PER_PARTICLE_CHARGE_MASS - part_q(n) = part(n)%charge - part_m(n) = part(n)%mass - part_mc(n) = c * part(n)%mass - ipart_mc(n) = 1.0_num / part_mc(n) - cmratio(n) = part_q(n) * dtfac * ipart_mc(n) - ccmratio(n) = c * cmratio(n) + #ifndef NO_PARTICLE_PROBES - part_mc2 = c * part_mc -#endif + final_x(jj) = current%part_pos(1) + final_y(jj) = current%part_pos(2) #endif - !Copy the particle properties out for speed - part_x(n) = part(n)%part_pos(1) - x_grid_min_local - part_y(n) = part(n)%part_pos(2) - y_grid_min_local - part_ux(n) = part(n)%part_p(1) * ipart_mc(n) - part_uy(n) = part(n)%part_p(2) * ipart_mc(n) - part_uz(n) = part(n)%part_p(3) * ipart_mc(n) - - !Now advance to t+1.5dt to calculate current - !For efficient vectorization, I would prefer - !part_x(n) = part(n)%part_pos(1) - x_grid_min_local + delta_x - !part_y(n) = part(n)%part_pos(2) - x_grid_min_local + delta_x - !This eliminates the dependency on previous step - part_x(n) = part_x(n) + delta_x(n) - part_y(n) = part_y(n) + delta_y(n) - - !Delta-f calculation: subtract background from calculated current + current => next + + END DO + + + END DO ! End do-loop for i + +END SUBROUTINE particle_pusher + +SUBROUTINE triangle_current_deposition() + + DO i = 1, species_list(ispecies)%attached_list%count, LVEC + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + + ! Advance to t + 1.5dt to calculate current. This is detailed in + ! the PSC manual between page 37 and 41. The version coded up looks + ! completely different to that in the manual, but this is equivalent. + ! Use t + 1.5dt so that can update J to t + dt at 2nd order + + x(jj) = x(jj) + delta_x(jj) + y(jj) = y(jj) + delta_y(jj) + + ! Delta-f calculation: subtract background from + ! calculated current. + #ifdef DELTAF_METHOD - weight_back(n) = part(n)%pvol * f0(ispecies, part_mc(n) / c, & - part(n)%part_p) - fcx(n) = idty * (part_weight(n) - weight_back(n)) - fcy(n) = idtx * (part_weight(n) - weight_back(n)) - fcz(n) = idxy * (part_weight(n) - weight_back(n)) + weight_back(j) = pvol(jj) * f0(ispecies, mc(jj) / c, px(jj), py(jj), pz(jj)) + fcx(j) = idty * (weight(jj) - weight_back(j)) + fcy(j) = idtx * (weight(jj) - weight_back(j)) + fcz(j) = idxy * (weight(jj) - weight_back(j)) #endif - cell_x_r(n) = part_x(n) * idx - cell_y_r(n) = part_y(n) * idy - - cell_x3(n) = FLOOR(cell_x_r(n) + 0.5_num) - cell_y3(n) = FLOOR(cell_y_r(n) + 0.5_num) - - cell_frac_x(n) = REAL(cell_x3(n), num) - cell_x_r(n) - cell_frac_y(n) = REAL(cell_y3(n), num) - cell_y_r(n) - - fjx(n) = fcx(n) * part_q(n) - fjy(n) = fcy(n) * part_q(n) - fjz(n) = fcz(n) * part_q(n) * part_vz(n) - - hx = 0.0_num - hy = 0.0_num - - dcellx(n) = cell_x3(n) - cell_x1(n) - dcelly(n) = cell_y3(n) - cell_y1(n) - - xmin(n) = sf_min + (dcellx(n) - 1) / 2 - ymin(n) = sf_max + (dcelly(n) - 1) / 2 - - - hx(xmin(n)) = 0.25_num + cell_frac_x(n)**2 + cell_frac_x(n) - hx(xmin(n) + 1) = 1.5_num - 2.0_num * cell_frac_x(n)**2 - hx(xmin(n) + 2) = 0.25_num + cell_frac_x(n)**2 - cell_frac_x(n) - - hy(ymin(n)) = 0.25_num + cell_frac_y(n)**2 + cell_frac_y(n) - hy(ymin(n) + 1) = 1.5_num - 2.0_num * cell_frac_y(n)**2 - hy(ymin(n) + 2) = 0.25_num + cell_frac_y(n)**2 - cell_frac_y(n) - - yfac10(n) = gy(ymin(n)) + 0.5_num * hy(ymin(n)) - yfac11(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 1) - yfac12(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 2) - - yfac20(n) = third * hy(ymin(n)) + 0.5 * gy(ymin(n)) - yfac21(n) = third * hy(ymin(n) + 1) + 0.5 * gy(ymin(n) + 1) - yfac22(n) = third * hy(ymin(n) + 2) + 0.5 * gy(ymin(n) + 2) - - xfac10(n) = gx(xmin(n)) + 0.5_num * hx(xmin(n)) - xfac11(n) = gx(xmin(n) + 1) + 0.5_num * hx(xmin(n) + 1) - xfac12(n) = gx(xmin(n) + 2) + 0.5_num * hx(xmin(n) + 2) - - wx(n,1) = hx(xmin(n)) * yfac10(n) - wx(n,2) = hx(xmin(n) + 1) * yfac10(n) - wx(n,3) = hx(xmin(n) + 2) * yfac10(n) - wx(n,4) = hx(xmin(n)) * yfac11(n) - wx(n,5) = hx(xmin(n) + 1) * yfac11(n) - wx(n,6) = hx(xmin(n) + 2) * yfac11(n) - wx(n,7) = hx(xmin(n)) * yfac12(n) - wx(n,8) = hx(xmin(n) + 1) * yfac12(n) - wx(n,9) = hx(xmin(n) + 2) * yfac12(n) - - wy(n,1) = hy(ymin(n)) * xfac10(n) - wy(n,2) = hy(ymin(n)) * xfac11(n) - wy(n,3) = hy(ymin(n)) * xfac12(n) - wy(n,4) = hy(ymin(n) + 1) * xfac10(n) - wy(n,5) = hy(ymin(n) + 1) * xfac11(n) - wy(n,6) = hy(ymin(n) + 1) * xfac12(n) - wy(n,7) = hy(ymin(n) + 2) * xfac10(n) - wy(n,8) = hy(ymin(n) + 2) * xfac11(n) - wy(n,9) = hy(ymin(n) + 2) * xfac12(n) - - wz(n,1) = gx(xmin(n)) * yfac10(n) + hx(xmin(n)) * yfac20(n) - wz(n,2) = gx(xmin(n) + 1) * yfac10(n) + hx(xmin(n) + 1) * yfac20(n) - wz(n,3) = gx(xmin(n) + 2) * yfac10(n) + hx(xmin(n) + 2) * yfac20(n) - wz(n,4) = gx(xmin(n)) * yfac11(n) + hx(xmin(n)) * yfac21(n) - wz(n,5) = gx(xmin(n) + 1) * yfac11(n) + hx(xmin(n) + 1) * yfac21(n) - wz(n,6) = gx(xmin(n) + 2) * yfac11(n) + hx(xmin(n) + 2) * yfac21(n) - wz(n,7) = gx(xmin(n)) * yfac12(n) + hx(xmin(n)) * yfac22(n) - wz(n,8) = gx(xmin(n) + 1) * yfac12(n) + hx(xmin(n) + 1) * yfac22(n) - wz(n,9) = gx(xmin(n) + 2) * yfac12(n) + hx(xmin(n) + 2) * yfac22(n) - - cx(n) = cell_x1(n) + xmin(n) - cy(n) = cell_y1(n) + ymin(n) - cell(n) = cx(n) + (cy(n) - 1) * nx - - jxh(n,1) = -fjx(n) * wx(n,1) - jxh(n,2) = -fjx(n) * (wx(n,1) + wx(n,2)) - jxh(n,3) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) - jxh(n,4) = -fjx(n) * wx(n,4) - jxh(n,5) = -fjx(n) * (wx(n,4) + wx(n,5)) - jxh(n,6) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) - jxh(n,7) = -fjx(n) * wx(n,7) - jxh(n,8) = -fjx(n) * (wx(n,7) + wx(n,8)) - jxh(n,9) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) - - jyh(n,1) = -fjy(n) * wy(n,1) - jyh(n,2) = -fjy(n) * wy(n,2) - jyh(n,3) = -fjy(n) * wy(n,3) - jyh(n,4) = -fjy(n) * (wy(n,1) + wy(n,4)) - jyh(n,5) = -fjy(n) * (wy(n,2) + wy(n,5)) - jyh(n,6) = -fjy(n) * (wy(n,3) + wy(n,6)) - jyh(n,7) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) - jyh(n,8) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) - jyh(n,9) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) - - jzh(n,1) = fjz(n) * wz(n,1) - jzh(n,2) = fjz(n) * wz(n,2) - jzh(n,3) = fjz(n) * wz(n,3) - jzh(n,4) = fjz(n) * wz(n,4) - jzh(n,5) = fjz(n) * wz(n,5) - jzh(n,6) = fjz(n) * wz(n,6) - jzh(n,7) = fjz(n) * wz(n,7) - jzh(n,8) = fjz(n) * wz(n,8) - jzh(n,9) = fjz(n) * wz(n,9) - - END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) - - !$OMP END SIMD - - !$OMP SIMD - - DO n = 1, MIN(LVEC, npart - np + 1) - - jx(cx(n),cy(n)) = jx(cx(n),cy(n)) + jxh(n,1) - jx(cx(n) + 1,cy(n)) = jx(cx(n) + 1,cy(n)) + jxh(n,2) - jx(cx(n) + 2,cy(n)) = jx(cx(n) + 2,cy(n)) + jxh(n,3) - jx(cx(n), cy(n) + 1) = jx(cx(n),cy(n) + 1) + jxh(n,4) - jx(cx(n) + 1, cy(n) + 1) = jx(cx(n) + 1,cy(n) + 1) + jxh(n,5) - jx(cx(n) + 2, cy(n) + 1) = jx(cx(n) + 2,cy(n) + 1) + jxh(n,6) - jx(cx(n), cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,7) - jx(cx(n) + 1, cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,8) - jx(cx(n) + 2, cy(n) + 2) = jx(cx(n) + 2,cy(n) + 2) + jxh(n,9) - - jy(cx(n),cy(n)) = jy(cx(n),cy(n)) + jyh(n,1) - jy(cx(n) + 1,cy(n)) = jy(cx(n) + 1,cy(n)) + jyh(n,2) - jy(cx(n) + 2,cy(n)) = jy(cx(n) + 2,cy(n)) + jyh(n,3) - jy(cx(n), cy(n) + 1) = jy(cx(n),cy(n) + 1) + jyh(n,4) - jy(cx(n) + 1, cy(n) + 1) = jy(cx(n) + 1,cy(n) + 1) + jyh(n,5) - jy(cx(n) + 2, cy(n) + 1) = jy(cx(n) + 2,cy(n) + 1) + jyh(n,6) - jy(cx(n), cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,7) - jy(cx(n) + 1, cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,8) - jy(cx(n) + 2, cy(n) + 2) = jy(cx(n) + 2,cy(n) + 2) + jyh(n,9) - - jz(cx(n),cy(n)) = jz(cx(n),cy(n)) + jzh(n,1) - jz(cx(n) + 1,cy(n)) = jz(cx(n) + 1,cy(n)) + jzh(n,2) - jz(cx(n) + 2,cy(n)) = jz(cx(n) + 2,cy(n)) + jzh(n,3) - jz(cx(n), cy(n) + 1) = jz(cx(n),cy(n) + 1) + jzh(n,4) - jz(cx(n) + 1, cy(n) + 1) = jz(cx(n) + 1,cy(n) + 1) + jzh(n,5) - jz(cx(n) + 2, cy(n) + 1) = jz(cx(n) + 2,cy(n) + 1) + jzh(n,6) - jz(cx(n), cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,7) - jz(cx(n) + 1, cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,8) - jz(cx(n) + 2, cy(n) + 2) = jz(cx(n) + 2,cy(n) + 2) + jzh(n,9) - - END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) - !$OMP END SIMD - - END DO !END LOOP n = 1, npart - END DO !END LOOP ispec = 1, nspecies - - END current_deposition_VB_triangle + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_x_r(j) = x(jj) * idx + cell_y_r(j) = y(jj) * idy + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cell_x3(j) = FLOOR(cell_x_r(j) + 0.5_num) + cell_y3(j) = FLOOR(cell_y_r(j) + 0.5_num) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + cell_frac_x(j) = REAL(cell_x3(j), num) - cell_x_r(j) + cell_frac_y(j) = REAL(cell_y3(j), num) - cell_y_r(j) + cell_x3(j) = cell_x3(j) + 1 + cell_y3(j) = cell_y3(j) + 1 + END DO + + hxx = 0.0_num + hyy = 0.0_num + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + dcellx(j) = cell_x3(j) - cell_x1(j) + dcelly(j) = cell_y3(j) - cell_y1(j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cfx2(j) = cell_frac_x(j)**2 + cfy2(j) = cell_frac_y(j)**2 + + END DO + + ! Calculating hxx + ! NOTE: These weights require an additional multiplication factor + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) + hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) + hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) + + hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) + hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) + hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) + + END DO ! End do-loop for hxx + + ! Now change Xi1* to be Xi1*-Xi0*. This makes the representation of + ! the current update much simpler + + hxx = hxx - gxx + hyy = hxx - gyy + + ! Remember that due to CFL condition particle can never cross more + ! than one gridcell in one timestep + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + xmin(j) = sf_min + (dcellx(j) - 1) / 2 + ymin(j) = sf_min + (dcelly(j) - 1) / 2 + + fjx(jj) = fcx(jj) * q(jj) + fjy(jj) = fcy(jj) * q(jj) + fjz(jj) = fcz(jj) * q(jj) * vz(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + yfac10(j) = gyy(ymin(j),j) + 0.5_num * hyy(ymin(j),j) + yfac11(j) = gyy(ymin(j) + 1,j) + 0.5_num * hyy(ymin(j) + 1,j) + yfac12(j) = gyy(ymin(j) + 2,j) + 0.5_num * hyy(ymin(j) + 2,j) + + yfac20(j) = third * hyy(ymin(j),j) + 0.5_num * gyy(ymin(j),j) + yfac21(j) = third * hyy(ymin(j) + 1,j) + 0.5_num * gyy(ymin(j) + 1,j) + yfac22(j) = third * hyy(ymin(j) + 2,j) + 0.5_num * gyy(ymin(j) + 2,j) + + xfac10(j) = gxx(xmin(j),j) + 0.5_num * hxx(ymin(j),j) + xfac11(j) = gxx(xmin(j) + 1,j) + 0.5_num * hxx(ymin(j) + 1,j) + xfac12(j) = gxx(xmin(j) + 2,j) + 0.5_num * hxx(ymin(j) + 2,j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + + wx(1,j) = hxx(xmin(j),j) * yfac10(j) + wx(2,j) = hxx(xmin(j) + 1,j) * yfac10(j) + wx(3,j) = hxx(xmin(j) + 2,j) * yfac10(j) + wx(4,j) = hxx(xmin(j),j) * yfac11(j) + wx(5,j) = hxx(xmin(j) + 1,j) * yfac11(j) + wx(6,j) = hxx(xmin(j) + 2,j) * yfac11(j) + wx(7,j) = hxx(xmin(j),j) * yfac12(j) + wx(8,j) = hxx(xmin(j) + 1,j) * yfac12(j) + wx(9,j) = hxx(xmin(j) + 2,j) * yfac12(j) + + wy(1,j) = hyy(ymin(j),j) * xfac10(j) + wy(2,j) = hyy(ymin(j),j) * xfac11(j) + wy(3,j) = hyy(ymin(j),j) * xfac12(j) + wy(4,j) = hyy(ymin(j) + 1,j) * xfac10(j) + wy(5,j) = hyy(ymin(j) + 1,j) * xfac11(j) + wy(6,j) = hyy(ymin(j) + 1,j) * xfac12(j) + wy(7,j) = hyy(ymin(j) + 2,j) * xfac10(j) + wy(8,j) = hyy(ymin(j) + 2,j) * xfac11(j) + wy(9,j) = hyy(ymin(n) + 2,j) * xfac12(j) + + wz(1,j) = gxx(xmin(j)) * yfac10(j) + hxx(xmin(j),j) * yfac20(j) + wz(2,j) = gxx(xmin(j) + 1,j) * yfac10(j) + hxx(xmin(j) + 1,j) * yfac20(j) + wz(3,j) = gxx(xmin(j) + 2,j) * yfac10(j) + hxx(xmin(j) + 2,j) * yfac20(j) + wz(4,j) = gxx(xmin(j),j) * yfac11(j) + hxx(xmin(j),j) * yfac21(j) + wz(5,j) = gxx(xmin(j) + 1,j) * yfac11(j) + hxx(xmin(j) + 1,j) * yfac21(j) + wz(6,j) = gxx(xmin(j) + 2,j) * yfac11(j) + hxx(xmin(j) + 2,j) * yfac21(j) + wz(7,j) = gxx(xmin(j),j) * yfac12(j) + hxx(xmin(j),j) * yfac22(j) + wz(8,j) = gxx(xmin(j) + 1,j) * yfac12(j) + hxx(xmin(j) + 1,j) * yfac22(j) + wz(9,j) = gxx(xmin(j) + 2,j) * yfac12(j) + hxx(xmin(j) + 2,j) * yfac22(j) + + cx(j) = cell_x1(jj) + xmin(j) + cy(j) = cell_y1(jj) + ymin(j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + ic(j) = cx(j) + (cy(j) - 1) * nx + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jxh(1,ic(j)) = jxh(1,ic(j)) - fjx(j) * wx(1,j) + jxh(2,ic(j)) = jxh(2,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j)) + jxh(3,ic(j)) = jxh(3,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j) + wx(3,j)) + jxh(4,ic(j)) = jxh(4,ic(j)) - fjx(j) * wx(4,j) + jxh(5,ic(j)) = jxh(5,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j)) + jxh(6,ic(j)) = jxh(6,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j) + wx(6,j)) + jxh(7,ic(j)) = jxh(7,ic(j)) - fjx(j) * wx(7,j) + jxh(8,ic(j)) = jxh(8,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j)) + jxh(9,ic(j)) = jxh(9,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j) + wx(9,j)) + + jyh(1,ic(j)) = jyh(1,ic(j)) - fjy(j) * wy(1,j) + jyh(2,ic(j)) = jyh(2,ic(j)) - fjy(j) * wy(2,j) + jyh(3,ic(j)) = jyh(3,ic(j)) - fjy(j) * wy(3,j) + jyh(4,ic(j)) = jyh(4,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j)) + jyh(5,ic(j)) = jyh(5,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j)) + jyh(6,ic(j)) = jyh(6,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j)) + jyh(7,ic(j)) = jyh(7,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j) + wy(7,j)) + jyh(8,ic(j)) = jyh(8,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j) + wy(8,j)) + jyh(9,ic(j)) = jyh(9,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j) + wy(9,j)) + + jzh(1,ic(j)) = jzh(1,ic(j)) + fjz(j) * wz(1,j) + jzh(2,ic(j)) = jzh(2,ic(j)) + fjz(j) * wz(2,j) + jzh(3,ic(j)) = jzh(3,ic(j)) + fjz(j) * wz(3,j) + jzh(4,ic(j)) = jzh(4,ic(j)) + fjz(j) * wz(4,j) + jzh(5,ic(j)) = jzh(5,ic(j)) + fjz(j) * wz(5,j) + jzh(6,ic(j)) = jzh(6,ic(j)) + fjz(j) * wz(6,j) + jzh(7,ic(j)) = jzh(7,ic(j)) + fjz(j) * wz(7,j) + jzh(8,ic(j)) = jzh(8,ic(j)) + fjz(j) * wz(8,j) + jzh(9,ic(j)) = jzh(9,ic(j)) + fjz(j) * wz(9,j) + + END DO + + END DO ! End do-loop for index i + + ! Deposit current on the cells + + DO j = 1, ny + DO i = 1, nx + iic = (j - 1) * nx + i + + jx(i,j) = jx(i,j) + jxh(1,iic) + jx(i + 1,j) = jx(i + 1,j) + jxh(2,iic) + jx(i + 2,j) = jx(i + 2,j) + jxh(3,iic) + jx(i,j + 1) = jx(i, j + 1) + jxh(4,iic) + jx(i + 1,j + 1) = jx(i + 1, j + 1) + jxh(5,iic) + jx(i + 2,j + 1) = jx(i + 2, j + 1) + jxh(6,iic) + jx(i,j + 2) = jx(i,j + 2) + jxh(7,iic) + jx(i + 1,j + 2) = jx(i + 1,j + 2) + jxh(8,iic) + jx(i + 2,j + 2) = jx(i + 2,j + 2) + jxh(9,iic) + + jy(i,j) = jy(i,j) + jyh(1,iic) + jy(i + 1,j) = jy(i + 1,j) + jyh(2,iic) + jy(i + 2,j) = jy(i + 2,j) + jyh(3,iic) + jy(i,j + 1) = jy(i, j + 1) + jyh(4,iic) + jy(i + 1,j + 1) = jy(i + 1, j + 1) + jyh(5,iic) + jy(i + 2,j + 1) = jy(i + 2, j + 1) + jyh(6,iic) + jy(i,j + 2) = jy(i,j + 2) + jyh(7,iic) + jy(i + 1,j + 2) = jy(i + 1,j + 2) + jyh(8,iic) + jy(i + 2,j + 2) = jy(i + 2,j + 2) + jyh(9,iic) + + jz(i,j) = jz(i,j) + jzh(1,iic) + jz(i + 1,j) = jz(i + 1,j) + jzh(2,iic) + jz(i + 2,j) = jz(i + 2,j) + jzh(3,iic) + jz(i,j + 1) = jz(i, j + 1) + jzh(4,iic) + jz(i + 1,j + 1) = jz(i + 1, j + 1) + jzh(5,iic) + jz(i + 2,j + 1) = jz(i + 2, j + 1) + jzh(6,iic) + jz(i,j + 2) = jz(i,j + 2) + jzh(7,iic) + jz(i + 1,j + 2) = jz(i + 1,j + 2) + jzh(8,iic) + jz(i + 2,j + 2) = jz(i + 2,j + 2) + jzh(9,iic) + + END DO + END DO + +END SUBROUTINE triangle_current_deposition From fd3f3884118f6dec8e328e5e19481171dab5e732 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Mar 2021 12:04:12 +0100 Subject: [PATCH 041/106] Added modified f0 --- epoch2d/src/current_deposition.F90 | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index 2e105e620..d8cacfd2b 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -734,6 +734,39 @@ SUBROUTINE triangle_current_deposition() END SUBROUTINE triangle_current_deposition + FUNCTION f0(ispecies, mass, px, py, pz) + + INTEGER, INTENT(IN) :: ispecies + REAL(num), INTENT(IN) :: mass + REAL(num), INTENT(IN) :: px, py, pz + REAL(num) :: f0 + REAL(num) :: Tx, Ty, Tz, driftx, drifty, driftz, density + REAL(num) :: f0_exponent, norm, two_kb_mass, two_pi_kb_mass3 + TYPE(particle_species), POINTER :: species + + species => species_list(ispecies) + + IF (ABS(species%initial_conditions%density_back) > c_tiny) THEN + two_kb_mass = 2.0_num * kb * mass + two_pi_kb_mass3 = (pi * two_kb_mass)**3 + + Tx = species%initial_conditions%temp_back(1) + Ty = species%initial_conditions%temp_back(2) + Tz = species%initial_conditions%temp_back(3) + driftx = species%initial_conditions%drift_back(1) + drifty = species%initial_conditions%drift_back(2) + driftz = species%initial_conditions%drift_back(3) + density = species%initial_conditions%density_back + f0_exponent = ((px - driftx)**2 / Tx & + + (py - drifty)**2 / Ty & + + (pz - driftz)**2 / Tz) / two_kb_mass + norm = density / SQRT(two_pi_kb_mass3 * Tx * Ty * Tz) + f0 = norm * EXP(-f0_exponent) + ELSE + f0 = 0.0_num + END IF + + END FUNCTION f0 From a5fb3782a1fdc32b652679b522b0882e8904c7ec Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Mar 2021 12:08:46 +0100 Subject: [PATCH 042/106] Minor typo --- epoch2d/src/current_deposition.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index d8cacfd2b..34a810f03 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -43,7 +43,7 @@ SUBROUTINE particle_sorting() DO ipart = 1, species_list(ispecies)%attached_list%count next => current%next k = tile_id(ipart) - g_índx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins + g_indx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins #ifndef PER_SPECIES_WEIGHT w(g_indx(k)) = current%weight From 3c1cdb8f0f4188fdef55dcc3bd6ffa8a6473535d Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 10 Mar 2021 13:42:54 +0100 Subject: [PATCH 043/106] First attempt to current vectorization --- epoch2d/src/particles.F90 | 85 ++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 24 deletions(-) diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index 528b58b4f..4eeb563d2 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -76,7 +76,12 @@ SUBROUTINE push_particles ! Used by J update INTEGER :: xmin, xmax, ymin, ymax + INTEGER :: i, k REAL(num) :: wx, wy, wz + REAL(num), DIMENSION(-2:2,-2:2) :: wwx, wwy, wwz + REAL(num), DIMENSION(-3:2,-2:2) :: jxi + REAL(num), DIMENSION(-2:2,-3:2) :: jyi + REAL(num), DIMENSION(-2:2,-2:2) :: jzi ! Temporary variables REAL(num) :: idty, idtx, idxy @@ -444,33 +449,65 @@ SUBROUTINE push_particles fjy = fcy * part_q fjz = fcz * part_q * part_vz - jyh = 0.0_num - DO iy = ymin, ymax - cy = cell_y1 + iy - yfac1 = gy(iy) + 0.5_num * hy(iy) - yfac2 = third * hy(iy) + 0.5_num * gy(iy) - - hy_iy = hy(iy) - - jxh = 0.0_num - DO ix = xmin, xmax - cx = cell_x1 + ix - xfac1 = gx(ix) + 0.5_num * hx(ix) +! jyh = 0.0_num +! DO iy = ymin, ymax +! cy = cell_y1 + iy +! yfac1 = gy(iy) + 0.5_num * hy(iy) +! yfac2 = third * hy(iy) + 0.5_num * gy(iy) +! +! hy_iy = hy(iy) +! +! jxh = 0.0_num +! DO ix = xmin, xmax +! cx = cell_x1 + ix +! xfac1 = gx(ix) + 0.5_num * hx(ix) +! +! wx = hx(ix) * yfac1 +! wy = hy_iy * xfac1 +! wz = gx(ix) * yfac1 + hx(ix) * yfac2 +! +! ! This is the bit that actually solves d(rho)/dt = -div(J) +! jxh = jxh - fjx * wx +! jyh(ix) = jyh(ix) - fjy * wy +! jzh = fjz * wz +! +! jx(cx, cy) = jx(cx, cy) + jxh +! jy(cx, cy) = jy(cx, cy) + jyh(ix) +! jz(cx, cy) = jz(cx, cy) + jzh +! END DO +! END DO + + DO k = -2,2 + DO i = -2,2 + wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) + wwy(i,k)=hy(k)*(gx(i)+0.5*hx(i)) + wwz(i,k)=gx(i)*gy(k) & + +0.5*hx(i)*gy(k) & + +0.5*gx(i)*hy(k) & + +third*hx(i)*hy(k) + END DO + END DO - wx = hx(ix) * yfac1 - wy = hy_iy * xfac1 - wz = gx(ix) * yfac1 + hx(ix) * yfac2 + DO k = -2,2 + DO i = -2,2 + jxi(i,k) = jxi(i-1,k) - fjx * wwx(i,k) + jyi(i,k) = jyi(i,k-1) - fjy * wwy(i,k) + jzi(i,k) = fjz * wwz(i,k) + END DO + END DO - ! This is the bit that actually solves d(rho)/dt = -div(J) - jxh = jxh - fjx * wx - jyh(ix) = jyh(ix) - fjy * wy - jzh = fjz * wz + + DO k = -2,2 + DO i = -2,2 + jx(cell_x1+i,cell_y1+k)=jxi(cell_x1+i,cell_y1+k) & + + jxi(i,k) + jy(cell_x1+i,cell_y1+k)=jyi(cell_x1+i,cell_y1+k) & + + jyi(i,k) + jz(cell_x1+i,cell_y1+k)=jzi(cell_x1+i,cell_y1+k) & + + jzi(i,k) + END DO + END DO - jx(cx, cy) = jx(cx, cy) + jxh - jy(cx, cy) = jy(cx, cy) + jyh(ix) - jz(cx, cy) = jz(cx, cy) + jzh - END DO - END DO #ifdef ZERO_CURRENT_PARTICLES END IF #endif From 2136995cfd0813a245fdc9df2881283035aff52e Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 10 Mar 2021 14:08:40 +0100 Subject: [PATCH 044/106] Correction in mem. alloc --- epoch2d/src/particles.F90 | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index 4eeb563d2..78c99c50c 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -477,6 +477,10 @@ SUBROUTINE push_particles ! END DO ! END DO + jxi = 0.0 + jyi = 0.0 + jzi = 0.0 + DO k = -2,2 DO i = -2,2 wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) @@ -499,11 +503,11 @@ SUBROUTINE push_particles DO k = -2,2 DO i = -2,2 - jx(cell_x1+i,cell_y1+k)=jxi(cell_x1+i,cell_y1+k) & + jx(cell_x1+i,cell_y1+k)=jx(cell_x1+i,cell_y1+k) & + jxi(i,k) - jy(cell_x1+i,cell_y1+k)=jyi(cell_x1+i,cell_y1+k) & + jy(cell_x1+i,cell_y1+k)=jy(cell_x1+i,cell_y1+k) & + jyi(i,k) - jz(cell_x1+i,cell_y1+k)=jzi(cell_x1+i,cell_y1+k) & + jz(cell_x1+i,cell_y1+k)=jz(cell_x1+i,cell_y1+k) & + jzi(i,k) END DO END DO From fc10d3f4e6ab44cbb57aafe56e9c5193e3435d9e Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 10 Mar 2021 16:38:23 +0100 Subject: [PATCH 045/106] Adjusting the cell range from static to dynamic --- epoch2d/src/particles.F90 | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index 78c99c50c..b3c4d87bc 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -477,12 +477,9 @@ SUBROUTINE push_particles ! END DO ! END DO - jxi = 0.0 - jyi = 0.0 - jzi = 0.0 - DO k = -2,2 - DO i = -2,2 + DO k = ymin,ymax + DO i = xmin,xmax wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) wwy(i,k)=hy(k)*(gx(i)+0.5*hx(i)) wwz(i,k)=gx(i)*gy(k) & @@ -492,8 +489,8 @@ SUBROUTINE push_particles END DO END DO - DO k = -2,2 - DO i = -2,2 + DO k = ymin,ymax + DO i = xmin,xmax jxi(i,k) = jxi(i-1,k) - fjx * wwx(i,k) jyi(i,k) = jyi(i,k-1) - fjy * wwy(i,k) jzi(i,k) = fjz * wwz(i,k) @@ -501,8 +498,8 @@ SUBROUTINE push_particles END DO - DO k = -2,2 - DO i = -2,2 + DO k = ymin,ymax + DO i = xmin,xmax jx(cell_x1+i,cell_y1+k)=jx(cell_x1+i,cell_y1+k) & + jxi(i,k) jy(cell_x1+i,cell_y1+k)=jy(cell_x1+i,cell_y1+k) & From 28c7a7b80c5355259501a2ee3d58db184cd6a676 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 24 Mar 2021 16:43:51 +0100 Subject: [PATCH 046/106] Falling back to basic push_particles --- epoch2d/Makefile | 4 +- epoch2d/src/particles.F90 | 87 +++++++++++---------------------------- 2 files changed, 27 insertions(+), 64 deletions(-) diff --git a/epoch2d/Makefile b/epoch2d/Makefile index 48fd99c3a..c78701aff 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -50,11 +50,11 @@ endif # Intel # ===== ifeq ($(strip $(COMPILER)),intel) - FFLAGS = -O3 -g -stand f03 + FFLAGS = -O3 -g -stand f03 -qopenmp ifeq ($(strip $(CONS)),1) FLTCONS = -pc64 -fltconsistency endif - #FFLAGS = -O3 -heap-arrays 64 -ipo -xHost # Optimised (B) + #FFLAGS = -O3 -heap-arrays 64 -qopt-report-phase=vec -qopt-report=5 -qopenmp # Optimised (B) #FFLAGS = -O3 -heap-arrays 64 -ipo -xAVX # Optimised (W) ifeq ($(strip $(MODE)),debug) FFLAGS = -O0 -fpe0 -nothreads -traceback -fltconsistency \ diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index b3c4d87bc..fe14e4c16 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -76,12 +76,7 @@ SUBROUTINE push_particles ! Used by J update INTEGER :: xmin, xmax, ymin, ymax - INTEGER :: i, k REAL(num) :: wx, wy, wz - REAL(num), DIMENSION(-2:2,-2:2) :: wwx, wwy, wwz - REAL(num), DIMENSION(-3:2,-2:2) :: jxi - REAL(num), DIMENSION(-2:2,-3:2) :: jyi - REAL(num), DIMENSION(-2:2,-2:2) :: jzi ! Temporary variables REAL(num) :: idty, idtx, idxy @@ -449,66 +444,33 @@ SUBROUTINE push_particles fjy = fcy * part_q fjz = fcz * part_q * part_vz -! jyh = 0.0_num -! DO iy = ymin, ymax -! cy = cell_y1 + iy -! yfac1 = gy(iy) + 0.5_num * hy(iy) -! yfac2 = third * hy(iy) + 0.5_num * gy(iy) -! -! hy_iy = hy(iy) -! -! jxh = 0.0_num -! DO ix = xmin, xmax -! cx = cell_x1 + ix -! xfac1 = gx(ix) + 0.5_num * hx(ix) -! -! wx = hx(ix) * yfac1 -! wy = hy_iy * xfac1 -! wz = gx(ix) * yfac1 + hx(ix) * yfac2 -! -! ! This is the bit that actually solves d(rho)/dt = -div(J) -! jxh = jxh - fjx * wx -! jyh(ix) = jyh(ix) - fjy * wy -! jzh = fjz * wz -! -! jx(cx, cy) = jx(cx, cy) + jxh -! jy(cx, cy) = jy(cx, cy) + jyh(ix) -! jz(cx, cy) = jz(cx, cy) + jzh -! END DO -! END DO - - - DO k = ymin,ymax - DO i = xmin,xmax - wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) - wwy(i,k)=hy(k)*(gx(i)+0.5*hx(i)) - wwz(i,k)=gx(i)*gy(k) & - +0.5*hx(i)*gy(k) & - +0.5*gx(i)*hy(k) & - +third*hx(i)*hy(k) - END DO - END DO + jyh = 0.0_num + DO iy = ymin, ymax + cy = cell_y1 + iy + yfac1 = gy(iy) + 0.5_num * hy(iy) + yfac2 = third * hy(iy) + 0.5_num * gy(iy) - DO k = ymin,ymax - DO i = xmin,xmax - jxi(i,k) = jxi(i-1,k) - fjx * wwx(i,k) - jyi(i,k) = jyi(i,k-1) - fjy * wwy(i,k) - jzi(i,k) = fjz * wwz(i,k) - END DO - END DO + hy_iy = hy(iy) - - DO k = ymin,ymax - DO i = xmin,xmax - jx(cell_x1+i,cell_y1+k)=jx(cell_x1+i,cell_y1+k) & - + jxi(i,k) - jy(cell_x1+i,cell_y1+k)=jy(cell_x1+i,cell_y1+k) & - + jyi(i,k) - jz(cell_x1+i,cell_y1+k)=jz(cell_x1+i,cell_y1+k) & - + jzi(i,k) - END DO - END DO + jxh = 0.0_num + DO ix = xmin, xmax + cx = cell_x1 + ix + xfac1 = gx(ix) + 0.5_num * hx(ix) + + wx = hx(ix) * yfac1 + wy = hy_iy * xfac1 + wz = gx(ix) * yfac1 + hx(ix) * yfac2 + ! This is the bit that actually solves d(rho)/dt = -div(J) + jxh = jxh - fjx * wx + jyh(ix) = jyh(ix) - fjy * wy + jzh = fjz * wz + + jx(cx, cy) = jx(cx, cy) + jxh + jy(cx, cy) = jy(cx, cy) + jyh(ix) + jz(cx, cy) = jz(cx, cy) + jzh + END DO + END DO #ifdef ZERO_CURRENT_PARTICLES END IF #endif @@ -761,3 +723,4 @@ END SUBROUTINE push_photons #endif END MODULE particles + From 40982f18426f9ad3f467bbb504783b3dafaf3d63 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 09:33:38 +0200 Subject: [PATCH 047/106] Update to EPOCH coding style --- epoch2d/src/boundary.F90 | 6 ++++++ epoch2d/src/housekeeping/window.F90 | 10 +++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 267684ad7..913e092d9 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -314,6 +314,8 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local) END SUBROUTINE do_field_mpi_with_lengths + + SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) @@ -486,6 +488,8 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & END SUBROUTINE all_comp_field_bc + + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & xmin, xmax, ymin, ymax, offset) @@ -522,6 +526,8 @@ SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & END SUBROUTINE unload_field_boundaries_from_buffer + + SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) INTEGER, INTENT(IN) :: ng diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 9a198257c..a438ffd16 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -92,6 +92,7 @@ SUBROUTINE shift_window(window_shift_cells) END SUBROUTINE shift_window + SUBROUTINE shift_fields(window_shift_cells) INTEGER :: j @@ -183,6 +184,7 @@ SUBROUTINE shift_fields(window_shift_cells) END SUBROUTINE shift_fields + SUBROUTINE shift_field(field, ng, window_shift_cells) INTEGER, INTENT(IN) :: ng, window_shift_cells @@ -192,7 +194,7 @@ SUBROUTINE shift_field(field, ng, window_shift_cells) ! Shift field to the left by window_shift_cells DO j = 1-ng, ny+ng DO i = 1-ng, nx+ng-window_shift_cells - field(i,j) = field(i+window_shift_cells, j) + field(i,j) = field(i+window_shift_cells,j) END DO END DO !CALL field_bc(field, ng) @@ -200,6 +202,7 @@ SUBROUTINE shift_field(field, ng, window_shift_cells) END SUBROUTINE shift_field + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) @@ -434,8 +437,9 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + + SUBROUTINE mw_io_test(step, dump) -! USE diagnostics USE deck_io_block INTEGER, INTENT(IN) :: step @@ -493,8 +497,8 @@ SUBROUTINE mw_io_test(step, dump) END SUBROUTINE mw_io_test + SUBROUTINE moving_window(step) -! USE diagnostics USE deck_io_block #ifndef PER_SPECIES_WEIGHT From dbd5164dee02d9f26afc36efca1442d98112321c Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 09:35:31 +0200 Subject: [PATCH 048/106] Removed dummy routine --- epoch2d/src/current_deposition.F90 | 783 ----------------------------- 1 file changed, 783 deletions(-) delete mode 100644 epoch2d/src/current_deposition.F90 diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 deleted file mode 100644 index 34a810f03..000000000 --- a/epoch2d/src/current_deposition.F90 +++ /dev/null @@ -1,783 +0,0 @@ -SUBROUTINE particle_sorting() - - dx_bin = 5 * dx ! Number of cells in bin along x - dy_bin = 5 * dy ! Number of cells in bin along y - - idx_bin = 1./dx_bin ! Inverse of dx_bin - idy_bin = 1./dy_bin ! Inverse of dy_bin - - nx_bin = ceiling((x_grid_max_local - x_grid_min_local) * idx_bin) ! Number of bins along x - ny_bin = ceiling((y_grid_max_local - y_grid_min_local) * idy_bin) ! Number of bins along y - - n_bins = nx_bin * ny_bin ! Total number of bins - - ! Calculate particle positions in terms of the bin co-ordinates - - DO ipart = 1, species_list(ispecies)%attached_list%count - next => current%next - part_x = (current%part_pos(1) - x_grid_min_local) * idx_bin - part_y = (current%part_pos(2) - y_grid_min_local) * idy_bin - - ix = floor(part_x) ! x-coordinate of the particle bin - iy = floor(part_y) ! y-coordinate of the particle bin - - tile_id(ipart) = iy * nx_bin + ix + 1 ! 1-D coordinate of the bins - - num(tile_id(ipart)) = num(tile_id(ipart)) + 1 ! Number of particles in each bin - - current => next - - END DO ! End do-loop for particle position in terms of bin co-ordinates - - k = 0 - - ! Determine the stride of particle indices in bins - - DO i = 1, n_bins - g_indx(i) = k ! Starting particle index for a particular bin - k = k + num(i) - END DO ! End do-loop for the stride of particle indices in bins - - ! Particle sorting in 1-D bins - - DO ipart = 1, species_list(ispecies)%attached_list%count - next => current%next - k = tile_id(ipart) - g_indx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins - -#ifndef PER_SPECIES_WEIGHT - w(g_indx(k)) = current%weight - fcx(g_indx(k)) = idty * w(g_indx(k)) - fcy(g_indx(k)) = idtx * w(g_indx(k)) - fcz(g_indx(k)) = idtxy * w(g_indx(k)) -#endif - -#ifndef NO_PARTICLE_PROBES - init_x(g_indx(k)) = current%part_pos(1) - init_y(g_indx(k)) = current%part_pos(2) -#endif - -#ifdef PER_PARTICLE_CHARGE_MASS - q(g_indx(k)) = current%charge - m(g_indx(k)) = current%mass - mc(g_indx(k)) = c * current%mass - i_mc(g_indx(k)) = 1.0_num / mc - cmratio(g_indx(k)) = q(g_indx(k)) * dtfac * i_mc(g_indx(k)) - ccmratio(g_indx(k)) = c * cmratio(g_indx(k)) -#ifndef NO_PARTICLE_PROBES - mc2(g_indx(k)) = c * mc(g_indx(k)) -#endif -#endif - - ! Copy the particle properties out for sorting - x(g_indx(k)) = current%part_pos(1) - x_grid_min_local - y(g_indx(k)) = current%part_pos(2) - y_grid_min_local - px(g_indx(k)) = current%part_p(1) * ipart_mc - py(g_indx(k)) = current%part_p(2) * ipart_mc - pz(g_indx(k)) = current%part_p(3) * ipart_mc - pvol(g_indx(k)) = current%pvol - gamma_rel(g_indx(k)) = SQRT(px(g_indx(k))**2 + py(g_indx(k))**2 + pz(g_indx(k))**2 + 1.0_num) - root(g_indx(k)) = dtco2 / gamma_rel(g_indx(k)) - current => next - END DO ! End do-loop for particle sorting - -END SUBROUTINE particle_sorting - -SUBROUTINE particle_pusher() - - DO i = 1, species_list(ispecies)%attached_list%count, LVEC - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - jj = i + j - 1 - x(jj) = x(jj) + px(jj) * root(jj) - y(jj) = y(jj) + py(jj) * root(jj) - -#ifdef WORK_DONE_INTEGRATED - ! This is the actual total work done by the fields: Results correspond - ! with the electron's gamma factor - - root(jj) = cmratio(jj) / gamma_rel(jj) - - tmp_x(j) = px(jj) * root(jj) - tmp_y(j) = py(jj) * root(jj) - tmp_z(j) = pz(jj) * root(jj) -#endif - - END DO ! End do-loop for j - - ! Calculate fields at particle positions - ! Grid cell position as a fraction - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_x_r(j) = x(jj) * idx - cell_y_r(j) = y(jj) * idy - - END DO ! End do-loop for grid cell position as fraction - - ! Round cell position to nearest cell - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_x1(jj) = FLOOR(cell_x_r(j) + 0.5_num) - cell_y1(jj) = FLOOR(cell_y_r(j) + 0.5_num) - - cell_x2(j) = FLOOR(cell_x_r(j)) - cell_y2(j) = FLOOR(cell_y_r(j)) - - END DO ! End do-loop for nearest cell position - - ! Calculate fraction of cell between nearest cell boundary and particle - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_frac_x(j) = REAL(cell_x1(jj), num) - cell_x_r(j) - cell_frac_y(j) = REAL(cell_y1(jj), num) - cell_y_r(j) - - END DO ! End do-loop for grid cell position fraction - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cfx2(j) = cell_frac_x(j)**2 - cfy2(j) = cell_frac_y(j)**2 - - END DO - - DO j = 1, 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - jj = i + j - 1 - cell_x1(jj) = cell_x1(jj) + 1 - cell_y1(jj) = cell_y1(jj) + 1 - - ! Particle weight factors as described in Page 25 of the PSC manual - ! These weight grid properties onto particles - ! Also used to weight particle properties onto grid, used later to calculate J - ! NOTE: These weights require an additional multiplication factor - - ! This weighing is for triangle shaped particles - - gxx(-1,j) = 0.25_num + cfx2(j) + cell_frac_x(j) - gxx( 0,j) = 1.5_num - 2.0_num * cfx2(j) - gxx( 1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) - - gyy(-1,j) = 0.25_num - cfy2(j) + cell_frac_y(j) - gyy( 0,j) = 1.5_num - 2.0_num * cfy2(j) - gyy( 1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) - - ! Now redo shifted by half a cell due to grid stagger - ! Use shifted version for ex in X, ey in Y, ez in Z - ! And in Y&Z for bx, X&Z for by, X&Y for bz - - END DO ! End do-loop with gxx - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cell_frac_x(j) = REAL(cell_x2(j), num) - cell_x_r(j) + 0.5_num - cell_frac_y(j) = REAL(cell_y2(j), num) - cell_y_r(j) + 0.5_num - - cell_x2(j) = cell_x2(j) + 1 - cell_y2(j) = cell_y2(j) + 1 - - END DO ! End do-loop for re-doing cell_frac_(x,y) - - dcellx = 0 - dcelly = 0 - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cfx2(j) = cell_frac_x(j)**2 - cfy2(j) = cell_frac_y(j)**2 - - END DO - - ! Calculating hxx - ! NOTE: These weights require an additional multiplication factor - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) - hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) - hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) - - hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) - hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) - hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) - - END DO ! End do-loop for hxx - - ! These are the electric and magnetic fields interpolated to the - ! particle position. They have been checked and are correct. - ! Actually checking this is messy - - ! Calculate e-fields at particle position for triangle particles - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - ex_part(j) = & - gyy(-1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)-1) & - + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)-1) & - + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)-1)) & - + gyy( 0,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j) ) & - + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j) ) & - + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j) )) & - + gyy( 1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)+1) & - + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)+1) & - + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)+1)) - - ey_part(j) = & - hyy(-1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)-1) & - + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)-1) & - + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)-1)) & - + hyy( 0,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j) ) & - + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j) ) & - + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j) )) & - + hyy( 1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)+1) & - + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)+1) & - + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)+1)) - - ez_part(j) = & - gyy(-1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)-1) & - + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)-1) & - + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)-1)) & - + gyy( 0,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j) ) & - + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j) ) & - + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j) )) & - + gyy( 1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)+1) & - + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)+1) & - + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)+1)) - - END DO ! End do-loop for e-fields at particle position - - ! Calculate b-fields at particle position for triangle particles - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - bx_part(j) = & - hyy(-1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)-1) & - + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)-1) & - + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)-1)) & - + hyy( 0,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j) ) & - + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j) ) & - + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j) )) & - + hyy( 1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)+1) & - + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)+1) & - + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)+1)) - - by_part(j) = & - gyy(-1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)-1) & - + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)-1) & - + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)-1)) & - + gyy( 0,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j) ) & - + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j) ) & - + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j) )) & - + gyy( 1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)+1) & - + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)+1) & - + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)+1)) - - bz_part(j) = & - hyy(-1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)-1) & - + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)-1) & - + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)-1)) & - + gyy( 0,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j) ) & - + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j) ) & - + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j) )) & - + gyy( 1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)+1) & - + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)+1) & - + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)+1)) - - END DO ! End do-loop for b-fields at particle position - - - ! Update particle momenta using weighted fields - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j -1 - - uxm(j) = px(jj) + cmratio(jj) * ex_part(j) - uym(j) = py(jj) + cmratio(jj) * ey_part(j) - uzm(j) = pz(jj) + cmratio(jj) * ez_part(j) - - END DO - -#ifdef HC_PUSH - - ! Half timestep, then use Higuera-Cary push - ! See https://aip.scitation.org/doi/10.1063/1.4979989 - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - gamma_rel(jj) = uxm(j)**2 + uym(j)**2 + uzm(j)**2 + 1.0_num - alpha(j) = 0.5_num * q(jj) * dt / m(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - beta_x(j) = alpha(j) * bx_part(j) - beta_y(j) = alpha(j) * by_part(j) - beta_z(j) = alpha(j) * bz_part(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - beta2(j) = beta_x(j)**2 + beta_y(j)**2 + beta_z(j)**2 - beta_dot_u(j) = beta_x(j) * uxm(j) + beta_y(j) * uym(j) + beta_z(j) * uzm(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j -1 - gamma_rel(jj) = SQRT(0.5_num & - * (gamma_rel(jj) - beta2(j) & - + SQRT((gamma_rel(jj) - beta2(j))**2 & - + 4.0_num * (beta2(j) + beta_dot_u(j)**2)))) - - END DO - -#else - - ! Half timestep, then use Boris1970 rotation, see Birdsall and Langdon - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - gamma_rel(jj) = SQRT(uxm(j)**2 + uym(j)**2 + uzm(j)**2) - - END DO - -#endif - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - root(jj) = ccmratio(jj) / gamma_rel(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - taux(j) = bx_part(j) * root(jj) - tauy(j) = by_part(j) * root(jj) - tauz(j) = bz_part(j) * root(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - taux2(j) = taux(j)**2 - tauy2(j) = taux(j)**2 - tauz2(j) = tauz(j)**2 - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - tau(j) = 1.0_num / (1.0_num + taux2(j) + tauy2(j) + tauz2(j)) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - uxp(j) = ((1.0_num + taux2(j) - tauy2(j) - tauz2(j)) * uxm(j) & - + 2.0_num * ((taux(j) * tauy(j) + tauz(j)) * uym(j) & - + (taux(j) * tauz(j) - tauy(j)) * uzm(j))) * tau(j) - uyp(j) = ((1.0_num - taux2(j) + tauy2(j) - tauz2(j)) * uym(j) & - + 2.0_num * ((tauy(j) * tauz(j) + taux(j)) * uzm(j) & - + (tauy(j) * taux(j) - tauz(j)) * uxm(j))) * tau(j) - uzp(j) = ((1.0_num - taux2(j) - tauy2(j) + tauz2(j)) * uzm(j) & - + 2.0_num * ((tauz(j) * taux(j) + tauy(j)) * uxm(j) & - + (tauz(j) * tauy(j) - taux(j)) * uym(j))) * tau(j) - - END DO - - ! Rotation over, go to full timestep - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - px(jj) = uxp(j) + cmratio(jj) * ex_part(j) - py(jj) = uyp(j) + cmratio(jj) * ey_part(j) - pz(jj) = uzp(j) + cmratio(jj) * ez_part(j) - - END DO - - ! Calculate particle velocity from particle momentum - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - gamma_rel(jj) = SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) - igamma(jj) = 1.0_num / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) - root(jj) = dtco2 / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - delta_x(jj) = px(jj) * root(jj) - delta_y(jj) = py(jj) * root(jj) - vz(jj) = pz(jj) * c * igamma(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - x(jj) = x(jj) + delta_x(j) - y(jj) = y(jj) + delta_y(j) - - END DO - - ! Particle has now finished move to end of timestep, so copy back - ! into particle array - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - next => current - current%part_pos = (/ x(jj) + x_grid_min_local, & - y(jj) + y_grid_min_local /) - current%part_p = mc(jj) * (/ px(jj), py(jj), pz(jj) /) - - ! Add particle to boundary candidate list - IF (current%part_pos(1) < bnd_x_min & - .OR. current%part_pos(1) > bnd_x_max & - .OR. current%part_pos(2) < bnd_y_min & - .OR. current%part_pos(2) > bnd_y_max ) THEN - ALLOCATE(bnd_part_next) - bnd_part_next%particle => current - bnd_part_last%next => bnd_part_next - bnd_part_last => bnd_part_next - END IF - -#ifdef WORK_DONE_INTEGRATED - ! This is the actual total work done by the fields: Results correspond - ! with the electron's gamma factor - - root(jj) = cmratio(jj) / gamma_rel(jj) - - work_x = ex_part(j) * (tmp_x(j) + px(jj) * root(jj)) - work_y = ex_part(j) * (tmp_y(j) + py(jj) * root(jj)) - work_z = ex_part(j) * (tmp_z(j) + pz(jj) * root(jj)) - - current%work_x = work_x - current%work_y = work_y - current%work_z = work_z - - current%work_x_total = current%work_x_total + work_x - current%work_y_total = current%work_y_total + work_y - current%work_z_total = current%work_z_total + work_z -#endif - -#ifndef NO_PARTICLE_PROBES - final_x(jj) = current%part_pos(1) - final_y(jj) = current%part_pos(2) -#endif - - current => next - - END DO - - - END DO ! End do-loop for i - -END SUBROUTINE particle_pusher - -SUBROUTINE triangle_current_deposition() - - DO i = 1, species_list(ispecies)%attached_list%count, LVEC - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - - ! Advance to t + 1.5dt to calculate current. This is detailed in - ! the PSC manual between page 37 and 41. The version coded up looks - ! completely different to that in the manual, but this is equivalent. - ! Use t + 1.5dt so that can update J to t + dt at 2nd order - - x(jj) = x(jj) + delta_x(jj) - y(jj) = y(jj) + delta_y(jj) - - ! Delta-f calculation: subtract background from - ! calculated current. - -#ifdef DELTAF_METHOD - weight_back(j) = pvol(jj) * f0(ispecies, mc(jj) / c, px(jj), py(jj), pz(jj)) - fcx(j) = idty * (weight(jj) - weight_back(j)) - fcy(j) = idtx * (weight(jj) - weight_back(j)) - fcz(j) = idxy * (weight(jj) - weight_back(j)) -#endif - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_x_r(j) = x(jj) * idx - cell_y_r(j) = y(jj) * idy - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cell_x3(j) = FLOOR(cell_x_r(j) + 0.5_num) - cell_y3(j) = FLOOR(cell_y_r(j) + 0.5_num) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cell_frac_x(j) = REAL(cell_x3(j), num) - cell_x_r(j) - cell_frac_y(j) = REAL(cell_y3(j), num) - cell_y_r(j) - - cell_x3(j) = cell_x3(j) + 1 - cell_y3(j) = cell_y3(j) + 1 - - END DO - - hxx = 0.0_num - hyy = 0.0_num - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - dcellx(j) = cell_x3(j) - cell_x1(j) - dcelly(j) = cell_y3(j) - cell_y1(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cfx2(j) = cell_frac_x(j)**2 - cfy2(j) = cell_frac_y(j)**2 - - END DO - - ! Calculating hxx - ! NOTE: These weights require an additional multiplication factor - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) - hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) - hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) - - hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) - hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) - hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) - - END DO ! End do-loop for hxx - - ! Now change Xi1* to be Xi1*-Xi0*. This makes the representation of - ! the current update much simpler - - hxx = hxx - gxx - hyy = hxx - gyy - - ! Remember that due to CFL condition particle can never cross more - ! than one gridcell in one timestep - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - xmin(j) = sf_min + (dcellx(j) - 1) / 2 - ymin(j) = sf_min + (dcelly(j) - 1) / 2 - - fjx(jj) = fcx(jj) * q(jj) - fjy(jj) = fcy(jj) * q(jj) - fjz(jj) = fcz(jj) * q(jj) * vz(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - yfac10(j) = gyy(ymin(j),j) + 0.5_num * hyy(ymin(j),j) - yfac11(j) = gyy(ymin(j) + 1,j) + 0.5_num * hyy(ymin(j) + 1,j) - yfac12(j) = gyy(ymin(j) + 2,j) + 0.5_num * hyy(ymin(j) + 2,j) - - yfac20(j) = third * hyy(ymin(j),j) + 0.5_num * gyy(ymin(j),j) - yfac21(j) = third * hyy(ymin(j) + 1,j) + 0.5_num * gyy(ymin(j) + 1,j) - yfac22(j) = third * hyy(ymin(j) + 2,j) + 0.5_num * gyy(ymin(j) + 2,j) - - xfac10(j) = gxx(xmin(j),j) + 0.5_num * hxx(ymin(j),j) - xfac11(j) = gxx(xmin(j) + 1,j) + 0.5_num * hxx(ymin(j) + 1,j) - xfac12(j) = gxx(xmin(j) + 2,j) + 0.5_num * hxx(ymin(j) + 2,j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - - wx(1,j) = hxx(xmin(j),j) * yfac10(j) - wx(2,j) = hxx(xmin(j) + 1,j) * yfac10(j) - wx(3,j) = hxx(xmin(j) + 2,j) * yfac10(j) - wx(4,j) = hxx(xmin(j),j) * yfac11(j) - wx(5,j) = hxx(xmin(j) + 1,j) * yfac11(j) - wx(6,j) = hxx(xmin(j) + 2,j) * yfac11(j) - wx(7,j) = hxx(xmin(j),j) * yfac12(j) - wx(8,j) = hxx(xmin(j) + 1,j) * yfac12(j) - wx(9,j) = hxx(xmin(j) + 2,j) * yfac12(j) - - wy(1,j) = hyy(ymin(j),j) * xfac10(j) - wy(2,j) = hyy(ymin(j),j) * xfac11(j) - wy(3,j) = hyy(ymin(j),j) * xfac12(j) - wy(4,j) = hyy(ymin(j) + 1,j) * xfac10(j) - wy(5,j) = hyy(ymin(j) + 1,j) * xfac11(j) - wy(6,j) = hyy(ymin(j) + 1,j) * xfac12(j) - wy(7,j) = hyy(ymin(j) + 2,j) * xfac10(j) - wy(8,j) = hyy(ymin(j) + 2,j) * xfac11(j) - wy(9,j) = hyy(ymin(n) + 2,j) * xfac12(j) - - wz(1,j) = gxx(xmin(j)) * yfac10(j) + hxx(xmin(j),j) * yfac20(j) - wz(2,j) = gxx(xmin(j) + 1,j) * yfac10(j) + hxx(xmin(j) + 1,j) * yfac20(j) - wz(3,j) = gxx(xmin(j) + 2,j) * yfac10(j) + hxx(xmin(j) + 2,j) * yfac20(j) - wz(4,j) = gxx(xmin(j),j) * yfac11(j) + hxx(xmin(j),j) * yfac21(j) - wz(5,j) = gxx(xmin(j) + 1,j) * yfac11(j) + hxx(xmin(j) + 1,j) * yfac21(j) - wz(6,j) = gxx(xmin(j) + 2,j) * yfac11(j) + hxx(xmin(j) + 2,j) * yfac21(j) - wz(7,j) = gxx(xmin(j),j) * yfac12(j) + hxx(xmin(j),j) * yfac22(j) - wz(8,j) = gxx(xmin(j) + 1,j) * yfac12(j) + hxx(xmin(j) + 1,j) * yfac22(j) - wz(9,j) = gxx(xmin(j) + 2,j) * yfac12(j) + hxx(xmin(j) + 2,j) * yfac22(j) - - cx(j) = cell_x1(jj) + xmin(j) - cy(j) = cell_y1(jj) + ymin(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - ic(j) = cx(j) + (cy(j) - 1) * nx - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jxh(1,ic(j)) = jxh(1,ic(j)) - fjx(j) * wx(1,j) - jxh(2,ic(j)) = jxh(2,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j)) - jxh(3,ic(j)) = jxh(3,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j) + wx(3,j)) - jxh(4,ic(j)) = jxh(4,ic(j)) - fjx(j) * wx(4,j) - jxh(5,ic(j)) = jxh(5,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j)) - jxh(6,ic(j)) = jxh(6,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j) + wx(6,j)) - jxh(7,ic(j)) = jxh(7,ic(j)) - fjx(j) * wx(7,j) - jxh(8,ic(j)) = jxh(8,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j)) - jxh(9,ic(j)) = jxh(9,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j) + wx(9,j)) - - jyh(1,ic(j)) = jyh(1,ic(j)) - fjy(j) * wy(1,j) - jyh(2,ic(j)) = jyh(2,ic(j)) - fjy(j) * wy(2,j) - jyh(3,ic(j)) = jyh(3,ic(j)) - fjy(j) * wy(3,j) - jyh(4,ic(j)) = jyh(4,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j)) - jyh(5,ic(j)) = jyh(5,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j)) - jyh(6,ic(j)) = jyh(6,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j)) - jyh(7,ic(j)) = jyh(7,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j) + wy(7,j)) - jyh(8,ic(j)) = jyh(8,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j) + wy(8,j)) - jyh(9,ic(j)) = jyh(9,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j) + wy(9,j)) - - jzh(1,ic(j)) = jzh(1,ic(j)) + fjz(j) * wz(1,j) - jzh(2,ic(j)) = jzh(2,ic(j)) + fjz(j) * wz(2,j) - jzh(3,ic(j)) = jzh(3,ic(j)) + fjz(j) * wz(3,j) - jzh(4,ic(j)) = jzh(4,ic(j)) + fjz(j) * wz(4,j) - jzh(5,ic(j)) = jzh(5,ic(j)) + fjz(j) * wz(5,j) - jzh(6,ic(j)) = jzh(6,ic(j)) + fjz(j) * wz(6,j) - jzh(7,ic(j)) = jzh(7,ic(j)) + fjz(j) * wz(7,j) - jzh(8,ic(j)) = jzh(8,ic(j)) + fjz(j) * wz(8,j) - jzh(9,ic(j)) = jzh(9,ic(j)) + fjz(j) * wz(9,j) - - END DO - - END DO ! End do-loop for index i - - ! Deposit current on the cells - - DO j = 1, ny - DO i = 1, nx - iic = (j - 1) * nx + i - - jx(i,j) = jx(i,j) + jxh(1,iic) - jx(i + 1,j) = jx(i + 1,j) + jxh(2,iic) - jx(i + 2,j) = jx(i + 2,j) + jxh(3,iic) - jx(i,j + 1) = jx(i, j + 1) + jxh(4,iic) - jx(i + 1,j + 1) = jx(i + 1, j + 1) + jxh(5,iic) - jx(i + 2,j + 1) = jx(i + 2, j + 1) + jxh(6,iic) - jx(i,j + 2) = jx(i,j + 2) + jxh(7,iic) - jx(i + 1,j + 2) = jx(i + 1,j + 2) + jxh(8,iic) - jx(i + 2,j + 2) = jx(i + 2,j + 2) + jxh(9,iic) - - jy(i,j) = jy(i,j) + jyh(1,iic) - jy(i + 1,j) = jy(i + 1,j) + jyh(2,iic) - jy(i + 2,j) = jy(i + 2,j) + jyh(3,iic) - jy(i,j + 1) = jy(i, j + 1) + jyh(4,iic) - jy(i + 1,j + 1) = jy(i + 1, j + 1) + jyh(5,iic) - jy(i + 2,j + 1) = jy(i + 2, j + 1) + jyh(6,iic) - jy(i,j + 2) = jy(i,j + 2) + jyh(7,iic) - jy(i + 1,j + 2) = jy(i + 1,j + 2) + jyh(8,iic) - jy(i + 2,j + 2) = jy(i + 2,j + 2) + jyh(9,iic) - - jz(i,j) = jz(i,j) + jzh(1,iic) - jz(i + 1,j) = jz(i + 1,j) + jzh(2,iic) - jz(i + 2,j) = jz(i + 2,j) + jzh(3,iic) - jz(i,j + 1) = jz(i, j + 1) + jzh(4,iic) - jz(i + 1,j + 1) = jz(i + 1, j + 1) + jzh(5,iic) - jz(i + 2,j + 1) = jz(i + 2, j + 1) + jzh(6,iic) - jz(i,j + 2) = jz(i,j + 2) + jzh(7,iic) - jz(i + 1,j + 2) = jz(i + 1,j + 2) + jzh(8,iic) - jz(i + 2,j + 2) = jz(i + 2,j + 2) + jzh(9,iic) - - END DO - END DO - -END SUBROUTINE triangle_current_deposition - - FUNCTION f0(ispecies, mass, px, py, pz) - - INTEGER, INTENT(IN) :: ispecies - REAL(num), INTENT(IN) :: mass - REAL(num), INTENT(IN) :: px, py, pz - REAL(num) :: f0 - REAL(num) :: Tx, Ty, Tz, driftx, drifty, driftz, density - REAL(num) :: f0_exponent, norm, two_kb_mass, two_pi_kb_mass3 - TYPE(particle_species), POINTER :: species - - species => species_list(ispecies) - - IF (ABS(species%initial_conditions%density_back) > c_tiny) THEN - two_kb_mass = 2.0_num * kb * mass - two_pi_kb_mass3 = (pi * two_kb_mass)**3 - - Tx = species%initial_conditions%temp_back(1) - Ty = species%initial_conditions%temp_back(2) - Tz = species%initial_conditions%temp_back(3) - driftx = species%initial_conditions%drift_back(1) - drifty = species%initial_conditions%drift_back(2) - driftz = species%initial_conditions%drift_back(3) - density = species%initial_conditions%density_back - f0_exponent = ((px - driftx)**2 / Tx & - + (py - drifty)**2 / Ty & - + (pz - driftz)**2 / Tz) / two_kb_mass - norm = density / SQRT(two_pi_kb_mass3 * Tx * Ty * Tz) - f0 = norm * EXP(-f0_exponent) - ELSE - f0 = 0.0_num - END IF - - END FUNCTION f0 - - - - - - - - - - - - - - From 1043e61e986487f7366c52d2346686db962d5a68 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 09:55:50 +0200 Subject: [PATCH 049/106] Update epoch3d to EPOCH coding style --- epoch3d/src/boundary.F90 | 6 ++++++ epoch3d/src/housekeeping/window.F90 | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/epoch3d/src/boundary.F90 b/epoch3d/src/boundary.F90 index 184ed007b..be286e46f 100644 --- a/epoch3d/src/boundary.F90 +++ b/epoch3d/src/boundary.F90 @@ -467,6 +467,8 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local, & END SUBROUTINE do_field_mpi_with_lengths + + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & xmin, xmax, ymin, ymax, zmin, zmax, offset) @@ -487,6 +489,8 @@ SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & END SUBROUTINE load_field_boundaries_to_buffer + + SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & xmin, xmax, ymin, ymax, zmin, zmax, offset) @@ -507,6 +511,8 @@ SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & END SUBROUTINE unload_field_boundaries_from_buffer + + SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local, & nz_local) diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 114b3a3f1..a02044e37 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -224,6 +224,8 @@ SUBROUTINE shift_field(field, ng, window_shift_cells) END SUBROUTINE shift_field + + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local, nz_local) @@ -486,8 +488,9 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + + SUBROUTINE mw_io_test(step, dump) -! USE diagnostics USE deck_io_block INTEGER, INTENT(IN) :: step @@ -546,9 +549,7 @@ END SUBROUTINE mw_io_test - SUBROUTINE moving_window(step) -! USE diagnostics USE deck_io_block #ifndef PER_SPECIES_WEIGHT From c02dcfdf372507449a3cbffdaaa2ae27ef22d2d7 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 10:51:50 +0200 Subject: [PATCH 050/106] Removed Score-P instrumentation --- epoch2d/src/epoch2d.F90 | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index bb7351dbc..f85cae2ca 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -13,10 +13,6 @@ ! You should have received a copy of the GNU General Public License ! along with this program. If not, see . -#ifdef SCOREP_USER_ENABLE -#include "scorep/SCOREP_User.inc" -#endif - PROGRAM pic ! EPOCH2D is a Birdsall and Langdon type PIC code derived from the PSC @@ -71,13 +67,6 @@ PROGRAM pic CHARACTER(LEN=64) :: timestring REAL(num) :: runtime, dt_store -#ifdef SCOREP_USER_ENABLE - SCOREP_USER_REGION_DEFINE( main_loop ) - INTEGER, PARAMETER :: reg_type = SCOREP_USER_REGION_TYPE_LOOP + & - SCOREP_USER_REGION_TYPE_DYNAMIC -#endif - - step = 0 time = 0.0_num @@ -197,10 +186,6 @@ PROGRAM pic IF (timer_collect) CALL timer_start(c_timer_step) DO -#ifdef SCOREP_USER_ENABLE -SCOREP_USER_REGION_BEGIN( main_loop, "main_loop", reg_type ) -#endif #SCOREP_USER_ENABLE - IF (timer_collect) THEN CALL timer_stop(c_timer_step) CALL timer_reset @@ -270,11 +255,6 @@ PROGRAM pic CALL update_eb_fields_final CALL moving_window(step) - !CALL output_routines(step) - -#ifdef SCOREP_USER_ENABLE -SCOREP_USER_REGION_END(main_loop) -#endif #SCOREP_USER_ENABLE END DO From 617373acbb39890301c0137f5708ce92b84e023d Mon Sep 17 00:00:00 2001 From: "u.sinha@fz-juelich.de" Date: Fri, 8 May 2020 14:31:43 +0200 Subject: [PATCH 051/106] Modified moving window --- epoch2d/src/boundary.F90 | 55 ++++++++++ epoch2d/src/epoch2d.F90 | 20 ++++ epoch2d/src/housekeeping/window.F90 | 149 +++++++++++++++++++++++----- 3 files changed, 199 insertions(+), 25 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index a055479f7..779f9e753 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -314,7 +314,62 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local) END SUBROUTINE do_field_mpi_with_lengths + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, & + ng, nx_local, ny_local) + INTEGER, INTENT(IN) :: ng, nx_local, ny_local + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + !INTEGER :: sendrequest1, recvrequest1 + INTEGER :: i, j, n, xlength + INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status + REAL(num), DIMENSION(:), ALLOCATABLE :: x_temp, tempx + + + xlength = 3 * ng * (ny_local + 2 * ng) + 4 + + ALLOCATE(x_temp(xlength)) + ALLOCATE(tempx(xlength)) + + n = 0 + + DO j = 1-ng, ny_local + ng + DO i = 1, ng + x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) + x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) + x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) + n = n + 2 + END DO + n = 0 + END DO + + CALL MPI_SENDRECV(x_temp, xlength, MPI_REAL, proc_x_min, & + tag, tempx, xlength, MPI_REAL, proc_x_max, tag, comm, status, errcode) + + !CALL MPI_ISEND(tempx, xlength, MPI_REAL, proc_x_min, tag, comm, & + ! sendrequest1, errcode) + !CALL MPI_IRECV(x_temp, xlength, MPI_REAL, proc_x_max, tag, comm, & + ! recvrequest1, errcode) + + !CALL MPI_WAIT(sendrequest1, status, errcode) + !CALL MPI_WAIT(recvrequest1, status, errcode) + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, ny_local + ng + DO i = nx_local + 1, nx_local + ng + fieldx(i, j) = tempx(i - nx_local + n + (j - 1 + ng) * 3 * ng) + fieldy(i, j) = tempx(i - nx_local + n + 1 + (j - 1 + ng) * 3 * ng) + fieldz(i, j) = tempx(i - nx_local + n + 2 + (j - 1 + ng) * 3 * ng) + n = n + 2 + END DO + n = 0 + END DO + END IF + + DEALLOCATE(tempx) + DEALLOCATE(x_temp) + + END SUBROUTINE moving_window_field_bc SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index a4f91bbcb..6ccbcb60b 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -13,6 +13,10 @@ ! You should have received a copy of the GNU General Public License ! along with this program. If not, see . +#ifdef SCOREP_USER_ENABLE +#include "scorep/SCOREP_User.inc" +#endif + PROGRAM pic ! EPOCH2D is a Birdsall and Langdon type PIC code derived from the PSC @@ -67,6 +71,13 @@ PROGRAM pic CHARACTER(LEN=64) :: timestring REAL(num) :: runtime, dt_store +#ifdef SCOREP_USER_ENABLE + SCOREP_USER_REGION_DEFINE( main_loop ) + INTEGER, PARAMETER :: reg_type = SCOREP_USER_REGION_TYPE_LOOP + & + SCOREP_USER_REGION_TYPE_DYNAMIC +#endif + + step = 0 time = 0.0_num @@ -186,6 +197,10 @@ PROGRAM pic IF (timer_collect) CALL timer_start(c_timer_step) DO +#ifdef SCOREP_USER_ENABLE +SCOREP_USER_REGION_BEGIN( main_loop, "main_loop", reg_type ) +#endif #SCOREP_USER_ENABLE + IF (timer_collect) THEN CALL timer_stop(c_timer_step) CALL timer_reset @@ -259,6 +274,11 @@ PROGRAM pic CALL update_eb_fields_final CALL moving_window + +#ifdef SCOREP_USER_ENABLE +SCOREP_USER_REGION_END(main_loop) +#endif #SCOREP_USER_ENABLE + END DO IF (rank == 0) runtime = MPI_WTIME() - walltime_started diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 821d43ff7..51c0ddf10 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -67,32 +67,108 @@ SUBROUTINE shift_window(window_shift_cells) ! Shift the window round one cell at a time. ! Inefficient, but it works - DO iwindow = 1, window_shift_cells - CALL insert_particles + !DO iwindow = 1, window_shift_cells + CALL insert_particles - ! Shift the box around - x_grid_min = x_global(1) + dx - xb_min = xb_global(1) + dx - x_min = xb_min + dx * cpml_thickness + ! Shift the box around + ! x_grid_min = x_global(1) + dx + ! xb_min = xb_global(1) + dx + ! x_min = xb_min + dx * cpml_thickness + + x_grid_min = x_global(1) + ng * dx + xb_min = xb_global(1) + ng * dx + x_min = xb_min + ng * dx * cpml_thickness ! Setup global grid - DO ix = 1-ng, nx_global + ng - x_global(ix) = x_grid_min + (ix - 1) * dx - xb_global(ix) = xb_min + (ix - 1) * dx - END DO - x_grid_max = x_global(nx_global) - x_max = xb_global(nx_global+1) - dx * cpml_thickness + DO ix = 1-ng, nx_global + ng + x_global(ix) = x_grid_min + (ix - 1) * dx + xb_global(ix) = xb_min + (ix - 1) * dx + END DO + x_grid_max = x_global(nx_global) + x_max = xb_global(nx_global+1) - dx * cpml_thickness - CALL setup_grid_x + CALL setup_grid_x - CALL remove_particles + CALL remove_particles - ! Shift fields around - CALL shift_fields - END DO + ! Shift fields around + !CALL shift_fields + CALL moving_window_shift_fields + !END DO END SUBROUTINE shift_window + SUBROUTINE moving_window_shift_fields + + INTEGER :: j, xlength, jxlength + !REAL(num), DIMENSION(:), ALLOCATABLE :: tempex, tempbx, tempjx + + xlength = 3 * ng * (ny + 2 * ng) + jxlength = 3 * jng * (ny + 2 * jng) + + !ALLOCATE(tempex(xlength)) + !ALLOCATE(tempbx(xlength)) + !ALLOCATE(tempjx(jxlength)) + + CALL moving_window_shift_field(ex, ey, ez, ng) + CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) + CALL moving_window_shift_field(bx, by, bz, ng) + CALL moving_window_field_bc(bx, by, bz, ng, nx, ny) + CALL moving_window_shift_field(jx, jy, jz, jng) + CALL moving_window_field_bc(jx, jy, jz, jng, nx, ny) + + IF (cpml_boundaries) THEN + CALL shift_field(cpml_psi_eyx, ng) + CALL shift_field(cpml_psi_ezx, ng) + CALL shift_field(cpml_psi_byx, ng) + CALL shift_field(cpml_psi_bzx, ng) + + CALL shift_field(cpml_psi_exy, ng) + CALL shift_field(cpml_psi_ezy, ng) + CALL shift_field(cpml_psi_bxy, ng) + CALL shift_field(cpml_psi_bzy, ng) + END IF + + IF (x_max_boundary) THEN + DO j = 1-ng, ny+ng + ! Fix incoming field cell. + ex(nx,j) = ex_x_max(j) + ex(nx+1,j) = ex_x_max(j) + ey(nx+1,j) = ey_x_max(j) + ez(nx+1,j) = ez_x_max(j) + ex(nx-1,j) = 0.5_num * (ex(nx-2,j) + ex(nx,j)) + ey(nx,j) = 0.5_num * (ey(nx-1,j) + ey(nx+1,j)) + ez(nx,j) = 0.5_num * (ez(nx-1,j) + ez(nx+1,j)) + bx(nx+1,j) = bx_x_max(j) + by(nx,j) = by_x_max(j) + bz(nx,j) = bz_x_max(j) + bx(nx,j) = 0.5_num * (bx(nx-1,j) + bx(nx+1,j)) + by(nx-1,j) = 0.5_num * (by(nx-2,j) + by(nx,j)) + bz(nx-1,j) = 0.5_num * (bz(nx-2,j) + bz(nx,j)) + END DO + + IF (cpml_boundaries) THEN + DO j = 1-ng, ny+ng + cpml_psi_eyx(nx:nx+1,j) = cpml_psi_eyx(nx,j) + cpml_psi_ezx(nx:nx+1,j) = cpml_psi_ezx(nx,j) + cpml_psi_byx(nx:nx+1,j) = cpml_psi_byx(nx,j) + cpml_psi_bzx(nx:nx+1,j) = cpml_psi_bzx(nx,j) + + cpml_psi_exy(nx:nx+1,j) = cpml_psi_exy(nx,j) + cpml_psi_ezy(nx:nx+1,j) = cpml_psi_ezy(nx,j) + cpml_psi_bxy(nx:nx+1,j) = cpml_psi_bxy(nx,j) + cpml_psi_bzy(nx:nx+1,j) = cpml_psi_bzy(nx,j) + END DO + + END IF + END IF + + !DEALLOCATE(tempex) + !DEALLOCATE(tempbx) + !DEALLOCATE(tempjx) + + END SUBROUTINE moving_window_shift_fields + SUBROUTINE shift_fields @@ -159,7 +235,6 @@ SUBROUTINE shift_fields END SUBROUTINE shift_fields - SUBROUTINE shift_field(field, ng) INTEGER, INTENT(IN) :: ng @@ -167,16 +242,37 @@ SUBROUTINE shift_field(field, ng) INTEGER :: i, j ! Shift field to the left by one cell + ! Begin changes by U. Sinha + ! Shift field to the left by ng cells DO j = 1-ng, ny+ng - DO i = 1-ng, nx+ng-1 - field(i,j) = field(i+1,j) + !DO i = 1-ng, nx+ng-1 + DO i = 1-ng, nx + !field(i,j) = field(i+1,j) + field(i,j) = field(i+ng, j) END DO END DO - + ! End changes by U. Sinha CALL field_bc(field, ng) END SUBROUTINE shift_field + SUBROUTINE moving_window_shift_field(fieldx, fieldy, fieldz, ng) + + INTEGER :: i, j + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + + DO j = 1- ng, ny + ng + DO i = 1 - ng, nx + fieldx(i, j) = fieldx(i + ng, j) + fieldy(i, j) = fieldy(i + ng, j) + fieldz(i, j) = fieldz(i + ng, j) + END DO + END DO + + END SUBROUTINE moving_window_shift_field + + SUBROUTINE insert_particles @@ -252,10 +348,12 @@ SUBROUTINE insert_particles wdata = dx * dy / (npart_per_cell + n_frac) - DO ipart = 1, npart_per_cell + n_frac + DO ipart = 1, ng * (npart_per_cell + n_frac) CALL create_particle(current) cell_frac_y = 0.5_num - random() - current%part_pos(1) = x0 + random() * dx + !current%part_pos(1) = x0 + random() * dx + !current%part_pos(2) = y(iy) - cell_frac_y * dy + current%part_pos(1) = x0 + random() * ng * dx current%part_pos(2) = y(iy) - cell_frac_y * dy ! Always use the triangle particle weighting for simplicity @@ -377,9 +475,10 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells == ng) THEN window_shift_real = REAL(window_shift_cells, num) - window_offset = window_offset + window_shift_real * dx + !window_offset = window_offset + window_shift_real * dx + window_offset = window_offset + window_shift_real * ng * dx CALL shift_window(window_shift_cells) CALL setup_bc_lists CALL particle_bcs From 481a0cebe4e36658618ec6d093227725b14dceeb Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 8 May 2020 16:47:19 +0200 Subject: [PATCH 052/106] SendRecv for all field components in one go --- epoch2d/src/boundary.F90 | 184 +++++++++++++++++++- epoch2d/src/housekeeping/current_smooth.F90 | 7 +- 2 files changed, 182 insertions(+), 9 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 779f9e753..6bc047ce0 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -371,6 +371,175 @@ SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, & END SUBROUTINE moving_window_field_bc + SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & + ny_local) + INTEGER, INTENT(IN) :: ng, nx_local, ny_local + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + REAL(num), DIMENSION(:), ALLOCATABLE :: left_x_temp, right_x_temp + REAL(num), DIMENSION(:), ALLOCATABLE :: temp_left_x, temp_right_x + REAL(num), DIMENSION(:), ALLOCATABLE :: bottom_temp_y, top_temp_y + REAL(num), DIMENSION(:), ALLOCATABLE :: temp_top_y, temp_bottom_y + INTEGER :: i, j, k, n, xlength, ylength + INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status + + xlength = 3 * ng * (ny_local + 2 * ng) + 4 + ylength = 3 * (nx_local + 2 * ng) * ng + 4 + + ALLOCATE(left_x_temp(xlength)) + ALLOCATE(right_x_temp(xlength)) + ALLOCATE(temp_left_x(xlength)) + ALLOCATE(temp_right_x(xlength)) + + + ALLOCATE(bottom_temp_y(ylength)) + ALLOCATE(top_temp_y(ylength)) + ALLOCATE(temp_top_y(ylength)) + ALLOCATE(temp_bottom_y(ylength)) + + n = 0 + + DO j = 1-ng, ny_local + ng + DO i = 1, ng + left_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) + left_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) + left_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) + n = n + 2 + END DO + n = 0 + END DO + + n = 0 + + DO j = 1-ng, ny_local + ng + DO i = 1, ng + k = nx_local - ng + i + right_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(k,j) + right_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(k,j) + right_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(k,j) + n = n + 2 + END DO + n = 0 + END DO + + CALL MPI_SENDRECV(left_x_temp, xlength, MPI_REAL, proc_x_min, & + tag, temp_right_x, xlength, MPI_REAL, proc_x_max, tag, comm, status, & + errcode ) + CALL MPI_SENDRECV(right_x_temp, xlength, MPI_REAL, proc_x_max, & + tag, temp_left_x, xlength, MPI_REAL, proc_x_min, tag, comm, status, & + errcode) + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, ny_local + ng + DO i = nx_local + 1, nx_local + ng + fieldx(i, j) = temp_right_x(i - nx_local + n + (j - 1 + ng) * 3 * ng) + fieldy(i, j) = temp_right_x(i - nx_local + & + n + 1 + (j - 1 + ng) * 3 * ng) + fieldz(i, j) = temp_right_x(i - nx_local + & + n + 2 + (j - 1 + ng) * 3 * ng) + n = n + 2 + END DO + n = 0 + END DO + END IF + + IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, ny_local + ng + DO i = 1-ng, 0 + fieldx(i, j) = temp_left_x(i + ng + n + (j - 1 + ng) * 3 * ng) + fieldy(i, j) = temp_left_x(i + ng + n + 1 + (j - 1 + ng) * 3 * ng) + fieldz(i, j) = temp_left_x(i + ng + n + 2 + (j - 1 + ng) * 3 * ng) + n = n + 2 + END DO + n = 0 + END DO + END IF + + n = 0 + DO j = 1, ng + DO i = 1-ng, nx_local + ng + bottom_temp_y(i + n + (j - 1) * 3 * (nx_local + 2 * ng)) & + = fieldx(i,j) + bottom_temp_y(i + n + 1 + (j - 1) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,j) + bottom_temp_y(i + n + 2 + (j - 1) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,j) + n = n + 2 + END DO + n = 0 + END DO + + n = 0 + DO j = 1, ng + DO i = 1-ng, nx_local + ng + k = ny_local - ng + j + top_temp_y(i + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & + = fieldx(i,k) + top_temp_y(i + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,k) + top_temp_y(i + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & + = fieldy(i,k) + n = n + 2 + END DO + n = 0 + END DO + + CALL MPI_SENDRECV(bottom_temp_y, ylength, MPI_REAL, proc_y_min, & + tag, temp_top_y, ylength, MPI_REAL, proc_y_max, tag, comm, status, & + errcode ) + + CALL MPI_SENDRECV(top_temp_y, ylength, MPI_REAL, proc_y_max, & + tag, temp_bottom_y, ylength, MPI_REAL, proc_y_min, tag, comm, status, & + errcode ) + + IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max) == c_bc_periodic) THEN + n = 0 + DO j = ny_local+1, ny_local + ng + DO i = 1-ng, nx_local + ng + fieldx(i,j) = & + temp_top_y(i + ng + n + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) + fieldy(i,j) = & + temp_top_y(i + ng + n + 1 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) + fieldz(i,j) = & + temp_top_y(i + ng + n + 2 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) + n = n + 2 + END DO + n = 0 + END DO + END IF + + IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min) == c_bc_periodic) THEN + n = 0 + DO j = 1-ng, 0 + DO i = 1-ng, nx_local + ng + fieldx(i,j) = & + temp_bottom_y(i + ng + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) + fieldy(i,j) = & + temp_bottom_y(i + ng + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) + fieldz(i,j) = & + temp_bottom_y(i + ng + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) + n = n + 2 + END DO + n = 0 + END DO + END IF + + DEALLOCATE(left_x_temp) + DEALLOCATE(right_x_temp) + DEALLOCATE(temp_left_x) + DEALLOCATE(temp_right_x) + + + DEALLOCATE(bottom_temp_y) + DEALLOCATE(top_temp_y) + DEALLOCATE(temp_top_y) + DEALLOCATE(temp_bottom_y) + + END SUBROUTINE all_comp_field_bc + + + SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) INTEGER, INTENT(IN) :: ng @@ -865,9 +1034,10 @@ SUBROUTINE efield_bcs INTEGER :: i ! These are the MPI boundaries - CALL field_bc(ex, ng) - CALL field_bc(ey, ng) - CALL field_bc(ez, ng) + !CALL field_bc(ex, ng) + !CALL field_bc(ey, ng) + !CALL field_bc(ez, ng) + CALL all_comp_field_bc(ex, ey, ez, ng, nx, ny) ! Perfectly conducting boundaries DO i = c_bd_x_min, c_bd_x_max, c_bd_x_max - c_bd_x_min @@ -916,9 +1086,11 @@ SUBROUTINE bfield_bcs(mpi_only) INTEGER :: i ! These are the MPI boundaries - CALL field_bc(bx, ng) - CALL field_bc(by, ng) - CALL field_bc(bz, ng) + !CALL field_bc(bx, ng) + !CALL field_bc(by, ng) + !CALL field_bc(bz, ng) + CALL all_comp_field_bc(bx, by, bz, ng, nx, ny) + IF (mpi_only) RETURN diff --git a/epoch2d/src/housekeeping/current_smooth.F90 b/epoch2d/src/housekeeping/current_smooth.F90 index 3a8ea7cf4..66054bd54 100644 --- a/epoch2d/src/housekeeping/current_smooth.F90 +++ b/epoch2d/src/housekeeping/current_smooth.F90 @@ -30,9 +30,10 @@ SUBROUTINE current_finish CALL current_bcs - CALL field_bc(jx, jng) - CALL field_bc(jy, jng) - CALL field_bc(jz, jng) + !CALL field_bc(jx, jng) + !CALL field_bc(jy, jng) + !CALL field_bc(jz, jng) + CALL all_comp_field_bc(jx, jy, jz, jng, nx, ny) IF (smooth_currents) CALL smooth_current From 1a13a25a09d060257bc2968208b49f14f6d19198 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 26 May 2020 22:33:06 +0200 Subject: [PATCH 053/106] modified all_comp_field_bc() --- epoch2d/src/boundary.F90 | 320 ++++++++++++++++++++++----------------- 1 file changed, 181 insertions(+), 139 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 6bc047ce0..5f54ab2ea 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -373,168 +373,210 @@ END SUBROUTINE moving_window_field_bc SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) - INTEGER, INTENT(IN) :: ng, nx_local, ny_local - REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz - REAL(num), DIMENSION(:), ALLOCATABLE :: left_x_temp, right_x_temp - REAL(num), DIMENSION(:), ALLOCATABLE :: temp_left_x, temp_right_x - REAL(num), DIMENSION(:), ALLOCATABLE :: bottom_temp_y, top_temp_y - REAL(num), DIMENSION(:), ALLOCATABLE :: temp_top_y, temp_bottom_y - INTEGER :: i, j, k, n, xlength, ylength - INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status - xlength = 3 * ng * (ny_local + 2 * ng) + 4 - ylength = 3 * (nx_local + 2 * ng) * ng + 4 - ALLOCATE(left_x_temp(xlength)) - ALLOCATE(right_x_temp(xlength)) - ALLOCATE(temp_left_x(xlength)) - ALLOCATE(temp_right_x(xlength)) + ! | field_top | + !____________|____________________|____________ + ! | | + ! field_left | | field_right + !____________|____________________|____________ + ! | | + ! | field_bottom | - - ALLOCATE(bottom_temp_y(ylength)) - ALLOCATE(top_temp_y(ylength)) - ALLOCATE(temp_top_y(ylength)) - ALLOCATE(temp_bottom_y(ylength)) + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + INTEGER, INTENT(IN) :: nx_local, ny_local + INTEGER, DIMENSION(c_ndims) :: sizes, subsizes + INTEGER :: basetype, sz, szmax, i, j, k, n + REAL(num), ALLOCATABLE :: field_left(:), field_right(:) + REAL(num), ALLOCATABLE :: field_top(:), field_bottom(:) + REAL(num), ALLOCATABLE :: temp(:) - n = 0 + basetype = mpireal - DO j = 1-ng, ny_local + ng - DO i = 1, ng - left_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) - left_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) - left_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) - n = n + 2 + sizes(1) = nx_local + 2 * ng + sizes(2) = ny_local + 2 * ng + starts = 1 + + szmax = 3 * sizes(1) * ng + sz = 3 * sizes(2) * ng + IF (sz > szmax) szmax = sz + + ALLOCATE(temp(szmax)) + + subsizes(1) = ng + subsizes(2) = sizes(2) + + sz = 3 * subsizes(1) * subsizes(2) + + + ALLOCATE(field_left(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1,ng + select case(k) + case(1) + field_left(n) = fieldx(i,j) + case(2) + field_left(n) = fieldy(i,j) + case(3) + field_left(n) = fieldz(i,j) + end select + n = n + 1 + END DO END DO - n = 0 END DO - n = 0 - - DO j = 1-ng, ny_local + ng - DO i = 1, ng - k = nx_local - ng + i - right_x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(k,j) - right_x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(k,j) - right_x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(k,j) - n = n + 2 + ALLOCATE(field_right(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = nx_local-ng+1, nx_local + select case(k) + case(1) + field_right(n) = fieldx(i,j) + case(2) + field_right(n) = fieldy(i,j) + case(3) + field_right(n) = fieldz(i,j) + end select + n = n + 1 + END DO END DO - n = 0 END DO - CALL MPI_SENDRECV(left_x_temp, xlength, MPI_REAL, proc_x_min, & - tag, temp_right_x, xlength, MPI_REAL, proc_x_max, tag, comm, status, & - errcode ) - CALL MPI_SENDRECV(right_x_temp, xlength, MPI_REAL, proc_x_max, & - tag, temp_left_x, xlength, MPI_REAL, proc_x_min, tag, comm, status, & - errcode) + CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) - IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, ny_local + ng - DO i = nx_local + 1, nx_local + ng - fieldx(i, j) = temp_right_x(i - nx_local + n + (j - 1 + ng) * 3 * ng) - fieldy(i, j) = temp_right_x(i - nx_local + & - n + 1 + (j - 1 + ng) * 3 * ng) - fieldz(i, j) = temp_right_x(i - nx_local + & - n + 2 + (j - 1 + ng) * 3 * ng) - n = n + 2 - END DO - n = 0 - END DO - END IF - - IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, ny_local + ng - DO i = 1-ng, 0 - fieldx(i, j) = temp_left_x(i + ng + n + (j - 1 + ng) * 3 * ng) - fieldy(i, j) = temp_left_x(i + ng + n + 1 + (j - 1 + ng) * 3 * ng) - fieldz(i, j) = temp_left_x(i + ng + n + 2 + (j - 1 + ng) * 3 * ng) - n = n + 2 + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = nx_local+1, subsizes(1)+nx_local + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO END DO - n = 0 END DO END IF - - n = 0 - DO j = 1, ng - DO i = 1-ng, nx_local + ng - bottom_temp_y(i + n + (j - 1) * 3 * (nx_local + 2 * ng)) & - = fieldx(i,j) - bottom_temp_y(i + n + 1 + (j - 1) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,j) - bottom_temp_y(i + n + 2 + (j - 1) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,j) - n = n + 2 + + CALL MPI_SENDRECV(field_right, sz, basetype, proc_x_max, & + tag, temp, sz, basetype, proc_x_min, tag, comm, status, errcode) + + IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END DO + END IF + + subsizes(1) = sizes(1) + subsizes(2) = ng + sz = 3 * subsizes(1) * subsizes(2) + + ALLOCATE(field_top(szmax)) + n = 1 + DO k = 1, 3 + DO j = ny_local-subsizes(2)+1, ny_local + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + field_top(n) = fieldx(i,j) + case(2) + field_top(n) = fieldy(i,j) + case(3) + field_top(n) = fieldz(i,j) + end select + n = n + 1 + END DO END DO - n = 0 END DO - n = 0 - DO j = 1, ng - DO i = 1-ng, nx_local + ng - k = ny_local - ng + j - top_temp_y(i + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & - = fieldx(i,k) - top_temp_y(i + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,k) - top_temp_y(i + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) & - = fieldy(i,k) - n = n + 2 + ALLOCATE(field_bottom(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1, subsizes(2) + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + field_bottom(n) = fieldx(i,j) + case(2) + field_bottom(n) = fieldy(i,j) + case(3) + field_bottom(n) = fieldz(i,j) + end select + n = n + 1 END DO - n = 0 END DO - CALL MPI_SENDRECV(bottom_temp_y, ylength, MPI_REAL, proc_y_min, & - tag, temp_top_y, ylength, MPI_REAL, proc_y_max, tag, comm, status, & - errcode ) + CALL MPI_SENDRECV(field_bottom, sz, basetype, proc_y_min, & + tag, temp, sz, basetype, proc_y_max, tag, comm, status, errcode) - CALL MPI_SENDRECV(top_temp_y, ylength, MPI_REAL, proc_y_max, & - tag, temp_bottom_y, ylength, MPI_REAL, proc_y_min, tag, comm, status, & - errcode ) + IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = ny_local+1, subsizes(2)+ny_local + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END DO + END IF - IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max) == c_bc_periodic) THEN - n = 0 - DO j = ny_local+1, ny_local + ng - DO i = 1-ng, nx_local + ng - fieldx(i,j) = & - temp_top_y(i + ng + n + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) - fieldy(i,j) = & - temp_top_y(i + ng + n + 1 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) - fieldz(i,j) = & - temp_top_y(i + ng + n + 2 + (j - ny_local -1) * 3 * (nx_local + 2 * ng)) - n = n + 2 - END DO - n = 0 - END DO - END IF - - IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, 0 - DO i = 1-ng, nx_local + ng - fieldx(i,j) = & - temp_bottom_y(i + ng + n + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) - fieldy(i,j) = & - temp_bottom_y(i + ng + n + 1 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) - fieldz(i,j) = & - temp_bottom_y(i + ng + n + 2 + (j - 1 + ng) * 3 * (nx_local + 2 * ng)) - n = n + 2 - END DO - n = 0 - END DO - END IF - - DEALLOCATE(left_x_temp) - DEALLOCATE(right_x_temp) - DEALLOCATE(temp_left_x) - DEALLOCATE(temp_right_x) - - - DEALLOCATE(bottom_temp_y) - DEALLOCATE(top_temp_y) - DEALLOCATE(temp_top_y) - DEALLOCATE(temp_bottom_y) + CALL MPI_SENDRECV(field_top, sz, basetype, proc_y_max, & + tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) + + IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1-ng, subsizes(1)-ng + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END IF + + DEALLOCATE(field_left) + DEALLOCATE(field_right) + DEALLOCATE(field_top) + DEALLOCATE(field_bottom) + DEALLOCATE(temp) END SUBROUTINE all_comp_field_bc From a352db68f3ed0a1e128e4aafe1baa91e261954db Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 28 May 2020 15:12:25 +0200 Subject: [PATCH 054/106] reorganized moving window --- epoch2d/src/boundary.F90 | 57 ------ epoch2d/src/housekeeping/window.F90 | 262 ++++++++++++++-------------- 2 files changed, 129 insertions(+), 190 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 5f54ab2ea..6fbaad40b 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -314,63 +314,6 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local) END SUBROUTINE do_field_mpi_with_lengths - SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, & - ng, nx_local, ny_local) - - INTEGER, INTENT(IN) :: ng, nx_local, ny_local - REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz - !INTEGER :: sendrequest1, recvrequest1 - INTEGER :: i, j, n, xlength - INTEGER, DIMENSION(MPI_STATUS_SIZE) :: status - REAL(num), DIMENSION(:), ALLOCATABLE :: x_temp, tempx - - - xlength = 3 * ng * (ny_local + 2 * ng) + 4 - - ALLOCATE(x_temp(xlength)) - ALLOCATE(tempx(xlength)) - - n = 0 - - DO j = 1-ng, ny_local + ng - DO i = 1, ng - x_temp(i + n + (j - 1 + ng) * 3 * ng) = fieldx(i,j) - x_temp(i + n + 1 + (j - 1 + ng) * 3 * ng) = fieldy(i,j) - x_temp(i + n + 2 + (j - 1 + ng) * 3 * ng) = fieldz(i,j) - n = n + 2 - END DO - n = 0 - END DO - - CALL MPI_SENDRECV(x_temp, xlength, MPI_REAL, proc_x_min, & - tag, tempx, xlength, MPI_REAL, proc_x_max, tag, comm, status, errcode) - - !CALL MPI_ISEND(tempx, xlength, MPI_REAL, proc_x_min, tag, comm, & - ! sendrequest1, errcode) - !CALL MPI_IRECV(x_temp, xlength, MPI_REAL, proc_x_max, tag, comm, & - ! recvrequest1, errcode) - - !CALL MPI_WAIT(sendrequest1, status, errcode) - !CALL MPI_WAIT(recvrequest1, status, errcode) - - IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max) == c_bc_periodic) THEN - n = 0 - DO j = 1-ng, ny_local + ng - DO i = nx_local + 1, nx_local + ng - fieldx(i, j) = tempx(i - nx_local + n + (j - 1 + ng) * 3 * ng) - fieldy(i, j) = tempx(i - nx_local + n + 1 + (j - 1 + ng) * 3 * ng) - fieldz(i, j) = tempx(i - nx_local + n + 2 + (j - 1 + ng) * 3 * ng) - n = n + 2 - END DO - n = 0 - END DO - END IF - - DEALLOCATE(tempx) - DEALLOCATE(x_temp) - - END SUBROUTINE moving_window_field_bc - SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 51c0ddf10..6797103ae 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -65,21 +65,14 @@ SUBROUTINE shift_window(window_shift_cells) INTEGER :: iwindow, ix REAL(num) :: xb_min - ! Shift the window round one cell at a time. - ! Inefficient, but it works - !DO iwindow = 1, window_shift_cells - CALL insert_particles + ! Shift the window round # of window shift cells at a time. + CALL insert_particles(window_shift_cells) - ! Shift the box around - ! x_grid_min = x_global(1) + dx - ! xb_min = xb_global(1) + dx - ! x_min = xb_min + dx * cpml_thickness + x_grid_min = x_global(1) + window_shift_cells * dx + xb_min = xb_global(1) + window_shift_cells * dx + x_min = xb_min + window_shift_cells * dx * cpml_thickness - x_grid_min = x_global(1) + ng * dx - xb_min = xb_global(1) + ng * dx - x_min = xb_min + ng * dx * cpml_thickness - - ! Setup global grid + ! Setup global grid DO ix = 1-ng, nx_global + ng x_global(ix) = x_grid_min + (ix - 1) * dx xb_global(ix) = xb_min + (ix - 1) * dx @@ -92,111 +85,63 @@ SUBROUTINE shift_window(window_shift_cells) CALL remove_particles ! Shift fields around - !CALL shift_fields - CALL moving_window_shift_fields - !END DO + CALL shift_fields(window_shift_cells) END SUBROUTINE shift_window - SUBROUTINE moving_window_shift_fields - INTEGER :: j, xlength, jxlength - !REAL(num), DIMENSION(:), ALLOCATABLE :: tempex, tempbx, tempjx + SUBROUTINE shift_fields(window_shift_cells) - xlength = 3 * ng * (ny + 2 * ng) - jxlength = 3 * jng * (ny + 2 * jng) + INTEGER :: j + INTEGER, INTENT(IN) :: window_shift_cells - !ALLOCATE(tempex(xlength)) - !ALLOCATE(tempbx(xlength)) - !ALLOCATE(tempjx(jxlength)) + CALL shift_field(ex, ng, window_shift_cells) + CALL shift_field(ey, ng, window_shift_cells) + CALL shift_field(ez, ng, window_shift_cells) - CALL moving_window_shift_field(ex, ey, ez, ng) CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) - CALL moving_window_shift_field(bx, by, bz, ng) - CALL moving_window_field_bc(bx, by, bz, ng, nx, ny) - CALL moving_window_shift_field(jx, jy, jz, jng) - CALL moving_window_field_bc(jx, jy, jz, jng, nx, ny) - - IF (cpml_boundaries) THEN - CALL shift_field(cpml_psi_eyx, ng) - CALL shift_field(cpml_psi_ezx, ng) - CALL shift_field(cpml_psi_byx, ng) - CALL shift_field(cpml_psi_bzx, ng) - - CALL shift_field(cpml_psi_exy, ng) - CALL shift_field(cpml_psi_ezy, ng) - CALL shift_field(cpml_psi_bxy, ng) - CALL shift_field(cpml_psi_bzy, ng) - END IF - - IF (x_max_boundary) THEN - DO j = 1-ng, ny+ng - ! Fix incoming field cell. - ex(nx,j) = ex_x_max(j) - ex(nx+1,j) = ex_x_max(j) - ey(nx+1,j) = ey_x_max(j) - ez(nx+1,j) = ez_x_max(j) - ex(nx-1,j) = 0.5_num * (ex(nx-2,j) + ex(nx,j)) - ey(nx,j) = 0.5_num * (ey(nx-1,j) + ey(nx+1,j)) - ez(nx,j) = 0.5_num * (ez(nx-1,j) + ez(nx+1,j)) - bx(nx+1,j) = bx_x_max(j) - by(nx,j) = by_x_max(j) - bz(nx,j) = bz_x_max(j) - bx(nx,j) = 0.5_num * (bx(nx-1,j) + bx(nx+1,j)) - by(nx-1,j) = 0.5_num * (by(nx-2,j) + by(nx,j)) - bz(nx-1,j) = 0.5_num * (bz(nx-2,j) + bz(nx,j)) - END DO - - IF (cpml_boundaries) THEN - DO j = 1-ng, ny+ng - cpml_psi_eyx(nx:nx+1,j) = cpml_psi_eyx(nx,j) - cpml_psi_ezx(nx:nx+1,j) = cpml_psi_ezx(nx,j) - cpml_psi_byx(nx:nx+1,j) = cpml_psi_byx(nx,j) - cpml_psi_bzx(nx:nx+1,j) = cpml_psi_bzx(nx,j) - - cpml_psi_exy(nx:nx+1,j) = cpml_psi_exy(nx,j) - cpml_psi_ezy(nx:nx+1,j) = cpml_psi_ezy(nx,j) - cpml_psi_bxy(nx:nx+1,j) = cpml_psi_bxy(nx,j) - cpml_psi_bzy(nx:nx+1,j) = cpml_psi_bzy(nx,j) - END DO - - END IF - END IF - - !DEALLOCATE(tempex) - !DEALLOCATE(tempbx) - !DEALLOCATE(tempjx) - END SUBROUTINE moving_window_shift_fields + CALL shift_field(bx, ng, window_shift_cells) + CALL shift_field(by, ng, window_shift_cells) + CALL shift_field(bz, ng, window_shift_cells) + CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny) + CALL shift_field(jx, jng, window_shift_cells) + CALL shift_field(jy, jng, window_shift_cells) + CALL shift_field(jz, jng, window_shift_cells) - SUBROUTINE shift_fields - - INTEGER :: j - - CALL shift_field(ex, ng) - CALL shift_field(ey, ng) - CALL shift_field(ez, ng) - - CALL shift_field(bx, ng) - CALL shift_field(by, ng) - CALL shift_field(bz, ng) - - CALL shift_field(jx, jng) - CALL shift_field(jy, jng) - CALL shift_field(jz, jng) + CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny) IF (cpml_boundaries) THEN - CALL shift_field(cpml_psi_eyx, ng) - CALL shift_field(cpml_psi_ezx, ng) - CALL shift_field(cpml_psi_byx, ng) - CALL shift_field(cpml_psi_bzx, ng) - - CALL shift_field(cpml_psi_exy, ng) - CALL shift_field(cpml_psi_ezy, ng) - CALL shift_field(cpml_psi_bxy, ng) + CALL shift_field(cpml_psi_eyx, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_ezx, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_byx, ng, & + window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_eyx, & + cpml_psi_ezx, cpml_psi_byx, & + ng, nx, ny) + + CALL shift_field(cpml_psi_bzx, ng, & + window_shift_cells) + + CALL field_bc(cpml_psi_bzx, ng) + + CALL shift_field(cpml_psi_exy, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_ezy, ng, & + window_shift_cells) + CALL shift_field(cpml_psi_bxy, ng, & + window_shift_cells) + CALL moving_window_field_bc(cpml_psi_exy, & + cpml_psi_ezy, cpml_psi_ezy, & + ng, nx, ny) + CALL shift_field(cpml_psi_bzy, ng) + CALL field_bc(cpml_psi_bzy, ng) END IF IF (x_max_boundary) THEN @@ -235,52 +180,106 @@ SUBROUTINE shift_fields END SUBROUTINE shift_fields - SUBROUTINE shift_field(field, ng) + SUBROUTINE shift_field(field, ng, window_shift_cells) - INTEGER, INTENT(IN) :: ng + INTEGER, INTENT(IN) :: ng, window_shift_cells REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: field INTEGER :: i, j - ! Shift field to the left by one cell - ! Begin changes by U. Sinha - ! Shift field to the left by ng cells + ! Shift field to the left by window_shift_cells DO j = 1-ng, ny+ng - !DO i = 1-ng, nx+ng-1 - DO i = 1-ng, nx - !field(i,j) = field(i+1,j) - field(i,j) = field(i+ng, j) + DO i = 1-ng, nx+ng-window_shift_cells + field(i,j) = field(i+window_shift_cells, j) END DO END DO - ! End changes by U. Sinha - CALL field_bc(field, ng) + !CALL field_bc(field, ng) END SUBROUTINE shift_field - SUBROUTINE moving_window_shift_field(fieldx, fieldy, fieldz, ng) - INTEGER :: i, j - INTEGER, INTENT(IN) :: ng - REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & + ny_local) - DO j = 1- ng, ny + ng - DO i = 1 - ng, nx - fieldx(i, j) = fieldx(i + ng, j) - fieldy(i, j) = fieldy(i + ng, j) - fieldz(i, j) = fieldz(i + ng, j) - END DO + + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + INTEGER, INTENT(IN) :: nx_local, ny_local + INTEGER, DIMENSION(c_ndims) :: sizes, subsizes + INTEGER :: basetype, sz, szmax, i, j, k, n + REAL(num), ALLOCATABLE :: field_left(:) + REAL(num), ALLOCATABLE :: temp(:) + + basetype = mpireal + + sizes(1) = nx_local + 2 * ng + sizes(2) = ny_local + 2 * ng + starts = 1 + + szmax = 3 * sizes(1) * ng + sz = 3 * sizes(2) * ng + IF (sz > szmax) szmax = sz + + ALLOCATE(temp(szmax)) + + subsizes(1) = ng + subsizes(2) = sizes(2) + + sz = 3 * subsizes(1) * subsizes(2) + + + ALLOCATE(field_left(szmax)) + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = 1,ng + select case(k) + case(1) + field_left(n) = fieldx(i,j) + case(2) + field_left(n) = fieldy(i,j) + case(3) + field_left(n) = fieldz(i,j) + end select + n = n + 1 + END DO + END DO END DO - END SUBROUTINE moving_window_shift_field + CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN + n = 1 + DO k = 1, 3 + DO j = 1-ng, subsizes(2)-ng + DO i = nx_local+1, subsizes(1)+nx_local + select case(k) + case(1) + fieldx(i,j) = temp(n) + case(2) + fieldy(i,j) = temp(n) + case(3) + fieldz(i,j) = temp(n) + end select + n = n + 1 + END DO + END DO + END DO + END IF + DEALLOCATE(field_left) + DEALLOCATE(temp) + END SUBROUTINE moving_window_field_bc - SUBROUTINE insert_particles + SUBROUTINE insert_particles(window_shift_cells) TYPE(particle), POINTER :: current TYPE(particle_list) :: append_list INTEGER :: ispecies, i, iy, isuby, errcode INTEGER(i8) :: ipart, npart_per_cell, n_frac + INTEGER, INTENT(IN) :: window_shift_cells REAL(num) :: cell_frac_y, cy2 REAL(num), DIMENSION(-1:1) :: gy REAL(num), DIMENSION(c_ndirs) :: temp_local, drift_local @@ -348,12 +347,10 @@ SUBROUTINE insert_particles wdata = dx * dy / (npart_per_cell + n_frac) - DO ipart = 1, ng * (npart_per_cell + n_frac) + DO ipart = 1, window_shift_cells * (npart_per_cell + n_frac) CALL create_particle(current) cell_frac_y = 0.5_num - random() - !current%part_pos(1) = x0 + random() * dx - !current%part_pos(2) = y(iy) - cell_frac_y * dy - current%part_pos(1) = x0 + random() * ng * dx + current%part_pos(1) = x0 + random() * window_shift_cells * dx current%part_pos(2) = y(iy) - cell_frac_y * dy ! Always use the triangle particle weighting for simplicity @@ -475,10 +472,9 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells == ng) THEN + IF (window_shift_cells > 0) THEN window_shift_real = REAL(window_shift_cells, num) - !window_offset = window_offset + window_shift_real * dx - window_offset = window_offset + window_shift_real * ng * dx + window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) CALL setup_bc_lists CALL particle_bcs From 386777e80c8015b843d77ce5bd32b97e54229ffb Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Sun, 7 Jun 2020 19:28:02 +0200 Subject: [PATCH 055/106] re-introduced window_shift_cell > ng - 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 6797103ae..01ed0c831 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -472,7 +472,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 45cb0807656a87518af8197cb514e1ae0a6e377e Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Jun 2020 10:41:07 +0200 Subject: [PATCH 056/106] fixed bugs in boundary.F90 and window.F90 --- epoch2d/src/boundary.F90 | 3 ++- epoch2d/src/housekeeping/window.F90 | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 6fbaad40b..c2c04a9e1 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -339,7 +339,6 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & sizes(1) = nx_local + 2 * ng sizes(2) = ny_local + 2 * ng - starts = 1 szmax = 3 * sizes(1) * ng sz = 3 * sizes(2) * ng @@ -471,6 +470,7 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & n = n + 1 END DO END DO + END DO CALL MPI_SENDRECV(field_bottom, sz, basetype, proc_y_min, & tag, temp, sz, basetype, proc_y_max, tag, comm, status, errcode) @@ -513,6 +513,7 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & n = n + 1 END DO END DO + END DO END IF DEALLOCATE(field_left) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 01ed0c831..b2bd52c45 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -140,7 +140,8 @@ SUBROUTINE shift_fields(window_shift_cells) cpml_psi_ezy, cpml_psi_ezy, & ng, nx, ny) - CALL shift_field(cpml_psi_bzy, ng) + CALL shift_field(cpml_psi_bzy, ng, & + window_shift_cells) CALL field_bc(cpml_psi_bzy, ng) END IF @@ -213,7 +214,6 @@ SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & sizes(1) = nx_local + 2 * ng sizes(2) = ny_local + 2 * ng - starts = 1 szmax = 3 * sizes(1) * ng sz = 3 * sizes(2) * ng From fa7a6ea5a868eb1588d24c27ff5d0de76d41084b Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 12:33:13 +0200 Subject: [PATCH 057/106] testing for window_shift_cells > 0 --- epoch2d/src/housekeeping/window.F90 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index b2bd52c45..3cb39b53a 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -99,19 +99,19 @@ SUBROUTINE shift_fields(window_shift_cells) CALL shift_field(ey, ng, window_shift_cells) CALL shift_field(ez, ng, window_shift_cells) - CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) + CALL all_comp_field_bc(ex, ey, ez, ng, nx, ny) CALL shift_field(bx, ng, window_shift_cells) CALL shift_field(by, ng, window_shift_cells) CALL shift_field(bz, ng, window_shift_cells) - CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny) + CALL all_comp_field_bc(bx, by, bz, ng ,nx, ny) CALL shift_field(jx, jng, window_shift_cells) CALL shift_field(jy, jng, window_shift_cells) CALL shift_field(jz, jng, window_shift_cells) - CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny) + CALL all_comp_field_bc(jx, jy, jz, ng, nx, ny) IF (cpml_boundaries) THEN CALL shift_field(cpml_psi_eyx, ng, & @@ -472,7 +472,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > ng - 1) THEN + IF (window_shift_cells > 0) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From b986357f4a4c758443efce69389e20d81b7ba412 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 13:23:19 +0200 Subject: [PATCH 058/106] fixed bug in moving_window_field_bc --- epoch2d/src/housekeeping/window.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 3cb39b53a..2b0b0f4eb 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -99,19 +99,19 @@ SUBROUTINE shift_fields(window_shift_cells) CALL shift_field(ey, ng, window_shift_cells) CALL shift_field(ez, ng, window_shift_cells) - CALL all_comp_field_bc(ex, ey, ez, ng, nx, ny) + CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny) CALL shift_field(bx, ng, window_shift_cells) CALL shift_field(by, ng, window_shift_cells) CALL shift_field(bz, ng, window_shift_cells) - CALL all_comp_field_bc(bx, by, bz, ng ,nx, ny) + CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny) CALL shift_field(jx, jng, window_shift_cells) CALL shift_field(jy, jng, window_shift_cells) CALL shift_field(jz, jng, window_shift_cells) - CALL all_comp_field_bc(jx, jy, jz, ng, nx, ny) + CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny) IF (cpml_boundaries) THEN CALL shift_field(cpml_psi_eyx, ng, & From ebabdf44c280b32ce25633b79339e07b0c8107b7 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 13:33:27 +0200 Subject: [PATCH 059/106] back to window_shift_cells > ng - 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 2b0b0f4eb..b2bd52c45 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -472,7 +472,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 58f0802136d4a91fd12730f664d05f88f5c3fe19 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 14 Jul 2020 16:23:45 +0200 Subject: [PATCH 060/106] window_shift_cells > 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index b2bd52c45..8e0e80093 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -472,7 +472,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > ng - 1) THEN + IF (window_shift_cells > 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From eef8a449c58f465331ae05b8f20ddad32b54a496 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 15 Jul 2020 16:39:12 +0200 Subject: [PATCH 061/106] introduced do loop for insert particles in window.f90 --- epoch2d/src/housekeeping/window.F90 | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 8e0e80093..effaa947e 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -66,11 +66,12 @@ SUBROUTINE shift_window(window_shift_cells) REAL(num) :: xb_min ! Shift the window round # of window shift cells at a time. + DO iwindow = 1, window_shift_cells CALL insert_particles(window_shift_cells) - x_grid_min = x_global(1) + window_shift_cells * dx - xb_min = xb_global(1) + window_shift_cells * dx - x_min = xb_min + window_shift_cells * dx * cpml_thickness + x_grid_min = x_global(1) + dx + xb_min = xb_global(1) + dx + x_min = xb_min + dx * cpml_thickness ! Setup global grid DO ix = 1-ng, nx_global + ng @@ -79,6 +80,7 @@ SUBROUTINE shift_window(window_shift_cells) END DO x_grid_max = x_global(nx_global) x_max = xb_global(nx_global+1) - dx * cpml_thickness + END DO CALL setup_grid_x @@ -347,10 +349,10 @@ SUBROUTINE insert_particles(window_shift_cells) wdata = dx * dy / (npart_per_cell + n_frac) - DO ipart = 1, window_shift_cells * (npart_per_cell + n_frac) + DO ipart = 1, npart_per_cell + n_frac CALL create_particle(current) cell_frac_y = 0.5_num - random() - current%part_pos(1) = x0 + random() * window_shift_cells * dx + current%part_pos(1) = x0 + random() * dx current%part_pos(2) = y(iy) - cell_frac_y * dy ! Always use the triangle particle weighting for simplicity From f0578dad1475777d52c21a045f3c5780e99a2d3b Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 15 Jul 2020 16:51:07 +0200 Subject: [PATCH 062/106] back to window_shift_cells > ng - 1 --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index effaa947e..010b6289b 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -474,7 +474,7 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 1) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx CALL shift_window(window_shift_cells) From 20c5eaa223df50d7824295619c9298207ada9f76 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 20 Jul 2020 11:25:49 +0200 Subject: [PATCH 063/106] Shifting window in chuncks of size ng --- epoch2d/src/housekeeping/window.F90 | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 010b6289b..e468e5478 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -447,8 +447,9 @@ END SUBROUTINE remove_particles SUBROUTINE moving_window #ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real + REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nchunks, nremainder #endif IF (.NOT. move_window) RETURN @@ -477,7 +478,15 @@ SUBROUTINE moving_window IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx - CALL shift_window(window_shift_cells) + window_shift_steps = window_shift_cells / ng + nchunks = FLOOR(window_shift_steps) + nremainder = MOD(window_shift_cells, ng) + DO i = 1, nchunks + CALL shift_window(ng) + END DO + IF(remainder > 0) THEN + CALL shift_window(nremainder) + END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real From e21c4fdeeb4e90a23b057bee3c2c8f1be8ccbcee Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 20 Jul 2020 11:39:55 +0200 Subject: [PATCH 064/106] minor bug fix --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index e468e5478..42ea0a815 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -484,7 +484,7 @@ SUBROUTINE moving_window DO i = 1, nchunks CALL shift_window(ng) END DO - IF(remainder > 0) THEN + IF(nremainder > 0) THEN CALL shift_window(nremainder) END IF CALL setup_bc_lists From 17c03462653bb54bc6c29f54c271983e1e56951b Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 20 Jul 2020 14:37:03 +0200 Subject: [PATCH 065/106] Improved copy-in/out of MPI buffers --- epoch2d/src/boundary.F90 | 284 ++++++++++++++-------------- epoch2d/src/housekeeping/window.F90 | 70 ++++--- 2 files changed, 170 insertions(+), 184 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index c2c04a9e1..5d6cd2274 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -331,9 +331,9 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & INTEGER, INTENT(IN) :: nx_local, ny_local INTEGER, DIMENSION(c_ndims) :: sizes, subsizes INTEGER :: basetype, sz, szmax, i, j, k, n - REAL(num), ALLOCATABLE :: field_left(:), field_right(:) - REAL(num), ALLOCATABLE :: field_top(:), field_bottom(:) + REAL(num), ALLOCATABLE :: field(:) REAL(num), ALLOCATABLE :: temp(:) + INTEGER :: xmin, xmax, ymin, ymax, offset0, offset1, offset2 basetype = mpireal @@ -345,186 +345,178 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (sz > szmax) szmax = sz ALLOCATE(temp(szmax)) + ALLOCATE(field(szmax)) subsizes(1) = ng subsizes(2) = sizes(2) sz = 3 * subsizes(1) * subsizes(2) - - ALLOCATE(field_left(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1,ng - select case(k) - case(1) - field_left(n) = fieldx(i,j) - case(2) - field_left(n) = fieldy(i,j) - case(3) - field_left(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) + offset2 = 2 * offset1 - ALLOCATE(field_right(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = nx_local-ng+1, nx_local - select case(k) - case(1) - field_right(n) = fieldx(i,j) - case(2) - field_right(n) = fieldy(i,j) - case(3) - field_right(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + xmin = 1 + xmax = ng + ymin = 1-ng + ymax = subsizes(2)-ng - CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + xmin = nx_local + 1 + xmax = subsizes(1) + nx_local + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = nx_local+1, subsizes(1)+nx_local - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + END IF - CALL MPI_SENDRECV(field_right, sz, basetype, proc_x_max, & + xmin = nx_local - ng + 1 + xmax = nx_local + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_max, & tag, temp, sz, basetype, proc_x_min, tag, comm, status, errcode) + xmin = 1-ng + xmax = subsizes(1)-ng + IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + END IF subsizes(1) = sizes(1) subsizes(2) = ng sz = 3 * subsizes(1) * subsizes(2) - ALLOCATE(field_top(szmax)) - n = 1 - DO k = 1, 3 - DO j = ny_local-subsizes(2)+1, ny_local - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - field_top(n) = fieldx(i,j) - case(2) - field_top(n) = fieldy(i,j) - case(3) - field_top(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) + offset2 = 2 * offset1 + + xmin = 1 - ng + xmax = subsizes(1) - ng + ymin = ny_local - subsizes(2) + 1 + ymax = ny_local + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_y_max, & + tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) - ALLOCATE(field_bottom(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1, subsizes(2) - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - field_bottom(n) = fieldx(i,j) - case(2) - field_bottom(n) = fieldy(i,j) - case(3) - field_bottom(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + ymin = 1 - ng + ymax = subsizes(2) - ng + + IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + + END IF + + ymin = 1 + ymax = subsizes(2) + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) - CALL MPI_SENDRECV(field_bottom, sz, basetype, proc_y_min, & + ymin = ny_local + 1 + ymax = subsizes(2) + ny_local + + CALL MPI_SENDRECV(field, sz, basetype, proc_y_min, & tag, temp, sz, basetype, proc_y_max, tag, comm, status, errcode) IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = ny_local+1, subsizes(2)+ny_local - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO - END IF - CALL MPI_SENDRECV(field_top, sz, basetype, proc_y_max, & - tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) - IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1-ng, subsizes(1)-ng - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO END IF - DEALLOCATE(field_left) - DEALLOCATE(field_right) - DEALLOCATE(field_top) - DEALLOCATE(field_bottom) + + DEALLOCATE(field) DEALLOCATE(temp) END SUBROUTINE all_comp_field_bc + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & + xmin, xmax, ymin, ymax, offset) + + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, offset + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i, j, n + + n = 1 + DO j = ymin, ymax + DO i = xmin, xmax + buffer(n + offset) = field(i,j) + n = n + 1 + END DO + END DO + + END SUBROUTINE load_field_boundaries_to_buffer + + SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & + xmin, xmax, ymin, ymax, offset) + + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, offset + REAL(num), DIMENSION(1-ng:, 1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i, j, n + + n = 1 + DO j = ymin, ymax + DO i = xmin, xmax + field(i,j) = buffer(n + offset) + n = n + 1 + END DO + END DO + END SUBROUTINE unload_field_boundaries_from_buffer SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 42ea0a815..aa420b917 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -203,14 +203,14 @@ END SUBROUTINE shift_field SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) - INTEGER, INTENT(IN) :: ng REAL(num), DIMENSION(1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz INTEGER, INTENT(IN) :: nx_local, ny_local INTEGER, DIMENSION(c_ndims) :: sizes, subsizes INTEGER :: basetype, sz, szmax, i, j, k, n - REAL(num), ALLOCATABLE :: field_left(:) + REAL(num), ALLOCATABLE :: field(:) REAL(num), ALLOCATABLE :: temp(:) + INTEGER :: xmin, xmax, ymin, ymax, offset0, offset1, offset2 basetype = mpireal @@ -222,54 +222,48 @@ SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (sz > szmax) szmax = sz ALLOCATE(temp(szmax)) + ALLOCATE(field(szmax)) subsizes(1) = ng subsizes(2) = sizes(2) sz = 3 * subsizes(1) * subsizes(2) - - ALLOCATE(field_left(szmax)) - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = 1,ng - select case(k) - case(1) - field_left(n) = fieldx(i,j) - case(2) - field_left(n) = fieldy(i,j) - case(3) - field_left(n) = fieldz(i,j) - end select - n = n + 1 - END DO - END DO - END DO + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) + offset2 = 2 * offset1 - CALL MPI_SENDRECV(field_left, sz, basetype, proc_x_min, & + xmin = 1 + xmax = ng + ymin = 1-ng + ymax = subsizes(2)-ng + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + xmin = nx_local + 1 + xmax = subsizes(1) + nx_local + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN - n = 1 - DO k = 1, 3 - DO j = 1-ng, subsizes(2)-ng - DO i = nx_local+1, subsizes(1)+nx_local - select case(k) - case(1) - fieldx(i,j) = temp(n) - case(2) - fieldy(i,j) = temp(n) - case(3) - fieldz(i,j) = temp(n) - end select - n = n + 1 - END DO - END DO - END DO + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, offset2) + END IF - DEALLOCATE(field_left) + + DEALLOCATE(field) DEALLOCATE(temp) END SUBROUTINE moving_window_field_bc From 1501900579bb39f501c75022d1eb215995ad8f77 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 24 Jul 2020 13:46:55 +0200 Subject: [PATCH 066/106] introduced do loop with jumps of ng in moving window --- epoch2d/src/housekeeping/window.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index aa420b917..af9579d35 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -472,10 +472,10 @@ SUBROUTINE moving_window IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx - window_shift_steps = window_shift_cells / ng - nchunks = FLOOR(window_shift_steps) +! window_shift_steps = window_shift_cells / ng +! nchunks = FLOOR(window_shift_steps) nremainder = MOD(window_shift_cells, ng) - DO i = 1, nchunks + DO i = ng, window_shift_cells, ng CALL shift_window(ng) END DO IF(nremainder > 0) THEN From 6cb1de6d8e9376569f977eba8eec87de17da93de Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 27 Jul 2020 15:12:30 +0200 Subject: [PATCH 067/106] corrected for nremainder in window.F90 and offset in boundary.F90 --- epoch2d/src/boundary.F90 | 66 +++++++++++++++-------------- epoch2d/src/housekeeping/window.F90 | 8 +--- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index 5d6cd2274..be32713aa 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -333,7 +333,7 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & INTEGER :: basetype, sz, szmax, i, j, k, n REAL(num), ALLOCATABLE :: field(:) REAL(num), ALLOCATABLE :: temp(:) - INTEGER :: xmin, xmax, ymin, ymax, offset0, offset1, offset2 + INTEGER :: xmin, xmax, ymin, ymax, offset basetype = mpireal @@ -352,9 +352,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & sz = 3 * subsizes(1) * subsizes(2) - offset0 = 0 - offset1 = subsizes(1) * subsizes(2) - offset2 = 2 * offset1 +! offset0 = 0 +! offset1 = subsizes(1) * subsizes(2) +! offset2 = 2 * offset1 + + offset = subsizes(1) * subsizes(2) xmin = 1 xmax = ng @@ -362,11 +364,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ymax = subsizes(2)-ng CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) @@ -377,11 +379,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF @@ -389,11 +391,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & xmax = nx_local CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) CALL MPI_SENDRECV(field, sz, basetype, proc_x_max, & tag, temp, sz, basetype, proc_x_min, tag, comm, status, errcode) @@ -404,11 +406,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. x_min_boundary .OR. bc_field(c_bd_x_min)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF @@ -416,9 +418,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & subsizes(2) = ng sz = 3 * subsizes(1) * subsizes(2) - offset0 = 0 - offset1 = subsizes(1) * subsizes(2) - offset2 = 2 * offset1 +! offset0 = 0 +! offset1 = subsizes(1) * subsizes(2) +! offset2 = 2 * offset1 + + offset = subsizes(1) * subsizes(2) xmin = 1 - ng xmax = subsizes(1) - ng @@ -426,11 +430,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ymax = ny_local CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) CALL MPI_SENDRECV(field, sz, basetype, proc_y_max, & tag, temp, sz, basetype, proc_y_min, tag, comm, status, errcode) @@ -441,11 +445,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. y_min_boundary .OR. bc_field(c_bd_y_min)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF @@ -453,11 +457,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ymax = subsizes(2) CALL load_field_boundaries_to_buffer(fieldx, field, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL load_field_boundaries_to_buffer(fieldy, field, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL load_field_boundaries_to_buffer(fieldz, field, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) ymin = ny_local + 1 ymax = subsizes(2) + ny_local @@ -468,11 +472,11 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & IF (.NOT. y_max_boundary .OR. bc_field(c_bd_y_max)==c_bc_periodic) THEN CALL unload_field_boundaries_from_buffer(fieldx, temp, & - xmin, xmax, ymin, ymax, offset0) + xmin, xmax, ymin, ymax, 0*offset) CALL unload_field_boundaries_from_buffer(fieldy, temp, & - xmin, xmax, ymin, ymax, offset1) + xmin, xmax, ymin, ymax, offset) CALL unload_field_boundaries_from_buffer(fieldz, temp, & - xmin, xmax, ymin, ymax, offset2) + xmin, xmax, ymin, ymax, 2*offset) END IF diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index af9579d35..7c3f743ba 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -472,18 +472,14 @@ SUBROUTINE moving_window IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx -! window_shift_steps = window_shift_cells / ng -! nchunks = FLOOR(window_shift_steps) nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng CALL shift_window(ng) END DO - IF(nremainder > 0) THEN - CALL shift_window(nremainder) - END IF CALL setup_bc_lists CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) END IF END IF #else From f1c548a25f81c005ee5bec359730abce9a4fcd62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20Br=C3=B6mmel?= Date: Thu, 30 Jul 2020 10:41:09 +0000 Subject: [PATCH 068/106] Attempt to align moving_window with I/O Requires checks that alignment w.r.t. steps indeed works. Has not been compiled. force_ump needs checking as well. --- epoch2d/src/epoch2d.F90 | 2 +- epoch2d/src/housekeeping/window.F90 | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 6ccbcb60b..9cd735965 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -273,7 +273,7 @@ PROGRAM pic CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step + 1, force_dump) #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 7c3f743ba..529532d27 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -438,7 +438,13 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window + SUBROUTINE moving_window(future_step, force) + USE diagnostics + + integer, intent(in) :: future_step + logical, intent(in) :: force_dump + logical :: print_arrays(1:SIZE(file_prefixes)) + integer :: i #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps @@ -468,14 +474,24 @@ SUBROUTINE moving_window IF (window_v_x <= 0.0_num) RETURN window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) + ! CHECK FOR IO TAKING PLACE IN NEXT STEP... + print_arrays = .false. + DO i = 1,SIZE(file_prefixes) + CALL io_test(i, future_step, print_arrays(1), force_dump, prefix_first_call) + END DO + ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > ng - 1) THEN + IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) - DO i = ng, window_shift_cells, ng + DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) END DO + IF (ANY(print_arrays)) then + CALL shift_window(nremainder) + nremainder = 0 + END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From 5f8923b1d34e755103c7cbe62fa6238d3437d736 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20Br=C3=B6mmel?= Date: Tue, 4 Aug 2020 10:52:43 +0200 Subject: [PATCH 069/106] Fixing circular module dependency The dependency between `window` <-> `diagnostics` is resolved using a F08 SUBMODULE. This works, but is not nice on several levels. See comments within `window.F90`. One problem being that a newer Fortran standard is required. A cleaner solution would be to move the one dependent variable. --- epoch2d/Makefile | 7 +- epoch2d/src/epoch2d.F90 | 2 +- epoch2d/src/housekeeping/window.F90 | 87 ++++------------------- epoch2d/src/housekeeping/window_sub.F90 | 91 +++++++++++++++++++++++++ epoch2d/src/io/diagnostics.F90 | 2 + 5 files changed, 113 insertions(+), 76 deletions(-) create mode 100644 epoch2d/src/housekeeping/window_sub.F90 diff --git a/epoch2d/Makefile b/epoch2d/Makefile index fb5f4ee12..d247af76e 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -50,7 +50,7 @@ endif # Intel # ===== ifeq ($(strip $(COMPILER)),intel) - FFLAGS = -O3 -g -stand f03 + FFLAGS = -O3 -g #-stand f03 ifeq ($(strip $(CONS)),1) FLTCONS = -pc64 -fltconsistency endif @@ -69,7 +69,7 @@ endif # gfortran # ======== ifeq ($(strip $(COMPILER)),gfortran) - FFLAGS = -O3 -g -std=f2003 + FFLAGS = -O3 -g #-std=f2003 GNUVER := $(shell gfortran -dumpversion | head -1 \ | sed 's/[^0-9\.]*\([0-9\.]\+\).*/\1/') @@ -322,7 +322,7 @@ SRCFILES = antennae.f90 balance.F90 boundary.F90 bremsstrahlung.F90 \ redblack_module.f90 setup.F90 shape_functions.F90 shared_data.F90 shunt.F90 \ simple_io.F90 split_particle.F90 stack.f90 strings.f90 strings_advanced.f90 \ terminal_controls.F90 timer.f90 tokenizer_blocks.f90 utilities.f90 \ - version_data.F90 welcome.F90 window.F90 + version_data.F90 welcome.F90 window.F90 window_sub.F90 OBJFILES := $(SRCFILES:.f90=.o) OBJFILES := $(OBJFILES:.F90=.o) @@ -555,3 +555,4 @@ utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o window.o: window.F90 boundary.o evaluate.o partlist.o +window_sub.o: window_sub.F90 window.o diagnostics.o diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 9cd735965..a462bff9a 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -163,7 +163,7 @@ PROGRAM pic ELSE time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step + 1, force_dump) END IF ELSE dt_store = dt diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 529532d27..6d45cd79d 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -24,6 +24,21 @@ MODULE window REAL(num), ALLOCATABLE :: density(:), temperature(:,:), drift(:,:) REAL(num), SAVE :: window_shift_fraction + ! In order to check for steps that do I/O, we call 'io_test' from the + ! 'diagnostics' module. This would create a circular module dependency between + ! 'diagnostics' and 'window'. A submodule is currently used to solve the + ! circular dependency. This is not nice, but works. + ! Modules 'setup' and 'diagnostics' both require a single variable from this + ! module: 'window_shift_fraction'. If this variable were to be more 'global', + ! the submodule could be removed. An alternative is a separate module for only + ! this single variable that 'window', 'diagnostics' and 'setup' then use. + INTERFACE + MODULE SUBROUTINE moving_window(future_step, force_dump) + integer, intent(in) :: future_step + logical, intent(in) :: force_dump + END SUBROUTINE moving_window + END INTERFACE + CONTAINS SUBROUTINE initialise_window @@ -436,76 +451,4 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif - - - SUBROUTINE moving_window(future_step, force) - USE diagnostics - - integer, intent(in) :: future_step - logical, intent(in) :: force_dump - logical :: print_arrays(1:SIZE(file_prefixes)) - integer :: i - -#ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real, window_shift_steps - INTEGER :: window_shift_cells, errcode = 0 - INTEGER :: i, nchunks, nremainder -#endif - - IF (.NOT. move_window) RETURN - -#ifndef PER_SPECIES_WEIGHT - IF (.NOT. window_started) THEN - IF (time >= window_start_time .AND. time < window_stop_time) THEN - bc_field(c_bd_x_min) = bc_x_min_after_move - bc_field(c_bd_x_max) = bc_x_max_after_move - bc_field(c_bd_y_min) = bc_y_min_after_move - bc_field(c_bd_y_max) = bc_y_max_after_move - CALL setup_boundaries - IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num - window_started = .TRUE. - END IF - END IF - - ! If we have a moving window then update the window position - IF (window_started) THEN - IF (time >= window_stop_time) RETURN - IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) - IF (window_v_x <= 0.0_num) RETURN - window_shift_fraction = window_shift_fraction + dt * window_v_x / dx - window_shift_cells = FLOOR(window_shift_fraction) - ! CHECK FOR IO TAKING PLACE IN NEXT STEP... - print_arrays = .false. - DO i = 1,SIZE(file_prefixes) - CALL io_test(i, future_step, print_arrays(1), force_dump, prefix_first_call) - END DO - - ! Allow for posibility of having jumped two cells at once - IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN - window_shift_real = REAL(window_shift_cells, num) - window_offset = window_offset + window_shift_real * dx - nremainder = MOD(window_shift_cells, ng) - DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng - CALL shift_window(ng) - END DO - IF (ANY(print_arrays)) then - CALL shift_window(nremainder) - nremainder = 0 - END IF - CALL setup_bc_lists - CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real & - + REAL(nremainder, num) - END IF - END IF -#else - IF (rank == 0) THEN - WRITE(*,*) 'moving windows only available when using', & - ' per particle weighting' - END IF - CALL abort_code(c_err_pp_options_missing) -#endif - - END SUBROUTINE moving_window - END MODULE window diff --git a/epoch2d/src/housekeeping/window_sub.F90 b/epoch2d/src/housekeeping/window_sub.F90 new file mode 100644 index 000000000..d6cf8a331 --- /dev/null +++ b/epoch2d/src/housekeeping/window_sub.F90 @@ -0,0 +1,91 @@ +! Copyright (C) 2009-2019 University of Warwick +! Copyright (C) 2020 Juelich Supercomputing Center +! Forschungszentrum Juelich GmbH +! +! This program is free software: you can redistribute it and/or modify +! it under the terms of the GNU General Public License as published by +! the Free Software Foundation, either version 3 of the License, or +! (at your option) any later version. +! +! This program is distributed in the hope that it will be useful, +! but WITHOUT ANY WARRANTY; without even the implied warranty of +! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +! GNU General Public License for more details. +! +! You should have received a copy of the GNU General Public License +! along with this program. If not, see . + +SUBMODULE (window) window_sub + ! This SUBMODULE is used to circumvent the circular module dependency between + ! diagnostics and window (see comments therein) + USE diagnostics + +CONTAINS + + MODULE PROCEDURE moving_window + USE diagnostics + +#ifndef PER_SPECIES_WEIGHT + REAL(num) :: window_shift_real, window_shift_steps + INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nchunks, nremainder + logical :: print_arrays(1:SIZE(file_prefixes)) +#endif + + IF (.NOT. move_window) RETURN + +#ifndef PER_SPECIES_WEIGHT + IF (.NOT. window_started) THEN + IF (time >= window_start_time .AND. time < window_stop_time) THEN + bc_field(c_bd_x_min) = bc_x_min_after_move + bc_field(c_bd_x_max) = bc_x_max_after_move + bc_field(c_bd_y_min) = bc_y_min_after_move + bc_field(c_bd_y_max) = bc_y_max_after_move + CALL setup_boundaries + IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num + window_started = .TRUE. + END IF + END IF + + ! If we have a moving window then update the window position + IF (window_started) THEN + IF (time >= window_stop_time) RETURN + IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) + IF (window_v_x <= 0.0_num) RETURN + window_shift_fraction = window_shift_fraction + dt * window_v_x / dx + window_shift_cells = FLOOR(window_shift_fraction) + ! CHECK FOR IO TAKING PLACE IN NEXT STEP... + print_arrays = .false. + DO i = 1, SIZE(file_prefixes) + CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) + END DO + + ! Allow for posibility of having jumped two cells at once + IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) + DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng + CALL shift_window(ng) + END DO + IF (ANY(print_arrays)) then + CALL shift_window(nremainder) + nremainder = 0 + END IF + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + END IF + END IF +#else + IF (rank == 0) THEN + WRITE(*,*) 'moving windows only available when using', & + ' per particle weighting' + END IF + CALL abort_code(c_err_pp_options_missing) +#endif + + END PROCEDURE moving_window + +END SUBMODULE window_sub diff --git a/epoch2d/src/io/diagnostics.F90 b/epoch2d/src/io/diagnostics.F90 index 1c34a4873..38ab21781 100644 --- a/epoch2d/src/io/diagnostics.F90 +++ b/epoch2d/src/io/diagnostics.F90 @@ -40,6 +40,8 @@ MODULE diagnostics PUBLIC :: cleanup_stop_files, check_for_stop_condition PUBLIC :: deallocate_file_list, count_n_zeros PUBLIC :: build_persistent_subsets + ! Needed for window_sub only + PUBLIC :: io_test, prefix_first_call CHARACTER(LEN=*), PARAMETER :: stop_file = 'STOP' CHARACTER(LEN=*), PARAMETER :: stop_file_nodump = 'STOP_NODUMP' From 318bcf681838386d27dc7ecd537db3f2237acd4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dirk=20Br=C3=B6mmel?= Date: Tue, 4 Aug 2020 11:40:36 +0200 Subject: [PATCH 070/106] Simpler fix for circular dependencies This is much simpler and more in line with EPOCH's guidelines and other patterns. The single dependent variable has been moved to `shared_data`. --- epoch2d/Makefile | 13 ++-- epoch2d/src/housekeeping/setup.F90 | 1 - epoch2d/src/housekeeping/window.F90 | 86 ++++++++++++++++++----- epoch2d/src/housekeeping/window_sub.F90 | 91 ------------------------- epoch2d/src/io/diagnostics.F90 | 1 - epoch2d/src/shared_data.F90 | 1 + 6 files changed, 77 insertions(+), 116 deletions(-) delete mode 100644 epoch2d/src/housekeeping/window_sub.F90 diff --git a/epoch2d/Makefile b/epoch2d/Makefile index d247af76e..aff6637de 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -50,7 +50,7 @@ endif # Intel # ===== ifeq ($(strip $(COMPILER)),intel) - FFLAGS = -O3 -g #-stand f03 + FFLAGS = -O3 -g -stand f03 ifeq ($(strip $(CONS)),1) FLTCONS = -pc64 -fltconsistency endif @@ -69,7 +69,7 @@ endif # gfortran # ======== ifeq ($(strip $(COMPILER)),gfortran) - FFLAGS = -O3 -g #-std=f2003 + FFLAGS = -O3 -g -std=f2003 GNUVER := $(shell gfortran -dumpversion | head -1 \ | sed 's/[^0-9\.]*\([0-9\.]\+\).*/\1/') @@ -322,7 +322,7 @@ SRCFILES = antennae.f90 balance.F90 boundary.F90 bremsstrahlung.F90 \ redblack_module.f90 setup.F90 shape_functions.F90 shared_data.F90 shunt.F90 \ simple_io.F90 split_particle.F90 stack.f90 strings.f90 strings_advanced.f90 \ terminal_controls.F90 timer.f90 tokenizer_blocks.f90 utilities.f90 \ - version_data.F90 welcome.F90 window.F90 window_sub.F90 + version_data.F90 welcome.F90 window.F90 OBJFILES := $(SRCFILES:.f90=.o) OBJFILES := $(OBJFILES:.F90=.o) @@ -497,7 +497,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch2d.o: epoch2d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -539,7 +539,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - window.o $(SDFMOD) + $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -554,5 +554,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o -window_sub.o: window_sub.F90 window.o diagnostics.o +window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o diff --git a/epoch2d/src/housekeeping/setup.F90 b/epoch2d/src/housekeeping/setup.F90 index d81d4e126..dab70bb49 100644 --- a/epoch2d/src/housekeeping/setup.F90 +++ b/epoch2d/src/housekeeping/setup.F90 @@ -23,7 +23,6 @@ MODULE setup USE shunt USE laser USE injectors - USE window USE timer USE helper USE balance diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 6d45cd79d..24cc4d11b 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -22,22 +22,6 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:), temperature(:,:), drift(:,:) - REAL(num), SAVE :: window_shift_fraction - - ! In order to check for steps that do I/O, we call 'io_test' from the - ! 'diagnostics' module. This would create a circular module dependency between - ! 'diagnostics' and 'window'. A submodule is currently used to solve the - ! circular dependency. This is not nice, but works. - ! Modules 'setup' and 'diagnostics' both require a single variable from this - ! module: 'window_shift_fraction'. If this variable were to be more 'global', - ! the submodule could be removed. An alternative is a separate module for only - ! this single variable that 'window', 'diagnostics' and 'setup' then use. - INTERFACE - MODULE SUBROUTINE moving_window(future_step, force_dump) - integer, intent(in) :: future_step - logical, intent(in) :: force_dump - END SUBROUTINE moving_window - END INTERFACE CONTAINS @@ -451,4 +435,74 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + + + SUBROUTINE moving_window(future_step, force_dump) + USE diagnostics + + integer, intent(in) :: future_step + logical, intent(in) :: force_dump +#ifndef PER_SPECIES_WEIGHT + REAL(num) :: window_shift_real, window_shift_steps + INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nchunks, nremainder + logical :: print_arrays(1:SIZE(file_prefixes)) +#endif + + IF (.NOT. move_window) RETURN + +#ifndef PER_SPECIES_WEIGHT + IF (.NOT. window_started) THEN + IF (time >= window_start_time .AND. time < window_stop_time) THEN + bc_field(c_bd_x_min) = bc_x_min_after_move + bc_field(c_bd_x_max) = bc_x_max_after_move + bc_field(c_bd_y_min) = bc_y_min_after_move + bc_field(c_bd_y_max) = bc_y_max_after_move + CALL setup_boundaries + IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num + window_started = .TRUE. + END IF + END IF + + ! If we have a moving window then update the window position + IF (window_started) THEN + IF (time >= window_stop_time) RETURN + IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) + IF (window_v_x <= 0.0_num) RETURN + window_shift_fraction = window_shift_fraction + dt * window_v_x / dx + window_shift_cells = FLOOR(window_shift_fraction) + ! CHECK FOR IO TAKING PLACE IN NEXT STEP... + print_arrays = .false. + DO i = 1, SIZE(file_prefixes) + CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) + END DO + + ! Allow for posibility of having jumped two cells at once + IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) + DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng + CALL shift_window(ng) + END DO + IF (ANY(print_arrays)) then + CALL shift_window(nremainder) + nremainder = 0 + END IF + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + END IF + END IF +#else + IF (rank == 0) THEN + WRITE(*,*) 'moving windows only available when using', & + ' per particle weighting' + END IF + CALL abort_code(c_err_pp_options_missing) +#endif + + END SUBROUTINE moving_window + END MODULE window diff --git a/epoch2d/src/housekeeping/window_sub.F90 b/epoch2d/src/housekeeping/window_sub.F90 deleted file mode 100644 index d6cf8a331..000000000 --- a/epoch2d/src/housekeeping/window_sub.F90 +++ /dev/null @@ -1,91 +0,0 @@ -! Copyright (C) 2009-2019 University of Warwick -! Copyright (C) 2020 Juelich Supercomputing Center -! Forschungszentrum Juelich GmbH -! -! This program is free software: you can redistribute it and/or modify -! it under the terms of the GNU General Public License as published by -! the Free Software Foundation, either version 3 of the License, or -! (at your option) any later version. -! -! This program is distributed in the hope that it will be useful, -! but WITHOUT ANY WARRANTY; without even the implied warranty of -! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -! GNU General Public License for more details. -! -! You should have received a copy of the GNU General Public License -! along with this program. If not, see . - -SUBMODULE (window) window_sub - ! This SUBMODULE is used to circumvent the circular module dependency between - ! diagnostics and window (see comments therein) - USE diagnostics - -CONTAINS - - MODULE PROCEDURE moving_window - USE diagnostics - -#ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real, window_shift_steps - INTEGER :: window_shift_cells, errcode = 0 - INTEGER :: i, nchunks, nremainder - logical :: print_arrays(1:SIZE(file_prefixes)) -#endif - - IF (.NOT. move_window) RETURN - -#ifndef PER_SPECIES_WEIGHT - IF (.NOT. window_started) THEN - IF (time >= window_start_time .AND. time < window_stop_time) THEN - bc_field(c_bd_x_min) = bc_x_min_after_move - bc_field(c_bd_x_max) = bc_x_max_after_move - bc_field(c_bd_y_min) = bc_y_min_after_move - bc_field(c_bd_y_max) = bc_y_max_after_move - CALL setup_boundaries - IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num - window_started = .TRUE. - END IF - END IF - - ! If we have a moving window then update the window position - IF (window_started) THEN - IF (time >= window_stop_time) RETURN - IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) - IF (window_v_x <= 0.0_num) RETURN - window_shift_fraction = window_shift_fraction + dt * window_v_x / dx - window_shift_cells = FLOOR(window_shift_fraction) - ! CHECK FOR IO TAKING PLACE IN NEXT STEP... - print_arrays = .false. - DO i = 1, SIZE(file_prefixes) - CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) - END DO - - ! Allow for posibility of having jumped two cells at once - IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN - window_shift_real = REAL(window_shift_cells, num) - window_offset = window_offset + window_shift_real * dx - nremainder = MOD(window_shift_cells, ng) - DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng - CALL shift_window(ng) - END DO - IF (ANY(print_arrays)) then - CALL shift_window(nremainder) - nremainder = 0 - END IF - CALL setup_bc_lists - CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real & - + REAL(nremainder, num) - END IF - END IF -#else - IF (rank == 0) THEN - WRITE(*,*) 'moving windows only available when using', & - ' per particle weighting' - END IF - CALL abort_code(c_err_pp_options_missing) -#endif - - END PROCEDURE moving_window - -END SUBMODULE window_sub diff --git a/epoch2d/src/io/diagnostics.F90 b/epoch2d/src/io/diagnostics.F90 index 38ab21781..bbbc90555 100644 --- a/epoch2d/src/io/diagnostics.F90 +++ b/epoch2d/src/io/diagnostics.F90 @@ -27,7 +27,6 @@ MODULE diagnostics USE setup USE deck_io_block USE strings - USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch2d/src/shared_data.F90 b/epoch2d/src/shared_data.F90 index 4e697672e..35eb2941c 100644 --- a/epoch2d/src/shared_data.F90 +++ b/epoch2d/src/shared_data.F90 @@ -572,6 +572,7 @@ MODULE shared_data INTEGER :: bc_y_min_after_move = c_bc_null INTEGER :: bc_y_max_after_move = c_bc_null REAL(num) :: window_offset + REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- From ebdb08e2635e1caa022838d5ccb8c3879ee049ae Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 24 Sep 2020 17:40:31 +0200 Subject: [PATCH 071/106] Incorporated changes in moving window of epoch3d --- epoch3d/src/boundary.F90 | 38 ++++++ epoch3d/src/housekeeping/window.F90 | 180 ++++++++++++++++++++++------ 2 files changed, 183 insertions(+), 35 deletions(-) diff --git a/epoch3d/src/boundary.F90 b/epoch3d/src/boundary.F90 index 1696f4b96..c4ab8e631 100644 --- a/epoch3d/src/boundary.F90 +++ b/epoch3d/src/boundary.F90 @@ -467,7 +467,45 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local, & END SUBROUTINE do_field_mpi_with_lengths + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & + xmin, xmax, ymin, ymax, zmin, zmax, offset) + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, zmin, zmax, offset + REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i,j,k,n + + n = 1 + DO k = zmin, zmax + DO j = ymin, ymax + DO i = xmin, xmax + buffer(n+offset) = field(i,j,k) + n = n+1 + END DO + END DO + END DO + + END SUBROUTINE load_field_boundaries_to_buffer + + SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & + xmin, xmax, ymin, ymax, zmin, zmax, offset) + + INTEGER, INTENT(IN) :: xmin, xmax, ymin, ymax, zmin, zmax, offset + REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: field + REAL(num), DIMENSION(:), INTENT(INOUT) :: buffer + INTEGER :: i,j,k,n + + n = 1 + DO k = zmin, zmax + DO j = ymin, ymax + DO i = xmin, xmax + field(i,j,k) = buffer(n+offset) + n = n+1 + END DO + END DO + END DO + + END SUBROUTINE unload_field_boundaries_from_buffer SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local, & nz_local) diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 57efa48d9..95ad654c5 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -82,50 +82,81 @@ SUBROUTINE shift_window(window_shift_cells) END DO x_grid_max = x_global(nx_global) x_max = xb_global(nx_global+1) - dx * cpml_thickness + END DO CALL setup_grid_x CALL remove_particles ! Shift fields around - CALL shift_fields - END DO + CALL shift_fields(window_shift_cells) END SUBROUTINE shift_window - SUBROUTINE shift_fields + SUBROUTINE shift_fields(window_shift_cells) INTEGER :: j, k + INTEGER, INTENT(IN) :: window_shift_cells + + CALL shift_field(ex, ng, window_shift_cells) + CALL shift_field(ey, ng, window_shift_cells) + CALL shift_field(ez, ng, window_shift_cells) + + CALL moving_window_field_bc(ex, ey, ez, ng, nx, ny, nz) - CALL shift_field(ex, ng) - CALL shift_field(ey, ng) - CALL shift_field(ez, ng) + CALL shift_field(bx, ng, window_shift_cells) + CALL shift_field(by, ng, window_shift_cells) + CALL shift_field(bz, ng, window_shift_cells) - CALL shift_field(bx, ng) - CALL shift_field(by, ng) - CALL shift_field(bz, ng) + CALL moving_window_field_bc(bx, by, bz, ng ,nx, ny, nz) - CALL shift_field(jx, jng) - CALL shift_field(jy, jng) - CALL shift_field(jz, jng) + CALL shift_field(jx, jng, window_shift_cells) + CALL shift_field(jy, jng, window_shift_cells) + CALL shift_field(jz, jng, window_shift_cells) + + CALL moving_window_field_bc(jx, jy, jz, ng, nx, ny, nz) IF (cpml_boundaries) THEN - CALL shift_field(cpml_psi_eyx, ng) - CALL shift_field(cpml_psi_ezx, ng) - CALL shift_field(cpml_psi_byx, ng) - CALL shift_field(cpml_psi_bzx, ng) - - CALL shift_field(cpml_psi_exy, ng) - CALL shift_field(cpml_psi_ezy, ng) - CALL shift_field(cpml_psi_bxy, ng) - CALL shift_field(cpml_psi_bzy, ng) - - CALL shift_field(cpml_psi_exz, ng) - CALL shift_field(cpml_psi_eyz, ng) - CALL shift_field(cpml_psi_bxz, ng) - CALL shift_field(cpml_psi_byz, ng) + CALL shift_field(cpml_psi_eyx, ng, window_shift_cells) + CALL shift_field(cpml_psi_ezx, ng, window_shift_cells) + CALL shift_field(cpml_psi_byx, ng, window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_eyx, & + cpml_psi_ezx, cpml_psi_byx, & + ng, nx, ny, nz) + + + CALL shift_field(cpml_psi_bzx, ng, window_shift_cells) + + CALL field_bc(cpml_psi_bzx, ng) + + CALL shift_field(cpml_psi_exy, ng, window_shift_cells) + CALL shift_field(cpml_psi_ezy, ng, window_shift_cells) + CALL shift_field(cpml_psi_bxy, ng, window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_exy, & + cpml_psi_ezy, cpml_psi_ezy, & + ng, nx, ny, nz) + + + CALL shift_field(cpml_psi_bzy, ng, window_shift_cells) + + CALL field_bc(cpml_psi_bzy, ng) + + + CALL shift_field(cpml_psi_exz, ng, window_shift_cells) + CALL shift_field(cpml_psi_eyz, ng, window_shift_cells) + CALL shift_field(cpml_psi_bxz, ng, window_shift_cells) + + CALL moving_window_field_bc(cpml_psi_exz, & + cpml_psi_eyz, cpml_psi_bxz, & + ng, nx, ny, nz) + + CALL shift_field(cpml_psi_byz, ng, window_shift_cells) + + CALL field_bc(cpml_psi_byz, ng) END IF IF (x_max_boundary) THEN @@ -174,25 +205,99 @@ END SUBROUTINE shift_fields - SUBROUTINE shift_field(field, ng) + SUBROUTINE shift_field(field, ng, window_shift_cells) - INTEGER, INTENT(IN) :: ng + INTEGER, INTENT(IN) :: ng, window_shift_cells REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: field INTEGER :: i, j, k - ! Shift field to the left by one cell + ! Shift field to the left by window_shift_cells cells DO k = 1-ng, nz+ng DO j = 1-ng, ny+ng - DO i = 1-ng, nx+ng-1 - field(i,j,k) = field(i+1,j,k) + DO i = 1-ng, nx+ng-window_shift_cells + field(i,j,k) = field(i+ window_shift_cells,j,k) END DO END DO END DO - CALL field_bc(field, ng) + !CALL field_bc(field, ng) END SUBROUTINE shift_field + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & + ny_local, nz_local) + + INTEGER, INTENT(IN) :: ng + REAL(num), DIMENSION(1-ng:,1-ng:,1-ng:), INTENT(INOUT) :: fieldx, fieldy, fieldz + INTEGER, INTENT(IN) :: nx_local, ny_local, nz_local + INTEGER, DIMENSION(c_ndims) :: sizes, subsizes + INTEGER :: basetype, sz, szmax, i, j, k, n + REAL(num), ALLOCATABLE :: field(:) + REAL(num), ALLOCATABLE :: temp(:) + INTEGER :: xmin, xmax, ymin, ymax, zmin, zmax, offset0, offset1, offset2 + + basetype = mpireal + + sizes(1) = nx_local + 2 * ng + sizes(2) = ny_local + 2 * ng + sizes(3) = nz_local + 2 * ng + + szmax = 3 * sizes(1) * sizes(2) * ng + sz = 3 * sizes(1) * sizes(3) * ng + IF (sz > szmax) szmax = sz + sz = 3 * sizes(2) * sizes(3) * ng + IF (sz > szmax) szmax = sz + + ALLOCATE(temp(szmax)) + ALLOCATE(field(szmax)) + + subsizes(1) = ng + subsizes(2) = sizes(2) + subsizes(3) = sizes(3) + + sz = 3 * subsizes(1) * subsizes(2) * subsizes(3) + + offset0 = 0 + offset1 = subsizes(1) * subsizes(2) * subsizes(3) + offset2 = 2 * offset1 + + xmin = 1 + xmax = ng + ymin = 1-ng + ymax = subsizes(2)-ng + zmin = 1-ng + zmax = subsizes(3)-ng + + CALL load_field_boundaries_to_buffer(fieldx, field, & + xmin, xmax, ymin, ymax, zmin, zmax, offset0) + CALL load_field_boundaries_to_buffer(fieldy, field, & + xmin, xmax, ymin, ymax, zmin, zmax, offset1) + CALL load_field_boundaries_to_buffer(fieldz, field, & + xmin, xmax, ymin, ymax, zmin, zmax, offset2) + + CALL MPI_SENDRECV(field, sz, basetype, proc_x_min, & + tag, temp, sz, basetype, proc_x_max, tag, comm, status, errcode) + + xmin = nx_local + 1 + xmax = subsizes(1) + nx_local + + IF (.NOT. x_max_boundary .OR. bc_field(c_bd_x_max)==c_bc_periodic) THEN + + CALL unload_field_boundaries_from_buffer(fieldx, temp, & + xmin, xmax, ymin, ymax, zmin, zmax, offset0) + CALL unload_field_boundaries_from_buffer(fieldy, temp, & + xmin, xmax, ymin, ymax, zmin, zmax, offset1) + CALL unload_field_boundaries_from_buffer(fieldz, temp, & + xmin, xmax, ymin, ymax, zmin, zmax, offset2) + + END IF + + + DEALLOCATE(field) + DEALLOCATE(temp) + + END SUBROUTINE moving_window_field_bc + SUBROUTINE insert_particles @@ -390,6 +495,7 @@ SUBROUTINE moving_window #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nremainder #endif IF (.NOT. move_window) RETURN @@ -417,13 +523,17 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) ! Allow for posibility of having jumped two cells at once - IF (window_shift_cells > 0) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx - CALL shift_window(window_shift_cells) + nremainder = MOD(window_shift_cells, ng) + DO i = ng, window_shift_cells, ng + CALL shift_window(ng) + END DO CALL setup_bc_lists CALL particle_bcs - window_shift_fraction = window_shift_fraction - window_shift_real + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) END IF END IF #else From 617291ecdd3e124fac089a3068e92bc401c8fc5c Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 12 Oct 2020 06:13:08 +0200 Subject: [PATCH 072/106] back to non-aligned I/O for moving window --- epoch2d/src/epoch2d.F90 | 4 ++-- epoch2d/src/housekeeping/window.F90 | 16 ++-------------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index a462bff9a..6ccbcb60b 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -163,7 +163,7 @@ PROGRAM pic ELSE time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window(step + 1, force_dump) + CALL moving_window END IF ELSE dt_store = dt @@ -273,7 +273,7 @@ PROGRAM pic CALL update_eb_fields_final - CALL moving_window(step + 1, force_dump) + CALL moving_window #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 24cc4d11b..4d6724a1e 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -437,16 +437,13 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window(future_step, force_dump) + SUBROUTINE moving_window USE diagnostics - integer, intent(in) :: future_step - logical, intent(in) :: force_dump #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder - logical :: print_arrays(1:SIZE(file_prefixes)) #endif IF (.NOT. move_window) RETURN @@ -471,24 +468,15 @@ SUBROUTINE moving_window(future_step, force_dump) IF (window_v_x <= 0.0_num) RETURN window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) - ! CHECK FOR IO TAKING PLACE IN NEXT STEP... - print_arrays = .false. - DO i = 1, SIZE(file_prefixes) - CALL io_test(i, future_step, print_arrays(i), force_dump, prefix_first_call) - END DO ! Allow for posibility of having jumped two cells at once - IF ( (window_shift_cells > ng - 1) .OR. ANY(print_arrays) ) THEN + IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) END DO - IF (ANY(print_arrays)) then - CALL shift_window(nremainder) - nremainder = 0 - END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From f4503cd614606422724aed5079d6ef9aa6825f17 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 7 Oct 2020 12:49:15 +0000 Subject: [PATCH 073/106] Update README.md --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index caa1afdfb..5c8a28922 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ +## EPOCH development + +This version of EPOCH is being developed under the project PiCKeX- Particle Kinetic codes for Exascale plasma simulation which is a part of Work Package 8 (WP8) of PRACE-6IP. The main production version of EPOCH code is maintained by the University of Warwick. The repository has been cloned on the Jülich Supercomputing Centre (JSC) GitLab server. This development version has the full functionality of the main EPOCH branch, but includes verified refactoring measures. + +Current developments include improving the efficiency of the moving window book-keeping, reducing the number MPI calls for the field boundary conditions and the particle boundary conditions. + +NB: This is not the official repository for the EPOCH code. + # *** PLEASE READ THIS NOTE *** If you are obtaining this code from the github server *DO NOT* use the From 164db7ecd5f6d6a563190bbaf5f985b4ec8c5d47 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 7 Oct 2020 12:53:51 +0000 Subject: [PATCH 074/106] Update README.md --- README.md | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5c8a28922..187cba18d 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,19 @@ ## EPOCH development -This version of EPOCH is being developed under the project PiCKeX- Particle Kinetic codes for Exascale plasma simulation which is a part of Work Package 8 (WP8) of PRACE-6IP. The main production version of EPOCH code is maintained by the University of Warwick. The repository has been cloned on the Jülich Supercomputing Centre (JSC) GitLab server. This development version has the full functionality of the main EPOCH branch, but includes verified refactoring measures. +**This is not the official repository for the EPOCH code.** + +This version of EPOCH is being developed within the project PiCKeX- +Particle Kinetic codes for Exascale plasma simulation, which is a +part of Work Package 8 (WP8) of PRACE-6IP. This repository has been +cloned to the Jülich Supercomputing Centre (JSC) GitLab server to +perform and publish this work. The main production version of EPOCH is +maintained by the University of Warwick. The development version found +here might not have the full functionality of the production version of +EPOCH, but includes a performance improved, refactored branch that will +be merged with the official version by the end of the project. Current developments include improving the efficiency of the moving window book-keeping, reducing the number MPI calls for the field boundary conditions and the particle boundary conditions. -NB: This is not the official repository for the EPOCH code. - # *** PLEASE READ THIS NOTE *** If you are obtaining this code from the github server *DO NOT* use the From 854efee3aef5e6f8f496c27cd5cd344dca1c1a5b Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 13 Oct 2020 07:16:45 +0200 Subject: [PATCH 075/106] remove_particles not needed in moving_window --- epoch2d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 4d6724a1e..b4ab8b926 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -83,7 +83,7 @@ SUBROUTINE shift_window(window_shift_cells) CALL setup_grid_x - CALL remove_particles + !CALL remove_particles ! Shift fields around CALL shift_fields(window_shift_cells) From 64efe449b8374ca8460098c730a6d1bcaa1018fe Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 22 Oct 2020 08:04:10 +0200 Subject: [PATCH 076/106] Commented remove_particles_() moving window for epoch3d --- epoch3d/src/housekeeping/window.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 95ad654c5..e75bf7f17 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -86,7 +86,7 @@ SUBROUTINE shift_window(window_shift_cells) CALL setup_grid_x - CALL remove_particles + !CALL remove_particles ! Shift fields around CALL shift_fields(window_shift_cells) From 8ef744c49385c11ea887b28a0c0a6d4b79aa7ef8 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Thu, 29 Oct 2020 18:59:10 +0100 Subject: [PATCH 077/106] Aligning moving window with I/O --- epoch2d/src/epoch2d.F90 | 7 +++--- epoch2d/src/housekeeping/window.F90 | 34 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 6ccbcb60b..8bb529fcd 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -163,7 +163,7 @@ PROGRAM pic ELSE time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step) END IF ELSE dt_store = dt @@ -268,12 +268,13 @@ PROGRAM pic IF ((step >= nsteps .AND. nsteps >= 0) & .OR. (time >= t_end) .OR. halt) EXIT - CALL output_routines(step) + !CALL output_routines(step) time = time + dt / 2.0_num CALL update_eb_fields_final - CALL moving_window + CALL moving_window(step) + CALL output_routines(step) #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index b4ab8b926..866ae771d 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -437,13 +437,19 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window + SUBROUTINE moving_window(step, force_write) USE diagnostics #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder + INTEGER, INTENT(INOUT) :: step + LOGICAL, INTENT(IN), OPTIONAL :: force_write + INTEGER, SAVE :: nstep_prev = -1 + INTEGER, SAVE :: last_step = -1 + LOGICAL :: force, writeout, print_arrays + INTEGER :: iprefix #endif IF (.NOT. move_window) RETURN @@ -469,6 +475,25 @@ SUBROUTINE moving_window window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) + ! Check if an I/O is performed + writeout = .FALSE. + force = .FALSE. + IF (PRESENT(force_write)) force = force_write + + WRITE(*,'("Checkpoint 1")') + + IF (step == nstep_prev .AND. .NOT.force) THEN + writeout = .FALSE. + ELSE + DO iprefix = 1,SIZE(file_prefixes) + WRITE(*,'("Checkpoint 2")') + CALL io_test(iprefix, step, print_arrays, force, prefix_first_call) + IF (.NOT.print_arrays) CYCLE + writeout = .TRUE. + WRITE(*,'("Checkpoint 3")') + END DO + END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) @@ -477,6 +502,13 @@ SUBROUTINE moving_window DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) END DO + WRITE(*,'("Checkpoint 4")') + IF (writeout) THEN + WRITE(*,'("Checkpoint 5")') + CALL shift_window(nremainder) + nremainder = 0 + END IF + CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From e93053fb55adeae3522ad59475ad446b1b9bf7da Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 30 Oct 2020 08:01:08 +0100 Subject: [PATCH 078/106] Aligning moving window with I/O and checkpoints --- epoch2d/src/epoch2d.F90 | 4 +- epoch2d/src/housekeeping/window.F90 | 80 +++++++++++++++++++---------- 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 8bb529fcd..83bdaa57d 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -268,13 +268,13 @@ PROGRAM pic IF ((step >= nsteps .AND. nsteps >= 0) & .OR. (time >= t_end) .OR. halt) EXIT - !CALL output_routines(step) + CALL output_routines(step) time = time + dt / 2.0_num CALL update_eb_fields_final CALL moving_window(step) - CALL output_routines(step) + !CALL output_routines(step) #ifdef SCOREP_USER_ENABLE SCOREP_USER_REGION_END(main_loop) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 866ae771d..fd59dd3b1 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -437,14 +437,14 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window(step, force_write) + SUBROUTINE moving_window(future_step, force_write) USE diagnostics #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder - INTEGER, INTENT(INOUT) :: step + INTEGER, INTENT(IN) :: future_step LOGICAL, INTENT(IN), OPTIONAL :: force_write INTEGER, SAVE :: nstep_prev = -1 INTEGER, SAVE :: last_step = -1 @@ -475,40 +475,68 @@ SUBROUTINE moving_window(step, force_write) window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) - ! Check if an I/O is performed - writeout = .FALSE. - force = .FALSE. - IF (PRESENT(force_write)) force = force_write - - WRITE(*,'("Checkpoint 1")') - - IF (step == nstep_prev .AND. .NOT.force) THEN - writeout = .FALSE. - ELSE - DO iprefix = 1,SIZE(file_prefixes) - WRITE(*,'("Checkpoint 2")') - CALL io_test(iprefix, step, print_arrays, force, prefix_first_call) - IF (.NOT.print_arrays) CYCLE - writeout = .TRUE. - WRITE(*,'("Checkpoint 3")') - END DO - END IF + ! Check if an I/O is performed + writeout = .FALSE. + force = .FALSE. + IF (PRESENT(force_write)) force = force_write + + IF (rank == 0) THEN + WRITE(*,'("Initial mw check")') + END IF + + IF (future_step == nstep_prev .AND. .NOT.force) THEN + writeout = .FALSE. + ELSE + DO iprefix = 1,SIZE(file_prefixes) + IF (rank == 0) THEN + WRITE(*,'("Checking I/O")') + END IF + CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) + IF (.NOT.print_arrays) CYCLE + writeout = .TRUE. + END DO + END IF + + IF(writeout) THEN + IF (rank == 0) THEN + WRITE(*,'("Alignment required")') + END IF + END IF + + nstep_prev = future_step + +! IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN +! +! window_shift_real = REAL(window_shift_cells, num) +! window_offset = window_offset + window_shift_real * dx +! nremainder = MOD(window_shift_cells, ng) +! CALL shift_window(nremainder) +! nremainder = 0 +! CALL setup_bc_lists +! CALL particle_bcs +! window_shift_fraction = window_shift_fraction - window_shift_real & +! + REAL(nremainder, num) +! IF (rank == 0) THEN +! WRITE(*,'("Performing alignment")') +! END IF +! END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN + window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) + IF (rank == 0) THEN + WRITE(*, '("Chunk shift of window")') + END IF END DO - WRITE(*,'("Checkpoint 4")') - IF (writeout) THEN - WRITE(*,'("Checkpoint 5")') - CALL shift_window(nremainder) - nremainder = 0 + IF (rank == 0 .AND. writeout) THEN + WRITE(*,'("Auto aligned output")') END IF - CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From 72d9d996b21bd5dd7177b5c3b7030f9343945de3 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 9 Nov 2020 13:30:25 +0100 Subject: [PATCH 079/106] Aligned moving window I/O with checkpoints --- epoch2d/src/deck/deck_io_block.F90 | 2 + epoch2d/src/housekeeping/setup.F90 | 4 + epoch2d/src/housekeeping/window.F90 | 217 ++++++++++++++++++++++++---- epoch2d/src/shared_data.F90 | 4 +- 4 files changed, 196 insertions(+), 31 deletions(-) diff --git a/epoch2d/src/deck/deck_io_block.F90 b/epoch2d/src/deck/deck_io_block.F90 index 2f354e3ca..0db4b5e49 100644 --- a/epoch2d/src/deck/deck_io_block.F90 +++ b/epoch2d/src/deck/deck_io_block.F90 @@ -1010,6 +1010,7 @@ SUBROUTINE init_io_block(io_block) io_block%name = '' io_block%dt_snapshot = -1.0_num io_block%time_prev = 0.0_num + io_block%buffer_time_prev = 0.0_num io_block%time_first = 0.0_num io_block%dt_average = -1.0_num io_block%dt_min_average = -1.0_num @@ -1017,6 +1018,7 @@ SUBROUTINE init_io_block(io_block) io_block%average_time_start = -1.0_num io_block%nstep_snapshot = -1 io_block%nstep_prev = 0 + io_block%buffer_nstep_prev = 0 io_block%nstep_first = 0 io_block%nstep_average = -1 io_block%restart = .FALSE. diff --git a/epoch2d/src/housekeeping/setup.F90 b/epoch2d/src/housekeeping/setup.F90 index dab70bb49..fcb13608a 100644 --- a/epoch2d/src/housekeeping/setup.F90 +++ b/epoch2d/src/housekeeping/setup.F90 @@ -989,13 +989,17 @@ SUBROUTINE restart_data(step) END IF IF (io_block_list(i)%dt_snapshot > 0.0_num) THEN io_block_list(i)%time_prev = time + io_block_list(i)%buffer_time_prev = time ELSE io_block_list(i)%time_prev = 0.0_num + io_block_list(i)%buffer_time_prev = 0.0_num END IF IF (io_block_list(i)%nstep_snapshot > 0) THEN io_block_list(i)%nstep_prev = step + io_block_list(i)%buffer_nstep_prev = step ELSE io_block_list(i)%nstep_prev = 0 + io_block_list(i)%buffer_nstep_prev = 0 END IF io_block_list(i)%walltime_prev = time IF (ALLOCATED(io_block_list(i)%dump_at_nsteps)) THEN diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index fd59dd3b1..a5b1deecc 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -437,19 +437,23 @@ END SUBROUTINE remove_particles - SUBROUTINE moving_window(future_step, force_write) + SUBROUTINE moving_window(step, force_write) USE diagnostics + USE deck_io_block #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real, window_shift_steps INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nchunks, nremainder - INTEGER, INTENT(IN) :: future_step + INTEGER, INTENT(IN) :: step + INTEGER :: future_step LOGICAL, INTENT(IN), OPTIONAL :: force_write INTEGER, SAVE :: nstep_prev = -1 INTEGER, SAVE :: last_step = -1 - LOGICAL :: force, writeout, print_arrays + LOGICAL :: force, writeout, print_arrays, dump INTEGER :: iprefix + INTEGER :: io, is, nstep_next = 0 + REAL(num) :: time0, time1, time_first #endif IF (.NOT. move_window) RETURN @@ -478,49 +482,204 @@ SUBROUTINE moving_window(future_step, force_write) ! Check if an I/O is performed writeout = .FALSE. force = .FALSE. + future_step = step + 1 IF (PRESENT(force_write)) force = force_write IF (rank == 0) THEN WRITE(*,'("Initial mw check")') END IF - IF (future_step == nstep_prev .AND. .NOT.force) THEN - writeout = .FALSE. - ELSE - DO iprefix = 1,SIZE(file_prefixes) - IF (rank == 0) THEN - WRITE(*,'("Checking I/O")') + ! Work out the time that the next dump will occur based on the + ! current timestep +! DO io = 1, n_io_blocks +! +! time0 = HUGE(1.0_num) +! time1 = HUGE(1.0_num) +! IF (io_block_list(io)%dt_snapshot >= 0.0_num) & +! time0 = io_block_list(io)%time_prev + io_block_list(io)%dt_snapshot +! IF (io_block_list(io)%nstep_snapshot >= 0) THEN +! nstep_next = io_block_list(io)%nstep_prev & +! + io_block_list(io)%nstep_snapshot +! time1 = time + dt * (nstep_next - (step + 1)) +! END IF +! +! IF (time0 < time1) THEN +! ! Next I/O dump based on dt_snapshot +! time_first = time0 +! IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN +! ! Store the most recent output time that qualifies +! writeout = .TRUE. +! END IF +! ELSE +! ! Next I/O dump based on nstep_snapshot +! time_first = time1 +! IF (io_block_list(io)%nstep_snapshot > 0 & +! .AND. (step + 1 ) >= nstep_next) THEN +! ! Store the most recent output step that qualifies +! writeout = .TRUE. +! END IF +! END IF +! +! END DO + + DO io = 1, n_io_blocks + io_block_list(io)%dump = .FALSE. + + + time0 = io_block_list(io)%walltime_interval + IF (time0 > 0.0_num) THEN + IF (elapsed_time - io_block_list(io)%walltime_prev >= time0) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%walltime_prev = elapsed_time + END IF END IF - CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) - IF (.NOT.print_arrays) CYCLE - writeout = .TRUE. - END DO + + IF (ALLOCATED(io_block_list(io)%dump_at_nsteps)) THEN + DO is = 1, SIZE(io_block_list(io)%dump_at_nsteps) + IF ((step + 1)>= io_block_list(io)%dump_at_nsteps(is)) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%dump_at_nsteps(is) = HUGE(1) + END IF + END DO END IF - IF(writeout) THEN + IF (ALLOCATED(io_block_list(io)%dump_at_times)) THEN + DO is = 1, SIZE(io_block_list(io)%dump_at_times) + IF (time >= io_block_list(io)%dump_at_times(is)) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%dump_at_times(is) = HUGE(1.0_num) + END IF + END DO + END IF + + IF (ALLOCATED(io_block_list(io)%dump_at_walltimes)) THEN + DO is = 1, SIZE(io_block_list(io)%dump_at_walltimes) + IF (elapsed_time >= io_block_list(io)%dump_at_walltimes(is)) THEN + io_block_list(io)%dump = .TRUE. + io_block_list(io)%dump_at_walltimes(is) = HUGE(1.0_num) + END IF + END DO + END IF + + ! Work out the time that the next dump will occur based on the + ! current timestep + time0 = HUGE(1.0_num) + time1 = HUGE(1.0_num) + IF (io_block_list(io)%dt_snapshot >= 0.0_num) & + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (io_block_list(io)%nstep_snapshot >= 0) THEN + nstep_next = io_block_list(io)%buffer_nstep_prev + 1 & + + io_block_list(io)%nstep_snapshot + time1 = time + dt * (nstep_next - step) + END IF + + IF (time0 < time1) THEN + ! Next I/O dump based on dt_snapshot + time_first = time0 IF (rank == 0) THEN - WRITE(*,'("Alignment required")') + print *, "time = ", time, "and time0 = ", time0 + END IF + IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN + ! Store the most recent output time that qualifies + DO + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (time0 > time) EXIT + io_block_list(io)%buffer_time_prev = time0 + END DO + IF (rank == 0) THEN + print *, "dt_snapshot condition for aligned output true" + END IF + dump = .TRUE. + IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. + IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. + IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. + IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. + IF (dump .AND. time < time_start) dump = .FALSE. + IF (dump .AND. time > time_stop) dump = .FALSE. + IF (dump .AND. (step) < nstep_start) dump = .FALSE. + IF (dump .AND. (step) > nstep_stop) dump = .FALSE. + IF (dump) writeout = .TRUE. +! IF (rank == 0) THEN +! print *, "dt_snapshot condition for aligned output true" +! END IF + END IF + ELSE + ! Next I/O dump based on nstep_snapshot + time_first = time1 + IF (io_block_list(io)%nstep_snapshot > 0 & + .AND. (step) >= nstep_next) THEN + ! Store the most recent output step that qualifies + DO + nstep_next = io_block_list(io)%buffer_nstep_prev & + + io_block_list(io)%nstep_snapshot + IF (nstep_next > step) EXIT + io_block_list(io)%buffer_nstep_prev = nstep_next + END DO + dump = .TRUE. + IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. + IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. + IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. + IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. + IF (dump .AND. time < time_start) dump = .FALSE. + IF (dump .AND. time > time_stop) dump = .FALSE. + IF (dump .AND. (step) < nstep_start) dump = .FALSE. + IF (dump .AND. (step) > nstep_stop) dump = .FALSE. + IF (dump) writeout = .TRUE. + IF (rank == 0) THEN + print *, "nstep_snapshot condition for aligned output true" + END IF END IF END IF - nstep_prev = future_step + END DO -! IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN -! -! window_shift_real = REAL(window_shift_cells, num) -! window_offset = window_offset + window_shift_real * dx -! nremainder = MOD(window_shift_cells, ng) -! CALL shift_window(nremainder) -! nremainder = 0 -! CALL setup_bc_lists -! CALL particle_bcs -! window_shift_fraction = window_shift_fraction - window_shift_real & -! + REAL(nremainder, num) -! IF (rank == 0) THEN -! WRITE(*,'("Performing alignment")') + + + +! DO io = 1, n_io_blocks +! IF(io_block_list(io)%dump) THEN +! writeout = .TRUE. ! END IF +! END DO + + +! IF (future_step == nstep_prev .AND. .NOT.force) THEN +! writeout = .FALSE. +! ELSE +! DO iprefix = 1,SIZE(file_prefixes) +! IF (rank == 0) THEN +! WRITE(*,'("Checking I/O")') +! END IF +! CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) +! IF (.NOT.print_arrays) CYCLE +! writeout = .TRUE. +! END DO ! END IF + IF(writeout) THEN + IF (rank == 0) THEN + WRITE(*,'("Alignment required")') + END IF + END IF + +! nstep_prev = future_step + + IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN + + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) + CALL shift_window(nremainder) + nremainder = 0 + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + IF (rank == 0) THEN + WRITE(*,'("Performing alignment")') + END IF + END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN diff --git a/epoch2d/src/shared_data.F90 b/epoch2d/src/shared_data.F90 index 35eb2941c..4ffc1a208 100644 --- a/epoch2d/src/shared_data.F90 +++ b/epoch2d/src/shared_data.F90 @@ -308,7 +308,7 @@ MODULE shared_data TYPE io_block_type CHARACTER(LEN=string_length) :: name - REAL(num) :: dt_snapshot, time_prev, time_first + REAL(num) :: dt_snapshot, time_prev, time_first, buffer_time_prev REAL(num) :: dt_average, dt_min_average, average_time, average_time_start REAL(num) :: time_start, time_stop REAL(num) :: walltime_interval, walltime_prev @@ -316,7 +316,7 @@ MODULE shared_data REAL(num), ALLOCATABLE :: dump_at_times(:) REAL(num), ALLOCATABLE :: dump_at_walltimes(:) INTEGER, ALLOCATABLE :: dump_at_nsteps(:) - INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average + INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average, buffer_nstep_prev INTEGER :: nstep_start, nstep_stop, dump_cycle, prefix_index INTEGER :: dump_cycle_first_index LOGICAL :: restart, dump, any_average, dump_first, dump_last From a95fabcf0f09ea5d6e37588f6d8769bf67ff1dd6 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 9 Nov 2020 14:20:56 +0100 Subject: [PATCH 080/106] Bugfix for aligned I/O with checkpoints --- epoch2d/src/housekeeping/window.F90 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index a5b1deecc..16f2acfee 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -568,7 +568,7 @@ SUBROUTINE moving_window(step, force_write) IF (io_block_list(io)%dt_snapshot >= 0.0_num) & time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot IF (io_block_list(io)%nstep_snapshot >= 0) THEN - nstep_next = io_block_list(io)%buffer_nstep_prev + 1 & + nstep_next = io_block_list(io)%buffer_nstep_prev & + io_block_list(io)%nstep_snapshot time1 = time + dt * (nstep_next - step) END IF @@ -579,7 +579,7 @@ SUBROUTINE moving_window(step, force_write) IF (rank == 0) THEN print *, "time = ", time, "and time0 = ", time0 END IF - IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN + IF (io_block_list(io)%dt_snapshot > 0 .AND. (time + dt) >= time0) THEN ! Store the most recent output time that qualifies DO time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot @@ -607,7 +607,7 @@ SUBROUTINE moving_window(step, force_write) ! Next I/O dump based on nstep_snapshot time_first = time1 IF (io_block_list(io)%nstep_snapshot > 0 & - .AND. (step) >= nstep_next) THEN + .AND. (step + 1) >= nstep_next) THEN ! Store the most recent output step that qualifies DO nstep_next = io_block_list(io)%buffer_nstep_prev & From 07c7dd3842d057fd17fab816248ea555f497510a Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Mon, 9 Nov 2020 15:14:07 +0100 Subject: [PATCH 081/106] Cleaned version: Aligned moving window with I/O --- epoch2d/src/housekeeping/window.F90 | 235 ++++++---------------------- 1 file changed, 51 insertions(+), 184 deletions(-) diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 16f2acfee..393127549 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -435,131 +435,19 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + SUBROUTINE mw_io_test(step, dump) + USE diagnostics + USE deck_io_block - - SUBROUTINE moving_window(step, force_write) - USE diagnostics - USE deck_io_block - -#ifndef PER_SPECIES_WEIGHT - REAL(num) :: window_shift_real, window_shift_steps - INTEGER :: window_shift_cells, errcode = 0 - INTEGER :: i, nchunks, nremainder INTEGER, INTENT(IN) :: step - INTEGER :: future_step - LOGICAL, INTENT(IN), OPTIONAL :: force_write - INTEGER, SAVE :: nstep_prev = -1 - INTEGER, SAVE :: last_step = -1 - LOGICAL :: force, writeout, print_arrays, dump - INTEGER :: iprefix + LOGICAL, INTENT(OUT) :: dump INTEGER :: io, is, nstep_next = 0 REAL(num) :: time0, time1, time_first -#endif - - IF (.NOT. move_window) RETURN - -#ifndef PER_SPECIES_WEIGHT - IF (.NOT. window_started) THEN - IF (time >= window_start_time .AND. time < window_stop_time) THEN - bc_field(c_bd_x_min) = bc_x_min_after_move - bc_field(c_bd_x_max) = bc_x_max_after_move - bc_field(c_bd_y_min) = bc_y_min_after_move - bc_field(c_bd_y_max) = bc_y_max_after_move - CALL setup_boundaries - IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num - window_started = .TRUE. - END IF - END IF - - ! If we have a moving window then update the window position - IF (window_started) THEN - IF (time >= window_stop_time) RETURN - IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) - IF (window_v_x <= 0.0_num) RETURN - window_shift_fraction = window_shift_fraction + dt * window_v_x / dx - window_shift_cells = FLOOR(window_shift_fraction) - - ! Check if an I/O is performed - writeout = .FALSE. - force = .FALSE. - future_step = step + 1 - IF (PRESENT(force_write)) force = force_write - - IF (rank == 0) THEN - WRITE(*,'("Initial mw check")') - END IF - - ! Work out the time that the next dump will occur based on the - ! current timestep -! DO io = 1, n_io_blocks -! -! time0 = HUGE(1.0_num) -! time1 = HUGE(1.0_num) -! IF (io_block_list(io)%dt_snapshot >= 0.0_num) & -! time0 = io_block_list(io)%time_prev + io_block_list(io)%dt_snapshot -! IF (io_block_list(io)%nstep_snapshot >= 0) THEN -! nstep_next = io_block_list(io)%nstep_prev & -! + io_block_list(io)%nstep_snapshot -! time1 = time + dt * (nstep_next - (step + 1)) -! END IF -! -! IF (time0 < time1) THEN -! ! Next I/O dump based on dt_snapshot -! time_first = time0 -! IF (io_block_list(io)%dt_snapshot > 0 .AND. time >= time0) THEN -! ! Store the most recent output time that qualifies -! writeout = .TRUE. -! END IF -! ELSE -! ! Next I/O dump based on nstep_snapshot -! time_first = time1 -! IF (io_block_list(io)%nstep_snapshot > 0 & -! .AND. (step + 1 ) >= nstep_next) THEN -! ! Store the most recent output step that qualifies -! writeout = .TRUE. -! END IF -! END IF -! -! END DO + dump = .FALSE. DO io = 1, n_io_blocks - io_block_list(io)%dump = .FALSE. - time0 = io_block_list(io)%walltime_interval - IF (time0 > 0.0_num) THEN - IF (elapsed_time - io_block_list(io)%walltime_prev >= time0) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%walltime_prev = elapsed_time - END IF - END IF - - IF (ALLOCATED(io_block_list(io)%dump_at_nsteps)) THEN - DO is = 1, SIZE(io_block_list(io)%dump_at_nsteps) - IF ((step + 1)>= io_block_list(io)%dump_at_nsteps(is)) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%dump_at_nsteps(is) = HUGE(1) - END IF - END DO - END IF - - IF (ALLOCATED(io_block_list(io)%dump_at_times)) THEN - DO is = 1, SIZE(io_block_list(io)%dump_at_times) - IF (time >= io_block_list(io)%dump_at_times(is)) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%dump_at_times(is) = HUGE(1.0_num) - END IF - END DO - END IF - - IF (ALLOCATED(io_block_list(io)%dump_at_walltimes)) THEN - DO is = 1, SIZE(io_block_list(io)%dump_at_walltimes) - IF (elapsed_time >= io_block_list(io)%dump_at_walltimes(is)) THEN - io_block_list(io)%dump = .TRUE. - io_block_list(io)%dump_at_walltimes(is) = HUGE(1.0_num) - END IF - END DO - END IF ! Work out the time that the next dump will occur based on the ! current timestep @@ -568,17 +456,14 @@ SUBROUTINE moving_window(step, force_write) IF (io_block_list(io)%dt_snapshot >= 0.0_num) & time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot IF (io_block_list(io)%nstep_snapshot >= 0) THEN - nstep_next = io_block_list(io)%buffer_nstep_prev & + nstep_next = io_block_list(io)%buffer_nstep_prev & + io_block_list(io)%nstep_snapshot - time1 = time + dt * (nstep_next - step) + time1 = time + dt * (nstep_next - step) END IF IF (time0 < time1) THEN ! Next I/O dump based on dt_snapshot time_first = time0 - IF (rank == 0) THEN - print *, "time = ", time, "and time0 = ", time0 - END IF IF (io_block_list(io)%dt_snapshot > 0 .AND. (time + dt) >= time0) THEN ! Store the most recent output time that qualifies DO @@ -586,22 +471,7 @@ SUBROUTINE moving_window(step, force_write) IF (time0 > time) EXIT io_block_list(io)%buffer_time_prev = time0 END DO - IF (rank == 0) THEN - print *, "dt_snapshot condition for aligned output true" - END IF dump = .TRUE. - IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. - IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. - IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. - IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. - IF (dump .AND. time < time_start) dump = .FALSE. - IF (dump .AND. time > time_stop) dump = .FALSE. - IF (dump .AND. (step) < nstep_start) dump = .FALSE. - IF (dump .AND. (step) > nstep_stop) dump = .FALSE. - IF (dump) writeout = .TRUE. -! IF (rank == 0) THEN -! print *, "dt_snapshot condition for aligned output true" -! END IF END IF ELSE ! Next I/O dump based on nstep_snapshot @@ -616,68 +486,68 @@ SUBROUTINE moving_window(step, force_write) io_block_list(io)%buffer_nstep_prev = nstep_next END DO dump = .TRUE. - IF (dump .AND. time < io_block_list(io)%time_start) dump = .FALSE. - IF (dump .AND. time > io_block_list(io)%time_stop) dump = .FALSE. - IF (dump .AND. (step) < io_block_list(io)%nstep_start) dump = .FALSE. - IF (dump .AND. (step) > io_block_list(io)%nstep_stop) dump = .FALSE. - IF (dump .AND. time < time_start) dump = .FALSE. - IF (dump .AND. time > time_stop) dump = .FALSE. - IF (dump .AND. (step) < nstep_start) dump = .FALSE. - IF (dump .AND. (step) > nstep_stop) dump = .FALSE. - IF (dump) writeout = .TRUE. - IF (rank == 0) THEN - print *, "nstep_snapshot condition for aligned output true" - END IF END IF END IF END DO + END SUBROUTINE mw_io_test + + SUBROUTINE moving_window(step) + USE diagnostics + USE deck_io_block + +#ifndef PER_SPECIES_WEIGHT + REAL(num) :: window_shift_real, window_shift_steps + INTEGER :: window_shift_cells, errcode = 0 + INTEGER :: i, nremainder + INTEGER, INTENT(IN) :: step + LOGICAL :: dump +! INTEGER :: io, is, nstep_next = 0 +! REAL(num) :: time0, time1, time_first +#endif + IF (.NOT. move_window) RETURN -! DO io = 1, n_io_blocks -! IF(io_block_list(io)%dump) THEN -! writeout = .TRUE. -! END IF -! END DO - - -! IF (future_step == nstep_prev .AND. .NOT.force) THEN -! writeout = .FALSE. -! ELSE -! DO iprefix = 1,SIZE(file_prefixes) -! IF (rank == 0) THEN -! WRITE(*,'("Checking I/O")') -! END IF -! CALL io_test(iprefix, future_step, print_arrays, force, prefix_first_call) -! IF (.NOT.print_arrays) CYCLE -! writeout = .TRUE. -! END DO -! END IF - - IF(writeout) THEN - IF (rank == 0) THEN - WRITE(*,'("Alignment required")') - END IF +#ifndef PER_SPECIES_WEIGHT + IF (.NOT. window_started) THEN + IF (time >= window_start_time .AND. time < window_stop_time) THEN + bc_field(c_bd_x_min) = bc_x_min_after_move + bc_field(c_bd_x_max) = bc_x_max_after_move + bc_field(c_bd_y_min) = bc_y_min_after_move + bc_field(c_bd_y_max) = bc_y_max_after_move + CALL setup_boundaries + IF (.NOT.ic_from_restart) window_shift_fraction = 0.0_num + window_started = .TRUE. END IF + END IF + + ! If we have a moving window then update the window position + IF (window_started) THEN + IF (time >= window_stop_time) RETURN + IF (use_window_stack) window_v_x = evaluate(window_v_x_stack, errcode) + IF (window_v_x <= 0.0_num) RETURN + window_shift_fraction = window_shift_fraction + dt * window_v_x / dx + window_shift_cells = FLOOR(window_shift_fraction) -! nstep_prev = future_step + CALL mw_io_test(step, dump) - IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. writeout) THEN + + IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. dump) THEN window_shift_real = REAL(window_shift_cells, num) window_offset = window_offset + window_shift_real * dx nremainder = MOD(window_shift_cells, ng) +! IF(rank == 0) THEN +! print *, "Performing alignment" +! END IF CALL shift_window(nremainder) nremainder = 0 CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & + REAL(nremainder, num) - IF (rank == 0) THEN - WRITE(*,'("Performing alignment")') - END IF END IF @@ -689,13 +559,10 @@ SUBROUTINE moving_window(step, force_write) nremainder = MOD(window_shift_cells, ng) DO i = ng, window_shift_cells, ng ! CHECK IF THIS LOOP IS CALLED IF window_shift_cells < ng CALL shift_window(ng) - IF (rank == 0) THEN - WRITE(*, '("Chunk shift of window")') - END IF END DO - IF (rank == 0 .AND. writeout) THEN - WRITE(*,'("Auto aligned output")') - END IF +! IF(rank == 0 .AND. dump) THEN +! print *, "Auto-aligned output" +! END IF CALL setup_bc_lists CALL particle_bcs window_shift_fraction = window_shift_fraction - window_shift_real & From de10cee5b971760a492b2e2a80c89ae593458942 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 13 Nov 2020 14:25:07 +0100 Subject: [PATCH 082/106] Aligned moving window with I/O in epoch3d --- epoch3d/Makefile | 6 +- epoch3d/src/deck/deck_io_block.F90 | 2 + epoch3d/src/housekeeping/setup.F90 | 6 +- epoch3d/src/housekeeping/window.F90 | 87 ++++++++++++++++++++++++++++- epoch3d/src/io/diagnostics.F90 | 2 +- epoch3d/src/shared_data.F90 | 5 +- 6 files changed, 99 insertions(+), 9 deletions(-) diff --git a/epoch3d/Makefile b/epoch3d/Makefile index 3f50ec536..19ffba71d 100644 --- a/epoch3d/Makefile +++ b/epoch3d/Makefile @@ -497,7 +497,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch3d.o: epoch3d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -539,7 +539,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - window.o $(SDFMOD) + $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -554,4 +554,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o +window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o diff --git a/epoch3d/src/deck/deck_io_block.F90 b/epoch3d/src/deck/deck_io_block.F90 index 3907d15fc..5f524ad9a 100644 --- a/epoch3d/src/deck/deck_io_block.F90 +++ b/epoch3d/src/deck/deck_io_block.F90 @@ -1022,6 +1022,7 @@ SUBROUTINE init_io_block(io_block) io_block%name = '' io_block%dt_snapshot = -1.0_num io_block%time_prev = 0.0_num + io_block%buffer_time_prev = 0.0_num io_block%time_first = 0.0_num io_block%dt_average = -1.0_num io_block%dt_min_average = -1.0_num @@ -1029,6 +1030,7 @@ SUBROUTINE init_io_block(io_block) io_block%average_time_start = -1.0_num io_block%nstep_snapshot = -1 io_block%nstep_prev = 0 + io_block%buffer_nstep_prev = 0 io_block%nstep_first = 0 io_block%nstep_average = -1 io_block%restart = .FALSE. diff --git a/epoch3d/src/housekeeping/setup.F90 b/epoch3d/src/housekeeping/setup.F90 index 57bc0db12..4ab137cee 100644 --- a/epoch3d/src/housekeeping/setup.F90 +++ b/epoch3d/src/housekeeping/setup.F90 @@ -23,7 +23,7 @@ MODULE setup USE shunt USE laser USE injectors - USE window +! USE window USE timer USE helper USE balance @@ -1060,13 +1060,17 @@ SUBROUTINE restart_data(step) END IF IF (io_block_list(i)%dt_snapshot > 0.0_num) THEN io_block_list(i)%time_prev = time + io_block_list(i)%buffer_time_prev = time ELSE io_block_list(i)%time_prev = 0.0_num + io_block_list(i)%buffer_time_prev = 0.0_num END IF IF (io_block_list(i)%nstep_snapshot > 0) THEN io_block_list(i)%nstep_prev = step + io_block_list(i)%buffer_nstep_prev = step ELSE io_block_list(i)%nstep_prev = 0 + io_block_list(i)%buffer_nstep_prev = 0 END IF io_block_list(i)%walltime_prev = time IF (ALLOCATED(io_block_list(i)%dump_at_nsteps)) THEN diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index e75bf7f17..6a81dd12c 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -22,7 +22,7 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:,:), temperature(:,:,:), drift(:,:,:) - REAL(num), SAVE :: window_shift_fraction +! REAL(num), SAVE :: window_shift_fraction CONTAINS @@ -488,14 +488,77 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + SUBROUTINE mw_io_test(step, dump) +! USE diagnostics + USE deck_io_block + + INTEGER, INTENT(IN) :: step + LOGICAL, INTENT(OUT) :: dump + INTEGER :: io, is, nstep_next = 0 + REAL(num) :: time0, time1, time_first + + dump = .FALSE. + DO io = 1, n_io_blocks + + time0 = io_block_list(io)%walltime_interval + + ! Work out the time that the next dump will occur based on the + ! current timestep + time0 = HUGE(1.0_num) + time1 = HUGE(1.0_num) + IF (io_block_list(io)%dt_snapshot >= 0.0_num) & + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (io_block_list(io)%nstep_snapshot >= 0) THEN + nstep_next = io_block_list(io)%buffer_nstep_prev & + + io_block_list(io)%nstep_snapshot + time1 = time + dt * (nstep_next - step) + END IF + + IF (time0 < time1) THEN + ! Next I/O dump based on dt_snapshot + time_first = time0 + IF (io_block_list(io)%dt_snapshot > 0 .AND. (time + dt) >= time0) THEN + ! Store the most recent output time that qualifies + DO + time0 = io_block_list(io)%buffer_time_prev + io_block_list(io)%dt_snapshot + IF (time0 > time) EXIT + io_block_list(io)%buffer_time_prev = time0 + END DO + dump = .TRUE. + END IF + ELSE + ! Next I/O dump based on nstep_snapshot + time_first = time1 + IF (io_block_list(io)%nstep_snapshot > 0 & + .AND. (step + 1) >= nstep_next) THEN + ! Store the most recent output step that qualifies + DO + nstep_next = io_block_list(io)%buffer_nstep_prev & + + io_block_list(io)%nstep_snapshot + IF (nstep_next > step) EXIT + io_block_list(io)%buffer_nstep_prev = nstep_next + END DO + dump = .TRUE. + END IF + END IF + + END DO + + END SUBROUTINE mw_io_test + - SUBROUTINE moving_window + + SUBROUTINE moving_window(step) +! USE diagnostics + USE deck_io_block #ifndef PER_SPECIES_WEIGHT REAL(num) :: window_shift_real INTEGER :: window_shift_cells, errcode = 0 INTEGER :: i, nremainder + INTEGER, INTENT(IN) :: step + LOGICAL :: dump #endif IF (.NOT. move_window) RETURN @@ -522,6 +585,26 @@ SUBROUTINE moving_window IF (window_v_x <= 0.0_num) RETURN window_shift_fraction = window_shift_fraction + dt * window_v_x / dx window_shift_cells = FLOOR(window_shift_fraction) + + CALL mw_io_test(step, dump) + + + IF (window_shift_cells > 0 .AND. window_shift_cells < ng .AND. dump) THEN + + window_shift_real = REAL(window_shift_cells, num) + window_offset = window_offset + window_shift_real * dx + nremainder = MOD(window_shift_cells, ng) +! IF(rank == 0) THEN +! print *, "Performing alignment" +! END IF + CALL shift_window(nremainder) + nremainder = 0 + CALL setup_bc_lists + CALL particle_bcs + window_shift_fraction = window_shift_fraction - window_shift_real & + + REAL(nremainder, num) + END IF + ! Allow for posibility of having jumped two cells at once IF (window_shift_cells > ng - 1) THEN window_shift_real = REAL(window_shift_cells, num) diff --git a/epoch3d/src/io/diagnostics.F90 b/epoch3d/src/io/diagnostics.F90 index cc10987f0..a9a3de6a2 100644 --- a/epoch3d/src/io/diagnostics.F90 +++ b/epoch3d/src/io/diagnostics.F90 @@ -27,7 +27,7 @@ MODULE diagnostics USE setup USE deck_io_block USE strings - USE window +! USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch3d/src/shared_data.F90 b/epoch3d/src/shared_data.F90 index 03bfb8830..e2f2c5279 100644 --- a/epoch3d/src/shared_data.F90 +++ b/epoch3d/src/shared_data.F90 @@ -309,7 +309,7 @@ MODULE shared_data TYPE io_block_type CHARACTER(LEN=string_length) :: name - REAL(num) :: dt_snapshot, time_prev, time_first + REAL(num) :: dt_snapshot, time_prev, time_first, buffer_time_prev REAL(num) :: dt_average, dt_min_average, average_time, average_time_start REAL(num) :: time_start, time_stop REAL(num) :: walltime_interval, walltime_prev @@ -317,7 +317,7 @@ MODULE shared_data REAL(num), ALLOCATABLE :: dump_at_times(:) REAL(num), ALLOCATABLE :: dump_at_walltimes(:) INTEGER, ALLOCATABLE :: dump_at_nsteps(:) - INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average + INTEGER :: nstep_snapshot, nstep_prev, nstep_first, nstep_average, buffer_nstep_prev INTEGER :: nstep_start, nstep_stop, dump_cycle, prefix_index INTEGER :: dump_cycle_first_index LOGICAL :: restart, dump, any_average, dump_first, dump_last @@ -594,6 +594,7 @@ MODULE shared_data INTEGER :: bc_z_min_after_move = c_bc_null INTEGER :: bc_z_max_after_move = c_bc_null REAL(num) :: window_offset + REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- From 09bdada4e3af1f3abc810e32d520f5ca3f3f4a52 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 13 Nov 2020 14:44:35 +0100 Subject: [PATCH 083/106] Cleaned dependencies --- epoch2d/Makefile | 6 +++--- epoch2d/src/housekeeping/setup.F90 | 1 + epoch2d/src/housekeeping/window.F90 | 5 +++-- epoch2d/src/io/diagnostics.F90 | 1 + epoch2d/src/shared_data.F90 | 2 +- epoch3d/Makefile | 6 +++--- epoch3d/src/housekeeping/setup.F90 | 2 +- epoch3d/src/housekeeping/window.F90 | 2 +- epoch3d/src/io/diagnostics.F90 | 2 +- epoch3d/src/shared_data.F90 | 2 +- 10 files changed, 16 insertions(+), 13 deletions(-) diff --git a/epoch2d/Makefile b/epoch2d/Makefile index aff6637de..fb5f4ee12 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -497,7 +497,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch2d.o: epoch2d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -539,7 +539,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - $(SDFMOD) + window.o $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -554,4 +554,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o diff --git a/epoch2d/src/housekeeping/setup.F90 b/epoch2d/src/housekeeping/setup.F90 index fcb13608a..da62e148f 100644 --- a/epoch2d/src/housekeeping/setup.F90 +++ b/epoch2d/src/housekeeping/setup.F90 @@ -23,6 +23,7 @@ MODULE setup USE shunt USE laser USE injectors + USE window USE timer USE helper USE balance diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 393127549..7ceb99657 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -22,6 +22,7 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:), temperature(:,:), drift(:,:) + REAL(num), SAVE :: window_shift_fraction CONTAINS @@ -436,7 +437,7 @@ END SUBROUTINE remove_particles #endif SUBROUTINE mw_io_test(step, dump) - USE diagnostics +! USE diagnostics USE deck_io_block INTEGER, INTENT(IN) :: step @@ -495,7 +496,7 @@ END SUBROUTINE mw_io_test SUBROUTINE moving_window(step) - USE diagnostics +! USE diagnostics USE deck_io_block #ifndef PER_SPECIES_WEIGHT diff --git a/epoch2d/src/io/diagnostics.F90 b/epoch2d/src/io/diagnostics.F90 index bbbc90555..38ab21781 100644 --- a/epoch2d/src/io/diagnostics.F90 +++ b/epoch2d/src/io/diagnostics.F90 @@ -27,6 +27,7 @@ MODULE diagnostics USE setup USE deck_io_block USE strings + USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch2d/src/shared_data.F90 b/epoch2d/src/shared_data.F90 index 4ffc1a208..62902f193 100644 --- a/epoch2d/src/shared_data.F90 +++ b/epoch2d/src/shared_data.F90 @@ -572,7 +572,7 @@ MODULE shared_data INTEGER :: bc_y_min_after_move = c_bc_null INTEGER :: bc_y_max_after_move = c_bc_null REAL(num) :: window_offset - REAL(num) :: window_shift_fraction +! REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- diff --git a/epoch3d/Makefile b/epoch3d/Makefile index 19ffba71d..3f50ec536 100644 --- a/epoch3d/Makefile +++ b/epoch3d/Makefile @@ -497,7 +497,7 @@ deck_window_block.o: deck_window_block.f90 strings_advanced.o deltaf_loader.o: deltaf_loader.F90 shared_data.o diagnostics.o: diagnostics.F90 antennae.o calc_df.o constants.o deck.o \ deck_io_block.o dist_fn.o evaluate.o iterators.o particle_id_hash.o probes.o \ - setup.o strings.o timer.o version_data.o $(SDFMOD) $(ENCODED_SOURCE) + setup.o strings.o timer.o version_data.o window.o $(SDFMOD) $(ENCODED_SOURCE) dist_fn.o: dist_fn.F90 mpi_subtype_control.o particles.o $(SDFMOD) epoch3d.o: epoch3d.F90 antennae.o balance.o bremsstrahlung.o calc_df.o \ collisions.o current_smooth.o deck.o diagnostics.o fields.o finish.o \ @@ -539,7 +539,7 @@ redblack_module.o: redblack_module.f90 partlist.o setup.o: setup.F90 antennae.o balance.o boundary.o constants.o fields.o \ helper.o injectors.o laser.o mpi_routines.o mpi_subtype_control.o \ particle_id_hash.o shunt.o split_particle.o timer.o version_data.o welcome.o \ - $(SDFMOD) + window.o $(SDFMOD) shape_functions.o: shape_functions.F90 constants.o shared_data.o: shared_data.F90 constants.o $(SDFMOD) shunt.o: shunt.F90 evaluator_blocks.o tokenizer_blocks.o utilities.o @@ -554,4 +554,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o diagnostics.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o diff --git a/epoch3d/src/housekeeping/setup.F90 b/epoch3d/src/housekeeping/setup.F90 index 4ab137cee..e4697025c 100644 --- a/epoch3d/src/housekeeping/setup.F90 +++ b/epoch3d/src/housekeeping/setup.F90 @@ -23,7 +23,7 @@ MODULE setup USE shunt USE laser USE injectors -! USE window + USE window USE timer USE helper USE balance diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 6a81dd12c..829fe35eb 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -22,7 +22,7 @@ MODULE window IMPLICIT NONE REAL(num), ALLOCATABLE :: density(:,:), temperature(:,:,:), drift(:,:,:) -! REAL(num), SAVE :: window_shift_fraction + REAL(num), SAVE :: window_shift_fraction CONTAINS diff --git a/epoch3d/src/io/diagnostics.F90 b/epoch3d/src/io/diagnostics.F90 index a9a3de6a2..cc10987f0 100644 --- a/epoch3d/src/io/diagnostics.F90 +++ b/epoch3d/src/io/diagnostics.F90 @@ -27,7 +27,7 @@ MODULE diagnostics USE setup USE deck_io_block USE strings -! USE window + USE window USE timer USE antennae USE particle_id_hash_mod diff --git a/epoch3d/src/shared_data.F90 b/epoch3d/src/shared_data.F90 index e2f2c5279..84dddde0b 100644 --- a/epoch3d/src/shared_data.F90 +++ b/epoch3d/src/shared_data.F90 @@ -594,7 +594,7 @@ MODULE shared_data INTEGER :: bc_z_min_after_move = c_bc_null INTEGER :: bc_z_max_after_move = c_bc_null REAL(num) :: window_offset - REAL(num) :: window_shift_fraction +! REAL(num) :: window_shift_fraction #ifdef PHOTONS !---------------------------------------------------------------------------- From c974781a6e9a914f9e17ca8f9ed06c2488b3cf5a Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Sun, 29 Nov 2020 17:03:21 +0100 Subject: [PATCH 084/106] Initial steps towards current vectorization --- epoch2d/src/current_deposition.F90 | 284 +++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 epoch2d/src/current_deposition.F90 diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 new file mode 100644 index 000000000..a46d4ff07 --- /dev/null +++ b/epoch2d/src/current_deposition.F90 @@ -0,0 +1,284 @@ + + + SUBROUTINE get_particle(part_data, n, part) + TYPE(particle_data), POINTER, INTENT(IN) :: part_data + INTEGER(i8), INTENT(IN) :: n + TYPE(particle), INTENT(INOUT) :: part + + IF (n >= 1 .AND. n <= part_data%count) THEN + part%pos = part_data%pos(n) + part%mom = part_data%mom(n) + part%mass = part_data%mass(n) + part%weight = part_data%weight(n) + part%charge = part_data%charge(n) + ENDIF + + END SUBROUTINE get_particle + + SUBROUTINE particle_sort + + ! This routine sorts the particles such that + ! the memory access to the particle list + ! is contigious + + END SUBROUTINE particle sort + + + SUBROUTINE current_deposition_VB_triangle + +#ifdef INTEL_VECTORISATION +!dir$ attributes align:64 :: gx +!dir$ attributes align:64 :: gy +!dir$ attributes align:64 :: gz +#endif + +#ifdef INTEL_VECTORISATION +!dir$ attributes align:64 :: hx +!dir$ attributes align:64 :: hy +!dir$ attributes align:64 :: hz +#endif + +#ifdef IBM_VECTORISATION +!IBM* ALIGN(64, gx, gy, gz) +#endif + +#ifdef IBM_VECTORISATION +!IBM* ALIGN(64, hx, hy, hz) +#endif + +! Similarly all other arrays need to be aligned + + DO ispec = 1, n_species + species => species_list(ispec) + + IF (species%immobile) CYCLE + + part_data => species%part_data + npart = part_data%count + + CALL particle_sort ! I guess this routine should be called in the Boris pusher + ! such that both gx and hx are in sync + + DO np = 1, npart, LVEC + + !$OMP SIMD + DO n = 1, MIN(LVEC, npart - np + 1) + + CALL get_particle(part_data, n, part) + +#ifndef PER_SPECIES_WEIGHT + part_weight(n) = part(n)%weight + fcx(n) = idty * part_weight(n) + fcy(n) = idtx * part_weight(n) + fcz(n) = idxy * part_weight(n) +#endif +#ifndef NO_PARTICLE_PROBES + init_part_x(n) = part(n)%part_pos(1) + init_part_y(n) = part(n)%part_pos(2) +#endif +#ifdef PER_PARTICLE_CHARGE_MASS + part_q(n) = part(n)%charge + part_m(n) = part(n)%mass + part_mc(n) = c * part(n)%mass + ipart_mc(n) = 1.0_num / part_mc(n) + cmratio(n) = part_q(n) * dtfac * ipart_mc(n) + ccmratio(n) = c * cmratio(n) +#ifndef NO_PARTICLE_PROBES + part_mc2 = c * part_mc +#endif +#endif + + !Copy the particle properties out for speed + part_x(n) = part(n)%part_pos(1) - x_grid_min_local + part_y(n) = part(n)%part_pos(2) - y_grid_min_local + part_ux(n) = part(n)%part_p(1) * ipart_mc(n) + part_uy(n) = part(n)%part_p(2) * ipart_mc(n) + part_uz(n) = part(n)%part_p(3) * ipart_mc(n) + + !Now advance to t+1.5dt to calculate current + !For efficient vectorization, I would prefer + !part_x(n) = part(n)%part_pos(1) - x_grid_min_local + delta_x + !part_y(n) = part(n)%part_pos(2) - x_grid_min_local + delta_x + !This eliminates the dependency on previous step + part_x(n) = part_x(n) + delta_x(n) + part_y(n) = part_y(n) + delta_y(n) + + !Delta-f calculation: subtract background from calculated current +#ifdef DELTAF_METHOD + weight_back(n) = part(n)%pvol * f0(ispecies, part_mc(n) / c, & + part(n)%part_p) + fcx(n) = idty * (part_weight(n) - weight_back(n)) + fcy(n) = idtx * (part_weight(n) - weight_back(n)) + fcz(n) = idxy * (part_weight(n) - weight_back(n)) +#endif + + cell_x_r(n) = part_x(n) * idx + cell_y_r(n) = part_y(n) * idy + + cell_x3(n) = FLOOR(cell_x_r(n) + 0.5_num) + cell_y3(n) = FLOOR(cell_y_r(n) + 0.5_num) + + cell_frac_x(n) = REAL(cell_x3(n), num) - cell_x_r(n) + cell_frac_y(n) = REAL(cell_y3(n), num) - cell_y_r(n) + + fjx(n) = fcx(n) * part_q(n) + fjy(n) = fcy(n) * part_q(n) + fjz(n) = fcz(n) * part_q(n) * part_vz(n) + + hx = 0.0_num + hy = 0.0_num + + dcellx(n) = cell_x3(n) - cell_x1(n) + dcelly(n) = cell_y3(n) - cell_y1(n) + + xmin(n) = sf_min + (dcellx(n) - 1) / 2 + ymin(n) = sf_max + (dcelly(n) - 1) / 2 + + + hx(xmin(n)) = 0.25_num + cell_frac_x(n)**2 + cell_frac_x(n) + hx(xmin(n) + 1) = 1.5_num - 2.0_num * cell_frac_x(n)**2 + hx(xmin(n) + 2) = 0.25_num + cell_frac_x(n)**2 - cell_frac_x(n) + + hy(ymin(n)) = 0.25_num + cell_frac_y(n)**2 + cell_frac_y(n) + hy(ymin(n) + 1) = 1.5_num - 2.0_num * cell_frac_y(n)**2 + hy(ymin(n) + 2) = 0.25_num + cell_frac_y(n)**2 - cell_frac_y(n) + + yfac10(n) = gy(ymin(n)) + 0.5_num * hy(ymin(n)) + yfac11(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 1) + yfac12(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 2) + + yfac20(n) = third * hy(ymin(n)) + 0.5 * gy(ymin(n)) + yfac21(n) = third * hy(ymin(n) + 1) + 0.5 * gy(ymin(n) + 1) + yfac22(n) = third * hy(ymin(n) + 2) + 0.5 * gy(ymin(n) + 2) + + xfac10(n) = gx(xmin(n)) + 0.5_num * hx(xmin(n)) + xfac11(n) = gx(xmin(n) + 1) + 0.5_num * hx(xmin(n) + 1) + xfac12(n) = gx(xmin(n) + 2) + 0.5_num * hx(xmin(n) + 2) + + wx(n,1) = hx(xmin(n)) * yfac10(n) + wx(n,2) = hx(xmin(n) + 1) * yfac10(n) + wx(n,3) = hx(xmin(n) + 2) * yfac10(n) + wx(n,4) = hx(xmin(n)) * yfac11(n) + wx(n,5) = hx(xmin(n) + 1) * yfac11(n) + wx(n,6) = hx(xmin(n) + 2) * yfac11(n) + wx(n,7) = hx(xmin(n)) * yfac12(n) + wx(n,8) = hx(xmin(n) + 1) * yfac12(n) + wx(n,9) = hx(xmin(n) + 2) * yfac12(n) + + wy(n,1) = hy(ymin(n)) * xfac10(n) + wy(n,2) = hy(ymin(n)) * xfac11(n) + wy(n,3) = hy(ymin(n)) * xfac12(n) + wy(n,4) = hy(ymin(n) + 1) * xfac10(n) + wy(n,5) = hy(ymin(n) + 1) * xfac11(n) + wy(n,6) = hy(ymin(n) + 1) * xfac12(n) + wy(n,7) = hy(ymin(n) + 2) * xfac10(n) + wy(n,8) = hy(ymin(n) + 2) * xfac11(n) + wy(n,9) = hy(ymin(n) + 2) * xfac12(n) + + wz(n,1) = gx(xmin(n)) * yfac10(n) + hx(xmin(n)) * yfac20(n) + wz(n,2) = gx(xmin(n) + 1) * yfac10(n) + hx(xmin(n) + 1) * yfac20(n) + wz(n,3) = gx(xmin(n) + 2) * yfac10(n) + hx(xmin(n) + 2) * yfac20(n) + wz(n,4) = gx(xmin(n)) * yfac11(n) + hx(xmin(n)) * yfac21(n) + wz(n,5) = gx(xmin(n) + 1) * yfac11(n) + hx(xmin(n) + 1) * yfac21(n) + wz(n,6) = gx(xmin(n) + 2) * yfac11(n) + hx(xmin(n) + 2) * yfac21(n) + wz(n,7) = gx(xmin(n)) * yfac12(n) + hx(xmin(n)) * yfac22(n) + wz(n,8) = gx(xmin(n) + 1) * yfac12(n) + hx(xmin(n) + 1) * yfac22(n) + wz(n,9) = gx(xmin(n) + 2) * yfac12(n) + hx(xmin(n) + 2) * yfac22(n) + + cx(n) = cell_x1(n) + xmin(n) + cy(n) = cell_y1(n) + ymin(n) + cell(n) = cx(n) + (cy(n) - 1) * nx + + jxh(n,cell(n)) = -fjx(n) * wx(n,1) + jxh(n,cell(n) + 1) = -fjx(n) * (wx(n,1) + wx(n,2)) + jxh(n,cell(n) + 2) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) + jxh(n,cell(n) + 3) = -fjx(n) * wx(n,4) + jxh(n,cell(n) + 4) = -fjx(n) * (wx(n,4) + wx(n,5)) + jxh(n,cell(n) + 5) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) + jxh(n,cell(n) + 6) = -fjx(n) * wx(n,7) + jxh(n,cell(n) + 7) = -fjx(n) * (wx(n,7) + wx(n,8)) + jxh(n,cell(n) + 8) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) + + jyh(n,cell(n)) = -fjy(n) * wy(n,1) + jyh(n,cell(n) + 1) = -fjy(n) * wy(n,2) + jyh(n,cell(n) + 2) = -fjy(n) * wy(n,3) + jyh(n,cell(n) + 3) = -fjy(n) * (wy(n,1) + wy(n,4)) + jyh(n,cell(n) + 4) = -fjy(n) * (wy(n,2) + wy(n,5)) + jyh(n,cell(n) + 5) = -fjy(n) * (wy(n,3) + wy(n,6)) + jyh(n,cell(n) + 6) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) + jyh(n,cell(n) + 7) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) + jyh(n,cell(n) + 8) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) + + jzh(n,cell(n)) = fjz(n) * wz(n,1) + jzh(n,cell(n) + 1) = fjz(n) * wz(n,2) + jzh(n,cell(n) + 2) = fjz(n) * wz(n,3) + jzh(n,cell(n) + 3) = fjz(n) * wz(n,4) + jzh(n,cell(n) + 4) = fjz(n) * wz(n,5) + jzh(n,cell(n) + 5) = fjz(n) * wz(n,6) + jzh(n,cell(n) + 6) = fjz(n) * wz(n,7) + jzh(n,cell(n) + 7) = fjz(n) * wz(n,8) + jzh(n,cell(n) + 8) = fjz(n) * wz(n,9) + + END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) + + !$OMP END SIMD + + !$OMP SIMD + + DO n = 1, MIN(LVEC, npart - np + 1) + + jx(cx(n),cy(n)) = jx(cx(n),cy(n)) + jxh(n,1) + jx(cx(n) + 1,cy(n)) = jx(cx(n) + 1,cy(n)) + jxh(n,2) + jx(cx(n) + 2,cy(n)) = jx(cx(n) + 2,cy(n)) + jxh(n,3) + jx(cx(n), cy(n) + 1) = jx(cx(n),cy(n) + 1) + jxh(n,4) + jx(cx(n) + 1, cy(n) + 1) = jx(cx(n) + 1,cy(n) + 1) + jxh(n,5) + jx(cx(n) + 2, cy(n) + 1) = jx(cx(n) + 2,cy(n) + 1) + jxh(n,6) + jx(cx(n), cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,7) + jx(cx(n) + 1, cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,8) + jx(cx(n) + 2, cy(n) + 2) = jx(cx(n) + 2,cy(n) + 2) + jxh(n,9) + + jy(cx(n),cy(n)) = jy(cx(n),cy(n)) + jyh(n,1) + jy(cx(n) + 1,cy(n)) = jy(cx(n) + 1,cy(n)) + jyh(n,2) + jy(cx(n) + 2,cy(n)) = jy(cx(n) + 2,cy(n)) + jyh(n,3) + jy(cx(n), cy(n) + 1) = jy(cx(n),cy(n) + 1) + jyh(n,4) + jy(cx(n) + 1, cy(n) + 1) = jy(cx(n) + 1,cy(n) + 1) + jyh(n,5) + jy(cx(n) + 2, cy(n) + 1) = jy(cx(n) + 2,cy(n) + 1) + jyh(n,6) + jy(cx(n), cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,7) + jy(cx(n) + 1, cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,8) + jy(cx(n) + 2, cy(n) + 2) = jy(cx(n) + 2,cy(n) + 2) + jyh(n,9) + + jz(cx(n),cy(n)) = jz(cx(n),cy(n)) + jzh(n,1) + jz(cx(n) + 1,cy(n)) = jz(cx(n) + 1,cy(n)) + jzh(n,2) + jz(cx(n) + 2,cy(n)) = jz(cx(n) + 2,cy(n)) + jzh(n,3) + jz(cx(n), cy(n) + 1) = jz(cx(n),cy(n) + 1) + jzh(n,4) + jz(cx(n) + 1, cy(n) + 1) = jz(cx(n) + 1,cy(n) + 1) + jzh(n,5) + jz(cx(n) + 2, cy(n) + 1) = jz(cx(n) + 2,cy(n) + 1) + jzh(n,6) + jz(cx(n), cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,7) + jz(cx(n) + 1, cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,8) + jz(cx(n) + 2, cy(n) + 2) = jz(cx(n) + 2,cy(n) + 2) + jzh(n,9) + + END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) + !$OMP END SIMD + + END DO !END LOOP n = 1, npart + END DO !END LOOP ispec = 1, nspecies + + END current_deposition_VB_triangle + + + + + + + + + + + + + + + + + + From 555207bb7f0268100e4907f73443197036c3ccfa Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Sun, 29 Nov 2020 17:15:18 +0100 Subject: [PATCH 085/106] Fixing incides in jxh, jyh, jzh --- epoch2d/src/current_deposition.F90 | 58 +++++++++++++++--------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index a46d4ff07..da0d02b97 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -189,35 +189,35 @@ SUBROUTINE current_deposition_VB_triangle cy(n) = cell_y1(n) + ymin(n) cell(n) = cx(n) + (cy(n) - 1) * nx - jxh(n,cell(n)) = -fjx(n) * wx(n,1) - jxh(n,cell(n) + 1) = -fjx(n) * (wx(n,1) + wx(n,2)) - jxh(n,cell(n) + 2) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) - jxh(n,cell(n) + 3) = -fjx(n) * wx(n,4) - jxh(n,cell(n) + 4) = -fjx(n) * (wx(n,4) + wx(n,5)) - jxh(n,cell(n) + 5) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) - jxh(n,cell(n) + 6) = -fjx(n) * wx(n,7) - jxh(n,cell(n) + 7) = -fjx(n) * (wx(n,7) + wx(n,8)) - jxh(n,cell(n) + 8) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) - - jyh(n,cell(n)) = -fjy(n) * wy(n,1) - jyh(n,cell(n) + 1) = -fjy(n) * wy(n,2) - jyh(n,cell(n) + 2) = -fjy(n) * wy(n,3) - jyh(n,cell(n) + 3) = -fjy(n) * (wy(n,1) + wy(n,4)) - jyh(n,cell(n) + 4) = -fjy(n) * (wy(n,2) + wy(n,5)) - jyh(n,cell(n) + 5) = -fjy(n) * (wy(n,3) + wy(n,6)) - jyh(n,cell(n) + 6) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) - jyh(n,cell(n) + 7) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) - jyh(n,cell(n) + 8) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) - - jzh(n,cell(n)) = fjz(n) * wz(n,1) - jzh(n,cell(n) + 1) = fjz(n) * wz(n,2) - jzh(n,cell(n) + 2) = fjz(n) * wz(n,3) - jzh(n,cell(n) + 3) = fjz(n) * wz(n,4) - jzh(n,cell(n) + 4) = fjz(n) * wz(n,5) - jzh(n,cell(n) + 5) = fjz(n) * wz(n,6) - jzh(n,cell(n) + 6) = fjz(n) * wz(n,7) - jzh(n,cell(n) + 7) = fjz(n) * wz(n,8) - jzh(n,cell(n) + 8) = fjz(n) * wz(n,9) + jxh(n,1) = -fjx(n) * wx(n,1) + jxh(n,2) = -fjx(n) * (wx(n,1) + wx(n,2)) + jxh(n,3) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) + jxh(n,4) = -fjx(n) * wx(n,4) + jxh(n,5) = -fjx(n) * (wx(n,4) + wx(n,5)) + jxh(n,6) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) + jxh(n,7) = -fjx(n) * wx(n,7) + jxh(n,8) = -fjx(n) * (wx(n,7) + wx(n,8)) + jxh(n,9) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) + + jyh(n,1) = -fjy(n) * wy(n,1) + jyh(n,2) = -fjy(n) * wy(n,2) + jyh(n,3) = -fjy(n) * wy(n,3) + jyh(n,4) = -fjy(n) * (wy(n,1) + wy(n,4)) + jyh(n,5) = -fjy(n) * (wy(n,2) + wy(n,5)) + jyh(n,6) = -fjy(n) * (wy(n,3) + wy(n,6)) + jyh(n,7) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) + jyh(n,8) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) + jyh(n,9) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) + + jzh(n,1) = fjz(n) * wz(n,1) + jzh(n,2) = fjz(n) * wz(n,2) + jzh(n,3) = fjz(n) * wz(n,3) + jzh(n,4) = fjz(n) * wz(n,4) + jzh(n,5) = fjz(n) * wz(n,5) + jzh(n,6) = fjz(n) * wz(n,6) + jzh(n,7) = fjz(n) * wz(n,7) + jzh(n,8) = fjz(n) * wz(n,8) + jzh(n,9) = fjz(n) * wz(n,9) END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) From af92f6b280b197fd2880e68afec30525026b3460 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 30 Dec 2020 19:47:05 +0100 Subject: [PATCH 086/106] Added the deck_io_block dependency in Makefile --- epoch2d/Makefile | 2 +- epoch3d/Makefile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/epoch2d/Makefile b/epoch2d/Makefile index fb5f4ee12..4aa233b0d 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -554,4 +554,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o deck_io_block.o diff --git a/epoch3d/Makefile b/epoch3d/Makefile index 3f50ec536..8a873ea2f 100644 --- a/epoch3d/Makefile +++ b/epoch3d/Makefile @@ -554,4 +554,4 @@ tokenizer_blocks.o: tokenizer_blocks.f90 strings.o utilities.o: utilities.f90 constants.o shared_data.o version_data.o: version_data.F90 $(SRCDIR)/COMMIT welcome.o: welcome.F90 shared_data.o terminal_controls.o version_data.o -window.o: window.F90 boundary.o evaluate.o partlist.o +window.o: window.F90 boundary.o evaluate.o partlist.o deck_io_block.o From 6d1ecc88d91136a1c35c3a01c10f68339517da1f Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 7 Sep 2021 10:45:53 +0200 Subject: [PATCH 087/106] First merge conflict resolved --- epoch2d/src/deck/strings.f90 | 14 ++++++-------- epoch3d/src/deck/strings.f90 | 14 ++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/epoch2d/src/deck/strings.f90 b/epoch2d/src/deck/strings.f90 index 240f4482c..8c8d4bb37 100644 --- a/epoch2d/src/deck/strings.f90 +++ b/epoch2d/src/deck/strings.f90 @@ -323,17 +323,15 @@ END FUNCTION lowercase - FUNCTION trim_string(string) + FUNCTION trim_string(string_in) CHARACTER(LEN=c_max_string_length) :: trim_string - CHARACTER(LEN=*) :: string - CHARACTER(LEN=:), ALLOCATABLE :: string_copy + CHARACTER(LEN=*) :: string_in + CHARACTER(LEN=LEN_TRIM(string_in)) - ALLOCATE(CHARACTER(LEN=LEN(string)) :: string_copy) - - string_copy = ADJUSTL(string) - IF (LEN_TRIM(string_copy) > c_max_string_length) THEN - trim_string = string_copy(1:c_max_string_length) + string = ADJUSTL(string_in) + IF (LEN_TRIM(string) > c_max_string_length) THEN + trim_string = string(1:c_max_string_length) ELSE trim_string = TRIM(string_copy) END IF diff --git a/epoch3d/src/deck/strings.f90 b/epoch3d/src/deck/strings.f90 index 2b605fbe9..9c1b13741 100644 --- a/epoch3d/src/deck/strings.f90 +++ b/epoch3d/src/deck/strings.f90 @@ -327,17 +327,15 @@ END FUNCTION lowercase - FUNCTION trim_string(string) + FUNCTION trim_string(string_in) CHARACTER(LEN=c_max_string_length) :: trim_string - CHARACTER(LEN=*) :: string - CHARACTER(LEN=:), ALLOCATABLE :: string_copy + CHARACTER(LEN=*) :: string_in + CHARACTER(LEN=LEN_TRIM(string_in)) :: string - ALLOCATE(CHARACTER(LEN=LEN(string)) :: string_copy) - - string_copy = ADJUSTL(string) - IF (LEN_TRIM(string_copy) > c_max_string_length) THEN - trim_string = string_copy(1:c_max_string_length) + string = ADJUSTL(string_in) + IF (LEN_TRIM(string) > c_max_string_length) THEN + trim_string = string(1:c_max_string_length) ELSE trim_string = TRIM(string_copy) END IF From 1fd08a31b4642c31ad6f7f551a96a4bf9f3cafec Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 5 Jan 2021 12:19:57 +0100 Subject: [PATCH 088/106] Minor fix in strings.f90 --- epoch2d/src/deck/strings.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/deck/strings.f90 b/epoch2d/src/deck/strings.f90 index 8c8d4bb37..848b95cff 100644 --- a/epoch2d/src/deck/strings.f90 +++ b/epoch2d/src/deck/strings.f90 @@ -327,7 +327,7 @@ FUNCTION trim_string(string_in) CHARACTER(LEN=c_max_string_length) :: trim_string CHARACTER(LEN=*) :: string_in - CHARACTER(LEN=LEN_TRIM(string_in)) + CHARACTER(LEN=LEN_TRIM(string_in)) :: string string = ADJUSTL(string_in) IF (LEN_TRIM(string) > c_max_string_length) THEN From 03a458defb999163c77fec09b002b38c74f211f9 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Tue, 7 Sep 2021 10:50:32 +0200 Subject: [PATCH 089/106] Second merge conflict resolved --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 46940c1f7..2fd47b8d0 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "SDF"] path = SDF - url = ../SDF.git + url = https://gitlab.version.fz-juelich.de/SLPP/sdf/SDF.git From e65db54f47f98036340e23d6577e3931e224c5ed Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Mar 2021 08:33:51 +0100 Subject: [PATCH 090/106] First step towards particle sorting, and vec. pusher and current --- epoch2d/src/current_deposition.F90 | 930 ++++++++++++++++++++++------- 1 file changed, 698 insertions(+), 232 deletions(-) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index da0d02b97..2e105e620 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -1,272 +1,738 @@ +SUBROUTINE particle_sorting() + dx_bin = 5 * dx ! Number of cells in bin along x + dy_bin = 5 * dy ! Number of cells in bin along y + + idx_bin = 1./dx_bin ! Inverse of dx_bin + idy_bin = 1./dy_bin ! Inverse of dy_bin - SUBROUTINE get_particle(part_data, n, part) - TYPE(particle_data), POINTER, INTENT(IN) :: part_data - INTEGER(i8), INTENT(IN) :: n - TYPE(particle), INTENT(INOUT) :: part + nx_bin = ceiling((x_grid_max_local - x_grid_min_local) * idx_bin) ! Number of bins along x + ny_bin = ceiling((y_grid_max_local - y_grid_min_local) * idy_bin) ! Number of bins along y - IF (n >= 1 .AND. n <= part_data%count) THEN - part%pos = part_data%pos(n) - part%mom = part_data%mom(n) - part%mass = part_data%mass(n) - part%weight = part_data%weight(n) - part%charge = part_data%charge(n) - ENDIF + n_bins = nx_bin * ny_bin ! Total number of bins - END SUBROUTINE get_particle + ! Calculate particle positions in terms of the bin co-ordinates - SUBROUTINE particle_sort + DO ipart = 1, species_list(ispecies)%attached_list%count + next => current%next + part_x = (current%part_pos(1) - x_grid_min_local) * idx_bin + part_y = (current%part_pos(2) - y_grid_min_local) * idy_bin + + ix = floor(part_x) ! x-coordinate of the particle bin + iy = floor(part_y) ! y-coordinate of the particle bin - ! This routine sorts the particles such that - ! the memory access to the particle list - ! is contigious + tile_id(ipart) = iy * nx_bin + ix + 1 ! 1-D coordinate of the bins - END SUBROUTINE particle sort + num(tile_id(ipart)) = num(tile_id(ipart)) + 1 ! Number of particles in each bin + current => next - SUBROUTINE current_deposition_VB_triangle + END DO ! End do-loop for particle position in terms of bin co-ordinates -#ifdef INTEL_VECTORISATION -!dir$ attributes align:64 :: gx -!dir$ attributes align:64 :: gy -!dir$ attributes align:64 :: gz + k = 0 + + ! Determine the stride of particle indices in bins + + DO i = 1, n_bins + g_indx(i) = k ! Starting particle index for a particular bin + k = k + num(i) + END DO ! End do-loop for the stride of particle indices in bins + + ! Particle sorting in 1-D bins + + DO ipart = 1, species_list(ispecies)%attached_list%count + next => current%next + k = tile_id(ipart) + g_índx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins + +#ifndef PER_SPECIES_WEIGHT + w(g_indx(k)) = current%weight + fcx(g_indx(k)) = idty * w(g_indx(k)) + fcy(g_indx(k)) = idtx * w(g_indx(k)) + fcz(g_indx(k)) = idtxy * w(g_indx(k)) #endif -#ifdef INTEL_VECTORISATION -!dir$ attributes align:64 :: hx -!dir$ attributes align:64 :: hy -!dir$ attributes align:64 :: hz +#ifndef NO_PARTICLE_PROBES + init_x(g_indx(k)) = current%part_pos(1) + init_y(g_indx(k)) = current%part_pos(2) #endif -#ifdef IBM_VECTORISATION -!IBM* ALIGN(64, gx, gy, gz) +#ifdef PER_PARTICLE_CHARGE_MASS + q(g_indx(k)) = current%charge + m(g_indx(k)) = current%mass + mc(g_indx(k)) = c * current%mass + i_mc(g_indx(k)) = 1.0_num / mc + cmratio(g_indx(k)) = q(g_indx(k)) * dtfac * i_mc(g_indx(k)) + ccmratio(g_indx(k)) = c * cmratio(g_indx(k)) +#ifndef NO_PARTICLE_PROBES + mc2(g_indx(k)) = c * mc(g_indx(k)) +#endif #endif -#ifdef IBM_VECTORISATION -!IBM* ALIGN(64, hx, hy, hz) + ! Copy the particle properties out for sorting + x(g_indx(k)) = current%part_pos(1) - x_grid_min_local + y(g_indx(k)) = current%part_pos(2) - y_grid_min_local + px(g_indx(k)) = current%part_p(1) * ipart_mc + py(g_indx(k)) = current%part_p(2) * ipart_mc + pz(g_indx(k)) = current%part_p(3) * ipart_mc + pvol(g_indx(k)) = current%pvol + gamma_rel(g_indx(k)) = SQRT(px(g_indx(k))**2 + py(g_indx(k))**2 + pz(g_indx(k))**2 + 1.0_num) + root(g_indx(k)) = dtco2 / gamma_rel(g_indx(k)) + current => next + END DO ! End do-loop for particle sorting + +END SUBROUTINE particle_sorting + +SUBROUTINE particle_pusher() + + DO i = 1, species_list(ispecies)%attached_list%count, LVEC + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + jj = i + j - 1 + x(jj) = x(jj) + px(jj) * root(jj) + y(jj) = y(jj) + py(jj) * root(jj) + +#ifdef WORK_DONE_INTEGRATED + ! This is the actual total work done by the fields: Results correspond + ! with the electron's gamma factor + + root(jj) = cmratio(jj) / gamma_rel(jj) + + tmp_x(j) = px(jj) * root(jj) + tmp_y(j) = py(jj) * root(jj) + tmp_z(j) = pz(jj) * root(jj) #endif -! Similarly all other arrays need to be aligned + END DO ! End do-loop for j + + ! Calculate fields at particle positions + ! Grid cell position as a fraction + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_x_r(j) = x(jj) * idx + cell_y_r(j) = y(jj) * idy + + END DO ! End do-loop for grid cell position as fraction + + ! Round cell position to nearest cell + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_x1(jj) = FLOOR(cell_x_r(j) + 0.5_num) + cell_y1(jj) = FLOOR(cell_y_r(j) + 0.5_num) - DO ispec = 1, n_species - species => species_list(ispec) + cell_x2(j) = FLOOR(cell_x_r(j)) + cell_y2(j) = FLOOR(cell_y_r(j)) + + END DO ! End do-loop for nearest cell position + + ! Calculate fraction of cell between nearest cell boundary and particle + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_frac_x(j) = REAL(cell_x1(jj), num) - cell_x_r(j) + cell_frac_y(j) = REAL(cell_y1(jj), num) - cell_y_r(j) + + END DO ! End do-loop for grid cell position fraction + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cfx2(j) = cell_frac_x(j)**2 + cfy2(j) = cell_frac_y(j)**2 + + END DO + + DO j = 1, 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + jj = i + j - 1 + cell_x1(jj) = cell_x1(jj) + 1 + cell_y1(jj) = cell_y1(jj) + 1 + + ! Particle weight factors as described in Page 25 of the PSC manual + ! These weight grid properties onto particles + ! Also used to weight particle properties onto grid, used later to calculate J + ! NOTE: These weights require an additional multiplication factor + + ! This weighing is for triangle shaped particles - IF (species%immobile) CYCLE + gxx(-1,j) = 0.25_num + cfx2(j) + cell_frac_x(j) + gxx( 0,j) = 1.5_num - 2.0_num * cfx2(j) + gxx( 1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) + + gyy(-1,j) = 0.25_num - cfy2(j) + cell_frac_y(j) + gyy( 0,j) = 1.5_num - 2.0_num * cfy2(j) + gyy( 1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) + + ! Now redo shifted by half a cell due to grid stagger + ! Use shifted version for ex in X, ey in Y, ez in Z + ! And in Y&Z for bx, X&Z for by, X&Y for bz + + END DO ! End do-loop with gxx + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cell_frac_x(j) = REAL(cell_x2(j), num) - cell_x_r(j) + 0.5_num + cell_frac_y(j) = REAL(cell_y2(j), num) - cell_y_r(j) + 0.5_num + + cell_x2(j) = cell_x2(j) + 1 + cell_y2(j) = cell_y2(j) + 1 + + END DO ! End do-loop for re-doing cell_frac_(x,y) + + dcellx = 0 + dcelly = 0 + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cfx2(j) = cell_frac_x(j)**2 + cfy2(j) = cell_frac_y(j)**2 + + END DO + + ! Calculating hxx + ! NOTE: These weights require an additional multiplication factor + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) + hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) + hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) + + hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) + hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) + hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) + + END DO ! End do-loop for hxx + + ! These are the electric and magnetic fields interpolated to the + ! particle position. They have been checked and are correct. + ! Actually checking this is messy + + ! Calculate e-fields at particle position for triangle particles + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + ex_part(j) = & + gyy(-1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)-1) & + + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)-1) & + + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)-1)) & + + gyy( 0,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j) ) & + + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j) ) & + + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j) )) & + + gyy( 1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)+1) & + + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)+1) & + + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)+1)) + + ey_part(j) = & + hyy(-1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)-1) & + + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)-1) & + + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)-1)) & + + hyy( 0,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j) ) & + + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j) ) & + + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j) )) & + + hyy( 1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)+1) & + + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)+1) & + + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)+1)) + + ez_part(j) = & + gyy(-1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)-1) & + + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)-1) & + + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)-1)) & + + gyy( 0,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j) ) & + + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j) ) & + + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j) )) & + + gyy( 1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)+1) & + + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)+1) & + + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)+1)) + + END DO ! End do-loop for e-fields at particle position + + ! Calculate b-fields at particle position for triangle particles + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + bx_part(j) = & + hyy(-1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)-1) & + + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)-1) & + + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)-1)) & + + hyy( 0,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j) ) & + + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j) ) & + + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j) )) & + + hyy( 1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)+1) & + + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)+1) & + + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)+1)) + + by_part(j) = & + gyy(-1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)-1) & + + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)-1) & + + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)-1)) & + + gyy( 0,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j) ) & + + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j) ) & + + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j) )) & + + gyy( 1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)+1) & + + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)+1) & + + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)+1)) + + bz_part(j) = & + hyy(-1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)-1) & + + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)-1) & + + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)-1)) & + + gyy( 0,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j) ) & + + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j) ) & + + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j) )) & + + gyy( 1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)+1) & + + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)+1) & + + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)+1)) + + END DO ! End do-loop for b-fields at particle position + + + ! Update particle momenta using weighted fields + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j -1 + + uxm(j) = px(jj) + cmratio(jj) * ex_part(j) + uym(j) = py(jj) + cmratio(jj) * ey_part(j) + uzm(j) = pz(jj) + cmratio(jj) * ez_part(j) + + END DO + +#ifdef HC_PUSH + + ! Half timestep, then use Higuera-Cary push + ! See https://aip.scitation.org/doi/10.1063/1.4979989 + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + gamma_rel(jj) = uxm(j)**2 + uym(j)**2 + uzm(j)**2 + 1.0_num + alpha(j) = 0.5_num * q(jj) * dt / m(jj) - part_data => species%part_data - npart = part_data%count + END DO - CALL particle_sort ! I guess this routine should be called in the Boris pusher - ! such that both gx and hx are in sync + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - DO np = 1, npart, LVEC + beta_x(j) = alpha(j) * bx_part(j) + beta_y(j) = alpha(j) * by_part(j) + beta_z(j) = alpha(j) * bz_part(j) - !$OMP SIMD - DO n = 1, MIN(LVEC, npart - np + 1) + END DO - CALL get_particle(part_data, n, part) + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + beta2(j) = beta_x(j)**2 + beta_y(j)**2 + beta_z(j)**2 + beta_dot_u(j) = beta_x(j) * uxm(j) + beta_y(j) * uym(j) + beta_z(j) * uzm(j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j -1 + gamma_rel(jj) = SQRT(0.5_num & + * (gamma_rel(jj) - beta2(j) & + + SQRT((gamma_rel(jj) - beta2(j))**2 & + + 4.0_num * (beta2(j) + beta_dot_u(j)**2)))) + + END DO + +#else + + ! Half timestep, then use Boris1970 rotation, see Birdsall and Langdon + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + gamma_rel(jj) = SQRT(uxm(j)**2 + uym(j)**2 + uzm(j)**2) + + END DO -#ifndef PER_SPECIES_WEIGHT - part_weight(n) = part(n)%weight - fcx(n) = idty * part_weight(n) - fcy(n) = idtx * part_weight(n) - fcz(n) = idxy * part_weight(n) #endif -#ifndef NO_PARTICLE_PROBES - init_part_x(n) = part(n)%part_pos(1) - init_part_y(n) = part(n)%part_pos(2) + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + root(jj) = ccmratio(jj) / gamma_rel(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + taux(j) = bx_part(j) * root(jj) + tauy(j) = by_part(j) * root(jj) + tauz(j) = bz_part(j) * root(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + taux2(j) = taux(j)**2 + tauy2(j) = taux(j)**2 + tauz2(j) = tauz(j)**2 + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + tau(j) = 1.0_num / (1.0_num + taux2(j) + tauy2(j) + tauz2(j)) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + uxp(j) = ((1.0_num + taux2(j) - tauy2(j) - tauz2(j)) * uxm(j) & + + 2.0_num * ((taux(j) * tauy(j) + tauz(j)) * uym(j) & + + (taux(j) * tauz(j) - tauy(j)) * uzm(j))) * tau(j) + uyp(j) = ((1.0_num - taux2(j) + tauy2(j) - tauz2(j)) * uym(j) & + + 2.0_num * ((tauy(j) * tauz(j) + taux(j)) * uzm(j) & + + (tauy(j) * taux(j) - tauz(j)) * uxm(j))) * tau(j) + uzp(j) = ((1.0_num - taux2(j) - tauy2(j) + tauz2(j)) * uzm(j) & + + 2.0_num * ((tauz(j) * taux(j) + tauy(j)) * uxm(j) & + + (tauz(j) * tauy(j) - taux(j)) * uym(j))) * tau(j) + + END DO + + ! Rotation over, go to full timestep + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + px(jj) = uxp(j) + cmratio(jj) * ex_part(j) + py(jj) = uyp(j) + cmratio(jj) * ey_part(j) + pz(jj) = uzp(j) + cmratio(jj) * ez_part(j) + + END DO + + ! Calculate particle velocity from particle momentum + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + gamma_rel(jj) = SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) + igamma(jj) = 1.0_num / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) + root(jj) = dtco2 / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + delta_x(jj) = px(jj) * root(jj) + delta_y(jj) = py(jj) * root(jj) + vz(jj) = pz(jj) * c * igamma(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + x(jj) = x(jj) + delta_x(j) + y(jj) = y(jj) + delta_y(j) + + END DO + + ! Particle has now finished move to end of timestep, so copy back + ! into particle array + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + next => current + current%part_pos = (/ x(jj) + x_grid_min_local, & + y(jj) + y_grid_min_local /) + current%part_p = mc(jj) * (/ px(jj), py(jj), pz(jj) /) + + ! Add particle to boundary candidate list + IF (current%part_pos(1) < bnd_x_min & + .OR. current%part_pos(1) > bnd_x_max & + .OR. current%part_pos(2) < bnd_y_min & + .OR. current%part_pos(2) > bnd_y_max ) THEN + ALLOCATE(bnd_part_next) + bnd_part_next%particle => current + bnd_part_last%next => bnd_part_next + bnd_part_last => bnd_part_next + END IF + +#ifdef WORK_DONE_INTEGRATED + ! This is the actual total work done by the fields: Results correspond + ! with the electron's gamma factor + + root(jj) = cmratio(jj) / gamma_rel(jj) + + work_x = ex_part(j) * (tmp_x(j) + px(jj) * root(jj)) + work_y = ex_part(j) * (tmp_y(j) + py(jj) * root(jj)) + work_z = ex_part(j) * (tmp_z(j) + pz(jj) * root(jj)) + + current%work_x = work_x + current%work_y = work_y + current%work_z = work_z + + current%work_x_total = current%work_x_total + work_x + current%work_y_total = current%work_y_total + work_y + current%work_z_total = current%work_z_total + work_z #endif -#ifdef PER_PARTICLE_CHARGE_MASS - part_q(n) = part(n)%charge - part_m(n) = part(n)%mass - part_mc(n) = c * part(n)%mass - ipart_mc(n) = 1.0_num / part_mc(n) - cmratio(n) = part_q(n) * dtfac * ipart_mc(n) - ccmratio(n) = c * cmratio(n) + #ifndef NO_PARTICLE_PROBES - part_mc2 = c * part_mc -#endif + final_x(jj) = current%part_pos(1) + final_y(jj) = current%part_pos(2) #endif - !Copy the particle properties out for speed - part_x(n) = part(n)%part_pos(1) - x_grid_min_local - part_y(n) = part(n)%part_pos(2) - y_grid_min_local - part_ux(n) = part(n)%part_p(1) * ipart_mc(n) - part_uy(n) = part(n)%part_p(2) * ipart_mc(n) - part_uz(n) = part(n)%part_p(3) * ipart_mc(n) - - !Now advance to t+1.5dt to calculate current - !For efficient vectorization, I would prefer - !part_x(n) = part(n)%part_pos(1) - x_grid_min_local + delta_x - !part_y(n) = part(n)%part_pos(2) - x_grid_min_local + delta_x - !This eliminates the dependency on previous step - part_x(n) = part_x(n) + delta_x(n) - part_y(n) = part_y(n) + delta_y(n) - - !Delta-f calculation: subtract background from calculated current + current => next + + END DO + + + END DO ! End do-loop for i + +END SUBROUTINE particle_pusher + +SUBROUTINE triangle_current_deposition() + + DO i = 1, species_list(ispecies)%attached_list%count, LVEC + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + + ! Advance to t + 1.5dt to calculate current. This is detailed in + ! the PSC manual between page 37 and 41. The version coded up looks + ! completely different to that in the manual, but this is equivalent. + ! Use t + 1.5dt so that can update J to t + dt at 2nd order + + x(jj) = x(jj) + delta_x(jj) + y(jj) = y(jj) + delta_y(jj) + + ! Delta-f calculation: subtract background from + ! calculated current. + #ifdef DELTAF_METHOD - weight_back(n) = part(n)%pvol * f0(ispecies, part_mc(n) / c, & - part(n)%part_p) - fcx(n) = idty * (part_weight(n) - weight_back(n)) - fcy(n) = idtx * (part_weight(n) - weight_back(n)) - fcz(n) = idxy * (part_weight(n) - weight_back(n)) + weight_back(j) = pvol(jj) * f0(ispecies, mc(jj) / c, px(jj), py(jj), pz(jj)) + fcx(j) = idty * (weight(jj) - weight_back(j)) + fcy(j) = idtx * (weight(jj) - weight_back(j)) + fcz(j) = idxy * (weight(jj) - weight_back(j)) #endif - cell_x_r(n) = part_x(n) * idx - cell_y_r(n) = part_y(n) * idy - - cell_x3(n) = FLOOR(cell_x_r(n) + 0.5_num) - cell_y3(n) = FLOOR(cell_y_r(n) + 0.5_num) - - cell_frac_x(n) = REAL(cell_x3(n), num) - cell_x_r(n) - cell_frac_y(n) = REAL(cell_y3(n), num) - cell_y_r(n) - - fjx(n) = fcx(n) * part_q(n) - fjy(n) = fcy(n) * part_q(n) - fjz(n) = fcz(n) * part_q(n) * part_vz(n) - - hx = 0.0_num - hy = 0.0_num - - dcellx(n) = cell_x3(n) - cell_x1(n) - dcelly(n) = cell_y3(n) - cell_y1(n) - - xmin(n) = sf_min + (dcellx(n) - 1) / 2 - ymin(n) = sf_max + (dcelly(n) - 1) / 2 - - - hx(xmin(n)) = 0.25_num + cell_frac_x(n)**2 + cell_frac_x(n) - hx(xmin(n) + 1) = 1.5_num - 2.0_num * cell_frac_x(n)**2 - hx(xmin(n) + 2) = 0.25_num + cell_frac_x(n)**2 - cell_frac_x(n) - - hy(ymin(n)) = 0.25_num + cell_frac_y(n)**2 + cell_frac_y(n) - hy(ymin(n) + 1) = 1.5_num - 2.0_num * cell_frac_y(n)**2 - hy(ymin(n) + 2) = 0.25_num + cell_frac_y(n)**2 - cell_frac_y(n) - - yfac10(n) = gy(ymin(n)) + 0.5_num * hy(ymin(n)) - yfac11(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 1) - yfac12(n) = gy(ymin(n) + 1) + 0.5_num * hy(ymin(n) + 2) - - yfac20(n) = third * hy(ymin(n)) + 0.5 * gy(ymin(n)) - yfac21(n) = third * hy(ymin(n) + 1) + 0.5 * gy(ymin(n) + 1) - yfac22(n) = third * hy(ymin(n) + 2) + 0.5 * gy(ymin(n) + 2) - - xfac10(n) = gx(xmin(n)) + 0.5_num * hx(xmin(n)) - xfac11(n) = gx(xmin(n) + 1) + 0.5_num * hx(xmin(n) + 1) - xfac12(n) = gx(xmin(n) + 2) + 0.5_num * hx(xmin(n) + 2) - - wx(n,1) = hx(xmin(n)) * yfac10(n) - wx(n,2) = hx(xmin(n) + 1) * yfac10(n) - wx(n,3) = hx(xmin(n) + 2) * yfac10(n) - wx(n,4) = hx(xmin(n)) * yfac11(n) - wx(n,5) = hx(xmin(n) + 1) * yfac11(n) - wx(n,6) = hx(xmin(n) + 2) * yfac11(n) - wx(n,7) = hx(xmin(n)) * yfac12(n) - wx(n,8) = hx(xmin(n) + 1) * yfac12(n) - wx(n,9) = hx(xmin(n) + 2) * yfac12(n) - - wy(n,1) = hy(ymin(n)) * xfac10(n) - wy(n,2) = hy(ymin(n)) * xfac11(n) - wy(n,3) = hy(ymin(n)) * xfac12(n) - wy(n,4) = hy(ymin(n) + 1) * xfac10(n) - wy(n,5) = hy(ymin(n) + 1) * xfac11(n) - wy(n,6) = hy(ymin(n) + 1) * xfac12(n) - wy(n,7) = hy(ymin(n) + 2) * xfac10(n) - wy(n,8) = hy(ymin(n) + 2) * xfac11(n) - wy(n,9) = hy(ymin(n) + 2) * xfac12(n) - - wz(n,1) = gx(xmin(n)) * yfac10(n) + hx(xmin(n)) * yfac20(n) - wz(n,2) = gx(xmin(n) + 1) * yfac10(n) + hx(xmin(n) + 1) * yfac20(n) - wz(n,3) = gx(xmin(n) + 2) * yfac10(n) + hx(xmin(n) + 2) * yfac20(n) - wz(n,4) = gx(xmin(n)) * yfac11(n) + hx(xmin(n)) * yfac21(n) - wz(n,5) = gx(xmin(n) + 1) * yfac11(n) + hx(xmin(n) + 1) * yfac21(n) - wz(n,6) = gx(xmin(n) + 2) * yfac11(n) + hx(xmin(n) + 2) * yfac21(n) - wz(n,7) = gx(xmin(n)) * yfac12(n) + hx(xmin(n)) * yfac22(n) - wz(n,8) = gx(xmin(n) + 1) * yfac12(n) + hx(xmin(n) + 1) * yfac22(n) - wz(n,9) = gx(xmin(n) + 2) * yfac12(n) + hx(xmin(n) + 2) * yfac22(n) - - cx(n) = cell_x1(n) + xmin(n) - cy(n) = cell_y1(n) + ymin(n) - cell(n) = cx(n) + (cy(n) - 1) * nx - - jxh(n,1) = -fjx(n) * wx(n,1) - jxh(n,2) = -fjx(n) * (wx(n,1) + wx(n,2)) - jxh(n,3) = -fjx(n) * (wx(n,1) + wx(n,2) + wx(n,3)) - jxh(n,4) = -fjx(n) * wx(n,4) - jxh(n,5) = -fjx(n) * (wx(n,4) + wx(n,5)) - jxh(n,6) = -fjx(n) * (wx(n,4) + wx(n,5) + wx(n,6)) - jxh(n,7) = -fjx(n) * wx(n,7) - jxh(n,8) = -fjx(n) * (wx(n,7) + wx(n,8)) - jxh(n,9) = -fjx(n) * (wx(n,7) + wx(n,8) + wx(n,9)) - - jyh(n,1) = -fjy(n) * wy(n,1) - jyh(n,2) = -fjy(n) * wy(n,2) - jyh(n,3) = -fjy(n) * wy(n,3) - jyh(n,4) = -fjy(n) * (wy(n,1) + wy(n,4)) - jyh(n,5) = -fjy(n) * (wy(n,2) + wy(n,5)) - jyh(n,6) = -fjy(n) * (wy(n,3) + wy(n,6)) - jyh(n,7) = -fjy(n) * (wy(n,1) + wy(n,4) + wy(n,7)) - jyh(n,8) = -fjy(n) * (wy(n,2) + wy(n,5) + wy(n,8)) - jyh(n,9) = -fjy(n) * (wy(n,3) + wy(n,6) + wy(n,9)) - - jzh(n,1) = fjz(n) * wz(n,1) - jzh(n,2) = fjz(n) * wz(n,2) - jzh(n,3) = fjz(n) * wz(n,3) - jzh(n,4) = fjz(n) * wz(n,4) - jzh(n,5) = fjz(n) * wz(n,5) - jzh(n,6) = fjz(n) * wz(n,6) - jzh(n,7) = fjz(n) * wz(n,7) - jzh(n,8) = fjz(n) * wz(n,8) - jzh(n,9) = fjz(n) * wz(n,9) - - END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) - - !$OMP END SIMD - - !$OMP SIMD - - DO n = 1, MIN(LVEC, npart - np + 1) - - jx(cx(n),cy(n)) = jx(cx(n),cy(n)) + jxh(n,1) - jx(cx(n) + 1,cy(n)) = jx(cx(n) + 1,cy(n)) + jxh(n,2) - jx(cx(n) + 2,cy(n)) = jx(cx(n) + 2,cy(n)) + jxh(n,3) - jx(cx(n), cy(n) + 1) = jx(cx(n),cy(n) + 1) + jxh(n,4) - jx(cx(n) + 1, cy(n) + 1) = jx(cx(n) + 1,cy(n) + 1) + jxh(n,5) - jx(cx(n) + 2, cy(n) + 1) = jx(cx(n) + 2,cy(n) + 1) + jxh(n,6) - jx(cx(n), cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,7) - jx(cx(n) + 1, cy(n) + 2) = jx(cx(n),cy(n) + 2) + jxh(n,8) - jx(cx(n) + 2, cy(n) + 2) = jx(cx(n) + 2,cy(n) + 2) + jxh(n,9) - - jy(cx(n),cy(n)) = jy(cx(n),cy(n)) + jyh(n,1) - jy(cx(n) + 1,cy(n)) = jy(cx(n) + 1,cy(n)) + jyh(n,2) - jy(cx(n) + 2,cy(n)) = jy(cx(n) + 2,cy(n)) + jyh(n,3) - jy(cx(n), cy(n) + 1) = jy(cx(n),cy(n) + 1) + jyh(n,4) - jy(cx(n) + 1, cy(n) + 1) = jy(cx(n) + 1,cy(n) + 1) + jyh(n,5) - jy(cx(n) + 2, cy(n) + 1) = jy(cx(n) + 2,cy(n) + 1) + jyh(n,6) - jy(cx(n), cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,7) - jy(cx(n) + 1, cy(n) + 2) = jy(cx(n),cy(n) + 2) + jyh(n,8) - jy(cx(n) + 2, cy(n) + 2) = jy(cx(n) + 2,cy(n) + 2) + jyh(n,9) - - jz(cx(n),cy(n)) = jz(cx(n),cy(n)) + jzh(n,1) - jz(cx(n) + 1,cy(n)) = jz(cx(n) + 1,cy(n)) + jzh(n,2) - jz(cx(n) + 2,cy(n)) = jz(cx(n) + 2,cy(n)) + jzh(n,3) - jz(cx(n), cy(n) + 1) = jz(cx(n),cy(n) + 1) + jzh(n,4) - jz(cx(n) + 1, cy(n) + 1) = jz(cx(n) + 1,cy(n) + 1) + jzh(n,5) - jz(cx(n) + 2, cy(n) + 1) = jz(cx(n) + 2,cy(n) + 1) + jzh(n,6) - jz(cx(n), cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,7) - jz(cx(n) + 1, cy(n) + 2) = jz(cx(n),cy(n) + 2) + jzh(n,8) - jz(cx(n) + 2, cy(n) + 2) = jz(cx(n) + 2,cy(n) + 2) + jzh(n,9) - - END DO !END LOOP n = 1, MIN(LVEC, npart - np + 1) - !$OMP END SIMD - - END DO !END LOOP n = 1, npart - END DO !END LOOP ispec = 1, nspecies - - END current_deposition_VB_triangle + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + cell_x_r(j) = x(jj) * idx + cell_y_r(j) = y(jj) * idy + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cell_x3(j) = FLOOR(cell_x_r(j) + 0.5_num) + cell_y3(j) = FLOOR(cell_y_r(j) + 0.5_num) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + cell_frac_x(j) = REAL(cell_x3(j), num) - cell_x_r(j) + cell_frac_y(j) = REAL(cell_y3(j), num) - cell_y_r(j) + cell_x3(j) = cell_x3(j) + 1 + cell_y3(j) = cell_y3(j) + 1 + END DO + + hxx = 0.0_num + hyy = 0.0_num + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + dcellx(j) = cell_x3(j) - cell_x1(j) + dcelly(j) = cell_y3(j) - cell_y1(j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + cfx2(j) = cell_frac_x(j)**2 + cfy2(j) = cell_frac_y(j)**2 + + END DO + + ! Calculating hxx + ! NOTE: These weights require an additional multiplication factor + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) + hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) + hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) + + hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) + hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) + hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) + + END DO ! End do-loop for hxx + + ! Now change Xi1* to be Xi1*-Xi0*. This makes the representation of + ! the current update much simpler + + hxx = hxx - gxx + hyy = hxx - gyy + + ! Remember that due to CFL condition particle can never cross more + ! than one gridcell in one timestep + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + xmin(j) = sf_min + (dcellx(j) - 1) / 2 + ymin(j) = sf_min + (dcelly(j) - 1) / 2 + + fjx(jj) = fcx(jj) * q(jj) + fjy(jj) = fcy(jj) * q(jj) + fjz(jj) = fcz(jj) * q(jj) * vz(jj) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + yfac10(j) = gyy(ymin(j),j) + 0.5_num * hyy(ymin(j),j) + yfac11(j) = gyy(ymin(j) + 1,j) + 0.5_num * hyy(ymin(j) + 1,j) + yfac12(j) = gyy(ymin(j) + 2,j) + 0.5_num * hyy(ymin(j) + 2,j) + + yfac20(j) = third * hyy(ymin(j),j) + 0.5_num * gyy(ymin(j),j) + yfac21(j) = third * hyy(ymin(j) + 1,j) + 0.5_num * gyy(ymin(j) + 1,j) + yfac22(j) = third * hyy(ymin(j) + 2,j) + 0.5_num * gyy(ymin(j) + 2,j) + + xfac10(j) = gxx(xmin(j),j) + 0.5_num * hxx(ymin(j),j) + xfac11(j) = gxx(xmin(j) + 1,j) + 0.5_num * hxx(ymin(j) + 1,j) + xfac12(j) = gxx(xmin(j) + 2,j) + 0.5_num * hxx(ymin(j) + 2,j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jj = i + j - 1 + + wx(1,j) = hxx(xmin(j),j) * yfac10(j) + wx(2,j) = hxx(xmin(j) + 1,j) * yfac10(j) + wx(3,j) = hxx(xmin(j) + 2,j) * yfac10(j) + wx(4,j) = hxx(xmin(j),j) * yfac11(j) + wx(5,j) = hxx(xmin(j) + 1,j) * yfac11(j) + wx(6,j) = hxx(xmin(j) + 2,j) * yfac11(j) + wx(7,j) = hxx(xmin(j),j) * yfac12(j) + wx(8,j) = hxx(xmin(j) + 1,j) * yfac12(j) + wx(9,j) = hxx(xmin(j) + 2,j) * yfac12(j) + + wy(1,j) = hyy(ymin(j),j) * xfac10(j) + wy(2,j) = hyy(ymin(j),j) * xfac11(j) + wy(3,j) = hyy(ymin(j),j) * xfac12(j) + wy(4,j) = hyy(ymin(j) + 1,j) * xfac10(j) + wy(5,j) = hyy(ymin(j) + 1,j) * xfac11(j) + wy(6,j) = hyy(ymin(j) + 1,j) * xfac12(j) + wy(7,j) = hyy(ymin(j) + 2,j) * xfac10(j) + wy(8,j) = hyy(ymin(j) + 2,j) * xfac11(j) + wy(9,j) = hyy(ymin(n) + 2,j) * xfac12(j) + + wz(1,j) = gxx(xmin(j)) * yfac10(j) + hxx(xmin(j),j) * yfac20(j) + wz(2,j) = gxx(xmin(j) + 1,j) * yfac10(j) + hxx(xmin(j) + 1,j) * yfac20(j) + wz(3,j) = gxx(xmin(j) + 2,j) * yfac10(j) + hxx(xmin(j) + 2,j) * yfac20(j) + wz(4,j) = gxx(xmin(j),j) * yfac11(j) + hxx(xmin(j),j) * yfac21(j) + wz(5,j) = gxx(xmin(j) + 1,j) * yfac11(j) + hxx(xmin(j) + 1,j) * yfac21(j) + wz(6,j) = gxx(xmin(j) + 2,j) * yfac11(j) + hxx(xmin(j) + 2,j) * yfac21(j) + wz(7,j) = gxx(xmin(j),j) * yfac12(j) + hxx(xmin(j),j) * yfac22(j) + wz(8,j) = gxx(xmin(j) + 1,j) * yfac12(j) + hxx(xmin(j) + 1,j) * yfac22(j) + wz(9,j) = gxx(xmin(j) + 2,j) * yfac12(j) + hxx(xmin(j) + 2,j) * yfac22(j) + + cx(j) = cell_x1(jj) + xmin(j) + cy(j) = cell_y1(jj) + ymin(j) + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + ic(j) = cx(j) + (cy(j) - 1) * nx + + END DO + + DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) + + jxh(1,ic(j)) = jxh(1,ic(j)) - fjx(j) * wx(1,j) + jxh(2,ic(j)) = jxh(2,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j)) + jxh(3,ic(j)) = jxh(3,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j) + wx(3,j)) + jxh(4,ic(j)) = jxh(4,ic(j)) - fjx(j) * wx(4,j) + jxh(5,ic(j)) = jxh(5,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j)) + jxh(6,ic(j)) = jxh(6,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j) + wx(6,j)) + jxh(7,ic(j)) = jxh(7,ic(j)) - fjx(j) * wx(7,j) + jxh(8,ic(j)) = jxh(8,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j)) + jxh(9,ic(j)) = jxh(9,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j) + wx(9,j)) + + jyh(1,ic(j)) = jyh(1,ic(j)) - fjy(j) * wy(1,j) + jyh(2,ic(j)) = jyh(2,ic(j)) - fjy(j) * wy(2,j) + jyh(3,ic(j)) = jyh(3,ic(j)) - fjy(j) * wy(3,j) + jyh(4,ic(j)) = jyh(4,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j)) + jyh(5,ic(j)) = jyh(5,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j)) + jyh(6,ic(j)) = jyh(6,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j)) + jyh(7,ic(j)) = jyh(7,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j) + wy(7,j)) + jyh(8,ic(j)) = jyh(8,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j) + wy(8,j)) + jyh(9,ic(j)) = jyh(9,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j) + wy(9,j)) + + jzh(1,ic(j)) = jzh(1,ic(j)) + fjz(j) * wz(1,j) + jzh(2,ic(j)) = jzh(2,ic(j)) + fjz(j) * wz(2,j) + jzh(3,ic(j)) = jzh(3,ic(j)) + fjz(j) * wz(3,j) + jzh(4,ic(j)) = jzh(4,ic(j)) + fjz(j) * wz(4,j) + jzh(5,ic(j)) = jzh(5,ic(j)) + fjz(j) * wz(5,j) + jzh(6,ic(j)) = jzh(6,ic(j)) + fjz(j) * wz(6,j) + jzh(7,ic(j)) = jzh(7,ic(j)) + fjz(j) * wz(7,j) + jzh(8,ic(j)) = jzh(8,ic(j)) + fjz(j) * wz(8,j) + jzh(9,ic(j)) = jzh(9,ic(j)) + fjz(j) * wz(9,j) + + END DO + + END DO ! End do-loop for index i + + ! Deposit current on the cells + + DO j = 1, ny + DO i = 1, nx + iic = (j - 1) * nx + i + + jx(i,j) = jx(i,j) + jxh(1,iic) + jx(i + 1,j) = jx(i + 1,j) + jxh(2,iic) + jx(i + 2,j) = jx(i + 2,j) + jxh(3,iic) + jx(i,j + 1) = jx(i, j + 1) + jxh(4,iic) + jx(i + 1,j + 1) = jx(i + 1, j + 1) + jxh(5,iic) + jx(i + 2,j + 1) = jx(i + 2, j + 1) + jxh(6,iic) + jx(i,j + 2) = jx(i,j + 2) + jxh(7,iic) + jx(i + 1,j + 2) = jx(i + 1,j + 2) + jxh(8,iic) + jx(i + 2,j + 2) = jx(i + 2,j + 2) + jxh(9,iic) + + jy(i,j) = jy(i,j) + jyh(1,iic) + jy(i + 1,j) = jy(i + 1,j) + jyh(2,iic) + jy(i + 2,j) = jy(i + 2,j) + jyh(3,iic) + jy(i,j + 1) = jy(i, j + 1) + jyh(4,iic) + jy(i + 1,j + 1) = jy(i + 1, j + 1) + jyh(5,iic) + jy(i + 2,j + 1) = jy(i + 2, j + 1) + jyh(6,iic) + jy(i,j + 2) = jy(i,j + 2) + jyh(7,iic) + jy(i + 1,j + 2) = jy(i + 1,j + 2) + jyh(8,iic) + jy(i + 2,j + 2) = jy(i + 2,j + 2) + jyh(9,iic) + + jz(i,j) = jz(i,j) + jzh(1,iic) + jz(i + 1,j) = jz(i + 1,j) + jzh(2,iic) + jz(i + 2,j) = jz(i + 2,j) + jzh(3,iic) + jz(i,j + 1) = jz(i, j + 1) + jzh(4,iic) + jz(i + 1,j + 1) = jz(i + 1, j + 1) + jzh(5,iic) + jz(i + 2,j + 1) = jz(i + 2, j + 1) + jzh(6,iic) + jz(i,j + 2) = jz(i,j + 2) + jzh(7,iic) + jz(i + 1,j + 2) = jz(i + 1,j + 2) + jzh(8,iic) + jz(i + 2,j + 2) = jz(i + 2,j + 2) + jzh(9,iic) + + END DO + END DO + +END SUBROUTINE triangle_current_deposition From 1845a042890a43d13cdc34cf30702fc10f4f0670 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Mar 2021 12:04:12 +0100 Subject: [PATCH 091/106] Added modified f0 --- epoch2d/src/current_deposition.F90 | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index 2e105e620..d8cacfd2b 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -734,6 +734,39 @@ SUBROUTINE triangle_current_deposition() END SUBROUTINE triangle_current_deposition + FUNCTION f0(ispecies, mass, px, py, pz) + + INTEGER, INTENT(IN) :: ispecies + REAL(num), INTENT(IN) :: mass + REAL(num), INTENT(IN) :: px, py, pz + REAL(num) :: f0 + REAL(num) :: Tx, Ty, Tz, driftx, drifty, driftz, density + REAL(num) :: f0_exponent, norm, two_kb_mass, two_pi_kb_mass3 + TYPE(particle_species), POINTER :: species + + species => species_list(ispecies) + + IF (ABS(species%initial_conditions%density_back) > c_tiny) THEN + two_kb_mass = 2.0_num * kb * mass + two_pi_kb_mass3 = (pi * two_kb_mass)**3 + + Tx = species%initial_conditions%temp_back(1) + Ty = species%initial_conditions%temp_back(2) + Tz = species%initial_conditions%temp_back(3) + driftx = species%initial_conditions%drift_back(1) + drifty = species%initial_conditions%drift_back(2) + driftz = species%initial_conditions%drift_back(3) + density = species%initial_conditions%density_back + f0_exponent = ((px - driftx)**2 / Tx & + + (py - drifty)**2 / Ty & + + (pz - driftz)**2 / Tz) / two_kb_mass + norm = density / SQRT(two_pi_kb_mass3 * Tx * Ty * Tz) + f0 = norm * EXP(-f0_exponent) + ELSE + f0 = 0.0_num + END IF + + END FUNCTION f0 From 7a23418ac2ba564cf41b761c6fbec381c5a06ac2 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 5 Mar 2021 12:08:46 +0100 Subject: [PATCH 092/106] Minor typo --- epoch2d/src/current_deposition.F90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 index d8cacfd2b..34a810f03 100644 --- a/epoch2d/src/current_deposition.F90 +++ b/epoch2d/src/current_deposition.F90 @@ -43,7 +43,7 @@ SUBROUTINE particle_sorting() DO ipart = 1, species_list(ispecies)%attached_list%count next => current%next k = tile_id(ipart) - g_índx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins + g_indx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins #ifndef PER_SPECIES_WEIGHT w(g_indx(k)) = current%weight From 2458a650428add56b2e0fc20f7d0e7871d577e4f Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 10 Mar 2021 13:42:54 +0100 Subject: [PATCH 093/106] First attempt to current vectorization --- epoch2d/src/particles.F90 | 85 ++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 24 deletions(-) diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index 473bfb314..f4ce4c4d5 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -76,7 +76,12 @@ SUBROUTINE push_particles ! Used by J update INTEGER :: xmin, xmax, ymin, ymax + INTEGER :: i, k REAL(num) :: wx, wy, wz + REAL(num), DIMENSION(-2:2,-2:2) :: wwx, wwy, wwz + REAL(num), DIMENSION(-3:2,-2:2) :: jxi + REAL(num), DIMENSION(-2:2,-3:2) :: jyi + REAL(num), DIMENSION(-2:2,-2:2) :: jzi ! Temporary variables REAL(num) :: idty, idtx, idxy @@ -446,33 +451,65 @@ SUBROUTINE push_particles fjy = fcy * part_q fjz = fcz * part_q * part_vz - jyh = 0.0_num - DO iy = ymin, ymax - cy = cell_y1 + iy - yfac1 = gy(iy) + 0.5_num * hy(iy) - yfac2 = third * hy(iy) + 0.5_num * gy(iy) - - hy_iy = hy(iy) - - jxh = 0.0_num - DO ix = xmin, xmax - cx = cell_x1 + ix - xfac1 = gx(ix) + 0.5_num * hx(ix) +! jyh = 0.0_num +! DO iy = ymin, ymax +! cy = cell_y1 + iy +! yfac1 = gy(iy) + 0.5_num * hy(iy) +! yfac2 = third * hy(iy) + 0.5_num * gy(iy) +! +! hy_iy = hy(iy) +! +! jxh = 0.0_num +! DO ix = xmin, xmax +! cx = cell_x1 + ix +! xfac1 = gx(ix) + 0.5_num * hx(ix) +! +! wx = hx(ix) * yfac1 +! wy = hy_iy * xfac1 +! wz = gx(ix) * yfac1 + hx(ix) * yfac2 +! +! ! This is the bit that actually solves d(rho)/dt = -div(J) +! jxh = jxh - fjx * wx +! jyh(ix) = jyh(ix) - fjy * wy +! jzh = fjz * wz +! +! jx(cx, cy) = jx(cx, cy) + jxh +! jy(cx, cy) = jy(cx, cy) + jyh(ix) +! jz(cx, cy) = jz(cx, cy) + jzh +! END DO +! END DO + + DO k = -2,2 + DO i = -2,2 + wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) + wwy(i,k)=hy(k)*(gx(i)+0.5*hx(i)) + wwz(i,k)=gx(i)*gy(k) & + +0.5*hx(i)*gy(k) & + +0.5*gx(i)*hy(k) & + +third*hx(i)*hy(k) + END DO + END DO - wx = hx(ix) * yfac1 - wy = hy_iy * xfac1 - wz = gx(ix) * yfac1 + hx(ix) * yfac2 + DO k = -2,2 + DO i = -2,2 + jxi(i,k) = jxi(i-1,k) - fjx * wwx(i,k) + jyi(i,k) = jyi(i,k-1) - fjy * wwy(i,k) + jzi(i,k) = fjz * wwz(i,k) + END DO + END DO - ! This is the bit that actually solves d(rho)/dt = -div(J) - jxh = jxh - fjx * wx - jyh(ix) = jyh(ix) - fjy * wy - jzh = fjz * wz + + DO k = -2,2 + DO i = -2,2 + jx(cell_x1+i,cell_y1+k)=jxi(cell_x1+i,cell_y1+k) & + + jxi(i,k) + jy(cell_x1+i,cell_y1+k)=jyi(cell_x1+i,cell_y1+k) & + + jyi(i,k) + jz(cell_x1+i,cell_y1+k)=jzi(cell_x1+i,cell_y1+k) & + + jzi(i,k) + END DO + END DO - jx(cx, cy) = jx(cx, cy) + jxh - jy(cx, cy) = jy(cx, cy) + jyh(ix) - jz(cx, cy) = jz(cx, cy) + jzh - END DO - END DO #ifdef ZERO_CURRENT_PARTICLES END IF #endif From 426b5f1c1049e49fc3721bd39ff290c5d36c5494 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 10 Mar 2021 14:08:40 +0100 Subject: [PATCH 094/106] Correction in mem. alloc --- epoch2d/src/particles.F90 | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index f4ce4c4d5..bbf0f439e 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -479,6 +479,10 @@ SUBROUTINE push_particles ! END DO ! END DO + jxi = 0.0 + jyi = 0.0 + jzi = 0.0 + DO k = -2,2 DO i = -2,2 wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) @@ -501,11 +505,11 @@ SUBROUTINE push_particles DO k = -2,2 DO i = -2,2 - jx(cell_x1+i,cell_y1+k)=jxi(cell_x1+i,cell_y1+k) & + jx(cell_x1+i,cell_y1+k)=jx(cell_x1+i,cell_y1+k) & + jxi(i,k) - jy(cell_x1+i,cell_y1+k)=jyi(cell_x1+i,cell_y1+k) & + jy(cell_x1+i,cell_y1+k)=jy(cell_x1+i,cell_y1+k) & + jyi(i,k) - jz(cell_x1+i,cell_y1+k)=jzi(cell_x1+i,cell_y1+k) & + jz(cell_x1+i,cell_y1+k)=jz(cell_x1+i,cell_y1+k) & + jzi(i,k) END DO END DO From 648825830e0a16eed19366914f129089d54d7a0b Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 10 Mar 2021 16:38:23 +0100 Subject: [PATCH 095/106] Adjusting the cell range from static to dynamic --- epoch2d/src/particles.F90 | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index bbf0f439e..4cff5258f 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -479,12 +479,9 @@ SUBROUTINE push_particles ! END DO ! END DO - jxi = 0.0 - jyi = 0.0 - jzi = 0.0 - DO k = -2,2 - DO i = -2,2 + DO k = ymin,ymax + DO i = xmin,xmax wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) wwy(i,k)=hy(k)*(gx(i)+0.5*hx(i)) wwz(i,k)=gx(i)*gy(k) & @@ -494,8 +491,8 @@ SUBROUTINE push_particles END DO END DO - DO k = -2,2 - DO i = -2,2 + DO k = ymin,ymax + DO i = xmin,xmax jxi(i,k) = jxi(i-1,k) - fjx * wwx(i,k) jyi(i,k) = jyi(i,k-1) - fjy * wwy(i,k) jzi(i,k) = fjz * wwz(i,k) @@ -503,8 +500,8 @@ SUBROUTINE push_particles END DO - DO k = -2,2 - DO i = -2,2 + DO k = ymin,ymax + DO i = xmin,xmax jx(cell_x1+i,cell_y1+k)=jx(cell_x1+i,cell_y1+k) & + jxi(i,k) jy(cell_x1+i,cell_y1+k)=jy(cell_x1+i,cell_y1+k) & From b1dc5d20ae3ea703221dbc55ccb9c990ee4d2139 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Wed, 24 Mar 2021 16:43:51 +0100 Subject: [PATCH 096/106] Falling back to basic push_particles --- epoch2d/Makefile | 4 +- epoch2d/src/particles.F90 | 87 +++++++++++---------------------------- 2 files changed, 27 insertions(+), 64 deletions(-) diff --git a/epoch2d/Makefile b/epoch2d/Makefile index 4aa233b0d..b2f37cd86 100644 --- a/epoch2d/Makefile +++ b/epoch2d/Makefile @@ -50,11 +50,11 @@ endif # Intel # ===== ifeq ($(strip $(COMPILER)),intel) - FFLAGS = -O3 -g -stand f03 + FFLAGS = -O3 -g -stand f03 -qopenmp ifeq ($(strip $(CONS)),1) FLTCONS = -pc64 -fltconsistency endif - #FFLAGS = -O3 -heap-arrays 64 -ipo -xHost # Optimised (B) + #FFLAGS = -O3 -heap-arrays 64 -qopt-report-phase=vec -qopt-report=5 -qopenmp # Optimised (B) #FFLAGS = -O3 -heap-arrays 64 -ipo -xAVX # Optimised (W) ifeq ($(strip $(MODE)),debug) FFLAGS = -O0 -fpe0 -nothreads -traceback -fltconsistency \ diff --git a/epoch2d/src/particles.F90 b/epoch2d/src/particles.F90 index 4cff5258f..4c185b229 100644 --- a/epoch2d/src/particles.F90 +++ b/epoch2d/src/particles.F90 @@ -76,12 +76,7 @@ SUBROUTINE push_particles ! Used by J update INTEGER :: xmin, xmax, ymin, ymax - INTEGER :: i, k REAL(num) :: wx, wy, wz - REAL(num), DIMENSION(-2:2,-2:2) :: wwx, wwy, wwz - REAL(num), DIMENSION(-3:2,-2:2) :: jxi - REAL(num), DIMENSION(-2:2,-3:2) :: jyi - REAL(num), DIMENSION(-2:2,-2:2) :: jzi ! Temporary variables REAL(num) :: idty, idtx, idxy @@ -451,66 +446,33 @@ SUBROUTINE push_particles fjy = fcy * part_q fjz = fcz * part_q * part_vz -! jyh = 0.0_num -! DO iy = ymin, ymax -! cy = cell_y1 + iy -! yfac1 = gy(iy) + 0.5_num * hy(iy) -! yfac2 = third * hy(iy) + 0.5_num * gy(iy) -! -! hy_iy = hy(iy) -! -! jxh = 0.0_num -! DO ix = xmin, xmax -! cx = cell_x1 + ix -! xfac1 = gx(ix) + 0.5_num * hx(ix) -! -! wx = hx(ix) * yfac1 -! wy = hy_iy * xfac1 -! wz = gx(ix) * yfac1 + hx(ix) * yfac2 -! -! ! This is the bit that actually solves d(rho)/dt = -div(J) -! jxh = jxh - fjx * wx -! jyh(ix) = jyh(ix) - fjy * wy -! jzh = fjz * wz -! -! jx(cx, cy) = jx(cx, cy) + jxh -! jy(cx, cy) = jy(cx, cy) + jyh(ix) -! jz(cx, cy) = jz(cx, cy) + jzh -! END DO -! END DO - - - DO k = ymin,ymax - DO i = xmin,xmax - wwx(i,k)=hx(i)*(gy(k)+0.5*hy(k)) - wwy(i,k)=hy(k)*(gx(i)+0.5*hx(i)) - wwz(i,k)=gx(i)*gy(k) & - +0.5*hx(i)*gy(k) & - +0.5*gx(i)*hy(k) & - +third*hx(i)*hy(k) - END DO - END DO + jyh = 0.0_num + DO iy = ymin, ymax + cy = cell_y1 + iy + yfac1 = gy(iy) + 0.5_num * hy(iy) + yfac2 = third * hy(iy) + 0.5_num * gy(iy) - DO k = ymin,ymax - DO i = xmin,xmax - jxi(i,k) = jxi(i-1,k) - fjx * wwx(i,k) - jyi(i,k) = jyi(i,k-1) - fjy * wwy(i,k) - jzi(i,k) = fjz * wwz(i,k) - END DO - END DO + hy_iy = hy(iy) - - DO k = ymin,ymax - DO i = xmin,xmax - jx(cell_x1+i,cell_y1+k)=jx(cell_x1+i,cell_y1+k) & - + jxi(i,k) - jy(cell_x1+i,cell_y1+k)=jy(cell_x1+i,cell_y1+k) & - + jyi(i,k) - jz(cell_x1+i,cell_y1+k)=jz(cell_x1+i,cell_y1+k) & - + jzi(i,k) - END DO - END DO + jxh = 0.0_num + DO ix = xmin, xmax + cx = cell_x1 + ix + xfac1 = gx(ix) + 0.5_num * hx(ix) + + wx = hx(ix) * yfac1 + wy = hy_iy * xfac1 + wz = gx(ix) * yfac1 + hx(ix) * yfac2 + ! This is the bit that actually solves d(rho)/dt = -div(J) + jxh = jxh - fjx * wx + jyh(ix) = jyh(ix) - fjy * wy + jzh = fjz * wz + + jx(cx, cy) = jx(cx, cy) + jxh + jy(cx, cy) = jy(cx, cy) + jyh(ix) + jz(cx, cy) = jz(cx, cy) + jzh + END DO + END DO #ifdef ZERO_CURRENT_PARTICLES END IF #endif @@ -818,3 +780,4 @@ SUBROUTINE rotate_p(part, cos_theta, phi, part_p) END SUBROUTINE rotate_p END MODULE particles + From 287ca9d2224a19a574addddba63c1767655d922e Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 09:33:38 +0200 Subject: [PATCH 097/106] Update to EPOCH coding style --- epoch2d/src/boundary.F90 | 6 ++++++ epoch2d/src/housekeeping/window.F90 | 10 +++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/epoch2d/src/boundary.F90 b/epoch2d/src/boundary.F90 index be32713aa..4e847fdbe 100644 --- a/epoch2d/src/boundary.F90 +++ b/epoch2d/src/boundary.F90 @@ -314,6 +314,8 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local) END SUBROUTINE do_field_mpi_with_lengths + + SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) @@ -486,6 +488,8 @@ SUBROUTINE all_comp_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & END SUBROUTINE all_comp_field_bc + + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & xmin, xmax, ymin, ymax, offset) @@ -522,6 +526,8 @@ SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & END SUBROUTINE unload_field_boundaries_from_buffer + + SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local) INTEGER, INTENT(IN) :: ng diff --git a/epoch2d/src/housekeeping/window.F90 b/epoch2d/src/housekeeping/window.F90 index 7ceb99657..d1c4faf7f 100644 --- a/epoch2d/src/housekeeping/window.F90 +++ b/epoch2d/src/housekeeping/window.F90 @@ -92,6 +92,7 @@ SUBROUTINE shift_window(window_shift_cells) END SUBROUTINE shift_window + SUBROUTINE shift_fields(window_shift_cells) INTEGER :: j @@ -183,6 +184,7 @@ SUBROUTINE shift_fields(window_shift_cells) END SUBROUTINE shift_fields + SUBROUTINE shift_field(field, ng, window_shift_cells) INTEGER, INTENT(IN) :: ng, window_shift_cells @@ -192,7 +194,7 @@ SUBROUTINE shift_field(field, ng, window_shift_cells) ! Shift field to the left by window_shift_cells DO j = 1-ng, ny+ng DO i = 1-ng, nx+ng-window_shift_cells - field(i,j) = field(i+window_shift_cells, j) + field(i,j) = field(i+window_shift_cells,j) END DO END DO !CALL field_bc(field, ng) @@ -200,6 +202,7 @@ SUBROUTINE shift_field(field, ng, window_shift_cells) END SUBROUTINE shift_field + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local) @@ -436,8 +439,9 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + + SUBROUTINE mw_io_test(step, dump) -! USE diagnostics USE deck_io_block INTEGER, INTENT(IN) :: step @@ -495,8 +499,8 @@ SUBROUTINE mw_io_test(step, dump) END SUBROUTINE mw_io_test + SUBROUTINE moving_window(step) -! USE diagnostics USE deck_io_block #ifndef PER_SPECIES_WEIGHT From 752a5ddac7dd3ce47d35958c96cf903a21f9076d Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 09:35:31 +0200 Subject: [PATCH 098/106] Removed dummy routine --- epoch2d/src/current_deposition.F90 | 783 ----------------------------- 1 file changed, 783 deletions(-) delete mode 100644 epoch2d/src/current_deposition.F90 diff --git a/epoch2d/src/current_deposition.F90 b/epoch2d/src/current_deposition.F90 deleted file mode 100644 index 34a810f03..000000000 --- a/epoch2d/src/current_deposition.F90 +++ /dev/null @@ -1,783 +0,0 @@ -SUBROUTINE particle_sorting() - - dx_bin = 5 * dx ! Number of cells in bin along x - dy_bin = 5 * dy ! Number of cells in bin along y - - idx_bin = 1./dx_bin ! Inverse of dx_bin - idy_bin = 1./dy_bin ! Inverse of dy_bin - - nx_bin = ceiling((x_grid_max_local - x_grid_min_local) * idx_bin) ! Number of bins along x - ny_bin = ceiling((y_grid_max_local - y_grid_min_local) * idy_bin) ! Number of bins along y - - n_bins = nx_bin * ny_bin ! Total number of bins - - ! Calculate particle positions in terms of the bin co-ordinates - - DO ipart = 1, species_list(ispecies)%attached_list%count - next => current%next - part_x = (current%part_pos(1) - x_grid_min_local) * idx_bin - part_y = (current%part_pos(2) - y_grid_min_local) * idy_bin - - ix = floor(part_x) ! x-coordinate of the particle bin - iy = floor(part_y) ! y-coordinate of the particle bin - - tile_id(ipart) = iy * nx_bin + ix + 1 ! 1-D coordinate of the bins - - num(tile_id(ipart)) = num(tile_id(ipart)) + 1 ! Number of particles in each bin - - current => next - - END DO ! End do-loop for particle position in terms of bin co-ordinates - - k = 0 - - ! Determine the stride of particle indices in bins - - DO i = 1, n_bins - g_indx(i) = k ! Starting particle index for a particular bin - k = k + num(i) - END DO ! End do-loop for the stride of particle indices in bins - - ! Particle sorting in 1-D bins - - DO ipart = 1, species_list(ispecies)%attached_list%count - next => current%next - k = tile_id(ipart) - g_indx(k) = g_indx(k) + 1 ! Rearranged particle index with respect to the bins - -#ifndef PER_SPECIES_WEIGHT - w(g_indx(k)) = current%weight - fcx(g_indx(k)) = idty * w(g_indx(k)) - fcy(g_indx(k)) = idtx * w(g_indx(k)) - fcz(g_indx(k)) = idtxy * w(g_indx(k)) -#endif - -#ifndef NO_PARTICLE_PROBES - init_x(g_indx(k)) = current%part_pos(1) - init_y(g_indx(k)) = current%part_pos(2) -#endif - -#ifdef PER_PARTICLE_CHARGE_MASS - q(g_indx(k)) = current%charge - m(g_indx(k)) = current%mass - mc(g_indx(k)) = c * current%mass - i_mc(g_indx(k)) = 1.0_num / mc - cmratio(g_indx(k)) = q(g_indx(k)) * dtfac * i_mc(g_indx(k)) - ccmratio(g_indx(k)) = c * cmratio(g_indx(k)) -#ifndef NO_PARTICLE_PROBES - mc2(g_indx(k)) = c * mc(g_indx(k)) -#endif -#endif - - ! Copy the particle properties out for sorting - x(g_indx(k)) = current%part_pos(1) - x_grid_min_local - y(g_indx(k)) = current%part_pos(2) - y_grid_min_local - px(g_indx(k)) = current%part_p(1) * ipart_mc - py(g_indx(k)) = current%part_p(2) * ipart_mc - pz(g_indx(k)) = current%part_p(3) * ipart_mc - pvol(g_indx(k)) = current%pvol - gamma_rel(g_indx(k)) = SQRT(px(g_indx(k))**2 + py(g_indx(k))**2 + pz(g_indx(k))**2 + 1.0_num) - root(g_indx(k)) = dtco2 / gamma_rel(g_indx(k)) - current => next - END DO ! End do-loop for particle sorting - -END SUBROUTINE particle_sorting - -SUBROUTINE particle_pusher() - - DO i = 1, species_list(ispecies)%attached_list%count, LVEC - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - jj = i + j - 1 - x(jj) = x(jj) + px(jj) * root(jj) - y(jj) = y(jj) + py(jj) * root(jj) - -#ifdef WORK_DONE_INTEGRATED - ! This is the actual total work done by the fields: Results correspond - ! with the electron's gamma factor - - root(jj) = cmratio(jj) / gamma_rel(jj) - - tmp_x(j) = px(jj) * root(jj) - tmp_y(j) = py(jj) * root(jj) - tmp_z(j) = pz(jj) * root(jj) -#endif - - END DO ! End do-loop for j - - ! Calculate fields at particle positions - ! Grid cell position as a fraction - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_x_r(j) = x(jj) * idx - cell_y_r(j) = y(jj) * idy - - END DO ! End do-loop for grid cell position as fraction - - ! Round cell position to nearest cell - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_x1(jj) = FLOOR(cell_x_r(j) + 0.5_num) - cell_y1(jj) = FLOOR(cell_y_r(j) + 0.5_num) - - cell_x2(j) = FLOOR(cell_x_r(j)) - cell_y2(j) = FLOOR(cell_y_r(j)) - - END DO ! End do-loop for nearest cell position - - ! Calculate fraction of cell between nearest cell boundary and particle - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_frac_x(j) = REAL(cell_x1(jj), num) - cell_x_r(j) - cell_frac_y(j) = REAL(cell_y1(jj), num) - cell_y_r(j) - - END DO ! End do-loop for grid cell position fraction - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cfx2(j) = cell_frac_x(j)**2 - cfy2(j) = cell_frac_y(j)**2 - - END DO - - DO j = 1, 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - jj = i + j - 1 - cell_x1(jj) = cell_x1(jj) + 1 - cell_y1(jj) = cell_y1(jj) + 1 - - ! Particle weight factors as described in Page 25 of the PSC manual - ! These weight grid properties onto particles - ! Also used to weight particle properties onto grid, used later to calculate J - ! NOTE: These weights require an additional multiplication factor - - ! This weighing is for triangle shaped particles - - gxx(-1,j) = 0.25_num + cfx2(j) + cell_frac_x(j) - gxx( 0,j) = 1.5_num - 2.0_num * cfx2(j) - gxx( 1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) - - gyy(-1,j) = 0.25_num - cfy2(j) + cell_frac_y(j) - gyy( 0,j) = 1.5_num - 2.0_num * cfy2(j) - gyy( 1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) - - ! Now redo shifted by half a cell due to grid stagger - ! Use shifted version for ex in X, ey in Y, ez in Z - ! And in Y&Z for bx, X&Z for by, X&Y for bz - - END DO ! End do-loop with gxx - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cell_frac_x(j) = REAL(cell_x2(j), num) - cell_x_r(j) + 0.5_num - cell_frac_y(j) = REAL(cell_y2(j), num) - cell_y_r(j) + 0.5_num - - cell_x2(j) = cell_x2(j) + 1 - cell_y2(j) = cell_y2(j) + 1 - - END DO ! End do-loop for re-doing cell_frac_(x,y) - - dcellx = 0 - dcelly = 0 - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cfx2(j) = cell_frac_x(j)**2 - cfy2(j) = cell_frac_y(j)**2 - - END DO - - ! Calculating hxx - ! NOTE: These weights require an additional multiplication factor - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) - hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) - hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) - - hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) - hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) - hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) - - END DO ! End do-loop for hxx - - ! These are the electric and magnetic fields interpolated to the - ! particle position. They have been checked and are correct. - ! Actually checking this is messy - - ! Calculate e-fields at particle position for triangle particles - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - ex_part(j) = & - gyy(-1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)-1) & - + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)-1) & - + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)-1)) & - + gyy( 0,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j) ) & - + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j) ) & - + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j) )) & - + gyy( 1,j) * (hxx(-1,j) * ex(cell_x2(j)-1,cell_y1(j)+1) & - + hxx( 0,j) * ex(cell_x2(j) ,cell_y1(j)+1) & - + hxx( 1,j) * ex(cell_x2(j)+1,cell_y1(j)+1)) - - ey_part(j) = & - hyy(-1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)-1) & - + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)-1) & - + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)-1)) & - + hyy( 0,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j) ) & - + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j) ) & - + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j) )) & - + hyy( 1,j) * (gxx(-1,j) * ey(cell_x1(j)-1,cell_y2(j)+1) & - + gxx( 0,j) * ey(cell_x1(j) ,cell_y2(j)+1) & - + gxx( 1,j) * ey(cell_x1(j)+1,cell_y2(j)+1)) - - ez_part(j) = & - gyy(-1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)-1) & - + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)-1) & - + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)-1)) & - + gyy( 0,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j) ) & - + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j) ) & - + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j) )) & - + gyy( 1,j) * (gxx(-1,j) * ez(cell_x1(j)-1,cell_y1(j)+1) & - + gxx( 0,j) * ez(cell_x1(j) ,cell_y1(j)+1) & - + gxx( 1,j) * ez(cell_x1(j)+1,cell_y1(j)+1)) - - END DO ! End do-loop for e-fields at particle position - - ! Calculate b-fields at particle position for triangle particles - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - bx_part(j) = & - hyy(-1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)-1) & - + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)-1) & - + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)-1)) & - + hyy( 0,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j) ) & - + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j) ) & - + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j) )) & - + hyy( 1,j) * (gxx(-1,j) * bx(cell_x1(j)-1,cell_y2(j)+1) & - + gxx( 0,j) * bx(cell_x1(j) ,cell_y2(j)+1) & - + gxx( 1,j) * bx(cell_x1(j)+1,cell_y2(j)+1)) - - by_part(j) = & - gyy(-1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)-1) & - + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)-1) & - + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)-1)) & - + gyy( 0,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j) ) & - + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j) ) & - + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j) )) & - + gyy( 1,j) * (hxx(-1,j) * by(cell_x2(j)-1,cell_y1(j)+1) & - + hxx( 0,j) * by(cell_x2(j) ,cell_y1(j)+1) & - + hxx( 1,j) * by(cell_x2(j)+1,cell_y1(j)+1)) - - bz_part(j) = & - hyy(-1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)-1) & - + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)-1) & - + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)-1)) & - + gyy( 0,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j) ) & - + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j) ) & - + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j) )) & - + gyy( 1,j) * (hxx(-1,j) * bz(cell_x2(j)-1,cell_y2(j)+1) & - + hxx( 0,j) * bz(cell_x2(j) ,cell_y2(j)+1) & - + hxx( 1,j) * bz(cell_x2(j)+1,cell_y2(j)+1)) - - END DO ! End do-loop for b-fields at particle position - - - ! Update particle momenta using weighted fields - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j -1 - - uxm(j) = px(jj) + cmratio(jj) * ex_part(j) - uym(j) = py(jj) + cmratio(jj) * ey_part(j) - uzm(j) = pz(jj) + cmratio(jj) * ez_part(j) - - END DO - -#ifdef HC_PUSH - - ! Half timestep, then use Higuera-Cary push - ! See https://aip.scitation.org/doi/10.1063/1.4979989 - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - gamma_rel(jj) = uxm(j)**2 + uym(j)**2 + uzm(j)**2 + 1.0_num - alpha(j) = 0.5_num * q(jj) * dt / m(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - beta_x(j) = alpha(j) * bx_part(j) - beta_y(j) = alpha(j) * by_part(j) - beta_z(j) = alpha(j) * bz_part(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - beta2(j) = beta_x(j)**2 + beta_y(j)**2 + beta_z(j)**2 - beta_dot_u(j) = beta_x(j) * uxm(j) + beta_y(j) * uym(j) + beta_z(j) * uzm(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j -1 - gamma_rel(jj) = SQRT(0.5_num & - * (gamma_rel(jj) - beta2(j) & - + SQRT((gamma_rel(jj) - beta2(j))**2 & - + 4.0_num * (beta2(j) + beta_dot_u(j)**2)))) - - END DO - -#else - - ! Half timestep, then use Boris1970 rotation, see Birdsall and Langdon - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - gamma_rel(jj) = SQRT(uxm(j)**2 + uym(j)**2 + uzm(j)**2) - - END DO - -#endif - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - root(jj) = ccmratio(jj) / gamma_rel(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - taux(j) = bx_part(j) * root(jj) - tauy(j) = by_part(j) * root(jj) - tauz(j) = bz_part(j) * root(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - taux2(j) = taux(j)**2 - tauy2(j) = taux(j)**2 - tauz2(j) = tauz(j)**2 - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - tau(j) = 1.0_num / (1.0_num + taux2(j) + tauy2(j) + tauz2(j)) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - uxp(j) = ((1.0_num + taux2(j) - tauy2(j) - tauz2(j)) * uxm(j) & - + 2.0_num * ((taux(j) * tauy(j) + tauz(j)) * uym(j) & - + (taux(j) * tauz(j) - tauy(j)) * uzm(j))) * tau(j) - uyp(j) = ((1.0_num - taux2(j) + tauy2(j) - tauz2(j)) * uym(j) & - + 2.0_num * ((tauy(j) * tauz(j) + taux(j)) * uzm(j) & - + (tauy(j) * taux(j) - tauz(j)) * uxm(j))) * tau(j) - uzp(j) = ((1.0_num - taux2(j) - tauy2(j) + tauz2(j)) * uzm(j) & - + 2.0_num * ((tauz(j) * taux(j) + tauy(j)) * uxm(j) & - + (tauz(j) * tauy(j) - taux(j)) * uym(j))) * tau(j) - - END DO - - ! Rotation over, go to full timestep - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - px(jj) = uxp(j) + cmratio(jj) * ex_part(j) - py(jj) = uyp(j) + cmratio(jj) * ey_part(j) - pz(jj) = uzp(j) + cmratio(jj) * ez_part(j) - - END DO - - ! Calculate particle velocity from particle momentum - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - gamma_rel(jj) = SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) - igamma(jj) = 1.0_num / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) - root(jj) = dtco2 / SQRT(px(jj)**2 + py(jj)**2 + pz(jj)**2 + 1.0_num) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - delta_x(jj) = px(jj) * root(jj) - delta_y(jj) = py(jj) * root(jj) - vz(jj) = pz(jj) * c * igamma(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - x(jj) = x(jj) + delta_x(j) - y(jj) = y(jj) + delta_y(j) - - END DO - - ! Particle has now finished move to end of timestep, so copy back - ! into particle array - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - next => current - current%part_pos = (/ x(jj) + x_grid_min_local, & - y(jj) + y_grid_min_local /) - current%part_p = mc(jj) * (/ px(jj), py(jj), pz(jj) /) - - ! Add particle to boundary candidate list - IF (current%part_pos(1) < bnd_x_min & - .OR. current%part_pos(1) > bnd_x_max & - .OR. current%part_pos(2) < bnd_y_min & - .OR. current%part_pos(2) > bnd_y_max ) THEN - ALLOCATE(bnd_part_next) - bnd_part_next%particle => current - bnd_part_last%next => bnd_part_next - bnd_part_last => bnd_part_next - END IF - -#ifdef WORK_DONE_INTEGRATED - ! This is the actual total work done by the fields: Results correspond - ! with the electron's gamma factor - - root(jj) = cmratio(jj) / gamma_rel(jj) - - work_x = ex_part(j) * (tmp_x(j) + px(jj) * root(jj)) - work_y = ex_part(j) * (tmp_y(j) + py(jj) * root(jj)) - work_z = ex_part(j) * (tmp_z(j) + pz(jj) * root(jj)) - - current%work_x = work_x - current%work_y = work_y - current%work_z = work_z - - current%work_x_total = current%work_x_total + work_x - current%work_y_total = current%work_y_total + work_y - current%work_z_total = current%work_z_total + work_z -#endif - -#ifndef NO_PARTICLE_PROBES - final_x(jj) = current%part_pos(1) - final_y(jj) = current%part_pos(2) -#endif - - current => next - - END DO - - - END DO ! End do-loop for i - -END SUBROUTINE particle_pusher - -SUBROUTINE triangle_current_deposition() - - DO i = 1, species_list(ispecies)%attached_list%count, LVEC - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - - ! Advance to t + 1.5dt to calculate current. This is detailed in - ! the PSC manual between page 37 and 41. The version coded up looks - ! completely different to that in the manual, but this is equivalent. - ! Use t + 1.5dt so that can update J to t + dt at 2nd order - - x(jj) = x(jj) + delta_x(jj) - y(jj) = y(jj) + delta_y(jj) - - ! Delta-f calculation: subtract background from - ! calculated current. - -#ifdef DELTAF_METHOD - weight_back(j) = pvol(jj) * f0(ispecies, mc(jj) / c, px(jj), py(jj), pz(jj)) - fcx(j) = idty * (weight(jj) - weight_back(j)) - fcy(j) = idtx * (weight(jj) - weight_back(j)) - fcz(j) = idxy * (weight(jj) - weight_back(j)) -#endif - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - cell_x_r(j) = x(jj) * idx - cell_y_r(j) = y(jj) * idy - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cell_x3(j) = FLOOR(cell_x_r(j) + 0.5_num) - cell_y3(j) = FLOOR(cell_y_r(j) + 0.5_num) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cell_frac_x(j) = REAL(cell_x3(j), num) - cell_x_r(j) - cell_frac_y(j) = REAL(cell_y3(j), num) - cell_y_r(j) - - cell_x3(j) = cell_x3(j) + 1 - cell_y3(j) = cell_y3(j) + 1 - - END DO - - hxx = 0.0_num - hyy = 0.0_num - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - dcellx(j) = cell_x3(j) - cell_x1(j) - dcelly(j) = cell_y3(j) - cell_y1(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - cfx2(j) = cell_frac_x(j)**2 - cfy2(j) = cell_frac_y(j)**2 - - END DO - - ! Calculating hxx - ! NOTE: These weights require an additional multiplication factor - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - hxx(dcellx(j)-1,j) = 0.25_num - cfx2(j) + cell_frac_x(j) - hxx(dcellx(j) ,j) = 1.5_num - 2.0_num * cfx2(j) - hxx(dcellx(j)+1,j) = 0.25_num + cfx2(j) - cell_frac_x(j) - - hyy(dcelly(j)-1,j) = 0.25_num + cfy2(j) + cell_frac_y(j) - hyy(dcelly(j) ,j) = 1.5_num - 2.0_num * cfy2(j) - hyy(dcelly(j)+1,j) = 0.25_num + cfy2(j) - cell_frac_y(j) - - END DO ! End do-loop for hxx - - ! Now change Xi1* to be Xi1*-Xi0*. This makes the representation of - ! the current update much simpler - - hxx = hxx - gxx - hyy = hxx - gyy - - ! Remember that due to CFL condition particle can never cross more - ! than one gridcell in one timestep - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - xmin(j) = sf_min + (dcellx(j) - 1) / 2 - ymin(j) = sf_min + (dcelly(j) - 1) / 2 - - fjx(jj) = fcx(jj) * q(jj) - fjy(jj) = fcy(jj) * q(jj) - fjz(jj) = fcz(jj) * q(jj) * vz(jj) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - yfac10(j) = gyy(ymin(j),j) + 0.5_num * hyy(ymin(j),j) - yfac11(j) = gyy(ymin(j) + 1,j) + 0.5_num * hyy(ymin(j) + 1,j) - yfac12(j) = gyy(ymin(j) + 2,j) + 0.5_num * hyy(ymin(j) + 2,j) - - yfac20(j) = third * hyy(ymin(j),j) + 0.5_num * gyy(ymin(j),j) - yfac21(j) = third * hyy(ymin(j) + 1,j) + 0.5_num * gyy(ymin(j) + 1,j) - yfac22(j) = third * hyy(ymin(j) + 2,j) + 0.5_num * gyy(ymin(j) + 2,j) - - xfac10(j) = gxx(xmin(j),j) + 0.5_num * hxx(ymin(j),j) - xfac11(j) = gxx(xmin(j) + 1,j) + 0.5_num * hxx(ymin(j) + 1,j) - xfac12(j) = gxx(xmin(j) + 2,j) + 0.5_num * hxx(ymin(j) + 2,j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jj = i + j - 1 - - wx(1,j) = hxx(xmin(j),j) * yfac10(j) - wx(2,j) = hxx(xmin(j) + 1,j) * yfac10(j) - wx(3,j) = hxx(xmin(j) + 2,j) * yfac10(j) - wx(4,j) = hxx(xmin(j),j) * yfac11(j) - wx(5,j) = hxx(xmin(j) + 1,j) * yfac11(j) - wx(6,j) = hxx(xmin(j) + 2,j) * yfac11(j) - wx(7,j) = hxx(xmin(j),j) * yfac12(j) - wx(8,j) = hxx(xmin(j) + 1,j) * yfac12(j) - wx(9,j) = hxx(xmin(j) + 2,j) * yfac12(j) - - wy(1,j) = hyy(ymin(j),j) * xfac10(j) - wy(2,j) = hyy(ymin(j),j) * xfac11(j) - wy(3,j) = hyy(ymin(j),j) * xfac12(j) - wy(4,j) = hyy(ymin(j) + 1,j) * xfac10(j) - wy(5,j) = hyy(ymin(j) + 1,j) * xfac11(j) - wy(6,j) = hyy(ymin(j) + 1,j) * xfac12(j) - wy(7,j) = hyy(ymin(j) + 2,j) * xfac10(j) - wy(8,j) = hyy(ymin(j) + 2,j) * xfac11(j) - wy(9,j) = hyy(ymin(n) + 2,j) * xfac12(j) - - wz(1,j) = gxx(xmin(j)) * yfac10(j) + hxx(xmin(j),j) * yfac20(j) - wz(2,j) = gxx(xmin(j) + 1,j) * yfac10(j) + hxx(xmin(j) + 1,j) * yfac20(j) - wz(3,j) = gxx(xmin(j) + 2,j) * yfac10(j) + hxx(xmin(j) + 2,j) * yfac20(j) - wz(4,j) = gxx(xmin(j),j) * yfac11(j) + hxx(xmin(j),j) * yfac21(j) - wz(5,j) = gxx(xmin(j) + 1,j) * yfac11(j) + hxx(xmin(j) + 1,j) * yfac21(j) - wz(6,j) = gxx(xmin(j) + 2,j) * yfac11(j) + hxx(xmin(j) + 2,j) * yfac21(j) - wz(7,j) = gxx(xmin(j),j) * yfac12(j) + hxx(xmin(j),j) * yfac22(j) - wz(8,j) = gxx(xmin(j) + 1,j) * yfac12(j) + hxx(xmin(j) + 1,j) * yfac22(j) - wz(9,j) = gxx(xmin(j) + 2,j) * yfac12(j) + hxx(xmin(j) + 2,j) * yfac22(j) - - cx(j) = cell_x1(jj) + xmin(j) - cy(j) = cell_y1(jj) + ymin(j) - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - ic(j) = cx(j) + (cy(j) - 1) * nx - - END DO - - DO j = 1, MIN(LVEC, species_list(ispecies)%attached_list%count - i + 1) - - jxh(1,ic(j)) = jxh(1,ic(j)) - fjx(j) * wx(1,j) - jxh(2,ic(j)) = jxh(2,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j)) - jxh(3,ic(j)) = jxh(3,ic(j)) - fjx(j) * (wx(1,j) + wx(2,j) + wx(3,j)) - jxh(4,ic(j)) = jxh(4,ic(j)) - fjx(j) * wx(4,j) - jxh(5,ic(j)) = jxh(5,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j)) - jxh(6,ic(j)) = jxh(6,ic(j)) - fjx(j) * (wx(4,j) + wx(5,j) + wx(6,j)) - jxh(7,ic(j)) = jxh(7,ic(j)) - fjx(j) * wx(7,j) - jxh(8,ic(j)) = jxh(8,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j)) - jxh(9,ic(j)) = jxh(9,ic(j)) - fjx(j) * (wx(7,j) + wx(8,j) + wx(9,j)) - - jyh(1,ic(j)) = jyh(1,ic(j)) - fjy(j) * wy(1,j) - jyh(2,ic(j)) = jyh(2,ic(j)) - fjy(j) * wy(2,j) - jyh(3,ic(j)) = jyh(3,ic(j)) - fjy(j) * wy(3,j) - jyh(4,ic(j)) = jyh(4,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j)) - jyh(5,ic(j)) = jyh(5,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j)) - jyh(6,ic(j)) = jyh(6,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j)) - jyh(7,ic(j)) = jyh(7,ic(j)) - fjy(j) * (wy(1,j) + wy(4,j) + wy(7,j)) - jyh(8,ic(j)) = jyh(8,ic(j)) - fjy(j) * (wy(2,j) + wy(5,j) + wy(8,j)) - jyh(9,ic(j)) = jyh(9,ic(j)) - fjy(j) * (wy(3,j) + wy(6,j) + wy(9,j)) - - jzh(1,ic(j)) = jzh(1,ic(j)) + fjz(j) * wz(1,j) - jzh(2,ic(j)) = jzh(2,ic(j)) + fjz(j) * wz(2,j) - jzh(3,ic(j)) = jzh(3,ic(j)) + fjz(j) * wz(3,j) - jzh(4,ic(j)) = jzh(4,ic(j)) + fjz(j) * wz(4,j) - jzh(5,ic(j)) = jzh(5,ic(j)) + fjz(j) * wz(5,j) - jzh(6,ic(j)) = jzh(6,ic(j)) + fjz(j) * wz(6,j) - jzh(7,ic(j)) = jzh(7,ic(j)) + fjz(j) * wz(7,j) - jzh(8,ic(j)) = jzh(8,ic(j)) + fjz(j) * wz(8,j) - jzh(9,ic(j)) = jzh(9,ic(j)) + fjz(j) * wz(9,j) - - END DO - - END DO ! End do-loop for index i - - ! Deposit current on the cells - - DO j = 1, ny - DO i = 1, nx - iic = (j - 1) * nx + i - - jx(i,j) = jx(i,j) + jxh(1,iic) - jx(i + 1,j) = jx(i + 1,j) + jxh(2,iic) - jx(i + 2,j) = jx(i + 2,j) + jxh(3,iic) - jx(i,j + 1) = jx(i, j + 1) + jxh(4,iic) - jx(i + 1,j + 1) = jx(i + 1, j + 1) + jxh(5,iic) - jx(i + 2,j + 1) = jx(i + 2, j + 1) + jxh(6,iic) - jx(i,j + 2) = jx(i,j + 2) + jxh(7,iic) - jx(i + 1,j + 2) = jx(i + 1,j + 2) + jxh(8,iic) - jx(i + 2,j + 2) = jx(i + 2,j + 2) + jxh(9,iic) - - jy(i,j) = jy(i,j) + jyh(1,iic) - jy(i + 1,j) = jy(i + 1,j) + jyh(2,iic) - jy(i + 2,j) = jy(i + 2,j) + jyh(3,iic) - jy(i,j + 1) = jy(i, j + 1) + jyh(4,iic) - jy(i + 1,j + 1) = jy(i + 1, j + 1) + jyh(5,iic) - jy(i + 2,j + 1) = jy(i + 2, j + 1) + jyh(6,iic) - jy(i,j + 2) = jy(i,j + 2) + jyh(7,iic) - jy(i + 1,j + 2) = jy(i + 1,j + 2) + jyh(8,iic) - jy(i + 2,j + 2) = jy(i + 2,j + 2) + jyh(9,iic) - - jz(i,j) = jz(i,j) + jzh(1,iic) - jz(i + 1,j) = jz(i + 1,j) + jzh(2,iic) - jz(i + 2,j) = jz(i + 2,j) + jzh(3,iic) - jz(i,j + 1) = jz(i, j + 1) + jzh(4,iic) - jz(i + 1,j + 1) = jz(i + 1, j + 1) + jzh(5,iic) - jz(i + 2,j + 1) = jz(i + 2, j + 1) + jzh(6,iic) - jz(i,j + 2) = jz(i,j + 2) + jzh(7,iic) - jz(i + 1,j + 2) = jz(i + 1,j + 2) + jzh(8,iic) - jz(i + 2,j + 2) = jz(i + 2,j + 2) + jzh(9,iic) - - END DO - END DO - -END SUBROUTINE triangle_current_deposition - - FUNCTION f0(ispecies, mass, px, py, pz) - - INTEGER, INTENT(IN) :: ispecies - REAL(num), INTENT(IN) :: mass - REAL(num), INTENT(IN) :: px, py, pz - REAL(num) :: f0 - REAL(num) :: Tx, Ty, Tz, driftx, drifty, driftz, density - REAL(num) :: f0_exponent, norm, two_kb_mass, two_pi_kb_mass3 - TYPE(particle_species), POINTER :: species - - species => species_list(ispecies) - - IF (ABS(species%initial_conditions%density_back) > c_tiny) THEN - two_kb_mass = 2.0_num * kb * mass - two_pi_kb_mass3 = (pi * two_kb_mass)**3 - - Tx = species%initial_conditions%temp_back(1) - Ty = species%initial_conditions%temp_back(2) - Tz = species%initial_conditions%temp_back(3) - driftx = species%initial_conditions%drift_back(1) - drifty = species%initial_conditions%drift_back(2) - driftz = species%initial_conditions%drift_back(3) - density = species%initial_conditions%density_back - f0_exponent = ((px - driftx)**2 / Tx & - + (py - drifty)**2 / Ty & - + (pz - driftz)**2 / Tz) / two_kb_mass - norm = density / SQRT(two_pi_kb_mass3 * Tx * Ty * Tz) - f0 = norm * EXP(-f0_exponent) - ELSE - f0 = 0.0_num - END IF - - END FUNCTION f0 - - - - - - - - - - - - - - From 65495d5b4bac2774f86f8d44f053e4a4a84948ac Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 09:55:50 +0200 Subject: [PATCH 099/106] Update epoch3d to EPOCH coding style --- epoch3d/src/boundary.F90 | 6 ++++++ epoch3d/src/housekeeping/window.F90 | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/epoch3d/src/boundary.F90 b/epoch3d/src/boundary.F90 index c4ab8e631..c4647a141 100644 --- a/epoch3d/src/boundary.F90 +++ b/epoch3d/src/boundary.F90 @@ -467,6 +467,8 @@ SUBROUTINE do_field_mpi_with_lengths(field, ng, nx_local, ny_local, & END SUBROUTINE do_field_mpi_with_lengths + + SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & xmin, xmax, ymin, ymax, zmin, zmax, offset) @@ -487,6 +489,8 @@ SUBROUTINE load_field_boundaries_to_buffer(field, buffer, & END SUBROUTINE load_field_boundaries_to_buffer + + SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & xmin, xmax, ymin, ymax, zmin, zmax, offset) @@ -507,6 +511,8 @@ SUBROUTINE unload_field_boundaries_from_buffer(field, buffer, & END SUBROUTINE unload_field_boundaries_from_buffer + + SUBROUTINE do_field_mpi_with_lengths_r4(field, ng, nx_local, ny_local, & nz_local) diff --git a/epoch3d/src/housekeeping/window.F90 b/epoch3d/src/housekeeping/window.F90 index 829fe35eb..75d0346db 100644 --- a/epoch3d/src/housekeeping/window.F90 +++ b/epoch3d/src/housekeeping/window.F90 @@ -224,6 +224,8 @@ SUBROUTINE shift_field(field, ng, window_shift_cells) END SUBROUTINE shift_field + + SUBROUTINE moving_window_field_bc(fieldx, fieldy, fieldz, ng, nx_local, & ny_local, nz_local) @@ -488,8 +490,9 @@ SUBROUTINE remove_particles END SUBROUTINE remove_particles #endif + + SUBROUTINE mw_io_test(step, dump) -! USE diagnostics USE deck_io_block INTEGER, INTENT(IN) :: step @@ -548,9 +551,7 @@ END SUBROUTINE mw_io_test - SUBROUTINE moving_window(step) -! USE diagnostics USE deck_io_block #ifndef PER_SPECIES_WEIGHT From de8f6f07a8c0e543be2056156a4301450f1fea97 Mon Sep 17 00:00:00 2001 From: Ujjwal Sinha Date: Fri, 3 Sep 2021 10:51:50 +0200 Subject: [PATCH 100/106] Removed Score-P instrumentation --- epoch2d/src/epoch2d.F90 | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/epoch2d/src/epoch2d.F90 b/epoch2d/src/epoch2d.F90 index 83bdaa57d..6c9eb0e4e 100644 --- a/epoch2d/src/epoch2d.F90 +++ b/epoch2d/src/epoch2d.F90 @@ -13,10 +13,6 @@ ! You should have received a copy of the GNU General Public License ! along with this program. If not, see . -#ifdef SCOREP_USER_ENABLE -#include "scorep/SCOREP_User.inc" -#endif - PROGRAM pic ! EPOCH2D is a Birdsall and Langdon type PIC code derived from the PSC @@ -71,13 +67,6 @@ PROGRAM pic CHARACTER(LEN=64) :: timestring REAL(num) :: runtime, dt_store -#ifdef SCOREP_USER_ENABLE - SCOREP_USER_REGION_DEFINE( main_loop ) - INTEGER, PARAMETER :: reg_type = SCOREP_USER_REGION_TYPE_LOOP + & - SCOREP_USER_REGION_TYPE_DYNAMIC -#endif - - step = 0 time = 0.0_num @@ -197,10 +186,6 @@ PROGRAM pic IF (timer_collect) CALL timer_start(c_timer_step) DO -#ifdef SCOREP_USER_ENABLE -SCOREP_USER_REGION_BEGIN( main_loop, "main_loop", reg_type ) -#endif #SCOREP_USER_ENABLE - IF (timer_collect) THEN CALL timer_stop(c_timer_step) CALL timer_reset @@ -274,11 +259,6 @@ PROGRAM pic CALL update_eb_fields_final CALL moving_window(step) - !CALL output_routines(step) - -#ifdef SCOREP_USER_ENABLE -SCOREP_USER_REGION_END(main_loop) -#endif #SCOREP_USER_ENABLE END DO From bac97271589dadb2a0fe8fd6d6e821f3f04f1018 Mon Sep 17 00:00:00 2001 From: usinhampik <41638042+usinhampik@users.noreply.github.com> Date: Wed, 8 Sep 2021 09:40:09 +0200 Subject: [PATCH 101/106] Update .gitmodules Updated in sync with 5.0-devel --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 2fd47b8d0..46940c1f7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "SDF"] path = SDF - url = https://gitlab.version.fz-juelich.de/SLPP/sdf/SDF.git + url = ../SDF.git From 3f13d0ac5f9fd6eebfef6e3ab076639fa9f1af7f Mon Sep 17 00:00:00 2001 From: usinhampik <41638042+usinhampik@users.noreply.github.com> Date: Wed, 8 Sep 2021 09:47:33 +0200 Subject: [PATCH 102/106] Update CONTRIBUTORS.md Added Dirk and Ujjwal as contributers --- CONTRIBUTORS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 6ce5acf32..4a7c29f79 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -5,6 +5,7 @@ Alistair Lawrence-Douglas Ben McMillan Chris Brady David Schinkel +Dirk Brömmel Heather Ratcliffe Holger Schmitz Jiří Vyskočil @@ -14,3 +15,4 @@ Phil Tooley Stephan Kuschel Stuart Morris Tom Goffrey +Ujjwal Sinha From a306be10afe3318e771cc0fb0b545fbc2b8df3cd Mon Sep 17 00:00:00 2001 From: usinhampik <41638042+usinhampik@users.noreply.github.com> Date: Wed, 8 Sep 2021 10:01:16 +0200 Subject: [PATCH 103/106] Acknowledgement of PRACE funding --- CONTRIBUTORS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 4a7c29f79..24d31d88f 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -15,4 +15,6 @@ Phil Tooley Stephan Kuschel Stuart Morris Tom Goffrey -Ujjwal Sinha +Ujjwal Sinha (Acknowledgement: +This work was financially supported by the PRACE project funded in part by the EU’s Horizon 2020 Research and Innovation programme (2014-2020) under grant agreement 823767. +(https://bscw.zam.kfa-juelich.de/bscw/bscw.cgi/d3182280-5/////*/2020-06-17%20VC%20with%20WP8%20Project%20Leads.html)) From 2c06cacb532b5b701fadcdfb3dcef0e92b55ffa6 Mon Sep 17 00:00:00 2001 From: usinhampik <41638042+usinhampik@users.noreply.github.com> Date: Wed, 8 Sep 2021 10:10:02 +0200 Subject: [PATCH 104/106] Update CONTRIBUTORS.md --- CONTRIBUTORS.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 24d31d88f..95be38d17 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -5,7 +5,6 @@ Alistair Lawrence-Douglas Ben McMillan Chris Brady David Schinkel -Dirk Brömmel Heather Ratcliffe Holger Schmitz Jiří Vyskočil @@ -16,5 +15,5 @@ Stephan Kuschel Stuart Morris Tom Goffrey Ujjwal Sinha (Acknowledgement: -This work was financially supported by the PRACE project funded in part by the EU’s Horizon 2020 Research and Innovation programme (2014-2020) under grant agreement 823767. +Contributions from Ujjwal Sinha was financially supported by the PRACE project funded in part by the EU’s Horizon 2020 Research and Innovation programme (2014-2020) under grant agreement 823767. (https://bscw.zam.kfa-juelich.de/bscw/bscw.cgi/d3182280-5/////*/2020-06-17%20VC%20with%20WP8%20Project%20Leads.html)) From b2b7cbc642d09b596de4c45829bd9c8a414988a2 Mon Sep 17 00:00:00 2001 From: usinhampik <41638042+usinhampik@users.noreply.github.com> Date: Wed, 8 Sep 2021 10:15:58 +0200 Subject: [PATCH 105/106] Update CONTRIBUTORS.md --- CONTRIBUTORS.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 95be38d17..adb0adf45 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -16,4 +16,3 @@ Stuart Morris Tom Goffrey Ujjwal Sinha (Acknowledgement: Contributions from Ujjwal Sinha was financially supported by the PRACE project funded in part by the EU’s Horizon 2020 Research and Innovation programme (2014-2020) under grant agreement 823767. -(https://bscw.zam.kfa-juelich.de/bscw/bscw.cgi/d3182280-5/////*/2020-06-17%20VC%20with%20WP8%20Project%20Leads.html)) From 92dfb01e41f20975d2985c4f4823d7058b4e1a57 Mon Sep 17 00:00:00 2001 From: usinhampik <41638042+usinhampik@users.noreply.github.com> Date: Wed, 8 Sep 2021 10:16:14 +0200 Subject: [PATCH 106/106] Update CONTRIBUTORS.md --- CONTRIBUTORS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index adb0adf45..613b4df90 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -15,4 +15,4 @@ Stephan Kuschel Stuart Morris Tom Goffrey Ujjwal Sinha (Acknowledgement: -Contributions from Ujjwal Sinha was financially supported by the PRACE project funded in part by the EU’s Horizon 2020 Research and Innovation programme (2014-2020) under grant agreement 823767. +Contributions from Ujjwal Sinha was financially supported by the PRACE project funded in part by the EU’s Horizon 2020 Research and Innovation programme (2014-2020) under grant agreement 823767.)