diff --git a/src/algorithm/HISTOGRAM.cpp b/src/algorithm/HISTOGRAM.cpp index 5104a96c2..d64edb3ad 100644 --- a/src/algorithm/HISTOGRAM.cpp +++ b/src/algorithm/HISTOGRAM.cpp @@ -38,7 +38,7 @@ HISTOGRAM::HISTOGRAM(const RunParams& params) 1*sizeof(Index_type) * getActualProblemSize() ); setBytesWrittenPerRep( 1*sizeof(Data_type) * m_num_bins ); setBytesAtomicModifyWrittenPerRep( 0 ); - setFLOPsPerRep(1 * getActualProblemSize()); + setFLOPsPerRep( (std::is_floating_point_v ? 1 : 0) * getActualProblemSize() ); setComplexity(Complexity::N); diff --git a/src/apps/CONVECTION3DPA.cpp b/src/apps/CONVECTION3DPA.cpp index 367246f53..7cb7451e0 100644 --- a/src/apps/CONVECTION3DPA.cpp +++ b/src/apps/CONVECTION3DPA.cpp @@ -32,13 +32,13 @@ CONVECTION3DPA::CONVECTION3DPA(const RunParams& params) setActualProblemSize( m_NE*CPA_Q1D*CPA_Q1D*CPA_Q1D ); - setItsPerRep(getActualProblemSize()); + setItsPerRep( m_NE*CPA_Q1D*CPA_Q1D*CPA_Q1D ); setKernelsPerRep(1); setBytesReadPerRep( 3*sizeof(Real_type) * CPA_Q1D*CPA_D1D + // b, bt, g 2*sizeof(Real_type) * CPA_D1D*CPA_D1D*CPA_D1D*m_NE + // x, y CPA_VDIM*sizeof(Real_type) * CPA_Q1D*CPA_Q1D*CPA_Q1D*m_NE ); // d - setBytesWrittenPerRep( 1*sizeof(Real_type) + CPA_D1D*CPA_D1D*CPA_D1D*m_NE ); // y + setBytesWrittenPerRep( 1*sizeof(Real_type) * CPA_D1D*CPA_D1D*CPA_D1D*m_NE ); // y setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(m_NE * ( diff --git a/src/apps/DIFFUSION3DPA.cpp b/src/apps/DIFFUSION3DPA.cpp index cfeb89af1..c00be9a40 100644 --- a/src/apps/DIFFUSION3DPA.cpp +++ b/src/apps/DIFFUSION3DPA.cpp @@ -32,7 +32,7 @@ DIFFUSION3DPA::DIFFUSION3DPA(const RunParams& params) setActualProblemSize( m_NE*DPA_Q1D*DPA_Q1D*DPA_Q1D ); - setItsPerRep(getActualProblemSize()); + setItsPerRep( m_NE*DPA_Q1D*DPA_Q1D*DPA_Q1D ); setKernelsPerRep(1); setBytesReadPerRep( 2*sizeof(Real_type) * DPA_Q1D*DPA_D1D + // b, g diff --git a/src/apps/LTIMES.cpp b/src/apps/LTIMES.cpp index 41c54b8d1..6b217b743 100644 --- a/src/apps/LTIMES.cpp +++ b/src/apps/LTIMES.cpp @@ -39,7 +39,7 @@ LTIMES::LTIMES(const RunParams& params) setActualProblemSize( m_psilen ); - setItsPerRep( getActualProblemSize() ); + setItsPerRep( m_philen ); setKernelsPerRep(1); // using total data size instead of writes and reads setBytesReadPerRep( 1*sizeof(Real_type) * m_philen + diff --git a/src/apps/LTIMES_NOVIEW.cpp b/src/apps/LTIMES_NOVIEW.cpp index 2ce2c26be..9cc621909 100644 --- a/src/apps/LTIMES_NOVIEW.cpp +++ b/src/apps/LTIMES_NOVIEW.cpp @@ -39,7 +39,7 @@ LTIMES_NOVIEW::LTIMES_NOVIEW(const RunParams& params) setActualProblemSize( m_psilen ); - setItsPerRep( getActualProblemSize() ); + setItsPerRep( m_philen ); setKernelsPerRep(1); // using total data size instead of writes and reads setBytesReadPerRep( 1*sizeof(Real_type) * m_philen + diff --git a/src/apps/MASS3DEA.cpp b/src/apps/MASS3DEA.cpp index 7fe1284a7..d553702c1 100644 --- a/src/apps/MASS3DEA.cpp +++ b/src/apps/MASS3DEA.cpp @@ -28,13 +28,13 @@ MASS3DEA::MASS3DEA(const RunParams& params) setDefaultProblemSize(m_NE_default*MEA_Q1D*MEA_Q1D*MEA_Q1D); setDefaultReps(1); - const int ea_mat_entries = MEA_D1D*MEA_D1D*MEA_D1D*MEA_D1D*MEA_D1D*MEA_D1D; + const Index_type ea_mat_entries = MEA_D1D*MEA_D1D*MEA_D1D*MEA_D1D*MEA_D1D*MEA_D1D; m_NE = std::max((getTargetProblemSize() + (ea_mat_entries)/2) / (ea_mat_entries), Index_type(1)); - setActualProblemSize( m_NE*ea_mat_entries); + setActualProblemSize( m_NE*ea_mat_entries ); - setItsPerRep(getActualProblemSize()); + setItsPerRep( m_NE*MEA_Q1D*MEA_Q1D*MEA_Q1D ); setKernelsPerRep(1); setBytesReadPerRep( 1*sizeof(Real_type) * MEA_Q1D*MEA_D1D + // B diff --git a/src/apps/MASS3DPA.cpp b/src/apps/MASS3DPA.cpp index 07898094f..e559cc02b 100644 --- a/src/apps/MASS3DPA.cpp +++ b/src/apps/MASS3DPA.cpp @@ -32,7 +32,7 @@ MASS3DPA::MASS3DPA(const RunParams& params) setActualProblemSize( m_NE*MPA_Q1D*MPA_Q1D*MPA_Q1D ); - setItsPerRep(getActualProblemSize()); + setItsPerRep( m_NE*MPA_Q1D*MPA_Q1D ); setKernelsPerRep(1); setBytesReadPerRep( 2*sizeof(Real_type) * MPA_Q1D*MPA_D1D + // B, Bt diff --git a/src/apps/NODAL_ACCUMULATION_3D.cpp b/src/apps/NODAL_ACCUMULATION_3D.cpp index 69a63b653..3ffcce23d 100644 --- a/src/apps/NODAL_ACCUMULATION_3D.cpp +++ b/src/apps/NODAL_ACCUMULATION_3D.cpp @@ -40,10 +40,9 @@ NODAL_ACCUMULATION_3D::NODAL_ACCUMULATION_3D(const RunParams& params) setKernelsPerRep(1); // touched data size, not actual number of stores and loads setBytesReadPerRep( 1*sizeof(Index_type) * getItsPerRep() + - 1*sizeof(Real_type) * getItsPerRep() + - 1*sizeof(Real_type) * m_domain->n_real_nodes); - setBytesWrittenPerRep( 1*sizeof(Real_type) * m_domain->n_real_nodes ); - setBytesAtomicModifyWrittenPerRep( 0 ); + 1*sizeof(Real_type) * getItsPerRep() ); + setBytesWrittenPerRep( 0 ); + setBytesAtomicModifyWrittenPerRep( 1*sizeof(Real_type) * m_domain->n_real_nodes ); setFLOPsPerRep(9 * getItsPerRep()); checksum_scale_factor = 0.001 * diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index ef1129a32..f3e264a02 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -31,7 +31,9 @@ IF_QUAD::IF_QUAD(const RunParams& params) setBytesReadPerRep( 3*sizeof(Real_type) * getActualProblemSize() ); setBytesWrittenPerRep( 2*sizeof(Real_type) * getActualProblemSize() ); setBytesAtomicModifyWrittenPerRep( 0 ); - setFLOPsPerRep(11 * getActualProblemSize()); // 1 sqrt + // estimate conditional true half of the time, 1 sqrt + setFLOPsPerRep(4 * getActualProblemSize() + + 7 * getActualProblemSize() / 2); checksum_scale_factor = 0.0001 * ( static_cast(getDefaultProblemSize()) / diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index dc1b69d95..4c93b90eb 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -25,19 +25,18 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setDefaultReps(5); m_N = std::sqrt(getTargetProblemSize()) + std::sqrt(2)-1; + const Index_type num_tiles = RAJA_DIVIDE_CEILING_INT(m_N, TL_SZ); setActualProblemSize(m_N * m_N); - setItsPerRep(getActualProblemSize()); + setItsPerRep( num_tiles*num_tiles * TL_SZ*TL_SZ ); setKernelsPerRep(1); setBytesReadPerRep( 2*sizeof(Real_type) * m_N*m_N ); setBytesWrittenPerRep( 1*sizeof(Real_type) * m_N*m_N ); setBytesAtomicModifyWrittenPerRep( 0 ); - const Index_type no_tiles = RAJA_DIVIDE_CEILING_INT(m_N, TL_SZ); - const Index_type no_blocks = RAJA_DIVIDE_CEILING_INT(m_N, TL_SZ); - setFLOPsPerRep(2 * TL_SZ * TL_SZ * TL_SZ * no_tiles * no_blocks * no_blocks); + setFLOPsPerRep(2 * TL_SZ * TL_SZ * TL_SZ * num_tiles * num_tiles * num_tiles); checksum_scale_factor = 1e-6 * ( static_cast(getDefaultProblemSize()) / diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index 3af4be2db..e3ca630d4 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -37,7 +37,7 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) 1*sizeof(Int_type) * getActualProblemSize() ); setBytesWrittenPerRep( 3*sizeof(Int_type) ); setBytesAtomicModifyWrittenPerRep( 0 ); - setFLOPsPerRep(1 * getActualProblemSize() + 1); + setFLOPsPerRep(0); setComplexity(Complexity::N); diff --git a/src/comm/HALO_EXCHANGE.cpp b/src/comm/HALO_EXCHANGE.cpp index 4e4d5f74f..56f155903 100644 --- a/src/comm/HALO_EXCHANGE.cpp +++ b/src/comm/HALO_EXCHANGE.cpp @@ -28,21 +28,22 @@ HALO_EXCHANGE::HALO_EXCHANGE(const RunParams& params) m_num_vars = params.getHaloNumVars(); m_var_size = m_grid_plus_halo_size ; + const Size_type halo_size = m_var_size - getActualProblemSize(); - setItsPerRep( m_num_vars * (m_var_size - getActualProblemSize()) ); + setItsPerRep( 2 * m_num_vars * halo_size ); setKernelsPerRep( 2 * s_num_neighbors * m_num_vars ); - setBytesReadPerRep( 1*sizeof(Int_type) * getItsPerRep() + // pack - 1*sizeof(Real_type) * getItsPerRep() + // pack + setBytesReadPerRep( 1*sizeof(Int_type) * m_num_vars * halo_size + // pack + 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Real_type) * getItsPerRep() + // send + 1*sizeof(Real_type) * m_num_vars * halo_size + // send - 1*sizeof(Int_type) * getItsPerRep() + // unpack - 1*sizeof(Real_type) * getItsPerRep() ); // unpack - setBytesWrittenPerRep( 1*sizeof(Real_type) * getItsPerRep() + // pack + 1*sizeof(Int_type) * m_num_vars * halo_size + // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack + setBytesWrittenPerRep( 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Real_type) * getItsPerRep() + // recv + 1*sizeof(Real_type) * m_num_vars * halo_size + // recv - 1*sizeof(Real_type) * getItsPerRep() ); // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(0); diff --git a/src/comm/HALO_EXCHANGE_FUSED.cpp b/src/comm/HALO_EXCHANGE_FUSED.cpp index f71eea069..cbc6cefb3 100644 --- a/src/comm/HALO_EXCHANGE_FUSED.cpp +++ b/src/comm/HALO_EXCHANGE_FUSED.cpp @@ -28,21 +28,22 @@ HALO_EXCHANGE_FUSED::HALO_EXCHANGE_FUSED(const RunParams& params) m_num_vars = params.getHaloNumVars(); m_var_size = m_grid_plus_halo_size ; + const Size_type halo_size = m_var_size - getActualProblemSize(); - setItsPerRep( m_num_vars * (m_var_size - getActualProblemSize()) ); + setItsPerRep( 2 * m_num_vars * halo_size ); setKernelsPerRep( 2 ); - setBytesReadPerRep( 1*sizeof(Int_type) * getItsPerRep() + // pack - 1*sizeof(Real_type) * getItsPerRep() + // pack + setBytesReadPerRep( 1*sizeof(Int_type) * m_num_vars * halo_size + // pack + 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Real_type) * getItsPerRep() + // send + 1*sizeof(Real_type) * m_num_vars * halo_size + // send - 1*sizeof(Int_type) * getItsPerRep() + // unpack - 1*sizeof(Real_type) * getItsPerRep() ); // unpack - setBytesWrittenPerRep( 1*sizeof(Real_type) * getItsPerRep() + // pack + 1*sizeof(Int_type) * m_num_vars * halo_size + // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack + setBytesWrittenPerRep( 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Real_type) * getItsPerRep() + // recv + 1*sizeof(Real_type) * m_num_vars * halo_size + // recv - 1*sizeof(Real_type) * getItsPerRep() ); // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(0); diff --git a/src/comm/HALO_PACKING.cpp b/src/comm/HALO_PACKING.cpp index cd3fdf044..ab4712bf4 100644 --- a/src/comm/HALO_PACKING.cpp +++ b/src/comm/HALO_PACKING.cpp @@ -22,17 +22,18 @@ HALO_PACKING::HALO_PACKING(const RunParams& params) m_num_vars = params.getHaloNumVars(); m_var_size = m_grid_plus_halo_size ; + const Size_type halo_size = m_var_size - getActualProblemSize(); - setItsPerRep( m_num_vars * (m_var_size - getActualProblemSize()) ); + setItsPerRep( 2 * m_num_vars * halo_size ); setKernelsPerRep( 2 * s_num_neighbors * m_num_vars ); - setBytesReadPerRep( 1*sizeof(Int_type) * getItsPerRep() + // pack - 1*sizeof(Real_type) * getItsPerRep() + // pack + setBytesReadPerRep( 1*sizeof(Int_type) * m_num_vars * halo_size + // pack + 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Int_type) * getItsPerRep() + // unpack - 1*sizeof(Real_type) * getItsPerRep() ); // unpack - setBytesWrittenPerRep( 1*sizeof(Real_type) * getItsPerRep() + // pack + 1*sizeof(Int_type) * m_num_vars * halo_size + // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack + setBytesWrittenPerRep( 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Real_type) * getItsPerRep() ); // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(0); diff --git a/src/comm/HALO_PACKING_FUSED.cpp b/src/comm/HALO_PACKING_FUSED.cpp index da42685d9..057e04ebc 100644 --- a/src/comm/HALO_PACKING_FUSED.cpp +++ b/src/comm/HALO_PACKING_FUSED.cpp @@ -22,17 +22,18 @@ HALO_PACKING_FUSED::HALO_PACKING_FUSED(const RunParams& params) m_num_vars = params.getHaloNumVars(); m_var_size = m_grid_plus_halo_size ; + const Size_type halo_size = m_var_size - getActualProblemSize(); - setItsPerRep( m_num_vars * (m_var_size - getActualProblemSize()) ); + setItsPerRep( 2 * m_num_vars * halo_size ); setKernelsPerRep( 2 ); - setBytesReadPerRep( 1*sizeof(Int_type) * getItsPerRep() + // pack - 1*sizeof(Real_type) * getItsPerRep() + // pack + setBytesReadPerRep( 1*sizeof(Int_type) * m_num_vars * halo_size + // pack + 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Int_type) * getItsPerRep() + // unpack - 1*sizeof(Real_type) * getItsPerRep() ); // unpack - setBytesWrittenPerRep( 1*sizeof(Real_type) * getItsPerRep() + // pack + 1*sizeof(Int_type) * m_num_vars * halo_size + // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack + setBytesWrittenPerRep( 1*sizeof(Real_type) * m_num_vars * halo_size + // pack - 1*sizeof(Real_type) * getItsPerRep() ); // unpack + 1*sizeof(Real_type) * m_num_vars * halo_size ); // unpack setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(0); diff --git a/src/comm/HALO_SENDRECV.cpp b/src/comm/HALO_SENDRECV.cpp index d6ae07bc8..f1b55036b 100644 --- a/src/comm/HALO_SENDRECV.cpp +++ b/src/comm/HALO_SENDRECV.cpp @@ -28,11 +28,12 @@ HALO_SENDRECV::HALO_SENDRECV(const RunParams& params) m_num_vars = params.getHaloNumVars(); m_var_size = m_grid_plus_halo_size ; + const Size_type halo_size = m_var_size - getActualProblemSize(); - setItsPerRep( m_num_vars * (m_var_size - getActualProblemSize()) ); + setItsPerRep( 0 ); setKernelsPerRep( 0 ); - setBytesReadPerRep( 1*sizeof(Real_type) * getItsPerRep() ); // send - setBytesWrittenPerRep( 1*sizeof(Real_type) * getItsPerRep() ); // recv + setBytesReadPerRep( 1*sizeof(Real_type) * m_num_vars * halo_size ); // send + setBytesWrittenPerRep( 1*sizeof(Real_type) * m_num_vars * halo_size ); // recv setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep(0); diff --git a/src/lcals/EOS.cpp b/src/lcals/EOS.cpp index 2d8db6f23..3c725839f 100644 --- a/src/lcals/EOS.cpp +++ b/src/lcals/EOS.cpp @@ -26,7 +26,7 @@ EOS::EOS(const RunParams& params) setActualProblemSize( getTargetProblemSize() ); - m_array_length = getActualProblemSize() + 7; + m_array_length = getActualProblemSize() + 6; setItsPerRep( getActualProblemSize() ); setItsPerRep( getActualProblemSize() ); diff --git a/src/lcals/FIRST_SUM.cpp b/src/lcals/FIRST_SUM.cpp index 90af0f664..c8f920406 100644 --- a/src/lcals/FIRST_SUM.cpp +++ b/src/lcals/FIRST_SUM.cpp @@ -24,16 +24,16 @@ FIRST_SUM::FIRST_SUM(const RunParams& params) setDefaultProblemSize(1000000); setDefaultReps(2000); - setActualProblemSize( getTargetProblemSize() ); + setActualProblemSize( std::max(getTargetProblemSize(), Index_type(2)) ); m_N = getActualProblemSize(); - setItsPerRep( getActualProblemSize() ); + setItsPerRep( m_N-1 ); setKernelsPerRep(1); - setBytesReadPerRep( 1*sizeof(Real_type ) * (m_N-1) ); - setBytesWrittenPerRep( 1*sizeof(Real_type ) * m_N ); + setBytesReadPerRep( 1*sizeof(Real_type ) * m_N ); + setBytesWrittenPerRep( 1*sizeof(Real_type ) * (m_N-1) ); setBytesAtomicModifyWrittenPerRep( 0 ); - setFLOPsPerRep(1 * (getActualProblemSize()-1)); + setFLOPsPerRep(1 * (m_N-1)); setComplexity(Complexity::N); diff --git a/src/lcals/GEN_LIN_RECUR.cpp b/src/lcals/GEN_LIN_RECUR.cpp index 5206e4c8e..aeec387d5 100644 --- a/src/lcals/GEN_LIN_RECUR.cpp +++ b/src/lcals/GEN_LIN_RECUR.cpp @@ -28,7 +28,7 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) m_N = getActualProblemSize(); - setItsPerRep( getActualProblemSize() ); + setItsPerRep( 2 * m_N ); setKernelsPerRep(2); setBytesReadPerRep( 3*sizeof(Real_type ) * m_N + 3*sizeof(Real_type ) * m_N ); @@ -36,7 +36,7 @@ GEN_LIN_RECUR::GEN_LIN_RECUR(const RunParams& params) 2*sizeof(Real_type ) * m_N ); setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep((3 + - 3 ) * getActualProblemSize()); + 3 ) * m_N); checksum_scale_factor = 0.01 * ( static_cast(getDefaultProblemSize()) / diff --git a/src/lcals/HYDRO_2D.cpp b/src/lcals/HYDRO_2D.cpp index bd113bd83..307470a16 100644 --- a/src/lcals/HYDRO_2D.cpp +++ b/src/lcals/HYDRO_2D.cpp @@ -36,15 +36,21 @@ HYDRO_2D::HYDRO_2D(const RunParams& params) m_jn = m_kn = std::sqrt(getTargetProblemSize()) + std::sqrt(2)-1; m_array_length = m_kn * m_jn; - setActualProblemSize( getTargetProblemSize() ); + setActualProblemSize( m_array_length ); - setItsPerRep( 3 * getActualProblemSize() ); + setItsPerRep( 3 * (m_kn-2) * (m_jn-2) ); setKernelsPerRep(3); - setBytesReadPerRep( 4*sizeof(Real_type ) * m_array_length + - 4*sizeof(Real_type ) * m_array_length + + setBytesReadPerRep( 4*sizeof(Real_type ) * ((m_kn-1) * (m_jn-1) - 1) + + + 2*sizeof(Real_type ) * (m_kn-2) * (m_jn-2) + + 2*sizeof(Real_type ) * (m_kn-2) * (m_jn-1) + + 2*sizeof(Real_type ) * ((m_kn) * (m_jn) - 4) + + 4*sizeof(Real_type ) * (m_kn-2) * (m_jn-2) ); setBytesWrittenPerRep( 2*sizeof(Real_type ) * (m_kn-2) * (m_jn-2) + + 2*sizeof(Real_type ) * (m_kn-2) * (m_jn-2) + + 2*sizeof(Real_type ) * (m_kn-2) * (m_jn-2) ); setBytesAtomicModifyWrittenPerRep( 0 ); setFLOPsPerRep((14 + diff --git a/src/lcals/TRIDIAG_ELIM.cpp b/src/lcals/TRIDIAG_ELIM.cpp index 0ff1a5cb8..9612e48e6 100644 --- a/src/lcals/TRIDIAG_ELIM.cpp +++ b/src/lcals/TRIDIAG_ELIM.cpp @@ -24,16 +24,16 @@ TRIDIAG_ELIM::TRIDIAG_ELIM(const RunParams& params) setDefaultProblemSize(1000000); setDefaultReps(1000); - setActualProblemSize( getTargetProblemSize() ); + setActualProblemSize( std::max(getTargetProblemSize(), Index_type(2)) ); - m_N = getActualProblemSize() + 1; + m_N = getActualProblemSize(); - setItsPerRep( getActualProblemSize() ); + setItsPerRep( m_N-1 ); setKernelsPerRep(1); setBytesReadPerRep( 3*sizeof(Real_type ) * (m_N-1) ); setBytesWrittenPerRep( 1*sizeof(Real_type ) * (m_N-1) ); setBytesAtomicModifyWrittenPerRep( 0 ); - setFLOPsPerRep(2 * (getActualProblemSize()-1)); + setFLOPsPerRep(2 * (m_N-1)); setComplexity(Complexity::N); diff --git a/src/polybench/POLYBENCH_ADI.cpp b/src/polybench/POLYBENCH_ADI.cpp index 76488e0e9..562984feb 100644 --- a/src/polybench/POLYBENCH_ADI.cpp +++ b/src/polybench/POLYBENCH_ADI.cpp @@ -28,19 +28,25 @@ POLYBENCH_ADI::POLYBENCH_ADI(const RunParams& params) m_n = std::sqrt( getTargetProblemSize() ) + 2 + std::sqrt(2)-1; - setItsPerRep( (m_n-2) + (m_n-2) ); + setItsPerRep( 2 * (m_n-2) + (m_n-2) ); setActualProblemSize( (m_n-2) * (m_n-2) ); setKernelsPerRep( 2 ); - setBytesReadPerRep( 3*sizeof(Real_type ) * m_n * (m_n-2) + - 3*sizeof(Real_type ) * m_n * (m_n-2) ); - setBytesWrittenPerRep( 3*sizeof(Real_type ) * m_n * (m_n-2) + - 3*sizeof(Real_type ) * m_n * (m_n-2) ); + setBytesReadPerRep( 1*sizeof(Real_type ) * (m_n-2) * (m_n ) + + 2*sizeof(Real_type ) * (m_n-2) * (m_n-2) + + + 1*sizeof(Real_type ) * (m_n-2) * (m_n ) + + 2*sizeof(Real_type ) * (m_n-2) * (m_n-2) ); + setBytesWrittenPerRep( 2*sizeof(Real_type ) * (m_n-2) * (m_n-1) + + 1*sizeof(Real_type ) * (m_n-2) * (m_n ) + + + 2*sizeof(Real_type ) * (m_n-2) * (m_n-1) + + 1*sizeof(Real_type ) * (m_n-2) * (m_n ) ); setBytesAtomicModifyWrittenPerRep( 0 ); - setFLOPsPerRep( (15 + 2) * (m_n-2)*(m_n-2) + - (15 + 2) * (m_n-2)*(m_n-2) ); + setFLOPsPerRep( (13 + 2) * (m_n-2)*(m_n-2) + + (13 + 2) * (m_n-2)*(m_n-2) ); checksum_scale_factor = 0.0000001 * ( static_cast(getDefaultProblemSize()) / diff --git a/src/polybench/POLYBENCH_ATAX.cpp b/src/polybench/POLYBENCH_ATAX.cpp index 36fba4bfe..a18fab318 100644 --- a/src/polybench/POLYBENCH_ATAX.cpp +++ b/src/polybench/POLYBENCH_ATAX.cpp @@ -31,12 +31,12 @@ POLYBENCH_ATAX::POLYBENCH_ATAX(const RunParams& params) setActualProblemSize( m_N * m_N ); - setItsPerRep( m_N + m_N ); + setItsPerRep( 2 * m_N + m_N ); setKernelsPerRep(2); setBytesReadPerRep( 1*sizeof(Real_type ) * m_N + 1*sizeof(Real_type ) * m_N * m_N + - 1*sizeof(Real_type ) * m_N + + 2*sizeof(Real_type ) * m_N + 1*sizeof(Real_type ) * m_N * m_N ); setBytesWrittenPerRep( 2*sizeof(Real_type ) * m_N + 1*sizeof(Real_type ) * m_N); diff --git a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp index 2dc790724..cbb4542eb 100644 --- a/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp +++ b/src/polybench/POLYBENCH_FLOYD_WARSHALL.cpp @@ -31,12 +31,12 @@ POLYBENCH_FLOYD_WARSHALL::POLYBENCH_FLOYD_WARSHALL(const RunParams& params) setActualProblemSize( m_N * m_N ); - setItsPerRep( m_N*m_N ); - setKernelsPerRep(1); - setBytesReadPerRep( 1*sizeof(Real_type ) * m_N * m_N ); - setBytesWrittenPerRep( 1*sizeof(Real_type ) * m_N * m_N ); + setItsPerRep( m_N * m_N*m_N ); + setKernelsPerRep(m_N); + setBytesReadPerRep( m_N * 1*sizeof(Real_type ) * m_N * m_N ); + setBytesWrittenPerRep( m_N * 1*sizeof(Real_type ) * m_N * m_N ); setBytesAtomicModifyWrittenPerRep( 0 ); - setFLOPsPerRep(1 * m_N*m_N*m_N ); + setFLOPsPerRep( m_N*m_N*m_N * 3 / 2 ); // conditional is true about half of the time checksum_scale_factor = 1.0 * ( static_cast(getDefaultProblemSize()) / diff --git a/src/polybench/POLYBENCH_GEMM.cpp b/src/polybench/POLYBENCH_GEMM.cpp index b97eeda06..34d093fb9 100644 --- a/src/polybench/POLYBENCH_GEMM.cpp +++ b/src/polybench/POLYBENCH_GEMM.cpp @@ -40,7 +40,8 @@ POLYBENCH_GEMM::POLYBENCH_GEMM(const RunParams& params) setItsPerRep( m_ni * m_nj ); setKernelsPerRep(1); - setBytesReadPerRep( 1*sizeof(Real_type ) * m_ni * m_nk + + setBytesReadPerRep( 1*sizeof(Real_type ) * m_ni * m_nj + + 1*sizeof(Real_type ) * m_ni * m_nk + 1*sizeof(Real_type ) * m_nj * m_nk ); setBytesWrittenPerRep( 1*sizeof(Real_type ) * m_ni * m_nj); setBytesAtomicModifyWrittenPerRep( 0 ); diff --git a/src/polybench/POLYBENCH_GEMVER.cpp b/src/polybench/POLYBENCH_GEMVER.cpp index 4a1e2096a..1b542f9a3 100644 --- a/src/polybench/POLYBENCH_GEMVER.cpp +++ b/src/polybench/POLYBENCH_GEMVER.cpp @@ -35,15 +35,15 @@ POLYBENCH_GEMVER::POLYBENCH_GEMVER(const RunParams& params) setActualProblemSize( m_n * m_n ); setItsPerRep( m_n*m_n + - m_n*m_n + m_n + - m_n*m_n ); + m_n + + m_n ); setKernelsPerRep(4); setBytesReadPerRep( 1*sizeof(Real_type ) * m_n * m_n + 4*sizeof(Real_type ) * m_n + 1*sizeof(Real_type ) * m_n * m_n + - 2*sizeof(Real_type ) * m_n + + 1*sizeof(Real_type ) * m_n + 2*sizeof(Real_type ) * m_n + diff --git a/src/polybench/POLYBENCH_MVT.cpp b/src/polybench/POLYBENCH_MVT.cpp index 5afb343d7..3e4a55fec 100644 --- a/src/polybench/POLYBENCH_MVT.cpp +++ b/src/polybench/POLYBENCH_MVT.cpp @@ -33,10 +33,10 @@ POLYBENCH_MVT::POLYBENCH_MVT(const RunParams& params) setItsPerRep( 2 * m_N ); setKernelsPerRep(2); - setBytesReadPerRep( 2*sizeof(Real_type ) * m_N + + setBytesReadPerRep( 1*sizeof(Real_type ) * m_N + 1*sizeof(Real_type ) * m_N * m_N + - 2*sizeof(Real_type ) * m_N + + 1*sizeof(Real_type ) * m_N + 1*sizeof(Real_type ) * m_N * m_N ); setBytesWrittenPerRep( 1*sizeof(Real_type ) * m_N +