Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/apps/ENERGY-Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,16 @@ void ENERGY::runCudaVariantImpl(VariantID vid)
const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size);
constexpr size_t shmem = 0;

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
RPlaunchCudaKernel( (energycalc1<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
e_new, e_old, delvc,
p_old, q_old, work,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_2));
RPlaunchCudaKernel( (energycalc2<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
Expand All @@ -146,22 +149,28 @@ void ENERGY::runCudaVariantImpl(VariantID vid)
ql_old, qq_old,
rho0,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_2));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_3));
RPlaunchCudaKernel( (energycalc3<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
e_new, delvc,
p_old, q_old,
pHalfStep, q_new,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_3));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_4));
RPlaunchCudaKernel( (energycalc4<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
e_new, work,
e_cut, emin,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_4));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_5));
RPlaunchCudaKernel( (energycalc5<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
Expand All @@ -173,7 +182,9 @@ void ENERGY::runCudaVariantImpl(VariantID vid)
pHalfStep, q_new,
rho0, e_cut, emin,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_5));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_6));
RPlaunchCudaKernel( (energycalc6<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
Expand All @@ -184,6 +195,7 @@ void ENERGY::runCudaVariantImpl(VariantID vid)
ql_old, qq_old,
rho0, q_cut,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_6));

}
stopTimer();
Expand All @@ -202,35 +214,47 @@ void ENERGY::runCudaVariantImpl(VariantID vid)
RAJA::region<RAJA::seq_region>( [=]() {
#endif

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
RAJA::forall< RAJA::cuda_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY1;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_2));
RAJA::forall< RAJA::cuda_exec<block_size, async> >( res,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@michaelmckinsey1 , why not use just use RAJA's kernel naming capability here? Example...

RAJA::forall<RAJA::cuda_exec<256>>(
    range, 
    RAJA::Name("VectorAddKernel"), // <-- Kernel Name injected here
    [=] RAJA_DEVICE (int i) {
        c[i] = a[i] + b[i];
    }
);

RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY2;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_2));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_3));
RAJA::forall< RAJA::cuda_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY3;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_3));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_4));
RAJA::forall< RAJA::cuda_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY4;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_4));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_5));
RAJA::forall< RAJA::cuda_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY5;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_5));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_6));
RAJA::forall< RAJA::cuda_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY6;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_6));

#if CUDART_VERSION >= 9000
}); // end sequential region (for single-source code)
Expand Down
28 changes: 26 additions & 2 deletions src/apps/ENERGY-Hip.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) Lawrence Livermore National Security, LLC and other
// Copyright (c) Lawrence Livermore National Security, LLC and other
// RAJA Project Developers. See top-level LICENSE and COPYRIGHT
// files for dates and other details. No copyright assignment is required
// to contribute to RAJA Performance Suite.
Expand Down Expand Up @@ -130,13 +130,16 @@ void ENERGY::runHipVariantImpl(VariantID vid)
const size_t grid_size = RAJA_DIVIDE_CEILING_INT(iend, block_size);
constexpr size_t shmem = 0;

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
RPlaunchHipKernel( (energycalc1<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
e_new, e_old, delvc,
p_old, q_old, work,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_2));
RPlaunchHipKernel( (energycalc2<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
Expand All @@ -146,22 +149,28 @@ void ENERGY::runHipVariantImpl(VariantID vid)
ql_old, qq_old,
rho0,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_2));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_3));
RPlaunchHipKernel( (energycalc3<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
e_new, delvc,
p_old, q_old,
pHalfStep, q_new,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_3));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_4));
RPlaunchHipKernel( (energycalc4<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
e_new, work,
e_cut, emin,
iend );
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_4));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_5));
RPlaunchHipKernel( (energycalc5<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
Expand All @@ -173,7 +182,9 @@ void ENERGY::runHipVariantImpl(VariantID vid)
pHalfStep, q_new,
rho0, e_cut, emin,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_5));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_6));
RPlaunchHipKernel( (energycalc6<block_size>),
grid_size, block_size,
shmem, res.get_stream(),
Expand All @@ -184,6 +195,7 @@ void ENERGY::runHipVariantImpl(VariantID vid)
ql_old, qq_old,
rho0, q_cut,
iend );
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_6));

}
stopTimer();
Expand All @@ -198,35 +210,47 @@ void ENERGY::runHipVariantImpl(VariantID vid)

RAJA::region<RAJA::seq_region>( [=]() {

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
RAJA::forall< RAJA::hip_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY1;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_2));
RAJA::forall< RAJA::hip_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY2;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_2));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_3));
RAJA::forall< RAJA::hip_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY3;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_3));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_4));
RAJA::forall< RAJA::hip_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY4;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_4));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_5));
RAJA::forall< RAJA::hip_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY5;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_5));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_6));
RAJA::forall< RAJA::hip_exec<block_size, async> >( res,
RAJA::RangeSegment(ibegin, iend), [=] __device__ (Index_type i) {
ENERGY_BODY6;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_6));

}); // end sequential region (for single-source code)

Expand Down
12 changes: 12 additions & 0 deletions src/apps/ENERGY-OMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,23 +152,35 @@ void ENERGY::runOpenMPVariant(VariantID vid)

RAJA::region<RAJA::omp_parallel_region>( [=]() {

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
RAJA::forall< RAJA::omp_for_nowait_static_exec< > >( res,
RAJA::RangeSegment(ibegin, iend), energy_lam1);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_2));
RAJA::forall< RAJA::omp_for_nowait_static_exec< > >( res,
RAJA::RangeSegment(ibegin, iend), energy_lam2);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_2));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_3));
RAJA::forall< RAJA::omp_for_nowait_static_exec< > >( res,
RAJA::RangeSegment(ibegin, iend), energy_lam3);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_3));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_4));
RAJA::forall< RAJA::omp_for_nowait_static_exec< > >( res,
RAJA::RangeSegment(ibegin, iend), energy_lam4);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_4));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_5));
RAJA::forall< RAJA::omp_for_nowait_static_exec< > >( res,
RAJA::RangeSegment(ibegin, iend), energy_lam5);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_5));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_6));
RAJA::forall< RAJA::omp_for_nowait_static_exec< > >( res,
RAJA::RangeSegment(ibegin, iend), energy_lam6);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_6));

}); // end omp parallel region

Expand Down
14 changes: 14 additions & 0 deletions src/apps/ENERGY-OMPTarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,13 @@ void ENERGY::runOpenMPTargetVariant(VariantID vid)
ENERGY_BODY3;
}

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
#pragma omp target is_device_ptr(e_new, work) device( did )
#pragma omp teams distribute parallel for thread_limit(threads_per_team) schedule(static, 1)
for (Index_type i = ibegin; i < iend; ++i ) {
ENERGY_BODY4;
}
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

#pragma omp target is_device_ptr(delvc, pbvc, e_new, vnewc, \
bvc, p_new, ql_old, qq_old, \
Expand Down Expand Up @@ -99,35 +101,47 @@ void ENERGY::runOpenMPTargetVariant(VariantID vid)

RAJA::region<RAJA::seq_region>( [=]() {

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ENERGY_BODY1;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_2));
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ENERGY_BODY2;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_2));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_3));
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ENERGY_BODY3;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_3));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_4));
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ENERGY_BODY4;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_4));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_5));
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ENERGY_BODY5;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_5));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_6));
RAJA::forall<RAJA::omp_target_parallel_for_exec<threads_per_team>>( res,
RAJA::RangeSegment(ibegin, iend), [=](Index_type i) {
ENERGY_BODY6;
});
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_6));

}); // end sequential region (for single-source code)

Expand Down
12 changes: 12 additions & 0 deletions src/apps/ENERGY-Seq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,23 +133,35 @@ void ENERGY::runSeqVariant(VariantID vid)

RAJA::region<RAJA::seq_region>( [=]() {

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_1));
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend), energy_lam1);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_1));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_2));
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend), energy_lam2);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_2));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_3));
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend), energy_lam3);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_3));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_4));
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend), energy_lam4);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_4));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_5));
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend), energy_lam5);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_5));

RP_CALI_MARK_BEGIN(RP_CALI_REGION(ENERGY_6));
RAJA::forall<RAJA::seq_exec>( res,
RAJA::RangeSegment(ibegin, iend), energy_lam6);
RP_CALI_MARK_END(RP_CALI_REGION(ENERGY_6));

}); // end sequential region (for single-source code)

Expand Down
Loading
Loading