15 #ifndef GEOS_RAJAINTERFACE_RAJAINTERFACE_HPP
16 #define GEOS_RAJAINTERFACE_RAJAINTERFACE_HPP
23 #include <RAJA/RAJA.hpp>
/**
 * @brief Busy-wait until a condition evaluates to true.
 * @param UPPER not referenced by the expansion (kept for interface compatibility)
 * @param NANOSLEEP not referenced by the expansion (kept for interface compatibility)
 * @param TEST expression that is re-evaluated on every iteration; the loop ends once it is true
 * @note @p TEST is wrapped in parentheses so that compound expressions (e.g. `a || b`)
 *       are negated as a whole rather than only their first operand.
 */
#define GEOS_ASYNC_WAIT( UPPER, NANOSLEEP, TEST ) while( !( TEST ) ) { }
// Memory space constant used for host-resident data.
33 auto const hostMemorySpace = LvArray::MemorySpace::host;
// RAJA sequential execution, atomic and reduction policies.
35 using serialPolicy = RAJA::seq_exec;
36 using serialAtomic = RAJA::seq_atomic;
37 using serialReduce = RAJA::seq_reduce;
// RAJA host resource and host event types used with the serial policies.
39 using serialStream = RAJA::resources::Host;
40 using serialEvent = RAJA::resources::HostEvent;
42 #if defined( GEOSX_USE_OPENMP )
// Host-parallel aliases for the branch guarded by GEOSX_USE_OPENMP.
// NOTE(review): this guard uses the GEOSX_ prefix while the CUDA/HIP guards use GEOS_ —
// confirm both spellings match the macros provided by the build configuration.
44 auto const parallelHostMemorySpace = hostMemorySpace;
// OpenMP execution and reduction policies; atomics use RAJA::builtin_atomic.
46 using parallelHostPolicy = RAJA::omp_parallel_for_exec;
47 using parallelHostReduce = RAJA::omp_reduce;
48 using parallelHostAtomic = RAJA::builtin_atomic;
// Stream and event types are the same as in the serial configuration.
51 using parallelHostStream = serialStream;
52 using parallelHostEvent = serialEvent;
54 void RAJA_INLINE parallelHostSync() { RAJA::synchronize< RAJA::omp_synchronize >(); }
// NOTE(review): these declarations repeat the names introduced under GEOSX_USE_OPENMP, so this is
// presumably the non-OpenMP fallback branch; the #else/#endif are not visible in this listing — confirm.
58 auto const parallelHostMemorySpace = hostMemorySpace;
// Every host-parallel alias resolves to its serial counterpart here.
60 using parallelHostPolicy = serialPolicy;
61 using parallelHostReduce = serialReduce;
62 using parallelHostAtomic = serialAtomic;
63 using parallelHostStream = serialStream;
64 using parallelHostEvent = serialEvent;
66 void RAJA_INLINE parallelHostSync() { }
70 #if defined( GEOS_USE_CUDA )
// Device aliases for the branch guarded by GEOS_USE_CUDA.
71 auto const parallelDeviceMemorySpace = LvArray::MemorySpace::cuda;
// CUDA execution policy; BLOCK_SIZE defaults to GEOSX_BLOCK_SIZE.
73 template<
size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
74 using parallelDevicePolicy = RAJA::cuda_exec< BLOCK_SIZE >;
// Asynchronous variant of the CUDA execution policy, with the same default block size.
76 template<
size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
77 using parallelDeviceAsyncPolicy = RAJA::cuda_exec_async< BLOCK_SIZE >;
// RAJA resource and event types used for device launches.
79 using parallelDeviceStream = RAJA::resources::Cuda;
80 using parallelDeviceEvent = RAJA::resources::Event;
// CUDA reduction and atomic policies.
82 using parallelDeviceReduce = RAJA::cuda_reduce;
83 using parallelDeviceAtomic = RAJA::cuda_atomic;
85 void RAJA_INLINE parallelDeviceSync() { RAJA::synchronize< RAJA::cuda_synchronize >(); }
87 template<
typename POLICY,
typename RESOURCE,
typename LAMBDA >
88 RAJA_INLINE parallelDeviceEvent forAll( RESOURCE && stream,
const localIndex end, LAMBDA && body )
90 return RAJA::forall< POLICY >( std::forward< RESOURCE >( stream ),
91 RAJA::TypedRangeSegment< localIndex >( 0, end ),
92 std::forward< LAMBDA >( body ) );
95 #elif defined( GEOS_USE_HIP )
// Device aliases for the branch guarded by GEOS_USE_HIP.
97 auto const parallelDeviceMemorySpace = LvArray::MemorySpace::hip;
// HIP execution policy; BLOCK_SIZE defaults to GEOSX_BLOCK_SIZE.
99 template<
size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
100 using parallelDevicePolicy = RAJA::hip_exec< BLOCK_SIZE >;
// RAJA resource and event types used for device launches.
103 using parallelDeviceStream = RAJA::resources::Hip;
104 using parallelDeviceEvent = RAJA::resources::Event;
// HIP reduction and atomic policies.
106 using parallelDeviceReduce = RAJA::hip_reduce;
107 using parallelDeviceAtomic = RAJA::hip_atomic;
109 void RAJA_INLINE parallelDeviceSync() { RAJA::synchronize< RAJA::hip_synchronize >( ); }
// In this branch the "async" device policy is an alias of parallelDevicePolicy with the same block size.
112 template<
size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
113 using parallelDeviceAsyncPolicy = parallelDevicePolicy< BLOCK_SIZE >;
115 template<
typename POLICY,
typename RESOURCE,
typename LAMBDA >
118 RAJA::forall< POLICY >( RAJA::TypedRangeSegment< localIndex >( 0, end ), std::forward< LAMBDA >( body ) );
119 return parallelDeviceEvent();
// Device aliases that resolve to the host-parallel definitions.
// NOTE(review): presumably the branch taken when neither CUDA nor HIP is enabled; the
// corresponding #else is not visible in this listing — confirm.
123 auto const parallelDeviceMemorySpace = parallelHostMemorySpace;
// BLOCK_SIZE is accepted for signature compatibility but does not affect the aliased type.
125 template<
size_t BLOCK_SIZE = 0 >
126 using parallelDevicePolicy = parallelHostPolicy;
// The "async" policy also aliases parallelHostPolicy.
128 template<
size_t BLOCK_SIZE = 0 >
129 using parallelDeviceAsyncPolicy = parallelHostPolicy;
// Stream and event types forward to the host-parallel ones.
131 using parallelDeviceStream = parallelHostStream;
132 using parallelDeviceEvent = parallelHostEvent;
// Reduction and atomic policies forward to the host-parallel ones.
134 using parallelDeviceReduce = parallelHostReduce;
135 using parallelDeviceAtomic = parallelHostAtomic;
137 void RAJA_INLINE parallelDeviceSync() { parallelHostSync( ); }
139 template<
typename POLICY,
typename RESOURCE,
typename LAMBDA >
142 RAJA::forall< POLICY >( RAJA::TypedRangeSegment< localIndex >( 0, end ), std::forward< LAMBDA >( body ) );
143 return parallelDeviceEvent();
// A std::vector holding parallelDeviceEvent objects.
148 using parallelDeviceEvents = std::vector< parallelDeviceEvent >;
150 namespace internalRajaInterface
159 using atomic = serialAtomic;
160 using reduce = serialReduce;
163 #if defined(GEOSX_USE_OPENMP)
165 struct PolicyMap< RAJA::omp_parallel_for_exec >
167 using atomic = RAJA::builtin_atomic;
168 using reduce = RAJA::omp_reduce;
172 #if defined(GEOS_USE_CUDA)
173 template<
typename X,
typename Y,
size_t BLOCK_SIZE,
bool ASYNC >
174 struct PolicyMap< RAJA::policy::cuda::cuda_exec_explicit< X, Y, BLOCK_SIZE, ASYNC > >
176 using atomic = RAJA::cuda_atomic;
177 using reduce = RAJA::cuda_reduce;
181 #if defined(GEOS_USE_HIP)
182 template<
size_t BLOCK_SIZE,
bool ASYNC >
183 struct PolicyMap< RAJA::hip_exec< BLOCK_SIZE, ASYNC > >
185 using atomic = RAJA::hip_atomic;
186 using reduce = RAJA::hip_reduce;
192 template<
typename POLICY >
193 using ReducePolicy =
typename internalRajaInterface::PolicyMap< POLICY >::reduce;
195 template<
typename POLICY >
196 using AtomicPolicy =
typename internalRajaInterface::PolicyMap< POLICY >::atomic;
199 RAJA_INLINE
bool testAllDeviceEvents( parallelDeviceEvents & events )
202 for(
auto & event : events )
213 RAJA_INLINE
void waitAllDeviceEvents( parallelDeviceEvents & events )
217 GEOS_ASYNC_WAIT( 6000000000, 10, testAllDeviceEvents( events ) );
220 template<
typename POLICY,
typename INDEX,
typename LAMBDA >
221 RAJA_INLINE
void forAll( INDEX
const end, LAMBDA && body )
223 RAJA::forall< POLICY >( RAJA::TypedRangeSegment< INDEX >( 0, end ), std::forward< LAMBDA >( body ) );
226 template<
typename POLICY,
typename INDEX,
typename LAMBDA >
227 RAJA_INLINE
void forRange( INDEX
const begin, INDEX
const end, LAMBDA && body )
229 RAJA::forall< POLICY >( RAJA::TypedRangeSegment< INDEX >( begin, end ), std::forward< LAMBDA >( body ) );
#define GEOS_UNUSED_PARAM(X)
Mark an unused argument and silence compiler warnings.
GEOSX_LOCALINDEX_TYPE localIndex
Local index type (for indexing objects within an MPI partition).