GEOSX
GEOS_RAJA_Interface.hpp
1 /*
2  * ------------------------------------------------------------------------------------------------------------
3  * SPDX-License-Identifier: LGPL-2.1-only
4  *
5  * Copyright (c) 2018-2020 Lawrence Livermore National Security LLC
6  * Copyright (c) 2018-2020 The Board of Trustees of the Leland Stanford Junior University
7  * Copyright (c) 2018-2020 TotalEnergies
8  * Copyright (c) 2019- GEOSX Contributors
9  * All rights reserved
10  *
11  * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
12  * ------------------------------------------------------------------------------------------------------------
13  */
14 
15 #ifndef GEOS_RAJAINTERFACE_RAJAINTERFACE_HPP
16 #define GEOS_RAJAINTERFACE_RAJAINTERFACE_HPP
17 
18 // Source includes
19 #include "common/GeosxConfig.hpp"
20 #include "common/DataTypes.hpp"
21 
22 // TPL includes
23 #include <RAJA/RAJA.hpp>
24 
25 #include <chrono>
26 #include <thread>
27 
/**
 * @brief Busy-wait until @p TEST evaluates to true.
 * @param UPPER intended upper bound on the number of polls (currently ignored by this implementation)
 * @param NANOSLEEP intended sleep between polls, in nanoseconds (currently ignored by this implementation)
 * @param TEST expression that is re-evaluated on every poll; the loop exits as soon as it is true
 * @note @p TEST is wrapped in parentheses so that a compound expression (e.g. `a == b` or `a || b`)
 *       is negated as a whole rather than only its first operand.
 */
#define GEOS_ASYNC_WAIT( UPPER, NANOSLEEP, TEST ) while( !( TEST ) ) { }
29 
30 namespace geos
31 {
32 
33 auto const hostMemorySpace = LvArray::MemorySpace::host;
34 
35 using serialPolicy = RAJA::seq_exec;
36 using serialAtomic = RAJA::seq_atomic;
37 using serialReduce = RAJA::seq_reduce;
38 
39 using serialStream = RAJA::resources::Host;
40 using serialEvent = RAJA::resources::HostEvent;
41 
42 #if defined( GEOSX_USE_OPENMP )
43 
44 auto const parallelHostMemorySpace = hostMemorySpace;
45 
46 using parallelHostPolicy = RAJA::omp_parallel_for_exec;
47 using parallelHostReduce = RAJA::omp_reduce;
48 using parallelHostAtomic = RAJA::builtin_atomic;
49 
50 // issues with Raja::resources::Omp on lassen
51 using parallelHostStream = serialStream;
52 using parallelHostEvent = serialEvent;
53 
54 void RAJA_INLINE parallelHostSync() { RAJA::synchronize< RAJA::omp_synchronize >(); }
55 
56 #else
57 
58 auto const parallelHostMemorySpace = hostMemorySpace;
59 
60 using parallelHostPolicy = serialPolicy;
61 using parallelHostReduce = serialReduce;
62 using parallelHostAtomic = serialAtomic;
63 using parallelHostStream = serialStream;
64 using parallelHostEvent = serialEvent;
65 
66 void RAJA_INLINE parallelHostSync() { }
67 
68 #endif
69 
70 #if defined( GEOS_USE_CUDA )
71 auto const parallelDeviceMemorySpace = LvArray::MemorySpace::cuda;
72 
73 template< size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
74 using parallelDevicePolicy = RAJA::cuda_exec< BLOCK_SIZE >;
75 
76 template< size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
77 using parallelDeviceAsyncPolicy = RAJA::cuda_exec_async< BLOCK_SIZE >;
78 
79 using parallelDeviceStream = RAJA::resources::Cuda;
80 using parallelDeviceEvent = RAJA::resources::Event;
81 
82 using parallelDeviceReduce = RAJA::cuda_reduce;
83 using parallelDeviceAtomic = RAJA::cuda_atomic;
84 
85 void RAJA_INLINE parallelDeviceSync() { RAJA::synchronize< RAJA::cuda_synchronize >(); }
86 
87 template< typename POLICY, typename RESOURCE, typename LAMBDA >
88 RAJA_INLINE parallelDeviceEvent forAll( RESOURCE && stream, const localIndex end, LAMBDA && body )
89 {
90  return RAJA::forall< POLICY >( std::forward< RESOURCE >( stream ),
91  RAJA::TypedRangeSegment< localIndex >( 0, end ),
92  std::forward< LAMBDA >( body ) );
93 }
94 
95 #elif defined( GEOS_USE_HIP )
96 
97 auto const parallelDeviceMemorySpace = LvArray::MemorySpace::hip;
98 
99 template< size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
100 using parallelDevicePolicy = RAJA::hip_exec< BLOCK_SIZE >;
101 
102 
103 using parallelDeviceStream = RAJA::resources::Hip;
104 using parallelDeviceEvent = RAJA::resources::Event;
105 
106 using parallelDeviceReduce = RAJA::hip_reduce;
107 using parallelDeviceAtomic = RAJA::hip_atomic;
108 
109 void RAJA_INLINE parallelDeviceSync() { RAJA::synchronize< RAJA::hip_synchronize >( ); }
110 
111 // the async dispatch policy caused runtime issues as of [email protected], hasn't been checked in rocm@5:
112 template< size_t BLOCK_SIZE = GEOSX_BLOCK_SIZE >
113 using parallelDeviceAsyncPolicy = parallelDevicePolicy< BLOCK_SIZE >; // RAJA::hip_exec_async< BLOCK_SIZE >;
114 
115 template< typename POLICY, typename RESOURCE, typename LAMBDA >
116 RAJA_INLINE parallelDeviceEvent forAll( RESOURCE && GEOS_UNUSED_PARAM( stream ), const localIndex end, LAMBDA && body )
117 {
118  RAJA::forall< POLICY >( RAJA::TypedRangeSegment< localIndex >( 0, end ), std::forward< LAMBDA >( body ) );
119  return parallelDeviceEvent();
120 }
121 #else
122 
123 auto const parallelDeviceMemorySpace = parallelHostMemorySpace;
124 
125 template< size_t BLOCK_SIZE = 0 >
126 using parallelDevicePolicy = parallelHostPolicy;
127 
128 template< size_t BLOCK_SIZE = 0 >
129 using parallelDeviceAsyncPolicy = parallelHostPolicy;
130 
131 using parallelDeviceStream = parallelHostStream;
132 using parallelDeviceEvent = parallelHostEvent;
133 
134 using parallelDeviceReduce = parallelHostReduce;
135 using parallelDeviceAtomic = parallelHostAtomic;
136 
137 void RAJA_INLINE parallelDeviceSync() { parallelHostSync( ); }
138 
139 template< typename POLICY, typename RESOURCE, typename LAMBDA >
140 RAJA_INLINE parallelDeviceEvent forAll( RESOURCE && GEOS_UNUSED_PARAM( stream ), const localIndex end, LAMBDA && body )
141 {
142  RAJA::forall< POLICY >( RAJA::TypedRangeSegment< localIndex >( 0, end ), std::forward< LAMBDA >( body ) );
143  return parallelDeviceEvent();
144 }
145 
146 #endif
147 
148 using parallelDeviceEvents = std::vector< parallelDeviceEvent >;
149 
150 namespace internalRajaInterface
151 {
152 template< typename >
153 struct PolicyMap
154 {};
155 
156 template<>
157 struct PolicyMap< serialPolicy >
158 {
159  using atomic = serialAtomic;
160  using reduce = serialReduce;
161 };
162 
163 #if defined(GEOSX_USE_OPENMP)
164 template<>
165 struct PolicyMap< RAJA::omp_parallel_for_exec >
166 {
167  using atomic = RAJA::builtin_atomic;
168  using reduce = RAJA::omp_reduce;
169 };
170 #endif
171 
172 #if defined(GEOS_USE_CUDA)
173 template< typename X, typename Y, size_t BLOCK_SIZE, bool ASYNC >
174 struct PolicyMap< RAJA::policy::cuda::cuda_exec_explicit< X, Y, BLOCK_SIZE, ASYNC > >
175 {
176  using atomic = RAJA::cuda_atomic;
177  using reduce = RAJA::cuda_reduce;
178 };
179 #endif
180 
181 #if defined(GEOS_USE_HIP)
182 template< size_t BLOCK_SIZE, bool ASYNC >
183 struct PolicyMap< RAJA::hip_exec< BLOCK_SIZE, ASYNC > >
184 {
185  using atomic = RAJA::hip_atomic;
186  using reduce = RAJA::hip_reduce;
187 };
188 #endif
189 }
190 
191 
192 template< typename POLICY >
193 using ReducePolicy = typename internalRajaInterface::PolicyMap< POLICY >::reduce;
194 
195 template< typename POLICY >
196 using AtomicPolicy = typename internalRajaInterface::PolicyMap< POLICY >::atomic;
197 
198 
199 RAJA_INLINE bool testAllDeviceEvents( parallelDeviceEvents & events )
200 {
201  bool allDone = true;
202  for( auto & event : events )
203  {
204  if( !event.check() )
205  {
206  allDone = false;
207  break;
208  }
209  }
210  return allDone;
211 }
212 
213 RAJA_INLINE void waitAllDeviceEvents( parallelDeviceEvents & events )
214 {
215  // poll device events for completion then wait 10 nanoseconds 6,000,000,000 times (60 sec timeout)
216  // 10 nsecs ~= 30 clock cycles @ 3Ghz
217  GEOS_ASYNC_WAIT( 6000000000, 10, testAllDeviceEvents( events ) );
218 }
219 
220 template< typename POLICY, typename INDEX, typename LAMBDA >
221 RAJA_INLINE void forAll( INDEX const end, LAMBDA && body )
222 {
223  RAJA::forall< POLICY >( RAJA::TypedRangeSegment< INDEX >( 0, end ), std::forward< LAMBDA >( body ) );
224 }
225 
226 template< typename POLICY, typename INDEX, typename LAMBDA >
227 RAJA_INLINE void forRange( INDEX const begin, INDEX const end, LAMBDA && body )
228 {
229  RAJA::forall< POLICY >( RAJA::TypedRangeSegment< INDEX >( begin, end ), std::forward< LAMBDA >( body ) );
230 }
231 
232 } // namespace geos
233 
234 #endif // GEOS_RAJAINTERFACE_RAJAINTERFACE_HPP
#define GEOS_UNUSED_PARAM(X)
Mark an unused argument and silence compiler warnings.
Definition: GeosxMacros.hpp:71
GEOSX_LOCALINDEX_TYPE localIndex
Local index type (for indexing objects within an MPI partition).
Definition: DataTypes.hpp:125