milupHPC documentation
  • src
helper.cu
Go to the documentation of this file.
1#include "../include/helper.cuh"
2#include "../include/cuda_utils/cuda_launcher.cuh"
3#include <cub/cub.cuh>
4
/**
 * Default constructor.
 *
 * Has an empty initializer list and an empty body, i.e. it assigns none of
 * the pointer members. Use Helper::set() to attach the pointers afterwards.
 */
CUDA_CALLABLE_MEMBER Helper::Helper() {}
8
9/*CUDA_CALLABLE_MEMBER Helper::Helper(integer *integerVal, real *realVal, keyType *keyTypeVal, integer *integerBuffer,
10 real *realBuffer, keyType *keyTypeBuffer) : integerVal(integerVal),
11 realVal(realVal), keyTypeVal(keyTypeVal), integerBuffer(integerBuffer),
12 realBuffer(realBuffer) , keyTypeBuffer(keyTypeBuffer) {
13
14}*/
15
/**
 * Constructor storing each given pointer in the member of the same name.
 *
 * The pointers are only copied; nothing is allocated or dereferenced here.
 *
 * @param integerVal, integerVal1, integerVal2 pointers to integer values
 * @param realVal, realVal1, realVal2 pointers to real values
 * @param keyTypeVal pointer to a keyType value
 * @param integerBuffer ... integerBuffer4 integer buffers
 * @param sendCount, sendCount1 integer send-count buffers
 * @param idIntegerBuffer, idIntegerBuffer1 idInteger buffers
 * @param realBuffer, realBuffer1 real buffers
 * @param keyTypeBuffer ... keyTypeBuffer2 keyType buffers
 */
CUDA_CALLABLE_MEMBER Helper::Helper(integer *integerVal, integer *integerVal1, integer *integerVal2,
                                    real *realVal, real *realVal1, real *realVal2,
                                    keyType *keyTypeVal,
                                    integer *integerBuffer, integer *integerBuffer1,
                                    integer *integerBuffer2, integer *integerBuffer3,
                                    integer *integerBuffer4,
                                    integer *sendCount, integer *sendCount1,
                                    idInteger *idIntegerBuffer, idInteger *idIntegerBuffer1,
                                    real *realBuffer, real *realBuffer1,
                                    keyType *keyTypeBuffer, keyType *keyTypeBuffer1,
                                    keyType *keyTypeBuffer2)
    : // single values
      integerVal(integerVal),
      integerVal1(integerVal1),
      integerVal2(integerVal2),
      realVal(realVal),
      realVal1(realVal1),
      realVal2(realVal2),
      keyTypeVal(keyTypeVal),
      // integer buffers
      integerBuffer(integerBuffer),
      integerBuffer1(integerBuffer1),
      integerBuffer2(integerBuffer2),
      integerBuffer3(integerBuffer3),
      integerBuffer4(integerBuffer4),
      // send counts
      sendCount(sendCount),
      sendCount1(sendCount1),
      // idInteger buffers
      idIntegerBuffer(idIntegerBuffer),
      idIntegerBuffer1(idIntegerBuffer1),
      // real buffers
      realBuffer(realBuffer),
      realBuffer1(realBuffer1),
      // keyType buffers
      keyTypeBuffer(keyTypeBuffer),
      keyTypeBuffer1(keyTypeBuffer1),
      keyTypeBuffer2(keyTypeBuffer2) {}
32
/**
 * Destructor.
 *
 * Empty: none of the memory referenced by the pointer members is released here.
 */
CUDA_CALLABLE_MEMBER Helper::~Helper() {}
36
37/*CUDA_CALLABLE_MEMBER void Helper::set(integer *integerVal, real *realVal, keyType *keyTypeVal, integer *integerBuffer,
38 real *realBuffer, keyType *keyTypeBuffer) {
39 this->integerVal = integerVal;
40 this->realVal = realVal;
41 this->keyTypeVal = keyTypeVal;
42 this->integerBuffer = integerBuffer;
43 this->realBuffer = realBuffer;
44 this->keyTypeBuffer = keyTypeBuffer;
45}*/
46
/**
 * Setter: store each given pointer in the member of the same name.
 *
 * Only the pointer values are copied; nothing is allocated or dereferenced.
 * The parameters shadow the members, hence the explicit object reference below.
 *
 * @param integerVal, integerVal1, integerVal2 pointers to integer values
 * @param realVal, realVal1, realVal2 pointers to real values
 * @param keyTypeVal pointer to a keyType value
 * @param integerBuffer ... integerBuffer4 integer buffers
 * @param sendCount, sendCount1 integer send-count buffers
 * @param idIntegerBuffer, idIntegerBuffer1 idInteger buffers
 * @param realBuffer, realBuffer1 real buffers
 * @param keyTypeBuffer ... keyTypeBuffer2 keyType buffers
 */
CUDA_CALLABLE_MEMBER void Helper::set(integer *integerVal, integer *integerVal1, integer *integerVal2,
                                      real *realVal, real *realVal1, real *realVal2,
                                      keyType *keyTypeVal,
                                      integer *integerBuffer, integer *integerBuffer1,
                                      integer *integerBuffer2, integer *integerBuffer3,
                                      integer *integerBuffer4,
                                      integer *sendCount, integer *sendCount1,
                                      idInteger *idIntegerBuffer, idInteger *idIntegerBuffer1,
                                      real *realBuffer, real *realBuffer1,
                                      keyType *keyTypeBuffer, keyType *keyTypeBuffer1,
                                      keyType *keyTypeBuffer2) {
    Helper &self = *this;

    // single values
    self.integerVal = integerVal;
    self.integerVal1 = integerVal1;
    self.integerVal2 = integerVal2;
    self.realVal = realVal;
    self.realVal1 = realVal1;
    self.realVal2 = realVal2;
    self.keyTypeVal = keyTypeVal;

    // integer buffers
    self.integerBuffer = integerBuffer;
    self.integerBuffer1 = integerBuffer1;
    self.integerBuffer2 = integerBuffer2;
    self.integerBuffer3 = integerBuffer3;
    self.integerBuffer4 = integerBuffer4;

    // send counts
    self.sendCount = sendCount;
    self.sendCount1 = sendCount1;

    // idInteger buffers
    self.idIntegerBuffer = idIntegerBuffer;
    self.idIntegerBuffer1 = idIntegerBuffer1;

    // real buffers
    self.realBuffer = realBuffer;
    self.realBuffer1 = realBuffer1;

    // keyType buffers
    self.keyTypeBuffer = keyTypeBuffer;
    self.keyTypeBuffer1 = keyTypeBuffer1;
    self.keyTypeBuffer2 = keyTypeBuffer2;
}
78
79namespace HelperNS {
80
81 namespace Kernel {
82 /*__global__ void set(Helper *helper, integer *integerVal, real *realVal, keyType *keyTypeVal,
83 integer *integerBuffer, real *realBuffer, keyType *keyTypeBuffer) {
84 helper->set(integerVal, realVal, keyTypeVal, integerBuffer, realBuffer, keyTypeBuffer);
85 }
86
87 void Launch::set(Helper *helper, integer *integerVal, real *realVal, keyType *keyTypeVal,
88 integer *integerBuffer, real *realBuffer, keyType *keyTypeBuffer) {
89 ExecutionPolicy executionPolicy(1, 1);
90 cuda::launch(false, executionPolicy, ::HelperNS::Kernel::set, helper, integerVal, realVal, keyTypeVal,
91 integerBuffer, realBuffer, keyTypeBuffer);
92
93 }*/
94
/**
 * Kernel forwarding all pointers to Helper::set() on the object `helper` points to.
 *
 * No thread index is used: every thread executing this kernel performs the
 * same assignments on the same object.
 *
 * @param helper Helper instance to be configured (dereferenced inside the kernel)
 * @param integerVal ... keyTypeBuffer2 pointers passed through unchanged to Helper::set()
 */
__global__ void set(Helper *helper, integer *integerVal, integer *integerVal1, integer *integerVal2,
                    real *realVal, real *realVal1, real *realVal2,
                    keyType *keyTypeVal,
                    integer *integerBuffer, integer *integerBuffer1,
                    integer *integerBuffer2, integer *integerBuffer3,
                    integer *integerBuffer4,
                    integer *sendCount, integer *sendCount1,
                    idInteger *idIntegerBuffer, idInteger *idIntegerBuffer1,
                    real *realBuffer, real *realBuffer1,
                    keyType *keyTypeBuffer, keyType *keyTypeBuffer1,
                    keyType *keyTypeBuffer2) {

    Helper &target = *helper;
    target.set(integerVal, integerVal1, integerVal2,
               realVal, realVal1, realVal2,
               keyTypeVal,
               integerBuffer, integerBuffer1, integerBuffer2, integerBuffer3, integerBuffer4,
               sendCount, sendCount1,
               idIntegerBuffer, idIntegerBuffer1,
               realBuffer, realBuffer1,
               keyTypeBuffer, keyTypeBuffer1, keyTypeBuffer2);
}
111
/**
 * Host-side wrapper launching ::HelperNS::Kernel::set.
 *
 * The kernel is started through cuda::launch with ExecutionPolicy(1, 1) and
 * with kernel timing disabled; the value returned by cuda::launch is discarded.
 *
 * @param helper Helper instance to be configured by the kernel
 * @param integerVal ... keyTypeBuffer2 pointers forwarded unchanged to the kernel
 */
void Launch::set(Helper *helper, integer *integerVal, integer *integerVal1, integer *integerVal2,
                 real *realVal, real *realVal1, real *realVal2,
                 keyType *keyTypeVal,
                 integer *integerBuffer, integer *integerBuffer1,
                 integer *integerBuffer2, integer *integerBuffer3,
                 integer *integerBuffer4,
                 integer *sendCount, integer *sendCount1,
                 idInteger *idIntegerBuffer, idInteger *idIntegerBuffer1,
                 real *realBuffer, real *realBuffer1,
                 keyType *keyTypeBuffer, keyType *keyTypeBuffer1,
                 keyType *keyTypeBuffer2) {

    ExecutionPolicy policy(1, 1);
    const bool timeKernel = false;

    cuda::launch(timeKernel, policy, ::HelperNS::Kernel::set, helper,
                 integerVal, integerVal1, integerVal2,
                 realVal, realVal1, realVal2,
                 keyTypeVal,
                 integerBuffer, integerBuffer1, integerBuffer2, integerBuffer3, integerBuffer4,
                 sendCount, sendCount1,
                 idIntegerBuffer, idIntegerBuffer1,
                 realBuffer, realBuffer1,
                 keyTypeBuffer, keyTypeBuffer1, keyTypeBuffer2);
}
129 }
130}
131
132namespace HelperNS {
133
/**
 * Sort `n` keys using cub::DeviceRadixSort::SortKeys.
 *
 * Follows CUB's two-call convention: the first call (null scratch pointer)
 * only reports the required temporary storage size, the second call performs
 * the sort. The scratch buffer is allocated and released on every call.
 *
 * @tparam A key type
 * @param keysToSort input keys
 * @param sortedKeys output location for the sorted keys
 * @param n number of keys
 * @return always 0
 */
template <typename A>
real sortKeys(A *keysToSort, A *sortedKeys, int n) {
    void *scratch = nullptr;
    size_t scratchBytes = 0;

    // Same CUB invocation for both phases; scratch/scratchBytes are captured by reference.
    auto radixSortKeys = [&]() {
        gpuErrorcheck(cub::DeviceRadixSort::SortKeys(scratch, scratchBytes, keysToSort, sortedKeys, n));
    };

    radixSortKeys();                        // phase 1: query scratch size
    cuda::malloc(scratch, scratchBytes);    // allocate temporary storage
    radixSortKeys();                        // phase 2: run sorting operation
    cuda::free(scratch);

    return 0.f;
}
template real sortKeys<keyType>(keyType *keysToSort, keyType *sortedKeys, int n);
149
/**
 * Sort an array by key using cub::DeviceRadixSort::SortPairs.
 *
 * `keyIn`/`keyOut` are passed as CUB's key input/output, `arrayToSort`/
 * `sortedArray` as the value input/output. Follows CUB's two-call convention
 * (size query with a null scratch pointer, then the actual sort); the scratch
 * buffer is allocated and released on every call.
 *
 * @tparam A value type
 * @tparam B key type
 * @param arrayToSort values to be reordered
 * @param sortedArray output location for the reordered values
 * @param keyIn keys to sort by
 * @param keyOut output location for the sorted keys
 * @param n number of key/value pairs
 * @return always 0
 */
template <typename A, typename B>
real sortArray(A *arrayToSort, A *sortedArray, B *keyIn, B *keyOut, integer n) {
    void *scratch = nullptr;
    size_t scratchBytes = 0;

    // Same CUB invocation for both phases; scratch/scratchBytes are captured by reference.
    auto radixSortPairs = [&]() {
        gpuErrorcheck(cub::DeviceRadixSort::SortPairs(scratch, scratchBytes,
                                                      keyIn, keyOut, arrayToSort, sortedArray, n));
    };

    radixSortPairs();                       // phase 1: query scratch size
    cuda::malloc(scratch, scratchBytes);    // allocate temporary storage
    radixSortPairs();                       // phase 2: run sorting operation
    cuda::free(scratch);

    return 0.f;
}

// explicit instantiations: value type in {real, integer, keyType} x key type in {integer, keyType}
template real sortArray<real, integer>(real *arrayToSort, real *sortedArray,
                                       integer *keyIn, integer *keyOut, integer n);
template real sortArray<real, keyType>(real *arrayToSort, real *sortedArray,
                                       keyType *keyIn, keyType *keyOut, integer n);
template real sortArray<integer, integer>(integer *arrayToSort, integer *sortedArray,
                                          integer *keyIn, integer *keyOut, integer n);
template real sortArray<integer, keyType>(integer *arrayToSort, integer *sortedArray,
                                          keyType *keyIn, keyType *keyOut, integer n);
template real sortArray<keyType, integer>(keyType *arrayToSort, keyType *sortedArray,
                                          integer *keyIn, integer *keyOut, integer n);
template real sortArray<keyType, keyType>(keyType *arrayToSort, keyType *sortedArray,
                                          keyType *keyIn, keyType *keyOut, integer n);
181
182
/**
 * Reduce a device array on this process, then combine the per-process results
 * across all MPI ranks.
 *
 * Step 1 runs cub::DeviceReduce::{Min,Max,Sum} over `d_sml` and writes the
 * single result to `d_aggregate` (CUB two-call convention: size query with a
 * null scratch pointer, allocate, run). Step 2 copies that value to the host.
 * Step 3 performs an in-place boost::mpi all_reduce with the matching
 * operation, so every rank returns the same value.
 *
 * CUB return codes are checked with gpuErrorcheck, consistent with
 * sortKeys()/sortArray() in this file.
 *
 * @tparam T element type
 * @param d_sml device array holding the `n` input values
 * @param d_aggregate device location receiving the process-local reduction
 * @param n number of input values
 * @param reductionType Reduction::min, Reduction::max or Reduction::sum
 * @return reduction over all values of all processes
 *
 * @note For an unsupported `reductionType` an error is logged (once per
 *       stage) and the current content of `d_aggregate` is returned without
 *       any reduction being applied.
 */
template <typename T>
T reduceAndGlobalize(T *d_sml, T *d_aggregate, integer n, Reduction::Type reductionType) {

    // device wide reduction
    void *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    switch (reductionType) {
        case Reduction::min: {
            // Query required temporary storage size
            gpuErrorcheck(cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_sml, d_aggregate, n));
            // Allocate temporary storage
            cuda::malloc(d_temp_storage, temp_storage_bytes);
            // Run min-reduction
            gpuErrorcheck(cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_sml, d_aggregate, n));
        } break;
        case Reduction::max: {
            // Query required temporary storage size
            gpuErrorcheck(cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_sml, d_aggregate, n));
            // Allocate temporary storage
            cuda::malloc(d_temp_storage, temp_storage_bytes);
            // Run max-reduction
            gpuErrorcheck(cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_sml, d_aggregate, n));
        } break;
        case Reduction::sum: {
            // Query required temporary storage size
            gpuErrorcheck(cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_sml, d_aggregate, n));
            // Allocate temporary storage
            cuda::malloc(d_temp_storage, temp_storage_bytes);
            // Run sum-reduction
            gpuErrorcheck(cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_sml, d_aggregate, n));
        } break;
        default: {
            Logger(ERROR) << "Reduction type not available!";
        }
    }

    // Blocking device-to-host copy of the process-local result
    T reduction;
    gpuErrorcheck(cudaMemcpy(&reduction, d_aggregate, sizeof(T), cudaMemcpyDeviceToHost));
    Logger(INFO) << "reduction = " << reduction;

    // The result is on the host, so the scratch buffer is no longer needed;
    // release it before entering the blocking MPI collective.
    // (d_temp_storage is still NULL if the reduction type was not available.)
    cuda::free(d_temp_storage);

    switch (reductionType) {
        case Reduction::min: {
            // interprocess reduction
            boost::mpi::communicator comm;
            all_reduce(comm, boost::mpi::inplace_t<T *>(&reduction), 1, boost::mpi::minimum<T>());
        } break;
        case Reduction::max: {
            // interprocess reduction
            boost::mpi::communicator comm;
            all_reduce(comm, boost::mpi::inplace_t<T *>(&reduction), 1, boost::mpi::maximum<T>());
        } break;
        case Reduction::sum: {
            // interprocess reduction
            boost::mpi::communicator comm;
            all_reduce(comm, boost::mpi::inplace_t<T *>(&reduction), 1, std::plus<T>());
        } break;
        default: {
            Logger(ERROR) << "Reduction type not available!";
        }
    }
    Logger(INFO) << "globalized reduction = " << reduction;

    return reduction;

}

template real reduceAndGlobalize<real>(real*, real*, integer, Reduction::Type);
249
250
251 namespace Kernel {
252
/**
 * Kernel copying the first `n` elements of `sourceArray` to `targetArray`.
 *
 * Grid-stride loop: each thread starts at its global index and advances by
 * the total number of launched threads, so any 1D launch configuration
 * covers all `n` elements.
 *
 * @tparam T element type
 * @param targetArray destination array
 * @param sourceArray source array
 * @param n number of elements to copy
 */
template <typename T>
__global__ void copyArray(T *targetArray, T *sourceArray, integer n) {

    const int firstIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int totalThreads = blockDim.x * gridDim.x;

    for (int i = firstIndex; i < n; i += totalThreads) {
        targetArray[i] = sourceArray[i];
    }
}
266
/**
 * Kernel assigning `value` to the first `n` elements of `array`.
 *
 * Grid-stride loop: each thread starts at its global index and advances by
 * the total number of launched threads, so any 1D launch configuration
 * covers all `n` elements.
 *
 * @tparam T element type
 * @param array array to be overwritten
 * @param value value written to every element
 * @param n number of elements to set
 */
template <typename T>
__global__ void resetArray(T *array, T value, integer n) {

    const int firstIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int totalThreads = blockDim.x * gridDim.x;

    for (int i = firstIndex; i < n; i += totalThreads) {
        array[i] = value;
    }
}
280
281 namespace Launch {
282
/**
 * Host-side wrapper launching ::HelperNS::Kernel::copyArray.
 *
 * Uses a default-constructed ExecutionPolicy and enables kernel timing.
 *
 * @tparam T element type
 * @param targetArray destination array
 * @param sourceArray source array
 * @param n number of elements to copy
 * @return value returned by cuda::launch
 */
template<typename T>
real copyArray(T *targetArray, T *sourceArray, integer n) {
    ExecutionPolicy policy;
    const bool timeKernel = true;
    return cuda::launch(timeKernel, policy, ::HelperNS::Kernel::copyArray,
                        targetArray, sourceArray, n);
}
// explicit instantiations
template real copyArray<integer>(integer *targetArray, integer *sourceArray, integer n);
template real copyArray<real>(real *targetArray, real *sourceArray, integer n);
template real copyArray<keyType>(keyType *targetArray, keyType *sourceArray, integer n);
291
/**
 * Host-side wrapper launching ::HelperNS::Kernel::resetArray.
 *
 * Uses a default-constructed ExecutionPolicy and enables kernel timing.
 *
 * @tparam T element type
 * @param array array to be overwritten
 * @param value value written to every element
 * @param n number of elements to set
 * @return value returned by cuda::launch
 */
template <typename T>
real resetArray(T *array, T value, integer n) {
    ExecutionPolicy policy;
    const bool timeKernel = true;
    return cuda::launch(timeKernel, policy, ::HelperNS::Kernel::resetArray,
                        array, value, n);
}
// explicit instantiations
template real resetArray<integer>(integer *array, integer value, integer n);
// NOTE(review): presumably disabled because idInteger and integer are the same
// underlying type, which would make this a duplicate instantiation - confirm.
//template real resetArray<idInteger>(idInteger *array, idInteger value, integer n);
template real resetArray<real>(real *array, real value, integer n);
template real resetArray<keyType>(keyType *array, keyType value, integer n);
301
302 }
303 /*__global__ void reset(Helper *helper, int length) {
304
305 integer index = threadIdx.x + blockIdx.x * blockDim.x;
306 integer stride = blockDim.x * gridDim.x;
307 integer offset = 0;
308
309 while ((index + offset) < length) {
310 helper->
311 }
312 }*/
313 }
314
315}
ExecutionPolicy
Execution policy/instruction for CUDA kernel execution.
Definition: cuda_launcher.cuh:33
Helper
Definition: helper.cuh:24
Helper::~Helper
CUDA_CALLABLE_MEMBER ~Helper()
Definition: helper.cu:33
Helper::set
CUDA_CALLABLE_MEMBER void set(integer *integerVal, integer *integerVal1, integer *integerVal2, real *realVal, real *realVal1, real *realVal2, keyType *keyTypeVal, integer *integerBuffer, integer *integerBuffer1, integer *integerBuffer2, integer *integerBuffer3, integer *integerBuffer4, integer *sendCount, integer *sendCount1, idInteger *idIntegerBuffer, idInteger *idIntegerBuffer1, real *realBuffer, real *realBuffer1, keyType *keyTypeBuffer, keyType *keyTypeBuffer1, keyType *keyTypeBuffer2)
Definition: helper.cu:47
Helper::integerBuffer3
integer * integerBuffer3
Definition: helper.cuh:58
Helper::realVal1
real * realVal1
Definition: helper.cuh:50
Helper::idIntegerBuffer1
idInteger * idIntegerBuffer1
Definition: helper.cuh:65
Helper::realVal2
real * realVal2
Definition: helper.cuh:51
Helper::keyTypeBuffer1
keyType * keyTypeBuffer1
Definition: helper.cuh:71
Helper::keyTypeBuffer2
keyType * keyTypeBuffer2
Definition: helper.cuh:72
Helper::integerBuffer4
integer * integerBuffer4
Definition: helper.cuh:59
Helper::sendCount
integer * sendCount
Definition: helper.cuh:61
Helper::sendCount1
integer * sendCount1
Definition: helper.cuh:62
Helper::integerVal1
integer * integerVal1
Definition: helper.cuh:46
Helper::integerBuffer
integer * integerBuffer
Definition: helper.cuh:55
Helper::realBuffer1
real * realBuffer1
Definition: helper.cuh:68
Helper::realVal
real * realVal
Definition: helper.cuh:49
Helper::integerVal
integer * integerVal
Definition: helper.cuh:45
Helper::keyTypeVal
keyType * keyTypeVal
Definition: helper.cuh:53
Helper::integerBuffer1
integer * integerBuffer1
Definition: helper.cuh:56
Helper::integerBuffer2
integer * integerBuffer2
Definition: helper.cuh:57
Helper::keyTypeBuffer
keyType * keyTypeBuffer
Definition: helper.cuh:70
Helper::integerVal2
integer * integerVal2
Definition: helper.cuh:47
Helper::idIntegerBuffer
idInteger * idIntegerBuffer
Definition: helper.cuh:64
Helper::realBuffer
real * realBuffer
Definition: helper.cuh:67
Helper::Helper
CUDA_CALLABLE_MEMBER Helper()
Definition: helper.cu:5
Logger
Logger class.
Definition: logger.h:80
gpuErrorcheck
#define gpuErrorcheck(ans)
check CUDA call
Definition: cuda_utilities.cuh:41
CUDA_CALLABLE_MEMBER
#define CUDA_CALLABLE_MEMBER
Definition: cuda_utilities.cuh:30
ERROR
@ ERROR
warning log type
Definition: logger.h:51
INFO
@ INFO
debug log type
Definition: logger.h:48
HelperNS::Kernel::Launch::resetArray< real >
template real resetArray< real >(real *array, real value, integer n)
HelperNS::Kernel::Launch::copyArray< integer >
template real copyArray< integer >(integer *targetArray, integer *sourceArray, integer n)
HelperNS::Kernel::Launch::copyArray< real >
template real copyArray< real >(real *targetArray, real *sourceArray, integer n)
HelperNS::Kernel::Launch::copyArray< keyType >
template real copyArray< keyType >(keyType *targetArray, keyType *sourceArray, integer n)
HelperNS::Kernel::Launch::resetArray< integer >
template real resetArray< integer >(integer *array, integer value, integer n)
HelperNS::Kernel::Launch::resetArray< keyType >
template real resetArray< keyType >(keyType *array, keyType value, integer n)
HelperNS::Kernel::Launch::set
void set(Helper *helper, integer *integerVal, integer *integerVal1, integer *integerVal2, real *realVal, real *realVal1, real *realVal2, keyType *keyTypeVal, integer *integerBuffer, integer *integerBuffer1, integer *integerBuffer2, integer *integerBuffer3, integer *integerBuffer4, integer *sendCount, integer *sendCount1, idInteger *idIntegerBuffer, idInteger *idIntegerBuffer1, real *realBuffer, real *realBuffer1, keyType *keyTypeBuffer, keyType *keyTypeBuffer1, keyType *keyTypeBuffer2)
Definition: helper.cu:112
HelperNS::Kernel::resetArray
__global__ void resetArray(T *array, T value, integer n)
Definition: helper.cu:268
HelperNS::Kernel::set
__global__ void set(Helper *helper, integer *integerVal, integer *integerVal1, integer *integerVal2, real *realVal, real *realVal1, real *realVal2, keyType *keyTypeVal, integer *integerBuffer, integer *integerBuffer1, integer *integerBuffer2, integer *integerBuffer3, integer *integerBuffer4, integer *sendCount, integer *sendCount1, idInteger *idIntegerBuffer, idInteger *idIntegerBuffer1, real *realBuffer, real *realBuffer1, keyType *keyTypeBuffer, keyType *keyTypeBuffer1, keyType *keyTypeBuffer2)
Definition: helper.cu:95
HelperNS::Kernel::copyArray
__global__ void copyArray(T *targetArray, T *sourceArray, integer n)
Definition: helper.cu:254
HelperNS
Definition: helper.cuh:95
HelperNS::sortArray< real, integer >
template real sortArray< real, integer >(real *arrayToSort, real *sortedArray, integer *keyIn, integer *keyOut, integer n)
HelperNS::reduceAndGlobalize< real >
template real reduceAndGlobalize< real >(real *, real *, integer, Reduction::Type)
HelperNS::sortArray< integer, integer >
template real sortArray< integer, integer >(integer *arrayToSort, integer *sortedArray, integer *keyIn, integer *keyOut, integer n)
HelperNS::sortArray< keyType, integer >
template real sortArray< keyType, integer >(keyType *arrayToSort, keyType *sortedArray, integer *keyIn, integer *keyOut, integer n)
HelperNS::sortKeys< keyType >
template real sortKeys< keyType >(keyType *keysToSort, keyType *sortedKeys, int n)
HelperNS::sortArray
real sortArray(A *arrayToSort, A *sortedArray, B *keyIn, B *keyOut, integer n)
Definition: helper.cu:151
HelperNS::sortArray< real, keyType >
template real sortArray< real, keyType >(real *arrayToSort, real *sortedArray, keyType *keyIn, keyType *keyOut, integer n)
HelperNS::sortKeys
real sortKeys(A *keysToSort, A *sortedKeys, int n)
Definition: helper.cu:135
HelperNS::sortArray< integer, keyType >
template real sortArray< integer, keyType >(integer *arrayToSort, integer *sortedArray, keyType *keyIn, keyType *keyOut, integer n)
HelperNS::reduceAndGlobalize
T reduceAndGlobalize(T *d_sml, T *d_aggregate, integer n, Reduction::Type reductionType)
Definition: helper.cu:184
HelperNS::sortArray< keyType, keyType >
template real sortArray< keyType, keyType >(keyType *arrayToSort, keyType *sortedArray, keyType *keyIn, keyType *keyOut, integer n)
Kernel
Definition: device_rhs.cuh:7
cuda::set
void set(T *d_var, T val, std::size_t count=1)
Set device memory to a specific value.
Definition: cuda_runtime.h:56
cuda::free
void free(T *d_var)
Free device memory.
Definition: cuda_runtime.h:81
cuda::malloc
void malloc(T *&d_var, std::size_t count)
Allocate device memory.
Definition: cuda_runtime.h:70
cuda::launch
real launch(bool timeKernel, const ExecutionPolicy &policy, void(*f)(Arguments...), Arguments... args)
CUDA execution wrapper function.
Definition: cuda_launcher.cuh:114
real
double real
Definition: parameter.h:15
keyType
unsigned long keyType
Definition: parameter.h:18
integer
int integer
Definition: parameter.h:17
idInteger
int idInteger
Definition: parameter.h:19
Reduction::Type
Type
Definition: helper.cuh:13
Reduction::sum
@ sum
Definition: helper.cuh:14
Reduction::min
@ min
Definition: helper.cuh:14
Reduction::max
@ max
Definition: helper.cuh:14

milupHPC - src/helper.cu Source File
Generated on Wed Aug 31 2022 12:16:52 by Doxygen 1.9.3