milupHPC documentation
  • include
  • cuda_utils
cuda_launcher.cuh
Go to the documentation of this file.
1
18#ifndef MILUPHPC_CUDALAUNCHER_CUH
19#define MILUPHPC_CUDALAUNCHER_CUH
20
21#include "../parameter.h"
22#include "cuda_utilities.cuh"
23
24#include <iostream>
25#include <cuda.h>
26#include <cuda_runtime.h>
27#include <cuda_occupancy.h>
28
29
/**
 * Execution policy/instruction for CUDA kernel execution.
 *
 * Bundles the launch configuration (grid size, block size and the amount of
 * dynamic shared memory in bytes) that is passed to a kernel launch.
 */
class ExecutionPolicy {

public:
    /// grid size
    dim3 gridSize;
    /// block size
    dim3 blockSize;
    /// shared memory (bytes)
    size_t sharedMemBytes;

    /**
     * Default constructor (defined out of line in cuda_launcher.cu).
     */
    ExecutionPolicy();

    /**
     * Derive a one-dimensional launch configuration for a given kernel.
     *
     * The block size is the one suggested by
     * `cudaOccupancyMaxPotentialBlockSize()` for `f` (no dynamic shared
     * memory, no block size limit); the grid size is the number of such
     * blocks needed to cover `n` elements (ceiling division). The shared
     * memory size is set to zero. The chosen configuration is printed to
     * stdout.
     *
     * @tparam Arguments kernel parameter types
     * @param n number of elements the launch has to cover
     * @param f kernel the configuration is computed for
     * @param args kernel arguments (unused, only needed for type deduction)
     */
    template <typename... Arguments>
    ExecutionPolicy(int n, void(*f)(Arguments...), Arguments ...args) : sharedMemBytes(0) {
        int _blockSize = 0;
        int minGridSize = 0;

        // A failing occupancy query would leave _blockSize unusable as a
        // divisor below, so the return code must not be ignored.
        gpuErrorcheck( cudaOccupancyMaxPotentialBlockSize(&minGridSize, &_blockSize, f, 0, 0) );

        // number of blocks required to cover n elements (ceiling division)
        const int _gridSize = (n + _blockSize - 1) / _blockSize;

        blockSize = dim3(_blockSize);
        gridSize = dim3(_gridSize);
        printf("blockSize: %i, gridSize: %i\n", _blockSize, _gridSize);
    }

    /**
     * Constructor taking an explicit launch configuration.
     *
     * @param _gridSize grid size
     * @param _blockSize block size
     * @param _sharedMemBytes shared memory (bytes)
     */
    ExecutionPolicy(dim3 _gridSize, dim3 _blockSize, size_t _sharedMemBytes);

    /**
     * Constructor taking an explicit grid and block size.
     *
     * @param _gridSize grid size
     * @param _blockSize block size
     */
    ExecutionPolicy(dim3 _gridSize, dim3 _blockSize);
};
100
101namespace cuda {
102
/**
 * CUDA execution wrapper function.
 *
 * Launches the kernel `f` with the grid size, block size and shared memory
 * size stored in `policy`, checks for launch/execution errors and waits for
 * the device to finish before returning.
 *
 * @tparam Arguments kernel parameter types
 * @param timeKernel if true, the kernel is bracketed by CUDA events recorded
 *                   on stream 0 and its execution time is measured
 * @param policy launch configuration (grid size, block size, shared memory)
 * @param f kernel to be launched
 * @param args arguments forwarded to the kernel
 * @return elapsed time as reported by `cudaEventElapsedTime()` (milliseconds)
 *         if `timeKernel` is true, otherwise 0
 */
template<typename... Arguments>
real launch(bool timeKernel, const ExecutionPolicy &policy,
            void (*f)(Arguments...),
            Arguments... args) {
    float elapsedTime = 0.f;

    if (timeKernel) {
        cudaEvent_t start_t, stop_t;
        // Every event call is checked: a silently failed create/record would
        // otherwise produce a meaningless elapsed time.
        gpuErrorcheck( cudaEventCreate(&start_t) );
        gpuErrorcheck( cudaEventCreate(&stop_t) );
        gpuErrorcheck( cudaEventRecord(start_t, 0) );

        f<<<policy.gridSize, policy.blockSize, policy.sharedMemBytes>>>(args...);

        gpuErrorcheck( cudaEventRecord(stop_t, 0) );
        gpuErrorcheck( cudaEventSynchronize(stop_t) );
        gpuErrorcheck( cudaEventElapsedTime(&elapsedTime, start_t, stop_t) );
        gpuErrorcheck( cudaEventDestroy(start_t) );
        gpuErrorcheck( cudaEventDestroy(stop_t) );
    } else {
        f<<<policy.gridSize, policy.blockSize, policy.sharedMemBytes>>>(args...);
    }

    // launch configuration errors surface here ...
    gpuErrorcheck( cudaPeekAtLastError() );
    // ... errors raised while the kernel executes surface at the synchronization
    gpuErrorcheck( cudaDeviceSynchronize() );

    return elapsedTime;
}
142
/**
 * CUDA execution wrapper function using a default-constructed execution policy.
 *
 * Forwards to the overload taking an explicit ExecutionPolicy and returns
 * its result.
 * NOTE(review): the launch configuration used here is whatever
 * `ExecutionPolicy()` sets up in cuda_launcher.cu -- confirm it is suitable
 * for the kernel being launched.
 *
 * @tparam Arguments kernel parameter types
 * @param timeKernel if true, the kernel execution time is measured
 * @param f kernel to be launched
 * @param args arguments forwarded to the kernel
 * @return value returned by the policy-taking `launch()` overload
 */
template<typename... Arguments>
real launch(bool timeKernel, void(*f)(Arguments...), Arguments... args) {
    // Previously called `cudaLaunch(...)`, dropped `timeKernel` and returned
    // nothing although the return type is `real`.
    return launch(timeKernel, ExecutionPolicy(), f, args...);
}
156
157}
158
159//#elseif
160
161#endif //MILUPHPC_CUDALAUNCHER_CUH
ExecutionPolicy
Execution policy/instruction for CUDA kernel execution.
Definition: cuda_launcher.cuh:33
ExecutionPolicy::gridSize
dim3 gridSize
grid size
Definition: cuda_launcher.cuh:40
ExecutionPolicy::sharedMemBytes
size_t sharedMemBytes
shared memory (bytes)
Definition: cuda_launcher.cuh:44
ExecutionPolicy::ExecutionPolicy
ExecutionPolicy()
Definition: cuda_launcher.cu:4
ExecutionPolicy::blockSize
dim3 blockSize
block size
Definition: cuda_launcher.cuh:42
cuda_runtime.h
CUDA runtime functionalities and wrappers.
cuda_utilities.cuh
CUDA utilities.
gpuErrorcheck
#define gpuErrorcheck(ans)
check CUDA call
Definition: cuda_utilities.cuh:41
cuda
Definition: cuda_launcher.cuh:101
cuda::launch
real launch(bool timeKernel, const ExecutionPolicy &policy, void(*f)(Arguments...), Arguments... args)
CUDA execution wrapper function.
Definition: cuda_launcher.cuh:114
real
double real
Definition: parameter.h:15

milupHPC - include/cuda_utils/cuda_launcher.cuh Source File
Generated on Wed Aug 31 2022 12:16:52 by Doxygen 1.9.3