18#ifndef MILUPHPC_CUDALAUNCHER_CUH
19#define MILUPHPC_CUDALAUNCHER_CUH
21#include "../parameter.h"
27#include <cuda_occupancy.h>
61 template <
typename... Arguments>
66 cudaOccupancyMaxPotentialBlockSize(&minGridSize, &_blockSize, *f, 0, 0);
77 _gridSize = (n + _blockSize - 1) / _blockSize;
80 printf(
"blockSize: %i, gridSize: %i\n", _gridSize, _blockSize);
90 ExecutionPolicy(dim3 _gridSize, dim3 _blockSize,
size_t _sharedMemBytes);
113 template<
typename... Arguments>
115 void (*f)(Arguments...),
117 float elapsedTime = 0.f;
121 cudaEvent_t start_t, stop_t;
122 cudaEventCreate(&start_t);
123 cudaEventCreate(&stop_t);
124 cudaEventRecord(start_t, 0);
128 cudaEventRecord(stop_t, 0);
129 cudaEventSynchronize(stop_t);
130 cudaEventElapsedTime(&elapsedTime, start_t, stop_t);
131 cudaEventDestroy(start_t);
132 cudaEventDestroy(stop_t);
152 template<
typename... Arguments>
153 real launch(
bool timeKernel,
void(*f)(Arguments... args), Arguments... args) {
Execution policy/instruction for CUDA kernel execution.
size_t sharedMemBytes
shared memory (bytes)
CUDA runtime functionalities and wrappers.
#define gpuErrorcheck(ans)
check CUDA call
real launch(bool timeKernel, const ExecutionPolicy &policy, void(*f)(Arguments...), Arguments... args)
CUDA execution wrapper function.