18#ifndef MILUPHPC_CUDAUTILITIES_CUH
19#define MILUPHPC_CUDAUTILITIES_CUH
24#include "../parameter.h"
28#define CUDA_CALLABLE_MEMBER __host__ __device__
30#define CUDA_CALLABLE_MEMBER
36#define safeCudaCall(call) checkCudaCall(call, #call, __FILE__, __LINE__)
41#define gpuErrorcheck(ans) { gpuAssert((ans), __FILE__, __LINE__); }
48#define cudaAssert(...)
49#elif SAFETY_LEVEL == 1
50#define cudaAssert(...) { \
51 printf(__VA_ARGS__); \
53#elif SAFETY_LEVEL == 2
54#define cudaAssert(...) { \
55 printf(__VA_ARGS__); \
58#elif SAFETY_LEVEL == 3
59#define cudaAssert(...) { \
60 printf(__VA_ARGS__); \
64#define cudaAssert(...)
70#define cudaTerminate(...) { \
71 printf(__VA_ARGS__); \
75#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
77__device__
double atomicAdd(
double* address,
double val);
88void gpuAssert(cudaError_t code,
const char *file,
int line,
bool abort=
true);
98void checkCudaCall(cudaError_t command,
const char * commandName,
const char * fileName,
int line);
116 template<
typename T,
typename U>
117 __global__
void findDuplicates(T *array, U *entry1, U *entry2,
integer *duplicateCounter,
int length);
125 template<
typename T,
typename U>
145 template<
typename T,
typename U>
154 template<
typename T,
typename U>
CUDA runtime functionalities and wrappers.
void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
Check CUDA error codes.
void checkCudaCall(cudaError_t command, const char *commandName, const char *fileName, int line)
Check CUDA call.
real findDuplicateEntries(T *array1, T *array2, integer *duplicateCounter, int length)
real markDuplicates(T *array, integer *duplicateCounter, int length)
real findDuplicates(T *array, integer *duplicateCounter, int length)
real collectValues(integer *indices, real *entries, real *collector, integer count)
real removeDuplicates(T *array, T *removedArray, integer *duplicateCounter, int length)
real checkValues(integer *indices, real *entry1, real *entry2, real *entry3, integer count)
__global__ void removeDuplicates(T *array, T *removedArray, integer *duplicateCounter, int length)
__global__ void collectValues(integer *indices, real *entries, real *collector, integer count)
__global__ void checkValues(integer *indices, real *entry1, real *entry2, real *entry3, integer count)
__global__ void findDuplicateEntries(T *array1, T *array2, integer *duplicateCounter, int length)
__global__ void findDuplicates(T *array, integer *duplicateCounter, int length)
__global__ void markDuplicates(T *array, integer *duplicateCounter, int length)
__device__ real min(real a, real b)
Minimum value out of two floating point values.
__device__ real rsqrt(real a)
Inverse square root of a floating point value.
__device__ real sqrt(real a)
Square root of a floating point value.
__device__ real abs(real a)
Absolute value of a floating point value.
__device__ real max(real a, real b)
Maximum value out of two floating point values.