1#include "../../include/sph/kernel.cuh"
2#include "../../include/cuda_utils/cuda_launcher.cuh"
4#define MIN_NUMBER_OF_INTERACTIONS_FOR_TENSORIAL_CORRECTION_TO_WORK 0
18 for (
int d = 0; d <
DIM; d++) {
28 printf(
"Error, this kernel can only be used with DIM == 2,3\n");
33 }
else if (q >= 0.0) {
34 *W = 10./(M_PI * sml * sml) * (1-q) * (1-q) * (1-q);
35 *dWdr = -30./(M_PI * sml * sml * sml) * (1-q) * (1-q);
40 }
else if (q >= 0.0) {
41 *W = 15./(M_PI * sml * sml * sml) * (1-q) * (1-q) * (1-q);
42 *dWdr = -45/(M_PI * sml * sml * sml * sml) * (1-q) * (1-q);
45 for (
int d = 0; d <
DIM; d++) {
46 dWdx[d] = *dWdr/r * dx[d];
54 for (
int d = 0; d <
DIM; d++) {
65 f = 40./(7 * M_PI) * 1./(sml * sml);
67 f = 8./M_PI * 1./(sml * sml * sml);
74 *W = 2. * f * (1.-q) * (1.-q) * (1-q);
75 *dWdr = -6. * f * 1./sml * (1.-q) * (1.-q);
76 }
else if (q <= 0.5) {
77 *W = f * (6. * q * q * q - 6. * q * q + 1.);
78 *dWdr = 6. * f/sml * (3 * q * q - 2 * q);
80 for (
int d = 0; d <
DIM; d++) {
81 dWdx[d] = *dWdr/r * dx[d];
90 for (
int d = 0; d <
DIM; d++) {
102 *W = 5./(4. * sml) * (1-q) * (1-q) * (1-q) * (1+3*q) * (q < 1);
103 *dWdr = -15/(sml * sml) * q * (1-q) * (1-q) * (q < 1);
105 *W = 7./(M_PI * sml * sml) * (1-q) * (1-q) * (1-q) * (1-q) * (1+4 * q) * (q < 1);
106 *dWdr = -140./(M_PI * sml * sml * sml) * q * (1-q) * (1-q) * (1-q) * (q < 1);
108 *W = 21./(2 * M_PI * sml * sml * sml) * (1-q) * (1-q) * (1-q) * (1-q) * (1+4 * q) * (q < 1);
109 *dWdr = -210./(M_PI * sml * sml * sml * sml) * q * (1-q) * (1-q) * (1-q) * (q < 1);
111 for (
int d = 0; d <
DIM; d++) {
112 dWdx[d] = *dWdr/r * dx[d];
122 for (
int d = 0; d <
DIM; d++) {
135 *W = 3./(2.*sml) * (1-q) * (1-q) * (1-q) * (1-q) * (1-q) * (1+5*q+8*q*q) * (q < 1);
136 *dWdr = -21./(sml*sml) * q * (1-q) * (1-q) * (1-q) * (1-q) * (1+4*q) * (q < 1);
138 *W = 9./(M_PI*sml*sml) * (1-q) * (1-q) * (1-q) * (1-q) * (1-q) * (1-q) * (1.+6*q+35./3.*q*q) * (q < 1);
139 *dWdr = -54./(M_PI*sml*sml*sml) * (1-q)*(1-q)*(1-q)*(1-q)*(1-q) * (1.-35.*q*q+105.*q*q*q) * (q< 1);
141 *W = 495./(32.*M_PI*sml*sml*sml) * (1-q) * (1-q) * (1-q) * (1-q) * (1-q) * (1-q) * (1.+6.*q+35./3.*q*q) * (q < 1);
142 *dWdr = -1485./(16.*M_PI*sml*sml*sml*sml) * (1-q) * (1-q) * (1-q) * (1-q) * (1-q) * (1.-35.*q*q+105.*q*q*q) * (q< 1);
144 for (
int d = 0; d <
DIM; d++) {
145 dWdx[d] = *dWdr/r * dx[d];
156 for (
int d = 0; d <
DIM; d++) {
168 *W = 55./(32.*sml) * (1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q) * (1+7*q+19*q*q+21*q*q*q) * (q < 1);
169 *dWdr = -165./(16*sml*sml) * q * (1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q) * (3+18*q+35*q*q) * (q < 1);
171 *W = 78./(7.*M_PI*sml*sml) * (1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q) * (1.+8.*q+25.*q*q+32*q*q*q) * (q < 1);
173 *dWdr = -1716./(7.*M_PI*sml*sml*sml) * q * (1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q) * (1.+7*q+16*q*q) * (q < 1);
175 *W = 1365./(64.*M_PI*sml*sml*sml) * (1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q) * (1.+8.*q+25.*q*q+32*q*q*q) * (q < 1);
176 *dWdr = -15015./(32.*M_PI*sml*sml*sml*sml) * q * (1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q)*(1-q) *
177 (1.+7*q+16*q*q) * (q < 1);
179 for (
int d = 0; d <
DIM; d++) {
180 dWdx[d] = *dWdr/r * dx[d];
198 for (
int d = 0; d <
DIM; d++) {
202 dx[0] = particles->
x[p1] - particles->
x[p2];
204 dx[1] = particles->
y[p1] - particles->
y[p2];
206 dx[2] = particles->
z[p1] - particles->
z[p2];
210 hbar = 0.5 * (particles->
sml[p1] + particles->
sml[p2]);
212 kernel(&W, dWdx, &dWdr, dx, hbar);
215 for (
int d = 1; d <
DIM; d++) {
218 kernel(&W2, dWdx, &dWdr, dx, hbar);
229 int i, inc, j, k, m, d, dd;
239 inc = blockDim.x * gridDim.x;
240 for (i = threadIdx.x + blockIdx.x * blockDim.x; i <
numParticles; i += inc) {
244 k = particles->
noi[i];
246 for (m = 0; m <
DIM; m++) {
250 sml = particles->
sml[i];
252 for (m = 0; m < k; m++) {
256 sml = 0.5 *(particles->
sml[i] + particles->
sml[j]);
258 dx[0] = particles->
x[i] - particles->
x[j];
260 dx[1] = particles->
y[i] - particles->
y[j];
262 dx[2] = particles->
z[i] - particles->
z[j];
268 kernel(&W, dWdx, &dWdr, dx, particles->
sml[i]);
269 kernel(&Wj, dWdxj, &dWdrj, dx, particles->
sml[j]);
270 # if SHEPARD_CORRECTION
272 W /= particles->shepard_correction[i];
273 Wj /= particles->shepard_correction[j];
274 for (d = 0; d <
DIM; d++) {
275 dWdx[d] /= p_rhs.shepard_correction[i];
276 dWdxj[d] /= p_rhs.shepard_correction[j];
280 for (d = 0; d <
DIM; d++) {
281 dWdx[d] = 0.5 * (dWdx[d] + dWdxj[d]);
284 kernel(&W, dWdx, &dWdr, dx, sml);
285 # if SHEPARD_CORRECTION
286 W /= p_rhs.shepard_correction[i];
287 for (d = 0; d <
DIM; d++) {
288 dWdx[d] /= particles->shepard_correction[i];
293 vi[0] = particles->
vx[i];
294 vj[0] = particles->
vx[j];
296 vi[1] = particles->
vy[i];
297 vj[1] = particles->
vy[j];
299 vi[2] = particles->
vz[i];
300 vj[2] = particles->
vz[j];
304 for (d = 0; d <
DIM; d++) {
309 for (d = 0; d <
DIM; d++) {
310 #if TENSORIAL_CORRECTION
311 for (dd = 0; dd <
DIM; dd++) {
312 divv += particles->
mass[j]/particles->
rho[j] * (vj[d] - vi[d]) * particles->tensorialCorrectionMatrix[i*
DIM*
DIM+d*
DIM+dd] * dWdx[dd];
315 divv += particles->
mass[j]/particles->
rho[j] * (vj[d] - vi[d]) * dWdx[d];
320 #if (DIM == 1 && BALSARA_SWITCH)
321 #error unset BALSARA SWITCH in 1D
324 curlv[0] += particles->
mass[j]/particles->
rho[i] * ((vi[0] - vj[0]) * dWdx[1]
325 - (vi[1] - vj[1]) * dWdx[0]);
328 curlv[0] += particles->
mass[j]/particles->
rho[i] * ((vi[1] - vj[1]) * dWdx[2]
329 - (vi[2] - vj[2]) * dWdx[1]);
330 curlv[1] += particles->
mass[j]/particles->
rho[i] * ((vi[2] - vj[2]) * dWdx[0]
331 - (vi[0] - vj[0]) * dWdx[2]);
332 curlv[2] += particles->
mass[j]/particles->
rho[i] * ((vi[0] - vj[0]) * dWdx[1]
333 - (vi[1] - vj[1]) * dWdx[0]);
336 for (d = 0; d <
DIM; d++) {
338 particles->curlv[i*
DIM+d] = curlv[d];
340 particles->divv[i] = divv;
347 return cuda::launch(
true, executionPolicy, ::SPH::Kernel::CalcDivvandCurlv, kernel, particles,
359 register int i, inc, j, m;
360 register real dr[
DIM], h, dWdr;
361 inc = blockDim.x * gridDim.x;
363 for (i = threadIdx.x + blockIdx.x * blockDim.x; i <
numParticles; i += inc) {
364 real shepard_correction;
366 for (m = 0; m <
DIM; m++) {
369 kernel(&W, dWdx, &dWdr, dr, particles->
sml[i]);
370 shepard_correction = particles->
mass[i]/particles->
rho[i]*W;
372 for (m = 0; m < particles->
noi[i]; m++) {
378 dr[0] = particles->
x[i] - particles->
x[j];
380 dr[1] = particles->
y[i] - particles->
y[j];
382 dr[2] = particles->
z[i] - particles->
z[j];
387 kernel(&W, dWdx, &dWdr, dr, particles->
sml[i]);
389 kernel(&Wj, dWdx, &dWdr, dr, particles->
sml[j]);
392 h = 0.5*(particles->
sml[i] + particles->
sml[j]);
393 kernel(&W, dWdx, &dWdr, dr, h);
396 shepard_correction += particles->
mass[j]/particles->
rho[j]*W;
406#if LINEAR_CONSISTENCY
410 register int i, inc, j, k, m;
413 inc = blockDim.x * gridDim.x;
414 register real r, dr[
DIM], h, dWdr, tmp, f1, f2;
417 real wend_f, wend_sml, q, distance;
418 for (i = threadIdx.x + blockIdx.x * blockDim.x; i <
numParticles; i += inc) {
421 for (d = 0; d <
DIM*
DIM; d++) {
429 k = particles->
noi[i];
432 for (m = 0; m < k; m++) {
437 dr[0] = particles->
x[i] - particles->
x[j];
439 dr[1] = particles->
y[i] - particles->
y[j];
441 dr[2] = particles->
z[i] - particles->
z[j];
450 kernel(&W, dWdx, &dWdr, dr, particles->
sml[i]);
451 kernel(&Wj, dWdxj, &dWdr, dr, particles->
sml[j]);
452# if SHEPARD_CORRECTION
453 W /= particles->shepard_correction[i];
454 Wj /= particles->shepard_correction[j];
455 for (d = 0; d <
DIM; d++) {
456 dWdx[d] /= particles->shepard_correction[i];
457 dWdxj[d] /= particles->shepard_correction[j];
459 for (d = 0; d <
DIM; d++) {
460 dWdx[d] = 0.5 * (dWdx[d] + dWdxj[d]);
467 h = 0.5*(particles->
sml[i] + particles->
sml[j]);
468 kernel(&W, dWdx, &dWdr, dr, h);
469# if SHEPARD_CORRECTION
470 W /= particles->shepard_correction[i];
471 for (d = 0; d <
DIM; d++) {
472 dWdx[d] /= particles->shepard_correction[i];
477 for (d = 0; d <
DIM; d++) {
478 for (dd = 0; dd <
DIM; dd++) {
479 corrmatrix[d*
DIM+dd] -= particles->
mass[j]/particles->
rho[j] * dr[d] * dWdx[dd];
488 if (threadIdx.x == 0) {
489 printf(
"could not invert matrix: rv: %d and k: %d\n", rv, k);
490 for (d = 0; d <
DIM; d++) {
491 for (dd = 0; dd <
DIM; dd++) {
492 printf(
"%e\t", corrmatrix[d*
DIM+dd]);
498 for (d = 0; d <
DIM; d++) {
499 for (dd = 0; dd <
DIM; dd++) {
500 matrix[d*
DIM+dd] = 0.0;
502 matrix[d*
DIM+dd] = 1.0;
506 for (d = 0; d <
DIM*
DIM; d++) {
Execution policy/instruction for CUDA kernel execution.
Particle(s) class based on SoA (Structur of Arrays).
integer * noi
(pointer to) number of interactions (array)
real * x
(pointer to) x position (array)
real * rho
(pointer to) density (array)
real * y
(pointer to) y position (array)
real * sml
(pointer to) smoothing length (array)
real * mass
(pointer to) mass (array)
real * z
(pointer to) z position (array)
real * vz
(pointer to) z velocity (array)
real * vx
(pointer to) x velocity (array)
real * vy
(pointer to) y velocity (array)
#define CUDA_CALLABLE_MEMBER
__device__ SPH::SPH_kernel wendlandc6_p
#define MIN_NUMBER_OF_INTERACTIONS_FOR_TENSORIAL_CORRECTION_TO_WORK
__device__ SPH::SPH_kernel spiky_p
__device__ SPH::SPH_kernel cubicSpline_p
__device__ SPH::SPH_kernel wendlandc2_p
__device__ SPH::SPH_kernel wendlandc4_p
__device__ int invertMatrix(real *m, real *inverted)
Invert matrix.
const char *const numParticles
__device__ void wendlandc6(real *W, real dWdx[DIM], real *dWdr, real dx[DIM], real sml)
__device__ void wendlandc2(real *W, real dWdx[DIM], real *dWdr, real dx[DIM], real sml)
__device__ void wendlandc4(real *W, real dWdx[DIM], real *dWdr, real dx[DIM], real sml)
__device__ void cubicSpline(real *W, real dWdx[DIM], real *dWdr, real dx[DIM], real sml)
Cubic spline kernel (Monaghan & Lattanzio 1985).
__device__ void spiky(real *W, real dWdx[DIM], real *dWdr, real dx[DIM], real sml)
Spiky kernel (Desbrun & Cani).
SPH related functions and kernels.
CUDA_CALLABLE_MEMBER real fixTensileInstability(SPH_kernel kernel, Particles *particles, int p1, int p2)
Calculates the kernel for the tensile instability fix (Monaghan 2000).
void(* SPH_kernel)(real *W, real dWdx[DIM], real *dWdr, real dx[DIM], real h)
Function pointer to generic SPH kernel function.
__device__ real sqrt(real a)
Square root of a floating point value.
real launch(bool timeKernel, const ExecutionPolicy &policy, void(*f)(Arguments...), Arguments... args)
CUDA execution wrapper function.
#define MAX_NUM_INTERACTIONS
#define DIM
Dimension of the problem.