milupHPC documentation
  • src
  • sph
sph.cu
Go to the documentation of this file.
1#include "../../include/sph/sph.cuh"
2#include "../../include/cuda_utils/cuda_launcher.cuh"
3//#include <cub/cub.cuh>
4
5#define MAX_VARIABLE_SML_ITERATIONS 5
6// tolerance value. if found number of interactions is as close as TOLERANCE_WANTED_NUMBER_OF_INTERACTIONS, we stop iterating
7#define TOLERANCE_WANTED_NUMBER_OF_INTERACTIONS 5
8
9namespace SPH {
10
11 // deprecated
12 void exchangeParticleEntry(SubDomainKeyTree *subDomainKeyTree, real *entry, real *toSend, integer *sendLengths,
13 integer *receiveLengths, integer numParticlesLocal) {
14
15 boost::mpi::communicator comm;
16
17 std::vector<boost::mpi::request> reqParticles;
18 std::vector<boost::mpi::status> statParticles;
19
20 integer reqCounter = 0;
21 integer receiveOffset = 0;
22
23 for (integer proc=0; proc<subDomainKeyTree->numProcesses; proc++) {
24 if (proc != subDomainKeyTree->rank) {
25 reqParticles.push_back(comm.isend(proc, 17, toSend, sendLengths[proc]));
26 statParticles.push_back(comm.recv(proc, 17, &entry[numParticlesLocal] + receiveOffset, receiveLengths[proc]));
27 receiveOffset += receiveLengths[proc];
28 }
29 }
30 boost::mpi::wait_all(reqParticles.begin(), reqParticles.end());
31 }
32
33 namespace Kernel {
34
35 // Brute-force method (you don't want to use this!)
36 __global__ void fixedRadiusNN_bruteForce(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal,
37 integer numParticles, integer numNodes) {
38
39
40 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
41 integer stride = blockDim.x * gridDim.x;
42 integer offset = 0;
43
44 real dx, dy, dz;
45 real distance;
46 int numInteractions;
47
48 while ((bodyIndex + offset) < numParticlesLocal) {
49
50 numInteractions = 0;
51 for (int i=0; i<tree->toDeleteLeaf[1]; ++i) {
52 if ((bodyIndex + offset) != i) {
53
54 dx = particles->x[bodyIndex + offset] - particles->x[i];
55#if DIM > 1
56 dy = particles->y[bodyIndex + offset] - particles->y[i];
57#if DIM == 3
58 dz = particles->z[bodyIndex + offset] - particles->z[i];
59#endif
60#endif
61
62#if DIM == 1
63
64#elif DIM == 2
65
66#else //DIM == 3
67 distance = dx * dx + dy * dy + dz * dz;
68#endif
69
70 if (distance < (particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset]) &&
71 distance < (particles->sml[i] * particles->sml[i])) {
72 interactions[(bodyIndex + offset) * MAX_NUM_INTERACTIONS + numInteractions] = i;
73 numInteractions++;
74 //if ((bodyIndex + offset) % 1000 == 0) {
75 // printf("distance: %e, %i vs. %i\n", distance, bodyIndex + offset, i);
76 //}
77 if (numInteractions > MAX_NUM_INTERACTIONS) {
78 //printf("numInteractions = %i > MAX_NUM_INTERACTIONS = %i\n", numInteractions,
79 // MAX_NUM_INTERACTIONS);
80 cudaTerminate("numInteractions = %i > MAX_NUM_INTERACTIONS = %i\n", numInteractions,
81 MAX_NUM_INTERACTIONS);
82 }
83 }
84 }
85 }
86 particles->noi[bodyIndex + offset] = numInteractions;
87 offset += stride;
88 }
89
90
91 }
92
93 __global__ void fixedRadiusNN(Tree *tree, Particles *particles, integer *interactions, real radius,
94 integer numParticlesLocal, integer numParticles, integer numNodes) {
95
96 register int bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
97 register int stride = blockDim.x * gridDim.x;
98 register int offset = 0;
99 register int index;
100
101 register integer childNumber, nodeIndex, depth, childIndex;
102
103 register real dx, x;
104 //real x_child;
105#if DIM > 1
106 register real dy, y;
107 //real y_child;
108#if DIM == 3
109 register real dz, z;
110 //real z_child;
111#endif
112#endif
113
114 register real d, r, interactionDistance;
115
116 register int noOfInteractions;
117
118 register int currentNodeIndex[MAX_DEPTH];
119 register int currentChildNumber[MAX_DEPTH];
120
121 real sml, smlj;
122
123 while ((bodyIndex + offset) < numParticlesLocal) {
124
125 //index = tree->sorted[bodyIndex + offset];
126 index = bodyIndex + offset;
127
128 x = particles->x[index];
129#if DIM > 1
130 y = particles->y[index];
131#if DIM == 3
132 z = particles->z[index];
133#endif
134#endif
135 sml = particles->sml[index];
136
137 // resetting
138 //for (integer i = 0; i < MAX_NUM_INTERACTIONS; i++) {
139 // interactions[(bodyIndex + offset) * MAX_NUM_INTERACTIONS + i] = -1;
140 //}
141 //numberOfInteractions[bodyIndex + offset] = 0;
142 // end: resetting
143
144 depth = 0;
145 currentNodeIndex[depth] = 0; //numNodes - 1;
146 currentChildNumber[depth] = 0;
147 noOfInteractions = 0;
148
149 r = radius * 0.5;
150
151 interactionDistance = (r + sml);
152
153 //bool remember;
154 do {
155
156 childNumber = currentChildNumber[depth];
157 nodeIndex = currentNodeIndex[depth];
158
159 //remember = false;
160 while (childNumber < POW_DIM) {
161
162 childIndex = tree->child[POW_DIM * nodeIndex + childNumber];
163 childNumber++;
164
165 if (childIndex != -1 && childIndex != index) {
166
167 //x_child = particles->x[childIndex];
168 //#if DIM > 1
169 //y_child = particles->y[childIndex];
170 //#if DIM == 3
171 //z_child = particles->z[childIndex];
172 //#endif
173 //#endif
174
175 smlj = particles->sml[childIndex];
176
177 dx = x - particles->x[childIndex]; //x_child;
178#if DIM > 1
179 dy = y - particles->y[childIndex]; //y_child;
180#if DIM == 3
181 dz = z - particles->z[childIndex]; //z_child;
182#endif
183#endif
184 // its a leaf
185 if (childIndex < numParticles) {
186#if DIM == 1
187 d = dx*dx;
188#elif DIM == 2
189 d = dx*dx + dy*dy;
190#else
191 d = dx*dx + dy*dy + dz*dz;
192#endif
193
194 //if ((bodyIndex + offset) % 1000 == 0) {
195 //printf("sph: index = %i, d = %i\n", bodyIndex+offset, d);
196 //}
197
198 if (d < (sml * sml) &&
199 d < (smlj * smlj)
200 /*&& noOfInteractions < MAX_NUM_INTERACTIONS*/) { //TODO: remove, just for testing purposes
201 //printf("Adding interaction partner!\n");
202 interactions[index * MAX_NUM_INTERACTIONS + noOfInteractions] = childIndex;
203 // debug
204 //if (noOfInteractions > MAX_NUM_INTERACTIONS) {
205 // printf("%i: noOfInteractions = %i > MAX_NUM_INTERACTIONS = %i (sml = %e) (%e, %e, %e)\n",
206 // bodyIndex + offset, noOfInteractions, MAX_NUM_INTERACTIONS, particles->sml[bodyIndex + offset],
207 // particles->x[bodyIndex + offset], particles->y[bodyIndex + offset],
208 // particles->y[bodyIndex + offset]);
209 // assert(0);
210 //}
211 // end: debug
212 // debug: should not happen
213 //if ((bodyIndex + offset) == childIndex) {
214 // printf("fixedRadiusNN: index = %i == childIndex = %i\n",
215 // bodyIndex + offset, childIndex);
216 // assert(0);
217 //}
218 // end: debug
219 noOfInteractions++;
220 }
221 }
222#if DIM == 1
223 else if (cuda::math::abs(dx) < interactionDistance) {
224#elif DIM == 2
225 else if (cuda::math::abs(dx) < interactionDistance &&
226 cuda::math::abs(dy) < interactionDistance) {
227#else
228 else if (/*tree->child[POW_DIM * nodeIndex + childNumber] != -1 && */ // need to check, since there are nodes without any leaves as children
229 /*(childNumber == currentChildNumber[depth-1] && nodeIndex == currentNodeIndex[depth-1]) &&*/ //TODO: just a fix, why is this happening at all?
230 (cuda::math::abs(dx) < interactionDistance &&
231 cuda::math::abs(dy) < interactionDistance &&
232 cuda::math::abs(dz) < interactionDistance) || particles->nodeType[childIndex] >= 1) {
233#endif
234
235 currentChildNumber[depth] = childNumber;
236 currentNodeIndex[depth] = nodeIndex;
237
238 //if (childNumber == currentChildNumber[depth-1] && nodeIndex == currentNodeIndex[depth-1]) {
239 // printf("ATTENTION[%i]: current = before child = %i node = %i depth = %i tree->child = %i\n", bodyIndex + offset,
240 // childNumber, nodeIndex, depth, tree->child[POW_DIM * nodeIndex + childNumber]);
241 //assert(0);
242 //}
243 //if (depth < MAX_DEPTH) { //TODO: REMOVE!!! just to keep kernel from crashing as long as sml is not dynamic!
244 // // put child on stack
245 // depth++;
246 //}
247 depth++;
248 //if (particles->nodeType[childIndex] == 0) {
249 r *= 0.5;
250 //remember = true;
251 //}
252 interactionDistance = (r + sml);
253 if (depth > MAX_DEPTH) { //MAX_DEPTH) {
254 //for (int i_depth=0; i_depth<depth; i_depth++) {
255 // printf("%i node[%i] = %i child[%i] = %i tree->child = %i\n", bodyIndex + offset, i_depth,
256 // currentNodeIndex[i_depth], i_depth, currentChildNumber[i_depth], tree->child[POW_DIM * currentNodeIndex[i_depth] + currentChildNumber[i_depth]]);
257 //}
258 // TODO: why not here redoNeighborSearch() ???
259 cudaTerminate("depth = %i > MAX_DEPTH = %i\n", depth, MAX_DEPTH);
260 }
261 childNumber = 0;
262 nodeIndex = childIndex;
263 }
264 }
265 }
266
267 depth--;
268 //if (!remember) {
269 r *= 2.0;
270 //}
271 interactionDistance = (r + sml);
272
273 } while (depth >= 0);
274
275 particles->noi[index] = noOfInteractions;
276 //printf("%i: noOfInteractions = %i\n", bodyIndex + offset, noOfInteractions);
277
278 offset += stride;
279 __syncthreads();
280 }
281 }
282
283 __global__ void fixedRadiusNN_withinBox(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal,
284 integer numParticles, integer numNodes) {
285
286 int bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
287 int stride = blockDim.x * gridDim.x;
288 int offset = 0;
289 int index;
290
291 //if ((bodyIndex + offset) == 0) {
292 // printf("new fixed radius...\n");
293 //}
294
295 integer childNumber, nodeIndex, childIndex;
296
297 real tmp1, tmp2;
298 register real dx, min_x, max_x, x_temp, x_child, min_dx, max_dx, x;
299#if DIM > 1
300 register real dy, min_y, max_y, y_temp, y_child, min_dy, max_dy, y;
301#if DIM == 3
302 register real dz, min_z, max_z, z_temp, z_child, min_dz, max_dz, z;
303#endif
304#endif
305 register real sml;
306 real d;
307 real min_distance;
308 real max_distance;
309
310 // outer stack
311 register int outer_currentNodeIndex[MAX_DEPTH];
312 register int outer_currentChildNumber[MAX_DEPTH];
313 register int outer_currentNodeLevel[MAX_DEPTH];
314
315 // inner stack
316 //integer inner_currentNodeIndex[MAX_DEPTH];
317 //integer inner_currentChildNumber[MAX_DEPTH];
318 // but reuse (outer stack!)
319 int *inner_currentNodeIndex;
320 int *inner_currentChildNumber;
321
322 int depth, inner_depth;
323 int noOfInteractions;
324
325 int level;
326
327 while ((bodyIndex + offset) < numParticlesLocal) {
328
329 index = bodyIndex + offset;
330 //index = tree->sorted[bodyIndex + offset];
331 //if (tree->sorted[bodyIndex + offset] < 0 || tree->sorted[bodyIndex + offset] > numParticlesLocal) {
332 //printf("sorted[%i] = %i\n", index, tree->sorted[bodyIndex + offset]);
333 //}
334
335 x = particles->x[index];
336#if DIM > 1
337 y = particles->y[index];
338#if DIM == 3
339 z = particles->z[index];
340#endif
341#endif
342 sml = particles->sml[index];
343
344 depth = 0;
345 outer_currentNodeIndex[depth] = 0;
346 outer_currentChildNumber[depth] = 0;
347 outer_currentNodeLevel[depth] = 1;
348 noOfInteractions = 0;
349 level = 1;
350
351 do {
352 childNumber = outer_currentChildNumber[depth];
353 nodeIndex = outer_currentNodeIndex[depth];
354 level = outer_currentNodeLevel[depth];
355
356 while (childNumber < POW_DIM) {
357
358 childIndex = tree->child[POW_DIM * nodeIndex + childNumber];
359 childNumber++;
360
361 if (childIndex != -1 && childIndex != (index)) {
362
363 x_child = particles->x[childIndex];
364#if DIM > 1
365 y_child = particles->y[childIndex];
366#if DIM == 3
367 z_child = particles->z[childIndex];
368#endif
369#endif
370
371 if (childIndex >= numParticles) {
372
373 // copy bounding box(es)
374 min_x = *tree->minX;
375 max_x = *tree->maxX;
376#if DIM > 1
377 min_y = *tree->minY;
378 max_y = *tree->maxY;
379#if DIM == 3
380 min_z = *tree->minZ;
381 max_z = *tree->maxZ;
382#endif
383#endif
384 for (int _level=0; _level < level; ++_level) {
385 if (x_child < 0.5 * (min_x + max_x)) {
386 max_x = 0.5 * (min_x + max_x);
387 }
388 else {
389 min_x = 0.5 * (min_x + max_x);
390 }
391#if DIM > 1
392 if (y_child < 0.5 * (min_y + max_y)) {
393 max_y = 0.5 * (min_y + max_y);
394 }
395 else {
396 min_y = 0.5 * (min_y + max_y);
397 }
398#if DIM == 3
399 if (z_child < 0.5 * (min_z + max_z)) {
400 max_z = 0.5 * (min_z + max_z);
401 }
402 else {
403 min_z = 0.5 * (min_z + max_z);
404 }
405#endif
406#endif
407 }
408
409 if (x < min_x) {
410 min_dx = x - min_x;
411 max_dx = x - max_x;
412 } else if (x > max_x) {
413 min_dx = x - max_x;
414 max_dx = x - min_x;
415 } else {
416 min_dx = 0.f;
417 //max_dx = (cuda::math::abs(x-min_x) > cuda::math::abs(x-max_x)) ? cuda::math::abs(x-min_x) : cuda::math::abs(x-max_x);
418 tmp1 = cuda::math::abs(x-min_x);
419 tmp2 = cuda::math::abs(x-max_x);
420 max_dx = (tmp1 > tmp2) ? tmp1 : tmp2;
421 }
422#if DIM > 1
423 if (y < min_y) {
424 min_dy = y - min_y;
425 max_dy = y - max_y;
426 } else if (y > max_y) {
427 min_dy = y - max_y;
428 max_dy = y - min_y;
429 } else {
430 min_dy = 0.f;
431 //max_dy = (cuda::math::abs(y-min_y) > cuda::math::abs(y-max_y)) ? cuda::math::abs(y-min_y) : cuda::math::abs(y-max_y);
432 tmp1 = cuda::math::abs(y-min_y);
433 tmp2 = cuda::math::abs(y-max_y);
434 max_dy = (tmp1 > tmp2) ? tmp1 : tmp2;
435 }
436#if DIM == 3
437 if (z < min_z) {
438 min_dz = z - min_z;
439 max_dz = z - max_z;
440 } else if (z > max_z) {
441 min_dz = z - max_z;
442 max_dz = z - min_z;
443 } else {
444 min_dz = 0.f;
445 //max_dz = (cuda::math::abs(z-min_z) > cuda::math::abs(z-max_z)) ? cuda::math::abs(z-min_z) : cuda::math::abs(z-max_z);
446 tmp1 = cuda::math::abs(z-min_z);
447 tmp2 = cuda::math::abs(z-max_z);
448 max_dz = (tmp1 > tmp2) ? tmp1 : tmp2;
449 }
450
451#endif
452#endif
453#if DIM == 1
454 //min_distance = cuda::math::sqrt(dx*dx);
455 min_distance = min_dx*min_dx;
456 max_distance = max_dx*max_dx;
457#elif DIM == 2
458 //r = cuda::math::sqrt(dx*dx + dy*dy);
459 min_distance = min_dx*min_dx + min_dy*min_dy;
460 max_distance = max_dx*max_dx + max_dy*max_dy;
461#else
462 min_distance = min_dx*min_dx + min_dy*min_dy + min_dz*min_dz;
463 max_distance = max_dx*max_dx + max_dy*max_dy + max_dz*max_dz;
464#endif
465 }
466 else {
467 min_distance = 0;
468 max_distance = 0;
469 }
470
471 //if (index % 1000 == 0) {
472 // printf("level: %i | d: %e, min_distance: %e, max_distance: %e\n", level, d, min_distance, max_distance);
473 //}
474 // its a leaf
475
476 //if (index % 1000 == 0) { printf("child on stack? %e > %e\n", particles->sml[index], min_distance); }
477
478 if (childIndex < numParticles) {
479
480 dx = x - x_child;
481#if DIM > 1
482 dy = y - y_child;
483#if DIM == 3
484 dz = z - z_child;
485#endif
486#endif
487
488#if DIM == 1
489 d = dx*dx;
490#elif DIM == 2
491 d = dx*dx + dy*dy;
492#else
493 d = dx*dx + dy*dy + dz*dz;
494#endif
495
496 if (d < (sml * sml) &&
497 d < (particles->sml[childIndex] * particles->sml[childIndex])) {
498 interactions[index * MAX_NUM_INTERACTIONS + noOfInteractions] = childIndex;
499 //if (index % 1000 == 0) { printf("%i adding interaction #%i...\n", index, noOfInteractions); }
500 noOfInteractions++;
501 if (noOfInteractions > MAX_NUM_INTERACTIONS) {
502 cudaTerminate("noOfInteractions = %i > MAX_NUM_INTERACTIONS = %i\n",
503 noOfInteractions, MAX_NUM_INTERACTIONS);
504 }
505 }
506 }
507 // box at least partly within sml, thus add to exlicity stack
508 else if ((sml*sml) > min_distance) {
509 // box completely within sml
510 if ((sml*sml) >= max_distance) {
511
512 inner_currentNodeIndex = &outer_currentNodeIndex[depth];
513 inner_currentChildNumber = &outer_currentChildNumber[depth];
514
515 inner_depth = 0;
516 inner_currentNodeIndex[inner_depth] = childIndex; //nodeIndex;
517 inner_currentChildNumber[inner_depth] = 0; //childNumber;
518 int inner_childNumber; //= childNumber;
519 int inner_nodeIndex; // = nodeIndex;
520 int inner_childIndex;
521 do {
522 //inner_currentNodeIndex[depth] = nodeIndex;
523 //inner_currentChildNumber[depth] = childNumber;
524 inner_childNumber = inner_currentChildNumber[inner_depth];
525 inner_nodeIndex = inner_currentNodeIndex[inner_depth];
526
527 while (inner_childNumber < POW_DIM) {
528
529 inner_childIndex = tree->child[POW_DIM * inner_nodeIndex + inner_childNumber];
530 inner_childNumber++;
531
532 if (inner_childIndex != -1 && inner_childIndex != (index)) {
533 if (inner_childIndex < numParticles) {
534 interactions[index * MAX_NUM_INTERACTIONS +
535 noOfInteractions] = inner_childIndex;
536 noOfInteractions++;
537 //printf("%i: adding directly: %i, depth: %i, child: %i\n", index, inner_childIndex, inner_depth, inner_childNumber);
538 if (noOfInteractions > MAX_NUM_INTERACTIONS) {
539 cudaTerminate("noOfInteractions = %i > MAX_NUM_INTERACTIONS = %i\n",
540 noOfInteractions, MAX_NUM_INTERACTIONS);
541 }
542 } else {
543 //printf("%i: directly on stack: %i, depth: %i, child: %i\n", index, inner_childIndex, inner_depth, inner_childNumber);
544 inner_currentChildNumber[inner_depth] = inner_childNumber;
545 inner_currentNodeIndex[inner_depth] = inner_nodeIndex;
546 inner_depth++;
547
548 inner_childNumber = 0;
549 inner_nodeIndex = inner_childIndex;
550 }
551 }
552 }
553 inner_depth--;
554 //printf("%i: directly depth--: %i, depth: %i\n", inner_childIndex, inner_depth);
555 } while (inner_depth >= 0);
556 //printf("%i: added directly: %i (counter: %i, maxDistance: %e, sml: %e)\n", index, numParticlesDirectly, counter, max_distance, particles->sml[index]);
557 }
558 // box only partly within sml, thus add to exlicity stack
559 else {
560 // put child on stack
561 outer_currentChildNumber[depth] = childNumber;
562 outer_currentNodeIndex[depth] = nodeIndex;
563 outer_currentNodeLevel[depth] = level; // + 1;
564 level++;
565 depth++;
566
567 if (depth > MAX_DEPTH) {
568 cudaTerminate("depth = %i > MAX_DEPTH = %i\n", depth, MAX_DEPTH);
569 }
570 childNumber = 0;
571 nodeIndex = childIndex;
572 }
573 }
574 }
575 }
576
577 depth--;
578
579 } while (depth >= 0);
580
581 particles->noi[index] = noOfInteractions;
582 //printf("%i: noOfInteractions = %i\n", bodyIndex + offset, noOfInteractions);
583 //if (index % 1000 == 0) {
584 // printf("noi[%i]: %i\n", index, noOfInteractions);
585 //}
586 offset += stride;
587 }
588
589 }
590
591 __global__ void
592 fixedRadiusNN_sharedMemory(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal,
593 integer numParticles, integer numNodes) {
594
595 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
596 integer stride = blockDim.x * gridDim.x;
597 integer offset = 0;
598
599 integer childNumber, nodeIndex, depth, childIndex;
600
601 register real dx, x_radius;
602#if DIM > 1
603 register real dy, y_radius;
604#if DIM == 3
605 register real dz, z_radius;
606 real r_temp;
607#endif
608#endif
609
610 real d, r, interactionDistance;
611
612 integer noOfInteractions;
613
614 extern __shared__ int buffer[];
615 integer *currentNodeIndex = (int*)&buffer[threadIdx.x * MAX_DEPTH];
616 integer *currentChildNumber = (int*)&currentNodeIndex[(10 + threadIdx.x) * MAX_DEPTH];
617
618 //register int currentNodeIndex[MAX_DEPTH];
619 //register int currentChildNumber[MAX_DEPTH];
620
621 while ((bodyIndex + offset) < numParticlesLocal) {
622
623 // resetting
624 //#pragma unroll
625 //for (integer i = 0; i < MAX_NUM_INTERACTIONS; i++) {
626 // interactions[(bodyIndex + offset) * MAX_NUM_INTERACTIONS + i] = -1;
627 //}
628 //numberOfInteractions[bodyIndex + offset] = 0;
629 // end: resetting
630
631 depth = 0;
632 currentNodeIndex[depth] = 0; //numNodes - 1;
633 currentChildNumber[depth] = 0;
634 noOfInteractions = 0;
635
636 x_radius = 0.5 * (*tree->maxX - (*tree->minX));
637#if DIM > 1
638 y_radius = 0.5 * (*tree->maxY - (*tree->minY));
639#if DIM == 3
640 z_radius = 0.5 * (*tree->maxZ - (*tree->minZ));
641#endif
642#endif
643
644#if DIM == 1
645 r = x_radius;
646#elif DIM == 2
647 r = cuda::math::max(x_radius, y_radius);
648#else
649 r_temp = cuda::math::max(x_radius, y_radius);
650 r = cuda::math::max(r_temp, z_radius) * 0.5; //TODO: (0.5 * r) or (1.0 * r)
651#endif
652
653 interactionDistance = (r + particles->sml[bodyIndex + offset]);
654
655 do {
656 childNumber = currentChildNumber[depth];
657 nodeIndex = currentNodeIndex[depth];
658
659 while (childNumber < POW_DIM) {
660 childIndex = tree->child[POW_DIM * nodeIndex + childNumber];
661 childNumber++;
662
663 if (childIndex != -1 && childIndex != (bodyIndex + offset)) {
664 dx = particles->x[bodyIndex + offset] - particles->x[childIndex];
665#if DIM > 1
666 dy = particles->y[bodyIndex + offset] - particles->y[childIndex];
667#if DIM == 3
668 dz = particles->z[bodyIndex + offset] - particles->z[childIndex];
669#endif
670#endif
671 // its a leaf
672 if (childIndex < numParticles) {
673#if DIM == 1
674 d = dx*dx;
675#elif DIM == 2
676 d = dx*dx + dy*dy;
677#else
678 d = dx*dx + dy*dy + dz*dz;
679#endif
680
681 if ((bodyIndex + offset) % 1000 == 0) {
682 //printf("sph: index = %i, d = %i\n", bodyIndex+offset, d);
683 }
684
685 if (d < (particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset])
686 && d < (particles->sml[childIndex] * particles->sml[childIndex])) {
687 //printf("Adding interaction partner!\n");
688 interactions[(bodyIndex + offset) * MAX_NUM_INTERACTIONS +
689 noOfInteractions] = childIndex;
690 noOfInteractions++;
691 }
692 }
693#if DIM == 1
694 else if (cuda::math::abs(dx) < interactionDistance ||
695 particles->nodeType[childIndex] >= 1) {
696#elif DIM == 2
697 else if ((cuda::math::abs(dx) < interactionDistance &&
698 cuda::math::abs(dy) < interactionDistance) ||
699 particles->nodeType[childIndex] >= 1) {
700#else
701 else if ((cuda::math::abs(dx) < interactionDistance &&
702 cuda::math::abs(dy) < interactionDistance &&
703 cuda::math::abs(dz) < interactionDistance) ||
704 particles->nodeType[childIndex] >= 1) {
705#endif
706 // put child on stack
707 currentChildNumber[depth] = childNumber;
708 currentNodeIndex[depth] = nodeIndex;
709 depth++;
710 r *= 0.5;
711 interactionDistance = (r + particles->sml[bodyIndex + offset]);
712
713 if (depth > MAX_DEPTH) {
714 cudaTerminate("depth = %i > MAX_DEPTH = %i\n", depth, MAX_DEPTH);
715 }
716 childNumber = 0;
717 nodeIndex = childIndex;
718 }
719 }
720 }
721
722 depth--;
723 r *= 2.0;
724 interactionDistance = (r + particles->sml[bodyIndex + offset]);
725
726 } while (depth >= 0);
727
728 particles->noi[bodyIndex + offset] = noOfInteractions;
729
730 //if (noOfInteractions < 30) {
731 // particles->sml[bodyIndex + offset] *= 1.5;
732 //}
733
734 __syncthreads();
735 offset += stride;
736 }
737 }
738
739 __global__ void fixedRadiusNN_variableSML(Material *materials, Tree *tree, Particles *particles, integer *interactions,
740 integer numParticlesLocal, integer numParticles,
741 integer numNodes) {
742
743 /*register */int i, inc, nodeIndex, depth, childNumber, child;
744 /*register */int currentNodeIndex[MAX_DEPTH];
745 /*register */int currentChildNumber[MAX_DEPTH];
746 /*register */int numberOfInteractions;
747
748 /*register */real x, dx, interactionDistance, r, radius, d;
749#if DIM > 1
750 /*register */real y, dy;
751#if DIM == 3
752 /*register */real z, dz;
753#endif
754#endif
755
756 real x_radius;
757#if DIM > 1
758 real y_radius;
759#if DIM == 3
760 real z_radius;
761 real r_temp;
762#endif
763#endif
764
765 x_radius = /*0.5 * */(*tree->maxX - (*tree->minX));
766#if DIM > 1
767 y_radius = /*0.5 * */(*tree->maxY - (*tree->minY));
768#if DIM == 3
769 z_radius = /*0.5 * */(*tree->maxZ - (*tree->minZ));
770#endif
771#endif
772#if DIM == 1
773 radius = x_radius;
774#elif DIM == 2
775 radius = cuda::math::max(x_radius, y_radius);
776#else
777 r_temp = cuda::math::max(x_radius, y_radius);
778 radius = cuda::math::max(r_temp, z_radius); //TODO: (0.5 * r) or (1.0 * r)
779#endif
780
781
782 inc = blockDim.x * gridDim.x;
783 /* loop over all particles */
784 for (i = threadIdx.x + blockIdx.x * blockDim.x; i < numParticlesLocal; i += inc) {
785
786 x = particles->x[i];
787#if DIM > 1
788 y = particles->y[i];
789#if DIM == 3
790 z = particles->z[i];
791#endif
792#endif
793
794#if DIM == 1
795 radius = x_radius;
796#elif DIM == 2
797 radius = cuda::math::max(x_radius, y_radius);
798#else
799 r_temp = cuda::math::max(x_radius, y_radius);
800 radius = cuda::math::max(r_temp, z_radius); //TODO: (0.5 * r) or (1.0 * r)
801#endif
802
803 /*volatile */bool found = false;
804 /*register */int nit = -1;
805
806 real htmp, htmpold;
807 /*volatile */real htmpj;
808
809 htmp = particles->sml[i];
810
811 // look for nice sml
812 while (!found) {
813 numberOfInteractions = 0;
814 nit++;
815 depth = 0;
816 currentNodeIndex[depth] = 0; //numNodes - 1;
817 currentChildNumber[depth] = 0;
818 numberOfInteractions = 0;
819 r = radius; // * 0.5; // because we start with root children
820 interactionDistance = (r + htmp);
821 do {
822
823 childNumber = currentChildNumber[depth];
824 nodeIndex = currentNodeIndex[depth];
825
826 while (childNumber < POW_DIM) {
827 child = tree->child[POW_DIM * nodeIndex + childNumber];
828 childNumber++;
829
830 if (child != -1 && child != i) {
831 dx = x - particles->x[child];
832#if DIM > 1
833 dy = y - particles->y[child];
834#if DIM == 3
835 dz = z - particles->z[child];
836#endif
837#endif
838 if (child < numParticles) {
839 d = dx*dx;
840#if DIM > 1
841 d += dy*dy;
842#if DIM == 3
843 d += dz*dz;
844#endif
845#endif
846 htmpj = particles->sml[child];
847
848 if (d < htmp*htmp && d < htmpj*htmpj) {
849 numberOfInteractions++;
850 }
851 } else if (/*tree->child[POW_DIM * nodeIndex + childNumber] != -1 &&*/ // need to check, since there are nodes without any leaves as children
852 /*(childNumber == currentChildNumber[depth-1] && nodeIndex == currentNodeIndex[depth-1]) &&*/ //TODO: just a fix, why is this happening at all?
853 (cuda::math::abs(dx) < interactionDistance
854#if DIM > 1
855 && cuda::math::abs(dy) < interactionDistance
856#if DIM == 3
857 && cuda::math::abs(dz) < interactionDistance
858#endif
859#endif
860 ) || particles->nodeType[child] >= 1) {
861 // put child on stack
862 currentChildNumber[depth] = childNumber;
863 currentNodeIndex[depth] = nodeIndex;
864 //if (depth > 500) {
865 // printf("debug: i: depth: %i for %i (%e, %e, %e) node: %i child: %i numP = %i\n", depth, i,
866 // particles->x[i], particles->y[i], particles->z[i], nodeIndex, childNumber,
867 // numParticlesLocal);
868 //}
869 depth++;
870 r *= 0.5;
871 interactionDistance = (r + htmp);
872 if (depth > MAX_DEPTH) {
873 //for (int i_depth=0; i_depth<depth; i_depth++) {
874 // printf("%i node[%i] = %i child[%i] = %i tree->child = %i\n", i, i_depth,
875 // currentNodeIndex[i_depth], i_depth, currentChildNumber[i_depth], tree->child[POW_DIM * currentNodeIndex[i_depth] + currentChildNumber[i_depth]]);
876 //}
877 cudaTerminate("depth = %i > MAX_DEPTH = %i\n", depth, MAX_DEPTH);
878 }
879 /*if (depth >= MAX_DEPTH) {
880 printf("Error, maxdepth reached! problem in tree during interaction search");
881 printf("??? child: %i i: %i depth: %i child[8 * %i + %i] = %i (%e, %e, %e)\n", child, i, depth - 11, currentNodeIndex[depth - 11],
882 currentChildNumber[depth - 11], tree->child[POW_DIM * currentNodeIndex[depth - 11] + currentChildNumber[depth - 11]],
883 particles->x[tree->child[POW_DIM * currentNodeIndex[depth - 11] + currentChildNumber[depth - 11]]],
884 particles->y[tree->child[POW_DIM * currentNodeIndex[depth - 11] + currentChildNumber[depth - 11]]],
885 particles->z[tree->child[POW_DIM * currentNodeIndex[depth - 11] + currentChildNumber[depth - 11]]]);
886 printf("??? child: %i i: %i depth: %i child[8 * %i + %i] = %i (%e, %e, %e)\n", child, i, depth - 10, currentNodeIndex[depth - 10],
887 currentChildNumber[depth - 10], tree->child[POW_DIM * currentNodeIndex[depth - 10] + currentChildNumber[depth - 10]],
888 particles->x[tree->child[POW_DIM * currentNodeIndex[depth - 10] + currentChildNumber[depth - 11]]],
889 particles->y[tree->child[POW_DIM * currentNodeIndex[depth - 10] + currentChildNumber[depth - 11]]],
890 particles->z[tree->child[POW_DIM * currentNodeIndex[depth - 10] + currentChildNumber[depth - 11]]]);
891 //for (int m=0; m<MAX_DEPTH; m++) {
892 // printf("??? %i depth: %i currentDepth: %i node: %i path: %i\n", i, MAX_DEPTH, m,
893 // currentNodeIndex[m], currentChildNumber[m]);
894 //}
895 //assert(depth < MAX_DEPTH);
896 assert(0);
897 }*/
898 childNumber = 0;
899 nodeIndex = child;
900 }
901 }
902 }
903 depth--;
904 r *= 2.0;
905 interactionDistance = (r + htmp);
906 } while (depth >= 0);
907
908 htmpold = htmp;
909 //printf("%d %d %e\n", i, numberOfInteractions, htmp);
910 // stop if we have the desired number of interaction partners \pm TOLERANCE_WANTED_NUMBER_OF_INTERACTIONS
911 if ((nit > MAX_VARIABLE_SML_ITERATIONS ||
912 abs(numberOfInteractions - materials[particles->materialId[i]].interactions) < TOLERANCE_WANTED_NUMBER_OF_INTERACTIONS )
913 && numberOfInteractions < MAX_NUM_INTERACTIONS) {
914
915 found = true;
916 particles->sml[i] = htmp;
917
918 } else if (numberOfInteractions >= MAX_NUM_INTERACTIONS) {
919 htmpold = htmp;
920 if (numberOfInteractions < 1)
921 numberOfInteractions = 1;
922 htmp *= 0.5 * ( 1.0 + pow( (real) materials[particles->materialId[i]].interactions/ (real) numberOfInteractions, 1./DIM));
923 //printf("htmp *= 0.5 * %f\n", ( 1.0 + pow( (real) materials[particles->materialId[i]].interactions/ (real) numberOfInteractions, 1./DIM)));
924
925 } else {
926 // lower or raise htmp accordingly
927 if (numberOfInteractions < 1)
928 numberOfInteractions = 1;
929
930 htmpold = htmp;
931 htmp *= 0.5 * ( 1.0 + pow( (real) materials[particles->materialId[i]].interactions/ (real) numberOfInteractions, 1./DIM));
932 //printf("htmp *= 0.5 * %f\n", ( 1.0 + pow( (real) materials[particles->materialId[i]].interactions/ (real) numberOfInteractions, 1./DIM)));
933 }
934
935 if (htmp < 1e-20) {
936#if DIM == 3
937 printf("+++ particle: %d it: %d htmp: %e htmpold: %e wanted: %d current: %d mId: %d uid: %i (%e, %e, %e) n = %i\n", i, nit,
938 htmp, htmpold, materials[particles->materialId[i]].interactions, numberOfInteractions, particles->materialId[i],
939 particles->uid[i], particles->x[i], particles->y[i], particles->z[i], numParticlesLocal);
940#endif
941 }
942
943 }
944 if (numberOfInteractions > MAX_NUM_INTERACTIONS || numberOfInteractions == 0) {
945#if DIM == 3
946 printf("+++ particle: %d it: %d htmp: %e htmpold: %e wanted: %d current: %d mId: %d uid: %i (%e, %e, %e) n = %i\n",
947 i, nit,
948 htmp, htmpold, materials[particles->materialId[i]].interactions, numberOfInteractions,
949 particles->materialId[i],
950 particles->uid[i], particles->x[i], particles->y[i], particles->z[i], numParticlesLocal);
951#endif
952 cudaTerminate("numberOfInteractions = %i > MAX_NUM_INTERACTIONS = %i\n", numberOfInteractions,
953 MAX_NUM_INTERACTIONS);
954 }
955 //if (numberOfInteractions == 0) {
956 //if (i % 1000 == 0) {
957 // printf("noi: %d: %d %e (nit: %i)\n", i, numberOfInteractions, particles->sml[i], nit);
958 //}
959 }
960
961 }
962
963 __device__ void redoNeighborSearch(Tree *tree, Particles *particles, int particleId,
964 int *interactions, real radius, integer numParticles, integer numNodes) {
965
966 register int i, inc, nodeIndex, depth, childNumber, child;
967 i = particleId;
968 register real x, dx, interactionDistance, r, d;
969 x = particles->x[i];
970#if DIM > 1
971 register real y, dy;
972 y = particles->y[i];
973#if DIM == 3
974 register real z, dz;
975 z = particles->z[i];
976#endif
977#endif
978 register int currentNodeIndex[MAX_DEPTH];
979 register int currentChildNumber[MAX_DEPTH];
980 register int numberOfInteractions;
981
982 //printf("1) sml_new > h: noi: %d\n", p.noi[i]);
983
984 real sml; // smoothing length of particle
985 real smlj; // smoothing length of potential interaction partner
986
987 // start at root
988 depth = 0;
989 currentNodeIndex[depth] = 0; //numNodes - 1;
990 currentChildNumber[depth] = 0;
991 numberOfInteractions = 0;
992 r = radius * 0.5; // because we start with root children
993 sml = particles->sml[i];
994 particles->noi[i] = 0;
995 interactionDistance = (r + sml);
996
997 do {
998 childNumber = currentChildNumber[depth];
999 nodeIndex = currentNodeIndex[depth];
1000 while (childNumber < POW_DIM) {
1001 child = tree->child[POW_DIM * nodeIndex + childNumber];
1002 childNumber++;
1003 if (child != -1 && child != i) {
1004 dx = x - particles->x[child];
1005#if DIM > 1
1006 dy = y - particles->y[child];
1007#if DIM == 3
1008 dz = z - particles->z[child];
1009#endif
1010#endif
1011
1012 if (child < numParticles) {
1013 //if (p_rhs.materialId[child] == EOS_TYPE_IGNORE) {
1014 // continue;
1015 //}
1016 d = dx * dx;
1017#if DIM > 1
1018 d += dy * dy;
1019#if DIM == 3
1020 d += dz * dz;
1021#endif
1022#endif
1023 smlj = particles->sml[child];
1024
1025 if (d < sml*sml && d < smlj*smlj) {
1026 interactions[i * MAX_NUM_INTERACTIONS + numberOfInteractions] = child;
1027 numberOfInteractions++;
1028//#if TOO_MANY_INTERACTIONS_KILL_PARTICLE
1029// if (numberOfInteractions >= MAX_NUM_INTERACTIONS) {
1030// printf("setting the smoothing length for particle %d to 0!\n", i);
1031// p.h[i] = 0.0;
1032// p.noi[i] = 0;
1033// sml = 0.0;
1034// interactionDistance = 0.0;
1035// p_rhs.materialId[i] = EOS_TYPE_IGNORE;
1036// // continue with next particle by setting depth to -1
1037// // cms 2018-01-19
1038// depth = -1;
1039// break;
1040// }
1041//#endif
1042 }
1043 } else if (cuda::math::abs(dx) < interactionDistance
1044#if DIM > 1
1045 && cuda::math::abs(dy) < interactionDistance
1046#if DIM == 3
1047 && cuda::math::abs(dz) < interactionDistance
1048#endif
1049#endif
1050 ) {
1051 // put child on stack
1052 currentChildNumber[depth] = childNumber;
1053 currentNodeIndex[depth] = nodeIndex;
1054 depth++;
1055 r *= 0.5;
1056 interactionDistance = (r + sml);
1057 if (depth > MAX_DEPTH) {
1058 cudaTerminate("depth = %i > MAX_DEPTH = %i\n", depth, MAX_DEPTH);
1059 }
1060 childNumber = 0;
1061 nodeIndex = child;
1062 }
1063 }
1064 }
1065
1066 depth--;
1067 r *= 2.0;
1068 interactionDistance = (r + sml);
1069 } while (depth >= 0);
1070
1071 if (numberOfInteractions >= MAX_NUM_INTERACTIONS) {
1072 printf("ERROR: Maximum number of interactions exceeded: %d / %d\n", numberOfInteractions, MAX_NUM_INTERACTIONS);
1073//#if !TOO_MANY_INTERACTIONS_KILL_PARTICLE
1074// // assert(numberOfInteractions < MAX_NUM_INTERACTIONS);
1075//#endif
1076 }
1077 particles->noi[i] = numberOfInteractions;
1078 }
1079
1080 __global__ void compTheta(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
1081 DomainList *lowestDomainList, Curve::Type curveType) {
1082
1083 integer index = threadIdx.x + blockIdx.x * blockDim.x;
1084 integer stride = blockDim.x * gridDim.x;
1085 integer offset = 0;
1086 integer bodyIndex;
1087 keyType key;
1088 integer domainIndex;
1089 integer proc;
1090
1091 //"loop" over domain list nodes
1092 while ((index + offset) < *lowestDomainList->domainListIndex) {
1093
1094 bodyIndex = lowestDomainList->domainListIndices[index + offset];
1095 //calculate key
1096 //key = tree->getParticleKey(particles, bodyIndex, MAX_LEVEL, curveType);
1097 //printf("x = %e, %e, %e\n", particles->x[bodyIndex], particles->y[bodyIndex], particles->y[bodyIndex]);
1098 //if domain list node belongs to other process: add to relevant domain list indices
1099 // TODO: is this the problem?
1100 //proc = subDomainKeyTree->key2proc(key);
1101 if (curveType == Curve::Type::lebesgue) {
1102 proc = subDomainKeyTree->key2proc(lowestDomainList->domainListKeys[index + offset]);
1103 }
1104 else {
1105 proc = subDomainKeyTree->key2proc(KeyNS::lebesgue2hilbert(lowestDomainList->domainListKeys[index + offset], MAX_LEVEL, lowestDomainList->domainListLevels[index + offset]));
1106 }
1107 //printf("[rank %i] sph: proc = %i, bodyIndex = %i\n", subDomainKeyTree->rank, proc, bodyIndex);
1108 if (proc != subDomainKeyTree->rank) {
1109 //printf("[rank %i] sph: proc = %i, bodyIndex = %i level = %i (%e, %e, %e)\n", subDomainKeyTree->rank,
1110 // proc, bodyIndex, lowestDomainList->domainListLevels[index + offset], particles->x[bodyIndex],
1111 // particles->y[bodyIndex], particles->z[bodyIndex]);
1112 domainIndex = atomicAdd(lowestDomainList->domainListCounter, 1);
1113 //printf("[rank %i] sph: domainIndex = %i\n", subDomainKeyTree->rank, domainIndex);
1114 lowestDomainList->relevantDomainListIndices[domainIndex] = bodyIndex;
1115 lowestDomainList->relevantDomainListLevels[domainIndex] = lowestDomainList->domainListLevels[index + offset];
1116 lowestDomainList->relevantDomainListProcess[domainIndex] = proc;
1117 lowestDomainList->relevantDomainListOriginalIndex[domainIndex] = index + offset;
1118
1119 //printf("[rank %i] Adding relevant domain list node: %i (%f, %f, %f)\n", subDomainKeyTree->rank,
1120 // bodyIndex, particles->x[bodyIndex],
1121 // particles->y[bodyIndex], particles->z[bodyIndex]);
1122 }
1123 offset += stride;
1124 }
1125 }
1126
1127
1128 __global__ void symbolicForce(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
1129 DomainList *lowestDomainList, integer *sendIndices, real searchRadius,
1130 integer n, integer m, integer relevantIndex,
1131 Curve::Type curveType) {
1132
1133 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
1134 integer stride = blockDim.x * gridDim.x;
1135 integer offset = 0;
1136
1137 integer insertIndex;
1138 integer insertIndexOffset;
1139
1140 integer proc, currentChild;
1141 integer childPath;
1142
1143 real dx, min_x, max_x;
1144#if DIM > 1
1145 real dy, min_y, max_y;
1146#if DIM == 3
1147 real dz, min_z, max_z;
1148#endif
1149#endif
1150 real d;
1151
1152 while ((bodyIndex + offset) < n) {
1153
1154
1155 min_x = *tree->minX;
1156 max_x = *tree->maxX;
1157#if DIM > 1
1158 min_y = *tree->minY;
1159 max_y = *tree->maxY;
1160#if DIM == 3
1161 min_z = *tree->minZ;
1162 max_z = *tree->maxZ;
1163#endif
1164#endif
1165
1166 //if (bodyIndex + offset == 0) {
1167 // printf("lowestDomainList->domainListLevels[%i] = %i\n", relevantIndex, lowestDomainList->relevantDomainListLevels[relevantIndex]);
1168 //}
1169
1170 // determine domain list node's bounding box (in order to determine the distance)
1171 //printf("sph: lowestDomainList: (%e, %e, %e)\n", particles->x[lowestDomainList->relevantDomainListIndices[relevantIndex]],
1172 // particles->y[lowestDomainList->relevantDomainListIndices[relevantIndex]], particles->z[lowestDomainList->relevantDomainListIndices[relevantIndex]]);
1173
1174 for (int j = 0; j < lowestDomainList->relevantDomainListLevels[relevantIndex]; j++) {
1175 //childPath = 0;
1176 if (particles->x[lowestDomainList->relevantDomainListIndices[relevantIndex]] < 0.5 * (min_x + max_x)) {
1177 //childPath += 1;
1178 max_x = 0.5 * (min_x + max_x);
1179 } else {
1180 min_x = 0.5 * (min_x + max_x);
1181 }
1182#if DIM > 1
1183 if (particles->y[lowestDomainList->relevantDomainListIndices[relevantIndex]] < 0.5 * (min_y + max_y)) {
1184 //childPath += 2;
1185 max_y = 0.5 * (min_y + max_y);
1186 } else {
1187 min_y = 0.5 * (min_y + max_y);
1188 }
1189#if DIM == 3
1190 if (particles->z[lowestDomainList->relevantDomainListIndices[relevantIndex]] < 0.5 * (min_z + max_z)) {
1191 //childPath += 4;
1192 max_z = 0.5 * (min_z + max_z);
1193 } else {
1194 min_z = 0.5 * (min_z + max_z);
1195 }
1196#endif
1197#endif
1198 }
1199
1200 // x-direction
1201 if (particles->x[bodyIndex + offset] < min_x) {
1202 // outside
1203 dx = particles->x[bodyIndex + offset] - min_x;
1204 } else if (particles->x[bodyIndex + offset] > max_x) {
1205 // outside
1206 dx = particles->x[bodyIndex + offset] - max_x;
1207 } else {
1208 // in between: do nothing
1209 dx = 0;
1210 }
1211#if DIM > 1
1212 // y-direction
1213 if (particles->y[bodyIndex + offset] < min_y) {
1214 // outside
1215 dy = particles->y[bodyIndex + offset] - min_y;
1216 } else if (particles->y[bodyIndex + offset] > max_y) {
1217 // outside
1218 dy = particles->y[bodyIndex + offset] - max_y;
1219 } else {
1220 // in between: do nothing
1221 dy = 0;
1222 }
1223#if DIM == 3
1224 // z-direction
1225 if (particles->z[bodyIndex + offset] < min_z) {
1226 // outside
1227 dz = particles->z[bodyIndex + offset] - min_z;
1228 } else if (particles->z[bodyIndex + offset] > max_z) {
1229 // outside
1230 dz = particles->z[bodyIndex + offset] - max_z;
1231 } else {
1232 // in between: do nothing
1233 dz = 0;
1234 }
1235#endif
1236#endif
1237
1238#if DIM == 1
1239 d = dx*dx;
1240#elif DIM == 2
1241 d = dx*dx + dy*dy;
1242#else
1243 d = dx*dx + dy*dy + dz*dz;
1244#endif
1245
1246 //if ((bodyIndex + offset) % 500 == 0) {
1247 // printf("d = %e < (%e * %e = %e) sml = %e\n", d, searchRadius, searchRadius,
1248 // searchRadius * searchRadius, particles->sml[bodyIndex + offset]);
1249 //}
1250 //if (d < (particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset])) {
1251 if (d < (searchRadius * searchRadius)) {
1252 //printf("d = %f < (%f * %f = %f)\n", d, particles->sml[bodyIndex + offset], particles->sml[bodyIndex + offset],
1253 // particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset]);
1254
1255 sendIndices[bodyIndex + offset] = 1;
1256 }
1257 //else {
1258 // sendIndices[bodyIndex + offset] = -1;
1259 //}
1260
1261 __threadfence();
1262 offset += stride;
1263 }
1264
1265
1266 }
1267
1268 // TODO: SPH::symbolicForce_test version
1269 // - dispatch all (lowest) domain list nodes for one process directly
1270 // - min, max via memory not via computing
1271 __global__ void symbolicForce_test(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
1272 DomainList *lowestDomainList, integer *sendIndices, real searchRadius,
1273 integer n, integer m, integer relevantProc, integer relevantIndicesCounter,
1274 integer relevantIndexOld, Curve::Type curveType) {
1275
1276 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
1277 integer stride = blockDim.x * gridDim.x;
1278 integer offset = 0;
1279
1280 //integer insertIndex;
1281 //integer insertIndexOffset;
1282
1283 //integer currentChild;
1284 //integer childPath;
1285 integer currentParticleIndex;
1286
1287 real dx, min_x, max_x;
1288#if DIM > 1
1289 real dy, min_y, max_y;
1290#if DIM == 3
1291 real dz, min_z, max_z;
1292#endif
1293#endif
1294 real d;
1295
1296 bool added;
1297
1298 while ((bodyIndex + offset) < n) {
1299
1300 added = false;
1301
1302 for (int relevantIndex = 0; relevantIndex < relevantIndicesCounter; relevantIndex++) {
1303
1304 if (lowestDomainList->relevantDomainListProcess[relevantIndex] != relevantProc) {
1305 continue;
1306 }
1307
1308 if (added) {
1309 break;
1310 }
1311
1312 currentParticleIndex = bodyIndex + offset;
1313
1314 min_x = lowestDomainList->borders[lowestDomainList->relevantDomainListOriginalIndex[relevantIndex]*2*DIM];
1315 max_x = lowestDomainList->borders[lowestDomainList->relevantDomainListOriginalIndex[relevantIndex]*2*DIM+1];
1316#if DIM > 1
1317 min_y = lowestDomainList->borders[lowestDomainList->relevantDomainListOriginalIndex[relevantIndex]*2*DIM+2];
1318 max_y = lowestDomainList->borders[lowestDomainList->relevantDomainListOriginalIndex[relevantIndex]*2*DIM+3];
1319#if DIM == 3
1320 min_z = lowestDomainList->borders[lowestDomainList->relevantDomainListOriginalIndex[relevantIndex]*2*DIM+4];
1321 max_z = lowestDomainList->borders[lowestDomainList->relevantDomainListOriginalIndex[relevantIndex]*2*DIM+5];
1322#endif
1323#endif
1324
1325 //printf("x -> (%e, %e), y -> (%e, %e), z -> (%e, %e)\n", min_x, max_x, min_y, max_y, min_z, max_z);
1326
1327 // x-direction
1328 if (particles->x[currentParticleIndex] < min_x) {
1329 // outside
1330 dx = particles->x[currentParticleIndex] - min_x;
1331 } else if (particles->x[currentParticleIndex] > max_x) {
1332 // outside
1333 dx = particles->x[currentParticleIndex] - max_x;
1334 } else {
1335 // in between: do nothing
1336 dx = 0;
1337 }
1338#if DIM > 1
1339 // y-direction
1340 if (particles->y[currentParticleIndex] < min_y) {
1341 // outside
1342 dy = particles->y[currentParticleIndex] - min_y;
1343 } else if (particles->y[currentParticleIndex] > max_y) {
1344 // outside
1345 dy = particles->y[currentParticleIndex] - max_y;
1346 } else {
1347 // in between: do nothing
1348 dy = 0;
1349 }
1350#if DIM == 3
1351 // z-direction
1352 if (particles->z[currentParticleIndex] < min_z) {
1353 // outside
1354 dz = particles->z[currentParticleIndex] - min_z;
1355 } else if (particles->z[currentParticleIndex] > max_z) {
1356 // outside
1357 dz = particles->z[currentParticleIndex] - max_z;
1358 } else {
1359 // in between: do nothing
1360 dz = 0;
1361 }
1362#endif
1363#endif
1364
1365#if DIM == 1
1366 d = dx*dx;
1367#elif DIM == 2
1368 d = dx*dx + dy*dy;
1369#else
1370 d = dx*dx + dy*dy + dz*dz;
1371#endif
1372
1373 //if ((bodyIndex + offset) % 500 == 0) {
1374 // printf("d = %e < (%e * %e = %e) sml = %e\n", d, searchRadius, searchRadius,
1375 // searchRadius * searchRadius, particles->sml[bodyIndex + offset]);
1376 //}
1377 //if (d < (particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset])) {
1378 //if (currentParticleIndex % 100 == 0) {
1379 // printf("d = %e < %e\n", d, searchRadius * searchRadius);
1380 //}
1381 if (d < (searchRadius * searchRadius)) {
1382 //printf("i = %i: d = %e < %e\n", currentParticleIndex, d, searchRadius * searchRadius);
1383 //printf("d = %f < (%f * %f = %f)\n", d, particles->sml[bodyIndex + offset], particles->sml[bodyIndex + offset],
1384 // particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset]);
1385 added = true;
1386 sendIndices[currentParticleIndex] = 1;
1387 }
1388 //else {
1389 // sendIndices[bodyIndex + offset] = -1;
1390 //}
1391
1392 }
1393
1394 //__threadfence();
1395 offset += stride;
1396 }
1397
1398
1399 }
1400
1401 __global__ void symbolicForce_test2(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
1402 DomainList *domainList, integer *sendIndices, real searchRadius,
1403 integer n, integer m, integer relevantProc, integer relevantIndicesCounter,
1404 Curve::Type curveType) {
1405
1406 int bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
1407 int stride = blockDim.x * gridDim.x;
1408 int offset = 0; // start with numParticles
1409
1410 int currentParticleIndex, particleLevel, domainListLevel, currentDomainListIndex, childIndex, currentProc;
1411
1412 real min_x, max_x;
1413 real dx;
1414#if DIM > 1
1415 real min_y, max_y;
1416 real dy;
1417#if DIM == 3
1418 real min_z, max_z;
1419 real dz;
1420#endif
1421#endif
1422 real d;
1423
1424 //bool added;
1425 int added = 0;
1426
1427 //printf("Hallo !!!!! %i \n", n);
1428
1429 while ((bodyIndex + offset) < n) {
1430
1431 //added = false;
1432
1433 for (int relevantIndex = 0; relevantIndex < relevantIndicesCounter; relevantIndex++) {
1434
1435
1436 currentProc = domainList->relevantDomainListProcess[relevantIndex];
1437
1438 //if ((added >> currentProc) & 1) {
1439 // continue;
1440 //}
1441
1442
1443 currentParticleIndex = bodyIndex + offset;
1444 //domainListLevel = domainList->relevantDomainListLevels[relevantIndex];
1445 //particleLevel = particles->level[currentParticleIndex];
1446 currentDomainListIndex = domainList->relevantDomainListIndices[relevantIndex];
1447
1448
1449
1450 min_x = domainList->borders[domainList->relevantDomainListOriginalIndex[relevantIndex] * 2 *
1451 DIM];
1452 max_x = domainList->borders[
1453 domainList->relevantDomainListOriginalIndex[relevantIndex] * 2 * DIM +
1454 1];
1455#if DIM > 1
1456 min_y = domainList->borders[
1457 domainList->relevantDomainListOriginalIndex[relevantIndex] * 2 * DIM +
1458 2];
1459 max_y = domainList->borders[
1460 domainList->relevantDomainListOriginalIndex[relevantIndex] * 2 * DIM +
1461 3];
1462#if DIM == 3
1463 min_z = domainList->borders[
1464 domainList->relevantDomainListOriginalIndex[relevantIndex] * 2 * DIM +
1465 4];
1466 max_z = domainList->borders[
1467 domainList->relevantDomainListOriginalIndex[relevantIndex] * 2 * DIM +
1468 5];
1469#endif
1470#endif
1471
1472 // determine (smallest) distance between domain list box and (pseudo-) particle
1473 if (particles->x[currentParticleIndex] < min_x) {
1474 dx = particles->x[currentParticleIndex] - min_x;
1475 } else if (particles->x[currentParticleIndex] > max_x) {
1476 dx = particles->x[currentParticleIndex] - max_x;
1477 } else {
1478 dx = 0.;
1479 }
1480#if DIM > 1
1481 if (particles->y[currentParticleIndex] < min_y) {
1482 dy = particles->y[currentParticleIndex] - min_y;
1483 } else if (particles->y[currentParticleIndex] > max_y) {
1484 dy = particles->y[currentParticleIndex] - max_y;
1485 } else {
1486 dy = 0.;
1487 }
1488#if DIM == 3
1489 if (particles->z[currentParticleIndex] < min_z) {
1490 dz = particles->z[currentParticleIndex] - min_z;
1491 } else if (particles->z[currentParticleIndex] > max_z) {
1492 dz = particles->z[currentParticleIndex] - max_z;
1493 } else {
1494 dz = 0.;
1495 }
1496#endif
1497#endif
1498
1499#if DIM == 1
1500 d = dx*dx;
1501#elif DIM == 2
1502 d = dx*dx + dy*dy;
1503#else
1504 d = dx * dx + dy * dy + dz * dz;
1505#endif
1506
1507 //printf("d = %e < %e\n", d, searchRadius * searchRadius);
1508 if (d < (searchRadius * searchRadius)) {
1509
1510 //printf("adding ...\n");
1511
1512 added = added | (1 << currentProc);
1513 sendIndices[currentParticleIndex] = sendIndices[currentParticleIndex] | (1 << currentProc);
1514
1515 }
1516 }
1517
1518 added = 0;
1519 offset += stride;
1520 }
1521
1522 }
1523
1524
1525 __global__ void collectSendIndices(Tree *tree, Particles *particles, integer *sendIndices,
1526 integer *particles2Send, integer *particlesCount,
1527 integer n, integer length, Curve::Type curveType) {
1528
1529 integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
1530 integer stride = blockDim.x*gridDim.x;
1531 integer offset = 0;
1532
1533 integer particleInsertIndex;
1534
1535 while ((bodyIndex + offset) < length) {
1536
1537 if (sendIndices[bodyIndex + offset] == 1) {
1538 particleInsertIndex = atomicAdd(particlesCount, 1);
1539 particles2Send[particleInsertIndex] = bodyIndex + offset;
1540 //printf("check: sending: (%e, %e, %e) %e\n", particles->x[bodyIndex + offset], particles->y[bodyIndex + offset],
1541 // particles->z[bodyIndex + offset], particles->mass[bodyIndex + offset]);
1542 }
1543
1544 __threadfence();
1545 offset += stride;
1546 }
1547
1548 }
1549
1550 __global__ void collectSendIndices_test2(Tree *tree, Particles *particles, integer *sendIndices,
1551 integer *particles2Send, integer *particlesCount,
1552 integer numParticlesLocal, integer numParticles,
1553 integer treeIndex, int currentProc, Curve::Type curveType) {
1554
1555 integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
1556 integer stride = blockDim.x*gridDim.x;
1557 integer offset = 0;
1558
1559 integer particleInsertIndex;
1560
1561 // it is a particle
1562 while ((bodyIndex + offset) < numParticlesLocal) {
1563 if ((sendIndices[bodyIndex + offset] >> currentProc) & 1) { // TODO: >= 1 or == 1 (was == 1)
1564 if (bodyIndex + offset < numParticlesLocal) {
1565 particleInsertIndex = atomicAdd(particlesCount, 1);
1566 particles2Send[particleInsertIndex] = bodyIndex + offset;
1567 }
1568 }
1569 __threadfence();
1570 offset += stride;
1571 }
1572 }
1573
1574 // deprecated
1575 __global__ void particles2Send(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
1576 DomainList *domainList, DomainList *lowestDomainList, integer maxLevel,
1577 integer *toSend, integer *sendCount, integer *alreadyInserted,
1578 integer insertOffset,
1579 integer numParticlesLocal, integer numParticles, integer numNodes, real radius,
1580 Curve::Type curveType) {
1581
1582 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
1583 integer stride = blockDim.x * gridDim.x;
1584 integer offset = 0;
1585
1586 integer insertIndex;
1587 integer insertIndexOffset;
1588
1589 integer proc, currentChild;
1590 integer childPath;
1591
1592 real dx, min_x, max_x;
1593#if DIM > 1
1594 real dy, min_y, max_y;
1595#if DIM == 3
1596 real dz, min_z, max_z;
1597#endif
1598#endif
1599 real d;
1600
1601 //int alreadyInserted[10];
1602
1603 while ((bodyIndex + offset) < numParticlesLocal) {
1604
1605 if ((bodyIndex + offset) == 0) {
1606 printf("sphParticles2SendKernel: insertOffset = %i\n", insertOffset);
1607 }
1608
1609 //toSend[bodyIndex + offset] = -1;
1610
1611 for (int i = 0; i < subDomainKeyTree->numProcesses; i++) {
1612 alreadyInserted[i] = 0;
1613 }
1614
1615 // loop over (lowest?) domain list nodes
1616 for (int i = 0; i < *lowestDomainList->domainListIndex; i++) {
1617
1618 min_x = *tree->minX;
1619 max_x = *tree->maxX;
1620#if DIM > 1
1621 min_y = *tree->minY;
1622 max_y = *tree->maxY;
1623#if DIM == 3
1624 min_z = *tree->minZ;
1625 max_z = *tree->maxZ;
1626#endif
1627#endif
1628
1629 // TODO: hilbert change!?
1630 proc = KeyNS::key2proc(lowestDomainList->domainListKeys[i], subDomainKeyTree);
1631 // check if (lowest?) domain list node belongs to other process
1632
1633 if (proc != subDomainKeyTree->rank && alreadyInserted[proc] != 1) {
1634
1635 /*int path[MAX_LEVEL];
1636 for (integer j = 0; j <= lowestDomainList->domainListLevels[i]; j++) { //TODO: "<" or "<="
1637 path[j] = (integer) (
1638 lowestDomainList->domainListKeys[i] >> (MAX_LEVEL * DIM - DIM * (j + 1))
1639 & (integer) (POW_DIM - 1));
1640 }
1641
1642 for (integer j = 0; j <= lowestDomainList->domainListLevels[i]; j++) {
1643
1644 currentChild = path[j];
1645
1646 if (currentChild % 2 != 0) {
1647 max_x = 0.5 * (min_x + max_x);
1648 currentChild -= 1;
1649 } else {
1650 min_x = 0.5 * (min_x + max_x);
1651 }
1652#if DIM > 1
1653 if (currentChild % 2 == 0 && currentChild % 4 != 0) {
1654 max_y = 0.5 * (min_y + max_y);
1655 currentChild -= 2;
1656 } else {
1657 min_y = 0.5 * (min_y + max_y);
1658 }
1659#if DIM == 3
1660 if (currentChild == 4) {
1661 max_z = 0.5 * (min_z + max_z);
1662 currentChild -= 4;
1663 } else {
1664 min_z = 0.5 * (min_z + max_z);
1665 }
1666#endif
1667#endif
1668 }*/
1669
1670 // determine domain list node's bounding box (in order to determine the distance)
1671 for (int j = 0; j < lowestDomainList->domainListLevels[i]; j++) {
1672 childPath = 0;
1673 if (particles->x[lowestDomainList->domainListIndices[i]] < 0.5 * (min_x + max_x)) {
1674 //childPath += 1;
1675 max_x = 0.5 * (min_x + max_x);
1676 } else {
1677 min_x = 0.5 * (min_x + max_x);
1678 }
1679#if DIM > 1
1680 if (particles->y[lowestDomainList->domainListIndices[i]] < 0.5 * (min_y + max_y)) {
1681 //childPath += 2;
1682 max_y = 0.5 * (min_y + max_y);
1683 } else {
1684 min_y = 0.5 * (min_y + max_y);
1685 }
1686#if DIM == 3
1687 if (particles->z[lowestDomainList->domainListIndices[i]] < 0.5 * (min_z + max_z)) {
1688 //childPath += 4;
1689 max_z = 0.5 * (min_z + max_z);
1690 } else {
1691 min_z = 0.5 * (min_z + max_z);
1692 }
1693#endif
1694#endif
1695 }
1696
1697 // x-direction
1698 if (particles->x[bodyIndex + offset] < min_x) {
1699 // outside
1700 dx = particles->x[bodyIndex + offset] - min_x;
1701 } else if (particles->x[bodyIndex + offset] > max_x) {
1702 // outside
1703 dx = particles->x[bodyIndex + offset] - max_x;
1704 } else {
1705 // in between: do nothing
1706 dx = 0;
1707 }
1708#if DIM > 1
1709 // y-direction
1710 if (particles->y[bodyIndex + offset] < min_y) {
1711 // outside
1712 dy = particles->y[bodyIndex + offset] - min_y;
1713 } else if (particles->y[bodyIndex + offset] > max_y) {
1714 // outside
1715 dy = particles->y[bodyIndex + offset] - max_y;
1716 } else {
1717 // in between: do nothing
1718 dy = 0;
1719 }
1720#if DIM == 3
1721 // z-direction
1722 if (particles->z[bodyIndex + offset] < min_z) {
1723 // outside
1724 dz = particles->z[bodyIndex + offset] - min_z;
1725 } else if (particles->z[bodyIndex + offset] > max_z) {
1726 // outside
1727 dz = particles->z[bodyIndex + offset] - max_z;
1728 } else {
1729 // in between: do nothing
1730 dz = 0;
1731 }
1732#endif
1733#endif
1734
1735#if DIM == 1
1736 d = dx*dx;
1737#elif DIM == 2
1738 d = dx*dx + dy*dy;
1739#else
1740 d = dx * dx + dy * dy + dz * dz;
1741#endif
1742
1743 if (d < (particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset])) {
1744
1745 //printf("d = %f < %f * %f = %f\n", d, particles->sml[bodyIndex + offset], particles->sml[bodyIndex + offset],
1746 // particles->sml[bodyIndex + offset] * particles->sml[bodyIndex + offset]);
1747
1748 insertIndex = atomicAdd(&sendCount[proc], 1);
1749 if (insertIndex > 100000) {
1750 printf("Attention!!! insertIndex: %i\n", insertIndex);
1751 }
1752 insertIndexOffset = insertOffset * proc; //0;
1753 toSend[insertIndexOffset + insertIndex] = bodyIndex + offset;
1754 //printf("[rank %i] toSend[%i] = %i\n", subDomainKeyTree->rank, insertIndexOffset + insertIndex,
1755 // toSend[insertIndexOffset + insertIndex]);
1756 //toSend[proc][insertIndex] = bodyIndex+offset;
1757 /*if (insertIndex % 100 == 0) {
1758 printf("[rank %i] Inserting %i into : %i + %i toSend[%i] = %i\n", s->rank, bodyIndex+offset,
1759 (insertOffset * proc), insertIndex, (insertOffset * proc) + insertIndex,
1760 toSend[(insertOffset * proc) + insertIndex]);
1761 }*/
1762 alreadyInserted[proc] = 1;
1763 //break;
1764 } else {
1765 // else: do nothing
1766 }
1767 }
1768 }
1769
1770 __threadfence();
1771 offset += stride;
1772 }
1773
1774 }
1775
1776 __global__ void collectSendIndicesBackup(integer *toSend, integer *toSendCollected, integer count) {
1777
1778 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
1779 integer stride = blockDim.x * gridDim.x;
1780 integer offset = 0;
1781
1782 while ((bodyIndex + offset) < count) {
1783 toSendCollected[bodyIndex + offset] = toSend[bodyIndex + offset];
1784 //printf("toSendCollected[%i] = %i\n", bodyIndex + offset, toSendCollected[bodyIndex + offset]);
1785 offset += stride;
1786 }
1787 }
1788
1789 __global__ void collectSendEntriesBackup(SubDomainKeyTree *subDomainKeyTree, real *entry, real *toSend,
1790 integer *sendIndices, integer *sendCount, integer totalSendCount,
1791 integer insertOffset) {
1792
1793 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
1794 integer stride = blockDim.x * gridDim.x;
1795 integer offset = 0;
1796
1797 integer proc = subDomainKeyTree->rank - 1;
1798 if (proc < 0) {
1799 proc = 0;
1800 }
1801
1802 if ((bodyIndex + offset) == 0) {
1803 printf("[rank %i] sendCount(%i, %i)\n", subDomainKeyTree->rank, sendCount[0], sendCount[1]);
1804 }
1805
1806 //bodyIndex += proc*insertOffset;
1807
1808 while ((bodyIndex + offset) < totalSendCount) {
1809 toSend[bodyIndex + offset] = entry[sendIndices[bodyIndex + offset]];
1810 printf("toSend[%i] = %f sendIndices = %i (insertOffset = %i)\n", bodyIndex + offset, toSend[bodyIndex + offset],
1811 sendIndices[bodyIndex + offset], insertOffset);
1812 offset += stride;
1813 }
1814 }
1815
1816 /*while ((bodyIndex + offset) < tree->toDeleteLeaf[1]) {
1817
1818 // copy bounding box(es)
1819 min_x = *tree->minX;
1820 max_x = *tree->maxX;
1821#if DIM > 1
1822 min_y = *tree->minY;
1823 max_y = *tree->maxY;
1824#if DIM == 3
1825 min_z = *tree->minZ;
1826 max_z = *tree->maxZ;
1827#endif
1828#endif
1829 temp = 0;
1830 childPath = 0;
1831
1832 // find insertion point for body
1833 if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) { // x direction
1834 childPath += 1;
1835 max_x = 0.5 * (min_x + max_x);
1836 }
1837 else {
1838 min_x = 0.5 * (min_x + max_x);
1839 }
1840#if DIM > 1
1841 if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) { // y direction
1842 childPath += 2;
1843 max_y = 0.5 * (min_y + max_y);
1844 }
1845 else {
1846 min_y = 0.5 * (min_y + max_y);
1847 }
1848#if DIM == 3
1849 if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) { // z direction
1850 childPath += 4;
1851 max_z = 0.5 * (min_z + max_z);
1852 }
1853 else {
1854 min_z = 0.5 * (min_z + max_z);
1855 }
1856#endif
1857#endif
1858
1859
1860 integer childIndex = tree->child[temp*POW_DIM + childPath];
1861
1862 printf("rank[%i] child = %i childIndex = %i inserting x = (%f, %f, %f) %f\n", subDomainKeyTree->rank,
1863 childPath, childIndex, particles->x[bodyIndex + offset],
1864 particles->y[bodyIndex + offset],
1865 particles->z[bodyIndex + offset],
1866 particles->mass[bodyIndex + offset]);
1867
1868 offset += stride;
1869
1870 } */
1871
1872
1873#define COMPUTE_DIRECTLY 0
1874
1875 __global__ void insertReceivedParticles(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
1876 DomainList *domainList, DomainList *lowestDomainList, int n, int m) {
1877
1878
1879 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
1880 integer stride = blockDim.x * gridDim.x;
1881
1882 //note: -1 used as "null pointer"
1883 //note: -2 used to lock a child (pointer)
1884
1885 volatile integer *childList = tree->child;
1886
1887 integer offset;
1888 int level;
1889 bool newBody = true;
1890
1891 real min_x;
1892 real max_x;
1893 real x;
1894#if DIM > 1
1895 real y;
1896 real min_y;
1897 real max_y;
1898#if DIM == 3
1899 real z;
1900 real min_z;
1901 real max_z;
1902#endif
1903#endif
1904
1905 integer childPath;
1906 integer temp;
1907
1908 offset = tree->toDeleteLeaf[0];
1909
1910 while ((bodyIndex + offset) < tree->toDeleteLeaf[1]) {
1911
1912 if (newBody) {
1913
1914 newBody = false;
1915 level = 0;
1916
1917 //printf("check: inserting: (%e, %e, %e) %e\n", particles->x[bodyIndex + offset], particles->y[bodyIndex + offset],
1918 // particles->z[bodyIndex + offset], particles->mass[bodyIndex + offset]);
1919
1920 // copy bounding box(es)
1921 min_x = *tree->minX;
1922 max_x = *tree->maxX;
1923 x = particles->x[bodyIndex + offset];
1924#if DIM > 1
1925 y = particles->y[bodyIndex + offset];
1926 min_y = *tree->minY;
1927 max_y = *tree->maxY;
1928#if DIM == 3
1929 z = particles->z[bodyIndex + offset];
1930 min_z = *tree->minZ;
1931 max_z = *tree->maxZ;
1932#endif
1933#endif
1934 temp = 0;
1935 childPath = 0;
1936
1937 // find insertion point for body
1938 //if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) { // x direction
1939 if (x < 0.5 * (min_x + max_x)) { // x direction
1940 childPath += 1;
1941 max_x = 0.5 * (min_x + max_x);
1942 }
1943 else {
1944 min_x = 0.5 * (min_x + max_x);
1945 }
1946#if DIM > 1
1947 //if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) { // y direction
1948 if (y < 0.5 * (min_y + max_y)) { // y direction
1949 childPath += 2;
1950 max_y = 0.5 * (min_y + max_y);
1951 }
1952 else {
1953 min_y = 0.5 * (min_y + max_y);
1954 }
1955#if DIM == 3
1956 //if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) { // z direction
1957 if (z < 0.5 * (min_z + max_z)) { // z direction
1958 childPath += 4;
1959 max_z = 0.5 * (min_z + max_z);
1960 }
1961 else {
1962 min_z = 0.5 * (min_z + max_z);
1963 }
1964#endif
1965#endif
1966 }
1967
1968 register integer childIndex = childList[temp*POW_DIM + childPath];
1969
1970 // traverse tree until hitting leaf node
1971 while (childIndex >= m) { //n
1972
1973 temp = childIndex;
1974 level++;
1975
1976 //if (particles->level[temp] == -1) {
1977 // printf("level[%i] = %i...\n", temp, particles->level[temp]);
1978 // assert(0);
1979 //}
1980
1981 childPath = 0;
1982
1983 // find insertion point for body
1984 if (x < 0.5 * (min_x + max_x)) { // x direction
1985 childPath += 1;
1986 max_x = 0.5 * (min_x + max_x);
1987 }
1988 else {
1989 min_x = 0.5 * (min_x + max_x);
1990 }
1991#if DIM > 1
1992 if (y < 0.5 * (min_y + max_y)) { // y direction
1993 childPath += 2;
1994 max_y = 0.5 * (min_y + max_y);
1995 }
1996 else {
1997 min_y = 0.5 * (min_y + max_y);
1998 }
1999#if DIM == 3
2000 if (z < 0.5 * (min_z + max_z)) { // z direction
2001 childPath += 4;
2002 max_z = 0.5 * (min_z + max_z);
2003 }
2004 else {
2005 min_z = 0.5 * (min_z + max_z);
2006 }
2007#endif
2008#endif
2009
2010#if COMPUTE_DIRECTLY
2011 if (particles->mass[bodyIndex + offset] != 0) {
2012 //particles->x[temp] += particles->weightedEntry(bodyIndex + offset, Entry::x);
2013 atomicAdd(&particles->x[temp], particles->weightedEntry(bodyIndex + offset, Entry::x));
2014#if DIM > 1
2015 //particles->y[temp] += particles->weightedEntry(bodyIndex + offset, Entry::y);
2016 atomicAdd(&particles->y[temp], particles->weightedEntry(bodyIndex + offset, Entry::y));
2017#if DIM == 3
2018 //particles->z[temp] += particles->weightedEntry(bodyIndex + offset, Entry::z);
2019 atomicAdd(&particles->z[temp], particles->weightedEntry(bodyIndex + offset, Entry::z));
2020#endif
2021#endif
2022 }
2023
2024 //particles->mass[temp] += particles->mass[bodyIndex + offset];
2025 atomicAdd(&particles->mass[temp], particles->mass[bodyIndex + offset]);
2026#endif // COMPUTE_DIRECTLY
2027
2028 atomicAdd(&tree->count[temp], 1);
2029 childIndex = childList[POW_DIM * temp + childPath];
2030 }
2031
2032 __syncthreads();
2033
2034 // if child is not locked
2035 if (childIndex != -2) {
2036
2037 integer locked = temp * POW_DIM + childPath;
2038
2039 if (atomicCAS((int *)&childList[locked], childIndex, -2) == childIndex) {
2040
2041 // check whether a body is already stored at the location
2042 if (childIndex == -1) {
2043 //insert body and release lock
2044 childList[locked] = bodyIndex + offset;
2045 particles->level[bodyIndex + offset] = level + 1;
2046
2047 }
2048 else {
2049 if (childIndex >= n) {
2050 printf("ATTENTION!\n");
2051 }
2052 integer patch = POW_DIM * m; //8*n
2053 while (childIndex >= 0 && childIndex < n) { // was n
2054
2055 //create a new cell (by atomically requesting the next unused array index)
2056 integer cell = atomicAdd(tree->index, 1);
2057 patch = min(patch, cell);
2058
2059 if (patch != cell) {
2060 childList[POW_DIM * temp + childPath] = cell;
2061 }
2062
2063 level++;
2064 // ATTENTION: most likely a problem with level counting (level = level - 1)
2065 // but could be also a problem regarding maximum tree depth...
2066 if (level > (MAX_LEVEL + 1)) {
2067#if DIM == 1
2068 cudaAssert("buildTree: level = %i for index %i (%e)", level,
2069 bodyIndex + offset, particles->x[bodyIndex + offset]);
2070#elif DIM == 2
2071 cudaAssert("buildTree: level = %i for index %i (%e, %e)", level,
2072 bodyIndex + offset, particles->x[bodyIndex + offset],
2073 particles->y[bodyIndex + offset]);
2074#else
2075 cudaAssert("buildTree: level = %i for index %i (%e, %e, %e)", level,
2076 bodyIndex + offset, particles->x[bodyIndex + offset],
2077 particles->y[bodyIndex + offset],
2078 particles->z[bodyIndex + offset]);
2079#endif
2080 }
2081
2082 // insert old/original particle
2083 childPath = 0;
2084 if (particles->x[childIndex] < 0.5 * (min_x + max_x)) { childPath += 1; }
2085#if DIM > 1
2086 if (particles->y[childIndex] < 0.5 * (min_y + max_y)) { childPath += 2; }
2087#if DIM == 3
2088 if (particles->z[childIndex] < 0.5 * (min_z + max_z)) { childPath += 4; }
2089#endif
2090#endif
2091#if COMPUTE_DIRECTLY
2092 particles->x[cell] += particles->weightedEntry(childIndex, Entry::x);
2093 //particles->x[cell] += particles->weightedEntry(childIndex, Entry::x);
2094#if DIM > 1
2095 particles->y[cell] += particles->weightedEntry(childIndex, Entry::y);
2096 //particles->y[cell] += particles->weightedEntry(childIndex, Entry::y);
2097#if DIM == 3
2098 particles->z[cell] += particles->weightedEntry(childIndex, Entry::z);
2099 //particles->z[cell] += particles->weightedEntry(childIndex, Entry::z);
2100#endif
2101#endif
2102
2103 //if (cell % 1000 == 0) {
2104 // printf("buildTree: x[%i] = (%f, %f, %f) from x[%i] = (%f, %f, %f) m = %f\n", cell, particles->x[cell], particles->y[cell],
2105 // particles->z[cell], childIndex, particles->x[childIndex], particles->y[childIndex],
2106 // particles->z[childIndex], particles->mass[childIndex]);
2107 //}
2108
2109 particles->mass[cell] += particles->mass[childIndex];
2110#else // COMPUTE_DIRECTLY
2111 //particles->x[cell] = particles->x[childIndex];
2112 particles->x[cell] = 0.5 * (min_x + max_x);
2113#if DIM > 1
2114 //particles->y[cell] = particles->y[childIndex];
2115 particles->y[cell] = 0.5 * (min_y + max_y);
2116#if DIM == 3
2117 //particles->z[cell] = particles->z[childIndex];
2118 particles->z[cell] = 0.5 * (min_z + max_z);
2119#endif
2120#endif
2121
2122#endif // COMPUTE_DIRECTLY
2123
2124 tree->count[cell] += tree->count[childIndex];
2125
2126 childList[POW_DIM * cell + childPath] = childIndex;
2127 particles->level[cell] = level;
2128 particles->level[childIndex] += 1;
2129 tree->start[cell] = -1;
2130
2131#if DEBUGGING
2132 if (particles->level[cell] >= particles->level[childIndex]) {
2133 printf("lvl: %i vs. %i\n", particles->level[cell], particles->level[childIndex]);
2134 assert(0);
2135 }
2136#endif
2137
2138 // insert new particle
2139 temp = cell;
2140 childPath = 0;
2141
2142 // find insertion point for body
2143 //if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) {
2144 if (x < 0.5 * (min_x + max_x)) {
2145 childPath += 1;
2146 max_x = 0.5 * (min_x + max_x);
2147 } else {
2148 min_x = 0.5 * (min_x + max_x);
2149 }
2150#if DIM > 1
2151 //if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) {
2152 if (y < 0.5 * (min_y + max_y)) {
2153 childPath += 2;
2154 max_y = 0.5 * (min_y + max_y);
2155 } else {
2156 min_y = 0.5 * (min_y + max_y);
2157 }
2158#if DIM == 3
2159 //if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) {
2160 if (z < 0.5 * (min_z + max_z)) {
2161 childPath += 4;
2162 max_z = 0.5 * (min_z + max_z);
2163 } else {
2164 min_z = 0.5 * (min_z + max_z);
2165 }
2166#endif
2167#endif
2168#if COMPUTE_DIRECTLY
2169 // COM / preparing for calculation of COM
2170 if (particles->mass[bodyIndex + offset] != 0) {
2171 //particles->x[cell] += particles->weightedEntry(bodyIndex + offset, Entry::x);
2172 particles->x[cell] += particles->weightedEntry(bodyIndex + offset, Entry::x);
2173#if DIM > 1
2174 //particles->y[cell] += particles->weightedEntry(bodyIndex + offset, Entry::y);
2175 particles->y[cell] += particles->weightedEntry(bodyIndex + offset, Entry::y);
2176#if DIM == 3
2177 //particles->z[cell] += particles->weightedEntry(bodyIndex + offset, Entry::z);
2178 particles->z[cell] += particles->weightedEntry(bodyIndex + offset, Entry::z);
2179#endif
2180#endif
2181 particles->mass[cell] += particles->mass[bodyIndex + offset];
2182 }
2183#endif // COMPUTE_DIRECTLY
2184 tree->count[cell] += tree->count[bodyIndex + offset];
2185 childIndex = childList[POW_DIM * temp + childPath];
2186 }
2187
2188 childList[POW_DIM * temp + childPath] = bodyIndex + offset;
2189 particles->level[bodyIndex + offset] = level + 1;
2190
2191 __threadfence(); // written to global memory arrays (child, x, y, mass) thus need to fence
2192 childList[locked] = patch;
2193 }
2194 offset += stride;
2195 newBody = true;
2196 }
2197 }
2198 __syncthreads(); //TODO: __syncthreads() needed?
2199 }
2200 }
2201
2202 /*__global__ void insertReceivedParticles(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
2203 DomainList *domainList, DomainList *lowestDomainList, int n, int m) {
2204
2205 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
2206 integer stride = blockDim.x * gridDim.x;
2207
2208 //note: -1 used as "null pointer"
2209 //note: -2 used to lock a child (pointer)
2210
2211 integer offset;
2212 int level;
2213 //printf("newBody = %d\n", newBody);
2214
2215 volatile real min_x;
2216 volatile real max_x;
2217 volatile real x;
2218#if DIM > 1
2219 volatile real y;
2220 volatile real min_y;
2221 volatile real max_y;
2222#if DIM == 3
2223 volatile real z;
2224 volatile real min_z;
2225 volatile real max_z;
2226#endif
2227#endif
2228
2229
2230 integer childPath;
2231 integer temp;
2232
2233 offset = tree->toDeleteLeaf[0];
2234
2235 //bool newBody = true;
2236 //volatile int
2237 bool newBody;
2238
2239 while ((bodyIndex + offset) < tree->toDeleteLeaf[1]) {
2240
2241#if DEBUGGING
2242 printf("[rank %i] sph(%i) START newBody = %d offset = %i [rank %i] toDelete = %i - %i\n", subDomainKeyTree->rank, bodyIndex + offset, newBody, offset,
2243 subDomainKeyTree->rank, tree->toDeleteLeaf[0], tree->toDeleteLeaf[1]);
2244#endif
2245
2246 if (newBody) {
2247
2248 //printf("[rank %i] sph(%i) START newBody::newBody = %d\n", subDomainKeyTree->rank, bodyIndex + offset, newBody);
2249 newBody = false;
2250 level = 0;
2251
2252 // copy bounding box(es)
2253 min_x = *tree->minX;
2254 max_x = *tree->maxX;
2255 x = particles->x[bodyIndex + offset];
2256#if DIM > 1
2257 y = particles->y[bodyIndex + offset];
2258 min_y = *tree->minY;
2259 max_y = *tree->maxY;
2260#if DIM == 3
2261 z = particles->z[bodyIndex + offset];
2262 min_z = *tree->minZ;
2263 max_z = *tree->maxZ;
2264#endif
2265#endif
2266 temp = 0;
2267 childPath = 0;
2268
2269 // find insertion point for body
2270 //if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) { // x direction
2271 if (x < 0.5 * (min_x + max_x)) { // x direction
2272 childPath += 1;
2273 max_x = 0.5 * (min_x + max_x);
2274 }
2275 else {
2276 min_x = 0.5 * (min_x + max_x);
2277 }
2278#if DIM > 1
2279 //if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) { // y direction
2280 if (y < 0.5 * (min_y + max_y)) { // y direction
2281 childPath += 2;
2282 max_y = 0.5 * (min_y + max_y);
2283 }
2284 else {
2285 min_y = 0.5 * (min_y + max_y);
2286 }
2287#if DIM == 3
2288 //if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) { // z direction
2289 if (z < 0.5 * (min_z + max_z)) { // z direction
2290 childPath += 4;
2291 max_z = 0.5 * (min_z + max_z);
2292 }
2293 else {
2294 min_z = 0.5 * (min_z + max_z);
2295 }
2296#endif
2297#endif
2298 }
2299
2300 integer childIndex = tree->child[temp*POW_DIM + childPath];
2301
2302#if DIM == 3
2303 if (particles->x[bodyIndex + offset] > max_x || particles->x[bodyIndex + offset] < min_x ||
2304 particles->y[bodyIndex + offset] > max_y || particles->y[bodyIndex + offset] < min_y ||
2305 particles->z[bodyIndex + offset] > max_z || particles->z[bodyIndex + offset] < min_z) {
2306 printf("[rank %i] sph(%i) out of box (%e, %e, %e) min/max = (%e - %e, %e - %e, %e - %e) level = %i\n",
2307 subDomainKeyTree->rank, bodyIndex + offset,
2308 particles->x[bodyIndex + offset], particles->y[bodyIndex + offset], particles->z[bodyIndex + offset],
2309 min_x, max_x, min_y, max_y, min_z, max_z, level);
2310 //assert(0);
2311 cudaAssert("[rank %i] sph index: %i out of box!\n", subDomainKeyTree->rank, bodyIndex + offset);
2312 }
2313
2314//#if DEBUGGING
2315 //if (childIndex != -2 && childIndex != -1 &&
2316 // (particles->x[childIndex] > max_x || particles->x[childIndex] < min_x ||
2317 // particles->y[childIndex] > max_y || particles->y[childIndex] < min_y ||
2318 // particles->z[childIndex] > max_z || particles->z[childIndex] < min_z)) {
2319 // printf("[rank %i] sph(%i) out of box for childIndex = %i (%e, %e, %e) min/max = (%e - %e, %e - %e, %e - %e) level = %i\n", subDomainKeyTree->rank, bodyIndex + offset,
2320 // childIndex, particles->x[childIndex], particles->y[childIndex], particles->z[childIndex],
2321 // min_x, max_x, min_y, max_y, min_z, max_z, level);
2322 // //assert(0);
2323 //}
2324//#endif
2325#endif
2326
2327#if DEBUGGING
2328 printf("[rank %i] sph(%i) childIndex = %i temp = %i childPath = %i (%e, %e, %e) vs (%e, %e, %e) min/max = (%e - %e, %e - %e, %e - %e\n",
2329 subDomainKeyTree->rank, bodyIndex + offset, childIndex, temp, childPath,
2330 particles->x[bodyIndex + offset], particles->y[bodyIndex + offset], particles->z[bodyIndex + offset],
2331 particles->x[childIndex], particles->y[childIndex], particles->z[childIndex],
2332 min_x, max_x, min_y, max_y, min_z, max_z);
2333#endif
2334
2335 // traverse tree until hitting leaf node
2336 while (childIndex >= m) { //n
2337
2338 temp = childIndex;
2339 level++;
2340
2341 childPath = 0;
2342
2343 // find insertion point for body
2344 //if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) { // x direction
2345 if (x < 0.5 * (min_x + max_x)) { // x direction
2346 childPath += 1;
2347 max_x = 0.5 * (min_x + max_x);
2348 }
2349 else {
2350 min_x = 0.5 * (min_x + max_x);
2351 }
2352#if DIM > 1
2353 //if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) { // y direction
2354 if (y < 0.5 * (min_y + max_y)) { // y direction
2355 childPath += 2;
2356 max_y = 0.5 * (min_y + max_y);
2357 }
2358 else {
2359 min_y = 0.5 * (min_y + max_y);
2360 }
2361#if DIM == 3
2362 //if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) { // z direction
2363 if (z < 0.5 * (min_z + max_z)) { // z direction
2364 childPath += 4;
2365 max_z = 0.5 * (min_z + max_z);
2366 }
2367 else {
2368 min_z = 0.5 * (min_z + max_z);
2369 }
2370#endif
2371#endif
2372
2373 //atomicAdd(&tree->count[temp], 1);
2374
2375 childIndex = tree->child[POW_DIM * temp + childPath];
2376#if DEBUGGING
2377 printf("[rank %i] sph(%i) childIndex = %i level = %i (temp = %i)\n", subDomainKeyTree->rank, bodyIndex + offset, childIndex, level, temp);
2378#endif
2379 }
2380
2381 // if child is not locked
2382 if (childIndex != -2) {
2383
2384 integer locked = temp * POW_DIM + childPath;
2385
2386 if (atomicCAS(&tree->child[locked], childIndex, -2) == childIndex) {
2387
2388 // check whether a body is already stored at the location
2389 if (childIndex == -1) {
2390#if DEBUGGING
2391 printf("[rank %i] sph(%i) inserting for temp = %i and childPath %i... childIndex = %i level = %i\n", subDomainKeyTree->rank, bodyIndex + offset, temp, childPath, childIndex, level);
2392#endif
2393 //insert body and release lock
2394 tree->child[locked] = bodyIndex + offset;
2395 particles->level[bodyIndex + offset] = level + 1;
2396
2397 }
2398 else {
2399 //if (childIndex >= n) {
2400 // printf("ATTENTION!\n");
2401 //}
2402 integer patch = POW_DIM * m; //8*n
2403 while (childIndex >= 0 && childIndex < n) { // was n
2404
2405 //create a new cell (by atomically requesting the next unused array index)
2406 integer cell = atomicAdd(tree->index, 1);
2407 //printf("cell = %i\n", cell);
2408 patch = min(patch, cell);
2409
2410 if (patch != cell) {
2411 tree->child[POW_DIM * temp + childPath] = cell;
2412 }
2413
2414 level++;
2415
2416 if (level > (MAX_LEVEL + 1)) {
2417 printf("[rank %i] sph(%i) assert... childIndex = %i cell = %i (type: %i) temp = %i (type: %i) level = %i (%i, %i)\n",
2418 subDomainKeyTree->rank, bodyIndex + offset, childIndex, cell,
2419 particles->nodeType[cell], temp, particles->nodeType[temp], level,
2420 tree->toDeleteLeaf[0], tree->toDeleteLeaf[1]);
2421
2422 printf("level = %i for index %i (%e, %e, %e)\n", level,
2423 bodyIndex + offset, particles->x[bodyIndex + offset],
2424 particles->y[bodyIndex + offset], particles->z[bodyIndex + offset]);
2425 cudaAssert("level = %i > MAX_LEVEL %i\n", level, MAX_LEVEL);
2426 }
2427
2428#if DEBUGGING
2429 // debugging
2430 //if (particles->x[bodyIndex + offset] == particles->x[childIndex] &&
2431 // particles->y[bodyIndex + offset] == particles->y[childIndex] &&
2432 // particles->z[bodyIndex + offset] == particles->z[childIndex]) {
2433 // printf("duplicate!!!: %i vs %i (%e, %e, %e) vs (%e, %e, %e)\n",
2434 // bodyIndex + offset, childIndex, particles->x[bodyIndex + offset],
2435 // particles->y[bodyIndex + offset], particles->z[bodyIndex + offset],
2436 // particles->x[childIndex], particles->y[childIndex],
2437 // particles->z[childIndex]);
2438 //}
2439#endif
2440
2441 // insert old/original particle
2442 childPath = 0;
2443 if (particles->x[childIndex] < 0.5 * (min_x + max_x)) { childPath += 1; }
2444#if DIM > 1
2445 if (particles->y[childIndex] < 0.5 * (min_y + max_y)) { childPath += 2; }
2446#if DIM == 3
2447 if (particles->z[childIndex] < 0.5 * (min_z + max_z)) { childPath += 4; }
2448#endif
2449#endif
2450
2451 //particles->x[cell] = particles->x[childIndex];
2452 particles->x[cell] = 0.5 * (min_x + max_x);
2453#if DIM > 1
2454 //particles->y[cell] = particles->y[childIndex];
2455 particles->y[cell] = 0.5 * (min_y + max_y);
2456#if DIM == 3
2457 //particles->z[cell] = particles->z[childIndex];
2458 particles->z[cell] = 0.5 * (min_z + max_z);
2459#endif
2460#endif
2461
2462 tree->child[POW_DIM * cell + childPath] = childIndex;
2463 //particles->level[temp] = level;
2464
2465#if DEBUGGING
2466 printf("[rank %i] sph(%i) inbetween POW_DIM * %i + %i = %i (%e, %e, %e) vs (%e, %e, %e) min/max = (%e - %e, %e - %e, %e - %e)\n", subDomainKeyTree->rank,
2467 bodyIndex + offset, cell, childPath, childIndex,
2468 particles->x[bodyIndex + offset], particles->y[bodyIndex + offset], particles->z[bodyIndex + offset],
2469 particles->x[childIndex], particles->y[childIndex], particles->z[childIndex],
2470 min_x, max_x, min_y, max_y, min_z, max_z);
2471#endif
2472 particles->level[cell] = level;
2473 particles->level[childIndex] += 1;
2474 tree->start[cell] = -1;
2475
2476 // insert new particle
2477 temp = cell;
2478 childPath = 0;
2479
2480 // find insertion point for body
2481 //if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) {
2482 if (x < 0.5 * (min_x + max_x)) {
2483 childPath += 1;
2484 max_x = 0.5 * (min_x + max_x);
2485 } else {
2486 min_x = 0.5 * (min_x + max_x);
2487 }
2488#if DIM > 1
2489 //if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) {
2490 if (y < 0.5 * (min_y + max_y)) {
2491 childPath += 2;
2492 max_y = 0.5 * (min_y + max_y);
2493 } else {
2494 min_y = 0.5 * (min_y + max_y);
2495 }
2496#if DIM == 3
2497 //if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) {
2498 if (z < 0.5 * (min_z + max_z)) {
2499 childPath += 4;
2500 max_z = 0.5 * (min_z + max_z);
2501 } else {
2502 min_z = 0.5 * (min_z + max_z);
2503 }
2504#endif
2505#endif
2506
2507 tree->count[cell] += tree->count[bodyIndex + offset];
2508 childIndex = tree->child[POW_DIM * temp + childPath];
2509#if DEBUGGING
2510 printf("[rank %i] sph(%i) within childIndex = %i level = %i temp = %i childPath = %i -> %i\n",
2511 subDomainKeyTree->rank, bodyIndex + offset, childIndex, level, temp, childPath, tree->child[POW_DIM * temp + childPath]);
2512#endif
2513 }
2514
2515 tree->child[POW_DIM * temp + childPath] = bodyIndex + offset;
2516 particles->level[bodyIndex + offset] = level + 1;
2517#if DEBUGGING
2518 printf("[rank %i] sph(%i) set temp = %i + childPath = %i = index = %i (level = %i) x[%i] = (%e, %e, %e)\n",
2519 subDomainKeyTree->rank, bodyIndex + offset, temp, childPath, bodyIndex + offset, level, temp,
2520 particles->x[temp], particles->y[temp], particles->z[temp]);
2521#endif
2522
2523 __threadfence(); // written to global memory arrays (child, x, y, mass) thus need to fence
2524 tree->child[locked] = patch;
2525 }
2526#if DEBUGGING
2527 printf("[rank %i] sph(%i) newBody = %d, offset+=stride (= %i) = %i\n", subDomainKeyTree->rank, bodyIndex + offset, newBody, stride, offset + stride);
2528#endif
2529 offset += stride;
2530 newBody = true;
2531#if DEBUGGING
2532 printf("[rank %i] sph(%i) newBody = %d, after offset+=stride (= %i) = %i\n", subDomainKeyTree->rank, bodyIndex + offset, newBody, stride, offset);
2533#endif
2534 }
2535 //else {
2536 // //printf("WTF!\n");
2537 // //newBody = 1; //TODO: needed?
2538 //}
2539 //}
2540 //else {
2541//#if DEBUGGING
2542// printf("[rank %i] sph(%i) waiting... childIndex = %i level = %i newBody = %d\n", subDomainKeyTree->rank, bodyIndex + offset, childIndex, level,
2543// newBody);
2544//#endif
2545 }
2546 __syncthreads(); //TODO: needed?
2547 }
2548 }
2549 */
2550
2551 __global__ void calculateCentersOfMass(Tree *tree, Particles *particles, integer level) {
2552
2553 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
2554 integer stride = blockDim.x * gridDim.x;
2555
2556 integer offset = tree->toDeleteNode[0];
2557
2558 //int counter[21];
2559 //for (int i=0; i<21;i++) {
2560 // counter[i] = 0;
2561 //}
2562
2563 integer index;
2564
2565 while ((bodyIndex + offset) < tree->toDeleteNode[1]) {
2566
2567 if (particles->level[bodyIndex + offset] == level) {
2568
2569 if (particles->level[bodyIndex + offset] == -1 || particles->level[bodyIndex + offset] > 21) {
2570 printf("level[%i] = %i!!!\n", bodyIndex + offset, particles->level[bodyIndex + offset]);
2571 }
2572
2573 particles->mass[bodyIndex + offset] = 0.;
2574 particles->x[bodyIndex + offset] = 0.;
2575#if DIM > 1
2576 particles->y[bodyIndex + offset] = 0.;
2577#if DIM == 3
2578 particles->z[bodyIndex + offset] = 0.;
2579#endif
2580#endif
2581
2582 for (int child = 0; child < POW_DIM; ++child) {
2583 index = POW_DIM * (bodyIndex + offset) + child;
2584 if (tree->child[index] != -1) {
2585 particles->x[bodyIndex + offset] += particles->weightedEntry(tree->child[index], Entry::x);
2586#if DIM > 1
2587 particles->y[bodyIndex + offset] += particles->weightedEntry(tree->child[index], Entry::y);
2588#if DIM == 3
2589 particles->z[bodyIndex + offset] += particles->weightedEntry(tree->child[index], Entry::z);
2590#endif
2591#endif
2592 particles->mass[bodyIndex + offset] += particles->mass[tree->child[index]];
2593 }
2594 }
2595
2596 if (particles->mass[bodyIndex + offset] > 0.) {
2597 particles->x[bodyIndex + offset] /= particles->mass[bodyIndex + offset];
2598#if DIM > 1
2599 particles->y[bodyIndex + offset] /= particles->mass[bodyIndex + offset];
2600#if DIM == 3
2601 particles->z[bodyIndex + offset] /= particles->mass[bodyIndex + offset];
2602#endif
2603#endif
2604 }
2605
2606 //counter[particles->level[bodyIndex + offset]] += 1;
2607
2608 }
2609 offset += stride;
2610 }
2611
2612 //for (int i=0; i<21;i++) {
2613 // printf("counter[%i] = %i\n", i, counter[i]);
2614 //}
2615
2616 }
2617
2618 // TODO: use memory bounding boxes ...
2619 __global__ void determineSearchRadii(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
2620 DomainList *domainList, DomainList *lowestDomainList, real *searchRadii,
2621 int n, int m, Curve::Type curveType) {
2622
2623 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
2624 integer stride = blockDim.x * gridDim.x;
2625 integer offset = 0;
2626
2627 int lowestDomainIndex;
2628 real searchRadius;
2629 int path;
2630 real distance;
2631 keyType key;
2632 int proc;
2633
2634 real min_x, max_x, dx;
2635#if DIM > 1
2636 real min_y, max_y, dy;
2637#if DIM == 3
2638 real min_z, max_z, dz;
2639#endif
2640#endif
2641
2642 while ((bodyIndex + offset) < n) {
2643
2644 searchRadius = 0.;
2645
2646 //for (int i=0; i<*lowestDomainList->domainListIndex; i++) {
2647 for (int i=0; i<*lowestDomainList->domainListCounter; i++) {
2648 //lowestDomainIndex = lowestDomainList->domainListIndices[i];
2649 lowestDomainIndex = lowestDomainList->relevantDomainListIndices[i];
2650 //key = tree->getParticleKey(particles, lowestDomainIndex, MAX_LEVEL, curveType);
2651 //proc = subDomainKeyTree->key2proc(key);
2652 proc = lowestDomainList->relevantDomainListProcess[i];
2653 if (proc != subDomainKeyTree->rank) {
2654 // determine distance
2655 min_x = *tree->minX;
2656 max_x = *tree->maxX;
2657#if DIM > 1
2658 min_y = *tree->minY;
2659 max_y = *tree->maxY;
2660#if DIM == 3
2661 min_z = *tree->minZ;
2662 max_z = *tree->maxZ;
2663#endif
2664#endif
2665 for (int level=0; level<lowestDomainList->domainListLevels[i]; level++) {
2666 path = (integer)(lowestDomainList->domainListKeys[i] >> (MAX_LEVEL * DIM - DIM * (level + 1))& (integer) (POW_DIM - 1));
2667
2668 // Possibility 1
2669 //if (path % 2 != 0) {
2670 if (path & 1) {
2671 max_x = 0.5 * (min_x + max_x);
2672 //path -= 1;
2673 } else {
2674 min_x = 0.5 * (min_x + max_x);
2675 }
2676#if DIM > 1
2677 if ((path >> 1) & 1) {
2678 //if (path % 2 == 0 && path % 4 != 0) {
2679 max_y = 0.5 * (min_y + max_y);
2680 //path -= 2;
2681 } else {
2682 min_y = 0.5 * (min_y + max_y);
2683 }
2684#if DIM == 3
2685 if ((path >> 2) & 1) {
2686 //if (path == 4) {
2687 max_z = 0.5 * (min_z + max_z);
2688 //path -= 4;
2689 } else {
2690 min_z = 0.5 * (min_z + max_z);
2691 }
2692#endif
2693#endif
2694
2695 // Possibility 2
2696 /* // find insertion point for body
2697 if (particles->x[lowestDomainIndex] < 0.5 * (min_x + max_x)) { // x direction
2698 //childPath += 1;
2699 max_x = 0.5 * (min_x + max_x);
2700 }
2701 else {
2702 min_x = 0.5 * (min_x + max_x);
2703 }
2704#if DIM > 1
2705 if (particles->y[lowestDomainIndex] < 0.5 * (min_y + max_y)) { // y direction
2706 //childPath += 2;
2707 max_y = 0.5 * (min_y + max_y);
2708 }
2709 else {
2710 min_y = 0.5 * (min_y + max_y);
2711 }
2712#if DIM == 3
2713 if (particles->z[lowestDomainIndex] < 0.5 * (min_z + max_z)) { // z direction
2714 //childPath += 4;
2715 max_z = 0.5 * (min_z + max_z);
2716 }
2717 else {
2718 min_z = 0.5 * (min_z + max_z);
2719 }
2720#endif
2721#endif*/
2722 }
2723
2724 // x-direction
2725 if (particles->x[bodyIndex + offset] < min_x) {
2726 // outside
2727 dx = particles->x[bodyIndex + offset] - min_x;
2728 } else if (particles->x[bodyIndex + offset] > max_x) {
2729 // outside
2730 dx = particles->x[bodyIndex + offset] - max_x;
2731 } else {
2732 // in between: do nothing
2733 dx = 0;
2734 }
2735#if DIM > 1
2736 // y-direction
2737 if (particles->y[bodyIndex + offset] < min_y) {
2738 // outside
2739 dy = particles->y[bodyIndex + offset] - min_y;
2740 } else if (particles->y[bodyIndex + offset] > max_y) {
2741 // outside
2742 dy = particles->y[bodyIndex + offset] - max_y;
2743 } else {
2744 // in between: do nothing
2745 dy = 0;
2746 }
2747#if DIM == 3
2748 // z-direction
2749 if (particles->z[bodyIndex + offset] < min_z) {
2750 // outside
2751 dz = particles->z[bodyIndex + offset] - min_z;
2752 } else if (particles->z[bodyIndex + offset] > max_z) {
2753 // outside
2754 dz = particles->z[bodyIndex + offset] - max_z;
2755 } else {
2756 // in between: do nothing
2757 dz = 0;
2758 }
2759#endif
2760#endif
2761#if DIM == 1
2762 distance = cuda::math::sqrt(dx*dx);
2763#elif DIM == 2
2764 distance = cuda::math::sqrt(dx*dx + dy*dy);
2765#else
2766 distance = cuda::math::sqrt(dx*dx + dy*dy + dz*dz);
2767#endif
2768
2769 if (distance < particles->sml[bodyIndex + offset]) {
2770 searchRadius = particles->sml[bodyIndex + offset] - distance;
2771 //printf("search: distance %e level = %i\n", distance, lowestDomainList->domainListLevels[i]);
2772 }
2773 }
2774 }
2775
2776 searchRadii[bodyIndex + offset] = searchRadius;
2777 //printf("search: searchRadius: %e\n", searchRadius);
2778
2779 offset += stride;
2780 }
2781
2782 }
2783
2784 /*__global__ void insertReceivedParticles(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
2785 DomainList *domainList, DomainList *lowestDomainList, int n, int m) {
2786
2787 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
2788 integer stride = blockDim.x * gridDim.x;
2789
2790 //note: -1 used as "null pointer"
2791 //note: -2 used to lock a child (pointer)
2792
2793 integer offset;
2794 bool newBody = true;
2795
2796 real min_x, max_x;
2797#if DIM > 1
2798 real min_y, max_y;
2799#if DIM == 3
2800 real min_z, max_z;
2801#endif
2802#endif
2803
2804 integer childPath;
2805 integer temp;
2806
2807 bool isDomainList = false;
2808
2809 offset = 0;
2810
2811 bodyIndex += tree->toDeleteLeaf[0];
2812
2813 while ((bodyIndex + offset) < tree->toDeleteLeaf[1]) { // && (bodyIndex + offset) >= tree->toDeleteLeaf[0]) {
2814
2815 if (newBody) {
2816
2817 newBody = false;
2818 isDomainList = false;
2819
2820 min_x = *tree->minX;
2821 max_x = *tree->maxX;
2822#if DIM > 1
2823 min_y = *tree->minY;
2824 max_y = *tree->maxY;
2825#if DIM == 3
2826 min_z = *tree->minZ;
2827 max_z = *tree->maxZ;
2828#endif
2829#endif
2830
2831 temp = 0;
2832 childPath = 0;
2833
2834 // find insertion point for body
2835 if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) { // x direction
2836 childPath += 1;
2837 max_x = 0.5 * (min_x + max_x);
2838 }
2839 else {
2840 min_x = 0.5 * (min_x + max_x);
2841 }
2842#if DIM > 1
2843 if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) { // y direction
2844 childPath += 2;
2845 max_y = 0.5 * (min_y + max_y);
2846 }
2847 else {
2848 min_y = 0.5 * (min_y + max_y);
2849 }
2850#if DIM == 3
2851 if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) { // z direction
2852 childPath += 4;
2853 max_z = 0.5 * (min_z + max_z);
2854 }
2855 else {
2856 min_z = 0.5 * (min_z + max_z);
2857 }
2858#endif
2859#endif
2860 }
2861
2862 int childIndex = tree->child[temp*POW_DIM + childPath];
2863
2864 // traverse tree until hitting leaf node
2865 while (childIndex >= m) { // && childIndex < (8*m)) { //formerly n
2866
2867 isDomainList = false;
2868
2869 temp = childIndex;
2870
2871 childPath = 0;
2872
2873 // find insertion point for body
2874 if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) { // x direction
2875 childPath += 1;
2876 max_x = 0.5 * (min_x + max_x);
2877 }
2878 else {
2879 min_x = 0.5 * (min_x + max_x);
2880 }
2881#if DIM > 1
2882 if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) { // y direction
2883 childPath += 2;
2884 max_y = 0.5 * (min_y + max_y);
2885 }
2886 else {
2887 min_y = 0.5 * (min_y + max_y);
2888 }
2889#if DIM == 3
2890 if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) { // z direction
2891 childPath += 4;
2892 max_z = 0.5 * (min_z + max_z);
2893 }
2894 else {
2895 min_z = 0.5 * (min_z + max_z);
2896 }
2897#endif
2898#endif
2899
2900// for (int i=0; i<*domainList->domainListIndex; i++) {
2901// if (temp == domainList->domainListIndices[i]) {
2902// isDomainList = true;
2903// break;
2904// }
2905// }
2906
2907 //TODO: !!!
2908// if (!isDomainList) {
2909// if (particles->mass[bodyIndex + offset] != 0) {
2910// atomicAdd(&particles->x[temp], particles->mass[bodyIndex + offset] * particles->x[bodyIndex + offset]);
2911//#if DIM > 1
2912// atomicAdd(&particles->y[temp], particles->mass[bodyIndex + offset] * particles->y[bodyIndex + offset]);
2913//#if DIM == 3
2914// atomicAdd(&particles->z[temp], particles->mass[bodyIndex + offset] * particles->z[bodyIndex + offset]);
2915//#endif
2916//#endif
2917// }
2918// atomicAdd(&particles->mass[temp], particles->mass[bodyIndex + offset]);
2919// //atomicAdd(&count[temp], 1); // do not count, since particles are just temporarily saved on this process
2920// }
2921// atomicAdd(&tree->count[temp], 1); // do not count, since particles are just temporarily saved on this process
2922// childIndex = tree->child[POW_DIM*temp + childPath];
2923 }
2924
2925 // if child is not locked
2926 if (childIndex != -2) {
2927
2928 int locked = temp * 8 + childPath;
2929
2930 //lock
2931 if (atomicCAS(&tree->child[locked], childIndex, -2) == childIndex) {
2932
2933 // check whether a body is already stored at the location
2934 if (childIndex == -1) {
2935 //insert body and release lock
2936 tree->child[locked] = bodyIndex + offset;
2937 }
2938 else {
2939 int patch = POW_DIM * m; //8*n
2940 while (childIndex >= 0 && childIndex < n) {
2941
2942 //debug
2943 //if (x[childIndex] == x[bodyIndex + offset]) {
2944 // printf("ATTENTION (shouldn't happen...): x[%i] = (%f, %f, %f) vs. x[%i] = (%f, %f, %f) | to_delete_leaf = (%i, %i)\n",
2945 // childIndex, x[childIndex], y[childIndex], z[childIndex], bodyIndex + offset, x[bodyIndex + offset],
2946 // y[bodyIndex + offset], z[bodyIndex + offset], to_delete_leaf[0], to_delete_leaf[1]);
2947 //}
2948
2949 //create a new cell (by atomically requesting the next unused array index)
2950 int cell = atomicAdd(tree->index, 1);
2951
2952 patch = min(patch, cell);
2953
2954 if (patch != cell) {
2955 tree->child[POW_DIM * temp + childPath] = cell;
2956 }
2957
2958// if (particles->x[childIndex] == particles->x[bodyIndex + offset] &&
2959// particles->y[childIndex] == particles->y[bodyIndex + offset]) {
2960// printf("[rank %i]ATTENTION!!! %i vs. %i\n", subDomainKeyTree->rank,
2961// childIndex, bodyIndex + offset);
2962// break;
2963// }
2964
2965 // insert old/original particle
2966 childPath = 0;
2967 if (particles->x[childIndex] < 0.5 * (min_x + max_x)) {
2968 childPath += 1;
2969 }
2970#if DIM > 1
2971 if (particles->y[childIndex] < 0.5 * (min_y + max_y)) {
2972 childPath += 2;
2973 }
2974#if DIM == 3
2975 if (particles->z[childIndex] < 0.5 * (min_z + max_z)) {
2976 childPath += 4;
2977 }
2978#endif
2979#endif
2980
2981// particles->x[cell] += particles->mass[childIndex] * particles->x[childIndex];
2982//#if DIM > 1
2983// particles->y[cell] += particles->mass[childIndex] * particles->y[childIndex];
2984//#if DIM == 3
2985// particles->z[cell] += particles->mass[childIndex] * particles->z[childIndex];
2986//#endif
2987//#endif
2988//
2989// particles->mass[cell] += particles->mass[childIndex];
2990// // do not count, since particles are just temporarily saved on this process
2991// tree->count[cell] += tree->count[childIndex];
2992
2993 tree->child[POW_DIM * cell + childPath] = childIndex;
2994
2995 //tree->start[cell] = -1; //TODO: resetting start needed in insertReceivedParticles()?
2996
2997 // insert new particle
2998 temp = cell;
2999 childPath = 0;
3000
3001 // find insertion point for body
3002 if (particles->x[bodyIndex + offset] < 0.5 * (min_x + max_x)) {
3003 childPath += 1;
3004 max_x = 0.5 * (min_x + max_x);
3005 } else {
3006 min_x = 0.5 * (min_x + max_x);
3007 }
3008#if DIM > 1
3009 if (particles->y[bodyIndex + offset] < 0.5 * (min_y + max_y)) {
3010 childPath += 2;
3011 max_y = 0.5 * (min_y + max_y);
3012 } else {
3013 min_y = 0.5 * (min_y + max_y);
3014 }
3015#if DIM == 3
3016 if (particles->z[bodyIndex + offset] < 0.5 * (min_z + max_z)) {
3017 childPath += 4;
3018 max_z = 0.5 * (min_z + max_z);
3019 } else {
3020 min_z = 0.5 * (min_z + max_z);
3021 }
3022#endif
3023#endif
3024
3025 // /* // COM / preparing for calculation of COM
3026 // if (particles->mass[bodyIndex + offset] != 0) {
3027 // particles->x[cell] += particles->mass[bodyIndex + offset] * particles->x[bodyIndex + offset];
3028//#if DIM > 1
3029 // particles->y[cell] += particles->mass[bodyIndex + offset] * particles->y[bodyIndex + offset];
3030//#if DIM == 3
3031 // particles->z[cell] += particles->mass[bodyIndex + offset] * particles->z[bodyIndex + offset];
3032//#endif
3033//#endif
3034 // particles->mass[cell] += particles->mass[bodyIndex + offset];
3035 //}
3036 // // do not count, since particles are just temporarily saved on this process
3037 //tree->count[cell] += tree->count[bodyIndex + offset];
3038
3039 childIndex = tree->child[POW_DIM * temp + childPath];
3040 }
3041
3042 tree->child[POW_DIM * temp + childPath] = bodyIndex + offset;
3043
3044 __threadfence(); // written to global memory arrays (child, x, y, mass) thus need to fence
3045 tree->child[locked] = patch;
3046 }
3047 offset += stride;
3048 newBody = true;
3049 }
3050 else {
3051
3052 }
3053 }
3054 else {
3055
3056 }
3057 __syncthreads();
3058 }
3059
3060 }*/
3061
3062 __global__ void info(Tree *tree, Particles *particles, Helper *helper,
3063 integer numParticlesLocal, integer numParticles, integer numNodes) {
3064
3065 integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
3066 integer stride = blockDim.x * gridDim.x;
3067 integer offset = 0;
3068 integer i;
3069
3070 while ((bodyIndex + offset) < numParticlesLocal) {
3071 if ((bodyIndex + offset) % 1000 == 0) {
3072 i = 0;
3073 printf("particles->noi[%i] = %i\n", bodyIndex + offset, particles->noi[bodyIndex + offset]);
3074 //while (particles->nnl[(bodyIndex + offset) * MAX_NUM_INTERACTIONS + i] != -1 && i < MAX_NUM_INTERACTIONS) {
3075 // printf("particles->nnl[%i * %i + %i] = %i\n", bodyIndex + offset, MAX_NUM_INTERACTIONS, i,
3076 // particles->nnl[(bodyIndex + offset) * MAX_NUM_INTERACTIONS + i]);
3077 // i++;
3078 //}
3079 }
3080
3081 offset += stride;
3082 }
3083 }
3084
3085 namespace Launch {
3086
3087 real fixedRadiusNN_bruteForce(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal,
3088 integer numParticles, integer numNodes) {
3089 ExecutionPolicy executionPolicy; // 4 * numMultiProcessors, 256
3090 return cuda::launch(true, executionPolicy, ::SPH::Kernel::fixedRadiusNN_bruteForce, tree, particles, interactions,
3091 numParticlesLocal, numParticles, numNodes);
3092 }
3093
3094 real fixedRadiusNN(Tree *tree, Particles *particles, integer *interactions, real radius,
3095 integer numParticlesLocal, integer numParticles, integer numNodes) {
3096 //ExecutionPolicy executionPolicy(numParticlesLocal, ::SPH::Kernel::fixedRadiusNN, tree, particles, interactions,
3097 // numParticlesLocal, numParticles, numNodes);
3098 ExecutionPolicy executionPolicy; // 4 * numMultiProcessors, 256
3099 return cuda::launch(true, executionPolicy, ::SPH::Kernel::fixedRadiusNN, tree, particles, interactions,
3100 radius, numParticlesLocal, numParticles, numNodes);
3101 }
3102
3103 real fixedRadiusNN_withinBox(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal,
3104 integer numParticles, integer numNodes) {
3105 //ExecutionPolicy executionPolicy(numParticlesLocal, ::SPH::Kernel::fixedRadiusNN, tree, particles, interactions,
3106 // numParticlesLocal, numParticles, numNodes);
3107 Logger(INFO) << "calling new fixed radius...";
3108 ExecutionPolicy executionPolicy;
3109 return cuda::launch(true, executionPolicy, ::SPH::Kernel::fixedRadiusNN_withinBox, tree, particles, interactions,
3110 numParticlesLocal, numParticles, numNodes);
3111 }
3112
3113 real fixedRadiusNN_sharedMemory(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal,
3114 integer numParticles, integer numNodes) {
3115 size_t sharedMemory = 20 * 2 * sizeof(integer) * MAX_DEPTH;
3116 //int _blockSize;
3117 //int minGridSize;
3118 //int _gridSize;
3119 //cudaOccupancyMaxPotentialBlockSize(&minGridSize, &_blockSize, ::SPH::Kernel::fixedRadiusNN, sharedMemory, 0);
3120 //_gridSize = (numParticlesLocal + _blockSize - 1) / _blockSize;
3121 ExecutionPolicy executionPolicy(256, 10, sharedMemory);
3122 //printf("gridSize: %i, blockSize: %i\n", _gridSize, _blockSize);
3123 //ExecutionPolicy executionPolicy(_gridSize, _blockSize, sharedMemory);
3124 return cuda::launch(true, executionPolicy, ::SPH::Kernel::fixedRadiusNN_sharedMemory, tree, particles, interactions,
3125 numParticlesLocal, numParticles, numNodes);
3126 }
3127
3128 real fixedRadiusNN_variableSML(Material *materials, Tree *tree, Particles *particles, integer *interactions,
3129 integer numParticlesLocal, integer numParticles,
3130 integer numNodes) {
3131 //ExecutionPolicy executionPolicy(256, 256);
3132 ExecutionPolicy executionPolicy; //(1, 256); // 256, 256
3133 return cuda::launch(true, executionPolicy, ::SPH::Kernel::fixedRadiusNN_variableSML, materials, tree, particles, interactions,
3134 numParticlesLocal, numParticles, numNodes);
3135 }
3136
3137 real compTheta(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
3138 DomainList *lowestDomainList, Curve::Type curveType) {
3139 ExecutionPolicy executionPolicy;
3140 return cuda::launch(true, executionPolicy, ::SPH::Kernel::compTheta, subDomainKeyTree, tree,
3141 particles, lowestDomainList, curveType);
3142 }
3143
3144 real symbolicForce(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
3145 DomainList *lowestDomainList, integer *sendIndices, real searchRadius,
3146 integer n, integer m, integer relevantIndex,
3147 Curve::Type curveType) {
3148 ExecutionPolicy executionPolicy;
3149 return cuda::launch(true, executionPolicy, ::SPH::Kernel::symbolicForce, subDomainKeyTree, tree, particles,
3150 lowestDomainList, sendIndices, searchRadius, n, m, relevantIndex, curveType);
3151 }
3152
3153 real symbolicForce_test(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
3154 DomainList *lowestDomainList, integer *sendIndices, real searchRadius,
3155 integer n, integer m, integer relevantProc, integer relevantIndicesCounter,
3156 integer relevantIndexOld, Curve::Type curveType) {
3157 ExecutionPolicy executionPolicy;
3158 return cuda::launch(true, executionPolicy, ::SPH::Kernel::symbolicForce_test, subDomainKeyTree, tree, particles,
3159 lowestDomainList, sendIndices, searchRadius, n, m, relevantProc, relevantIndicesCounter,
3160 relevantIndexOld, curveType);
3161 }
3162
3163 real symbolicForce_test2(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
3164 DomainList *domainList, integer *sendIndices, real searchRadius,
3165 integer n, integer m, integer relevantProc, integer relevantIndicesCounter,
3166 Curve::Type curveType) {
3167 ExecutionPolicy executionPolicy;
3168 return cuda::launch(true, executionPolicy, ::SPH::Kernel::symbolicForce_test2, subDomainKeyTree, tree, particles,
3169 domainList, sendIndices, searchRadius, n, m, relevantProc, relevantIndicesCounter, curveType);
3170 }
3171
3172 real collectSendIndices(Tree *tree, Particles *particles, integer *sendIndices,
3173 integer *particles2Send, integer *particlesCount,
3174 integer n, integer length, Curve::Type curveType) {
3175 ExecutionPolicy executionPolicy;
3176 return cuda::launch(true, executionPolicy, ::SPH::Kernel::collectSendIndices, tree, particles,
3177 sendIndices, particles2Send, particlesCount, n, length, curveType);
3178 }
3179
3180 real collectSendIndices_test2(Tree *tree, Particles *particles, integer *sendIndices,
3181 integer *particles2Send, integer *particlesCount,
3182 integer numParticlesLocal, integer numParticles,
3183 integer treeIndex, int currentProc, Curve::Type curveType) {
3184 ExecutionPolicy executionPolicy;
3185 return cuda::launch(true, executionPolicy, ::SPH::Kernel::collectSendIndices_test2, tree, particles,
3186 sendIndices, particles2Send, particlesCount, numParticlesLocal, numParticles,
3187 treeIndex, currentProc, curveType);
3188 }
3189
3190 real particles2Send(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
3191 DomainList *domainList, DomainList *lowestDomainList, integer maxLevel,
3192 integer *toSend, integer *sendCount, integer *alreadyInserted,
3193 integer insertOffset,
3194 integer numParticlesLocal, integer numParticles, integer numNodes, real radius,
3195 Curve::Type curveType) {
3196 ExecutionPolicy executionPolicy;
3197 return cuda::launch(true, executionPolicy, ::SPH::Kernel::particles2Send, subDomainKeyTree, tree,
3198 particles, domainList, lowestDomainList, maxLevel, toSend, sendCount,
3199 alreadyInserted, insertOffset, numParticlesLocal, numParticles, numNodes,
3200 radius, curveType);
3201 }
3202
3203 real collectSendIndicesBackup(integer *toSend, integer *toSendCollected, integer count) {
3204 ExecutionPolicy executionPolicy;
3205 return cuda::launch(true, executionPolicy, ::SPH::Kernel::collectSendIndicesBackup, toSend, toSendCollected,
3206 count);
3207 }
3208
3209 real collectSendEntriesBackup(SubDomainKeyTree *subDomainKeyTree, real *entry, real *toSend, integer *sendIndices,
3210 integer *sendCount, integer totalSendCount, integer insertOffset) {
3211 ExecutionPolicy executionPolicy;
3212 return cuda::launch(true, executionPolicy, ::SPH::Kernel::collectSendEntriesBackup, subDomainKeyTree,
3213 entry, toSend, sendIndices, sendCount, totalSendCount, insertOffset);
3214 }
3215
3216 real insertReceivedParticles(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
3217 DomainList *domainList, DomainList *lowestDomainList, int n, int m) {
3218 ExecutionPolicy executionPolicy(24, 32); //(24, 32);//(1, 1)//(256,1);
3219 return cuda::launch(true, executionPolicy, ::SPH::Kernel::insertReceivedParticles, subDomainKeyTree,
3220 tree, particles, domainList, lowestDomainList, n, m);
3221 }
3222
3223 real info(Tree *tree, Particles *particles, Helper *helper,
3224 integer numParticlesLocal, integer numParticles, integer numNodes) {
3225 ExecutionPolicy executionPolicy;
3226 return cuda::launch(true, executionPolicy, ::SPH::Kernel::info, tree, particles, helper,
3227 numParticlesLocal, numParticles, numNodes);
3228 }
3229
3230 real calculateCentersOfMass(Tree *tree, Particles *particles, integer level) {
3231 ExecutionPolicy executionPolicy;
3232 return cuda::launch(true, executionPolicy, ::SPH::Kernel::calculateCentersOfMass, tree,
3233 particles, level);
3234 }
3235
3236 real determineSearchRadii(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles,
3237 DomainList *domainList, DomainList *lowestDomainList, real *searchRadii,
3238 int n, int m, Curve::Type curveType) {
3239 ExecutionPolicy executionPolicy;
3240 return cuda::launch(true, executionPolicy, ::SPH::Kernel::determineSearchRadii, subDomainKeyTree, tree,
3241 particles, domainList, lowestDomainList, searchRadii, n, m, curveType);
3242 }
3243 }
3244 }
3245
3246}
DomainList
Definition: subdomain.cuh:494
DomainList::relevantDomainListOriginalIndex
integer * relevantDomainListOriginalIndex
Definition: subdomain.cuh:517
DomainList::relevantDomainListIndices
integer * relevantDomainListIndices
concentrate domain list nodes, usable to reduce domain list indices in respect to some criterion
Definition: subdomain.cuh:511
DomainList::relevantDomainListProcess
integer * relevantDomainListProcess
Definition: subdomain.cuh:515
DomainList::borders
real * borders
Definition: subdomain.cuh:519
ExecutionPolicy
Execution policy/instruction for CUDA kernel execution.
Definition: cuda_launcher.cuh:33
Helper
Definition: helper.cuh:24
Logger
Logger class.
Definition: logger.h:80
Material
Material parameters.
Definition: material.cuh:88
Material::interactions
integer interactions
Definition: material.cuh:111
Particles
Particle(s) class based on SoA (Structur of Arrays).
Definition: particles.cuh:50
Particles::noi
integer * noi
(pointer to) number of interactions (array)
Definition: particles.cuh:118
Particles::weightedEntry
CUDA_CALLABLE_MEMBER real weightedEntry(integer index, Entry::Name entry)
Definition: particles.cu:339
Particles::materialId
integer * materialId
(pointer to) material identifier (array)
Definition: particles.cuh:111
Particles::x
real * x
(pointer to) x position (array)
Definition: particles.cuh:62
Particles::level
integer * level
(pointer to) level of the (pseudo-)particles
Definition: particles.cuh:106
Particles::uid
idInteger * uid
(pointer to) unique identifier (array)
Definition: particles.cuh:109
Particles::nodeType
integer * nodeType
(pointer to) node type
Definition: particles.cuh:103
Particles::y
real * y
(pointer to) y position (array)
Definition: particles.cuh:70
Particles::sml
real * sml
(pointer to) smoothing length (array)
Definition: particles.cuh:113
Particles::mass
real * mass
(pointer to) mass (array)
Definition: particles.cuh:60
Particles::z
real * z
(pointer to) z position (array)
Definition: particles.cuh:78
SubDomainKeyTree
SubDomainKeyTree class handling rank, number of processes and ranges.
Definition: subdomain.cuh:62
SubDomainKeyTree::key2proc
CUDA_CALLABLE_MEMBER integer key2proc(keyType key)
Compute particle's MPI process belonging by it's key.
Definition: subdomain.cu:68
SubDomainKeyTree::rank
integer rank
MPI rank.
Definition: subdomain.cuh:66
SubDomainKeyTree::numProcesses
integer numProcesses
MPI number of processes.
Definition: subdomain.cuh:68
Tree
Tree class.
Definition: tree.cuh:140
cudaTerminate
#define cudaTerminate(...)
Definition: cuda_utilities.cuh:70
cudaAssert
#define cudaAssert(...)
Definition: cuda_utilities.cuh:50
INFO
@ INFO
debug log type
Definition: logger.h:48
DomainListNS::Kernel::lowestDomainList
__global__ void lowestDomainList(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, integer n, integer m)
Kernel to create the lowest domain list.
Definition: subdomain.cu:2039
Gravity::Kernel::collectSendIndices
__global__ void collectSendIndices(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *pseudoParticles2Send, integer *pseudoParticlesLevel, integer *particlesCount, integer *pseudoParticlesCount, integer n, integer length, Curve::Type curveType)
Collect the send indices.
Definition: gravity.cu:8
Kernel
Definition: device_rhs.cuh:7
KeyNS::key2proc
CUDA_CALLABLE_MEMBER integer key2proc(keyType key, SubDomainKeyTree *subDomainKeyTree)
Convert the key to the corresponding process.
Definition: subdomain.cu:17
KeyNS::lebesgue2hilbert
CUDA_CALLABLE_MEMBER keyType lebesgue2hilbert(keyType lebesgue, integer maxLevel)
Convert a Lebesgue key to a Hilbert key.
Definition: tree.cu:4
MaterialNS::Kernel::info
__global__ void info(Material *material)
Debug kernel giving information about material(s).
Definition: material.cu:24
ProfilerIds::Time::Gravity::compTheta
const char *const compTheta
Definition: h5profiler.h:87
ProfilerIds::Time::Gravity::symbolicForce
const char *const symbolicForce
Definition: h5profiler.h:88
ProfilerIds::Time::Gravity::insertReceivedParticles
const char *const insertReceivedParticles
Definition: h5profiler.h:91
ProfilerIds::Time::SPH::fixedRadiusNN
const char *const fixedRadiusNN
Definition: h5profiler.h:102
ProfilerIds::Time::SPH::determineSearchRadii
const char *const determineSearchRadii
Definition: h5profiler.h:98
ProfilerIds::Time::tree
const char *const tree
Definition: h5profiler.h:57
ProfilerIds::numParticlesLocal
const char *const numParticlesLocal
Definition: h5profiler.h:30
ProfilerIds::numParticles
const char *const numParticles
Definition: h5profiler.h:29
SPH::Kernel::Launch::symbolicForce_test2
real symbolicForce_test2(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, Curve::Type curveType)
Definition: sph.cu:3163
SPH::Kernel::Launch::symbolicForce_test
real symbolicForce_test(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, integer relevantIndexOld, Curve::Type curveType)
Definition: sph.cu:3153
SPH::Kernel::Launch::fixedRadiusNN_sharedMemory
real fixedRadiusNN_sharedMemory(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_sharedMemory().
Definition: sph.cu:3113
SPH::Kernel::Launch::compTheta
real compTheta(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, Curve::Type curveType)
Wrapper for SPH::Kernel::compTheta().
Definition: sph.cu:3137
SPH::Kernel::Launch::collectSendEntriesBackup
real collectSendEntriesBackup(SubDomainKeyTree *subDomainKeyTree, real *entry, real *toSend, integer *sendIndices, integer *sendCount, integer totalSendCount, integer insertOffset)
Wrapper for SPH::Kernel::collectSendEntriesBackup().
Definition: sph.cu:3209
SPH::Kernel::Launch::insertReceivedParticles
real insertReceivedParticles(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int m)
Wrapper for SPH::Kernel::insertReceivedParticles().
Definition: sph.cu:3216
SPH::Kernel::Launch::collectSendIndices_test2
real collectSendIndices_test2(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *particlesCount, integer numParticlesLocal, integer numParticles, integer treeIndex, int currentProc, Curve::Type curveType)
Definition: sph.cu:3180
SPH::Kernel::Launch::collectSendIndices
real collectSendIndices(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *particlesCount, integer n, integer length, Curve::Type curveType)
Wrapper for SPH::Kernel::collectSendIndices().
Definition: sph.cu:3172
SPH::Kernel::Launch::fixedRadiusNN_bruteForce
real fixedRadiusNN_bruteForce(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_bruteForce().
Definition: sph.cu:3087
SPH::Kernel::Launch::collectSendIndicesBackup
real collectSendIndicesBackup(integer *toSend, integer *toSendCollected, integer count)
Wrapper for SPH::Kernel::collectSendIndicesBackup().
Definition: sph.cu:3203
SPH::Kernel::Launch::fixedRadiusNN
real fixedRadiusNN(Tree *tree, Particles *particles, integer *interactions, real radius, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN().
Definition: sph.cu:3094
SPH::Kernel::Launch::fixedRadiusNN_withinBox
real fixedRadiusNN_withinBox(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_withinBox().
Definition: sph.cu:3103
SPH::Kernel::Launch::particles2Send
real particles2Send(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, integer maxLevel, integer *toSend, integer *sendCount, integer *alreadyInserted, integer insertOffset, integer numParticlesLocal, integer numParticles, integer numNodes, real radius, Curve::Type curveType=Curve::lebesgue)
Wrapper for SPH::Kernel::particles2Send().
Definition: sph.cu:3190
SPH::Kernel::Launch::symbolicForce
real symbolicForce(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantIndex, Curve::Type curveType)
Wrapper for SPH::Kernel::symbolicForce().
Definition: sph.cu:3144
SPH::Kernel::Launch::info
real info(Tree *tree, Particles *particles, Helper *helper, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::info().
Definition: sph.cu:3223
SPH::Kernel::Launch::determineSearchRadii
real determineSearchRadii(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, real *searchRadii, int n, int m, Curve::Type curveType)
Wrapper for SPH::Kernel::determineSearchRadii().
Definition: sph.cu:3236
SPH::Kernel::Launch::fixedRadiusNN_variableSML
real fixedRadiusNN_variableSML(Material *materials, Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_variableSML().
Definition: sph.cu:3128
SPH::Kernel::Launch::calculateCentersOfMass
real calculateCentersOfMass(Tree *tree, Particles *particles, integer level)
Wrapper for SPH::Kernel::calculateCentersOfMass().
Definition: sph.cu:3230
SPH::Kernel::redoNeighborSearch
__device__ void redoNeighborSearch(Tree *tree, Particles *particles, int particleId, int *interactions, real radius, integer numParticles, integer numNodes)
Redo the neighbor search (FRNN).
Definition: sph.cu:963
SPH::Kernel::compTheta
__global__ void compTheta(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, Curve::Type curveType)
Find the relevant (lowest) domain list nodes as preparation for finding particles to be exchanged bet...
Definition: sph.cu:1080
SPH::Kernel::calculateCentersOfMass
__global__ void calculateCentersOfMass(Tree *tree, Particles *particles, integer level)
Definition: sph.cu:2551
SPH::Kernel::symbolicForce
__global__ void symbolicForce(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantIndex, Curve::Type curveType)
Find the particles that need to be exchanged between processes to grant correctness of SPH forces.
Definition: sph.cu:1128
SPH::Kernel::symbolicForce_test2
__global__ void symbolicForce_test2(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, Curve::Type curveType)
Definition: sph.cu:1401
SPH::Kernel::collectSendIndices_test2
__global__ void collectSendIndices_test2(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *particlesCount, integer numParticlesLocal, integer numParticles, integer treeIndex, int currentProc, Curve::Type curveType)
Definition: sph.cu:1550
SPH::Kernel::insertReceivedParticles
__global__ void insertReceivedParticles(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int m)
Insert the received particles into the local tree.
Definition: sph.cu:1875
SPH::Kernel::collectSendIndicesBackup
__global__ void collectSendIndicesBackup(integer *toSend, integer *toSendCollected, integer count)
Definition: sph.cu:1776
SPH::Kernel::fixedRadiusNN_variableSML
__global__ void fixedRadiusNN_variableSML(Material *materials, Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Fixed-radius near neighbor search for iteratively finding appropriate smoothing length.
Definition: sph.cu:739
SPH::Kernel::particles2Send
__global__ void particles2Send(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, integer maxLevel, integer *toSend, integer *sendCount, integer *alreadyInserted, integer insertOffset, integer numParticlesLocal, integer numParticles, integer numNodes, real radius, Curve::Type curveType=Curve::lebesgue)
Definition: sph.cu:1575
SPH::Kernel::info
__global__ void info(Tree *tree, Particles *particles, Helper *helper, integer numParticlesLocal, integer numParticles, integer numNodes)
Info/Debug kernel.
Definition: sph.cu:3062
SPH::Kernel::collectSendIndices
__global__ void collectSendIndices(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *particlesCount, integer n, integer length, Curve::Type curveType)
Collect the found particles into contiguous memory in order to facilitate sending via MPI.
Definition: sph.cu:1525
SPH::Kernel::fixedRadiusNN
__global__ void fixedRadiusNN(Tree *tree, Particles *particles, integer *interactions, real radius, integer numParticlesLocal, integer numParticles, integer numNodes)
Fixed-radius near neighbor search (default method via explicit stack).
Definition: sph.cu:93
SPH::Kernel::fixedRadiusNN_withinBox
__global__ void fixedRadiusNN_withinBox(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Fixed-radius near neighbor search (nested stack method).
Definition: sph.cu:283
SPH::Kernel::determineSearchRadii
__global__ void determineSearchRadii(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, real *searchRadii, int n, int m, Curve::Type curveType)
Determine the search radius needed for SPH::Kernel::symbolicForce().
Definition: sph.cu:2619
SPH::Kernel::symbolicForce_test
__global__ void symbolicForce_test(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, integer relevantIndexOld, Curve::Type curveType)
Definition: sph.cu:1271
SPH::Kernel::collectSendEntriesBackup
__global__ void collectSendEntriesBackup(SubDomainKeyTree *subDomainKeyTree, real *entry, real *toSend, integer *sendIndices, integer *sendCount, integer totalSendCount, integer insertOffset)
Definition: sph.cu:1789
SPH::Kernel::fixedRadiusNN_sharedMemory
__global__ void fixedRadiusNN_sharedMemory(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Fixed-radius near neighbor search (brute-force method).
Definition: sph.cu:592
SPH::Kernel::fixedRadiusNN_bruteForce
__global__ void fixedRadiusNN_bruteForce(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Fixed-radius near neighbor search (brute-force method).
Definition: sph.cu:36
SPH
SPH related functions and kernels.
Definition: density.cuh:23
SPH::exchangeParticleEntry
void exchangeParticleEntry(SubDomainKeyTree *subDomainKeyTree, real *entry, real *toSend, integer *sendLengths, integer *receiveLengths, integer numParticlesLocal)
Definition: sph.cu:12
cuda::math::min
__device__ real min(real a, real b)
Minimum value out of two floating point values.
Definition: cuda_utilities.cu:414
cuda::math::sqrt
__device__ real sqrt(real a)
Square root of a floating point value.
Definition: cuda_utilities.cu:456
cuda::math::abs
__device__ real abs(real a)
Absolute value of a floating point value.
Definition: cuda_utilities.cu:448
cuda::math::max
__device__ real max(real a, real b)
Maximum value out of two floating point values.
Definition: cuda_utilities.cu:431
cuda::launch
real launch(bool timeKernel, const ExecutionPolicy &policy, void(*f)(Arguments...), Arguments... args)
CUDA execution wrapper function.
Definition: cuda_launcher.cuh:114
real
double real
Definition: parameter.h:15
MAX_NUM_INTERACTIONS
#define MAX_NUM_INTERACTIONS
Definition: parameter.h:106
keyType
unsigned long keyType
Definition: parameter.h:18
integer
int integer
Definition: parameter.h:17
MAX_LEVEL
#define MAX_LEVEL
Definition: parameter.h:25
MAX_DEPTH
#define MAX_DEPTH
Definition: parameter.h:105
POW_DIM
#define POW_DIM
Definition: parameter.h:40
DIM
#define DIM
Dimension of the problem.
Definition: parameter.h:38
MAX_VARIABLE_SML_ITERATIONS
#define MAX_VARIABLE_SML_ITERATIONS
Definition: sph.cu:5
TOLERANCE_WANTED_NUMBER_OF_INTERACTIONS
#define TOLERANCE_WANTED_NUMBER_OF_INTERACTIONS
Definition: sph.cu:7
Curve::Type
Type
Definition: parameter.h:206
Entry::y
@ y
Definition: parameter.h:258
Entry::x
@ x
Definition: parameter.h:256
Entry::z
@ z
Definition: parameter.h:260

milupHPC - src/sph/sph.cu Source File
Generated on Wed Aug 31 2022 12:16:53 by Doxygen 1.9.3