1#include "../../include/subdomain_key_tree/subdomain.cuh" 
    2#include "../../include/cuda_utils/cuda_launcher.cuh" 
    7    for (
int i=0; i<maxLevel; i++) {
 
    8        level[i] = (int)(key >> (maxLevel*
DIM - 
DIM*(i+1)) & (int)(
POW_DIM - 1));
 
   10    for (
int i=0; i<=maxLevel; i++) {
 
   11        keyAsChar[2*i] = level[i] + 
'0';
 
   12        keyAsChar[2*i+1] = 
'|';
 
   14    keyAsChar[2*maxLevel+3] = 
'\0';
 
   18    return subDomainKeyTree->
key2proc(key); 
 
   26                                                        integer *procParticleCounter) : rank(rank),
 
   27                                                        numProcesses(numProcesses), range(range),
 
   28                                                        procParticleCounter(procParticleCounter) {
 
   71        if (key >= 
range[proc] && key < 
range[proc + 1]) {
 
   75    printf(
"ERROR: key2proc(k=%lu): -1!\n", key);
 
   88            p2 = 
key2proc(key | ~(~0UL << 
DIM * (maxLevel - level)));
 
  101            keyType keyMax = (shiftValue << toShift);
 
  105            keyType keyMax = (shiftValue << toShift);
 
  113            p2 = 
key2proc(hilbert | (keyMax >> (
DIM*level+1)));
 
  124            printf(
"Curve type not available!\n");
 
  138                            integer *procParticleCounter) {
 
  139            subDomainKeyTree->
set(rank, numProcesses, range, procParticleCounter);
 
  143            printf(
"device: subDomainKeyTree->rank = %i\n", subDomainKeyTree->
rank);
 
  144            printf(
"device: subDomainKeyTree->numProcesses = %i\n", subDomainKeyTree->
numProcesses);
 
  176                    childIndex = 
tree->child[
POW_DIM*childIndex+path[j]];
 
  177                    if (childIndex < n) {
 
  178                        if (childIndex == -1) {
 
  182                            particles->
level[cell] = j + 1; 
 
  185                            particles->
nodeType[childIndex] = 1;
 
  188                            printf(
"adding node index %i  cell = %i (childPath = %i,  j = %i)! x = (%e, %e, %e) level = %i\n",
 
  189                                   temp, cell, path[j], j, particles->
x[childIndex], particles->
y[childIndex], particles->
z[childIndex],
 
  190                                   particles->
level[cell]);
 
  210                            for (
int k=0; k<=j; k++) {
 
  212                                currentChild = path[k];
 
  214                                if (currentChild % 2 != 0) {
 
  215                                    max_x = 0.5 * (min_x + max_x);
 
  219                                    min_x = 0.5 * (min_x + max_x);
 
  222                                if (currentChild % 2 == 0 && currentChild % 4 != 0) {
 
  223                                    max_y = 0.5 * (min_y + max_y);
 
  227                                    min_y = 0.5 * (min_y + max_y);
 
  230                                if (currentChild == 4) {
 
  231                                    max_z = 0.5 * (min_z + max_z);
 
  235                                    min_z = 0.5 * (min_z + max_z);
 
  242                            if (particles->
x[childIndex] < 0.5 * (min_x + max_x)) {
 
  248                            if (particles->
y[childIndex] < 0.5 * (min_y + max_y)) {
 
  254                            if (particles->
z[childIndex] < 0.5 * (min_z + max_z)) {
 
  263                            particles->
x[cell] = particles->
x[childIndex];
 
  266                            particles->
y[cell] = particles->
y[childIndex];
 
  269                            particles->
z[cell] = particles->
z[childIndex];
 
  273                            particles->
mass[cell] = particles->
mass[childIndex];
 
  275                            particles->
level[cell] = particles->
level[childIndex];
 
  276                            particles->
level[childIndex] += 1;
 
  279                            printf(
"adding node in between for index %i, temp = %i  cell = %i (childPath = %i,  j = %i)! x = (%e, %e, %e) level = %i vs %i\n",
 
  280                                   childIndex, temp, cell, childPath, j, particles->
x[childIndex], particles->
y[childIndex], particles->
z[childIndex],
 
  281                                   particles->
level[cell], particles->
level[childIndex]);
 
  285                            tree->child[
POW_DIM * cell + childPath] = childIndex;
 
  289                            particles->
nodeType[childIndex] = 1;
 
  296                        if (particles->
nodeType[childIndex] >= 1) {
 
  308                            particles->
nodeType[childIndex] = 1;
 
  612            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
  613            integer stride = blockDim.x * gridDim.x;
 
  664                        childIndex = 
tree->child[
POW_DIM * childIndex + path[j]];
 
  668                    for (k = 0; k < j; k++) {
 
  670                        currentChild = path[k];
 
  671                        if (currentChild & 1) {
 
  672                            max_x = 0.5 * (min_x + max_x);
 
  674                            min_x = 0.5 * (min_x + max_x);
 
  677                        if ((currentChild >> 1) & 1) {
 
  678                            max_y = 0.5 * (min_y + max_y);
 
  681                            min_y = 0.5 * (min_y + max_y);
 
  684                        if ((currentChild >> 2) & 1) {
 
  685                            max_z = 0.5 * (min_z + max_z);
 
  687                            min_z = 0.5 * (min_z + max_z);
 
  693                    if (childIndex < n) {
 
  694                        if (childIndex == -1) {
 
  699                            particles->
level[cell] = j;
 
  703                            particles->
nodeType[childIndex] = 1;
 
  705                            domainList->
borders[(index + offset) * 2 * 
DIM] = min_x;
 
  706                            domainList->
borders[(index + offset) * 2 * 
DIM + 1] = max_x;
 
  708                            domainList->
borders[(index + offset) * 2 * 
DIM + 2] = min_y;
 
  709                            domainList->
borders[(index + offset) * 2 * 
DIM + 3] = max_y;
 
  711                            domainList->
borders[(index + offset) * 2 * 
DIM + 4] = min_z;
 
  712                            domainList->
borders[(index + offset) * 2 * 
DIM + 5] = max_z;
 
  717                            particles->
x[cell] = 0.5 * (min_x + max_x);
 
  720                            particles->
y[cell] = 0.5 * (min_y + max_y);
 
  723                            particles->
z[cell] = 0.5 * (min_z + max_z);
 
  729                            printf(
"adding node index %i  cell = %i (childPath = %i,  j = %i)! x = (%e, %e, %e) level = %i\n",
 
  730                                   temp, cell, path[j], j, particles->
x[childIndex], particles->
y[childIndex], particles->
z[childIndex],
 
  731                                   particles->
level[cell]);
 
  736                            printf(
"[rank %i] adding domainListIndices[%i] = %i, childIndex = %i, path = %i\n", subDomainKeyTree->
rank,
 
  737                                   index + offset, domainList->
domainListIndices[index + offset], childIndex, path[j-1]);
 
  750                            particles->
x[cell] = particles->
x[childIndex];
 
  753                            particles->
y[cell] = particles->
y[childIndex];
 
  756                            particles->
z[cell] = particles->
z[childIndex];
 
  761                            domainList->
borders[(index + offset) * 2 * 
DIM] = min_x;
 
  762                            domainList->
borders[(index + offset) * 2 * 
DIM + 1] = max_x;
 
  764                            domainList->
borders[(index + offset) * 2 * 
DIM + 2] = min_y;
 
  765                            domainList->
borders[(index + offset) * 2 * 
DIM + 3] = max_y;
 
  767                            domainList->
borders[(index + offset) * 2 * 
DIM + 4] = min_z;
 
  768                            domainList->
borders[(index + offset) * 2 * 
DIM + 5] = max_z;
 
  772                            particles->
level[cell] = particles->
level[childIndex];
 
  774                            atomicAdd(&particles->
level[childIndex], 1);
 
  778                            if (particles->
x[childIndex] < 0.5 * (min_x + max_x)) {
 
  784                            if (particles->
y[childIndex] < 0.5 * (min_y + max_y)) {
 
  790                            if (particles->
z[childIndex] < 0.5 * (min_z + max_z)) {
 
  802                            printf(
"adding node in between for index %i, temp = %i  cell = %i (childPath = %i,  j = %i)! x = (%e, %e, %e) level = %i vs %i\n",
 
  803                                   childIndex, temp, cell, childPath, j, particles->
x[childIndex], particles->
y[childIndex], particles->
z[childIndex],
 
  804                                   particles->
level[cell], particles->
level[childIndex]);
 
  809                            tree->child[
POW_DIM * cell + childPath] = childIndex;
 
  817                            particles->
nodeType[childIndex] = 1;
 
  823                        printf(
"[rank %i] Mark already available node %i: %i, path = %i (level = %i)\n",
 
  824                               subDomainKeyTree->
rank, index + offset, childIndex, path[j-1], level);
 
  827                        domainList->
borders[(index + offset) * 2 * 
DIM] = min_x;
 
  828                        domainList->
borders[(index + offset) * 2 * 
DIM + 1] = max_x;
 
  830                        domainList->
borders[(index + offset) * 2 * 
DIM + 2] = min_y;
 
  831                        domainList->
borders[(index + offset) * 2 * 
DIM + 3] = max_y;
 
  833                        domainList->
borders[(index + offset) * 2 * 
DIM + 4] = min_z;
 
  834                        domainList->
borders[(index + offset) * 2 * 
DIM + 5] = max_z;
 
  838                        particles->
nodeType[childIndex] = 1;
 
  853            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
  854            integer stride = blockDim.x * gridDim.x;
 
  860            char keyAsChar[21 * 2 + 3];
 
  863            while (bodyIndex + offset < n) {
 
  866                particleKey = 
tree->getParticleKey(particles, bodyIndex + offset, maxLevel, curveType);
 
  886                keys[bodyIndex + offset] = particleKey; 
 
  900            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
  901            integer stride = blockDim.x * gridDim.x;
 
  907            while ((bodyIndex + offset) < n) {
 
  910                key = 
tree->getParticleKey(particles, bodyIndex + offset, 
MAX_LEVEL, curveType);
 
  912                proc = subDomainKeyTree->
key2proc(key);
 
  924            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
  925            integer stride = blockDim.x * gridDim.x;
 
  932            while ((bodyIndex + offset) < n) {
 
  935                key = 
tree->getParticleKey(particles, bodyIndex + offset, 
MAX_LEVEL, curveType);
 
  937                proc = subDomainKeyTree->
key2proc(key);
 
  949            integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
 
  950            integer stride = blockDim.x*gridDim.x;
 
  965                if (particles->
nodeType[domainIndex] == 2) {
 
  970                    particles->
x[domainIndex] = (
real)0;
 
  972                    particles->
y[domainIndex] = (
real)0;
 
  974                    particles->
z[domainIndex] = (
real)0;
 
  977                    particles->
mass[domainIndex] = (
real)0;
 
  981                    particles->
x[domainIndex] *= particles->
mass[domainIndex];
 
  983                    particles->
y[domainIndex] *= particles->
mass[domainIndex];
 
  985                    particles->
z[domainIndex] *= particles->
mass[domainIndex];
 
  994        template <
typename T>
 
  998            integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
 
  999            integer stride = blockDim.x*gridDim.x;
 
 1006                lowestDomainIndex = 
lowestDomainList->domainListIndices[bodyIndex + offset];
 
 1007                if (lowestDomainIndex >= 0) {
 
 1010                            buffer[bodyIndex + offset] = particles->
x[lowestDomainIndex];
 
 1014                            buffer[bodyIndex + offset] = particles->
y[lowestDomainIndex];
 
 1018                            buffer[bodyIndex + offset] = particles->
z[lowestDomainIndex];
 
 1023                            buffer[bodyIndex + offset] = particles->
mass[lowestDomainIndex];
 
 1026                            printf(
"prepareLowestDomainExchange(): Not available!\n");
 
 1033        template <
typename T>
 
 1037            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
 1038            integer stride = blockDim.x * gridDim.x;
 
 1052                if (originalIndex == -1) {
 
 1053                    cudaTerminate(
"ATTENTION: originalIndex = -1 (index = %i)!\n",
 
 1060                                buffer[bodyIndex + offset];
 
 1065                                buffer[bodyIndex + offset];
 
 1070                                buffer[bodyIndex + offset];
 
 1076                                buffer[bodyIndex + offset];
 
 1079                        printf(
"Entry not available!\n");
 
 1089            integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
 
 1090            integer stride = blockDim.x*gridDim.x;
 
 1098                lowestDomainIndex = 
lowestDomainList->domainListIndices[bodyIndex + offset];
 
 1111                if (particles->
mass[lowestDomainIndex] > (
real)0) {
 
 1118                    particles->
x[lowestDomainIndex] /= particles->
mass[lowestDomainIndex];
 
 1120                    particles->
y[lowestDomainIndex] /= particles->
mass[lowestDomainIndex];
 
 1122                    particles->
z[lowestDomainIndex] /= particles->
mass[lowestDomainIndex];
 
 1141            integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
 
 1142            integer stride = blockDim.x*gridDim.x;
 
 1148            while (bodyIndex + offset < *tree->index) {
 
 1149                isDomainList = 
false;
 
 1157                if (particles->
nodeType[bodyIndex + offset] >= 1) {
 
 1158                    isDomainList = 
true;
 
 1161                if (!isDomainList) {
 
 1162                    particles->
x[bodyIndex + offset] /= particles->
mass[bodyIndex + offset];
 
 1164                    particles->
y[bodyIndex + offset] /= particles->
mass[bodyIndex + offset];
 
 1166                    particles->
z[bodyIndex + offset] /= particles->
mass[bodyIndex + offset];
 
 1178            integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
 
 1179            integer stride = blockDim.x*gridDim.x;
 
 1186            real totalMass, x_mass, y_mass, z_mass;
 
 1187            int childToLookAt = 5;
 
 1188            int lowestDomainIndex, childLowestDomain;
 
 1195                if (particles->
nodeType[domainIndex] == 2) {
 
 1250                if (compute && domainList->
domainListLevels[bodyIndex + offset] == level) {
 
 1262                    for (
int i=0; i<
POW_DIM; i++) {
 
 1263                        particles->
x[domainIndex] += particles->
x[
tree->child[
POW_DIM*domainIndex + i]] *
 
 1266                        particles->
y[domainIndex] += particles->
y[
tree->child[
POW_DIM*domainIndex + i]] *
 
 1269                        particles->
z[domainIndex] += particles->
z[
tree->child[
POW_DIM*domainIndex + i]] *
 
 1276                    if (particles->
mass[domainIndex] > 0.) {
 
 1277                        particles->
x[domainIndex] /= particles->
mass[domainIndex];
 
 1279                        particles->
y[domainIndex] /= particles->
mass[domainIndex];
 
 1281                        particles->
z[domainIndex] /= particles->
mass[domainIndex];
 
 1295            integer bodyIndex = threadIdx.x + blockIdx.x*blockDim.x;
 
 1296            integer stride = blockDim.x*gridDim.x;
 
 1304            while (level >= 0) {
 
 1310                    if (particles->
nodeType[domainIndex] == 2) {
 
 1318                    if (compute && domainList->
domainListLevels[bodyIndex + offset] == level) {
 
 1320                        for (
int i=0; i<
POW_DIM; i++) {
 
 1321                            particles->
x[domainIndex] += particles->
x[
tree->child[
POW_DIM*domainIndex + i]] *
 
 1324                            particles->
y[domainIndex] += particles->
y[
tree->child[
POW_DIM*domainIndex + i]] *
 
 1327                            particles->
z[domainIndex] += particles->
z[
tree->child[
POW_DIM*domainIndex + i]] *
 
 1334                        if (particles->
mass[domainIndex] != 0.f) {
 
 1335                            particles->
x[domainIndex] /= particles->
mass[domainIndex];
 
 1337                            particles->
y[domainIndex] /= particles->
mass[domainIndex];
 
 1339                            particles->
z[domainIndex] /= particles->
mass[domainIndex];
 
 1359            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
 1360            integer stride = blockDim.x * gridDim.x;
 
 1367            if (bodyIndex + offset == 0) {
 
 1368                *
tree->index = 
tree->toDeleteNode[0];
 
 1373                for (
int i=0; i<
POW_DIM; i++) {
 
 1375                         || (
tree->child[
POW_DIM * domainIndex + i] >= 
tree->toDeleteLeaf[0] &&
 
 1400            offset = 
tree->toDeleteLeaf[0];
 
 1402            while ((bodyIndex + offset) >= 
tree->toDeleteLeaf[0] && (bodyIndex + offset) < 
tree->toDeleteLeaf[1]) {
 
 1403                for (
int i=0; i<
POW_DIM; i++) {
 
 1404                    tree->child[(bodyIndex + offset)*
POW_DIM + i] = -1;
 
 1406                tree->count[bodyIndex + offset] = 1;
 
 1408                particles->
x[bodyIndex + offset] = 0.;
 
 1409                particles->
vx[bodyIndex + offset] = 0.;
 
 1410                particles->
ax[bodyIndex + offset] = 0.;
 
 1411                particles->
g_ax[bodyIndex + offset] = 0.;
 
 1413                particles->
y[bodyIndex + offset] = 0.;
 
 1414                particles->
vy[bodyIndex + offset] = 0.;
 
 1415                particles->
ay[bodyIndex + offset] = 0.;
 
 1416                particles->
g_ay[bodyIndex + offset] = 0.;
 
 1418                particles->
z[bodyIndex + offset] = 0.;
 
 1419                particles->
vz[bodyIndex + offset] = 0.;
 
 1420                particles->
az[bodyIndex + offset] = 0.;
 
 1421                particles->
g_az[bodyIndex + offset] = 0.;
 
 1424                particles->
mass[bodyIndex + offset] = 0.;
 
 1425                tree->start[bodyIndex + offset] = -1;
 
 1426                tree->sorted[bodyIndex + offset] = 0;
 
 1431            offset = 
tree->toDeleteNode[0];
 
 1433            while ((bodyIndex + offset) >= 
tree->toDeleteNode[0] && (bodyIndex + offset) < 
tree->toDeleteNode[1]) {
 
 1434                for (
int i=0; i<
POW_DIM; i++) {
 
 1435                    tree->child[(bodyIndex + offset)*
POW_DIM + i] = -1;
 
 1437                tree->count[bodyIndex + offset] = 0;
 
 1438                particles->
x[bodyIndex + offset] = 0.;
 
 1442                particles->
y[bodyIndex + offset] = 0.;
 
 1446                particles->
z[bodyIndex + offset] = 0.;
 
 1451                particles->
mass[bodyIndex + offset] = 0.;
 
 1461            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
 1462            integer stride = blockDim.x * gridDim.x;
 
 1467            while ((bodyIndex + offset) < bins) {
 
 1469                helper->
keyTypeBuffer[bodyIndex + offset] = (bodyIndex + offset) * (max_key/bins);
 
 1472                if ((bodyIndex + offset) == (bins - 1)) {
 
 1483            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
 1484            integer stride = blockDim.x * gridDim.x;
 
 1489            while ((bodyIndex + offset) < n) {
 
 1491                key = 
tree->getParticleKey(particles, bodyIndex + offset, 
MAX_LEVEL, curveType);
 
 1493                for (
int i = 0; i < (bins); i++) {
 
 1494                    if (key >= helper->
keyTypeBuffer[i] && key < helper->keyTypeBuffer[i + 1]) {
 
 1511            integer bodyIndex = threadIdx.x + blockIdx.x * blockDim.x;
 
 1512            integer stride = blockDim.x * gridDim.x;
 
 1518            while ((bodyIndex + offset) < (bins-1)) {
 
 1521                for (
integer i=0; i<(bodyIndex+offset); i++) {
 
 1526                    if ((sum + helper->
integerBuffer[bodyIndex + offset]) >= (i*n) && sum < (i*n)) {
 
 1539                         integer *procParticleCounter) {
 
 1542                         numProcesses, range, procParticleCounter);
 
 1560                                subDomainKeyTree, 
tree, particles,
 
 1561                                domainList, n, m, level);
 
 1569                                tree, particles, keys, maxLevel, n, curveType);
 
 1576                                subDomainKeyTree, 
tree, particles, n, m, curveType);
 
 1584                                subDomainKeyTree, 
tree, particles, n, m, 
sortArray, curveType);
 
 1593        template <
typename T>
 
 1604        template <
typename T>
 
 1659                                subDomainKeyTree, helper, bins, n, curveType);
 
 1666                                bins, n, curveType);
 
 1681                                            integer *relevantDomainListIndices, 
integer *relevantDomainListLevels,
 
 1682                                            integer *relevantDomainListProcess) :
 
 1683                                            domainListIndices(domainListIndices), domainListLevels(domainListLevels),
 
 1684                                            domainListIndex(domainListIndex), domainListCounter(domainListCounter),
 
 1685                                            domainListKeys(domainListKeys), sortedDomainListKeys(sortedDomainListKeys),
 
 1686                                            relevantDomainListIndices(relevantDomainListIndices),
 
 1687                                            relevantDomainListLevels(relevantDomainListLevels),
 
 1688                                            relevantDomainListProcess(relevantDomainListProcess) {
 
 1698                                          keyType *sortedDomainListKeys, 
integer *relevantDomainListIndices,
 
 1699                                          integer *relevantDomainListLevels, 
integer *relevantDomainListProcess) {
 
 1725                            keyType *sortedDomainListKeys, 
integer *relevantDomainListIndices,
 
 1726                            integer *relevantDomainListLevels, 
integer *relevantDomainListProcess) {
 
 1728            domainList->
set(domainListIndices, domainListLevels, domainListIndex, domainListCounter, domainListKeys,
 
 1729                            sortedDomainListKeys, relevantDomainListIndices, relevantDomainListLevels,
 
 1730                            relevantDomainListProcess);
 
 1734            domainList->
setBorders(borders, relevantDomainListOriginalIndex);
 
 1739            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 1740            integer stride = blockDim.x * gridDim.x;
 
 1759                    printf(
"domainListIndices[%i] = %i, level = %i, x = (%f, %f, %f) mass = %f\n", index + offset,
 
 1760                           domainListIndex, domainList->
domainListLevels[index + offset], particles->
x[domainListIndex],
 
 1761                           particles->
y[domainListIndex], particles->
z[domainListIndex],
 
 1762                           particles->
mass[domainListIndex]);
 
 1779            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 1780            integer stride = blockDim.x * gridDim.x;
 
 1816                    printf(
"domainListIndices[%i] = %i, x = (%f) mass = %f\n", index + offset,
 
 1817                           domainListIndex, particles->
x[domainListIndex], particles->
mass[domainListIndex]);
 
 1819                    printf(
"domainListIndices[%i] = %i, x = (%f, %f) mass = %f\n", index + offset,
 
 1820                           domainListIndex, particles->
x[domainListIndex],
 
 1821                           particles->
y[domainListIndex], particles->
mass[domainListIndex]);
 
 1823                    printf(
"domainListIndices[%i] = %i, x = (%f, %f, %f) mass = %f\n", index + offset,
 
 1824                           domainListIndex, particles->
x[domainListIndex],
 
 1825                           particles->
y[domainListIndex], particles->
z[domainListIndex], particles->
mass[domainListIndex]);
 
 1965            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 1971            int domainListIndex;
 
 1973            key2test = (
keyType)blockIdx.x << (
DIM * (maxLevel - 1));
 
 1975            if (threadIdx.x == 0) {
 
 1986            key2test += (
keyType)threadIdx.x << (
DIM * (maxLevel - 2));
 
 1987            if (threadIdx.x == (
POW_DIM - 1) && blockIdx.x == (
POW_DIM - 1)) {
 
 1991                keyMax = (shiftValue << toShift) - 1; 
 
 1995                keyMax = (shiftValue << toShift) - 1; 
 
 1999                keyMax = (shiftValue << toShift) - 1; 
 
 2004                keyMax = key2test + (1UL << (
DIM * (maxLevel - 2)));
 
 2010            while (key2test < keyMax && level > 1) { 
 
 2011                if (subDomainKeyTree->
isDomainListNode(key2test & (~0UL << (
DIM * (maxLevel - level + 1))),
 
 2012                                                       maxLevel, level-1, curveType)) {
 
 2022                    if (subDomainKeyTree->
isDomainListNode(key2test, maxLevel, level, curveType)) {
 
 2026                        key2test = key2test + (1UL << 
DIM * (maxLevel - level));
 
 2027                        while (((key2test >> (
DIM * (maxLevel - level))) & (
keyType)(
POW_DIM - 1)) == 0UL) {
 
 2043            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 2044            integer stride = blockDim.x * gridDim.x;
 
 2047            bool lowestDomainListNode;
 
 2054                lowestDomainListNode = 
true;
 
 2058                for (
int i=0; i<
POW_DIM; i++) {
 
 2059                    childIndex = 
tree->child[
POW_DIM * domainIndex + i];
 
 2061                    if (childIndex != -1) {
 
 2063                        if (childIndex >= n) {
 
 2065                            if (particles->
nodeType[childIndex] >= 1) {
 
 2066                                lowestDomainListNode = 
false;
 
 2077                            if (!lowestDomainListNode) {
 
 2084                if (lowestDomainListNode) {
 
 2089                    particles->
nodeType[domainIndex] = 2;
 
 2117                             keyType *sortedDomainListKeys, 
integer *relevantDomainListIndices,
 
 2118                             integer *relevantDomainListLevels, 
integer *relevantDomainListProcess) {
 
 2121                         domainListIndex, domainListCounter, domainListKeys, sortedDomainListKeys,
 
 2122                         relevantDomainListIndices, relevantDomainListLevels, relevantDomainListProcess);
 
 2128                         relevantDomainListOriginalIndex);
 
 2146                                domainList, maxLevel, curveType);
 
 2162                                            real d, 
int index) {
 
 2168                particles->
y[index] * particles->
y[index]) < d) {
 
 2170        if (
cuda::math::sqrt(particles->
x[index] * particles->
x[index] + particles->
y[index] * particles->
y[index] +
 
 2171                  particles->
z[index] * particles->
z[index]) < d) {
 
 2180                                        real d, 
int index) {
 
 2201                                    int *particles2remove, 
int *counter, 
int criterion, 
real d, 
int numParticles) {
 
 2203            int bodyIndex = threadIdx.x + blockDim.x * blockIdx.x;
 
 2204            int stride = blockDim.x * gridDim.x;
 
 2210                switch (criterion) {
 
 2218                        cudaTerminate(
"Criterion for removing particles not available! Exiting...\n");
 
 2222                    particles2remove[bodyIndex + offset] = 1;
 
 2223                    atomicAdd(counter, 1);
 
 2225                    particles2remove[bodyIndex + offset] = 0;
 
 2234                                 int *particles2remove, 
int *counter, 
int criterion, 
real d,
 
 2248        template<
typename T, 
typename U>
 
 2251            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 2252            integer stride = blockDim.x * gridDim.x;
 
 2258            while ((index + offset) < length) {
 
 2259                if (array[index + offset] != -1) {
 
 2260                    for (
integer i = 0; i < length; i++) {
 
 2261                        if (i != (index + offset)) {
 
 2262                            if (array[i] != -1 && (array[index + offset] == array[i] || (entry1[array[i]] == entry1[array[index + offset]] &&
 
 2263                                                                                         entry2[array[i]] == entry2[array[index + offset]] &&
 
 2264                                                                                         entry3[array[i]] == entry3[array[index + offset]]))) {
 
 2268                                    printf(
"DUPLICATE: %i vs %i | (%f, %f, %f) vs (%f, %f, %f):\n",
 
 2269                                           array[index + offset], array[i],
 
 2270                                           entry1[array[index + offset]], entry2[array[index + offset]], entry3[array[index + offset]],
 
 2271                                           entry1[array[i]], entry2[array[i]], entry3[array[i]]);
 
 2276                                            printf(
"DUPLICATE is domainList!\n");
 
 2280                                    for (
int k=0; k<
POW_DIM; k++) {
 
 2281                                        if (child[
POW_DIM*array[index + offset] + k] == array[i]) {
 
 2282                                            printf(
"isChild: index = %i: child %i == i = %i\n", array[index + offset],
 
 2287                                        if (child[8*array[i] + k] == array[index + offset]) {
 
 2288                                            printf(
"isChild: index = %i: child %i == index = %i\n", array[i],
 
 2289                                                   k, array[index + offset]);
 
 2295                                        printf(
"isChild: Duplicate NOT a child: %i vs %i | (%f, %f, %f) vs (%f, %f, %f):\n",
 
 2296                                               array[index + offset], array[i],
 
 2297                                               entry1[array[index + offset]], entry2[array[index + offset]], entry3[array[index + offset]],
 
 2298                                               entry1[array[i]], entry2[array[i]], entry3[array[i]]);
 
 2324        template <
typename T, 
unsigned int blockSize>
 
 2327            extern __shared__ T sdata[];
 
 2329            unsigned int tid = threadIdx.x;
 
 2330            unsigned int i = blockIdx.x*(blockSize*2) + tid;
 
 2331            unsigned int gridSize = blockSize*2*gridDim.x;
 
 2337                sdata[tid] += array[i] + array[i+blockSize];
 
 2343            if (blockSize >= 512) {
 
 2345                    array[tid] += array[tid + 256];
 
 2349            if (blockSize >= 256) {
 
 2351                    array[tid] += array[tid + 128];
 
 2355            if (blockSize >= 128) {
 
 2357                    array[tid] += array[tid + 64];
 
 2364                if (blockSize >= 64) array[tid] += array[tid + 32];
 
 2365                if (blockSize >= 32) array[tid] += array[tid + 16];
 
 2366                if (blockSize >= 16) array[tid] += array[tid + 8];
 
 2367                if (blockSize >= 8) array[tid] += array[tid + 4];
 
 2368                if (blockSize >= 4) array[tid] += array[tid + 2];
 
 2369                if (blockSize >= 2) array[tid] += array[tid + 1];
 
 2373                outputData[blockIdx.x] = array[0];
 
 2379        template <
typename T, 
unsigned int blockSize>
 
 2381            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 2382            integer stride = blockDim.x * gridDim.x;
 
 2387            while ((index + offset) < blockSize) {
 
 2388                atomicAdd(&outdata[0], indata[index + offset]);
 
 2396            template<
typename T, 
typename U>
 
 2399                return cuda::launch(
true, executionPolicy, ::CudaUtils::Kernel::markDuplicatesTemp<T, U>, 
tree, domainList, array, entry1, entry2,
 
 2400                                    entry3, duplicateCounter, child, length);
 
 2404            template <
typename T, 
unsigned int blockSize>
 
 2406                ExecutionPolicy executionPolicy(256, blockSize, blockSize * 
sizeof(T));
 
 2407                return cuda::launch(
true, executionPolicy, ::CudaUtils::Kernel::reduceBlockwise<T, blockSize>, array, outputData, n);
 
 2411            template <
typename T, 
unsigned int blockSize>
 
 2414                return cuda::launch(
true, executionPolicy, ::CudaUtils::Kernel::blockReduction<T, blockSize>,
 
 2427        template <
unsigned int blockSize>
 
 2430            extern __shared__ 
real sdata[];
 
 2433            real *ly = &sdata[blockSize];
 
 2435            real *lz = &sdata[2 * blockSize];
 
 2438            unsigned int tid = threadIdx.x;
 
 2439            unsigned int i = blockIdx.x*(blockSize*2) + tid;
 
 2440            unsigned int gridSize = blockSize*2*gridDim.x;
 
 2454                lx[tid] += particles->
mass[i] * (particles->
y[i]*particles->
vz[i] - particles->
z[i]*particles->
vy[i]) +
 
 2455                            particles->
mass[i+blockSize] * (particles->
y[i+blockSize]*particles->
vz[i+blockSize] - particles->
z[i+blockSize]*particles->
vy[i+blockSize]);
 
 2457                ly[tid] += particles->
mass[i] * (particles->
z[i]*particles->
vx[i] - particles->
x[i]*particles->
vz[i]) +
 
 2458                            particles->
mass[i+blockSize] * (particles->
z[i+blockSize]*particles->
vx[i+blockSize] - particles->
x[i+blockSize]*particles->
vz[i+blockSize]);
 
 2460                lz[tid] += particles->
mass[i] * (particles->
x[i]*particles->
vy[i] - particles->
y[i]*particles->
vx[i]) +
 
 2461                            particles->
mass[i+blockSize] * (particles->
x[i+blockSize]*particles->
vy[i+blockSize] - particles->
y[i+blockSize]*particles->
vx[i+blockSize]);
 
 2469            if (blockSize >= 512) {
 
 2471                    lx[tid] += lx[tid + 256];
 
 2473                    ly[tid] += ly[tid + 256];
 
 2475                    lz[tid] += lz[tid + 256];
 
 2481            if (blockSize >= 256) {
 
 2483                    lx[tid] += lx[tid + 128];
 
 2485                    ly[tid] += ly[tid + 128];
 
 2487                    lz[tid] += lz[tid + 128];
 
 2493            if (blockSize >= 128) {
 
 2495                    lx[tid] += lx[tid + 64];
 
 2497                    ly[tid] += ly[tid + 64];
 
 2499                    lz[tid] += lz[tid + 64];
 
 2508                if (blockSize >= 64) {
 
 2509                    lx[tid] += lx[tid + 32];
 
 2511                    ly[tid] += ly[tid + 32];
 
 2513                    lz[tid] += lz[tid + 32];
 
 2517                if (blockSize >= 32) {
 
 2518                    lx[tid] += lx[tid + 16];
 
 2520                    ly[tid] += ly[tid + 16];
 
 2522                    lz[tid] += lz[tid + 16];
 
 2526                if (blockSize >= 16) {
 
 2527                    lx[tid] += lx[tid + 8];
 
 2529                    ly[tid] += ly[tid + 8];
 
 2531                    lz[tid] += lz[tid + 8];
 
 2535                if (blockSize >= 8) {
 
 2536                    lx[tid] += lx[tid + 4];
 
 2538                    ly[tid] += ly[tid + 4];
 
 2540                    lz[tid] += lz[tid + 4];
 
 2544                if (blockSize >= 4) {
 
 2545                    lx[tid] += lx[tid + 2];
 
 2547                    ly[tid] += ly[tid + 2];
 
 2549                    lz[tid] += lz[tid + 2];
 
 2553                if (blockSize >= 2) {
 
 2554                    lx[tid] += lx[tid + 1];
 
 2556                    ly[tid] += ly[tid + 1];
 
 2558                    lz[tid] += lz[tid + 1];
 
 2565                outputData[blockIdx.x] = lx[0];
 
 2567                outputData[blockSize + blockIdx.x] = ly[0];
 
 2569                outputData[2 * blockSize + blockIdx.x] = lz[0];
 
 2575        template <
unsigned int blockSize>
 
 2578            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 2579            integer stride = blockDim.x * gridDim.x;
 
 2584            while ((index + offset) < blockSize) {
 
 2585                atomicAdd(&outdata[0], indata[index + offset]);
 
 2587                atomicAdd(&outdata[1], indata[blockSize + index + offset]);
 
 2589                atomicAdd(&outdata[2], indata[2 * blockSize + index + offset]);
 
 2599            integer index = threadIdx.x + blockIdx.x * blockDim.x;
 
 2600            integer stride = blockDim.x * gridDim.x;
 
 2604            while ((index + offset) < n) {
 
 2606                vel = 
abs(particles->
vx[index + offset]);
 
 2609                        particles->
vy[index + offset] * particles->
vy[index + offset]);
 
 2612                            particles->
vy[index + offset] * particles->
vy[index + offset] +
 
 2613                            particles->
vz[index + offset] * particles->
vz[index + offset]);
 
 2616                particles->
u[index + offset] += 0.5 * particles->
mass[index + offset] * vel * vel;
 
 2622            template <
unsigned int blockSize>
 
 2625                return cuda::launch(
true, executionPolicy, ::Physics::Kernel::calculateAngularMomentumBlockwise<blockSize>,
 
 2626                                    particles, outputData, n);
 
 2630            template <
unsigned int blockSize>
 
 2633                return cuda::launch(
true, executionPolicy, ::Physics::Kernel::sumAngularMomentum<blockSize>,
 
keyType * domainListKeys
domain list node keys
integer * domainListIndex
domain list node index, thus amount of domain list nodes
CUDA_CALLABLE_MEMBER void setBorders(real *borders, integer *relevantDomainListOriginalIndex)
keyType * sortedDomainListKeys
sorted domain list node keys, usable as output for sorting the keys
CUDA_CALLABLE_MEMBER DomainList()
Constructor.
integer * relevantDomainListOriginalIndex
integer * domainListIndices
domain list node indices
integer * domainListLevels
domain list node levels
integer * relevantDomainListIndices
concentrate domain list nodes, usable to reduce domain list indices in respect to some criterion
integer * domainListCounter
domain list node counter, usable as buffer
integer * relevantDomainListProcess
CUDA_CALLABLE_MEMBER void set(integer *domainListIndices, integer *domainListLevels, integer *domainListIndex, integer *domainListCounter, keyType *domainListKeys, keyType *sortedDomainListKeys, integer *relevantDomainListIndices, integer *relevantDomainListLevels, integer *relevantDomainListProcess)
Setter, passing pointer to member variables.
CUDA_CALLABLE_MEMBER ~DomainList()
Destructor.
integer * relevantDomainListLevels
Execution policy/instruction for CUDA kernel execution.
Particle(s) class based on SoA (Structur of Arrays).
real * x
(pointer to) x position (array)
integer * level
(pointer to) level of the (pseudo-)particles
real * g_ay
(pointer to) y gravitational acceleration (array)
real * ay
(pointer to) y acceleration (array)
integer * nodeType
(pointer to) node type
real * az
(pointer to) z acceleration (array)
real * y
(pointer to) y position (array)
real * ax
(pointer to) x acceleration (array)
real * u
energy (kinetic + gravitational for now)
real * g_ax
(pointer to) x gravitational acceleration (array)
real * mass
(pointer to) mass (array)
real * z
(pointer to) z position (array)
real * vz
(pointer to) z velocity (array)
real * g_az
(pointer to) z gravitational acceleration (array)
real * vx
(pointer to) x velocity (array)
real * vy
(pointer to) y velocity (array)
SubDomainKeyTree class handling rank, number of processes and ranges.
keyType * range
Space-filling curve ranges, mapping key ranges/borders to MPI processes.
CUDA_CALLABLE_MEMBER ~SubDomainKeyTree()
Destructor.
CUDA_CALLABLE_MEMBER integer key2proc(keyType key)
Compute particle's MPI process belonging by it's key.
CUDA_CALLABLE_MEMBER bool isDomainListNode(keyType key, integer maxLevel, integer level, Curve::Type curveType=Curve::lebesgue)
Check whether key, thus particle, represents a domain list node.
CUDA_CALLABLE_MEMBER void set(integer rank, integer numProcesses, keyType *range, integer *procParticleCounter)
Setter.
integer * procParticleCounter
particle counter in dependence of MPI process(es)
CUDA_CALLABLE_MEMBER SubDomainKeyTree()
Default Constructor.
integer numProcesses
MPI number of processes.
#define cudaTerminate(...)
#define CUDA_CALLABLE_MEMBER
template real reduceBlockwise< real, 256 >(real *array, real *outputData, int n)
template real markDuplicatesTemp< integer, real >(Tree *tree, DomainList *domainList, integer *array, real *entry1, real *entry2, real *entry3, integer *duplicateCounter, integer *child, int length)
template real blockReduction< real, 256 >(const real *indata, real *outdata)
__global__ void markDuplicatesTemp(Tree *tree, DomainList *domainList, T *array, U *entry1, U *entry2, U *entry3, integer *duplicateCounter, integer *child, int length)
__global__ void reduceBlockwise(T *array, T *outputData, int n)
__global__ void blockReduction(const T *indata, T *outdata)
__global__ void lowestDomainList(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, integer n, integer m)
Kernel to create the lowest domain list.
__global__ void setBorders(DomainList *domainList, real *borders, integer *relevantDomainListOriginalIndex)
__global__ void createDomainList(SubDomainKeyTree *subDomainKeyTree, DomainList *domainList, integer maxLevel, Curve::Type curveType=Curve::lebesgue)
Kernel to create the domain list.
real sortArray(A *arrayToSort, A *sortedArray, B *keyIn, B *keyOut, integer n)
CUDA_CALLABLE_MEMBER integer key2proc(keyType key, SubDomainKeyTree *subDomainKeyTree)
Convert the key to the corresponding process.
CUDA_CALLABLE_MEMBER keyType lebesgue2hilbert(keyType lebesgue, integer maxLevel)
Convert a Lebesgue key to a Hilbert key.
CUDA_CALLABLE_MEMBER void key2Char(keyType key, integer maxLevel, char *keyAsChar)
Convert a key to a char for printing.
__global__ void info(Material *material)
Debug kernel giving information about material(s).
__global__ void test(Particles *particles)
__global__ void mark2remove(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, int *particles2remove, int *counter, int criterion, real d, int numParticles)
Particle class related functions and kernels.
__device__ bool applySphericalCriterion(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, real d, int index)
Check whether particle(s) are within sphere from simulation center.
__device__ bool applyCubicCriterion(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, real d, int index)
Check whether particle(s) are within cube from simulation center.
template real sumAngularMomentum< 256 >(const real *indata, real *outdata)
template real calculateAngularMomentumBlockwise< 256 >(Particles *particles, real *outputData, int n)
__global__ void calculateAngularMomentumBlockwise(Particles *particles, real *outputData, int n)
Calculate angular momentum for all particles (per block).
__global__ void kineticEnergy(Particles *particles, int n)
Calculate kinetic energy.
__global__ void sumAngularMomentum(const real *indata, real *outdata)
Calculate angular momentum: sum over blocks.
Physics related functions and kernels.
const char *const repairTree
const char *const numParticles
real compLocalPseudoParticles(Tree *tree, Particles *particles, DomainList *domainList, int n)
Wrapper for SubDomainKeyTreeNS::Kernel::compLocalPseudoParticles().
real particlesPerProcess(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, integer n, integer m, Curve::Type curveType=Curve::lebesgue)
Wrapper for SubDomainKeyTreeNS::Kernel::particlesPerProcess().
real updateLowestDomainListNodes(Particles *particles, DomainList *lowestDomainList, T *buffer, Entry::Name entry)
Wrapper for SubDomainKeyTreeNS::Kernel::updateLowestDomainListNodes().
real compDomainListPseudoParticlesPerLevel(Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int level)
Wrapper for SubDomainKeyTreeNS::Kernel::compDomainListPseudoParticlesPerLevel().
real compDomainListPseudoParticles(Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n)
Wrapper for SubDomainKeyTreeNS::Kernel::compDomainListPseudoParticles().
real prepareLowestDomainExchange(Particles *particles, DomainList *lowestDomainList, T *buffer, Entry::Name entry)
Wrapper for SubDomainKeyTreeNS::Kernel::prepareLowestDomainExchange().
real zeroDomainListNodes(Particles *particles, DomainList *domainList, DomainList *lowestDomainList)
Wrapper for SubDomainKeyTreeNS::Kernel::zeroDomainListNodes().
real buildDomainTree(Tree *tree, Particles *particles, DomainList *domainList, integer n, integer m)
Wrapper for SubDomainKeyTreeNS::Kernel::buildDomainTree().
real createKeyHistRanges(Helper *helper, integer bins)
real repairTree(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int m, Curve::Type curveType)
Wrapper for SubDomainKeyTreeNS::Kernel::repairTree().
real getParticleKeys(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, keyType *keys, integer maxLevel, integer n, Curve::Type curveType=Curve::lebesgue)
Wrapper for SubDomainKeyTreeNS::Kernel::getParticleKeys().
real keyHistCounter(Tree *tree, Particles *particles, SubDomainKeyTree *subDomainKeyTree, Helper *helper, int bins, int n, Curve::Type curveType=Curve::lebesgue)
real markParticlesProcess(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, integer n, integer m, integer *sortArray, Curve::Type curveType=Curve::lebesgue)
Wrapper for ::SubDomainKeyTreeNS::Kernel::markParticlesPerProcess().
real compLowestDomainListNodes(Tree *tree, Particles *particles, DomainList *lowestDomainList)
Wrapper for SubDomainKeyTreeNS::Kernel::compLowestDomainListNodes().
void set(SubDomainKeyTree *subDomainKeyTree, integer rank, integer numProcesses, keyType *range, integer *procParticleCounter)
Wrapper for SubDomainKeyTreeNS::Kernel::set().
void test(SubDomainKeyTree *subDomainKeyTree)
Wrapper for SubDomainKeyTreeNS::Kernel::test().
real calculateNewRange(SubDomainKeyTree *subDomainKeyTree, Helper *helper, int bins, int n, Curve::Type curveType=Curve::lebesgue)
__global__ void markParticlesProcess(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, integer n, integer m, integer *sortArray, Curve::Type curveType=Curve::lebesgue)
Kernel to mark particle's belonging.
__global__ void test(SubDomainKeyTree *subDomainKeyTree)
Test kernel call (for debugging/testing purposes).
__global__ void compDomainListPseudoParticles(Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n)
__global__ void getParticleKeys(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, keyType *keys, integer maxLevel, integer n, Curve::Type curveType=Curve::lebesgue)
Kernel to get all particle keys (and additional information for debugging purposes).
__global__ void repairTree(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int m, Curve::Type curveType)
Repair tree by removing received and inserted (pseudo-)particles.
__global__ void set(SubDomainKeyTree *subDomainKeyTree, integer rank, integer numProcesses, keyType *range, integer *procParticleCounter)
Kernel call to setter.
__global__ void compLowestDomainListNodes(Tree *tree, Particles *particles, DomainList *lowestDomainList)
Compute/Find lowest domain list nodes.
__global__ void prepareLowestDomainExchange(Particles *particles, DomainList *lowestDomainList, T *buffer, Entry::Name entry)
Prepare lowest domain exchange via MPI by copying to contiguous memory.
__global__ void updateLowestDomainListNodes(Particles *particles, DomainList *lowestDomainList, T *buffer, Entry::Name entry)
Update lowest domain list nodes.
__global__ void particlesPerProcess(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, integer n, integer m, Curve::Type curveType=Curve::lebesgue)
Kernel to check particle's belonging and count in dependence of belonging/process.
__global__ void keyHistCounter(Tree *tree, Particles *particles, SubDomainKeyTree *subDomainKeyTree, Helper *helper, int bins, int n, Curve::Type curveType=Curve::lebesgue)
__global__ void compLocalPseudoParticles(Tree *tree, Particles *particles, DomainList *domainList, int n)
Compute local (tree) pseudo particles.
__global__ void calculateNewRange(SubDomainKeyTree *subDomainKeyTree, Helper *helper, int bins, int n, Curve::Type curveType=Curve::lebesgue)
__global__ void compDomainListPseudoParticlesPerLevel(Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int level)
Compute domain list pseudo particles (per level).
__global__ void buildDomainTree(Tree *tree, Particles *particles, DomainList *domainList, integer n, integer m)
Kernel to build the domain tree.
__global__ void zeroDomainListNodes(Particles *particles, DomainList *domainList, DomainList *lowestDomainList)
Zero domain list nodes.
__global__ void createKeyHistRanges(Helper *helper, integer bins)
SubDomainKeyTree related functions and kernels.
__device__ real sqrt(real a)
Square root of a floating point value.
__device__ real abs(real a)
Absolute value of a floating point value.
void set(T *d_var, T val, std::size_t count=1)
Set device memory to a specific value.
real launch(bool timeKernel, const ExecutionPolicy &policy, void(*f)(Arguments...), Arguments... args)
CUDA execution wrapper function.
#define DIM
Dimension of the problem.