1#include "../include/miluphpc.h"
13 boost::mpi::communicator comm;
15 all_reduce(comm, boost::mpi::inplace_t<integer*>(&
sumParticles), 1, std::plus<integer>());
56 else {
Logger(
DEBUG) <<
"Selected sfc: not valid!"; }
60 Logger(
DEBUG) <<
"Selected sfc: Lebesgue (Default)";
81 Logger(
WARN) <<
"Kernel not available! Selecting cubic spline [1] as default!";
125 boost::mpi::communicator comm;
126 HighFive::File file(filename.c_str(), HighFive::File::ReadOnly);
128 std::vector<std::vector<real>> x;
131 HighFive::DataSet mass = file.getDataSet(
"/m");
132 HighFive::DataSet pos = file.getDataSet(
"/x");
139 integer ppp = m.size()/comm.size();
140 integer ppp_remnant = m.size() % comm.size();
144 Logger(
INFO) <<
"ppp remnant = " << ppp_remnant;
147 if (ppp_remnant == 0) {
151 if (comm.rank() < (comm.size()-1)) {
163 HighFive::File file(filename.c_str(), HighFive::File::ReadOnly);
166 std::vector<real> m, u;
167 std::vector<std::vector<real>> x, v;
168 std::vector<integer> materialId;
171 HighFive::DataSet mass = file.getDataSet(
"/m");
172 HighFive::DataSet pos = file.getDataSet(
"/x");
173 HighFive::DataSet vel = file.getDataSet(
"/v");
176 HighFive::DataSet matId = file.getDataSet(
"/materialId");
178 HighFive::DataSet h5_u = file.getDataSet(
"/u");
187 matId.read(materialId);
202 endIndex += ppp_remnant;
205 for (
int j = startIndex; j < endIndex; j++) {
239 Logger(
DEBUG) <<
"Initialize/Read particle distribution ...";
288 boost::mpi::communicator comm;
290 all_reduce(comm, boost::mpi::inplace_t<integer*>(&
sumParticles), 1, std::plus<integer>());
312 real *profilerTime = &elapsed;
322 Logger(
TIME) <<
"rhs::reset(): " << elapsed <<
" ms";
335 Logger(
TIME) <<
"rhs::boundingBox(): " << time <<
" ms";
339 Logger(
INFO) <<
"checking for nans before assigning particles...";
349 if (assignParticlesToProcess) {
359 Logger(
TIME) <<
"rhs::assignParticles(): " << elapsed <<
" ms";
375 boost::mpi::communicator comm;
377 all_reduce(comm, boost::mpi::inplace_t<integer*>(&
sumParticles), 1, std::plus<integer>());
392 Logger(
TIME) <<
"rhs::tree(): " << elapsed <<
" ms";
404 Logger(
TIME) <<
"rhs::pseudoParticles(): " << elapsed <<
" ms";
431 Logger(
TIME) <<
"rhs::gravity(): " << time <<
" ms";
463 Logger(
TIME) <<
"rhs::sph(): " << elapsed <<
" ms";
487 boost::mpi::communicator comm;
489 const unsigned int blockSizeReduction = 256;
497 real *d_intermediateAngularMomentum;
501 time += Physics::Kernel::Launch::sumAngularMomentum<blockSizeReduction>(d_outputData,
502 d_intermediateAngularMomentum);
509 all_reduce(comm, boost::mpi::inplace_t<real*>(d_intermediateAngularMomentum),
DIM, std::plus<real>());
518 angularMomentum =
sqrt(h_intermediateResult[0] * h_intermediateResult[0] + h_intermediateResult[1] *
519 h_intermediateResult[1]);
522 h_intermediateResult[1] * h_intermediateResult[1] +
523 h_intermediateResult[2] * h_intermediateResult[2]);
529 delete [] h_intermediateResult;
533 Logger(
TIME) <<
"angular momentum: " << time <<
" ms";
544 boost::mpi::communicator comm;
546 const unsigned int blockSizeReduction = 256;
551 time += CudaUtils::Kernel::Launch::reduceBlockwise<real, blockSizeReduction>(
particleHandler->
d_u, d_outputData,
553 real *d_intermediateResult;
557 time += CudaUtils::Kernel::Launch::blockReduction<real, blockSizeReduction>(d_outputData, d_intermediateResult);
559 real h_intermediateResult;
563 all_reduce(comm, boost::mpi::inplace_t<real*>(d_intermediateResult), 1, std::plus<real>());
696 Logger(
TIME) <<
"computeBoundingBox: " << time <<
" ms";
717 bool arrangeAll =
false;
854 d_tempEntry, d_copyBuffer);
861 Logger(
ERROR) <<
"Restart simulation with more memory! exiting ...";
865 delete [] sendLengths;
866 delete [] receiveLengths;
872 (
real)0, resetLength);
874 (
real)0, resetLength);
877 (
real) 0, resetLength);
879 (
real) 0, resetLength);
883 (
real)0, resetLength);
885 (
real)0, resetLength);
888 (
real) 0, resetLength);
890 (
real) 0, resetLength);
894 (
real)0, resetLength);
896 (
real)0, resetLength);
899 (
real) 0, resetLength);
901 (
real) 0, resetLength);
908 (
real) 0, resetLength);
910 (
real) 0, resetLength);
913 (
real) 0, resetLength);
915 (
real) 0, resetLength);
918 (
real) 0, resetLength);
920 (
real) 0, resetLength);
926 (
real)0, resetLength);
943template <
typename U,
typename T>
972 Logger(
DEBUG) <<
"createDomainList: " << time <<
" ms";
977 Logger(
DEBUG) <<
"domainListLength = " << domainListLength;
982 integer treeIndexBeforeBuildingTree;
984 Logger(
DEBUG) <<
"treeIndexBeforeBuildingTree: " << treeIndexBeforeBuildingTree;
993 Logger(
TIME) <<
"buildTree: " << time <<
" ms";
1000 << (double)treeIndex / (
double)
numNodes * 100. <<
" %";
1008 boost::mpi::communicator comm;
1009 all_reduce(comm, boost::mpi::inplace_t<integer*>(&numParticlesSum), 1, std::plus<integer>());
1010 Logger(
INFO) <<
"numParticlesSum: " << numParticlesSum;
1027 for (
int level = 0; level <=
MAX_LEVEL; level++) {
1040 int domainListCounterAfterwards;
1042 Logger(
DEBUG) <<
"domain list counter afterwards : " << domainListCounterAfterwards;
1048 Logger(
TIME) <<
"build(Domain)Tree: " << time <<
" ms";
1075 for (
int level=
MAX_LEVEL; level>0; --level) {
1085 Logger(
TIME) <<
"calculate COM: " << timeCOM <<
" ms";
1100 integer lowestDomainListIndex;
1105 Logger(
DEBUG) <<
"domainListIndex: " << domainListIndex <<
" | lowestDomainListIndex: "
1106 << lowestDomainListIndex;
1107 Logger(
DEBUG) <<
"communicating/exchanging and updating domain list nodes ...";
1109 boost::mpi::communicator comm;
1130 domainListIndex, std::plus<real>());
1154 domainListIndex, std::plus<real>());
1178 domainListIndex, std::plus<real>());
1204 domainListIndex, std::plus<real>());
1215 Logger(
DEBUG) <<
"finish computation of lowest domain list nodes ...";
1223 Logger(
DEBUG) <<
"finish computation of (all) domain list nodes ...";
1225 for (
int domainLevel =
MAX_LEVEL; domainLevel>= 0; domainLevel--) {
1275 Logger(
TIME) <<
"compTheta(): " << time <<
" ms";
1278 integer relevantIndicesCounter;
1285 integer *h_relevantDomainListProcess;
1286 h_relevantDomainListProcess =
new integer[relevantIndicesCounter];
1346 integer pseudoParticlesOffset = 0;
1348 integer particlesOffsetBuffer;
1349 integer pseudoParticlesOffsetBuffer;
1354 int symbolicForceVersion = 4;
1357 if (symbolicForceVersion == 0) {
1361 for (
int level = 0; level <
MAX_LEVEL; level++) {
1375 for (
int relevantIndex = 0; relevantIndex < relevantIndicesCounter; relevantIndex++) {
1376 if (h_relevantDomainListProcess[relevantIndex] == proc) {
1385 d_markedSendIndices, diam,
1405 d_markedSendIndices, diam,
1415 d_markedSendIndices,
1416 &d_particles2SendIndices[particlesOffset],
1417 &d_pseudoParticles2SendIndices[pseudoParticlesOffset],
1418 &d_pseudoParticles2SendLevels[pseudoParticlesOffset],
1419 &d_particles2SendCount[proc],
1420 &d_pseudoParticles2SendCount[proc],
1425 cuda::copy(&pseudoParticlesOffsetBuffer, &d_pseudoParticles2SendCount[proc], 1,
To::host);
1427 Logger(
DEBUG) <<
"particles2SendCount[" << proc <<
"] = " << particlesOffsetBuffer;
1428 Logger(
DEBUG) <<
"pseudoParticles2SendCount[" << proc <<
"] = " << pseudoParticlesOffsetBuffer;
1430 particlesOffset += particlesOffsetBuffer;
1431 pseudoParticlesOffset += pseudoParticlesOffsetBuffer;
1435 else if (symbolicForceVersion == 1) {
1441 for (
int relevantIndex = 0; relevantIndex < relevantIndicesCounter; relevantIndex++) {
1442 if (h_relevantDomainListProcess[relevantIndex] == proc) {
1447 d_markedSendIndices, diam,
1454 d_markedSendIndices,
1455 &d_particles2SendIndices[particlesOffset],
1456 &d_pseudoParticles2SendIndices[pseudoParticlesOffset],
1457 &d_pseudoParticles2SendLevels[pseudoParticlesOffset],
1458 &d_particles2SendCount[proc],
1459 &d_pseudoParticles2SendCount[proc],
1463 cuda::copy(&pseudoParticlesOffsetBuffer, &d_pseudoParticles2SendCount[proc], 1,
To::host);
1465 Logger(
DEBUG) <<
"particles2SendCount[" << proc <<
"] = " << particlesOffsetBuffer;
1466 Logger(
DEBUG) <<
"pseudoParticles2SendCount[" << proc <<
"] = " << pseudoParticlesOffsetBuffer;
1468 particlesOffset += particlesOffsetBuffer;
1469 pseudoParticlesOffset += pseudoParticlesOffsetBuffer;
1474 else if (symbolicForceVersion == 2) {
1484 d_markedSendIndices, diam,
1487 proc, relevantIndicesCounter,
curveType);
1490 d_markedSendIndices,
1491 &d_particles2SendIndices[particlesOffset],
1492 &d_pseudoParticles2SendIndices[pseudoParticlesOffset],
1493 &d_pseudoParticles2SendLevels[pseudoParticlesOffset],
1494 &d_particles2SendCount[proc],
1495 &d_pseudoParticles2SendCount[proc],
1499 cuda::copy(&pseudoParticlesOffsetBuffer, &d_pseudoParticles2SendCount[proc], 1,
To::host);
1501 Logger(
DEBUG) <<
"particles2SendCount[" << proc <<
"] = " << particlesOffsetBuffer;
1502 Logger(
DEBUG) <<
"pseudoParticles2SendCount[" << proc <<
"] = " << pseudoParticlesOffsetBuffer;
1504 particlesOffset += particlesOffsetBuffer;
1505 pseudoParticlesOffset += pseudoParticlesOffsetBuffer;
1510 else if (symbolicForceVersion == 3) {
1513 real time_symbolic = 0.;
1514 real time_collect = 0.;
1522 d_markedSendIndices, diam,
1525 proc, relevantIndicesCounter,
curveType);
1526 time += time_symbolic;
1528 d_markedSendIndices,
1529 &d_particles2SendIndices[particlesOffset],
1530 &d_pseudoParticles2SendIndices[pseudoParticlesOffset],
1531 &d_pseudoParticles2SendLevels[pseudoParticlesOffset],
1532 &d_particles2SendCount[proc],
1533 &d_pseudoParticles2SendCount[proc],
1535 time += time_collect;
1538 cuda::copy(&pseudoParticlesOffsetBuffer, &d_pseudoParticles2SendCount[proc], 1,
To::host);
1540 Logger(
DEBUG) <<
"particles2SendCount[" << proc <<
"] = " << particlesOffsetBuffer;
1541 Logger(
DEBUG) <<
"pseudoParticles2SendCount[" << proc <<
"] = " << pseudoParticlesOffsetBuffer;
1543 particlesOffset += particlesOffsetBuffer;
1544 pseudoParticlesOffset += pseudoParticlesOffsetBuffer;
1547 Logger(
TRACE) <<
"time symbolic: " << time_symbolic <<
" vs. collect: " << time_collect <<
" ms ...";
1550 else if (symbolicForceVersion == 4) {
1553 real time_symbolic = 0.;
1554 real time_collect = 0.;
1564 d_markedSendIndices, diam,
1569 time += time_symbolic;
1571 Logger(
INFO) <<
"finished symbolic force ...";
1577 d_markedSendIndices,
1578 &d_particles2SendIndices[particlesOffset],
1579 &d_pseudoParticles2SendIndices[pseudoParticlesOffset],
1580 &d_pseudoParticles2SendLevels[pseudoParticlesOffset],
1581 &d_particles2SendCount[proc],
1582 &d_pseudoParticles2SendCount[proc],
1585 time += time_collect;
1588 cuda::copy(&pseudoParticlesOffsetBuffer, &d_pseudoParticles2SendCount[proc], 1,
To::host);
1590 Logger(
DEBUG) <<
"particles2SendCount[" << proc <<
"] = " << particlesOffsetBuffer;
1591 Logger(
DEBUG) <<
"pseudoParticles2SendCount[" << proc <<
"] = " << pseudoParticlesOffsetBuffer;
1593 particlesOffset += particlesOffsetBuffer;
1594 pseudoParticlesOffset += pseudoParticlesOffsetBuffer;
1597 Logger(
TRACE) <<
"time symbolic: " << time_symbolic <<
" vs. collect: " << time_collect <<
" ms ...";
1601 Logger(
ERROR) <<
"symbolicForceVersion: " << symbolicForceVersion <<
" not available!";
1614 Logger(
TIME) <<
"symbolicForce: " << time <<
" ms";
1621 cuda::copy(h_pseudoParticles2SendCount, d_pseudoParticles2SendCount,
1627 integer *particleReceiveLengths;
1631 integer *pseudoParticleSendLengths;
1634 integer *pseudoParticleReceiveLengths;
1642 particleSendLengths[proc] = h_particles2SendCount[proc];
1643 pseudoParticleSendLengths[proc] = h_pseudoParticles2SendCount[proc];
1644 Logger(
INFO) <<
"particleSendLengths[" << proc <<
"] = " << particleSendLengths[proc];
1645 Logger(
INFO) <<
"pseudoParticleSendLengths[" << proc <<
"] = " << pseudoParticleSendLengths[proc];
1654 integer particleTotalReceiveLength = 0;
1655 integer particleTotalSendLength = 0;
1658 particleTotalReceiveLength += particleReceiveLengths[proc];
1659 particleTotalSendLength += particleSendLengths[proc];
1663 Logger(
INFO) <<
"gravity: particleTotalReceiveLength: " << particleTotalReceiveLength;
1664 Logger(
INFO) <<
"gravity: particleTotalSendLength: " << particleTotalSendLength;
1669 pseudoParticleReceiveLengths);
1674 integer pseudoParticleTotalReceiveLength = 0;
1675 integer pseudoParticleTotalSendLength = 0;
1678 pseudoParticleTotalReceiveLength += pseudoParticleReceiveLengths[proc];
1679 pseudoParticleTotalSendLength += pseudoParticleSendLengths[proc];
1688 d_markedSendIndices,
1689 d_pseudoParticles2SendLevels,
curveType, pseudoParticleTotalSendLength);
1720 Logger(
INFO) <<
"gravity: pseudoParticleTotalReceiveLength: " << pseudoParticleTotalReceiveLength;
1721 Logger(
INFO) <<
"gravity: pseudoParticleTotalSendLength: " << pseudoParticleTotalSendLength;
1732 int *h_particles2SendIndices =
new int[particleTotalSendLength];
1733 cuda::copy(h_particles2SendIndices, d_particles2SendIndices, particleTotalSendLength,
To::host);
1734 particles2file(std::string{
"Gravity2SendParticles"}, h_particles2SendIndices, particleTotalSendLength);
1735 delete [] h_particles2SendIndices;
1739 int *h_pseudoParticles2SendIndices =
new int[pseudoParticleTotalSendLength];
1740 cuda::copy(h_pseudoParticles2SendIndices, d_pseudoParticles2SendIndices, pseudoParticleTotalSendLength,
To::host);
1741 particles2file(std::string{
"Gravity2SendPseudoParticles"}, h_pseudoParticles2SendIndices, pseudoParticleTotalSendLength);
1742 delete [] h_pseudoParticles2SendIndices;
1746 int *h_sendIndices =
new int[particleTotalSendLength + pseudoParticleTotalSendLength];
1747 cuda::copy(&h_sendIndices[0], d_particles2SendIndices, particleTotalSendLength,
To::host);
1748 cuda::copy(&h_sendIndices[particleTotalSendLength], d_pseudoParticles2SendIndices, pseudoParticleTotalSendLength,
To::host);
1749 particles2file(std::string{
"Gravity2SendBoth"}, h_sendIndices, particleTotalSendLength + pseudoParticleTotalSendLength);
1750 delete [] h_sendIndices;
1765 pseudoParticleTotalSendLength);
1767 pseudoParticleReceiveLengths);
1770 particleTotalSendLength);
1772 particleReceiveLengths);
1788 pseudoParticleTotalSendLength);
1790 pseudoParticleReceiveLengths);
1793 particleTotalSendLength);
1796 particleReceiveLengths);
1812 pseudoParticleTotalSendLength);
1814 pseudoParticleReceiveLengths);
1817 particleTotalSendLength);
1819 particleReceiveLengths);
1838 pseudoParticleTotalSendLength);
1840 pseudoParticleReceiveLengths);
1843 particleTotalSendLength);
1845 particleReceiveLengths);
1848 sendParticles(d_pseudoParticles2SendLevels, d_pseudoParticles2ReceiveLevels, pseudoParticleSendLengths,
1849 pseudoParticleReceiveLengths);
1854 Logger(
TIME) <<
"parallel_force(): sending particles: " << time <<
" ms";
1874 Logger(
ERROR) <<
"Restart simulation with more memory! exiting ...";
1923 Logger(
ERROR) <<
"Restart simulation with more memory! exiting ...";
1936 for (
int level=0; level<
MAX_LEVEL; level++) {
1941 d_pseudoParticles2ReceiveLevels, level,
1947 Logger(
TIME) <<
"parallel_gravity: inserting received pseudo-particles: " << time <<
" ms";
1961 Logger(
TIME) <<
"parallel_gravity: inserting received particles: " << time <<
" ms";
1964 Logger(
DEBUG) <<
"Finished inserting received particles!";
1968 int actualTreeIndex;
1990 Logger(
DEBUG) <<
"computeForcesVersion: " << computeForcesVersion;
2003 real radius = x_radius;
2017 int stackSize = 128;
2018 int blockSize = 256;
2020 if (computeForcesVersion == 0) {
2034 real timeSorting = 0.;
2051 Logger(
TIME) <<
"gravity: presorting: " << timeSorting <<
" ms";
2066 else if (computeForcesVersion == 1) {
2075 else if (computeForcesVersion == 2) {
2084 real timeSorting = 0.;
2102 Logger(
TIME) <<
"gravity: presorting: " << timeSorting <<
" ms";
2113 else if (computeForcesVersion == 3) {
2122 else if (computeForcesVersion == 4) {
2124 real timeSorting = 0.;
2141 Logger(
TIME) <<
"gravity: presorting: " << timeSorting <<
" ms";
2154 Logger(
ERROR) <<
"select proper compute forces version!";
2162 Logger(
TIME) <<
"computeForces: " << time <<
" ms";
2167 int debug_lowestDomainListIndex;
2169 Logger(
DEBUG) <<
"lowest Domain list index: " << debug_lowestDomainListIndex;
2175 Logger(
INFO) <<
"toDeleteLeaf: " << toDeleteLeaf0 <<
", " << toDeleteLeaf1;
2181 Logger(
INFO) <<
"toDeleteNode: " << toDeleteNode0 <<
", " << toDeleteNode1;
2200 delete [] h_relevantDomainListProcess;
2201 delete [] h_particles2SendCount;
2202 delete [] h_pseudoParticles2SendCount;
2240 Logger(
TIME) <<
"sph: compTheta: " << time <<
" ms";
2243 integer relevantIndicesCounter;
2248 integer particlesOffsetBuffer;
2250 integer *h_relevantDomainListProcess;
2251 h_relevantDomainListProcess =
new integer[relevantIndicesCounter];
2261 boost::mpi::communicator comm;
2300 real *d_intermediateResult;
2314 timeElapsed = timer.
elapsed();
2351 Logger(
INFO) <<
"relevantIndicesCounter: " << relevantIndicesCounter;
2398 d_markedSendIndices,
2399 &d_particles2SendIndices[particlesOffset],
2405 Logger(
INFO) <<
"particles2SendCount[" << proc <<
"] = " << particlesOffsetBuffer;
2407 particlesOffset += particlesOffsetBuffer;
2415 Logger(
TIME) <<
"sph: symbolicForce: " << time <<
" ms";
2425 integer *particleReceiveLengths;
2433 particleSendLengths[proc] = h_particles2SendCount[proc];
2442 integer particleTotalReceiveLength = 0;
2443 integer particleTotalSendLength = 0;
2446 particleTotalReceiveLength += particleReceiveLengths[proc];
2447 particleTotalSendLength += particleSendLengths[proc];
2453 int *h_particles2SendIndices =
new int[particleTotalSendLength];
2454 cuda::copy(h_particles2SendIndices, d_particles2SendIndices, particleTotalSendLength,
To::host);
2455 std::string filename =
"SPH2Send";
2456 particles2file(filename, h_particles2SendIndices, particleTotalSendLength);
2457 delete[] h_particles2SendIndices;
2461 Logger(
INFO) <<
"sph: particleTotalReceiveLength: " << particleTotalReceiveLength;
2462 Logger(
INFO) <<
"sph: particleTotalSendLength: " << particleTotalSendLength;
2464 delete [] h_relevantDomainListProcess;
2465 delete [] h_particles2SendCount;
2476 Logger(
ERROR) <<
"Restart simulation with more memory! exiting ...";
2482 particleTotalSendLength);
2484 particleReceiveLengths);
2487 particleTotalSendLength);
2489 particleReceiveLengths);
2492 particleTotalSendLength);
2494 particleReceiveLengths);
2498 particleTotalSendLength);
2500 particleReceiveLengths);
2503 particleTotalSendLength);
2505 particleReceiveLengths);
2508 particleTotalSendLength);
2510 particleReceiveLengths);
2514 particleTotalSendLength);
2516 particleReceiveLengths);
2519 particleTotalSendLength);
2521 particleReceiveLengths);
2524 particleTotalSendLength);
2526 particleReceiveLengths);
2531 particleTotalSendLength);
2533 particleReceiveLengths);
2537 particleTotalSendLength);
2539 particleReceiveLengths);
2543 particleTotalSendLength);
2545 particleReceiveLengths);
2549 particleTotalSendLength);
2551 particleReceiveLengths);
2555 particleTotalSendLength);
2557 particleReceiveLengths);
2561 particleTotalSendLength);
2563 particleReceiveLengths);
2569 Logger(
TIME) <<
"sph: sending particles: " << time;
2587 Logger(
DEBUG) <<
"duplicates: " << duplicates <<
" between: " << 0 <<
" and "
2589 if (duplicates > 0) {
2614 Logger(
TIME) <<
"sph: calculate centers of mass: " << time <<
" ms";
2617 Logger(
TIME) <<
"sph: inserting received particles: " << time <<
" ms";
2628 Logger(
ERROR) <<
"Restart simulation with more memory! exiting ...";
2708 if (fixedRadiusNN_version == 0) {
2715 else if (fixedRadiusNN_version == 1) {
2721 else if (fixedRadiusNN_version == 2) {
2727 else if (fixedRadiusNN_version == 3) {
2734 Logger(
ERROR) <<
"fixedRadiusNN version not available! Exiting ...";
2741 Logger(
TIME) <<
"sph: fixedRadiusNN: " << time <<
" ms";
2755 Logger(
TIME) <<
"sph: calculateDensity: " << time <<
" ms";
2763 Logger(
TIME) <<
"sph: calculateSoundSpeed: " << time <<
" ms";
2776 Logger(
TIME) <<
"sph: calculatePressure: " << time <<
" ms";
2788 particleTotalSendLength);
2790 particleReceiveLengths);
2794 particleTotalSendLength);
2796 particleReceiveLengths);
2800 particleTotalSendLength);
2802 particleReceiveLengths);
2806 particleTotalSendLength);
2808 particleReceiveLengths);
2813 Logger(
TIME) <<
"sph: sending particles (again): " << time;
2822 Logger(
TIME) <<
"sph: internalForces: " << time <<
" ms";
2830 Logger(
TIME) <<
"sph: totalTime: " << totalTime <<
" ms";
2844 Logger(
INFO) <<
"rangePerProc: " << rangePerProc;
2887 boost::mpi::communicator comm;
2895 int totalAmountOfParticles = 0;
2898 totalAmountOfParticles += processParticleCounts[i];
2903 Logger(
INFO) <<
"aimedParticlesPerProcess = " << aimedParticlesPerProcess;
2909 delete [] processParticleCounts;
2917 boost::mpi::communicator comm;
2920 all_reduce(comm, boost::mpi::inplace_t<integer*>(&
sumParticles), 1, std::plus<integer>());
2933 std::vector<int> sizes;
2935 sizes.push_back(numParticlesPerProcess[i]);
2947 all_gatherv(comm, d_localKeys, d_keys, sizes);
2966 &d_sortedKeys[proc * aimedParticlesPerProcess], 1,
To::host);
2982 delete [] numParticlesPerProcess;
2983 delete [] h_sortedKeys;
2997 boost::mpi::communicator comm;
3007 all_reduce(comm, boost::mpi::inplace_t<integer*>(
buffer->
d_integerBuffer), bins - 1, std::plus<integer>());
3029 bins, aimedParticlesPerProcess,
curveType);
3049 cuda::set(d_particles2remove_counter, 0, 1);
3053 d_particles2remove, d_particles2remove_counter,
3058 int h_particles2remove_counter;
3060 Logger(
INFO) <<
"#particles to be removed: " << h_particles2remove_counter;
3122 (
real)0, h_particles2remove_counter);
3124 (
real)0, h_particles2remove_counter);
3126 (
real)0, h_particles2remove_counter);
3129 (
real)0, h_particles2remove_counter);
3131 (
real)0, h_particles2remove_counter);
3133 (
real)0, h_particles2remove_counter);
3136 (
real)0, h_particles2remove_counter);
3138 (
real)0, h_particles2remove_counter);
3140 (
real)0, h_particles2remove_counter);
3144 (
real)0, h_particles2remove_counter);
3146 (
integer)0, h_particles2remove_counter);
3149 (
integer)0, h_particles2remove_counter);
3151 (
real)0, h_particles2remove_counter);
3153 (
real)0, h_particles2remove_counter);
3155 (
real)0, h_particles2remove_counter);
3157 (
real)0, h_particles2remove_counter);
3159 (
real)0, h_particles2remove_counter);
3167 Logger(
INFO) <<
"removing #" << h_particles2remove_counter <<
" particles!";
3173template <
typename T>
3177 boost::mpi::communicator comm;
3179 std::vector<boost::mpi::request> reqParticles;
3180 std::vector<boost::mpi::status> statParticles;
3188 reqParticles.push_back(comm.isend(proc, 17, &sendBuffer[sendOffset], sendLengths[proc]));
3189 statParticles.push_back(comm.recv(proc, 17, &receiveBuffer[receiveOffset], receiveLengths[proc]));
3191 receiveOffset += receiveLengths[proc];
3192 sendOffset += sendLengths[proc];
3196 boost::mpi::wait_all(reqParticles.begin(), reqParticles.end());
3326template <
typename T>
3329 boost::mpi::communicator comm;
3331 std::vector<boost::mpi::request> reqParticles;
3332 std::vector<boost::mpi::status> statParticles;
3343 h_entryBuffer[i] = (T)0;
3351 reqParticles.push_back(comm.isend(proc, 17, &h_entry[0], sendLengths[proc]));
3355 reqParticles.push_back(comm.isend(proc, 17,
3357 sendLengths[proc]));
3362 statParticles.push_back(comm.recv(proc, 17, &h_entryBuffer[0] + receiveOffset,
3363 receiveLengths[proc]));
3368 receiveOffset += receiveLengths[proc];
3373 boost::mpi::wait_all(reqParticles.begin(), reqParticles.end());
3378 delete [] h_entryBuffer;
3401 entryBuffer, receiveOffset);
3413 boost::mpi::communicator comm;
3415 all_reduce(comm, boost::mpi::inplace_t<integer*>(&
sumParticles), 1, std::plus<integer>());
3417 std::stringstream stepss;
3418 stepss << std::setw(6) << std::setfill(
'0') << step;
3421 HighFive::File::ReadWrite | HighFive::File::Create | HighFive::File::Truncate,
3422 HighFive::MPIOFileDriver(MPI_COMM_WORLD, MPI_INFO_NULL));
3426 std::vector <size_t> dataSpaceDims(2);
3428 dataSpaceDims[1] =
DIM;
3438 Logger(
INFO) <<
"rangeValues[" << i <<
"] = " << rangeValues[i];
3441 ranges.write(rangeValues);
3443 delete [] rangeValues;
3446 std::vector<real> time;
3449 HighFive::DataSet h5_time = h5file.createDataSet<
real>(
"/time", HighFive::DataSpace::From(time));
3451 HighFive::DataSet pos = h5file.createDataSet<
real>(
"/x", HighFive::DataSpace(dataSpaceDims));
3452 HighFive::DataSet vel = h5file.createDataSet<
real>(
"/v", HighFive::DataSpace(dataSpaceDims));
3453 HighFive::DataSet key = h5file.createDataSet<
keyType>(
"/key", HighFive::DataSpace(
sumParticles));
3454 HighFive::DataSet h5_mass = h5file.createDataSet<
real>(
"/m", HighFive::DataSpace(
sumParticles));
3455 HighFive::DataSet h5_proc = h5file.createDataSet<
int>(
"/proc", HighFive::DataSpace(
sumParticles));
3457 HighFive::DataSet h5_rho = h5file.createDataSet<
real>(
"/rho", HighFive::DataSpace(
sumParticles));
3458 HighFive::DataSet h5_p = h5file.createDataSet<
real>(
"/p", HighFive::DataSpace(
sumParticles));
3459 HighFive::DataSet h5_e = h5file.createDataSet<
real>(
"/e", HighFive::DataSpace(
sumParticles));
3460 HighFive::DataSet h5_sml = h5file.createDataSet<
real>(
"/sml", HighFive::DataSpace(
sumParticles));
3461 HighFive::DataSet h5_noi = h5file.createDataSet<
integer>(
"/noi", HighFive::DataSpace(
sumParticles));
3462 HighFive::DataSet h5_cs = h5file.createDataSet<
real>(
"/cs", HighFive::DataSpace(
sumParticles));
3465 HighFive::DataSet h5_totalEnergy;
3467 h5_totalEnergy = h5file.createDataSet<
real>(
"/totalEnergy",
3470 HighFive::DataSet h5_totalAngularMomentum;
3472 h5_totalAngularMomentum = h5file.createDataSet<
real>(
"/totalAngularMomentum",
3479 std::vector<std::vector<real>> x, v;
3480 std::vector<keyType> k;
3481 std::vector<real> mass;
3482 std::vector<int> particleProc;
3484 std::vector<real> rho, p, e, sml, cs;
3485 std::vector<integer> noi;
3528 k.push_back(h_keys[i]);
3555 boost::mpi::maximum<integer>());
3557 std::size_t nOffset = 0;
3560 nOffset += procN[proc];
3562 Logger(
DEBUG) <<
"Offset to write to datasets: " << std::to_string(nOffset);
3566 h5_time.write(time);
3569 pos.select({nOffset, 0},
3571 vel.select({nOffset, 0},
3575 h5_proc.select({nOffset}, {std::size_t(
numParticlesLocal)}).write(particleProc);
3598 for (
int i=0; i<
DIM; i++) {
3599 Logger(
DEBUG) <<
"com[" << i <<
"] = " << h_com[i];
3603 HighFive::DataSet _com = h5file.createDataSet<
real>(
"/COM", HighFive::DataSpace(
DIM));
3624 boost::mpi::communicator comm;
3625 int totalLength = length;
3626 all_reduce(comm, boost::mpi::inplace_t<integer*>(&totalLength), 1, std::plus<integer>());
3628 std::stringstream file;
3632 HighFive::File h5file(file.str(),
3633 HighFive::File::ReadWrite | HighFive::File::Create | HighFive::File::Truncate,
3634 HighFive::MPIOFileDriver(MPI_COMM_WORLD, MPI_INFO_NULL));
3636 std::vector <size_t> dataSpaceDims(2);
3637 dataSpaceDims[0] = std::size_t(totalLength);
3638 dataSpaceDims[1] =
DIM;
3640 HighFive::DataSet
ranges = h5file.createDataSet<
keyType>(
"/ranges",
3649 Logger(
INFO) <<
"rangeValues[" << i <<
"] = " << rangeValues[i];
3652 ranges.write(rangeValues);
3654 delete [] rangeValues;
3656 HighFive::DataSet pos = h5file.createDataSet<
real>(
"/x", HighFive::DataSpace(dataSpaceDims));
3658 HighFive::DataSet key = h5file.createDataSet<
keyType>(
"/key", HighFive::DataSpace(totalLength));
3662 std::vector<std::vector<real>> x, v;
3663 std::vector<keyType> k;
3686 for (
int i=0; i<length; i++) {
3699 k.push_back(h_keys[particleIndices[i]]);
3715 boost::mpi::maximum<integer>());
3717 std::size_t nOffset = 0;
3720 nOffset += procN[proc];
3722 Logger(
DEBUG) <<
"Offset to write to datasets: " << std::to_string(nOffset);
3726 pos.select({nOffset, 0},
3727 {std::size_t(length), std::size_t(
DIM)}).write(x);
3730 key.select({nOffset}, {std::size_t(length)}).write(k);
3747 HighFive::Group header = h5file.createGroup(
"config");
3749 HighFive::Attribute b_1 = header.createAttribute<
int>(
"test_1", HighFive::DataSpace::From(test_1));
3751 double test_2 = 1.5;
3752 HighFive::Attribute b_2 = header.createAttribute<
double>(
"test_2", test_2);
3767 size_t free_bytes, total_bytes, used_bytes;
3768 cudaMemGetInfo(&free_bytes, &total_bytes);
3769 used_bytes = total_bytes - free_bytes;
3777 Logger(
INFO) <<
"MEMORY INFO: used: " << std::setprecision(4) << 9.31e-10 * used_bytes <<
" GB ("
3778 << (double)used_bytes/(
double)total_bytes * 100. <<
" %)"
3779 <<
" free: " << 9.31e-10 * free_bytes <<
" GB ("
3780 << (double)free_bytes/(
double)total_bytes * 100. <<
" %)"
3781 <<
" available: " << 9.31e-10 * total_bytes <<
" GB";
keyType * d_sortedDomainListKeys
device sorted domain list keys
keyType * d_domainListKeys
device domain list key
integer * d_relevantDomainListProcess
void reset()
Resetting entries.
DomainList * d_domainList
device instance of DomainList class
integer * d_domainListIndex
device domain list index
integer * d_domainListCounter
device domain list counter
void vector2file(const std::string &path, std::vector< T > data)
void value2file(const std::string &path, T value)
Write value to single value data set.
integer * d_integerBuffer3
idInteger * d_idIntegerBuffer
keyType * d_keyTypeBuffer
idInteger * d_idIntegerBuffer1
integer * d_integerBuffer1
keyType * d_keyTypeBuffer2
integer * d_integerBuffer
integer * d_integerBuffer4
integer * d_integerBuffer2
keyType * d_keyTypeBuffer1
Material * h_materials
host instance of material class
void copy(To::Target target, integer index=-1)
integer numMaterials
number of materials or rather material instances
Material * d_materials
device instance of material class
CUDA_CALLABLE_MEMBER void info()
DomainListHandler * domainListHandler
Instance to handle the DomainList instance on device and host.
real totalEnergy
total energy
void afterIntegrationStep()
real parallel_sph()
Parallel version regarding computation of SPH-stuff.
real parallel_pseudoParticles()
Parallel version regarding computation of pseudo-particles.
ParticleHandler * particleHandler
Instance to handle the Particles instance on device and host.
real tree()
Wrapper function for building the tree (and domain tree).
void updateRangeApproximately(int aimedParticlesPerProcess, int bins=5000)
Update the ranges (approximately and dynamically).
real rhs(int step, bool selfGravity=true, bool assignParticlesToProcess=true)
real reset()
Reset arrays, values, ...
real parallel_tree()
Parallel version regarding tree-stuff.
real angularMomentum()
Calculate the angular momentum for all particles.
Curve::Type curveType
Space-filling curve type to be used (Lebesgue or Hilbert)
real assignParticles()
Assign particles to correct process in dependence of particle key and ranges.
H5Profiler & profiler
H5 profiler instance.
void fixedLoadBalancing()
Load balancing via equidistant ranges.
integer numNodes
number of nodes (to be allocated)
void dynamicLoadBalancing(int bins=5000)
Pre-calculations for updateRangeApproximately.
Miluphpc(SimulationParameters simulationParameters)
Constructor to set up simulation.
~Miluphpc()
Destructor freeing class instances.
MaterialHandler * materialHandler
Instance to handle Materials instances on device and host.
SubDomainKeyTreeHandler * subDomainKeyTreeHandler
Instance to handle the SubDomainKeyTree instance on device and host.
integer numParticles
number of particles (to be allocated)
real energy()
Calculate the total amount of energy.
int subStep
current sub-step (there are possibly more sub-steps within a step!)
TreeHandler * treeHandler
Instance to handle the Tree instance on device and host.
integer sendParticles(T *sendBuffer, T *receiveBuffer, integer *sendLengths, integer *receiveLengths)
Send particles/Exchange particles among MPI processes.
DomainListHandler * lowestDomainListHandler
Instance to handle the (lowest) DomainList instance on device and host.
real arrangeParticleEntries(U *sortArray, U *sortedArray, T *entry, T *temp)
Function to sort an array entry in dependence of another array sortArray
real removeParticles()
Remove particles in dependence of some criterion.
real gravity()
Wrapper function for Gravity-related stuff.
SimulationParameters simulationParameters
buffer (need for revising)
real configParameters2file(HighFive::File &h5file)
void prepareSimulation()
Prepare the simulation.
void distributionFromFile(const std::string &filename)
Read initial/particle distribution file (in parallel)
integer sendParticlesEntry(integer *sendLengths, integer *receiveLengths, T *entry, T *entryBuffer, T *copyBuffer)
Send particles/Exchange particles among MPI processes.
integer numParticlesLocal
SPH::KernelHandler kernelHandler
Instance to handle the SPH Kernel instance on device and host.
real pseudoParticles()
Wrapper function for calculating pseudo-particles.
void numParticlesFromFile(const std::string &filename)
Determine the number of particles (numParticles and numParticlesLocal) from the initial file/particle distribution.
real h_searchRadius
search radius for SPH (MPI-process overarching) neighbor search
void updateRange(int aimedParticlesPerProcess)
Update the range in dependence on number of (MPI) processes and aimed particles per process.
real parallel_gravity()
Parallel version regarding computation of gravitational stuff.
HelperHandler * buffer
buffer instance
integer sumParticles
(real) number of particles on all processes
SimulationTimeHandler * simulationTimeHandler
Instance to handle the SimulationTime instances on device and host.
real totalAngularMomentum
total angular momentum
real boundingBox()
Calculate bounding boxes/simulation domain.
real particles2file(int step)
integer * d_materialId
device material identifier
integer * h_noi
host number of interactions
real * h_e
host internal energy
real * d_z
device z position
integer * d_nnl
device near(est) neighbor list
real * d_ay
device y acceleration
real * d_mass
device mass array
real * h_vx
host x velocity
real * h_y
host y position
real * d_az
device z acceleration
void copyDistribution(To::Target target=To::device, bool velocity=true, bool acceleration=true, bool includePseudoParticles=false)
real * d_e
device internal energy
real * d_vx
device x velocity
Particles * d_particles
device instance of particles class
Particles * h_particles
host instance of particles class
real * h_vz
host z velocity
idInteger * d_uid
device unique identifier
real * h_x
host x position
real * d_ax
device x acceleration
real * d_y
device y position
real * d_sml
device smoothing length
real * d_vy
device y velocity
real * h_sml
host smoothing length
real * h_cs
host speed of sound
real * h_z
host z position
real * d_cs
device speed of sound
real * d_x
device x position
void copySPH(To::Target target)
integer * d_noi
device number of interactions
real * d_p
device pressure
real * h_vy
host y velocity
real * d_vz
device z velocity
real * d_rho
device density
real * e
(pointer to) internal energy (array)
integer * materialId
(pointer to) material identifier (array)
real * x
(pointer to) x position (array)
idInteger * uid
(pointer to) unique identifier (array)
real * y
(pointer to) y position (array)
real * sml
(pointer to) smoothing length (array)
real * mass
(pointer to) mass (array)
real * z
(pointer to) z position (array)
real * vz
(pointer to) z velocity (array)
real * vx
(pointer to) x velocity (array)
real * vy
(pointer to) y velocity (array)
SPH smoothing kernel handler.
SPH_kernel kernel
SPH smoothing kernel typedef/kind of function pointer.
void copy(To::Target target)
integer h_rank
host MPI rank
SubDomainKeyTree * d_subDomainKeyTree
device instance of class SubDomainKeyTree
SubDomainKeyTree * h_subDomainKeyTree
host instance of class SubDomainKeyTree
keyType * d_range
device range(s)
void reset()
Resetting member variables.
integer * h_procParticleCounter
host counter for particles in dependence of MPI process belonging
integer h_numProcesses
host MPI number of processes
void copy(To::Target target=To::device, bool range=true, bool counter=true)
Copy (parts of the) SubDomainKeyTree instance(s) between host and device.
keyType * range
Space-filling curve ranges, mapping key ranges/borders to MPI processes.
integer numProcesses
MPI number of processes.
double elapsed() const
Get elapsed time since instantiation/latest reset.
void reset()
Reset timer instance.
Tree * d_tree
device instance of Class Tree
real * h_minX
host (pointer to) bounding box minimal x
real * h_minY
host (pointer to) bounding box minimal y
integer * d_sorted
device (pointer to) sorted (array)
real * h_maxX
host (pointer to) bounding box maximal x
integer * h_toDeleteLeaf
host (pointer to) array remembering leaf indices for rebuilding after temporarily inserting particles
integer * h_toDeleteNode
host (pointer to) array remembering node indices for rebuilding after temporarily inserting particles
integer * d_start
device (pointer to) start (array)
integer * d_toDeleteLeaf
device (pointer to) array remembering leaf indices for rebuilding after temporarily inserting particles
integer * d_index
device (pointer to) index
void copy(To::Target target=To::device, bool borders=true, bool index=true, bool toDelete=true)
Copy (parts of the) tree instance(s) between host and device.
real * h_maxY
host (pointer to) bounding box maximal y
real * h_minZ
host (pointer to) bounding box minimal z
real * h_maxZ
host (pointer to) bounding box maximal z
void globalizeBoundingBox(Execution::Location exLoc=Execution::device)
All reduce bounding box(es)/borders (among MPI processes)
integer * d_toDeleteNode
device (pointer to) array remembering node indices for rebuilding after temporarily inserting particles
#define gpuErrorcheck(ans)
check CUDA call
real findDuplicateEntries(T *array1, T *array2, integer *duplicateCounter, int length)
real collectValues(integer *indices, real *entries, real *collector, integer count)
real createDomainList(SubDomainKeyTree *subDomainKeyTree, DomainList *domainList, integer maxLevel, Curve::Type curveType=Curve::lebesgue)
Wrapper for DomainListNS::Kernel::createDomainList().
real lowestDomainList(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, integer n, integer m)
Wrapper for ::DomainListNS::Kernel::lowestDomainList().
__global__ void collectSendIndices_test4(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *pseudoParticles2Send, integer *pseudoParticlesLevel, integer *particlesCount, integer *pseudoParticlesCount, integer numParticlesLocal, integer numParticles, integer treeIndex, int currentProc, Curve::Type curveType)
__global__ void testSendIndices(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, integer *sendIndices, integer *markedSendIndices, integer *levels, Curve::Type curveType, integer length)
Test the send indices.
__global__ void intermediateSymbolicForce(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, integer *sendIndices, real diam, real theta_, integer n, integer m, integer relevantIndex, integer level, Curve::Type curveType)
__global__ void computeForces_v1_2(Tree *tree, Particles *particles, real radius, integer n, integer m, SubDomainKeyTree *subDomainKeyTree, real theta, real smoothing, bool potentialEnergy)
__global__ void symbolicForce_test3(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, integer *sendIndices, real diam, real theta_, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, Curve::Type curveType)
__global__ void computeForces_v1_1(Tree *tree, Particles *particles, real radius, integer n, integer m, SubDomainKeyTree *subDomainKeyTree, real theta, real smoothing, bool potentialEnergy)
__global__ void collectSendIndices(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *pseudoParticles2Send, integer *pseudoParticlesLevel, integer *particlesCount, integer *pseudoParticlesCount, integer n, integer length, Curve::Type curveType)
Collect the send indices.
__global__ void computeForces_v2(Tree *tree, Particles *particles, real radius, integer n, integer m, integer blockSize, integer warp, integer stackSize, SubDomainKeyTree *subDomainKeyTree, real theta, real smoothing, bool potentialEnergy)
__global__ void computeForces_v2_1(Tree *tree, Particles *particles, integer n, integer m, integer blockSize, integer warp, integer stackSize, SubDomainKeyTree *subDomainKeyTree, real theta, real smoothing, bool potentialEnergy)
__global__ void computeForces_v1(Tree *tree, Particles *particles, real radius, integer n, integer m, SubDomainKeyTree *subDomainKeyTree, real theta, real smoothing, bool potentialEnergy)
__global__ void symbolicForce_test4(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, integer *sendIndices, real diam, real theta_, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, Curve::Type curveType)
real copyArray(T *targetArray, T *sourceArray, integer n)
real resetArray(T *array, T value, integer n)
real sortArray(A *arrayToSort, A *sortedArray, B *keyIn, B *keyOut, integer n)
real sortKeys(A *keysToSort, A *sortedKeys, int n)
T reduceAndGlobalize(T *d_sml, T *d_aggregate, integer n, Reduction::Type reductionType)
real resetArrays(Tree *tree, Particles *particles, integer *mutex, integer n, integer m, bool time=false)
real mark2remove(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, int *particles2remove, int *counter, int criterion, real d, int numParticles)
real check4nans(Particles *particles, integer n)
real kineticEnergy(Particles *particles, int n)
Wrapper for: Physics::Kernel::kineticEnergy().
const char *const gravityParticles
const char *const gravityPseudoParticles
const char *const gravityPseudoParticles
const char *const gravityParticles
const char *const compTheta
const char *const repairTree
const char *const symbolicForce
const char *const insertReceivedParticles
const char *const insertReceivedPseudoParticles
const char *const sending
const char *const insertReceivedParticles
const char *const internalForces
const char *const soundSpeed
const char *const symbolicForce
const char *const pressure
const char *const sending
const char *const fixedRadiusNN
const char *const determineSearchRadii
const char *const repairTree
const char *const density
const char *const compTheta
const char *const buildDomain
const char *const createDomain
const char *const pseudoParticle
const char *const gravity
const char *const assignParticles
const char *const boundingBox
real symbolicForce_test2(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, Curve::Type curveType)
real calculateDensity(::SPH::SPH_kernel kernel, Tree *tree, Particles *particles, int *interactions, int numParticles)
Wrapper for SPH::Kernel::calculateDensity().
real fixedRadiusNN_sharedMemory(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_sharedMemory().
real calculateSoundSpeed(Particles *particles, Material *materials, int numParticles)
Wrapper for SPH::Kernel::calculateSoundSpeed().
real compTheta(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, Curve::Type curveType)
Wrapper for SPH::Kernel::compTheta().
real initializeSoundSpeed(Particles *particles, Material *materials, int numParticles)
Wrapper for SPH::Kernel::initializeSoundSpeed().
real insertReceivedParticles(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int m)
Wrapper for SPH::Kernel::insertReceivedParticles().
real internalForces(::SPH::SPH_kernel kernel, Material *materials, Tree *tree, Particles *particles, int *interactions, int numRealParticles)
Wrapper for SPH::Kernel::internalForces().
real collectSendIndices_test2(Tree *tree, Particles *particles, integer *sendIndices, integer *particles2Send, integer *particlesCount, integer numParticlesLocal, integer numParticles, integer treeIndex, int currentProc, Curve::Type curveType)
real fixedRadiusNN_bruteForce(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_bruteForce().
real fixedRadiusNN(Tree *tree, Particles *particles, integer *interactions, real radius, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN().
real fixedRadiusNN_withinBox(Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_withinBox().
real fixedRadiusNN_variableSML(Material *materials, Tree *tree, Particles *particles, integer *interactions, integer numParticlesLocal, integer numParticles, integer numNodes)
Wrapper for SPH::Kernel::fixedRadiusNN_variableSML().
real calculatePressure(Material *materials, Particles *particles, int numParticles)
Wrapper for SPH::Kernel::calculatePressure().
__global__ void symbolicForce_test2(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, Curve::Type curveType)
__global__ void symbolicForce_test(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *lowestDomainList, integer *sendIndices, real searchRadius, integer n, integer m, integer relevantProc, integer relevantIndicesCounter, integer relevantIndexOld, Curve::Type curveType)
real particlesPerProcess(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, integer n, integer m, Curve::Type curveType=Curve::lebesgue)
Wrapper for SubDomainKeyTreeNS::Kernel::particlesPerProcess().
real updateLowestDomainListNodes(Particles *particles, DomainList *lowestDomainList, T *buffer, Entry::Name entry)
Wrapper for SubDomainKeyTreeNS::Kernel::updateLowestDomainListNodes().
real compDomainListPseudoParticlesPerLevel(Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int level)
Wrapper for SubDomainKeyTreeNS::Kernel::compDomainListPseudoParticlesPerLevel().
real prepareLowestDomainExchange(Particles *particles, DomainList *lowestDomainList, T *buffer, Entry::Name entry)
Wrapper for SubDomainKeyTreeNS::Kernel::prepareLowestDomainExchange().
real zeroDomainListNodes(Particles *particles, DomainList *domainList, DomainList *lowestDomainList)
Wrapper for SubDomainKeyTreeNS::Kernel::zeroDomainListNodes().
real buildDomainTree(Tree *tree, Particles *particles, DomainList *domainList, integer n, integer m)
Wrapper for SubDomainKeyTreeNS::Kernel::buildDomainTree().
real createKeyHistRanges(Helper *helper, integer bins)
real repairTree(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, DomainList *domainList, DomainList *lowestDomainList, int n, int m, Curve::Type curveType)
Wrapper for SubDomainKeyTreeNS::Kernel::repairTree().
real getParticleKeys(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, keyType *keys, integer maxLevel, integer n, Curve::Type curveType=Curve::lebesgue)
Wrapper for SubDomainKeyTreeNS::Kernel::getParticleKeys().
real keyHistCounter(Tree *tree, Particles *particles, SubDomainKeyTree *subDomainKeyTree, Helper *helper, int bins, int n, Curve::Type curveType=Curve::lebesgue)
real markParticlesProcess(SubDomainKeyTree *subDomainKeyTree, Tree *tree, Particles *particles, integer n, integer m, integer *sortArray, Curve::Type curveType=Curve::lebesgue)
Wrapper for ::SubDomainKeyTreeNS::Kernel::markParticlesPerProcess().
real compLowestDomainListNodes(Tree *tree, Particles *particles, DomainList *lowestDomainList)
Wrapper for SubDomainKeyTreeNS::Kernel::compLowestDomainListNodes().
real calculateNewRange(SubDomainKeyTree *subDomainKeyTree, Helper *helper, int bins, int n, Curve::Type curveType=Curve::lebesgue)
real calculateCentersOfMass(Tree *tree, Particles *particles, integer n, integer level, bool time=false)
real prepareSorting(Tree *tree, Particles *particles, integer n, integer m)
real globalCOM(Tree *tree, Particles *particles, real com[DIM])
Wrapper for TreeNS::Kernel::globalCOM()
real computeBoundingBox(Tree *tree, Particles *particles, integer *mutex, integer n, integer blockSize, bool time=false)
Wrapper for TreeNS::Kernel::computeBoundingBox()
real buildTree(Tree *tree, Particles *particles, integer n, integer m, bool time=false)
Wrapper for TreeNS::Kernel::buildTree()
__global__ void centerOfMass(Tree *tree, Particles *particles, integer n)
__device__ real sqrt(real a)
Square root of a floating point value.
__device__ real abs(real a)
Absolute value of a floating point value.
__device__ real max(real a, real b)
Maximum value out of two floating point values.
void copy(T *h_var, T *d_var, std::size_t count=1, To::Target copyTo=To::device)
Copy between host and device and vice-versa.
void set(T *d_var, T val, std::size_t count=1)
Set device memory to a specific value.
void free(T *d_var)
Free device memory.
void malloc(T *&d_var, std::size_t count)
Allocate device memory.
void messageLengths(SubDomainKeyTree *subDomainKeyTree, T *toSend, T *toReceive)
Send array with length of number of MPI processes across processes.
#define MAX_NUM_INTERACTIONS
#define DIM
Dimension of the problem.
bool calculateAngularMomentum
bool loadBalancing
apply load balancing
std::string logDirectory
log file(s) directory
real theta
clumping parameter
real maxTimeStep
max (allowed) time step
real removeParticlesDimension
int smoothingKernelSelection
real timeEnd
end time of simulation
std::string materialConfigFile
input file containing material configurations/parameters
std::string directory
output file(s) directory
bool particlesSent2H5
log particles sent to HDF5 file
bool calculateCenterOfMass
std::string inputFile
input file containing initial particle distribution
int sphFixedRadiusNNVersion
int sfcSelection
space-filling curve selection
int removeParticlesCriterion
real particleMemoryContingent