diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H index 03f2ad0882..4ad5eb01bb 100644 --- a/Src/Particle/AMReX_ParticleCommunication.H +++ b/Src/Particle/AMReX_ParticleCommunication.H @@ -10,11 +10,14 @@ #include #include #include +#include #include namespace amrex { +class ParticleContainerBase; + struct NeighborUnpackPolicy { template @@ -137,6 +140,8 @@ struct ParticleCopyPlan BL_PROFILE("ParticleCopyPlan::build"); m_local = local; + ParmParse pp("particles"); + pp.query("do_one_sided_comms", m_do_one_sided_comms); const int ngrow = 1; // note - fix @@ -265,7 +270,7 @@ struct ParticleCopyPlan m_superparticle_size += num_real_comm_comp * sizeof(typename PC::ParticleType::RealType) + num_int_comm_comp * sizeof(int); - buildMPIStart(pc.BufferMap(), m_superparticle_size); + buildMPIStart(pc, pc.BufferMap(), m_superparticle_size); } void clear (); @@ -274,14 +279,14 @@ struct ParticleCopyPlan private: - void buildMPIStart (const ParticleBufferMap& map, Long psize); + void buildMPIStart (const ParticleContainerBase& pc, const ParticleBufferMap& map, Long psize); // // Snds - a Vector with the number of bytes that is process will send to each proc. // Rcvs - a Vector that, after calling this method, will contain the // number of bytes this process will receive from each proc. // - void doHandShake (const Vector& Snds, Vector& Rcvs) const; + void doHandShake (const ParticleContainerBase& pc, const Vector& Snds, Vector& Rcvs) const; // // In the local version of this method, each proc knows which other @@ -294,14 +299,22 @@ private: // In the global version, we don't know who we'll receive from, so we // need to do some collective communication first. // - static void doHandShakeGlobal (const Vector& Snds, Vector& Rcvs); + static void doHandShakeReduceScatter (const Vector& Snds, Vector& Rcvs); + + // + // Another version of the global handshake implemented with MPI-3 + // one-sided communication. + // + static void doHandShakeOneSided (const ParticleContainerBase& pc, + const Vector& Snds, Vector& Rcvs); // // Another version of the above that is implemented using MPI All-to-All // static void doHandShakeAllToAll (const Vector& Snds, Vector& Rcvs); - bool m_local; + bool m_local = false; + int m_do_one_sided_comms = 0; }; struct GetSendBufferOffset diff --git a/Src/Particle/AMReX_ParticleCommunication.cpp b/Src/Particle/AMReX_ParticleCommunication.cpp index 18b63df639..318d2e6b7d 100644 --- a/Src/Particle/AMReX_ParticleCommunication.cpp +++ b/Src/Particle/AMReX_ParticleCommunication.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace amrex { @@ -45,7 +46,7 @@ void ParticleCopyPlan::clear () m_rcv_box_levs.clear(); } -void ParticleCopyPlan::buildMPIStart (const ParticleBufferMap& map, Long psize) // NOLINT(readability-convert-member-functions-to-static) +void ParticleCopyPlan::buildMPIStart (const ParticleContainerBase& pc, const ParticleBufferMap& map, Long psize) // NOLINT(readability-convert-member-functions-to-static) { BL_PROFILE("ParticleCopyPlan::buildMPIStart"); @@ -94,7 +95,7 @@ void ParticleCopyPlan::buildMPIStart (const ParticleBufferMap& map, Long psize) m_NumSnds += nbytes; } - doHandShake(m_Snds, m_Rcvs); + doHandShake(pc, m_Snds, m_Rcvs); const int SeqNum = ParallelDescriptor::SeqNum(); Long tot_snds_this_proc = 0; @@ -206,7 +207,7 @@ void ParticleCopyPlan::buildMPIStart (const ParticleBufferMap& map, Long psize) snd_stats.resize(snd_reqs.size()); ParallelDescriptor::Waitall(snd_reqs, snd_stats); #else - amrex::ignore_unused(map,psize); + amrex::ignore_unused(pc,map,psize); #endif } @@ -259,11 +260,21 @@ void ParticleCopyPlan::buildMPIFinish (const ParticleBufferMap& map) // NOLINT(r #endif // MPI } -void ParticleCopyPlan::doHandShake (const Vector& Snds, Vector& Rcvs) const // NOLINT(readability-convert-member-functions-to-static) +void ParticleCopyPlan::doHandShake (const ParticleContainerBase& pc, + const Vector& Snds, + Vector& Rcvs) const // NOLINT(readability-convert-member-functions-to-static) { BL_PROFILE("ParticleCopyPlan::doHandShake"); if (m_local) { doHandShakeLocal(Snds, Rcvs); } - else { doHandShakeGlobal(Snds, Rcvs); } + else if (m_do_one_sided_comms) { +#if defined(BL_USE_MPI3) + doHandShakeOneSided(pc, Snds, Rcvs); +#else + amrex::ignore_unused(pc); + amrex::Abort("ParticleCopyPlan::doHandShake: particles.do_one_sided_comms=1 requires MPI-3"); +#endif + } + else { doHandShakeReduceScatter(Snds, Rcvs); } } void ParticleCopyPlan::doHandShakeLocal (const Vector& Snds, Vector& Rcvs) const // NOLINT(readability-convert-member-functions-to-static) @@ -333,7 +344,7 @@ void ParticleCopyPlan::doHandShakeAllToAll (const Vector& Snds, Vector& Snds, Vector& Rcvs) +void ParticleCopyPlan::doHandShakeReduceScatter (const Vector& Snds, Vector& Rcvs) { #ifdef AMREX_USE_MPI const int SeqNum = ParallelDescriptor::SeqNum(); @@ -381,6 +392,48 @@ void ParticleCopyPlan::doHandShakeGlobal (const Vector& Snds, Vector #endif } +void ParticleCopyPlan::doHandShakeOneSided (const ParticleContainerBase& pc, + const Vector& Snds, + Vector& Rcvs) +{ +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + const int MyProc = ParallelContext::MyProcSub(); + const int NProcs = ParallelContext::NProcsSub(); + + AMREX_ALWAYS_ASSERT(static_cast(Snds.size()) == NProcs); + AMREX_ALWAYS_ASSERT(static_cast(Rcvs.size()) == NProcs); + + pc.ensureParticleHandshakeWindow(); + auto* handshake_buffer = pc.particleHandshakeBuffer(); + AMREX_ALWAYS_ASSERT(handshake_buffer != nullptr); + std::fill_n(handshake_buffer, NProcs, Long(0)); + + MPI_Win win = pc.particleHandshakeWindow(); + BL_MPI_REQUIRE(MPI_Win_fence(0, win)); + + for (int i = 0; i < NProcs; ++i) + { + if (i == MyProc || Snds[i] == 0) { continue; } + + BL_MPI_REQUIRE(MPI_Put(&Snds[i], + 1, + ParallelDescriptor::Mpi_typemap::type(), + i, + MyProc, + 1, + ParallelDescriptor::Mpi_typemap::type(), + win)); + } + + BL_MPI_REQUIRE(MPI_Win_fence(0, win)); + std::copy_n(handshake_buffer, NProcs, Rcvs.begin()); + + AMREX_ASSERT(Rcvs[MyProc] == 0); +#else + amrex::ignore_unused(pc,Snds,Rcvs); +#endif +} + void communicateParticlesFinish (const ParticleCopyPlan& plan) { BL_PROFILE("amrex::communicateParticlesFinish"); diff --git a/Src/Particle/AMReX_ParticleContainerBase.H b/Src/Particle/AMReX_ParticleContainerBase.H index 7cb4fe6283..e9b1addccd 100644 --- a/Src/Particle/AMReX_ParticleContainerBase.H +++ b/Src/Particle/AMReX_ParticleContainerBase.H @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -72,13 +73,13 @@ public: { } - virtual ~ParticleContainerBase () = default; + virtual ~ParticleContainerBase (); ParticleContainerBase ( const ParticleContainerBase &) = delete; ParticleContainerBase& operator= ( const ParticleContainerBase & ) = delete; - ParticleContainerBase ( ParticleContainerBase && ) = default; - ParticleContainerBase& operator= ( ParticleContainerBase && ) = default; + ParticleContainerBase ( ParticleContainerBase && other ) noexcept; + ParticleContainerBase& operator= ( ParticleContainerBase && other ) noexcept; void Define (ParGDBBase* gdb) { m_gdb = gdb;} @@ -237,6 +238,13 @@ public: const ParticleBufferMap& BufferMap () const {return m_buffer_map;} +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + void ensureParticleHandshakeWindow () const; + void releaseParticleHandshakeWindow (); + [[nodiscard]] Long* particleHandshakeBuffer () const { return m_particle_handshake_ptr; } + [[nodiscard]] MPI_Win particleHandshakeWindow () const { return m_particle_handshake_win; } +#endif + Vector NeighborProcs(int ngrow) const { return computeNeighborProcs(this->GetParGDB(), ngrow); @@ -284,6 +292,13 @@ protected: mutable amrex::Vector neighbor_procs; mutable ParticleBufferMap m_buffer_map; +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + mutable MPI_Win m_particle_handshake_win = MPI_WIN_NULL; + mutable Long* m_particle_handshake_ptr = nullptr; + mutable int m_particle_handshake_nprocs = 0; + mutable MPI_Comm m_particle_handshake_comm = MPI_COMM_NULL; +#endif + }; } // namespace amrex diff --git a/Src/Particle/AMReX_ParticleContainerBase.cpp b/Src/Particle/AMReX_ParticleContainerBase.cpp index 53c68392ef..4e750c4ed8 100644 --- a/Src/Particle/AMReX_ParticleContainerBase.cpp +++ b/Src/Particle/AMReX_ParticleContainerBase.cpp @@ -12,6 +12,78 @@ IntVect ParticleContainerBase::tile_size { AMREX_D_DECL(1024000,8,8) }; bool ParticleContainerBase::memEfficientSort = true; bool ParticleContainerBase::use_comms_arena = false; +ParticleContainerBase::~ParticleContainerBase () +{ +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + releaseParticleHandshakeWindow(); +#endif +} + +ParticleContainerBase::ParticleContainerBase (ParticleContainerBase&& other) noexcept + : m_particle_locator(std::move(other.m_particle_locator)), + m_verbose(other.m_verbose), + m_stable_redistribute(other.m_stable_redistribute), + m_gdb_object(std::move(other.m_gdb_object)), + m_gdb(other.m_gdb), + m_dummy_mf(std::move(other.m_dummy_mf)), + m_arena(other.m_arena), + redistribute_mask_ptr(std::move(other.redistribute_mask_ptr)), + redistribute_mask_nghost(other.redistribute_mask_nghost), + neighbor_procs(std::move(other.neighbor_procs)), + m_buffer_map(std::move(other.m_buffer_map)) +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + , m_particle_handshake_win(other.m_particle_handshake_win), + m_particle_handshake_ptr(other.m_particle_handshake_ptr), + m_particle_handshake_nprocs(other.m_particle_handshake_nprocs), + m_particle_handshake_comm(other.m_particle_handshake_comm) +#endif +{ + other.m_gdb = nullptr; +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + other.m_particle_handshake_win = MPI_WIN_NULL; + other.m_particle_handshake_ptr = nullptr; + other.m_particle_handshake_nprocs = 0; + other.m_particle_handshake_comm = MPI_COMM_NULL; +#endif +} + +ParticleContainerBase& +ParticleContainerBase::operator= (ParticleContainerBase&& other) noexcept +{ + if (this != &other) + { +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + releaseParticleHandshakeWindow(); +#endif + + m_particle_locator = std::move(other.m_particle_locator); + m_verbose = other.m_verbose; + m_stable_redistribute = other.m_stable_redistribute; + m_gdb_object = std::move(other.m_gdb_object); + m_gdb = other.m_gdb; + m_dummy_mf = std::move(other.m_dummy_mf); + m_arena = other.m_arena; + redistribute_mask_ptr = std::move(other.redistribute_mask_ptr); + redistribute_mask_nghost = other.redistribute_mask_nghost; + neighbor_procs = std::move(other.neighbor_procs); + m_buffer_map = std::move(other.m_buffer_map); +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) + m_particle_handshake_win = other.m_particle_handshake_win; + m_particle_handshake_ptr = other.m_particle_handshake_ptr; + m_particle_handshake_nprocs = other.m_particle_handshake_nprocs; + m_particle_handshake_comm = other.m_particle_handshake_comm; + + other.m_particle_handshake_win = MPI_WIN_NULL; + other.m_particle_handshake_ptr = nullptr; + other.m_particle_handshake_nprocs = 0; + other.m_particle_handshake_comm = MPI_COMM_NULL; +#endif + other.m_gdb = nullptr; + } + + return *this; +} + void ParticleContainerBase::Define (const Geometry & geom, const DistributionMapping & dmap, const BoxArray & ba) @@ -80,6 +152,59 @@ ParticleContainerBase::defineBufferMap () const } } +#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3) +void ParticleContainerBase::releaseParticleHandshakeWindow () +{ + if (m_particle_handshake_win != MPI_WIN_NULL) { + BL_MPI_REQUIRE(MPI_Win_free(&m_particle_handshake_win)); + } + if (m_particle_handshake_comm != MPI_COMM_NULL) { + BL_MPI_REQUIRE(MPI_Comm_free(&m_particle_handshake_comm)); + } + m_particle_handshake_ptr = nullptr; + m_particle_handshake_nprocs = 0; +} + +void ParticleContainerBase::ensureParticleHandshakeWindow () const +{ + const int nprocs = ParallelContext::NProcsSub(); + MPI_Comm comm = ParallelContext::CommunicatorSub(); + + bool needs_rebuild = (m_particle_handshake_win == MPI_WIN_NULL) + || (m_particle_handshake_nprocs != nprocs) + || (m_particle_handshake_comm == MPI_COMM_NULL); + + if (!needs_rebuild) + { + int cmp = MPI_UNEQUAL; + BL_MPI_REQUIRE(MPI_Comm_compare(comm, m_particle_handshake_comm, &cmp)); + needs_rebuild = (cmp != MPI_IDENT && cmp != MPI_CONGRUENT); + } + + if (needs_rebuild) + { + const_cast(this)->releaseParticleHandshakeWindow(); + + Long* baseptr = nullptr; + MPI_Win win = MPI_WIN_NULL; + BL_MPI_REQUIRE(MPI_Win_allocate(static_cast(nprocs*sizeof(Long)), + sizeof(Long), + MPI_INFO_NULL, + comm, + &baseptr, + &win)); + + MPI_Comm dup_comm = MPI_COMM_NULL; + BL_MPI_REQUIRE(MPI_Comm_dup(comm, &dup_comm)); + + m_particle_handshake_ptr = baseptr; + m_particle_handshake_win = win; + m_particle_handshake_nprocs = nprocs; + m_particle_handshake_comm = dup_comm; + } +} +#endif + void ParticleContainerBase::SetParGDB (const Geometry & geom, const DistributionMapping & dmap, const BoxArray & ba) diff --git a/Tests/Particles/RedistributeGlobal/CMakeLists.txt b/Tests/Particles/RedistributeGlobal/CMakeLists.txt new file mode 100644 index 0000000000..dc7fc83a0c --- /dev/null +++ b/Tests/Particles/RedistributeGlobal/CMakeLists.txt @@ -0,0 +1,13 @@ +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp) + if (NOT AMReX_GPU_BACKEND STREQUAL NONE) + set(_input_files inputs.rt.cuda) + else () + set(_input_files inputs.rt) + endif () + + setup_test(${D} _sources _input_files) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/Particles/RedistributeGlobal/GNUmakefile b/Tests/Particles/RedistributeGlobal/GNUmakefile new file mode 100644 index 0000000000..3b69439b6a --- /dev/null +++ b/Tests/Particles/RedistributeGlobal/GNUmakefile @@ -0,0 +1,22 @@ +AMREX_HOME = ../../../ + +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_MPI = TRUE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = TRUE +USE_PARTICLES = TRUE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/Particle/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/Particles/RedistributeGlobal/Make.package b/Tests/Particles/RedistributeGlobal/Make.package new file mode 100644 index 0000000000..6b4b865e8f --- /dev/null +++ b/Tests/Particles/RedistributeGlobal/Make.package @@ -0,0 +1 @@ +CEXE_sources += main.cpp diff --git a/Tests/Particles/RedistributeGlobal/inputs b/Tests/Particles/RedistributeGlobal/inputs new file mode 100644 index 0000000000..cb1966634b --- /dev/null +++ b/Tests/Particles/RedistributeGlobal/inputs @@ -0,0 +1,13 @@ +redistribute_global.size = (64, 64, 64) +redistribute_global.max_grid_size = 32 +redistribute_global.is_periodic = 1 +redistribute_global.num_ppc = 2 +redistribute_global.nsteps = 200 +redistribute_global.nlevs = 1 + +redistribute_global.num_runtime_real = 0 +redistribute_global.num_runtime_int = 0 + +redistribute_global.sort = 0 + +amrex.use_gpu_aware_mpi = 0 diff --git a/Tests/Particles/RedistributeGlobal/inputs.rt b/Tests/Particles/RedistributeGlobal/inputs.rt new file mode 100644 index 0000000000..00cc40221c --- /dev/null +++ b/Tests/Particles/RedistributeGlobal/inputs.rt @@ -0,0 +1,13 @@ +redistribute_global.size = (64, 64, 64) +redistribute_global.max_grid_size = 32 +redistribute_global.is_periodic = 1 +redistribute_global.num_ppc = 1 +redistribute_global.nsteps = 200 +redistribute_global.nlevs = 1 + +redistribute_global.num_runtime_real = 0 +redistribute_global.num_runtime_int = 0 + +redistribute_global.sort = 0 + +amrex.use_gpu_aware_mpi = 0 diff --git a/Tests/Particles/RedistributeGlobal/inputs.rt.cuda b/Tests/Particles/RedistributeGlobal/inputs.rt.cuda new file mode 100644 index 0000000000..00cc40221c --- /dev/null +++ b/Tests/Particles/RedistributeGlobal/inputs.rt.cuda @@ -0,0 +1,13 @@ +redistribute_global.size = (64, 64, 64) +redistribute_global.max_grid_size = 32 +redistribute_global.is_periodic = 1 +redistribute_global.num_ppc = 1 +redistribute_global.nsteps = 200 +redistribute_global.nlevs = 1 + +redistribute_global.num_runtime_real = 0 +redistribute_global.num_runtime_int = 0 + +redistribute_global.sort = 0 + +amrex.use_gpu_aware_mpi = 0 diff --git a/Tests/Particles/RedistributeGlobal/main.cpp b/Tests/Particles/RedistributeGlobal/main.cpp new file mode 100644 index 0000000000..07b263cb08 --- /dev/null +++ b/Tests/Particles/RedistributeGlobal/main.cpp @@ -0,0 +1,383 @@ +#include +#include +#include +#include + +using namespace amrex; + +static constexpr int NSR = 6; +static constexpr int NSI = 1; +static constexpr int NAR = 1; +static constexpr int NAI = 1; + +int num_runtime_real = 0; +int num_runtime_int = 0; + +void get_position_unit_cell(Real* r, const IntVect& nppc, int i_part) +{ + int nx = nppc[0]; +#if AMREX_SPACEDIM > 1 + int ny = nppc[1]; +#else + int ny = 1; +#endif +#if AMREX_SPACEDIM > 2 + int nz = nppc[2]; +#else + int nz = 1; +#endif + + int ix_part = i_part/(ny * nz); + int iy_part = (i_part % (ny * nz)) % ny; + int iz_part = (i_part % (ny * nz)) / ny; + + r[0] = (0.5+ix_part)/nx; + r[1] = (0.5+iy_part)/ny; + r[2] = (0.5+iz_part)/nz; +} + +class TestParticleContainer + : public amrex::ParticleContainer +{ +public: + + TestParticleContainer (const Vector& a_geom, + const Vector& a_dmap, + const Vector& a_ba, + const Vector& a_rr) + : amrex::ParticleContainer(a_geom, a_dmap, a_ba, a_rr) + { + for (int i = 0; i < num_runtime_real; ++i) + { + AddRealComp(true); + } + for (int i = 0; i < num_runtime_int; ++i) + { + AddIntComp(true); + } + } + + void RedistributeGlobal () + { + const int lev_min = 0; + const int lev_max = finestLevel(); + const int nGrow = 0; + const int local = 0; + Redistribute(lev_min, lev_max, nGrow, local); + } + + void InitParticles (const amrex::IntVect& a_num_particles_per_cell) + { + BL_PROFILE("InitParticles"); + + const int lev = 0; + const Real* dx = Geom(lev).CellSize(); + const Real* plo = Geom(lev).ProbLo(); + + const int num_ppc = AMREX_D_TERM( a_num_particles_per_cell[0], + *a_num_particles_per_cell[1], + *a_num_particles_per_cell[2]); + + for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + const Box& tile_box = mfi.tilebox(); + + Gpu::HostVector host_particles; + std::array, NAR> host_real; + std::array, NAI> host_int; + + std::vector > host_runtime_real(NumRuntimeRealComps()); + std::vector > host_runtime_int(NumRuntimeIntComps()); + + for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv)) + { + for (int i_part = 0; i_part < num_ppc; ++i_part) + { + Real r[3]; + get_position_unit_cell(r, a_num_particles_per_cell, i_part); + + ParticleType p; + p.id() = ParticleType::NextID(); + p.cpu() = ParallelDescriptor::MyProc(); + p.pos(0) = static_cast(plo[0] + (iv[0] + r[0])*dx[0]); +#if AMREX_SPACEDIM > 1 + p.pos(1) = static_cast(plo[1] + (iv[1] + r[1])*dx[1]); +#endif +#if AMREX_SPACEDIM > 2 + p.pos(2) = static_cast(plo[2] + (iv[2] + r[2])*dx[2]); +#endif + + for (int i = 0; i < NSR; ++i) { p.rdata(i) = ParticleReal(p.id()); } + for (int i = 0; i < NSI; ++i) { p.idata(i) = int(p.id()); } + + host_particles.push_back(p); + for (int i = 0; i < NAR; ++i) { + host_real[i].push_back(ParticleReal(p.id())); + } + for (int i = 0; i < NAI; ++i) { + host_int[i].push_back(int(p.id())); + } + for (int i = 0; i < NumRuntimeRealComps(); ++i) { + host_runtime_real[i].push_back(ParticleReal(p.id())); + } + for (int i = 0; i < NumRuntimeIntComps(); ++i) { + host_runtime_int[i].push_back(int(p.id())); + } + } + } + + auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex()); + auto old_size = particle_tile.GetArrayOfStructs().size(); + auto new_size = old_size + host_particles.size(); + particle_tile.resize(new_size); + + Gpu::copyAsync(Gpu::hostToDevice, + host_particles.begin(), + host_particles.end(), + particle_tile.GetArrayOfStructs().begin() + old_size); + + auto& soa = particle_tile.GetStructOfArrays(); + for (int i = 0; i < NAR; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_real[i].begin(), + host_real[i].end(), + soa.GetRealData(i).begin() + old_size); + } + + for (int i = 0; i < NAI; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_int[i].begin(), + host_int[i].end(), + soa.GetIntData(i).begin() + old_size); + } + + for (int i = 0; i < NumRuntimeRealComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_runtime_real[i].begin(), + host_runtime_real[i].end(), + soa.GetRealData(NAR+i).begin() + old_size); + } + + for (int i = 0; i < NumRuntimeIntComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_runtime_int[i].begin(), + host_runtime_int[i].end(), + soa.GetIntData(NAI+i).begin() + old_size); + } + + Gpu::streamSynchronize(); + } + + RedistributeGlobal(); + } + + void moveParticles () + { + BL_PROFILE("TestParticleContainer::moveParticles"); + + for (int lev = 0; lev <= finestLevel(); ++lev) + { + const auto plo = Geom(lev).ProbLoArray(); + const auto phi = Geom(lev).ProbHiArray(); + auto& plev = GetParticles(lev); + + for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int gid = mfi.index(); + int tid = mfi.LocalTileIndex(); + auto& ptile = plev[std::make_pair(gid, tid)]; + auto& aos = ptile.GetArrayOfStructs(); + ParticleType* pstruct = aos.data(); + const size_t np = aos.numParticles(); + + amrex::ParallelForRNG(np, + [=] AMREX_GPU_DEVICE (int i, RandomEngine const& engine) noexcept + { + ParticleType& p = pstruct[i]; + p.pos(0) = static_cast(plo[0] + (phi[0] - plo[0])*amrex::Random(engine)); +#if AMREX_SPACEDIM > 1 + p.pos(1) = static_cast(plo[1] + (phi[1] - plo[1])*amrex::Random(engine)); +#endif +#if AMREX_SPACEDIM > 2 + p.pos(2) = static_cast(plo[2] + (phi[2] - plo[2])*amrex::Random(engine)); +#endif + }); + } + } + } + + void checkAnswer () const + { + BL_PROFILE("TestParticleContainer::checkAnswer"); + + AMREX_ALWAYS_ASSERT(OK()); + + int num_rr = NumRuntimeRealComps(); + int num_ii = NumRuntimeIntComps(); + + for (int lev = 0; lev <= finestLevel(); ++lev) + { + const auto& plev = GetParticles(lev); + for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int gid = mfi.index(); + int tid = mfi.LocalTileIndex(); + const auto& ptile = plev.at(std::make_pair(gid, tid)); + const auto& ptd = ptile.getConstParticleTileData(); + const size_t np = ptile.numParticles(); + + AMREX_FOR_1D(np, i, + { + for (int j = 0; j < NSR; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_aos[i].rdata(j) == ptd.m_aos[i].id()); + } + for (int j = 0; j < NSI; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_aos[i].idata(j) == ptd.m_aos[i].id()); + } + if constexpr (NAR > 0) { + for (int j = 0; j < NAR; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_rdata[j][i] == ptd.m_aos[i].id()); + } + } + if constexpr (NAI > 0) { + for (int j = 0; j < NAI; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_idata[j][i] == ptd.m_aos[i].id()); + } + } + for (int j = 0; j < num_rr; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_runtime_rdata[j][i] == ptd.m_aos[i].id()); + } + for (int j = 0; j < num_ii; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_runtime_idata[j][i] == ptd.m_aos[i].id()); + } + }); + } + } + } +}; + +struct TestParams +{ + IntVect size; + int max_grid_size; + int num_ppc; + int is_periodic; + int nsteps; + int nlevs; + int sort; + int stable_redistribute = 0; +}; + +void testRedistributeGlobal (); + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + amrex::Print() << "Running global redistribute test\n"; + testRedistributeGlobal(); + + amrex::Finalize(); +} + +void get_test_params (TestParams& params, const std::string& prefix) +{ + ParmParse pp(prefix); + pp.get("size", params.size); + pp.get("max_grid_size", params.max_grid_size); + pp.get("num_ppc", params.num_ppc); + pp.get("is_periodic", params.is_periodic); + pp.get("nsteps", params.nsteps); + pp.get("nlevs", params.nlevs); + pp.query("num_runtime_real", num_runtime_real); + pp.query("num_runtime_int", num_runtime_int); + pp.query("stable_redistribute", params.stable_redistribute); + + params.sort = 0; + pp.query("sort", params.sort); +} + +void testRedistributeGlobal () +{ + BL_PROFILE("testRedistributeGlobal"); + TestParams params; + get_test_params(params, "redistribute_global"); + + int is_per[] = {AMREX_D_DECL(params.is_periodic, + params.is_periodic, + params.is_periodic)}; + + Vector rr(params.nlevs-1); + for (int lev = 1; lev < params.nlevs; ++lev) { + rr[lev-1] = IntVect(AMREX_D_DECL(2,2,2)); + } + + RealBox real_box; + for (int n = 0; n < BL_SPACEDIM; ++n) + { + real_box.setLo(n, 0.0); + real_box.setHi(n, params.size[n]); + } + + IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); + IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1)); + const Box base_domain(domain_lo, domain_hi); + + Vector geom(params.nlevs); + geom[0].define(base_domain, &real_box, CoordSys::cartesian, is_per); + for (int lev = 1; lev < params.nlevs; ++lev) { + geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]), + &real_box, CoordSys::cartesian, is_per); + } + + Vector ba(params.nlevs); + Vector dm(params.nlevs); + IntVect lo(0); + IntVect size = params.size; + for (int lev = 0; lev < params.nlevs; ++lev) + { + ba[lev].define(Box(lo, lo+params.size-1)); + ba[lev].maxSize(params.max_grid_size); + dm[lev].define(ba[lev]); + lo += size/2; + size *= 2; + } + + TestParticleContainer pc(geom, dm, ba, rr); + pc.setStableRedistribute(params.stable_redistribute); + + IntVect nppc(params.num_ppc); + + amrex::Print() << "About to initialize particles\n"; + + pc.InitParticles(nppc); + pc.checkAnswer(); + + auto np_old = pc.TotalNumberOfParticles(); + + if (params.sort) { pc.SortParticlesByCell(); } + + for (int i = 0; i < params.nsteps; ++i) + { + pc.moveParticles(); + pc.RedistributeGlobal(); + if (params.sort) { pc.SortParticlesByCell(); } + pc.checkAnswer(); + } + + if (geom[0].isAllPeriodic()) { + AMREX_ALWAYS_ASSERT(np_old == pc.TotalNumberOfParticles()); + } + + amrex::Print() << "pass\n"; +} diff --git a/Tests/Particles/RedistributeGlobalDM/CMakeLists.txt b/Tests/Particles/RedistributeGlobalDM/CMakeLists.txt new file mode 100644 index 0000000000..dc7fc83a0c --- /dev/null +++ b/Tests/Particles/RedistributeGlobalDM/CMakeLists.txt @@ -0,0 +1,13 @@ +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp) + if (NOT AMReX_GPU_BACKEND STREQUAL NONE) + set(_input_files inputs.rt.cuda) + else () + set(_input_files inputs.rt) + endif () + + setup_test(${D} _sources _input_files) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/Particles/RedistributeGlobalDM/GNUmakefile b/Tests/Particles/RedistributeGlobalDM/GNUmakefile new file mode 100644 index 0000000000..3b69439b6a --- /dev/null +++ b/Tests/Particles/RedistributeGlobalDM/GNUmakefile @@ -0,0 +1,22 @@ +AMREX_HOME = ../../../ + +DEBUG = FALSE + +DIM = 3 + +COMP = gcc + +USE_MPI = TRUE +USE_OMP = FALSE +USE_CUDA = FALSE + +TINY_PROFILE = TRUE +USE_PARTICLES = TRUE + +include $(AMREX_HOME)/Tools/GNUMake/Make.defs + +include ./Make.package +include $(AMREX_HOME)/Src/Base/Make.package +include $(AMREX_HOME)/Src/Particle/Make.package + +include $(AMREX_HOME)/Tools/GNUMake/Make.rules diff --git a/Tests/Particles/RedistributeGlobalDM/Make.package b/Tests/Particles/RedistributeGlobalDM/Make.package new file mode 100644 index 0000000000..6b4b865e8f --- /dev/null +++ b/Tests/Particles/RedistributeGlobalDM/Make.package @@ -0,0 +1 @@ +CEXE_sources += main.cpp diff --git a/Tests/Particles/RedistributeGlobalDM/inputs b/Tests/Particles/RedistributeGlobalDM/inputs new file mode 100644 index 0000000000..31d3bbffb3 --- /dev/null +++ b/Tests/Particles/RedistributeGlobalDM/inputs @@ -0,0 +1,15 @@ +redistribute_global_dm.size = (64, 64, 64) +redistribute_global_dm.max_grid_size = 8 +redistribute_global_dm.is_periodic = 1 +redistribute_global_dm.num_ppc = 2 +redistribute_global_dm.nsteps = 50 +redistribute_global_dm.nlevs = 1 +redistribute_global_dm.random_seed = 8675309 +redistribute_global_dm.check_answer_each_step = 1 + +redistribute_global_dm.num_runtime_real = 0 +redistribute_global_dm.num_runtime_int = 0 + +redistribute_global_dm.sort = 0 + +amrex.use_gpu_aware_mpi = 0 diff --git a/Tests/Particles/RedistributeGlobalDM/inputs.rt b/Tests/Particles/RedistributeGlobalDM/inputs.rt new file mode 100644 index 0000000000..c658ba6c73 --- /dev/null +++ b/Tests/Particles/RedistributeGlobalDM/inputs.rt @@ -0,0 +1,15 @@ +redistribute_global_dm.size = (64, 64, 64) +redistribute_global_dm.max_grid_size = 8 +redistribute_global_dm.is_periodic = 1 +redistribute_global_dm.num_ppc = 1 +redistribute_global_dm.nsteps = 25 +redistribute_global_dm.nlevs = 1 +redistribute_global_dm.random_seed = 8675309 +redistribute_global_dm.check_answer_each_step = 1 + +redistribute_global_dm.num_runtime_real = 0 +redistribute_global_dm.num_runtime_int = 0 + +redistribute_global_dm.sort = 0 + +amrex.use_gpu_aware_mpi = 0 diff --git a/Tests/Particles/RedistributeGlobalDM/inputs.rt.cuda b/Tests/Particles/RedistributeGlobalDM/inputs.rt.cuda new file mode 100644 index 0000000000..c658ba6c73 --- /dev/null +++ b/Tests/Particles/RedistributeGlobalDM/inputs.rt.cuda @@ -0,0 +1,15 @@ +redistribute_global_dm.size = (64, 64, 64) +redistribute_global_dm.max_grid_size = 8 +redistribute_global_dm.is_periodic = 1 +redistribute_global_dm.num_ppc = 1 +redistribute_global_dm.nsteps = 25 +redistribute_global_dm.nlevs = 1 +redistribute_global_dm.random_seed = 8675309 +redistribute_global_dm.check_answer_each_step = 1 + +redistribute_global_dm.num_runtime_real = 0 +redistribute_global_dm.num_runtime_int = 0 + +redistribute_global_dm.sort = 0 + +amrex.use_gpu_aware_mpi = 0 diff --git a/Tests/Particles/RedistributeGlobalDM/main.cpp b/Tests/Particles/RedistributeGlobalDM/main.cpp new file mode 100644 index 0000000000..950f676991 --- /dev/null +++ b/Tests/Particles/RedistributeGlobalDM/main.cpp @@ -0,0 +1,402 @@ +#include +#include +#include +#include + +#include +#include +#include +#include + +using namespace amrex; + +static constexpr int NSR = 6; +static constexpr int NSI = 1; +static constexpr int NAR = 1; +static constexpr int NAI = 1; + +int num_runtime_real = 0; +int num_runtime_int = 0; + +void get_position_unit_cell (Real* r, const IntVect& nppc, int i_part) +{ + int nx = nppc[0]; +#if AMREX_SPACEDIM > 1 + int ny = nppc[1]; +#else + int ny = 1; +#endif +#if AMREX_SPACEDIM > 2 + int nz = nppc[2]; +#else + int nz = 1; +#endif + + int ix_part = i_part/(ny * nz); + int iy_part = (i_part % (ny * nz)) % ny; + int iz_part = (i_part % (ny * nz)) / ny; + + r[0] = (0.5+ix_part)/nx; + r[1] = (0.5+iy_part)/ny; + r[2] = (0.5+iz_part)/nz; +} + +class TestParticleContainer + : public amrex::ParticleContainer +{ +public: + + TestParticleContainer (const Vector& a_geom, + const Vector& a_dmap, + const Vector& a_ba, + const Vector& a_rr) + : amrex::ParticleContainer(a_geom, a_dmap, a_ba, a_rr) + { + for (int i = 0; i < num_runtime_real; ++i) + { + AddRealComp(true); + } + for (int i = 0; i < num_runtime_int; ++i) + { + AddIntComp(true); + } + } + + void RedistributeGlobal () + { + const int lev_min = 0; + const int lev_max = finestLevel(); + const int nGrow = 0; + const int local = 0; + Redistribute(lev_min, lev_max, nGrow, local); + } + + void InitParticles (const amrex::IntVect& a_num_particles_per_cell) + { + BL_PROFILE("InitParticles"); + + const int lev = 0; + const Real* dx = Geom(lev).CellSize(); + const Real* plo = Geom(lev).ProbLo(); + + const int num_ppc = AMREX_D_TERM(a_num_particles_per_cell[0], + * a_num_particles_per_cell[1], + * a_num_particles_per_cell[2]); + + for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + const Box& tile_box = mfi.tilebox(); + + Gpu::HostVector host_particles; + std::array, NAR> host_real; + std::array, NAI> host_int; + + std::vector > host_runtime_real(NumRuntimeRealComps()); + std::vector > host_runtime_int(NumRuntimeIntComps()); + + for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv)) + { + for (int i_part = 0; i_part < num_ppc; ++i_part) + { + Real r[3]; + get_position_unit_cell(r, a_num_particles_per_cell, i_part); + + ParticleType p; + p.id() = ParticleType::NextID(); + p.cpu() = ParallelDescriptor::MyProc(); + p.pos(0) = static_cast(plo[0] + (iv[0] + r[0])*dx[0]); +#if AMREX_SPACEDIM > 1 + p.pos(1) = static_cast(plo[1] + (iv[1] + r[1])*dx[1]); +#endif +#if AMREX_SPACEDIM > 2 + p.pos(2) = static_cast(plo[2] + (iv[2] + r[2])*dx[2]); +#endif + + for (int i = 0; i < NSR; ++i) { p.rdata(i) = ParticleReal(p.id()); } + for (int i = 0; i < NSI; ++i) { p.idata(i) = int(p.id()); } + + host_particles.push_back(p); + for (int i = 0; i < NAR; ++i) { + host_real[i].push_back(ParticleReal(p.id())); + } + for (int i = 0; i < NAI; ++i) { + host_int[i].push_back(int(p.id())); + } + for (int i = 0; i < NumRuntimeRealComps(); ++i) { + host_runtime_real[i].push_back(ParticleReal(p.id())); + } + for (int i = 0; i < NumRuntimeIntComps(); ++i) { + host_runtime_int[i].push_back(int(p.id())); + } + } + } + + auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex()); + auto old_size = particle_tile.GetArrayOfStructs().size(); + auto new_size = old_size + host_particles.size(); + particle_tile.resize(new_size); + + Gpu::copyAsync(Gpu::hostToDevice, + host_particles.begin(), + host_particles.end(), + particle_tile.GetArrayOfStructs().begin() + old_size); + + auto& soa = particle_tile.GetStructOfArrays(); + for (int i = 0; i < NAR; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_real[i].begin(), + host_real[i].end(), + soa.GetRealData(i).begin() + old_size); + } + + for (int i = 0; i < NAI; ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_int[i].begin(), + host_int[i].end(), + soa.GetIntData(i).begin() + old_size); + } + + for (int i = 0; i < NumRuntimeRealComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_runtime_real[i].begin(), + host_runtime_real[i].end(), + soa.GetRealData(NAR+i).begin() + old_size); + } + + for (int i = 0; i < NumRuntimeIntComps(); ++i) + { + Gpu::copyAsync(Gpu::hostToDevice, + host_runtime_int[i].begin(), + host_runtime_int[i].end(), + soa.GetIntData(NAI+i).begin() + old_size); + } + + Gpu::streamSynchronize(); + } + + RedistributeGlobal(); + } + + void checkAnswer () const + { + BL_PROFILE("TestParticleContainer::checkAnswer"); + + AMREX_ALWAYS_ASSERT(OK()); + + int num_rr = NumRuntimeRealComps(); + int num_ii = NumRuntimeIntComps(); + + for (int lev = 0; lev <= finestLevel(); ++lev) + { + const auto& plev = GetParticles(lev); + for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi) + { + int gid = mfi.index(); + int tid = mfi.LocalTileIndex(); + const auto& ptile = plev.at(std::make_pair(gid, tid)); + const auto& ptd = ptile.getConstParticleTileData(); + const size_t np = ptile.numParticles(); + + AMREX_FOR_1D(np, i, + { + for (int j = 0; j < NSR; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_aos[i].rdata(j) == ptd.m_aos[i].id()); + } + for (int j = 0; j < NSI; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_aos[i].idata(j) == ptd.m_aos[i].id()); + } + if constexpr (NAR > 0) { + for (int j = 0; j < NAR; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_rdata[j][i] == ptd.m_aos[i].id()); + } + } + if constexpr (NAI > 0) { + for (int j = 0; j < NAI; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_idata[j][i] == ptd.m_aos[i].id()); + } + } + for (int j = 0; j < num_rr; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_runtime_rdata[j][i] == ptd.m_aos[i].id()); + } + for (int j = 0; j < num_ii; ++j) + { + AMREX_ALWAYS_ASSERT(ptd.m_runtime_idata[j][i] == ptd.m_aos[i].id()); + } + }); + } + } + } +}; + +struct TestParams +{ + IntVect size; + int max_grid_size; + int num_ppc; + int is_periodic; + int nsteps; + int nlevs; + int sort; + int stable_redistribute = 0; + int random_seed = 8675309; + int check_answer_each_step = 1; +}; + +auto makeRandomPMap (int nboxes, int nprocs, std::uint32_t seed) -> Vector +{ + Vector pmap(nboxes); + for (int i = 0; i < nboxes; ++i) { + pmap[i] = i % nprocs; + } + + std::mt19937 gen(seed); + std::shuffle(pmap.begin(), pmap.end(), gen); + + return pmap; +} + +void testRedistributeGlobalDM (); + +int main (int argc, char* argv[]) +{ + amrex::Initialize(argc,argv); + + amrex::Print() << "Running global redistribute DistributionMap shuffle test\n"; + testRedistributeGlobalDM(); + + amrex::Finalize(); +} + +void get_test_params (TestParams& params, const std::string& prefix) +{ + ParmParse pp(prefix); + pp.get("size", params.size); + pp.get("max_grid_size", params.max_grid_size); + pp.get("num_ppc", params.num_ppc); + pp.get("is_periodic", params.is_periodic); + pp.get("nsteps", params.nsteps); + pp.get("nlevs", params.nlevs); + pp.query("num_runtime_real", num_runtime_real); + pp.query("num_runtime_int", num_runtime_int); + pp.query("stable_redistribute", params.stable_redistribute); + pp.query("random_seed", params.random_seed); + pp.query("check_answer_each_step", params.check_answer_each_step); + + params.sort = 0; + pp.query("sort", params.sort); +} + +void testRedistributeGlobalDM () +{ + BL_PROFILE("testRedistributeGlobalDM"); + TestParams params; + get_test_params(params, "redistribute_global_dm"); + + int is_per[] = {AMREX_D_DECL(params.is_periodic, + params.is_periodic, + params.is_periodic)}; + + Vector rr(params.nlevs-1); + for (int lev = 1; lev < params.nlevs; ++lev) { + rr[lev-1] = IntVect(AMREX_D_DECL(2,2,2)); + } + + RealBox real_box; + for (int n = 0; n < BL_SPACEDIM; ++n) + { + real_box.setLo(n, 0.0); + real_box.setHi(n, params.size[n]); + } + + IntVect domain_lo(AMREX_D_DECL(0, 0, 0)); + IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1)); + const Box base_domain(domain_lo, domain_hi); + + Vector geom(params.nlevs); + geom[0].define(base_domain, &real_box, CoordSys::cartesian, is_per); + for (int lev = 1; lev < params.nlevs; ++lev) { + geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]), + &real_box, CoordSys::cartesian, is_per); + } + + Vector ba(params.nlevs); + Vector dm(params.nlevs); + IntVect lo(0); + IntVect size = params.size; + for (int lev = 0; lev < params.nlevs; ++lev) + { + ba[lev].define(Box(lo, lo+params.size-1)); + ba[lev].maxSize(params.max_grid_size); + dm[lev].define(ba[lev]); + lo += size/2; + size *= 2; + } + + TestParticleContainer pc(geom, dm, ba, rr); + pc.setStableRedistribute(params.stable_redistribute); + + IntVect nppc(params.num_ppc); + + amrex::Print() << "About to initialize particles\n"; + + pc.InitParticles(nppc); + pc.checkAnswer(); + + auto np_old = pc.TotalNumberOfParticles(); + const int nprocs = ParallelDescriptor::NProcs(); + + amrex::Print() << "Benchmark setup: " << ba[0].size() << " boxes on level 0 across " + << nprocs << " MPI ranks\n"; + + if (params.sort) { pc.SortParticlesByCell(); } + + Real total_dm_time = Real(0.0); + Real total_redistribute_time = Real(0.0); + + for (int i = 0; i < params.nsteps; ++i) + { + const auto dm_start = amrex::second(); + for (int lev = 0; lev < params.nlevs; ++lev) + { + auto pmap = makeRandomPMap(static_cast(ba[lev].size()), nprocs, + static_cast(params.random_seed + 7919*i + 101*lev)); + DistributionMapping new_dm; + new_dm.define(pmap); + pc.SetParticleDistributionMap(lev, new_dm); + } + total_dm_time += amrex::second() - dm_start; + + ParallelDescriptor::Barrier(); + const auto redistribute_start = amrex::second(); + pc.RedistributeGlobal(); + total_redistribute_time += amrex::second() - redistribute_start; + + if (params.sort) { pc.SortParticlesByCell(); } + if (params.check_answer_each_step) { pc.checkAnswer(); } + } + + if (!params.check_answer_each_step) { + pc.checkAnswer(); + } + + if (geom[0].isAllPeriodic()) { + AMREX_ALWAYS_ASSERT(np_old == pc.TotalNumberOfParticles()); + } + + ParallelDescriptor::ReduceRealMax(total_dm_time, ParallelDescriptor::IOProcessorNumber()); + ParallelDescriptor::ReduceRealMax(total_redistribute_time, ParallelDescriptor::IOProcessorNumber()); + + amrex::Print() << "Max DM shuffle time over all ranks: " << total_dm_time << " s\n"; + amrex::Print() << "Max redistribute time over all ranks: " << total_redistribute_time << " s\n"; + amrex::Print() << "Average redistribute time per step: " + << total_redistribute_time/static_cast(params.nsteps) << " s\n"; + amrex::Print() << "pass\n"; +}