diff --git a/Src/Particle/AMReX_ParticleCommunication.H b/Src/Particle/AMReX_ParticleCommunication.H
index 03f2ad0882..4ad5eb01bb 100644
--- a/Src/Particle/AMReX_ParticleCommunication.H
+++ b/Src/Particle/AMReX_ParticleCommunication.H
@@ -10,11 +10,14 @@
 #include <AMReX_Scan.H>
 #include <AMReX_TypeTraits.H>
 #include <AMReX_MakeParticle.H>
+#include <AMReX_ParmParse.H>
 
 #include <map>
 
 namespace amrex {
 
+class ParticleContainerBase;
+
 struct NeighborUnpackPolicy
 {
     template <class PTile>
@@ -137,6 +140,8 @@ struct ParticleCopyPlan
         BL_PROFILE("ParticleCopyPlan::build");
 
         m_local = local;
+        ParmParse pp("particles");
+        pp.query("do_one_sided_comms", m_do_one_sided_comms);
 
         const int ngrow = 1;  // note - fix
 
@@ -265,7 +270,7 @@ struct ParticleCopyPlan
         m_superparticle_size += num_real_comm_comp * sizeof(typename PC::ParticleType::RealType)
                               + num_int_comm_comp  * sizeof(int);
 
-        buildMPIStart(pc.BufferMap(), m_superparticle_size);
+        buildMPIStart(pc, pc.BufferMap(), m_superparticle_size);
     }
 
     void clear ();
@@ -274,14 +279,14 @@ struct ParticleCopyPlan
 
 private:
 
-    void buildMPIStart (const ParticleBufferMap& map, Long psize);
+    void buildMPIStart (const ParticleContainerBase& pc, const ParticleBufferMap& map, Long psize);
 
     //
     // Snds - a Vector with the number of bytes that is process will send to each proc.
     // Rcvs - a Vector that, after calling this method, will contain the
     //        number of bytes this process will receive from each proc.
     //
-    void doHandShake (const Vector<Long>& Snds, Vector<Long>& Rcvs) const;
+    void doHandShake (const ParticleContainerBase& pc, const Vector<Long>& Snds, Vector<Long>& Rcvs) const;
 
     //
     // In the local version of this method, each proc knows which other
@@ -294,14 +299,22 @@ private:
     // In the global version, we don't know who we'll receive from, so we
     // need to do some collective communication first.
     //
-    static void doHandShakeGlobal (const Vector<Long>& Snds, Vector<Long>& Rcvs);
+    static void doHandShakeReduceScatter (const Vector<Long>& Snds, Vector<Long>& Rcvs);
+
+    //
+    // Another version of the global handshake implemented with MPI-3
+    // one-sided communication.
+    //
+    static void doHandShakeOneSided (const ParticleContainerBase& pc,
+                                     const Vector<Long>& Snds, Vector<Long>& Rcvs);
 
     //
     // Another version of the above that is implemented using MPI All-to-All
     //
     static void doHandShakeAllToAll (const Vector<Long>& Snds, Vector<Long>& Rcvs);
 
-    bool m_local;
+    bool m_local = false;
+    int m_do_one_sided_comms = 0;
 };
 
 struct GetSendBufferOffset
diff --git a/Src/Particle/AMReX_ParticleCommunication.cpp b/Src/Particle/AMReX_ParticleCommunication.cpp
index 18b63df639..318d2e6b7d 100644
--- a/Src/Particle/AMReX_ParticleCommunication.cpp
+++ b/Src/Particle/AMReX_ParticleCommunication.cpp
@@ -1,4 +1,5 @@
 #include <AMReX_ParticleCommunication.H>
+#include <AMReX_ParticleContainerBase.H>
 #include <AMReX_ParallelDescriptor.H>
 
 namespace amrex {
@@ -45,7 +46,7 @@ void ParticleCopyPlan::clear ()
     m_rcv_box_levs.clear();
 }
 
-void ParticleCopyPlan::buildMPIStart (const ParticleBufferMap& map, Long psize) // NOLINT(readability-convert-member-functions-to-static)
+void ParticleCopyPlan::buildMPIStart (const ParticleContainerBase& pc, const ParticleBufferMap& map, Long psize) // NOLINT(readability-convert-member-functions-to-static)
 {
     BL_PROFILE("ParticleCopyPlan::buildMPIStart");
 
@@ -94,7 +95,7 @@ void ParticleCopyPlan::buildMPIStart (const ParticleBufferMap& map, Long psize)
         m_NumSnds += nbytes;
     }
 
-    doHandShake(m_Snds, m_Rcvs);
+    doHandShake(pc, m_Snds, m_Rcvs);
 
     const int SeqNum = ParallelDescriptor::SeqNum();
     Long tot_snds_this_proc = 0;
@@ -206,7 +207,7 @@ void ParticleCopyPlan::buildMPIStart (const ParticleBufferMap& map, Long psize)
     snd_stats.resize(snd_reqs.size());
     ParallelDescriptor::Waitall(snd_reqs, snd_stats);
 #else
-    amrex::ignore_unused(map,psize);
+    amrex::ignore_unused(pc,map,psize);
 #endif
 }
 
@@ -259,11 +260,21 @@ void ParticleCopyPlan::buildMPIFinish (const ParticleBufferMap& map) // NOLINT(r
 #endif // MPI
 }
 
-void ParticleCopyPlan::doHandShake (const Vector<Long>& Snds, Vector<Long>& Rcvs) const // NOLINT(readability-convert-member-functions-to-static)
+void ParticleCopyPlan::doHandShake (const ParticleContainerBase& pc,
+                                    const Vector<Long>& Snds,
+                                    Vector<Long>& Rcvs) const // NOLINT(readability-convert-member-functions-to-static)
 {
     BL_PROFILE("ParticleCopyPlan::doHandShake");
     if (m_local) { doHandShakeLocal(Snds, Rcvs); }
-    else         { doHandShakeGlobal(Snds, Rcvs); }
+    else if (m_do_one_sided_comms) {
+#if defined(BL_USE_MPI3)
+        doHandShakeOneSided(pc, Snds, Rcvs);
+#else
+        amrex::ignore_unused(pc);
+        amrex::Abort("ParticleCopyPlan::doHandShake: particles.do_one_sided_comms=1 requires MPI-3");
+#endif
+    }
+    else         { doHandShakeReduceScatter(Snds, Rcvs); }
 }
 
 void ParticleCopyPlan::doHandShakeLocal (const Vector<Long>& Snds, Vector<Long>& Rcvs) const // NOLINT(readability-convert-member-functions-to-static)
@@ -333,7 +344,7 @@ void ParticleCopyPlan::doHandShakeAllToAll (const Vector<Long>& Snds, Vector<Lon
 #endif
 }
 
-void ParticleCopyPlan::doHandShakeGlobal (const Vector<Long>& Snds, Vector<Long>& Rcvs)
+void ParticleCopyPlan::doHandShakeReduceScatter (const Vector<Long>& Snds, Vector<Long>& Rcvs)
 {
 #ifdef AMREX_USE_MPI
     const int SeqNum = ParallelDescriptor::SeqNum();
@@ -381,6 +392,48 @@ void ParticleCopyPlan::doHandShakeGlobal (const Vector<Long>& Snds, Vector<Long>
 #endif
 }
 
+void ParticleCopyPlan::doHandShakeOneSided (const ParticleContainerBase& pc,
+                                            const Vector<Long>& Snds,
+                                            Vector<Long>& Rcvs)
+{
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+    const int MyProc = ParallelContext::MyProcSub();
+    const int NProcs = ParallelContext::NProcsSub();
+
+    AMREX_ALWAYS_ASSERT(static_cast<int>(Snds.size()) == NProcs);
+    AMREX_ALWAYS_ASSERT(static_cast<int>(Rcvs.size()) == NProcs);
+
+    pc.ensureParticleHandshakeWindow();
+    auto* handshake_buffer = pc.particleHandshakeBuffer();
+    AMREX_ALWAYS_ASSERT(handshake_buffer != nullptr);
+    std::fill_n(handshake_buffer, NProcs, Long(0));
+
+    MPI_Win win = pc.particleHandshakeWindow();
+    BL_MPI_REQUIRE(MPI_Win_fence(0, win));
+
+    for (int i = 0; i < NProcs; ++i)
+    {
+        if (i == MyProc || Snds[i] == 0) { continue; }
+
+        BL_MPI_REQUIRE(MPI_Put(&Snds[i],
+                               1,
+                               ParallelDescriptor::Mpi_typemap<Long>::type(),
+                               i,
+                               MyProc,
+                               1,
+                               ParallelDescriptor::Mpi_typemap<Long>::type(),
+                               win));
+    }
+
+    BL_MPI_REQUIRE(MPI_Win_fence(0, win));
+    std::copy_n(handshake_buffer, NProcs, Rcvs.begin());
+
+    AMREX_ASSERT(Rcvs[MyProc] == 0);
+#else
+    amrex::ignore_unused(pc,Snds,Rcvs);
+#endif
+}
+
 void communicateParticlesFinish (const ParticleCopyPlan& plan)
 {
     BL_PROFILE("amrex::communicateParticlesFinish");
diff --git a/Src/Particle/AMReX_ParticleContainerBase.H b/Src/Particle/AMReX_ParticleContainerBase.H
index 7cb4fe6283..e9b1addccd 100644
--- a/Src/Particle/AMReX_ParticleContainerBase.H
+++ b/Src/Particle/AMReX_ParticleContainerBase.H
@@ -12,6 +12,7 @@
 #include <AMReX_Vector.H>
 #include <AMReX_ParticleUtil.H>
 #include <AMReX_MultiFab.H>
+#include <AMReX_iMultiFab.H>
 #include <AMReX_ParticleLocator.H>
 #include <AMReX_DenseBins.H>
 
@@ -72,13 +73,13 @@ public:
     {
     }
 
-    virtual ~ParticleContainerBase () = default;
+    virtual ~ParticleContainerBase ();
 
     ParticleContainerBase ( const ParticleContainerBase &) = delete;
     ParticleContainerBase& operator= ( const ParticleContainerBase & ) = delete;
 
-    ParticleContainerBase ( ParticleContainerBase && ) = default;
-    ParticleContainerBase& operator= ( ParticleContainerBase && ) = default;
+    ParticleContainerBase ( ParticleContainerBase && other ) noexcept;
+    ParticleContainerBase& operator= ( ParticleContainerBase && other ) noexcept;
 
     void Define (ParGDBBase* gdb) { m_gdb = gdb;}
 
@@ -237,6 +238,13 @@ public:
 
     const ParticleBufferMap& BufferMap () const {return m_buffer_map;}
 
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+    void ensureParticleHandshakeWindow () const;
+    void releaseParticleHandshakeWindow ();
+    [[nodiscard]] Long* particleHandshakeBuffer () const { return m_particle_handshake_ptr; }
+    [[nodiscard]] MPI_Win particleHandshakeWindow () const { return m_particle_handshake_win; }
+#endif
+
     Vector<int> NeighborProcs(int ngrow) const
     {
         return computeNeighborProcs(this->GetParGDB(), ngrow);
@@ -284,6 +292,13 @@ protected:
     mutable amrex::Vector<int> neighbor_procs;
     mutable ParticleBufferMap m_buffer_map;
 
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+    mutable MPI_Win m_particle_handshake_win = MPI_WIN_NULL;
+    mutable Long* m_particle_handshake_ptr = nullptr;
+    mutable int m_particle_handshake_nprocs = 0;
+    mutable MPI_Comm m_particle_handshake_comm = MPI_COMM_NULL;
+#endif
+
 };
 
 } // namespace amrex
diff --git a/Src/Particle/AMReX_ParticleContainerBase.cpp b/Src/Particle/AMReX_ParticleContainerBase.cpp
index 53c68392ef..4e750c4ed8 100644
--- a/Src/Particle/AMReX_ParticleContainerBase.cpp
+++ b/Src/Particle/AMReX_ParticleContainerBase.cpp
@@ -12,6 +12,78 @@ IntVect ParticleContainerBase::tile_size { AMREX_D_DECL(1024000,8,8) };
 bool    ParticleContainerBase::memEfficientSort = true;
 bool    ParticleContainerBase::use_comms_arena = false;
 
+ParticleContainerBase::~ParticleContainerBase ()
+{
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+    releaseParticleHandshakeWindow();
+#endif
+}
+
+ParticleContainerBase::ParticleContainerBase (ParticleContainerBase&& other) noexcept
+    : m_particle_locator(std::move(other.m_particle_locator)),
+      m_verbose(other.m_verbose),
+      m_stable_redistribute(other.m_stable_redistribute),
+      m_gdb_object(std::move(other.m_gdb_object)),
+      m_gdb(other.m_gdb),
+      m_dummy_mf(std::move(other.m_dummy_mf)),
+      m_arena(other.m_arena),
+      redistribute_mask_ptr(std::move(other.redistribute_mask_ptr)),
+      redistribute_mask_nghost(other.redistribute_mask_nghost),
+      neighbor_procs(std::move(other.neighbor_procs)),
+      m_buffer_map(std::move(other.m_buffer_map))
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+    , m_particle_handshake_win(other.m_particle_handshake_win),
+      m_particle_handshake_ptr(other.m_particle_handshake_ptr),
+      m_particle_handshake_nprocs(other.m_particle_handshake_nprocs),
+      m_particle_handshake_comm(other.m_particle_handshake_comm)
+#endif
+{
+    other.m_gdb = nullptr;
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+    other.m_particle_handshake_win = MPI_WIN_NULL;
+    other.m_particle_handshake_ptr = nullptr;
+    other.m_particle_handshake_nprocs = 0;
+    other.m_particle_handshake_comm = MPI_COMM_NULL;
+#endif
+}
+
+ParticleContainerBase&
+ParticleContainerBase::operator= (ParticleContainerBase&& other) noexcept
+{
+    if (this != &other)
+    {
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+        releaseParticleHandshakeWindow();
+#endif
+
+        m_particle_locator = std::move(other.m_particle_locator);
+        m_verbose = other.m_verbose;
+        m_stable_redistribute = other.m_stable_redistribute;
+        m_gdb_object = std::move(other.m_gdb_object);
+        m_gdb = other.m_gdb;
+        m_dummy_mf = std::move(other.m_dummy_mf);
+        m_arena = other.m_arena;
+        redistribute_mask_ptr = std::move(other.redistribute_mask_ptr);
+        redistribute_mask_nghost = other.redistribute_mask_nghost;
+        neighbor_procs = std::move(other.neighbor_procs);
+        m_buffer_map = std::move(other.m_buffer_map);
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+        m_particle_handshake_win = other.m_particle_handshake_win;
+        m_particle_handshake_ptr = other.m_particle_handshake_ptr;
+        m_particle_handshake_nprocs = other.m_particle_handshake_nprocs;
+        m_particle_handshake_comm = other.m_particle_handshake_comm;
+
+        other.m_particle_handshake_win = MPI_WIN_NULL;
+        other.m_particle_handshake_ptr = nullptr;
+        other.m_particle_handshake_nprocs = 0;
+        other.m_particle_handshake_comm = MPI_COMM_NULL;
+#endif
+        other.m_gdb = nullptr;
+    }
+
+    return *this;
+}
+
 void ParticleContainerBase::Define (const Geometry            & geom,
                                     const DistributionMapping & dmap,
                                     const BoxArray            & ba)
@@ -80,6 +152,59 @@ ParticleContainerBase::defineBufferMap () const
     }
 }
 
+#if defined(AMREX_USE_MPI) && defined(BL_USE_MPI3)
+void ParticleContainerBase::releaseParticleHandshakeWindow ()
+{
+    if (m_particle_handshake_win != MPI_WIN_NULL) {
+        BL_MPI_REQUIRE(MPI_Win_free(&m_particle_handshake_win));
+    }
+    if (m_particle_handshake_comm != MPI_COMM_NULL) {
+        BL_MPI_REQUIRE(MPI_Comm_free(&m_particle_handshake_comm));
+    }
+    m_particle_handshake_ptr = nullptr;
+    m_particle_handshake_nprocs = 0;
+}
+
+void ParticleContainerBase::ensureParticleHandshakeWindow () const
+{
+    const int nprocs = ParallelContext::NProcsSub();
+    MPI_Comm comm = ParallelContext::CommunicatorSub();
+
+    bool needs_rebuild = (m_particle_handshake_win == MPI_WIN_NULL)
+        || (m_particle_handshake_nprocs != nprocs)
+        || (m_particle_handshake_comm == MPI_COMM_NULL);
+
+    if (!needs_rebuild)
+    {
+        int cmp = MPI_UNEQUAL;
+        BL_MPI_REQUIRE(MPI_Comm_compare(comm, m_particle_handshake_comm, &cmp));
+        needs_rebuild = (cmp != MPI_IDENT && cmp != MPI_CONGRUENT);
+    }
+
+    if (needs_rebuild)
+    {
+        const_cast<ParticleContainerBase*>(this)->releaseParticleHandshakeWindow();
+
+        Long* baseptr = nullptr;
+        MPI_Win win = MPI_WIN_NULL;
+        BL_MPI_REQUIRE(MPI_Win_allocate(static_cast<MPI_Aint>(nprocs*sizeof(Long)),
+                                        sizeof(Long),
+                                        MPI_INFO_NULL,
+                                        comm,
+                                        &baseptr,
+                                        &win));
+
+        MPI_Comm dup_comm = MPI_COMM_NULL;
+        BL_MPI_REQUIRE(MPI_Comm_dup(comm, &dup_comm));
+
+        m_particle_handshake_ptr = baseptr;
+        m_particle_handshake_win = win;
+        m_particle_handshake_nprocs = nprocs;
+        m_particle_handshake_comm = dup_comm;
+    }
+}
+#endif
+
 void ParticleContainerBase::SetParGDB (const Geometry            & geom,
                                        const DistributionMapping & dmap,
                                        const BoxArray            & ba)
diff --git a/Tests/Particles/RedistributeGlobal/CMakeLists.txt b/Tests/Particles/RedistributeGlobal/CMakeLists.txt
new file mode 100644
index 0000000000..dc7fc83a0c
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobal/CMakeLists.txt
@@ -0,0 +1,13 @@
+foreach(D IN LISTS AMReX_SPACEDIM)
+    set(_sources     main.cpp)
+    if (NOT AMReX_GPU_BACKEND STREQUAL NONE)
+      set(_input_files inputs.rt.cuda)
+    else ()
+      set(_input_files inputs.rt)
+    endif ()
+
+    setup_test(${D} _sources _input_files)
+
+    unset(_sources)
+    unset(_input_files)
+endforeach()
diff --git a/Tests/Particles/RedistributeGlobal/GNUmakefile b/Tests/Particles/RedistributeGlobal/GNUmakefile
new file mode 100644
index 0000000000..3b69439b6a
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobal/GNUmakefile
@@ -0,0 +1,22 @@
+AMREX_HOME = ../../../
+
+DEBUG = FALSE
+
+DIM = 3
+
+COMP = gcc
+
+USE_MPI = TRUE
+USE_OMP = FALSE
+USE_CUDA = FALSE
+
+TINY_PROFILE = TRUE
+USE_PARTICLES = TRUE
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+include ./Make.package
+include $(AMREX_HOME)/Src/Base/Make.package
+include $(AMREX_HOME)/Src/Particle/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
diff --git a/Tests/Particles/RedistributeGlobal/Make.package b/Tests/Particles/RedistributeGlobal/Make.package
new file mode 100644
index 0000000000..6b4b865e8f
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobal/Make.package
@@ -0,0 +1 @@
+CEXE_sources += main.cpp
diff --git a/Tests/Particles/RedistributeGlobal/inputs b/Tests/Particles/RedistributeGlobal/inputs
new file mode 100644
index 0000000000..cb1966634b
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobal/inputs
@@ -0,0 +1,13 @@
+redistribute_global.size = (64, 64, 64)
+redistribute_global.max_grid_size = 32
+redistribute_global.is_periodic = 1
+redistribute_global.num_ppc = 2
+redistribute_global.nsteps = 200
+redistribute_global.nlevs = 1
+
+redistribute_global.num_runtime_real = 0
+redistribute_global.num_runtime_int = 0
+
+redistribute_global.sort = 0
+
+amrex.use_gpu_aware_mpi = 0
diff --git a/Tests/Particles/RedistributeGlobal/inputs.rt b/Tests/Particles/RedistributeGlobal/inputs.rt
new file mode 100644
index 0000000000..00cc40221c
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobal/inputs.rt
@@ -0,0 +1,13 @@
+redistribute_global.size = (64, 64, 64)
+redistribute_global.max_grid_size = 32
+redistribute_global.is_periodic = 1
+redistribute_global.num_ppc = 1
+redistribute_global.nsteps = 200
+redistribute_global.nlevs = 1
+
+redistribute_global.num_runtime_real = 0
+redistribute_global.num_runtime_int = 0
+
+redistribute_global.sort = 0
+
+amrex.use_gpu_aware_mpi = 0
diff --git a/Tests/Particles/RedistributeGlobal/inputs.rt.cuda b/Tests/Particles/RedistributeGlobal/inputs.rt.cuda
new file mode 100644
index 0000000000..00cc40221c
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobal/inputs.rt.cuda
@@ -0,0 +1,13 @@
+redistribute_global.size = (64, 64, 64)
+redistribute_global.max_grid_size = 32
+redistribute_global.is_periodic = 1
+redistribute_global.num_ppc = 1
+redistribute_global.nsteps = 200
+redistribute_global.nlevs = 1
+
+redistribute_global.num_runtime_real = 0
+redistribute_global.num_runtime_int = 0
+
+redistribute_global.sort = 0
+
+amrex.use_gpu_aware_mpi = 0
diff --git a/Tests/Particles/RedistributeGlobal/main.cpp b/Tests/Particles/RedistributeGlobal/main.cpp
new file mode 100644
index 0000000000..07b263cb08
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobal/main.cpp
@@ -0,0 +1,383 @@
+#include <AMReX.H>
+#include <AMReX_MultiFab.H>
+#include <AMReX_ParmParse.H>
+#include <AMReX_Particles.H>
+
+using namespace amrex;
+
+static constexpr int NSR = 6;
+static constexpr int NSI = 1;
+static constexpr int NAR = 1;
+static constexpr int NAI = 1;
+
+int num_runtime_real = 0;
+int num_runtime_int = 0;
+
+void get_position_unit_cell(Real* r, const IntVect& nppc, int i_part)
+{
+    int nx = nppc[0];
+#if AMREX_SPACEDIM > 1
+    int ny = nppc[1];
+#else
+    int ny = 1;
+#endif
+#if AMREX_SPACEDIM > 2
+    int nz = nppc[2];
+#else
+    int nz = 1;
+#endif
+
+    int ix_part = i_part/(ny * nz);
+    int iy_part = (i_part % (ny * nz)) % ny;
+    int iz_part = (i_part % (ny * nz)) / ny;
+
+    r[0] = (0.5+ix_part)/nx;
+    r[1] = (0.5+iy_part)/ny;
+    r[2] = (0.5+iz_part)/nz;
+}
+
+class TestParticleContainer
+    : public amrex::ParticleContainer<NSR, NSI, NAR, NAI>
+{
+public:
+
+    TestParticleContainer (const Vector<amrex::Geometry>& a_geom,
+                           const Vector<amrex::DistributionMapping>& a_dmap,
+                           const Vector<amrex::BoxArray>& a_ba,
+                           const Vector<amrex::IntVect>& a_rr)
+        : amrex::ParticleContainer<NSR, NSI, NAR, NAI>(a_geom, a_dmap, a_ba, a_rr)
+    {
+        for (int i = 0; i < num_runtime_real; ++i)
+        {
+            AddRealComp(true);
+        }
+        for (int i = 0; i < num_runtime_int; ++i)
+        {
+            AddIntComp(true);
+        }
+    }
+
+    void RedistributeGlobal ()
+    {
+        const int lev_min = 0;
+        const int lev_max = finestLevel();
+        const int nGrow = 0;
+        const int local = 0;
+        Redistribute(lev_min, lev_max, nGrow, local);
+    }
+
+    void InitParticles (const amrex::IntVect& a_num_particles_per_cell)
+    {
+        BL_PROFILE("InitParticles");
+
+        const int lev = 0;
+        const Real* dx = Geom(lev).CellSize();
+        const Real* plo = Geom(lev).ProbLo();
+
+        const int num_ppc = AMREX_D_TERM( a_num_particles_per_cell[0],
+                                         *a_num_particles_per_cell[1],
+                                         *a_num_particles_per_cell[2]);
+
+        for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
+        {
+            const Box& tile_box = mfi.tilebox();
+
+            Gpu::HostVector<ParticleType> host_particles;
+            std::array<Gpu::HostVector<ParticleReal>, NAR> host_real;
+            std::array<Gpu::HostVector<int>, NAI> host_int;
+
+            std::vector<Gpu::HostVector<ParticleReal> > host_runtime_real(NumRuntimeRealComps());
+            std::vector<Gpu::HostVector<int> > host_runtime_int(NumRuntimeIntComps());
+
+            for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv))
+            {
+                for (int i_part = 0; i_part < num_ppc; ++i_part)
+                {
+                    Real r[3];
+                    get_position_unit_cell(r, a_num_particles_per_cell, i_part);
+
+                    ParticleType p;
+                    p.id() = ParticleType::NextID();
+                    p.cpu() = ParallelDescriptor::MyProc();
+                    p.pos(0) = static_cast<ParticleReal>(plo[0] + (iv[0] + r[0])*dx[0]);
+#if AMREX_SPACEDIM > 1
+                    p.pos(1) = static_cast<ParticleReal>(plo[1] + (iv[1] + r[1])*dx[1]);
+#endif
+#if AMREX_SPACEDIM > 2
+                    p.pos(2) = static_cast<ParticleReal>(plo[2] + (iv[2] + r[2])*dx[2]);
+#endif
+
+                    for (int i = 0; i < NSR; ++i) { p.rdata(i) = ParticleReal(p.id()); }
+                    for (int i = 0; i < NSI; ++i) { p.idata(i) = int(p.id()); }
+
+                    host_particles.push_back(p);
+                    for (int i = 0; i < NAR; ++i) {
+                        host_real[i].push_back(ParticleReal(p.id()));
+                    }
+                    for (int i = 0; i < NAI; ++i) {
+                        host_int[i].push_back(int(p.id()));
+                    }
+                    for (int i = 0; i < NumRuntimeRealComps(); ++i) {
+                        host_runtime_real[i].push_back(ParticleReal(p.id()));
+                    }
+                    for (int i = 0; i < NumRuntimeIntComps(); ++i) {
+                        host_runtime_int[i].push_back(int(p.id()));
+                    }
+                }
+            }
+
+            auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex());
+            auto old_size = particle_tile.GetArrayOfStructs().size();
+            auto new_size = old_size + host_particles.size();
+            particle_tile.resize(new_size);
+
+            Gpu::copyAsync(Gpu::hostToDevice,
+                           host_particles.begin(),
+                           host_particles.end(),
+                           particle_tile.GetArrayOfStructs().begin() + old_size);
+
+            auto& soa = particle_tile.GetStructOfArrays();
+            for (int i = 0; i < NAR; ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_real[i].begin(),
+                               host_real[i].end(),
+                               soa.GetRealData(i).begin() + old_size);
+            }
+
+            for (int i = 0; i < NAI; ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_int[i].begin(),
+                               host_int[i].end(),
+                               soa.GetIntData(i).begin() + old_size);
+            }
+
+            for (int i = 0; i < NumRuntimeRealComps(); ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_runtime_real[i].begin(),
+                               host_runtime_real[i].end(),
+                               soa.GetRealData(NAR+i).begin() + old_size);
+            }
+
+            for (int i = 0; i < NumRuntimeIntComps(); ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_runtime_int[i].begin(),
+                               host_runtime_int[i].end(),
+                               soa.GetIntData(NAI+i).begin() + old_size);
+            }
+
+            Gpu::streamSynchronize();
+        }
+
+        RedistributeGlobal();
+    }
+
+    void moveParticles ()
+    {
+        BL_PROFILE("TestParticleContainer::moveParticles");
+
+        for (int lev = 0; lev <= finestLevel(); ++lev)
+        {
+            const auto plo = Geom(lev).ProbLoArray();
+            const auto phi = Geom(lev).ProbHiArray();
+            auto& plev = GetParticles(lev);
+
+            for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
+            {
+                int gid = mfi.index();
+                int tid = mfi.LocalTileIndex();
+                auto& ptile = plev[std::make_pair(gid, tid)];
+                auto& aos = ptile.GetArrayOfStructs();
+                ParticleType* pstruct = aos.data();
+                const size_t np = aos.numParticles();
+
+                amrex::ParallelForRNG(np,
+                [=] AMREX_GPU_DEVICE (int i, RandomEngine const& engine) noexcept
+                {
+                    ParticleType& p = pstruct[i];
+                    p.pos(0) = static_cast<ParticleReal>(plo[0] + (phi[0] - plo[0])*amrex::Random(engine));
+#if AMREX_SPACEDIM > 1
+                    p.pos(1) = static_cast<ParticleReal>(plo[1] + (phi[1] - plo[1])*amrex::Random(engine));
+#endif
+#if AMREX_SPACEDIM > 2
+                    p.pos(2) = static_cast<ParticleReal>(plo[2] + (phi[2] - plo[2])*amrex::Random(engine));
+#endif
+                });
+            }
+        }
+    }
+
+    void checkAnswer () const
+    {
+        BL_PROFILE("TestParticleContainer::checkAnswer");
+
+        AMREX_ALWAYS_ASSERT(OK());
+
+        int num_rr = NumRuntimeRealComps();
+        int num_ii = NumRuntimeIntComps();
+
+        for (int lev = 0; lev <= finestLevel(); ++lev)
+        {
+            const auto& plev = GetParticles(lev);
+            for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
+            {
+                int gid = mfi.index();
+                int tid = mfi.LocalTileIndex();
+                const auto& ptile = plev.at(std::make_pair(gid, tid));
+                const auto& ptd = ptile.getConstParticleTileData();
+                const size_t np = ptile.numParticles();
+
+                AMREX_FOR_1D(np, i,
+                {
+                    for (int j = 0; j < NSR; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_aos[i].rdata(j) == ptd.m_aos[i].id());
+                    }
+                    for (int j = 0; j < NSI; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_aos[i].idata(j) == ptd.m_aos[i].id());
+                    }
+                    if constexpr (NAR > 0) {
+                        for (int j = 0; j < NAR; ++j)
+                        {
+                            AMREX_ALWAYS_ASSERT(ptd.m_rdata[j][i] == ptd.m_aos[i].id());
+                        }
+                    }
+                    if constexpr (NAI > 0) {
+                        for (int j = 0; j < NAI; ++j)
+                        {
+                            AMREX_ALWAYS_ASSERT(ptd.m_idata[j][i] == ptd.m_aos[i].id());
+                        }
+                    }
+                    for (int j = 0; j < num_rr; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_runtime_rdata[j][i] == ptd.m_aos[i].id());
+                    }
+                    for (int j = 0; j < num_ii; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_runtime_idata[j][i] == ptd.m_aos[i].id());
+                    }
+                });
+            }
+        }
+    }
+};
+
+struct TestParams
+{
+    IntVect size;
+    int max_grid_size;
+    int num_ppc;
+    int is_periodic;
+    int nsteps;
+    int nlevs;
+    int sort;
+    int stable_redistribute = 0;
+};
+
+void testRedistributeGlobal ();
+
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+
+    amrex::Print() << "Running global redistribute test\n";
+    testRedistributeGlobal();
+
+    amrex::Finalize();
+}
+
+void get_test_params (TestParams& params, const std::string& prefix)
+{
+    ParmParse pp(prefix);
+    pp.get("size", params.size);
+    pp.get("max_grid_size", params.max_grid_size);
+    pp.get("num_ppc", params.num_ppc);
+    pp.get("is_periodic", params.is_periodic);
+    pp.get("nsteps", params.nsteps);
+    pp.get("nlevs", params.nlevs);
+    pp.query("num_runtime_real", num_runtime_real);
+    pp.query("num_runtime_int", num_runtime_int);
+    pp.query("stable_redistribute", params.stable_redistribute);
+
+    params.sort = 0;
+    pp.query("sort", params.sort);
+}
+
+void testRedistributeGlobal ()
+{
+    BL_PROFILE("testRedistributeGlobal");
+    TestParams params;
+    get_test_params(params, "redistribute_global");
+
+    int is_per[] = {AMREX_D_DECL(params.is_periodic,
+                                 params.is_periodic,
+                                 params.is_periodic)};
+
+    Vector<IntVect> rr(params.nlevs-1);
+    for (int lev = 1; lev < params.nlevs; ++lev) {
+        rr[lev-1] = IntVect(AMREX_D_DECL(2,2,2));
+    }
+
+    RealBox real_box;
+    for (int n = 0; n < BL_SPACEDIM; ++n)
+    {
+        real_box.setLo(n, 0.0);
+        real_box.setHi(n, params.size[n]);
+    }
+
+    IntVect domain_lo(AMREX_D_DECL(0, 0, 0));
+    IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1));
+    const Box base_domain(domain_lo, domain_hi);
+
+    Vector<Geometry> geom(params.nlevs);
+    geom[0].define(base_domain, &real_box, CoordSys::cartesian, is_per);
+    for (int lev = 1; lev < params.nlevs; ++lev) {
+        geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]),
+                         &real_box, CoordSys::cartesian, is_per);
+    }
+
+    Vector<BoxArray> ba(params.nlevs);
+    Vector<DistributionMapping> dm(params.nlevs);
+    IntVect lo(0);
+    IntVect size = params.size;
+    for (int lev = 0; lev < params.nlevs; ++lev)
+    {
+        ba[lev].define(Box(lo, lo+params.size-1));
+        ba[lev].maxSize(params.max_grid_size);
+        dm[lev].define(ba[lev]);
+        lo += size/2;
+        size *= 2;
+    }
+
+    TestParticleContainer pc(geom, dm, ba, rr);
+    pc.setStableRedistribute(params.stable_redistribute);
+
+    IntVect nppc(params.num_ppc);
+
+    amrex::Print() << "About to initialize particles\n";
+
+    pc.InitParticles(nppc);
+    pc.checkAnswer();
+
+    auto np_old = pc.TotalNumberOfParticles();
+
+    if (params.sort) { pc.SortParticlesByCell(); }
+
+    for (int i = 0; i < params.nsteps; ++i)
+    {
+        pc.moveParticles();
+        pc.RedistributeGlobal();
+        if (params.sort) { pc.SortParticlesByCell(); }
+        pc.checkAnswer();
+    }
+
+    if (geom[0].isAllPeriodic()) {
+        AMREX_ALWAYS_ASSERT(np_old == pc.TotalNumberOfParticles());
+    }
+
+    amrex::Print() << "pass\n";
+}
diff --git a/Tests/Particles/RedistributeGlobalDM/CMakeLists.txt b/Tests/Particles/RedistributeGlobalDM/CMakeLists.txt
new file mode 100644
index 0000000000..dc7fc83a0c
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobalDM/CMakeLists.txt
@@ -0,0 +1,13 @@
+foreach(D IN LISTS AMReX_SPACEDIM)
+    set(_sources     main.cpp)
+    if (NOT AMReX_GPU_BACKEND STREQUAL NONE)
+      set(_input_files inputs.rt.cuda)
+    else ()
+      set(_input_files inputs.rt)
+    endif ()
+
+    setup_test(${D} _sources _input_files)
+
+    unset(_sources)
+    unset(_input_files)
+endforeach()
diff --git a/Tests/Particles/RedistributeGlobalDM/GNUmakefile b/Tests/Particles/RedistributeGlobalDM/GNUmakefile
new file mode 100644
index 0000000000..3b69439b6a
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobalDM/GNUmakefile
@@ -0,0 +1,22 @@
+AMREX_HOME = ../../../
+
+DEBUG = FALSE
+
+DIM = 3
+
+COMP = gcc
+
+USE_MPI = TRUE
+USE_OMP = FALSE
+USE_CUDA = FALSE
+
+TINY_PROFILE = TRUE
+USE_PARTICLES = TRUE
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.defs
+
+include ./Make.package
+include $(AMREX_HOME)/Src/Base/Make.package
+include $(AMREX_HOME)/Src/Particle/Make.package
+
+include $(AMREX_HOME)/Tools/GNUMake/Make.rules
diff --git a/Tests/Particles/RedistributeGlobalDM/Make.package b/Tests/Particles/RedistributeGlobalDM/Make.package
new file mode 100644
index 0000000000..6b4b865e8f
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobalDM/Make.package
@@ -0,0 +1 @@
+CEXE_sources += main.cpp
diff --git a/Tests/Particles/RedistributeGlobalDM/inputs b/Tests/Particles/RedistributeGlobalDM/inputs
new file mode 100644
index 0000000000..31d3bbffb3
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobalDM/inputs
@@ -0,0 +1,15 @@
+redistribute_global_dm.size = (64, 64, 64)
+redistribute_global_dm.max_grid_size = 8
+redistribute_global_dm.is_periodic = 1
+redistribute_global_dm.num_ppc = 2
+redistribute_global_dm.nsteps = 50
+redistribute_global_dm.nlevs = 1
+redistribute_global_dm.random_seed = 8675309
+redistribute_global_dm.check_answer_each_step = 1
+
+redistribute_global_dm.num_runtime_real = 0
+redistribute_global_dm.num_runtime_int = 0
+
+redistribute_global_dm.sort = 0
+
+amrex.use_gpu_aware_mpi = 0
diff --git a/Tests/Particles/RedistributeGlobalDM/inputs.rt b/Tests/Particles/RedistributeGlobalDM/inputs.rt
new file mode 100644
index 0000000000..c658ba6c73
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobalDM/inputs.rt
@@ -0,0 +1,15 @@
+redistribute_global_dm.size = (64, 64, 64)
+redistribute_global_dm.max_grid_size = 8
+redistribute_global_dm.is_periodic = 1
+redistribute_global_dm.num_ppc = 1
+redistribute_global_dm.nsteps = 25
+redistribute_global_dm.nlevs = 1
+redistribute_global_dm.random_seed = 8675309
+redistribute_global_dm.check_answer_each_step = 1
+
+redistribute_global_dm.num_runtime_real = 0
+redistribute_global_dm.num_runtime_int = 0
+
+redistribute_global_dm.sort = 0
+
+amrex.use_gpu_aware_mpi = 0
diff --git a/Tests/Particles/RedistributeGlobalDM/inputs.rt.cuda b/Tests/Particles/RedistributeGlobalDM/inputs.rt.cuda
new file mode 100644
index 0000000000..c658ba6c73
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobalDM/inputs.rt.cuda
@@ -0,0 +1,15 @@
+redistribute_global_dm.size = (64, 64, 64)
+redistribute_global_dm.max_grid_size = 8
+redistribute_global_dm.is_periodic = 1
+redistribute_global_dm.num_ppc = 1
+redistribute_global_dm.nsteps = 25
+redistribute_global_dm.nlevs = 1
+redistribute_global_dm.random_seed = 8675309
+redistribute_global_dm.check_answer_each_step = 1
+
+redistribute_global_dm.num_runtime_real = 0
+redistribute_global_dm.num_runtime_int = 0
+
+redistribute_global_dm.sort = 0
+
+amrex.use_gpu_aware_mpi = 0
diff --git a/Tests/Particles/RedistributeGlobalDM/main.cpp b/Tests/Particles/RedistributeGlobalDM/main.cpp
new file mode 100644
index 0000000000..950f676991
--- /dev/null
+++ b/Tests/Particles/RedistributeGlobalDM/main.cpp
@@ -0,0 +1,402 @@
+#include <AMReX.H>
+#include <AMReX_MultiFab.H>
+#include <AMReX_ParmParse.H>
+#include <AMReX_Particles.H>
+
+#include <algorithm>
+#include <cstdint>
+#include <numeric>
+#include <random>
+
+using namespace amrex;
+
+static constexpr int NSR = 6;
+static constexpr int NSI = 1;
+static constexpr int NAR = 1;
+static constexpr int NAI = 1;
+
+int num_runtime_real = 0;
+int num_runtime_int = 0;
+
+void get_position_unit_cell (Real* r, const IntVect& nppc, int i_part)
+{
+    int nx = nppc[0];
+#if AMREX_SPACEDIM > 1
+    int ny = nppc[1];
+#else
+    int ny = 1;
+#endif
+#if AMREX_SPACEDIM > 2
+    int nz = nppc[2];
+#else
+    int nz = 1;
+#endif
+
+    int ix_part = i_part/(ny * nz);
+    int iy_part = (i_part % (ny * nz)) % ny;
+    int iz_part = (i_part % (ny * nz)) / ny;
+
+    r[0] = (0.5+ix_part)/nx;
+    r[1] = (0.5+iy_part)/ny;
+    r[2] = (0.5+iz_part)/nz;
+}
+
+class TestParticleContainer
+    : public amrex::ParticleContainer<NSR, NSI, NAR, NAI>
+{
+public:
+
+    TestParticleContainer (const Vector<amrex::Geometry>& a_geom,
+                           const Vector<amrex::DistributionMapping>& a_dmap,
+                           const Vector<amrex::BoxArray>& a_ba,
+                           const Vector<amrex::IntVect>& a_rr)
+        : amrex::ParticleContainer<NSR, NSI, NAR, NAI>(a_geom, a_dmap, a_ba, a_rr)
+    {
+        for (int i = 0; i < num_runtime_real; ++i)
+        {
+            AddRealComp(true);
+        }
+        for (int i = 0; i < num_runtime_int; ++i)
+        {
+            AddIntComp(true);
+        }
+    }
+
+    void RedistributeGlobal ()
+    {
+        const int lev_min = 0;
+        const int lev_max = finestLevel();
+        const int nGrow = 0;
+        const int local = 0;
+        Redistribute(lev_min, lev_max, nGrow, local);
+    }
+
+    void InitParticles (const amrex::IntVect& a_num_particles_per_cell)
+    {
+        BL_PROFILE("InitParticles");
+
+        const int lev = 0;
+        const Real* dx = Geom(lev).CellSize();
+        const Real* plo = Geom(lev).ProbLo();
+
+        const int num_ppc = AMREX_D_TERM(a_num_particles_per_cell[0],
+                                         * a_num_particles_per_cell[1],
+                                         * a_num_particles_per_cell[2]);
+
+        for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
+        {
+            const Box& tile_box = mfi.tilebox();
+
+            Gpu::HostVector<ParticleType> host_particles;
+            std::array<Gpu::HostVector<ParticleReal>, NAR> host_real;
+            std::array<Gpu::HostVector<int>, NAI> host_int;
+
+            std::vector<Gpu::HostVector<ParticleReal> > host_runtime_real(NumRuntimeRealComps());
+            std::vector<Gpu::HostVector<int> > host_runtime_int(NumRuntimeIntComps());
+
+            for (IntVect iv = tile_box.smallEnd(); iv <= tile_box.bigEnd(); tile_box.next(iv))
+            {
+                for (int i_part = 0; i_part < num_ppc; ++i_part)
+                {
+                    Real r[3];
+                    get_position_unit_cell(r, a_num_particles_per_cell, i_part);
+
+                    ParticleType p;
+                    p.id() = ParticleType::NextID();
+                    p.cpu() = ParallelDescriptor::MyProc();
+                    p.pos(0) = static_cast<ParticleReal>(plo[0] + (iv[0] + r[0])*dx[0]);
+#if AMREX_SPACEDIM > 1
+                    p.pos(1) = static_cast<ParticleReal>(plo[1] + (iv[1] + r[1])*dx[1]);
+#endif
+#if AMREX_SPACEDIM > 2
+                    p.pos(2) = static_cast<ParticleReal>(plo[2] + (iv[2] + r[2])*dx[2]);
+#endif
+
+                    for (int i = 0; i < NSR; ++i) { p.rdata(i) = ParticleReal(p.id()); }
+                    for (int i = 0; i < NSI; ++i) { p.idata(i) = int(p.id()); }
+
+                    host_particles.push_back(p);
+                    for (int i = 0; i < NAR; ++i) {
+                        host_real[i].push_back(ParticleReal(p.id()));
+                    }
+                    for (int i = 0; i < NAI; ++i) {
+                        host_int[i].push_back(int(p.id()));
+                    }
+                    for (int i = 0; i < NumRuntimeRealComps(); ++i) {
+                        host_runtime_real[i].push_back(ParticleReal(p.id()));
+                    }
+                    for (int i = 0; i < NumRuntimeIntComps(); ++i) {
+                        host_runtime_int[i].push_back(int(p.id()));
+                    }
+                }
+            }
+
+            auto& particle_tile = DefineAndReturnParticleTile(lev, mfi.index(), mfi.LocalTileIndex());
+            auto old_size = particle_tile.GetArrayOfStructs().size();
+            auto new_size = old_size + host_particles.size();
+            particle_tile.resize(new_size);
+
+            Gpu::copyAsync(Gpu::hostToDevice,
+                           host_particles.begin(),
+                           host_particles.end(),
+                           particle_tile.GetArrayOfStructs().begin() + old_size);
+
+            auto& soa = particle_tile.GetStructOfArrays();
+            for (int i = 0; i < NAR; ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_real[i].begin(),
+                               host_real[i].end(),
+                               soa.GetRealData(i).begin() + old_size);
+            }
+
+            for (int i = 0; i < NAI; ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_int[i].begin(),
+                               host_int[i].end(),
+                               soa.GetIntData(i).begin() + old_size);
+            }
+
+            for (int i = 0; i < NumRuntimeRealComps(); ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_runtime_real[i].begin(),
+                               host_runtime_real[i].end(),
+                               soa.GetRealData(NAR+i).begin() + old_size);
+            }
+
+            for (int i = 0; i < NumRuntimeIntComps(); ++i)
+            {
+                Gpu::copyAsync(Gpu::hostToDevice,
+                               host_runtime_int[i].begin(),
+                               host_runtime_int[i].end(),
+                               soa.GetIntData(NAI+i).begin() + old_size);
+            }
+
+            Gpu::streamSynchronize();
+        }
+
+        RedistributeGlobal();
+    }
+
+    void checkAnswer () const
+    {
+        BL_PROFILE("TestParticleContainer::checkAnswer");
+
+        AMREX_ALWAYS_ASSERT(OK());
+
+        int num_rr = NumRuntimeRealComps();
+        int num_ii = NumRuntimeIntComps();
+
+        for (int lev = 0; lev <= finestLevel(); ++lev)
+        {
+            const auto& plev = GetParticles(lev);
+            for (MFIter mfi = MakeMFIter(lev); mfi.isValid(); ++mfi)
+            {
+                int gid = mfi.index();
+                int tid = mfi.LocalTileIndex();
+                const auto& ptile = plev.at(std::make_pair(gid, tid));
+                const auto& ptd = ptile.getConstParticleTileData();
+                const size_t np = ptile.numParticles();
+
+                AMREX_FOR_1D(np, i,
+                {
+                    for (int j = 0; j < NSR; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_aos[i].rdata(j) == ptd.m_aos[i].id());
+                    }
+                    for (int j = 0; j < NSI; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_aos[i].idata(j) == ptd.m_aos[i].id());
+                    }
+                    if constexpr (NAR > 0) {
+                        for (int j = 0; j < NAR; ++j)
+                        {
+                            AMREX_ALWAYS_ASSERT(ptd.m_rdata[j][i] == ptd.m_aos[i].id());
+                        }
+                    }
+                    if constexpr (NAI > 0) {
+                        for (int j = 0; j < NAI; ++j)
+                        {
+                            AMREX_ALWAYS_ASSERT(ptd.m_idata[j][i] == ptd.m_aos[i].id());
+                        }
+                    }
+                    for (int j = 0; j < num_rr; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_runtime_rdata[j][i] == ptd.m_aos[i].id());
+                    }
+                    for (int j = 0; j < num_ii; ++j)
+                    {
+                        AMREX_ALWAYS_ASSERT(ptd.m_runtime_idata[j][i] == ptd.m_aos[i].id());
+                    }
+                });
+            }
+        }
+    }
+};
+
+struct TestParams
+{
+    IntVect size;
+    int max_grid_size;
+    int num_ppc;
+    int is_periodic;
+    int nsteps;
+    int nlevs;
+    int sort;
+    int stable_redistribute = 0;
+    int random_seed = 8675309;
+    int check_answer_each_step = 1;
+};
+
+auto makeRandomPMap (int nboxes, int nprocs, std::uint32_t seed) -> Vector<int>
+{
+    Vector<int> pmap(nboxes);
+    for (int i = 0; i < nboxes; ++i) {
+        pmap[i] = i % nprocs;
+    }
+
+    std::mt19937 gen(seed);
+    std::shuffle(pmap.begin(), pmap.end(), gen);
+
+    return pmap;
+}
+
+void testRedistributeGlobalDM ();
+
+int main (int argc, char* argv[])
+{
+    amrex::Initialize(argc,argv);
+
+    amrex::Print() << "Running global redistribute DistributionMap shuffle test\n";
+    testRedistributeGlobalDM();
+
+    amrex::Finalize();
+}
+
+void get_test_params (TestParams& params, const std::string& prefix)
+{
+    ParmParse pp(prefix);
+    pp.get("size", params.size);
+    pp.get("max_grid_size", params.max_grid_size);
+    pp.get("num_ppc", params.num_ppc);
+    pp.get("is_periodic", params.is_periodic);
+    pp.get("nsteps", params.nsteps);
+    pp.get("nlevs", params.nlevs);
+    pp.query("num_runtime_real", num_runtime_real);
+    pp.query("num_runtime_int", num_runtime_int);
+    pp.query("stable_redistribute", params.stable_redistribute);
+    pp.query("random_seed", params.random_seed);
+    pp.query("check_answer_each_step", params.check_answer_each_step);
+
+    params.sort = 0;
+    pp.query("sort", params.sort);
+}
+
+void testRedistributeGlobalDM ()
+{
+    BL_PROFILE("testRedistributeGlobalDM");
+    TestParams params;
+    get_test_params(params, "redistribute_global_dm");
+
+    int is_per[] = {AMREX_D_DECL(params.is_periodic,
+                                 params.is_periodic,
+                                 params.is_periodic)};
+
+    Vector<IntVect> rr(params.nlevs-1);
+    for (int lev = 1; lev < params.nlevs; ++lev) {
+        rr[lev-1] = IntVect(AMREX_D_DECL(2,2,2));
+    }
+
+    RealBox real_box;
+    for (int n = 0; n < BL_SPACEDIM; ++n)
+    {
+        real_box.setLo(n, 0.0);
+        real_box.setHi(n, params.size[n]);
+    }
+
+    IntVect domain_lo(AMREX_D_DECL(0, 0, 0));
+    IntVect domain_hi(AMREX_D_DECL(params.size[0]-1,params.size[1]-1,params.size[2]-1));
+    const Box base_domain(domain_lo, domain_hi);
+
+    Vector<Geometry> geom(params.nlevs);
+    geom[0].define(base_domain, &real_box, CoordSys::cartesian, is_per);
+    for (int lev = 1; lev < params.nlevs; ++lev) {
+        geom[lev].define(amrex::refine(geom[lev-1].Domain(), rr[lev-1]),
+                         &real_box, CoordSys::cartesian, is_per);
+    }
+
+    Vector<BoxArray> ba(params.nlevs);
+    Vector<DistributionMapping> dm(params.nlevs);
+    IntVect lo(0);
+    IntVect size = params.size;
+    for (int lev = 0; lev < params.nlevs; ++lev)
+    {
+        ba[lev].define(Box(lo, lo+params.size-1));
+        ba[lev].maxSize(params.max_grid_size);
+        dm[lev].define(ba[lev]);
+        lo += size/2;
+        size *= 2;
+    }
+
+    TestParticleContainer pc(geom, dm, ba, rr);
+    pc.setStableRedistribute(params.stable_redistribute);
+
+    IntVect nppc(params.num_ppc);
+
+    amrex::Print() << "About to initialize particles\n";
+
+    pc.InitParticles(nppc);
+    pc.checkAnswer();
+
+    auto np_old = pc.TotalNumberOfParticles();
+    const int nprocs = ParallelDescriptor::NProcs();
+
+    amrex::Print() << "Benchmark setup: " << ba[0].size() << " boxes on level 0 across "
+                   << nprocs << " MPI ranks\n";
+
+    if (params.sort) { pc.SortParticlesByCell(); }
+
+    Real total_dm_time = Real(0.0);
+    Real total_redistribute_time = Real(0.0);
+
+    for (int i = 0; i < params.nsteps; ++i)
+    {
+        const auto dm_start = amrex::second();
+        for (int lev = 0; lev < params.nlevs; ++lev)
+        {
+            auto pmap = makeRandomPMap(static_cast<int>(ba[lev].size()), nprocs,
+                                       static_cast<std::uint32_t>(params.random_seed + 7919*i + 101*lev));
+            DistributionMapping new_dm;
+            new_dm.define(pmap);
+            pc.SetParticleDistributionMap(lev, new_dm);
+        }
+        total_dm_time += amrex::second() - dm_start;
+
+        ParallelDescriptor::Barrier();
+        const auto redistribute_start = amrex::second();
+        pc.RedistributeGlobal();
+        total_redistribute_time += amrex::second() - redistribute_start;
+
+        if (params.sort) { pc.SortParticlesByCell(); }
+        if (params.check_answer_each_step) { pc.checkAnswer(); }
+    }
+
+    if (!params.check_answer_each_step) {
+        pc.checkAnswer();
+    }
+
+    if (geom[0].isAllPeriodic()) {
+        AMREX_ALWAYS_ASSERT(np_old == pc.TotalNumberOfParticles());
+    }
+
+    ParallelDescriptor::ReduceRealMax(total_dm_time, ParallelDescriptor::IOProcessorNumber());
+    ParallelDescriptor::ReduceRealMax(total_redistribute_time, ParallelDescriptor::IOProcessorNumber());
+
+    amrex::Print() << "Max DM shuffle time over all ranks: " << total_dm_time << " s\n";
+    amrex::Print() << "Max redistribute time over all ranks: " << total_redistribute_time << " s\n";
+    amrex::Print() << "Average redistribute time per step: "
+                   << total_redistribute_time/static_cast<Real>(params.nsteps) << " s\n";
+    amrex::Print() << "pass\n";
+}