diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst
index e9dc53fa7c..789a731ad8 100644
--- a/Docs/sphinx_documentation/source/GPU.rst
+++ b/Docs/sphinx_documentation/source/GPU.rst
@@ -753,6 +753,144 @@ constructed inside of an MFIter loop with GPU kernels and great care should
 be used when accessing :cpp:`Gpu::ManagedVector` data on GPUs to avoid race
 conditions.
 
+.. _sec:gpu:buffer_tracked_managed:
+
+Gpu::Buffer, Gpu::TrackedVector, and Gpu::ManagedVector
+-------------------------------------------------------
+
+:cpp:`Gpu::Buffer` (``AMReX_GpuBuffer.H``) and :cpp:`Gpu::TrackedVector`
+(``AMReX_TrackedVector.H``) pair a host allocation with a device mirror.
+
+:cpp:`Gpu::Buffer` uses :cpp:`Gpu::PinnedVector` on the host and
+``copyToDeviceAsync()`` / ``copyToHost()`` for transfers.  Use it for
+**frequent, performance-oriented** async copies during a normal GPU run.
+
+:cpp:`Gpu::TrackedVector` exposes a host ``std::vector`` via ``host()`` /
+``host_const()`` and a device :cpp:`Gpu::DeviceVector` via ``device()`` /
+``device_const()`` (GPU builds only).  Writable accessors mark the mirror
+out-of-date; ``to_device()``/``to_host()`` perform **synchronous** copies when
+needed.  You may populate the host **before** :cpp:`amrex::Initialize()`. Device
+memory is only valid while AMReX is initialized.  On :cpp:`amrex::Finalize()`,
+AMReX clears device storage via ``release_gpu()`` and leaves the host copy for
+reuse, which supports **Python / pyAMReX** and other workflows
+that cross multiple AMReX initialize/finalize cycles.  Use read-only
+``host_const()`` / ``device_const()`` when you are not writing, so the object
+does not flip to a dirty state unnecessarily.
+
+:cpp:`Gpu::ManagedVector` is the arena-backed unified-memory vector introduced
+under **Gpu Vectors** above (``The_Managed_Arena()``).  Like :cpp:`Gpu::Buffer`
+it can only be used while AMReX is initialized / a GPU device context exists.
+
+.. _tab:gpu:buffer_tracked_managed:
+
+.. list-table::
+   :widths: 12 28 28 28
+   :header-rows: 1
+
+   * -
+     - :cpp:`Gpu::Buffer`
+     - :cpp:`Gpu::ManagedVector`
+     - :cpp:`Gpu::TrackedVector`
+   * - **Lifetime**
+     - Only between ``amrex::Initialize/Finalize()``
+     - Only between ``amrex::Initialize/Finalize()``
+     - Anytime and cross-session, GPU part only between ``amrex::Initialize/Finalize()``
+   * - **Usage**
+     - ``operator[]`` etc., explicit ``copyToDeviceAsync`` /
+       ``copyToHost``
+     - Single ``data()`` like :cpp:`amrex::Vector`
+     - Separate ``host()`` / ``device()`` (and ``*_const``)
+   * - **Synchronization**
+     - explicit
+     - implicit
+     - explicit, but tracks status
+   * - **Performance**
+     - Best: pinned host enables asynchronous transfers
+     - Implicit memory migration can add latency
+     - Synchronous copy adds latency
+   * - **Best for**
+     - hot copy loops inside a run
+     - maximum simplicity
+     - interactive and cross-AMReX session usage, e.g., in pyAMReX for user inputs that do not change often
+
+
+A minimal :cpp:`Gpu::Buffer` pattern (host fill, async upload, kernel pointer):
+
+.. highlight:: c++
+
+::
+
+    amrex::Initialize(argc, argv);
+
+    Gpu::Buffer<int> buf(n);
+    for (int i = 0; i < n; ++i) { buf[i] = i; }
+
+    int* dp = buf.copyToDeviceAsync();
+    // launch kernels using dp, then optionally:
+    buf.copyToHost();
+
+:cpp:`Gpu::ManagedVector` example (unified memory, accessible from both host and device):
+
+.. highlight:: c++
+
+::
+
+    amrex::Initialize(argc, argv);
+
+    Gpu::ManagedVector<int> mv(n);
+    for (int i = 0; i < n; ++i) { mv[i] = i; }
+
+    int* dp = mv.data();
+    amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (int i) {
+        dp[i] *= 2;  // access on device
+    });
+
+    Gpu::streamSynchronize();
+    // mv[i] now accessible on host with updated values
+
+:cpp:`Gpu::TrackedVector` example:
+On GPU builds, you can create this type at any time, even before ``amrex::Initialize()``.
+``amrex::Finalize()`` releases device storage for the vector but
+keeps the host ``std::vector``, so a later ``Initialize()`` can call
+``to_device()`` again to rebuild the device copy.
+
+.. highlight:: c++
+
+::
+
+    // Host data before AMReX init; GPU available after Initialize().
+    amrex::Gpu::TrackedVector<int> cross_session;
+    cross_session.host() = {7, 8, 9};
+
+    // ... a lot of other interactive user code, e.g., to set up
+    // complex input data, optimization libraries or ML frameworks
+    // in multi-simulation workflows ...
+
+    amrex::Initialize(argc, argv);
+    {
+        cross_session.to_device();
+        // Host and device match; use host_const() / device_const() for reads.
+    }
+    amrex::Finalize();
+
+    // On GPU builds, cross_session.device() is not available now and will
+    // throw, but you can keep using cross_session.host() / .host_const()
+
+    amrex::Initialize(argc, argv);
+    {
+        cross_session.to_device();
+        // Device buffer is re-created; kernels may read via
+        // device_const().data() or write via device().data()
+    }
+    amrex::Finalize();
+
+Optional: call ``release_gpu()`` to free device memory early while keeping
+the host ``std::vector`` for later.  If you do not, ``amrex::Finalize()``
+clears the device storage registered for the object.
+
+Generally, after device kernels, call :cpp:`Gpu::streamSynchronize()`
+(or equivalent ordering) before relying on host data, as for any other device work.
+
 MultiFab Reductions
 -------------------
 
diff --git a/Src/Base/AMReX_TrackedVector.H b/Src/Base/AMReX_TrackedVector.H
new file mode 100644
index 0000000000..90094c5227
--- /dev/null
+++ b/Src/Base/AMReX_TrackedVector.H
@@ -0,0 +1,301 @@
+#ifndef AMREX_TRACKED_VECTOR_H
+#define AMREX_TRACKED_VECTOR_H
+
+#include <AMReX_Config.H>
+
+#include <AMReX.H>
+#include <AMReX_GpuContainers.H>
+
+#include <memory>
+#include <stdexcept>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+namespace amrex::Gpu
+{
+    /** Base for element data with dynamic host vectors mirrored lazily to GPU.
+     *
+     * Provides lifetimes independent of AMReX initialize/finalize cycles,
+     * synchronization tracking, and data release APIs.
+     *
+     * This object is primarily for input handling: data can be initialized
+     * even before AMReX is initialized, which enables workflows that cross
+     * AMReX init/finalize cycles. GPU memory will always be bound to an
+     * AMReX session, but CPU memory can be allocated and live arbitrarily long.
+     *
+     * For AMReX CPU builds, the host and device members point to the *same* memory
+     * and the status will always be up to date.
+     *
+     * Usage contract:
+     *   - Device data can only be allocated after AMReX initialize and before finalize.
+     *   - You can call `release_gpu()` anytime, but we will call it during AMReX finalize
+     *     to invalidate device().
+     *   - Always access data via host/device[_const](). Do not cache references/pointers to
+     *     host/device memory managed by this object, or you run the risk of stale memory access.
+     */
+    template <class T>
+    struct TrackedVector
+    {
+        static_assert(std::is_trivially_copyable<T>(), "TrackedVector can only hold trivially copyable types");
+        using value_type      = T;
+        using size_type       = std::size_t;
+
+    private:
+        void register_finalize () {  // GPU builds only: hand a device-release callback to amrex::ExecOnFinalize
+#ifdef AMREX_USE_GPU
+            std::weak_ptr<amrex::Gpu::DeviceVector<T>> weak_device = m_device;  // weak: the callback does not own the data
+            std::weak_ptr<Status> weak_status = m_status;
+            amrex::ExecOnFinalize([weak_device, weak_status]() {
+                // see: release_gpu(). NOTE(review): presumably runs once, at the next Finalize() - confirm later sessions are covered
+                if (auto self = weak_device.lock()) {  // skipped if the device vector no longer exists
+                    self->clear();
+                    self->shrink_to_fit();
+                }
+                if (auto self = weak_status.lock()) {
+                    *self = Status::host_dirty;  // device copy is gone, so the next sync must upload from host
+                }
+            });
+#endif
+        }
+    public:
+
+        constexpr TrackedVector () {
+            register_finalize();
+        }
+
+        explicit TrackedVector (size_type a_size)
+            : m_host(a_size)
+        {
+            register_finalize();
+            host_dirty();
+        }
+
+        TrackedVector (size_type a_size, value_type const & a_value)
+            : m_host(a_size, a_value)
+        {
+            register_finalize();
+            host_dirty();
+        }
+
+        TrackedVector (std::initializer_list<T> a_initializer_list)
+            : m_host(a_initializer_list)
+        {
+            register_finalize();
+            host_dirty();
+        }
+
+        TrackedVector (std::vector<T> a_vector)
+            : m_host(std::move(a_vector))
+        {
+            register_finalize();
+            host_dirty();
+        }
+
+        TrackedVector (TrackedVector const & a_vector)
+            : m_host(a_vector.m_host)
+        {
+            *m_status = *a_vector.m_status;
+#ifdef AMREX_USE_GPU
+            *m_device = *a_vector.m_device;
+#endif
+
+            register_finalize();
+        }
+
+        /** Move: take over a_vector's data; a_vector receives this object's fresh, empty members */
+        TrackedVector (TrackedVector && a_vector) noexcept
+        {
+            std::swap(m_status, a_vector.m_status);
+            std::swap(m_host, a_vector.m_host);
+#ifdef AMREX_USE_GPU
+            std::swap(m_device, a_vector.m_device);
+#endif
+
+            // The data we took over was already registered by a_vector's constructor.
+            // The default-initialized shared ptrs that were ours until the swaps
+            //   now belong to a_vector and have never been registered.
+            // Register them so that a_vector can be either reused/assigned values
+            // or safely destructed.
+            a_vector.register_finalize();
+        }
+
+        TrackedVector& operator= (TrackedVector const & a_vector) {
+            if (this != &a_vector) {
+                *m_status = *a_vector.m_status;
+                m_host = a_vector.m_host;
+#ifdef AMREX_USE_GPU
+                *m_device = *a_vector.m_device;
+#endif
+            }
+            return *this;
+        }
+
+        /** Swap the data of this and a_vector */
+        TrackedVector& operator= (TrackedVector && a_vector) noexcept {
+            if (this != &a_vector) {
+                std::swap(m_host, a_vector.m_host);
+                std::swap(m_status, a_vector.m_status);
+#ifdef AMREX_USE_GPU
+                std::swap(m_device, a_vector.m_device);
+#endif
+            }
+            return *this;
+        }
+
+        ~TrackedVector () = default;
+
+        enum class Status {
+            up_to_date,    //!< host and device data are in sync
+            device_dirty,  //!< device was accessed writably; host data needs an update
+            host_dirty     //!< host was accessed writably; device data needs an update
+        };
+
+        [[nodiscard]] Status status () const { return *m_status; }
+
+        /** Return writable host data, mark host as dirty */
+        [[nodiscard]] std::vector<T> &
+        host () {
+            host_dirty();
+            return m_host;
+        }
+
+        /** Return read-only host data */
+        [[nodiscard]] std::vector<T> const &
+        host_const () const { return m_host; }
+
+#ifdef AMREX_USE_GPU
+        /** Return writable device data, mark device as dirty */
+        [[nodiscard]] amrex::Gpu::DeviceVector<T> &
+        device () {
+            if (!amrex::Initialized()) {
+                throw std::runtime_error("TrackedVector::device() called before AMReX initialize/after AMReX finalize");
+            }
+            device_dirty();
+            return *m_device;
+        }
+
+        /** Return read-only device data; throws std::runtime_error outside an AMReX session */
+        [[nodiscard]] amrex::Gpu::DeviceVector<T> const &
+        device_const () const {
+            if (!amrex::Initialized()) {
+                throw std::runtime_error("TrackedVector::device_const() called before AMReX initialize/after AMReX finalize");
+            }
+            return *m_device;
+        }
+#else
+        /** Return writable device (==host) data, up to date by definition */
+        [[nodiscard]] std::vector<T> &
+        device () { return m_host; }
+
+        /** Return read-only device (==host) data */
+        [[nodiscard]] std::vector<T> const &
+        device_const () const { return m_host; }
+#endif
+
+        /** Release GPU memory while keeping the host data for later reuse.
+         *  Callable at any time: outside an AMReX session the device vector is
+         *  left untouched and only the status is set to host_dirty. */
+        void release_gpu ()
+        {
+#ifdef AMREX_USE_GPU
+            if (amrex::Initialized()) {  // only touch device storage inside a session; never throw
+                m_device->clear();
+                m_device->shrink_to_fit();
+            }
+            *m_status = Status::host_dirty;
+#endif
+        }
+
+        /** Mark host data as modified; next sync will copy to GPU. */
+        void host_dirty () {
+#ifdef AMREX_USE_GPU
+            *m_status = Status::host_dirty;
+#endif
+        }
+
+        /** Mark device data as modified; next sync will copy to CPU. */
+        void device_dirty () {
+#ifdef AMREX_USE_GPU
+            *m_status = Status::device_dirty;
+#endif
+        }
+
+        /** Conditionally synchronize host data to device
+         *
+         * If either the host or device side are dirty, this will
+         * copy the host data over the device data.
+         *
+         * @param[in] force  If true, force a copy even if the status is up to date.
+         */
+        void to_device (bool force=false) {
+#ifdef AMREX_USE_GPU
+            if (status() != Status::up_to_date || force) {
+                if (!amrex::Initialized()) {
+                    throw std::runtime_error("TrackedVector::to_device() called outside of AMReX initialize/finalize");
+                }
+                auto const size = m_host.size();
+                if (size > 0U) {
+                    m_device->resize(size);
+                    amrex::Gpu::copy(amrex::Gpu::hostToDevice,
+                        m_host.begin(), m_host.end(), m_device->begin());
+                } else {
+                    m_device->clear();
+                }
+            }
+            *m_status = Status::up_to_date;
+#else
+            amrex::ignore_unused(force);
+#endif
+        }
+
+        /** Conditionally synchronize device data to host
+         *
+         * If either the host or device side are dirty, this will
+         * copy the device data over the host data.
+         *
+         * @param[in] force  If true, force a copy even if the status is up to date.
+         */
+        void to_host (bool force=false) {
+#ifdef AMREX_USE_GPU
+            if (status() != Status::up_to_date || force) {
+                if (!amrex::Initialized()) {
+                    throw std::runtime_error("TrackedVector::to_host() called outside of AMReX initialize/finalize");
+                }
+                m_host.resize(m_device->size());
+                amrex::Gpu::copy(amrex::Gpu::deviceToHost,
+                    m_device->begin(), m_device->end(), m_host.begin());
+            }
+            *m_status = Status::up_to_date;
+#else
+            amrex::ignore_unused(force);
+#endif
+        }
+
+        /** Ensure equivalent host-device data (sync copy)
+         *
+         * This performs a conditional synchronous copy from whatever
+         * host()/device() side was accessed last.
+         */
+        void ensure_same ()
+        {
+#ifdef AMREX_USE_GPU
+            if (status() == Status::host_dirty) {
+                to_device(false);
+            } else if (status() == Status::device_dirty) {
+                to_host(false);
+            }
+#endif
+        }
+
+    private:
+        std::shared_ptr<Status> m_status = std::make_shared<Status>();
+        std::vector<T> m_host;
+#ifdef AMREX_USE_GPU
+        std::shared_ptr<amrex::Gpu::DeviceVector<T>> m_device = std::make_shared<amrex::Gpu::DeviceVector<T>>();
+#endif
+    };
+
+}
+
+#endif
diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt
index 04915b2161..4c5625ec3f 100644
--- a/Src/Base/CMakeLists.txt
+++ b/Src/Base/CMakeLists.txt
@@ -241,6 +241,7 @@ foreach(D IN LISTS AMReX_SPACEDIM)
        AMReX_GpuReduce.H
        AMReX_GpuAllocators.H
        AMReX_GpuContainers.H
+       AMReX_TrackedVector.H
        AMReX_MFParallelFor.H
        AMReX_MFParallelForC.H
        AMReX_MFParallelForG.H
diff --git a/Src/Base/Make.package b/Src/Base/Make.package
index d22a6cc824..680357556e 100644
--- a/Src/Base/Make.package
+++ b/Src/Base/Make.package
@@ -107,7 +107,9 @@ C$(AMREX_BASE)_sources += AMReX_GpuElixir.cpp
 
 C$(AMREX_BASE)_headers += AMReX_GpuReduce.H
 
-C$(AMREX_BASE)_headers += AMReX_CudaGraph.H AMReX_GpuContainers.H
+C$(AMREX_BASE)_headers += AMReX_CudaGraph.H
+C$(AMREX_BASE)_headers += AMReX_GpuContainers.H
+C$(AMREX_BASE)_headers += AMReX_TrackedVector.H
 
 C$(AMREX_BASE)_headers += AMReX_GpuAllocators.H
 
diff --git a/Tests/Base/TrackedVector/CMakeLists.txt b/Tests/Base/TrackedVector/CMakeLists.txt
new file mode 100644
index 0000000000..224c4563c8
--- /dev/null
+++ b/Tests/Base/TrackedVector/CMakeLists.txt
@@ -0,0 +1,9 @@
+foreach(D IN LISTS AMReX_SPACEDIM)
+    set(_sources     main.cpp)
+    set(_input_files)
+
+    setup_test(${D} _sources _input_files)
+
+    unset(_sources)
+    unset(_input_files)
+endforeach()
diff --git a/Tests/Base/TrackedVector/main.cpp b/Tests/Base/TrackedVector/main.cpp
new file mode 100644
index 0000000000..9b5b1695ba
--- /dev/null
+++ b/Tests/Base/TrackedVector/main.cpp
@@ -0,0 +1,335 @@
+#include <AMReX.H>
+#include <AMReX_TrackedVector.H>
+
+#include <memory>
+#include <vector>
+
+using namespace amrex;
+
+using TVec = Gpu::TrackedVector<int>;
+using Status = TVec::Status;
+
+namespace {
+
+void verify_host_device_match (TVec const & v)
+{
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+
+    const auto n = v.host_const().size();
+    AMREX_ALWAYS_ASSERT(v.device_const().size() == n);
+    if (n == 0) { return; }
+    std::vector<int> tmp(n);
+    Gpu::copy(Gpu::deviceToHost, v.device_const().begin(), v.device_const().end(), tmp.begin());
+    for (std::size_t i = 0; i < n; ++i) {
+        AMREX_ALWAYS_ASSERT(tmp[i] == v.host_const()[i]);
+    }
+}
+
+void fill_device_linear (TVec& v, int base)
+{
+    const int n = static_cast<int>(v.device().size());
+    if (n == 0) { return; }
+    int* dp = v.device().data();
+    ParallelFor(n, [=] AMREX_GPU_DEVICE (int i) noexcept {
+        dp[i] = base + i;
+    });
+    Gpu::streamSynchronize();
+}
+
+void test_dirty_semantics ()
+{
+    TVec v;
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+
+    v.host_dirty();
+#ifdef AMREX_USE_GPU
+    AMREX_ALWAYS_ASSERT(v.status() == Status::host_dirty);
+#else
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+#endif
+
+    v.device_dirty();
+#ifdef AMREX_USE_GPU
+    AMREX_ALWAYS_ASSERT(v.status() == Status::device_dirty);
+#else
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+#endif
+
+    // potential write (host): must mark dirty
+    v.host().resize(3);
+    v.host().at(2) = 42;
+#ifdef AMREX_USE_GPU
+    AMREX_ALWAYS_ASSERT(v.status() == Status::host_dirty);
+#else
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+#endif
+    AMREX_ALWAYS_ASSERT(v.host_const().size() == 3);
+
+    v.to_device();
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+
+    // potential write (device): must mark dirty
+    [[maybe_unused]] auto* dp = v.device().data();
+#ifdef AMREX_USE_GPU
+    AMREX_ALWAYS_ASSERT(v.status() == Status::device_dirty);
+#else
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+#endif
+
+    v.ensure_same();
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+
+    // read-only (host): must not mark dirty
+    auto first = v.host_const().at(2);
+    AMREX_ALWAYS_ASSERT(first == 42);
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+
+    // read-only (device): must not mark dirty
+    [[maybe_unused]] auto const* dcp = v.device_const().data();
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+}
+
+void test_release_gpu ()
+{
+    TVec v;
+    v.host().assign({1, 2, 3});
+    v.ensure_same();
+    verify_host_device_match(v);
+
+    v.release_gpu();
+#ifdef AMREX_USE_GPU
+    AMREX_ALWAYS_ASSERT(v.device_const().empty());
+    AMREX_ALWAYS_ASSERT(v.status() == Status::host_dirty);
+#else
+    AMREX_ALWAYS_ASSERT(v.device_const().size() == 3U);
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+#endif
+    AMREX_ALWAYS_ASSERT(v.host_const().size() == 3U);
+
+    v.ensure_same();
+    verify_host_device_match(v);
+}
+
+void test_d2h ()
+{
+    TVec v;
+    v.host().assign({0, 0, 0});
+    v.ensure_same();
+
+    fill_device_linear(v, 100);
+    v.ensure_same();
+
+    AMREX_ALWAYS_ASSERT(v.host_const()[0] == 100 && v.host_const()[1] == 101 && v.host_const()[2] == 102);
+    verify_host_device_match(v);
+}
+
+void test_empty ()
+{
+    TVec v;
+    AMREX_ALWAYS_ASSERT(v.device_const().empty());
+
+    v.ensure_same();
+    AMREX_ALWAYS_ASSERT(v.device_const().empty());
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+
+    fill_device_linear(v, 100);
+    v.host_dirty();  // ignore that device had newer data
+    v.ensure_same();
+    AMREX_ALWAYS_ASSERT(v.device_const().empty());
+    AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date);
+}
+
+void test_aggregate ()
+{
+    struct S {
+        double x;
+        TVec a, b;
+    };
+
+    std::vector<int> i = {1, 2, 3};
+    std::vector<int> j = {4, 5, 6};
+
+    [[maybe_unused]] auto ptr1 = std::shared_ptr<S>(new S{42.0, i, j});  // NOLINT(modernize-make-shared)
+}
+
+void test_copy_constructor ()
+{
+    TVec a;
+    a.host().assign({1, 2, 3});
+    a.ensure_same();
+
+    TVec b(a);  // copy construct
+
+    // Both have same data
+    AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U);
+    AMREX_ALWAYS_ASSERT(b.host_const()[0] == 1 && b.host_const()[1] == 2 && b.host_const()[2] == 3);
+    AMREX_ALWAYS_ASSERT(b.status() == a.status());
+    verify_host_device_match(b);
+
+    // Modifying b doesn't affect a
+    b.host()[0] = 99;
+    AMREX_ALWAYS_ASSERT(a.host_const()[0] == 1);
+}
+
+void test_move_constructor ()
+{
+    TVec a;
+    a.host().assign({4, 5, 6});
+    a.ensure_same();
+
+    TVec b(std::move(a));  // move construct
+
+    // b has the data
+    AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U);
+    AMREX_ALWAYS_ASSERT(b.host_const()[0] == 4 && b.host_const()[1] == 5 && b.host_const()[2] == 6);
+    AMREX_ALWAYS_ASSERT(b.status() == Status::up_to_date);
+    verify_host_device_match(b);
+
+    // a is valid but empty (got fresh shared_ptrs via swap)
+    // NOLINTBEGIN(bugprone-use-after-move,clang-analyzer-cplusplus.Move)
+    AMREX_ALWAYS_ASSERT(a.host_const().empty());
+    AMREX_ALWAYS_ASSERT(a.status() == Status::up_to_date);
+    AMREX_ALWAYS_ASSERT(a.device_const().empty());
+
+    // Modifying b must not affect a's status or device
+    auto a_status_before = a.status();
+    auto a_device_size_before = a.device_const().size();
+    b.host()[0] = 99;
+    b.ensure_same();
+    AMREX_ALWAYS_ASSERT(a.status() == a_status_before);
+    AMREX_ALWAYS_ASSERT(a.device_const().size() == a_device_size_before);
+
+    // a can be reused
+    a.host().assign({10, 20});
+    a.ensure_same();
+    verify_host_device_match(a);
+    AMREX_ALWAYS_ASSERT(a.host_const()[0] == 10);
+    // NOLINTEND(bugprone-use-after-move,clang-analyzer-cplusplus.Move)
+}
+
+void test_copy_assignment ()
+{
+    TVec a;
+    a.host().assign({7, 8, 9});
+    a.ensure_same();
+
+    TVec b;
+    b.host().assign({0});
+    b.ensure_same();
+
+    b = a;  // copy assign
+
+    AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U);
+    AMREX_ALWAYS_ASSERT(b.host_const()[0] == 7 && b.host_const()[1] == 8 && b.host_const()[2] == 9);
+    AMREX_ALWAYS_ASSERT(b.status() == a.status());
+    verify_host_device_match(b);
+
+    // Modifying b doesn't affect a
+    b.host()[0] = 99;
+    AMREX_ALWAYS_ASSERT(a.host_const()[0] == 7);
+}
+
+void test_move_assignment ()
+{
+    TVec a;
+    a.host().assign({11, 12, 13});
+    a.ensure_same();
+
+    TVec b;
+    b.host().assign({0});
+    b.ensure_same();
+
+    b = std::move(a);  // move assign
+
+    // b has a's original data
+    AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U);
+    AMREX_ALWAYS_ASSERT(b.host_const()[0] == 11 && b.host_const()[1] == 12 && b.host_const()[2] == 13);
+    AMREX_ALWAYS_ASSERT(b.status() == Status::up_to_date);
+    verify_host_device_match(b);
+
+    // a is valid (has b's old data via swap)
+    // NOLINTBEGIN(bugprone-use-after-move,clang-analyzer-cplusplus.Move)
+    AMREX_ALWAYS_ASSERT(a.host_const().size() == 1U);
+    AMREX_ALWAYS_ASSERT(a.host_const()[0] == 0);
+    AMREX_ALWAYS_ASSERT(a.status() == Status::up_to_date);
+    verify_host_device_match(a);
+
+    // Modifying b must not affect a's status or device
+    auto a_status_before = a.status();
+    auto a_device_size_before = a.device_const().size();
+    b.host()[0] = 99;
+    b.ensure_same();
+    AMREX_ALWAYS_ASSERT(a.status() == a_status_before);
+    AMREX_ALWAYS_ASSERT(a.device_const().size() == a_device_size_before);
+    AMREX_ALWAYS_ASSERT(a.host_const()[0] == 0);  // a's data unchanged
+
+    // a can be reused
+    a.host().assign({30, 40});
+    a.ensure_same();
+    verify_host_device_match(a);
+    // NOLINTEND(bugprone-use-after-move,clang-analyzer-cplusplus.Move)
+}
+
+void run_tests_before_finalize ()
+{
+    test_dirty_semantics();
+    test_release_gpu();
+    test_d2h();
+    test_empty();
+    test_aggregate();
+    test_copy_constructor();
+    test_move_constructor();
+    test_copy_assignment();
+    test_move_assignment();
+}
+
+} // namespace
+
+int main (int argc, char* argv[])
+{
+    TVec cross_session;  // deliberately created before amrex::Initialize(): host side only
+    cross_session.host() = {7, 8, 9};
+
+#ifdef AMREX_USE_MPI
+    MPI_Init(&argc, &argv);
+#endif
+
+    amrex::Initialize(argc, argv);
+    {
+        run_tests_before_finalize();
+
+        cross_session.to_device();
+        verify_host_device_match(cross_session);
+        AMREX_ALWAYS_ASSERT(cross_session.host_const()[0] == 7);
+    }
+    amrex::Finalize();  // calls implicitly: cross_session.release_gpu();
+
+#ifdef AMREX_USE_GPU
+    AMREX_ALWAYS_ASSERT(cross_session.status() == Status::host_dirty);
+#endif
+
+    amrex::Initialize(argc, argv);
+    {
+        AMREX_ALWAYS_ASSERT(cross_session.host_const().size() == 3U);
+        AMREX_ALWAYS_ASSERT(cross_session.host_const()[0] == 7 && cross_session.host_const()[1] == 8 &&
+                            cross_session.host_const()[2] == 9);
+#ifdef AMREX_USE_GPU
+        AMREX_ALWAYS_ASSERT(cross_session.device_const().empty());
+#else
+        AMREX_ALWAYS_ASSERT(cross_session.device_const().size() == 3U);
+#endif
+
+        cross_session.host()[1] = 99;
+        cross_session.to_device();  // re-init device data in new AMReX session
+        verify_host_device_match(cross_session);
+        AMREX_ALWAYS_ASSERT(cross_session.host_const()[1] == 99);
+        amrex::Print() << "TrackedVector tests passed.\n";  // report while AMReX (and MPI) are still initialized
+    }
+    amrex::Finalize();
+
+#ifdef AMREX_USE_MPI
+    MPI_Finalize();
+#endif
+
+    return 0;
+}
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index 975b575c95..c92e115f63 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -99,6 +99,8 @@ endfunction ()
 
 if (AMReX_TEST_TYPE STREQUAL "Small")
 
+   add_subdirectory("Base/TrackedVector")
+
    add_subdirectory("Amr/Advection_AmrCore")
 
    if (AMReX_OMP)
@@ -125,7 +127,7 @@ else()
    #
    # List of subdirectories to search for CMakeLists.
    #
-   set( AMREX_TESTS_SUBDIRS Amr ArrayND AsyncOut CallNoinline CLZ CommType CTOParFor DeviceGlobal
+   set( AMREX_TESTS_SUBDIRS Amr ArrayND AsyncOut Base CallNoinline CLZ CommType CTOParFor DeviceGlobal
                             Enum HeatEquation MultiBlock MultiPeriod ParmParse Parser Parser2
                             ParserUserFn Reducer ReduceToPlanePatchy Reinit RoundoffDomain SIMD
                             SmallMatrix SumBoundary TOML)