diff --git a/Docs/sphinx_documentation/source/GPU.rst b/Docs/sphinx_documentation/source/GPU.rst index e9dc53fa7c..789a731ad8 100644 --- a/Docs/sphinx_documentation/source/GPU.rst +++ b/Docs/sphinx_documentation/source/GPU.rst @@ -753,6 +753,144 @@ constructed inside of an MFIter loop with GPU kernels and great care should be used when accessing :cpp:`Gpu::ManagedVector` data on GPUs to avoid race conditions. +.. _sec:gpu:buffer_tracked_managed: + +Gpu::Buffer, Gpu::TrackedVector, and Gpu::ManagedVector +------------------------------------------------------- + +:cpp:`Gpu::Buffer` (``AMReX_GpuBuffer.H``) and :cpp:`Gpu::TrackedVector` +(``AMReX_TrackedVector.H``) pair a host allocation with a device mirror. + +:cpp:`Gpu::Buffer` uses :cpp:`Gpu::PinnedVector` on the host and +``copyToDeviceAsync()`` / ``copyToHost()`` for transfers. Use it for +**frequent, performance-oriented** async copies during a normal GPU run. + +:cpp:`Gpu::TrackedVector` exposes a host ``std::vector`` via ``host()`` / +``host_const()`` and a device :cpp:`Gpu::DeviceVector` via ``device()`` / +``device_const()`` (GPU builds only). Writable accessors mark the mirror +out-of-date; ``to_device()``/``to_host()`` perform **synchronous** copies when +needed. You may populate the host **before** :cpp:`amrex::Initialize()`. Device +memory is only valid while AMReX is initialized. On :cpp:`amrex::Finalize()`, +AMReX clears device storage via ``release_gpu()`` and leaves the host copy for +reuse, which supports **Python / pyAMReX** and other workflows +that cross multiple AMReX initialize/finalize cycles. Use read-only +``host_const()`` / ``device_const()`` when you are not writing, so the object +does not flip to a dirty state unnecessarily. + +:cpp:`Gpu::ManagedVector` is the arena-backed unified-memory vector introduced +under **Gpu Vectors** above (``The_Managed_Arena()``). Like :cpp:`Gpu::Buffer` +it can only be used while AMReX is initialized / a GPU device context exists. + +.. 
_tab:gpu:buffer_tracked_managed: + +.. list-table:: + :widths: 12 28 28 28 + :header-rows: 1 + + * - + - :cpp:`Gpu::Buffer` + - :cpp:`Gpu::ManagedVector` + - :cpp:`Gpu::TrackedVector` + * - **Lifetime** + - Only between ``amrex::Initialize/Finalize()`` + - Only between ``amrex::Initialize/Finalize()`` + - Anytime and cross-session, GPU part only between ``amrex::Initialize/Finalize()`` + * - **Usage** + - ``operator[]`` etc., explicit ``copyToDeviceAsync`` / + ``copyToHost`` + - Single ``data()`` like :cpp:`amrex::Vector` + - Separate ``host()`` / ``device()`` (and ``*_const``) + * - **Synchronization** + - explicit + - implicit + - explicit, but tracks status + * - **Performance** + - Best: pinned host enables asynchronous transfers + - Implicit memory migration can add latency + - Synchronous copy adds latency + * - **Best for** + - hot copy loops inside a run + - maximum simplicity + - interactive and cross-AMReX session usage, e.g., in pyAMReX for user inputs that do not change often + + +A minimal :cpp:`Gpu::Buffer` pattern (host fill, async upload, kernel pointer): + +.. highlight:: c++ + +:: + + amrex::Initialize(argc, argv); + + Gpu::Buffer<int> buf(n); + for (int i = 0; i < n; ++i) { buf[i] = i; } + + int* dp = buf.copyToDeviceAsync(); + // launch kernels using dp, then optionally: + buf.copyToHost(); + +:cpp:`Gpu::ManagedVector` example (unified memory, accessible from both host and device): + +.. highlight:: c++ + +:: + + amrex::Initialize(argc, argv); + + Gpu::ManagedVector<int> mv(n); + for (int i = 0; i < n; ++i) { mv[i] = i; } + + int* dp = mv.data(); + amrex::ParallelFor(n, [=] AMREX_GPU_DEVICE (int i) { + dp[i] *= 2; // access on device + }); + + Gpu::streamSynchronize(); + // mv[i] now accessible on host with updated values + +:cpp:`Gpu::TrackedVector` example: +On GPU builds, you can create this type at any time, even before ``amrex::Initialize()``. 
+``amrex::Finalize()`` releases device storage for the vector but +keeps the host ``std::vector``, so a later ``Initialize()`` can call +``to_device()`` again to rebuild the device copy. + +.. highlight:: c++ + +:: + + // Host data before AMReX init; GPU available after Initialize(). + amrex::Gpu::TrackedVector cross_session; + cross_session.host() = {7, 8, 9}; + + // ... a lot of other interactive user code, e.g., to set up + // complex input data, optimization libraries or ML frameworks + // in multi-simulation workflows ... + + amrex::Initialize(argc, argv); + { + cross_session.to_device(); + // Host and device match; use host_const() / device_const() for reads. + } + amrex::Finalize(); + + // cross_session.device() is not available now and will throw, + // but you can keep using cross_session.host() / .host_const() + + amrex::Initialize(argc, argv); + { + cross_session.to_device(); + // Device buffer is re-created; kernels may read via + // device_const().data() or write via device().data() + } + amrex::Finalize(); + +Optional: Call ``release_gpu()`` when you need to free device memory while +keeping the host ``std::vector`` for later (unless already released, +``amrex::Finalize()`` clears device storage registered for the object). + +Generally, after device kernels, call :cpp:`Gpu::streamSynchronize()` +(or equivalent ordering) before relying on host data, as for any other device work. + MultiFab Reductions ------------------- diff --git a/Src/Base/AMReX_TrackedVector.H b/Src/Base/AMReX_TrackedVector.H new file mode 100644 index 0000000000..90094c5227 --- /dev/null +++ b/Src/Base/AMReX_TrackedVector.H @@ -0,0 +1,301 @@ +#ifndef AMREX_TRACKED_VECTOR_H +#define AMREX_TRACKED_VECTOR_H + +#include + +#include +#include + +#include +#include +#include +#include + + +namespace amrex::Gpu +{ + /** Base for element data with dynamic host vectors mirrored lazily to GPU. 
+ * + * Provides lifetimes independent of AMReX initialize/finalize cycles, + * synchronization tracking, and data release APIs. + * + * This object is primarily for input handling, allowing to initialize + * data even before AMReX was initialized and enabling workflows crossing + * AMReX init/finalize cycles. GPU memory will always be bound to an + * AMReX session, but CPU memory can be allocated and live arbitrarily long. + * + * For AMReX CPU builds, the host and device members point to the *same* memory + * and the status will always be up to date. + * + * Usage contract: + * - Device data can only be allocated after AMReX initialize and before finalize. + * - You can call `release_gpu()` anytime, but we will call it during AMReX finalize + * to invalidate device(). + * - Always access data via host/device[_const](). Do not cache references/pointers to + * host/device memory managed by this object, or you run the risk of stale memory access. + */ + template + struct TrackedVector + { + static_assert(std::is_trivially_copyable(), "TrackedVector can only hold trivially copyable types"); + using value_type = T; + using size_type = std::size_t; + + private: + void register_finalize () { +#ifdef AMREX_USE_GPU + std::weak_ptr> weak_device = m_device; + std::weak_ptr weak_status = m_status; + amrex::ExecOnFinalize([weak_device, weak_status]() { + // see: release_gpu() + if (auto self = weak_device.lock()) { + self->clear(); + self->shrink_to_fit(); + } + if (auto self = weak_status.lock()) { + *self = Status::host_dirty; + } + }); +#endif + } + public: + + constexpr TrackedVector () { + register_finalize(); + } + + explicit TrackedVector (size_type a_size) + : m_host(a_size) + { + register_finalize(); + host_dirty(); + } + + TrackedVector (size_type a_size, value_type const & a_value) + : m_host(a_size, a_value) + { + register_finalize(); + host_dirty(); + } + + TrackedVector (std::initializer_list a_initializer_list) + : m_host(a_initializer_list) + { + 
register_finalize(); + host_dirty(); + } + + TrackedVector (std::vector a_vector) + : m_host(std::move(a_vector)) + { + register_finalize(); + host_dirty(); + } + + TrackedVector (TrackedVector const & a_vector) + : m_host(a_vector.m_host) + { + *m_status = *a_vector.m_status; +#ifdef AMREX_USE_GPU + *m_device = *a_vector.m_device; +#endif + + register_finalize(); + } + + /** Swap the empty data of this and a_vector */ + TrackedVector (TrackedVector && a_vector) noexcept + { + std::swap(m_status, a_vector.m_status); + std::swap(m_host, a_vector.m_host); +#ifdef AMREX_USE_GPU + std::swap(m_device, a_vector.m_device); +#endif + + // We inherit a_vector's original registration. + // But we need to register a_vector's new data now: + // the shared ptrs that we owned briefly up to here. + // That way, a_vector can be either reused/assigned values + // or safely destructed. + a_vector.register_finalize(); + } + + TrackedVector& operator= (TrackedVector const & a_vector) { + if (this != &a_vector) { + *m_status = *a_vector.m_status; + m_host = a_vector.m_host; +#ifdef AMREX_USE_GPU + *m_device = *a_vector.m_device; +#endif + } + return *this; + } + + /** Swap the data of this and a_vector */ + TrackedVector& operator= (TrackedVector && a_vector) noexcept { + if (this != &a_vector) { + std::swap(m_host, a_vector.m_host); + std::swap(m_status, a_vector.m_status); +#ifdef AMREX_USE_GPU + std::swap(m_device, a_vector.m_device); +#endif + } + return *this; + } + + ~TrackedVector () = default; + + enum class Status { + up_to_date, // & + host () { + host_dirty(); + return m_host; + } + + /** Return read-only host data */ + [[nodiscard]] std::vector const & + host_const () const { return m_host; } + +#ifdef AMREX_USE_GPU + /** Return writable device data, mark device as dirty */ + [[nodiscard]] amrex::Gpu::DeviceVector & + device () { + if (!amrex::Initialized()) { + throw std::runtime_error("TrackedVector::device() called before AMReX initialize/after AMReX finalize"); + } + 
device_dirty(); + return *m_device; + } + + /** Return read-only device data */ + [[nodiscard]] amrex::Gpu::DeviceVector const & + device_const () const { + if (!amrex::Initialized()) { + throw std::runtime_error("TrackedVector::device() called before AMReX initialize/after AMReX finalize"); + } + return *m_device; + } +#else + /** Return writable device (==host) data, up to date by definition */ + [[nodiscard]] std::vector & + device () { return m_host; } + + /** Return read-only device (==host) data */ + [[nodiscard]] std::vector const & + device_const () const { return m_host; } +#endif + + /** Release GPU memory + * + * Host data preserved for reuse until destructor. + * This enables use outside of and across AMReX init/finalize cycles. + */ + void release_gpu () + { +#ifdef AMREX_USE_GPU + device().clear(); + device().shrink_to_fit(); + *m_status = Status::host_dirty; +#endif + } + + /** Mark host data as modified; next sync will copy to GPU. */ + void host_dirty () { +#ifdef AMREX_USE_GPU + *m_status = Status::host_dirty; +#endif + } + + /** Mark device data as modified; next sync will copy to CPU. */ + void device_dirty () { +#ifdef AMREX_USE_GPU + *m_status = Status::device_dirty; +#endif + } + + /** Conditionally synchronize host data to device + * + * If either the host or device side are dirty, this will + * copy the host data over the device data. + * + * @param[in] force If true, force a copy even if the status is up to date. 
+ */ + void to_device (bool force=false) { +#ifdef AMREX_USE_GPU + if (status() != Status::up_to_date || force) { + if (!amrex::Initialized()) { + throw std::runtime_error("TrackedVector::to_device() called outside of AMReX initialize/finalize"); + } + auto const size = m_host.size(); + if (size > 0U) { + m_device->resize(size); + amrex::Gpu::copy(amrex::Gpu::hostToDevice, + m_host.begin(), m_host.end(), m_device->begin()); + } else { + m_device->clear(); + } + } + *m_status = Status::up_to_date; +#else + amrex::ignore_unused(force); +#endif + } + + /** Conditionally synchronize device data to host + * + * If either the host or device side are dirty, this will + * copy the device data over the host data. + * + * @param[in] force If true, force a copy even if the status is up to date. + */ + void to_host (bool force=false) { +#ifdef AMREX_USE_GPU + if (status() != Status::up_to_date || force) { + if (!amrex::Initialized()) { + throw std::runtime_error("TrackedVector::to_host() called outside of AMReX initialize/finalize"); + } + m_host.resize(m_device->size()); + amrex::Gpu::copy(amrex::Gpu::deviceToHost, + m_device->begin(), m_device->end(), m_host.begin()); + } + *m_status = Status::up_to_date; +#else + amrex::ignore_unused(force); +#endif + } + + /** Ensure equivalent host-device data (sync copy) + * + * This performs a conditional synchronous copy from whatever + * host()/device() side was accessed last. 
+ */ + void ensure_same () + { +#ifdef AMREX_USE_GPU + if (status() == Status::host_dirty) { + to_device(false); + } else if (status() == Status::device_dirty) { + to_host(false); + } +#endif + } + + private: + std::shared_ptr m_status = std::make_shared(); + std::vector m_host; +#ifdef AMREX_USE_GPU + std::shared_ptr> m_device = std::make_shared>(); +#endif + }; + +} + +#endif diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 04915b2161..4c5625ec3f 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -241,6 +241,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_GpuReduce.H AMReX_GpuAllocators.H AMReX_GpuContainers.H + AMReX_TrackedVector.H AMReX_MFParallelFor.H AMReX_MFParallelForC.H AMReX_MFParallelForG.H diff --git a/Src/Base/Make.package b/Src/Base/Make.package index d22a6cc824..680357556e 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -107,7 +107,9 @@ C$(AMREX_BASE)_sources += AMReX_GpuElixir.cpp C$(AMREX_BASE)_headers += AMReX_GpuReduce.H -C$(AMREX_BASE)_headers += AMReX_CudaGraph.H AMReX_GpuContainers.H +C$(AMREX_BASE)_headers += AMReX_CudaGraph.H +C$(AMREX_BASE)_headers += AMReX_GpuContainers.H +C$(AMREX_BASE)_headers += AMReX_TrackedVector.H C$(AMREX_BASE)_headers += AMReX_GpuAllocators.H diff --git a/Tests/Base/TrackedVector/CMakeLists.txt b/Tests/Base/TrackedVector/CMakeLists.txt new file mode 100644 index 0000000000..224c4563c8 --- /dev/null +++ b/Tests/Base/TrackedVector/CMakeLists.txt @@ -0,0 +1,9 @@ +foreach(D IN LISTS AMReX_SPACEDIM) + set(_sources main.cpp) + set(_input_files) + + setup_test(${D} _sources _input_files) + + unset(_sources) + unset(_input_files) +endforeach() diff --git a/Tests/Base/TrackedVector/main.cpp b/Tests/Base/TrackedVector/main.cpp new file mode 100644 index 0000000000..9b5b1695ba --- /dev/null +++ b/Tests/Base/TrackedVector/main.cpp @@ -0,0 +1,335 @@ +#include +#include + +#include +#include + +using namespace amrex; + +using TVec = Gpu::TrackedVector; +using Status = 
TVec::Status; + +namespace { + +void verify_host_device_match (TVec const & v) +{ + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); + + const auto n = v.host_const().size(); + AMREX_ALWAYS_ASSERT(v.device_const().size() == n); + if (n == 0) { return; } + std::vector tmp(n); + Gpu::copy(Gpu::deviceToHost, v.device_const().begin(), v.device_const().end(), tmp.begin()); + for (std::size_t i = 0; i < n; ++i) { + AMREX_ALWAYS_ASSERT(tmp[i] == v.host_const()[i]); + } +} + +void fill_device_linear (TVec& v, int base) +{ + const int n = static_cast(v.device().size()); + if (n == 0) { return; } + int* dp = v.device().data(); + ParallelFor(n, [=] AMREX_GPU_DEVICE (int i) noexcept { + dp[i] = base + i; + }); + Gpu::streamSynchronize(); +} + +void test_dirty_semantics () +{ + TVec v; + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); + + v.host_dirty(); +#ifdef AMREX_USE_GPU + AMREX_ALWAYS_ASSERT(v.status() == Status::host_dirty); +#else + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); +#endif + + v.device_dirty(); +#ifdef AMREX_USE_GPU + AMREX_ALWAYS_ASSERT(v.status() == Status::device_dirty); +#else + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); +#endif + + // potential write (host): must mark dirty + v.host().resize(3); + v.host().at(2) = 42; +#ifdef AMREX_USE_GPU + AMREX_ALWAYS_ASSERT(v.status() == Status::host_dirty); +#else + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); +#endif + AMREX_ALWAYS_ASSERT(v.host_const().size() == 3); + + v.to_device(); + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); + + // potential write (device): must mark dirty + [[maybe_unused]] auto* dp = v.device().data(); +#ifdef AMREX_USE_GPU + AMREX_ALWAYS_ASSERT(v.status() == Status::device_dirty); +#else + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); +#endif + + v.ensure_same(); + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); + + // read-only (host): must not mark dirty + auto first = v.host_const().at(2); + 
AMREX_ALWAYS_ASSERT(first == 42); + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); + + // read-only (device): must not mark dirty + [[maybe_unused]] auto const* dcp = v.device_const().data(); + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); +} + +void test_release_gpu () +{ + TVec v; + v.host().assign({1, 2, 3}); + v.ensure_same(); + verify_host_device_match(v); + + v.release_gpu(); +#ifdef AMREX_USE_GPU + AMREX_ALWAYS_ASSERT(v.device_const().empty()); + AMREX_ALWAYS_ASSERT(v.status() == Status::host_dirty); +#else + AMREX_ALWAYS_ASSERT(v.device_const().size() == 3U); + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); +#endif + AMREX_ALWAYS_ASSERT(v.host_const().size() == 3U); + + v.ensure_same(); + verify_host_device_match(v); +} + +void test_d2h () +{ + TVec v; + v.host().assign({0, 0, 0}); + v.ensure_same(); + + fill_device_linear(v, 100); + v.ensure_same(); + + AMREX_ALWAYS_ASSERT(v.host_const()[0] == 100 && v.host_const()[1] == 101 && v.host_const()[2] == 102); + verify_host_device_match(v); +} + +void test_empty () +{ + TVec v; + AMREX_ALWAYS_ASSERT(v.device_const().empty()); + + v.ensure_same(); + AMREX_ALWAYS_ASSERT(v.device_const().empty()); + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); + + fill_device_linear(v, 100); + v.host_dirty(); // ignore that device had newer data + v.ensure_same(); + AMREX_ALWAYS_ASSERT(v.device_const().empty()); + AMREX_ALWAYS_ASSERT(v.status() == Status::up_to_date); +} + +void test_aggregate () +{ + struct S { + double x; + TVec a, b; + }; + + std::vector i = {1, 2, 3}; + std::vector j = {4, 5, 6}; + + [[maybe_unused]] auto ptr1 = std::shared_ptr(new S{42.0, i, j}); // NOLINT(modernize-make-shared) +} + +void test_copy_constructor () +{ + TVec a; + a.host().assign({1, 2, 3}); + a.ensure_same(); + + TVec b(a); // copy construct + + // Both have same data + AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U); + AMREX_ALWAYS_ASSERT(b.host_const()[0] == 1 && b.host_const()[1] == 2 && b.host_const()[2] 
== 3); + AMREX_ALWAYS_ASSERT(b.status() == a.status()); + verify_host_device_match(b); + + // Modifying b doesn't affect a + b.host()[0] = 99; + AMREX_ALWAYS_ASSERT(a.host_const()[0] == 1); +} + +void test_move_constructor () +{ + TVec a; + a.host().assign({4, 5, 6}); + a.ensure_same(); + + TVec b(std::move(a)); // move construct + + // b has the data + AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U); + AMREX_ALWAYS_ASSERT(b.host_const()[0] == 4 && b.host_const()[1] == 5 && b.host_const()[2] == 6); + AMREX_ALWAYS_ASSERT(b.status() == Status::up_to_date); + verify_host_device_match(b); + + // a is valid but empty (got fresh shared_ptrs via swap) + // NOLINTBEGIN(bugprone-use-after-move,clang-analyzer-cplusplus.Move) + AMREX_ALWAYS_ASSERT(a.host_const().empty()); + AMREX_ALWAYS_ASSERT(a.status() == Status::up_to_date); + AMREX_ALWAYS_ASSERT(a.device_const().empty()); + + // Modifying b must not affect a's status or device + auto a_status_before = a.status(); + auto a_device_size_before = a.device_const().size(); + b.host()[0] = 99; + b.ensure_same(); + AMREX_ALWAYS_ASSERT(a.status() == a_status_before); + AMREX_ALWAYS_ASSERT(a.device_const().size() == a_device_size_before); + + // a can be reused + a.host().assign({10, 20}); + a.ensure_same(); + verify_host_device_match(a); + AMREX_ALWAYS_ASSERT(a.host_const()[0] == 10); + // NOLINTEND(bugprone-use-after-move,clang-analyzer-cplusplus.Move) +} + +void test_copy_assignment () +{ + TVec a; + a.host().assign({7, 8, 9}); + a.ensure_same(); + + TVec b; + b.host().assign({0}); + b.ensure_same(); + + b = a; // copy assign + + AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U); + AMREX_ALWAYS_ASSERT(b.host_const()[0] == 7 && b.host_const()[1] == 8 && b.host_const()[2] == 9); + AMREX_ALWAYS_ASSERT(b.status() == a.status()); + verify_host_device_match(b); + + // Modifying b doesn't affect a + b.host()[0] = 99; + AMREX_ALWAYS_ASSERT(a.host_const()[0] == 7); +} + +void test_move_assignment () +{ + TVec a; + a.host().assign({11, 
12, 13}); + a.ensure_same(); + + TVec b; + b.host().assign({0}); + b.ensure_same(); + + b = std::move(a); // move assign + + // b has a's original data + AMREX_ALWAYS_ASSERT(b.host_const().size() == 3U); + AMREX_ALWAYS_ASSERT(b.host_const()[0] == 11 && b.host_const()[1] == 12 && b.host_const()[2] == 13); + AMREX_ALWAYS_ASSERT(b.status() == Status::up_to_date); + verify_host_device_match(b); + + // a is valid (has b's old data via swap) + // NOLINTBEGIN(bugprone-use-after-move,clang-analyzer-cplusplus.Move) + AMREX_ALWAYS_ASSERT(a.host_const().size() == 1U); + AMREX_ALWAYS_ASSERT(a.host_const()[0] == 0); + AMREX_ALWAYS_ASSERT(a.status() == Status::up_to_date); + verify_host_device_match(a); + + // Modifying b must not affect a's status or device + auto a_status_before = a.status(); + auto a_device_size_before = a.device_const().size(); + b.host()[0] = 99; + b.ensure_same(); + AMREX_ALWAYS_ASSERT(a.status() == a_status_before); + AMREX_ALWAYS_ASSERT(a.device_const().size() == a_device_size_before); + AMREX_ALWAYS_ASSERT(a.host_const()[0] == 0); // a's data unchanged + + // a can be reused + a.host().assign({30, 40}); + a.ensure_same(); + verify_host_device_match(a); + // NOLINTEND(bugprone-use-after-move,clang-analyzer-cplusplus.Move) +} + +void run_tests_before_finalize () +{ + test_dirty_semantics(); + test_release_gpu(); + test_d2h(); + test_empty(); + test_aggregate(); + test_copy_constructor(); + test_move_constructor(); + test_copy_assignment(); + test_move_assignment(); +} + +} // namespace + +int main (int argc, char* argv[]) +{ + TVec cross_session; + cross_session.host() = {7, 8, 9}; + +#ifdef AMREX_USE_MPI + MPI_Init(&argc, &argv); +#endif + + amrex::Initialize(argc, argv); + { + run_tests_before_finalize(); + + cross_session.to_device(); + verify_host_device_match(cross_session); + AMREX_ALWAYS_ASSERT(cross_session.host_const()[0] == 7); + } + amrex::Finalize(); // calls implicitly: cross_session.release_gpu(); + +#ifdef AMREX_USE_GPU + 
AMREX_ALWAYS_ASSERT(cross_session.status() == Status::host_dirty); +#endif + + amrex::Initialize(argc, argv); + { + AMREX_ALWAYS_ASSERT(cross_session.host_const().size() == 3U); + AMREX_ALWAYS_ASSERT(cross_session.host_const()[0] == 7 && cross_session.host_const()[1] == 8 && + cross_session.host_const()[2] == 9); +#ifdef AMREX_USE_GPU + AMREX_ALWAYS_ASSERT(cross_session.device_const().empty()); +#else + AMREX_ALWAYS_ASSERT(cross_session.device_const().size() == 3U); +#endif + + cross_session.host()[1] = 99; + cross_session.to_device(); // re-init device data in new AMReX session + verify_host_device_match(cross_session); + AMREX_ALWAYS_ASSERT(cross_session.host_const()[1] == 99); + amrex::Print() << "TrackedVector tests passed.\n"; // report while AMReX (and MPI) are still initialized, not after Finalize + } + amrex::Finalize(); + +#ifdef AMREX_USE_MPI + MPI_Finalize(); +#endif + + return 0; +} diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 975b575c95..c92e115f63 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -99,6 +99,8 @@ endfunction () if (AMReX_TEST_TYPE STREQUAL "Small") + add_subdirectory("Base/TrackedVector") + add_subdirectory("Amr/Advection_AmrCore") if (AMReX_OMP) @@ -125,7 +127,7 @@ else() # # List of subdirectories to search for CMakeLists. # - set( AMREX_TESTS_SUBDIRS Amr ArrayND AsyncOut CallNoinline CLZ CommType CTOParFor DeviceGlobal + set( AMREX_TESTS_SUBDIRS Amr ArrayND AsyncOut Base CallNoinline CLZ CommType CTOParFor DeviceGlobal Enum HeatEquation MultiBlock MultiPeriod ParmParse Parser Parser2 ParserUserFn Reducer ReduceToPlanePatchy Reinit RoundoffDomain SIMD SmallMatrix SumBoundary TOML)