diff --git a/python/pyabacus/pyproject.toml b/python/pyabacus/pyproject.toml index 6b28b35d71..634cb462ef 100644 --- a/python/pyabacus/pyproject.toml +++ b/python/pyabacus/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["scikit-build-core>=0.3.3", "pybind11>=2.10.0"] +requires = ["scikit-build-core<0.10", "pybind11>=2.10.0"] build-backend = "scikit_build_core.build" diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt index 6f127a1722..16a7c26aa7 100644 --- a/source/CMakeLists.txt +++ b/source/CMakeLists.txt @@ -46,6 +46,8 @@ list(APPEND device_srcs source_base/module_device/device_helpers.cpp source_base/module_device/output_device.cpp source_base/module_device/memory_op.cpp + source_base/module_device/memory_op_dsp.cpp + source_base/module_device/dsp_selector.cpp source_base/kernels/math_kernel_op.cpp source_base/kernels/math_kernel_op_vec.cpp diff --git a/source/Makefile.Objects b/source/Makefile.Objects index 9ad0ebe509..465151d758 100644 --- a/source/Makefile.Objects +++ b/source/Makefile.Objects @@ -175,11 +175,14 @@ OBJS_BASE=abfs-vector3_order.o\ pulay_mixing.o\ broyden_mixing.o\ memory_op.o\ + memory_op_dsp.o\ + dsp_selector.o\ device.o\ device_helpers.o\ output_device.o\ parallel_2d.o\ + OBJS_CELL=atom_pseudo.o\ atom_spec.o\ pseudo.o\ @@ -515,6 +518,7 @@ OBJS_XC=xc_functional.o\ exx_info.o\ OBJS_IO=module_parameter/input_conv.o\ + module_parameter/dsp_config.o\ module_unk/berryphase.o\ module_bessel/bessel_basis.o\ cal_test.o\ @@ -592,6 +596,7 @@ OBJS_IO=module_parameter/input_conv.o\ filename.o\ ucell_io.o\ + OBJS_IO_LCAO=cal_r_overlap_R.o\ write_orb_info.o\ write_dos_lcao.o\ diff --git a/source/source_base/module_device/CMakeLists.txt b/source/source_base/module_device/CMakeLists.txt deleted file mode 100644 index 0aede5e57b..0000000000 --- a/source/source_base/module_device/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -# list(APPEND module_device_srcs -# memory_op.cpp -# device.cpp -# ) - -# if(USE_CUDA) -# list(APPEND module_device_srcs -# cuda/memory_op.cu -# ) -# endif() - - -# if(USE_ROCM) -# hip_add_library(module_device_rocm STATIC -# rocm/memory_op.hip.cu -# ) -# target_link_libraries( -# device -# module_device_rocm -# hip::host -# hip::device -# hip::hipfft -# roc::hipblas -# roc::hipsolver -# ) -# endif() - -# add_library(device OBJECT ${device_srcs}) - -# if(USE_CUDA) -# target_link_libraries( -# device -# ) -# elseif(USE_ROCM) -# target_link_libraries( -# device -# device_rocm -# hip::host -# hip::device -# hip::hipfft -# roc::hipblas -# roc::hipsolver -# ) -# endif() \ No newline at end of file diff --git a/source/source_base/module_device/dsp_selector.cpp b/source/source_base/module_device/dsp_selector.cpp new file mode 100644 index 0000000000..bdc5e761d7 --- /dev/null +++ b/source/source_base/module_device/dsp_selector.cpp @@ -0,0 +1,64 @@ +#include "dsp_selector.h" +#include +#include + +#ifdef __DSP + +namespace base_device +{ +namespace memory +{ + +// Global selector instance +std::unique_ptr dsp_selector = nullptr; + +// Get current DSP selector +DspSelector* get_dsp_selector() +{ + if (!dsp_selector) + { + throw std::runtime_error( + "ModuleBase::memory::get_dsp_selector: " + "DSP selector not initialized. Call init_dsp_selector first." + ); + } + return dsp_selector.get(); +} + +// Default DSP selector implementation +class DefaultDspSelector : public DspSelector +{ +private: + int rank_ = 0; + +public: + int get_rank() const override + { + return rank_; + } + + void set_rank(const int rank) override + { + if (rank < 0) + { + throw std::runtime_error( + "ModuleBase::memory::DspSelector: " + "DSP rank must be non-negative" + ); + } + rank_ = rank; + } +}; + + +// Create default DSP selector and set it as global +void create_default_selector(const int rank) +{ + dsp_selector = std::unique_ptr(new DefaultDspSelector()); + dsp_selector->set_rank(rank); +} + +} // namespace memory +} // namespace base_device + +#endif diff --git a/source/source_base/module_device/dsp_selector.h b/source/source_base/module_device/dsp_selector.h new file mode 100644 index 0000000000..0ef1d0c86b --- /dev/null +++ b/source/source_base/module_device/dsp_selector.h @@ -0,0 +1,35 @@ +#ifndef MODULE_DEVICE_DSP_SELECTOR_H_ +#define MODULE_DEVICE_DSP_SELECTOR_H_ + +#ifdef __DSP + +#include + +namespace base_device { +namespace memory { + +// DSP selector interface +class DspSelector { +public: + virtual ~DspSelector() = default; + // Get DSP rank + virtual int get_rank() const = 0; + // Set DSP rank + virtual void set_rank(const int rank) = 0; +}; + +// Global selector instance +extern std::unique_ptr dsp_selector; + +// Get current DSP selector +DspSelector* get_dsp_selector(); + +// Create default DSP selector and set it as global +void create_default_selector(const int rank); + +} // namespace memory +} // namespace base_device + +#endif // end __DSP + +#endif // MODULE_DEVICE_DSP_SELECTOR_H_ diff --git a/source/source_base/module_device/memory_op.cpp b/source/source_base/module_device/memory_op.cpp index bff9234f64..7d8d47a7e2 100644 --- a/source/source_base/module_device/memory_op.cpp +++ b/source/source_base/module_device/memory_op.cpp @@ -2,11 +2,6 @@ #include "source_base/memory.h" #include "source_base/tool_threading.h" -#ifdef __DSP -#include "source_base/kernels/dsp/dsp_connector.h" -#include "source_base/global_variable.h" -#include "source_io/module_parameter/parameter.h" -#endif #include #include @@ -442,76 +437,7 @@ template struct delete_memory_op, base_device::DEVICE_GPU>; template struct delete_memory_op, base_device::DEVICE_GPU>; #endif -#ifdef __DSP - -template -struct resize_memory_op_mt -{ - void operator()(FPTYPE*& arr, const size_t size, const char* record_in) - { - if (arr != nullptr) - { - mtfunc::free_ht(arr); - } - arr = (FPTYPE*)mtfunc::malloc_ht(sizeof(FPTYPE) * size, GlobalV::MY_RANK % PARAM.inp.dsp_count); - std::string record_string; - if (record_in != nullptr) - { - record_string = record_in; - } - else - { - record_string = "no_record"; - } - - if (record_string != "no_record") - { - ModuleBase::Memory::record(record_string, sizeof(FPTYPE) * size); - } - } -}; - -template -struct set_memory_op_mt -{ - void operator()(FPTYPE* arr, const int var, const size_t size) - { - ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) { - int beg = 0, len = 0; - ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); - memset(arr + beg, var, sizeof(FPTYPE) * len); - }); - } -}; - -template -struct delete_memory_op_mt -{ - void operator()(FPTYPE* arr) - { - mtfunc::free_ht(arr); - } -}; - -template struct resize_memory_op_mt; -template struct resize_memory_op_mt; -template struct resize_memory_op_mt; -template struct resize_memory_op_mt, base_device::DEVICE_CPU>; -template struct resize_memory_op_mt, base_device::DEVICE_CPU>; - -template struct set_memory_op_mt; -template struct set_memory_op_mt; -template struct set_memory_op_mt; -template struct set_memory_op_mt, base_device::DEVICE_CPU>; -template struct set_memory_op_mt, base_device::DEVICE_CPU>; - -template struct delete_memory_op_mt; -template struct delete_memory_op_mt; -template struct delete_memory_op_mt; -template struct delete_memory_op_mt, base_device::DEVICE_CPU>; -template struct delete_memory_op_mt, base_device::DEVICE_CPU>; -#endif template void resize_memory(FPTYPE* arr, const size_t size, base_device::AbacusDevice_t device_type) diff --git a/source/source_base/module_device/memory_op.h b/source/source_base/module_device/memory_op.h index 004468f410..2961efbb1f 100644 --- a/source/source_base/module_device/memory_op.h +++ b/source/source_base/module_device/memory_op.h @@ -2,6 +2,7 @@ #define MODULE_DEVICE_MEMORY_H_ #include "types.h" +#include "memory_op_dsp.h" #include #include @@ -218,47 +219,7 @@ struct delete_memory_op }; #endif // __CUDA || __UT_USE_CUDA || __ROCM || __UT_USE_ROCM -#ifdef __DSP -template -struct resize_memory_op_mt -{ - /// @brief Allocate memory for a given pointer. Note this op will free the pointer first. - /// - /// Input Parameters - /// \param size : array size - /// \param record_string : label for memory record - /// - /// Output Parameters - /// \param arr : allocated array - void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); -}; - -template -struct set_memory_op_mt -{ - /// @brief memset for DSP memory allocated by mt allocator. - /// - /// Input Parameters - /// \param var : the specified constant byte value - /// \param size : array size - /// - /// Output Parameters - /// \param arr : output array initialized by the input value - void operator()(FPTYPE* arr, const int var, const size_t size); -}; - -template -struct delete_memory_op_mt -{ - /// @brief free memory for multi-device - /// - /// Input Parameters - /// \param arr : the input array - void operator()(FPTYPE* arr); -}; - -#endif // __DSP } // end of namespace memory } // end of namespace base_device diff --git a/source/source_base/module_device/memory_op_dsp.cpp b/source/source_base/module_device/memory_op_dsp.cpp new file mode 100644 index 0000000000..21fb67ae68 --- /dev/null +++ b/source/source_base/module_device/memory_op_dsp.cpp @@ -0,0 +1,92 @@ +#include "memory_op_dsp.h" +#include "dsp_selector.h" + +#include "source_base/memory.h" +#include "source_base/tool_threading.h" +#ifdef __DSP +#include "source_base/kernels/dsp/dsp_connector.h" +#endif + +#include +#include + +namespace base_device +{ +namespace memory +{ + +#ifdef __DSP + +template +struct resize_memory_op_mt +{ + void operator()(FPTYPE*& arr, const size_t size, const char* record_in) + { + if (arr != nullptr) + { + mtfunc::free_ht(arr); + } + int rank = get_dsp_selector()->get_rank(); + arr = (FPTYPE*)mtfunc::malloc_ht(sizeof(FPTYPE) * size, rank); + std::string record_string; + if (record_in != nullptr) + { + record_string = record_in; + } + else + { + record_string = "no_record"; + } + + if (record_string != "no_record") + { + ModuleBase::Memory::record(record_string, sizeof(FPTYPE) * size); + } + } +}; + +template +struct set_memory_op_mt +{ + void operator()(FPTYPE* arr, const int var, const size_t size) + { + ModuleBase::OMP_PARALLEL([&](int num_thread, int thread_id) + { + int beg = 0, len = 0; + ModuleBase::BLOCK_TASK_DIST_1D(num_thread, thread_id, size, (size_t)4096 / sizeof(FPTYPE), beg, len); + memset(arr + beg, var, sizeof(FPTYPE) * len); + }); + } +}; + +template +struct delete_memory_op_mt +{ + void operator()(FPTYPE* arr) + { + mtfunc::free_ht(arr); + } +}; + + +template struct resize_memory_op_mt; +template struct resize_memory_op_mt; +template struct resize_memory_op_mt; +template struct resize_memory_op_mt, base_device::DEVICE_CPU>; +template struct resize_memory_op_mt, base_device::DEVICE_CPU>; + +template struct set_memory_op_mt; +template struct set_memory_op_mt; +template struct set_memory_op_mt; +template struct set_memory_op_mt, base_device::DEVICE_CPU>; +template struct set_memory_op_mt, base_device::DEVICE_CPU>; + +template struct delete_memory_op_mt; +template struct delete_memory_op_mt; +template struct delete_memory_op_mt; +template struct delete_memory_op_mt, base_device::DEVICE_CPU>; +template struct delete_memory_op_mt, base_device::DEVICE_CPU>; +#endif + +} // namespace memory +} // namespace base_device \ No newline at end of file diff --git a/source/source_base/module_device/memory_op_dsp.h b/source/source_base/module_device/memory_op_dsp.h new file mode 100644 index 0000000000..28659b77da --- /dev/null +++ b/source/source_base/module_device/memory_op_dsp.h @@ -0,0 +1,60 @@ +#ifndef MODULE_DEVICE_MEMORY_DSP_H_ +#define MODULE_DEVICE_MEMORY_DSP_H_ + +#include "types.h" + +#include +#include + +namespace base_device +{ + +namespace memory +{ + +#ifdef __DSP + +template +struct resize_memory_op_mt +{ + /// @brief Allocate memory for a given pointer. Note this op will free the pointer first. + /// + /// Input Parameters + /// \param size : array size + /// \param record_string : label for memory record + /// + /// Output Parameters + /// \param arr : allocated array + void operator()(FPTYPE*& arr, const size_t size, const char* record_in = nullptr); +}; + +template +struct set_memory_op_mt +{ + /// @brief memset for DSP memory allocated by mt allocator. + /// + /// Input Parameters + /// \param var : the specified constant byte value + /// \param size : array size + /// + /// Output Parameters + /// \param arr : output array initialized by the input value + void operator()(FPTYPE* arr, const int var, const size_t size); +}; + +template +struct delete_memory_op_mt +{ + /// @brief free memory for multi-device + /// + /// Input Parameters + /// \param arr : the input array + void operator()(FPTYPE* arr); +}; + +#endif // __DSP + +} // end of namespace memory +} // end of namespace base_device + +#endif // MODULE_DEVICE_MEMORY_DSP_H_ \ No newline at end of file diff --git a/source/source_io/module_parameter/CMakeLists.txt b/source/source_io/module_parameter/CMakeLists.txt index 03ffaa1218..fe2fa66a6f 100644 --- a/source/source_io/module_parameter/CMakeLists.txt +++ b/source/source_io/module_parameter/CMakeLists.txt @@ -3,8 +3,14 @@ add_library( parameter OBJECT parameter.cpp + dsp_config.cpp ) +# Link with base library for WARNING_QUIT +target_link_libraries(parameter PUBLIC base) + +# device library dependency removed for pyabacus build + # if(ENABLE_COVERAGE) # add_coverage(parameter) # endif() diff --git a/source/source_io/module_parameter/dsp_config.cpp b/source/source_io/module_parameter/dsp_config.cpp new file mode 100644 index 0000000000..27252e2e0f --- /dev/null +++ b/source/source_io/module_parameter/dsp_config.cpp @@ -0,0 +1,37 @@ +#include "source_base/module_device/dsp_selector.h" +#include +#include + +#ifdef __DSP + +namespace ModuleIO { + +// Initialize DSP selector +void init_dsp_selector(const int my_rank, const int dsp_count) +{ + // Validate parameters + if (my_rank < 0) + { + throw std::runtime_error( + "ModuleIO::init_dsp_selector: " + "my_rank must be non-negative" + ); + } + if (dsp_count <= 0) + { + throw std::runtime_error( + "ModuleIO::init_dsp_selector: " + "dsp_count must be positive" + ); + } + + // Calculate DSP rank + const int rank = my_rank % dsp_count; + + // Create default DSP selector and set it as global + base_device::memory::create_default_selector(rank); +} + +} // namespace ModuleIO + +#endif diff --git a/source/source_io/module_parameter/input_conv.cpp b/source/source_io/module_parameter/input_conv.cpp index 2a2ccf6b8b..113dcc3740 100644 --- a/source/source_io/module_parameter/input_conv.cpp +++ b/source/source_io/module_parameter/input_conv.cpp @@ -665,6 +665,16 @@ void Input_Conv::Convert() hsolver::DiagoElpaNative::elpa_num_thread = PARAM.inp.elpa_num_thread; ; #endif + + +#ifdef __DSP + // Initialize DSP selector if DSP is enabled + if (PARAM.inp.dsp_count > 0) + { + ModuleIO::init_dsp_selector(GlobalV::MY_RANK, PARAM.inp.dsp_count); + } +#endif + ModuleBase::timer::end("Input_Conv", "Convert"); return; } diff --git a/source/source_io/module_parameter/parameter.h b/source/source_io/module_parameter/parameter.h index 138407f607..dd2235fdc4 100644 --- a/source/source_io/module_parameter/parameter.h +++ b/source/source_io/module_parameter/parameter.h @@ -1,10 +1,14 @@ #ifndef PARAMETER_H #define PARAMETER_H + #include "input_parameter.h" #include "system_parameter.h" + namespace ModuleIO { class ReadInput; + // Initialize DSP selector + void init_dsp_selector(const int my_rank, const int dsp_count); } class CalAtomInfo;