diff --git a/source/source_base/array_pool.h b/source/source_base/array_pool.h
deleted file mode 100644
index 5fde2c4176..0000000000
--- a/source/source_base/array_pool.h
+++ /dev/null
@@ -1,89 +0,0 @@
-#ifndef ARRAY_POOL_H
-#define ARRAY_POOL_H
-
-
-namespace ModuleBase
-{
-    /**
-     * @brief Array_Pool is a class designed for dynamically allocating a two-dimensional array
-     *  with all its elements contiguously arranged in memory. Compared to a two-dimensional vector,
-     *  it offers better data locality because all elements are stored in a continuous block of memory.
-     *  
-     * @tparam T 
-     */
-    template <typename T>
-    class Array_Pool
-    {
-    public:
-        Array_Pool() = default;
-        Array_Pool(const int nr_in, const int nc_in);
-        Array_Pool(Array_Pool<T>&& other);
-        Array_Pool& operator=(Array_Pool<T>&& other);
-        ~Array_Pool();
-        Array_Pool(const Array_Pool<T>& other) = delete;
-        Array_Pool& operator=(const Array_Pool& other) = delete;
-
-        T** get_ptr_2D() const { return this->ptr_2D; }
-        T* get_ptr_1D() const { return this->ptr_1D; }
-        int get_nr() const { return this->nr; }
-        int get_nc() const { return this->nc; }
-        T* operator[](const int ir) const { return this->ptr_2D[ir]; }
-    private:
-        T** ptr_2D = nullptr;
-        T* ptr_1D = nullptr;
-        int nr = 0;
-        int nc = 0;
-    };
-
-    template <typename T>
-    Array_Pool<T>::Array_Pool(const int nr_in, const int nc_in) // Attention: uninitialized
-        : nr(nr_in),
-          nc(nc_in)
-    {
-        this->ptr_1D = new T[nr * nc]();
-        this->ptr_2D = new T*[nr];
-        for (int ir = 0; ir < nr; ++ir)
-            this->ptr_2D[ir] = &this->ptr_1D[ir * nc];
-    }
-
-    template <typename T>
-    Array_Pool<T>::~Array_Pool()
-    {
-        delete[] this->ptr_2D;
-        delete[] this->ptr_1D;
-    }
-
-    template <typename T>
-    Array_Pool<T>::Array_Pool(Array_Pool<T>&& other)
-        : ptr_2D(other.ptr_2D),
-          ptr_1D(other.ptr_1D),
-          nr(other.nr),
-          nc(other.nc)
-    {
-        other.ptr_2D = nullptr;
-        other.ptr_1D = nullptr;
-        other.nr = 0;
-        other.nc = 0;
-    }
-
-    template <typename T>
-    Array_Pool<T>& Array_Pool<T>::operator=(Array_Pool<T>&& other)
-    {
-        if (this != &other)
-        {
-            delete[] this->ptr_2D;
-            delete[] this->ptr_1D;
-            this->ptr_2D = other.ptr_2D;
-            this->ptr_1D = other.ptr_1D;
-            this->nr = other.nr;
-            this->nc = other.nc;
-            other.ptr_2D = nullptr;
-            other.ptr_1D = nullptr;
-            other.nr = 0;
-            other.nc = 0;
-        }
-        return *this;
-    }
-
-}
-#endif
\ No newline at end of file
diff --git a/source/source_base/math_ylmreal.cpp b/source/source_base/math_ylmreal.cpp
index c17283b11f..47415de44d 100644
--- a/source/source_base/math_ylmreal.cpp
+++ b/source/source_base/math_ylmreal.cpp
@@ -4,7 +4,6 @@
 #include "source_base/kernels/math_ylm_op.h"
 #include "source_base/libm/libm.h"
 #include "source_base/module_device/memory_op.h"
-#include "source_base/array_pool.h"
 #include "realarray.h"
 #include "timer.h"
 #include "tool_quit.h"
@@ -632,7 +631,7 @@ void YlmReal::grad_Ylm_Real
 	ModuleBase::Ylm::set_coefficients();
 	const int lmax = int(sqrt( double(lmax2) ) + 0.1) - 1;
 	std::vector<double> tmpylm((lmax2+1) * (lmax2+1));
-	Array_Pool<double> tmpgylm((lmax2+1) * (lmax2+1), 3);
+	std::vector<double> tmpgylm((lmax2+1) * (lmax2+1) * 3);
 
 	for (int ig = 0;ig < ng;ig++)
     {
@@ -651,7 +650,7 @@ void YlmReal::grad_Ylm_Real
 		}
 		else
 		{
-			Ylm::grad_rl_sph_harm(lmax2, gg.x, gg.y, gg.z, tmpylm.data(),tmpgylm.get_ptr_2D());
+			Ylm::grad_rl_sph_harm(lmax2, gg.x, gg.y, gg.z, tmpylm.data(), tmpgylm.data());
 			int lm = 0;
 			for(int il = 0 ; il <= lmax ; ++il)
 			{
@@ -659,9 +658,9 @@ void YlmReal::grad_Ylm_Real
 				{
 					double rlylm = tmpylm[lm];
 					ylm(lm,ig) = rlylm / pow(gmod,il);
-					dylmx(lm,ig) = ( tmpgylm[lm][0] - il*rlylm * gg.x / pow(gmod,2) )/pow(gmod,il);
-					dylmy(lm,ig) = ( tmpgylm[lm][1] - il*rlylm * gg.y / pow(gmod,2) )/pow(gmod,il);
-					dylmz(lm,ig) = ( tmpgylm[lm][2] - il*rlylm * gg.z / pow(gmod,2) )/pow(gmod,il);
+					dylmx(lm,ig) = ( tmpgylm[lm*3]     - il*rlylm * gg.x / pow(gmod,2) )/pow(gmod,il);
+					dylmy(lm,ig) = ( tmpgylm[lm*3 + 1] - il*rlylm * gg.y / pow(gmod,2) )/pow(gmod,il);
+					dylmz(lm,ig) = ( tmpgylm[lm*3 + 2] - il*rlylm * gg.z / pow(gmod,2) )/pow(gmod,il);
 				}
 			}
 			
diff --git a/source/source_base/test/math_ylmreal_test.cpp b/source/source_base/test/math_ylmreal_test.cpp
index b132889645..641bb2af4d 100644
--- a/source/source_base/test/math_ylmreal_test.cpp
+++ b/source/source_base/test/math_ylmreal_test.cpp
@@ -5,7 +5,6 @@
 #include"gtest/gtest.h"
 #include<math.h>
 #include "source_psi/psi.h"
-#include "source_base/array_pool.h"
 
 #define doublethreshold 1e-12
 
@@ -51,7 +50,7 @@ class YlmRealTest : public testing::Test
     double *rly;        //Ylm
     double (*rlgy)[3];  //the gradient of Ylm
     std::vector<double> rlyvector; //Ylm
-    ModuleBase::Array_Pool<double> rlgyvector; //the gradient of Ylm
+    std::vector<double> rlgyvector; //the gradient of Ylm (flat, size nylm*3)
 
     //Ylm function
     inline double norm(const double &x, const double &y, const double &z) {return sqrt(x*x + y*y + z*z);}
@@ -195,7 +194,7 @@ class YlmRealTest : public testing::Test
         rly = new double[nylm];
         rlyvector.resize(nylm);
         rlgy = new double[nylm][3];
-        rlgyvector = ModuleBase::Array_Pool<double>(nylm,3);
+        rlgyvector.assign(nylm * 3, 0.0);
         ref = new double[64*4]{
             y00(g[0].x, g[0].y, g[0].z),  y00(g[1].x, g[1].y, g[1].z),  y00(g[2].x, g[2].y, g[2].z),  y00(g[3].x, g[3].y, g[3].z),
             y10(g[0].x, g[0].y, g[0].z),  y10(g[1].x, g[1].y, g[1].z),  y10(g[2].x, g[2].y, g[2].z),  y10(g[3].x, g[3].y, g[3].z),
@@ -412,11 +411,11 @@ TEST_F(YlmRealTest,YlmGradRlSphHarm)
     for(int j=0;j<ng;++j)
     {
         double r = sqrt(g[j].x * g[j].x + g[j].y * g[j].y + g[j].z * g[j].z);
-        ModuleBase::Ylm::grad_rl_sph_harm(lmax,g[j].x/r,g[j].y/r,g[j].z/r,rlyvector.data(),rlgyvector.get_ptr_2D());
+        ModuleBase::Ylm::grad_rl_sph_harm(lmax,g[j].x/r,g[j].y/r,g[j].z/r,rlyvector.data(),rlgyvector.data());
         for(int i=0;i<nylm;++i)
         {
             EXPECT_NEAR(rlyvector[i],ref[i*ng+j],doublethreshold)  << "Ylm[" << i << "], example " << j << " not pass";
-            for(int k=0;k<3;++k) {EXPECT_NEAR(rlgyvector[i][k],rlgyref[j][i][k],1e-5);}
+            for(int k=0;k<3;++k) {EXPECT_NEAR(rlgyvector[i*3+k],rlgyref[j][i][k],1e-5);}
 
         }
     }
@@ -467,15 +466,15 @@ TEST_F(YlmRealTest, equality_gradient_test)
 	double rlya[100];
 	double rlyb[400];
 
-	ModuleBase::Array_Pool<double> grlya(100, 3);
+	std::vector<double> grlya(100 * 3);
 	double grlyb[400][3];
 
-	ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya.get_ptr_2D());
+	ModuleBase::Ylm::grad_rl_sph_harm (9, R.x, R.y, R.z, rlya, grlya.data());
 	ModuleBase::Ylm::rlylm (10, R.x, R.y, R.z, rlyb, grlyb);
 
 	for (int i = 0; i < 100; i++)
 	{
-		double diffx = fabs(grlya[i][2]-grlyb[i][2]);
+		double diffx = fabs(grlya[i*3 + 2]-grlyb[i][2]);
         EXPECT_LT(diffx,1e-8);
 	}
 
diff --git a/source/source_base/test/ylm_test.cpp b/source/source_base/test/ylm_test.cpp
index ed5fcbcc1b..4e97c90786 100644
--- a/source/source_base/test/ylm_test.cpp
+++ b/source/source_base/test/ylm_test.cpp
@@ -77,33 +77,20 @@ TEST_F(ylmTest, HessianFiniteDifferenceL5)
     ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly);
 
     // Allocate gradient arrays for central difference
-    std::vector<double> rly_xp((l+1)*(l+1));
-    std::vector<std::vector<double>> grly_xp((l+1)*(l+1), std::vector<double>(3));
-    double** grly_xp_ptr = new double*[(l+1)*(l+1)];
-    for (int i = 0; i < (l+1)*(l+1); i++) {
-        grly_xp_ptr[i] = grly_xp[i].data();
-    }
-
-    std::vector<double> rly_xm((l+1)*(l+1));
-    std::vector<std::vector<double>> grly_xm((l+1)*(l+1), std::vector<double>(3));
-    double** grly_xm_ptr = new double*[(l+1)*(l+1)];
-    for (int i = 0; i < (l+1)*(l+1); i++) {
-        grly_xm_ptr[i] = grly_xm[i].data();
-    }
+    const int nylm = (l+1)*(l+1);
+    std::vector<double> rly_xp(nylm), rly_xm(nylm);
+    std::vector<double> grly_xp(nylm * 3), grly_xm(nylm * 3);
 
     // Compute gradient at (x+h, y, z) and (x-h, y, z)
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp_ptr);
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm_ptr);
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data());
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data());
 
     // Test H_xx for m=0 (index 25) using central difference
     int idx = 25;
-    double H_xx_fd = (grly_xp[idx][0] - grly_xm[idx][0]) / (2.0 * h);
+    double H_xx_fd = (grly_xp[idx*3] - grly_xm[idx*3]) / (2.0 * h);
     double H_xx_analytic = hrly[idx][0];
 
     EXPECT_NEAR(H_xx_fd, H_xx_analytic, tol);
-
-    delete[] grly_xp_ptr;
-    delete[] grly_xm_ptr;
 }
 
 // Test Hessian finite difference for l=6 using central difference
@@ -118,33 +105,20 @@ TEST_F(ylmTest, HessianFiniteDifferenceL6)
     ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly);
 
     // Allocate gradient arrays for central difference
-    std::vector<double> rly_xp((l+1)*(l+1));
-    std::vector<std::vector<double>> grly_xp((l+1)*(l+1), std::vector<double>(3));
-    double** grly_xp_ptr = new double*[(l+1)*(l+1)];
-    for (int i = 0; i < (l+1)*(l+1); i++) {
-        grly_xp_ptr[i] = grly_xp[i].data();
-    }
-
-    std::vector<double> rly_xm((l+1)*(l+1));
-    std::vector<std::vector<double>> grly_xm((l+1)*(l+1), std::vector<double>(3));
-    double** grly_xm_ptr = new double*[(l+1)*(l+1)];
-    for (int i = 0; i < (l+1)*(l+1); i++) {
-        grly_xm_ptr[i] = grly_xm[i].data();
-    }
+    const int nylm = (l+1)*(l+1);
+    std::vector<double> rly_xp(nylm), rly_xm(nylm);
+    std::vector<double> grly_xp(nylm * 3), grly_xm(nylm * 3);
 
     // Compute gradient at (x+h, y, z) and (x-h, y, z)
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp_ptr);
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm_ptr);
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data());
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data());
 
     // Test H_xx for m=0 (index 36) using central difference
     int idx = 36;
-    double H_xx_fd = (grly_xp[idx][0] - grly_xm[idx][0]) / (2.0 * h);
+    double H_xx_fd = (grly_xp[idx*3] - grly_xm[idx*3]) / (2.0 * h);
     double H_xx_analytic = hrly[idx][0];
 
     EXPECT_NEAR(H_xx_fd, H_xx_analytic, tol);
-
-    delete[] grly_xp_ptr;
-    delete[] grly_xm_ptr;
 }
 
 // Test that l>6 triggers error
@@ -174,71 +148,46 @@ TEST_F(ylmTest, HessianAllComponentsL2)
     int idx = 4;
 
     // Allocate gradient arrays
-    std::vector<double> rly_xp((l+1)*(l+1)), rly_xm((l+1)*(l+1));
-    std::vector<double> rly_yp((l+1)*(l+1)), rly_ym((l+1)*(l+1));
-    std::vector<double> rly_zp((l+1)*(l+1)), rly_zm((l+1)*(l+1));
-
-    std::vector<std::vector<double>> grly_xp((l+1)*(l+1), std::vector<double>(3));
-    std::vector<std::vector<double>> grly_xm((l+1)*(l+1), std::vector<double>(3));
-    std::vector<std::vector<double>> grly_yp((l+1)*(l+1), std::vector<double>(3));
-    std::vector<std::vector<double>> grly_ym((l+1)*(l+1), std::vector<double>(3));
-    std::vector<std::vector<double>> grly_zp((l+1)*(l+1), std::vector<double>(3));
-    std::vector<std::vector<double>> grly_zm((l+1)*(l+1), std::vector<double>(3));
-
-    double** grly_xp_ptr = new double*[(l+1)*(l+1)];
-    double** grly_xm_ptr = new double*[(l+1)*(l+1)];
-    double** grly_yp_ptr = new double*[(l+1)*(l+1)];
-    double** grly_ym_ptr = new double*[(l+1)*(l+1)];
-    double** grly_zp_ptr = new double*[(l+1)*(l+1)];
-    double** grly_zm_ptr = new double*[(l+1)*(l+1)];
-
-    for (int i = 0; i < (l+1)*(l+1); i++) {
-        grly_xp_ptr[i] = grly_xp[i].data();
-        grly_xm_ptr[i] = grly_xm[i].data();
-        grly_yp_ptr[i] = grly_yp[i].data();
-        grly_ym_ptr[i] = grly_ym[i].data();
-        grly_zp_ptr[i] = grly_zp[i].data();
-        grly_zm_ptr[i] = grly_zm[i].data();
-    }
+    const int nylm = (l+1)*(l+1);
+    std::vector<double> rly_xp(nylm), rly_xm(nylm);
+    std::vector<double> rly_yp(nylm), rly_ym(nylm);
+    std::vector<double> rly_zp(nylm), rly_zm(nylm);
+
+    std::vector<double> grly_xp(nylm * 3), grly_xm(nylm * 3);
+    std::vector<double> grly_yp(nylm * 3), grly_ym(nylm * 3);
+    std::vector<double> grly_zp(nylm * 3), grly_zm(nylm * 3);
 
     // Compute gradients at perturbed points
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp_ptr);
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm_ptr);
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y+h, z, rly_yp.data(), grly_yp_ptr);
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y-h, z, rly_ym.data(), grly_ym_ptr);
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y, z+h, rly_zp.data(), grly_zp_ptr);
-    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y, z-h, rly_zm.data(), grly_zm_ptr);
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data());
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data());
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y+h, z, rly_yp.data(), grly_yp.data());
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y-h, z, rly_ym.data(), grly_ym.data());
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y, z+h, rly_zp.data(), grly_zp.data());
+    ModuleBase::Ylm::grad_rl_sph_harm(l, x, y, z-h, rly_zm.data(), grly_zm.data());
 
     // Test H_xx (index 0)
-    double H_xx_fd = (grly_xp[idx][0] - grly_xm[idx][0]) / (2.0 * h);
+    double H_xx_fd = (grly_xp[idx*3]     - grly_xm[idx*3])     / (2.0 * h);
     EXPECT_NEAR(H_xx_fd, hrly[idx][0], tol);
 
     // Test H_xy (index 1)
-    double H_xy_fd = (grly_xp[idx][1] - grly_xm[idx][1]) / (2.0 * h);
+    double H_xy_fd = (grly_xp[idx*3 + 1] - grly_xm[idx*3 + 1]) / (2.0 * h);
     EXPECT_NEAR(H_xy_fd, hrly[idx][1], tol);
 
     // Test H_xz (index 2)
-    double H_xz_fd = (grly_xp[idx][2] - grly_xm[idx][2]) / (2.0 * h);
+    double H_xz_fd = (grly_xp[idx*3 + 2] - grly_xm[idx*3 + 2]) / (2.0 * h);
     EXPECT_NEAR(H_xz_fd, hrly[idx][2], tol);
 
     // Test H_yy (index 3)
-    double H_yy_fd = (grly_yp[idx][1] - grly_ym[idx][1]) / (2.0 * h);
+    double H_yy_fd = (grly_yp[idx*3 + 1] - grly_ym[idx*3 + 1]) / (2.0 * h);
     EXPECT_NEAR(H_yy_fd, hrly[idx][3], tol);
 
     // Test H_yz (index 4)
-    double H_yz_fd = (grly_yp[idx][2] - grly_ym[idx][2]) / (2.0 * h);
+    double H_yz_fd = (grly_yp[idx*3 + 2] - grly_ym[idx*3 + 2]) / (2.0 * h);
     EXPECT_NEAR(H_yz_fd, hrly[idx][4], tol);
 
     // Test H_zz (index 5)
-    double H_zz_fd = (grly_zp[idx][2] - grly_zm[idx][2]) / (2.0 * h);
+    double H_zz_fd = (grly_zp[idx*3 + 2] - grly_zm[idx*3 + 2]) / (2.0 * h);
     EXPECT_NEAR(H_zz_fd, hrly[idx][5], tol);
-
-    delete[] grly_xp_ptr;
-    delete[] grly_xm_ptr;
-    delete[] grly_yp_ptr;
-    delete[] grly_ym_ptr;
-    delete[] grly_zp_ptr;
-    delete[] grly_zm_ptr;
 }
 
 // Test Hessian for m=0 values across different l
@@ -256,28 +205,17 @@ TEST_F(ylmTest, HessianM0DifferentL)
         ModuleBase::Ylm::hes_rl_sph_harm(l, x, y, z, hrly);
 
         // Allocate gradient arrays
-        std::vector<double> rly_xp((l+1)*(l+1)), rly_xm((l+1)*(l+1));
-        std::vector<std::vector<double>> grly_xp((l+1)*(l+1), std::vector<double>(3));
-        std::vector<std::vector<double>> grly_xm((l+1)*(l+1), std::vector<double>(3));
+        const int nylm = (l+1)*(l+1);
+        std::vector<double> rly_xp(nylm), rly_xm(nylm);
+        std::vector<double> grly_xp(nylm * 3), grly_xm(nylm * 3);
 
-        double** grly_xp_ptr = new double*[(l+1)*(l+1)];
-        double** grly_xm_ptr = new double*[(l+1)*(l+1)];
-
-        for (int i = 0; i < (l+1)*(l+1); i++) {
-            grly_xp_ptr[i] = grly_xp[i].data();
-            grly_xm_ptr[i] = grly_xm[i].data();
-        }
-
-        ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp_ptr);
-        ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm_ptr);
+        ModuleBase::Ylm::grad_rl_sph_harm(l, x+h, y, z, rly_xp.data(), grly_xp.data());
+        ModuleBase::Ylm::grad_rl_sph_harm(l, x-h, y, z, rly_xm.data(), grly_xm.data());
 
         // Test H_xx for m=0 (index l*l)
         int idx = l * l;
-        double H_xx_fd = (grly_xp[idx][0] - grly_xm[idx][0]) / (2.0 * h);
+        double H_xx_fd = (grly_xp[idx*3] - grly_xm[idx*3]) / (2.0 * h);
         EXPECT_NEAR(H_xx_fd, hrly[idx][0], tol) << "Failed for l=" << l << " m=0";
-
-        delete[] grly_xp_ptr;
-        delete[] grly_xm_ptr;
     }
 }
 
diff --git a/source/source_base/ylm.cpp b/source/source_base/ylm.cpp
index 7384956259..62d4d66707 100644
--- a/source/source_base/ylm.cpp
+++ b/source/source_base/ylm.cpp
@@ -6,7 +6,6 @@
 #include "constants.h"
 #include "timer.h"
 #include "tool_quit.h"
-#include "array_pool.h"
 #include "ylmcoef.h"
 
 namespace ModuleBase
@@ -775,9 +774,13 @@ void Ylm::grad_rl_sph_harm
 	const double y,
 	const double z,
 	double* rly,
-	double** grly
+	double* grly_flat
 )
 {
+	// Alias the flat buffer as a pointer-to-array-of-3-doubles so the body
+	// below can continue to use the natural grly[lm][xyz] indexing without
+	// any performance penalty — the memory layout is unchanged.
+	double (*grly)[3] = reinterpret_cast<double(*)[3]>(grly_flat);
 	double radius2 = x*x+y*y+z*z;
 	double tx = 2.0*x;
 	double ty = 2.0*y;
diff --git a/source/source_base/ylm.h b/source/source_base/ylm.h
index 6ee950f2cf..ff25a56a91 100644
--- a/source/source_base/ylm.h
+++ b/source/source_base/ylm.h
@@ -119,8 +119,9 @@ class Ylm
 	 * @param y [in] y/r
 	 * @param z [in] z/r
 	 * @param rly [in] calculated Ylm, Y00, Y10, Y11, Y1-1, Y20, Y21, Y2-1, Y22, Y2-2...
-	 * @param grly [out] gradient of Ylm, [dY00/dx, dY00/dy, dY00/dz], [dY10/dx, dY10/dy, dY10/dz], [dY11/dx, dY11/dy, dY11/dz],...
-	 *             grly should be a memory-contiguous two-dimensional array for better performance.
+	 * @param grly [out] gradient of Ylm, stored as a contiguous flat array of
+	 *             size (Lmax+1)^2 * 3 in row-major order:
+	 *             [dY00/dx, dY00/dy, dY00/dz, dY10/dx, dY10/dy, dY10/dz, ...]
 	 */
 	static void grad_rl_sph_harm(
 			const int Lmax,
@@ -128,7 +129,7 @@ class Ylm
 			const double y,
 			const double z,
 			double* rly,
-			double** grly);
+			double* grly);
 
 	/**
 	 * @brief Get the hessian of r^l Ylm (used in getting derivative of overlap)
diff --git a/source/source_basis/module_nao/two_center_integrator.cpp b/source/source_basis/module_nao/two_center_integrator.cpp
index 1c2a574c16..436f3885ed 100644
--- a/source/source_basis/module_nao/two_center_integrator.cpp
+++ b/source/source_basis/module_nao/two_center_integrator.cpp
@@ -2,7 +2,6 @@
 
 #include "source_base/vector3.h"
 #include "source_base/ylm.h"
-#include "source_base/array_pool.h"
 
 TwoCenterIntegrator::TwoCenterIntegrator():
     is_tabulated_(false),
@@ -69,12 +68,12 @@ void TwoCenterIntegrator::calculate(const int itype1,
     // generate all necessary real (solid) spherical harmonics
     const int lmax = l1 + l2;
 	std::vector<double> Rl_Y((lmax+1) * (lmax+1));
-	ModuleBase::Array_Pool<double> grad_Rl_Y((lmax+1) * (lmax+1), 3);
+	std::vector<double> grad_Rl_Y((lmax+1) * (lmax+1) * 3);
     std::vector<std::vector<double>> hess_Rl_Y;
 
     // R^l * Y is necessary anyway
     ModuleBase::Ylm::rl_sph_harm(l1 + l2, vR[0], vR[1], vR[2], Rl_Y);
-    if (grad_out || hess_out) ModuleBase::Ylm::grad_rl_sph_harm(l1 + l2, vR[0], vR[1], vR[2], Rl_Y.data(), grad_Rl_Y.get_ptr_2D());
+    if (grad_out || hess_out) ModuleBase::Ylm::grad_rl_sph_harm(l1 + l2, vR[0], vR[1], vR[2], Rl_Y.data(), grad_Rl_Y.data());
     if (hess_out) ModuleBase::Ylm::hes_rl_sph_harm(l1 + l2, vR[0], vR[1], vR[2], hess_Rl_Y);
 
     double tmp[3] = {0.0, 0.0, 0.0};
@@ -104,7 +103,7 @@ void TwoCenterIntegrator::calculate(const int itype1,
                 for (int i = 0; i < 3; ++i)
                 {
                     grad_out[i] += sign * G * ( (*d_S_by_Rl) * uR[i] * Rl_Y[lm_idx]
-                                                + (*S_by_Rl) * grad_Rl_Y[lm_idx][i] );
+                                                + (*S_by_Rl) * grad_Rl_Y[lm_idx*3 + i] );
                 }
             }
 
@@ -133,8 +132,8 @@ void TwoCenterIntegrator::calculate(const int itype1,
                         else du_dR = 0.0;
 
                         double term2 = (*d_S_by_Rl) * (du_dR * Rl_Y[lm_idx]
-                                                      + uR[alpha] * grad_Rl_Y[lm_idx][beta]
-                                                      + uR[beta] * grad_Rl_Y[lm_idx][alpha]);
+                                                      + uR[alpha] * grad_Rl_Y[lm_idx*3 + beta]
+                                                      + uR[beta] * grad_Rl_Y[lm_idx*3 + alpha]);
                         double term3 = (*S_by_Rl) * H_full[idx];
 
                         hess_out[idx] += sign * G * (term1 + term2 + term3);
diff --git a/source/source_lcao/center2_orb-orb11.cpp b/source/source_lcao/center2_orb-orb11.cpp
index 7e4304f01b..23afa0173e 100644
--- a/source/source_lcao/center2_orb-orb11.cpp
+++ b/source/source_lcao/center2_orb-orb11.cpp
@@ -10,7 +10,6 @@
 #include "source_base/math_polyint.h"
 #include "source_base/sph_bessel_recursive.h"
 #include "source_base/ylm.h"
-#include "source_base/array_pool.h"
 
 #include <cmath>
 
@@ -188,12 +187,12 @@ ModuleBase::Vector3<double> Center2_Orb::Orb11::cal_grad_overlap( // caoyu add 2
     const int LAB2 = (LA + LB + 1) * (LA + LB + 1);
     std::vector<double> rly(LAB2);
     std::vector<ModuleBase::Vector3<double>> grly;
-    ModuleBase::Array_Pool<double> tmp_grly(LAB2, 3);
-    ModuleBase::Ylm::grad_rl_sph_harm(LA + LB, delta_R.x, delta_R.y, delta_R.z, rly.data(), tmp_grly.get_ptr_2D());
+    std::vector<double> tmp_grly(LAB2 * 3);
+    ModuleBase::Ylm::grad_rl_sph_harm(LA + LB, delta_R.x, delta_R.y, delta_R.z, rly.data(), tmp_grly.data());
+    grly.reserve(LAB2);
     for (int i=0; i<LAB2; ++i)
     {
-        ModuleBase::Vector3<double> ele(tmp_grly[i][0], tmp_grly[i][1], tmp_grly[i][2]);
-        grly.push_back(ele);
+        grly.emplace_back(tmp_grly[i*3], tmp_grly[i*3+1], tmp_grly[i*3+2]);
     }
 
     ModuleBase::Vector3<double> grad_overlap(0.0, 0.0, 0.0);
diff --git a/source/source_lcao/module_gint/gint_atom.cpp b/source/source_lcao/module_gint/gint_atom.cpp
index 075e45b7a5..b367e32e16 100644
--- a/source/source_lcao/module_gint/gint_atom.cpp
+++ b/source/source_lcao/module_gint/gint_atom.cpp
@@ -1,5 +1,4 @@
 #include "source_base/ylm.h"
-#include "source_base/array_pool.h"
 #include "gint_atom.h"
 #include "source_cell/unitcell.h"
 #include "gint_helper.h"
@@ -127,9 +126,9 @@ void GintAtom::set_phi_dphi(
     // orb_ does not have the member variable dr_uniform
     const double dr_uniform = orb_->PhiLN(0, 0).dr_uniform;
     
-    std::vector<double> rly(std::pow(atom_->nwl + 1, 2));
-    // TODO: replace array_pool with std::vector
-    ModuleBase::Array_Pool<double> grly(std::pow(atom_->nwl + 1, 2), 3);
+    const int nylm = std::pow(atom_->nwl + 1, 2);
+    std::vector<double> rly(nylm);
+    std::vector<double> grly(nylm * 3);
     
     for(int im = 0; im < num_mgrids; im++)
     {
@@ -154,7 +153,7 @@ void GintAtom::set_phi_dphi(
             // spherical harmonics
             // TODO: vectorize the sph_harm function, 
             // the vectorized function can be called once for all meshgrids in a biggrid
-            ModuleBase::Ylm::grad_rl_sph_harm(atom_->nwl, coord.x, coord.y, coord.z, rly.data(), grly.get_ptr_2D());
+            ModuleBase::Ylm::grad_rl_sph_harm(atom_->nwl, coord.x, coord.y, coord.z, rly.data(), grly.data());
 
             // interpolation
             const double position = dist / dr_uniform;
@@ -201,9 +200,9 @@ void GintAtom::set_phi_dphi(
                 // derivative of wave functions with respect to atom positions.
                 const double tmpdphi_rly = (dtmp - tmp * ll / dist) / rl * rly[idx_lm] / dist;
 
-                dphi_x[im * stride + iw] =  tmpdphi_rly * coord.x + tmprl * grly[idx_lm][0];
-                dphi_y[im * stride + iw] =  tmpdphi_rly * coord.y + tmprl * grly[idx_lm][1];
-                dphi_z[im * stride + iw] =  tmpdphi_rly * coord.z + tmprl * grly[idx_lm][2];
+                dphi_x[im * stride + iw] =  tmpdphi_rly * coord.x + tmprl * grly[idx_lm*3];
+                dphi_y[im * stride + iw] =  tmpdphi_rly * coord.y + tmprl * grly[idx_lm*3 + 1];
+                dphi_z[im * stride + iw] =  tmpdphi_rly * coord.z + tmprl * grly[idx_lm*3 + 2];
             }
         }
     }
diff --git a/source/source_lcao/module_gint/set_ddphi.cpp b/source/source_lcao/module_gint/set_ddphi.cpp
index 4407817bf2..8861762883 100644
--- a/source/source_lcao/module_gint/set_ddphi.cpp
+++ b/source/source_lcao/module_gint/set_ddphi.cpp
@@ -1,4 +1,3 @@
-#include "source_base/array_pool.h"
 #include "source_base/timer.h"
 #include "source_base/ylm.h"
 #include "gint_atom.h"
@@ -20,18 +19,20 @@ void GintAtom::set_ddphi(
     // orb_ does not have the member variable dr_uniform
     const double dr_uniform = orb_->PhiLN(0, 0).dr_uniform;
 
-    std::vector<double> rly(std::pow(atom_->nwl + 1, 2));
-    ModuleBase::Array_Pool<double> grly(std::pow(atom_->nwl + 1, 2), 3);
+    const int nylm = std::pow(atom_->nwl + 1, 2);
+    std::vector<double> rly(nylm);
+    std::vector<double> grly(nylm * 3);
     // TODO: A better data structure such as a 3D tensor can be used to store dphi
     std::vector<std::vector<std::vector<double>>> dphi(atom_->nw, std::vector<std::vector<double>>(6, std::vector<double>(3)));
     Vec3d coord1;
-    ModuleBase::Array_Pool<double> displ(6, 3);
-    displ[0][0] = 0.0001; // in x direction
-    displ[1][0] = -0.0001;
-    displ[2][1] = 0.0001; // in y direction
-    displ[3][1] = -0.0001;
-    displ[4][2] = 0.0001; // in z direction
-    displ[5][2] = -0.0001;
+    constexpr double displ[6][3] = {
+        { 0.0001,  0.0,     0.0},    // +x
+        {-0.0001,  0.0,     0.0},    // -x
+        { 0.0,     0.0001,  0.0},    // +y
+        { 0.0,    -0.0001,  0.0},    // -y
+        { 0.0,     0.0,     0.0001}, // +z
+        { 0.0,     0.0,    -0.0001}  // -z
+    };
 
     for(int im = 0; im < num_mgrids; im++)
     {
@@ -59,7 +60,7 @@ void GintAtom::set_ddphi(
             coord1[2] = coord[2] + displ[i][2];
 
             // sphereical harmonics
-            ModuleBase::Ylm::grad_rl_sph_harm(atom_->nwl, coord1[0], coord1[1], coord1[2], rly.data(), grly.get_ptr_2D());
+            ModuleBase::Ylm::grad_rl_sph_harm(atom_->nwl, coord1[0], coord1[1], coord1[2], rly.data(), grly.data());
 
             const double dist1 = coord1.norm() < 1e-9 ? 1e-9 : coord1.norm();
 
@@ -99,9 +100,9 @@ void GintAtom::set_ddphi(
                 const double tmpdphi_rly = (dtmp - tmp * ll / dist1) / rl * rly[idx_lm] / dist1;
                 const double tmprl = tmp / rl;
 
-                dphi[iw][i][0] =  tmpdphi_rly * coord1[0] + tmprl * grly[idx_lm][0];
-                dphi[iw][i][1] =  tmpdphi_rly * coord1[1] + tmprl * grly[idx_lm][1];
-                dphi[iw][i][2] =  tmpdphi_rly * coord1[2] + tmprl * grly[idx_lm][2];
+                dphi[iw][i][0] =  tmpdphi_rly * coord1[0] + tmprl * grly[idx_lm*3];
+                dphi[iw][i][1] =  tmpdphi_rly * coord1[1] + tmprl * grly[idx_lm*3 + 1];
+                dphi[iw][i][2] =  tmpdphi_rly * coord1[2] + tmprl * grly[idx_lm*3 + 2];
             } // end iw
         }  // end i