diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 99b01f43f..2f04378f6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ repos: - id: debug-statements - id: double-quote-string-fixer - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.4.2 + rev: v0.5.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] @@ -54,7 +54,7 @@ repos: - id: pyupgrade args: [--py38-plus] # sync with requires-python - repo: https://github.com/pycqa/flake8 - rev: 7.0.0 + rev: 7.1.0 hooks: - id: flake8 additional_dependencies: @@ -114,6 +114,7 @@ repos: ^tests/| ^setup.py$| ^omnisafe/envs/classic_control/envs_from_crabs.py$| + ^omnisafe/envs/classic_control/envs_from_rcbf.py$| ^omnisafe/common/control_barrier_function/crabs/models.py$| ^omnisafe/common/control_barrier_function/crabs/optimizers.py$| ^omnisafe/common/control_barrier_function/crabs/utils.py$| diff --git a/conftest.py b/conftest.py index f3a1e8b06..266ac7a7e 100644 --- a/conftest.py +++ b/conftest.py @@ -10,6 +10,4 @@ def pytest_ignore_collect(path, config): - if os.path.basename(path) == 'meta_drive_env.py' and not meta_drive_env_available: - return True - return False + return os.path.basename(path) == 'meta_drive_env.py' and not meta_drive_env_available diff --git a/docs/source/index.rst b/docs/source/index.rst index 792f62052..402ed6203 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -394,6 +394,7 @@ this project, don't hesitate to ask your question on `the GitHub issue page `_ as an example. + +The CBF method implementation in OmniSafe revolves around the ``Adapter``, which decouples and integrates the two core components: ``dynamics model`` and ``solver``. The former predicts the dynamic changes of the environment, while the latter maps the current action to a safe space based on the given environment dynamics. + +CBF Adapter +----------- + +.. currentmodule:: omnisafe.adapter + +.. 
card:: + :class-header: sd-bg-success sd-text-white + :class-card: sd-outline-success sd-rounded-1 + + Documentation + ^^^ + + .. autoclass:: OffPolicyBarrierFunctionAdapter + :members: + +Core Components +--------------- + +Dynamics Model +"""""""""""""" + +The environmental dynamic model of the CBF method needs to be designed for a specific environment. For example, in the case of the ``Pendulum-v1`` environment, the environmental dynamics will be calculated together with variables such as mass and gravitational acceleration. + +.. code-block:: python + :linenos: + + def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: + dt = 0.05 + # gravitational constant + G = 10 + # mass + m = 2 + # length + length = 2 + # calculate the angle + theta = np.arctan2(obs[1], obs[0]) + # angular velocity + theta_dot = obs[2] + # dynamics equations + f = np.array( + [ + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * original_action * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * original_action * dt, + ], + ) + return np.squeeze(f) + +The current mainstream implementation often uses a combination of several Gaussian Process (GP) models to fit the environmental dynamics. The specific code documentation is as follows: + +.. currentmodule:: omnisafe.common + +.. card:: + :class-header: sd-bg-success sd-text-white + :class-card: sd-outline-success sd-rounded-1 + + Documentation + ^^^ + + .. autoclass:: DynamicsModel + :members: + :private-members: + +The ``solver`` is responsible for taking the feedback information from the ``dynamics model`` and mapping the often unsafe actions generated by the agent into a safe one. + +CBF Solver +"""""""""" + +.. currentmodule:: omnisafe.common + +.. card:: + :class-header: sd-bg-success sd-text-white + :class-card: sd-outline-success sd-rounded-1 + + Documentation + ^^^ + + .. 
autoclass:: PendulumSolver + :members: + :private-members: + +Architecture of methods +""""""""""""""""""""""" + +- ``DDPGCBF.learn()`` + + - ``DDPGCBF._env.rollout()`` + + - ``DDPGCBF._env.get_safe_action()`` + + - ``DDPGCBF._env.dynamics_model.get_gp_dynamics()`` + - ``DDPGCBF._env.solver.control_barrier()`` + + - ``DDPGCBF._env.dynamics_model.update_gp_dynamics()`` + + - ``DDPGCBF._update()`` + + +Further Discussion +"""""""""""""""""" + +For details on the implementation, performance, reproducible scripts, and related discussions of algorithms including DDPGCBF, please refer to: https://github.com/PKU-Alignment/omnisafe/pull/323 + + +References +---------- + +- `End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous Control Tasks `__ +- `Safe Reinforcement Learning Using Robust Control Barrier Functions `__ +- `Learning Barrier Certificates: Towards Safe Reinforcement Learning with Zero Training-time Violations `__ diff --git a/docs/source/spelling_wordlist.txt b/docs/source/spelling_wordlist.txt index 460cabd1a..958277550 100644 --- a/docs/source/spelling_wordlist.txt +++ b/docs/source/spelling_wordlist.txt @@ -486,3 +486,31 @@ UpdateDynamics mathbb meger Jupyter +compensator +CBF +Vectorize +gp +optim +cvx +QP +gpytorch +ExactGP +RBF +parallelization +compensators +thetadot +VK +Sharma +Kosaraju +Seetharaman +Sadler +Suttle +Cheng +Orosz +JW +Burdick +Vipul +Sivaranjani +Vijay +suttle +regressor diff --git a/examples/plot.py b/examples/plot.py index c16974cce..a425587a7 100644 --- a/examples/plot.py +++ b/examples/plot.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -35,6 +35,27 @@ parser.add_argument('--select', nargs='*') parser.add_argument('--exclude', nargs='*') parser.add_argument('--estimator', default='mean') + parser.add_argument( + '--reward-metrics', + type=str, + choices=[ + 'Metrics/TestEpRet', + 'Metrics/EpRet', + ], + default='Metrics/EpRet', + help='Specify the reward metric to be used.', + ) + parser.add_argument( + '--cost-metrics', + type=str, + choices=[ + 'Metrics/Max_angle_violation', + 'Metrics/TestEpCost', + 'Metrics/EpCost', + ], + default='Metrics/EpCost', + help='Specify the cost metric to be used.', + ) args = parser.parse_args() plotter = Plotter() @@ -48,4 +69,6 @@ select=args.select, exclude=args.exclude, estimator=args.estimator, + cost_metrics=args.cost_metrics, + reward_metrics=args.reward_metrics, ) diff --git a/omnisafe/adapter/__init__.py b/omnisafe/adapter/__init__.py index ba768a7eb..873eccc33 100644 --- a/omnisafe/adapter/__init__.py +++ b/omnisafe/adapter/__init__.py @@ -14,11 +14,15 @@ # ============================================================================== """Adapter for the environment and the algorithm.""" +from omnisafe.adapter.barrier_function_adapter import BarrierFunctionAdapter +from omnisafe.adapter.beta_barrier_function_adapter import BetaBarrierFunctionAdapter from omnisafe.adapter.early_terminated_adapter import EarlyTerminatedAdapter from omnisafe.adapter.modelbased_adapter import ModelBasedAdapter from omnisafe.adapter.offline_adapter import OfflineAdapter from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter +from omnisafe.adapter.offpolicy_barrier_function_adapter import OffPolicyBarrierFunctionAdapter from omnisafe.adapter.online_adapter import OnlineAdapter from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter +from omnisafe.adapter.robust_barrier_function_adapter import RobustBarrierFunctionAdapter from omnisafe.adapter.saute_adapter import SauteAdapter from omnisafe.adapter.simmer_adapter import SimmerAdapter diff --git 
a/omnisafe/adapter/barrier_function_adapter.py b/omnisafe/adapter/barrier_function_adapter.py new file mode 100644 index 000000000..c247f7705 --- /dev/null +++ b/omnisafe/adapter/barrier_function_adapter.py @@ -0,0 +1,270 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Barrier Function Adapter for OmniSafe.""" + +from __future__ import annotations + +from typing import Any + +import torch +from rich.progress import track +from sklearn.gaussian_process import GaussianProcessRegressor + +from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.common.buffer import VectorOnPolicyBuffer +from omnisafe.common.gp_model import DynamicsModel +from omnisafe.common.logger import Logger +from omnisafe.envs.wrapper import AutoReset, CostNormalize, RewardNormalize, TimeLimit, Unsqueeze +from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic +from omnisafe.utils.config import Config + + +class BarrierFunctionAdapter(OnPolicyAdapter): + """Barrier Function Adapter for OmniSafe. + + The Barrier Function Adapter is used to establish the logic of interaction between agents and + the environment based on control barrier functions. 
Its key feature is the introduction of + action compensators and barrier function solvers. + + Args: + env_id (str): The environment id. + num_envs (int): The number of parallel environments. + seed (int): The random seed. + cfgs (Config): The configuration passed from yaml file. + """ + + def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: + """Initialize an instance of :class:`BarrierFunctionAdapter`.""" + super().__init__(env_id, num_envs, seed, cfgs) + + if env_id == 'Pendulum-v1': + self.solver: PendulumSolver = PendulumSolver( + action_size=self.action_space.shape[0], # type: ignore + device=self._device, + ) + self.dynamics_model: DynamicsModel = DynamicsModel( + observation_size=self.observation_space.shape[0], # type: ignore + ) + else: + raise NotImplementedError(f'Please implement solver for {env_id} !') + self.compensator: BarrierCompensator = BarrierCompensator( + obs_dim=self.observation_space.shape[0], # type: ignore + act_dim=self.action_space.shape[0], # type: ignore + cfgs=cfgs.compensator_cfgs, + ).to(self._device) + self.first_iter: bool = True + + self.episode_rollout: dict[str, Any] = {} + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] + self.episode_rollout['approx_compensating_act'] = [] + self.episode_rollout['compensating_act'] = [] + + def _wrapper( + self, + obs_normalize: bool = False, + reward_normalize: bool = True, + cost_normalize: bool = True, + ) -> None: + """Wrapper the environment. + + .. warning:: + Since solving the optimization problem requires obtaining physical quantities with + practical significance from state observations, the Barrier Function Adapter does not + support normalization of observations. + + Args: + obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False. + reward_normalize (bool, optional): Whether to normalize the reward. Defaults to True. + cost_normalize (bool, optional): Whether to normalize the cost. 
Defaults to True. + """ + assert not obs_normalize, 'Barrier function does not support observation normalization!' + if self._env.need_time_limit_wrapper: + assert ( + self._env.max_episode_steps + ), 'You must define max_episode_steps as an integer\ + \nor cancel the use of the time_limit wrapper.' + self._env = TimeLimit( + self._env, + time_limit=self._env.max_episode_steps, + device=self._device, + ) + if self._env.need_auto_reset_wrapper: + self._env = AutoReset(self._env, device=self._device) + if reward_normalize: + self._env = RewardNormalize(self._env, device=self._device) + if cost_normalize: + self._env = CostNormalize(self._env, device=self._device) + if self._env.num_envs == 1: + self._env = Unsqueeze(self._env, device=self._device) + + def reset_gp_model(self) -> None: + """Reset the gaussian processing model of barrier function solver.""" + self.dynamics_model.reset_gp_model() + + def rollout( # pylint: disable=too-many-locals,too-many-branches + self, + steps_per_epoch: int, + agent: ConstraintActorCritic, + buffer: VectorOnPolicyBuffer, + logger: Logger, + ) -> None: + """Rollout the environment with barrier function controller. + + Args: + steps_per_epoch (int): Number of steps per epoch. + agent (ConstraintActorCritic): Constraint actor-critic, including actor , reward critic + and cost critic. + buffer (VectorOnPolicyBuffer): Vector on-policy buffer. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. 
+ """ + self._reset_log() + + obs, _ = self.reset() + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] + for step in track( + range(steps_per_epoch), + description=f'Processing rollout for epoch: {logger.current_epoch}...', + ): + with torch.no_grad(): + value_r = agent.reward_critic(obs)[0] + value_c = agent.cost_critic(obs)[0] + act_dist = agent.actor(obs) + act_mean, act_std = act_dist.mean, agent.actor.std + + safe_act = self.get_safe_action( + obs, + act_mean, + act_std, + ) + logp = agent.actor.log_prob(safe_act) + + self.episode_rollout['obs'].append(obs) + self.episode_rollout['final_act'].append(safe_act) + + next_obs, reward, cost, terminated, truncated, info = self.step(safe_act) + self._log_value(reward=reward, cost=cost, info=info) + + logger.store({'Value/reward': value_r}) + + buffer.store( + obs=obs, + act=safe_act, + reward=reward, + cost=cost, + value_r=value_r, + value_c=value_c, + logp=logp, + ) + + obs = next_obs + epoch_end = step >= steps_per_epoch + + if epoch_end: + num_dones = int(terminated.contiguous().sum()) + if self._env.num_envs - num_dones: + logger.log( + f'\nWarning: trajectory cut off when rollout by epoch\ + in {self._env.num_envs - num_dones} of {self._env.num_envs} environments.', + ) + + for idx, (done, time_out) in enumerate(zip(terminated, truncated)): + if epoch_end or done or time_out: + last_value_r = torch.zeros(1) + last_value_c = torch.zeros(1) + if not done: + if epoch_end: + _, last_value_r, last_value_c, _ = agent.step(obs[idx]) + if time_out: + _, last_value_r, last_value_c, _ = agent.step( + obs[idx], + ) + last_value_r = last_value_r.unsqueeze(0) + last_value_c = last_value_c.unsqueeze(0) + + if done or time_out: + self._log_metrics(logger, idx) + compensator_loss = self.compensator.update( + torch.cat(self.episode_rollout['obs']), + torch.cat(self.episode_rollout['approx_compensating_act']), + torch.cat(self.episode_rollout['compensating_act']), + ) + 
logger.store({'Value/Loss_compensator': compensator_loss.item()}) + self.dynamics_model.update_gp_dynamics( + obs=torch.cat(self.episode_rollout['obs']), # type: ignore + act=torch.cat(self.episode_rollout['final_act']), # type: ignore + ) + + self.episode_rollout['obs'] = [] + self.episode_rollout['final_act'] = [] + self.episode_rollout['approx_compensating_act'] = [] + self.episode_rollout['compensating_act'] = [] + + self._reset_log(idx) + obs, _ = self.reset() + buffer.finish_path(last_value_r, last_value_c, idx) + self.first_iter = False + self.reset_gp_model() + + def get_safe_action( + self, + obs: torch.Tensor, + act_mean: torch.Tensor, + act_std: torch.Tensor, + ) -> torch.Tensor: + """Computes a safe action by applying compensatory actions. + + .. note:: + This is the core method of the CBF method. Users can modify this function to implement + customized action mapping. + + Args: + obs (torch.Tensor): The current observation from the environment. + act_mean (torch.Tensor): The mean of proposed action to be controlled for safety. + act_std (torch.Tensor): The standard deviation of proposed action to be controlled for safety. + + Returns: + list(torch.Tensor): The safe actions for interaction and compensating actions for compensator training. 
+ """ + with torch.no_grad(): + approx_compensating_act = self.compensator(obs=obs) + compensated_act_mean_raw = act_mean + approx_compensating_act + + [f, g, x, std] = self.dynamics_model.get_gp_dynamics( + obs, + use_prev_model=not self.first_iter, + ) + compensating_act = self.solver.control_barrier( + original_action=compensated_act_mean_raw, + f=f, + g=g, + x=x, + std=std, + ) + + compensated_act_mean = compensated_act_mean_raw + compensating_act + safe_act = torch.normal(compensated_act_mean, act_std) + self.episode_rollout['compensating_act'].append(compensating_act) + self.episode_rollout['approx_compensating_act'].append(approx_compensating_act) + + return safe_act + + @property + def gp_models(self) -> list[GaussianProcessRegressor]: + """Return the gp models to be saved.""" + return self.dynamics_model.gp_models diff --git a/omnisafe/adapter/beta_barrier_function_adapter.py b/omnisafe/adapter/beta_barrier_function_adapter.py new file mode 100644 index 000000000..1ab488d88 --- /dev/null +++ b/omnisafe/adapter/beta_barrier_function_adapter.py @@ -0,0 +1,238 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Barrier Function Adapter with Beta Distribution for OmniSafe.""" + +from __future__ import annotations + +from typing import Callable + +import numpy as np +import torch +from rich.progress import track + +from omnisafe.adapter.onpolicy_adapter import OnPolicyAdapter +from omnisafe.common.buffer import VectorOnPolicyBuffer +from omnisafe.common.logger import Logger +from omnisafe.envs.wrapper import AutoReset, CostNormalize, RewardNormalize, TimeLimit, Unsqueeze +from omnisafe.models.actor_critic.constraint_actor_critic import ConstraintActorCritic +from omnisafe.utils.config import Config + + +# pylint: disable-next=too-many-locals +def cbf(state: np.ndarray, eta: float = 0.99) -> tuple[np.ndarray, np.ndarray]: + """Calculates the Control Barrier Function (CBF) constraints. + + Args: + state (np.ndarray | None): A numpy array containing the pendulum's current angular position + (theta) and angular velocity (thetadot). + eta (float): A scaling factor used to adjust the safety bounds. + + Returns: + tuple containing two elements: 1. The minimum control torque that keeps the pendulum within + the safety bounds. 2. The maximum control torque that keeps the pendulum within the safety + bounds. + + Raises: + ValueError: If the `eta` value is not within the open interval (0, 1). 
+ """ + g = 9.8 + m = 1 + length = 1 + tau = 5e-2 + theta_safety_bounds = [-1.0, 1.0] + torque_bounds = [-15.0, 15.0] + if (eta > 1 - 1e-3) or (eta < 1e-5): + raise ValueError('eta should be inside (0, 1)') + c1 = (3 * g) / (2 * length) + c2 = 3 / (m * (length**2)) + + theta, thetadot = state[0], state[1] + theta_min, theta_max = theta_safety_bounds[0], theta_safety_bounds[1] + thetadot_min, thetadot_max = -np.inf, np.inf + u_min1 = (1 / c2) * ( + ((1 / (tau**2)) * (-eta * (theta - theta_min) - tau * thetadot)) - c1 * np.sin(theta) + ) + u_max1 = (1 / c2) * ( + ((1 / (tau**2)) * (eta * (theta_max - theta) - tau * thetadot)) - c1 * np.sin(theta) + ) + + u_min2 = (1 / c2) * (((1 / (tau)) * (-eta * (thetadot - thetadot_min))) - c1 * np.sin(theta)) + u_max2 = (1 / c2) * (((1 / (tau)) * (eta * (thetadot_max - thetadot))) - c1 * np.sin(theta)) + + u_min = max(u_min1, u_min2, torque_bounds[0]) + u_max = min(u_max1, u_max2, torque_bounds[1]) + + return (u_min, u_max) + + +def vectorize_f(f: Callable) -> Callable: + """Vectorize the function. + + Args: + f (callable): A function that accepts 1D numpy arrays and returns a tuple (lower_bound, upper_bound). + + Returns: + callable: A vectorized function that can process batches of torch tensors and return pairs of torch tensors. + """ + + def vectorized_f_(obs: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Inner function to process the torch tensor batch. + + Args: + obs (torch.Tensor): A batch of observations as torch tensors. + + Returns: + tuple: Two torch tensors representing the lower and upper bounds for each observation in the batch. 
+ """ + device = obs.device + obs = obs.cpu().detach().numpy() + + batch_size = obs.shape[0] + lbs = torch.zeros([batch_size, 1]) + ubs = torch.zeros([batch_size, 1]) + for i in range(batch_size): + lbs[i], ubs[i] = f(obs[i]) + + lbs = torch.FloatTensor(lbs).reshape(batch_size, 1).to(device) + ubs = torch.FloatTensor(ubs).reshape(batch_size, 1).to(device) + + return lbs, ubs + + return vectorized_f_ + + +class BetaBarrierFunctionAdapter(OnPolicyAdapter): + """Barrier Function Adapter with Beta Distribution for OmniSafe. + + Args: + env_id (str): The environment id. + num_envs (int): The number of parallel environments. + seed (int): The random seed. + cfgs (Config): The configuration passed from yaml file. + """ + + def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None: + """Initialize an instance of :class:`BetaBarrierFunctionAdapte`.""" + super().__init__(env_id, num_envs, seed, cfgs) + self.constraint_fn: Callable = vectorize_f(cbf) + + def _wrapper( + self, + obs_normalize: bool = False, + reward_normalize: bool = True, + cost_normalize: bool = True, + ) -> None: + """Wrapper the environment. + + .. warning:: + Since solving the optimization problem requires obtaining physical quantities with + practical significance from state observations, the Beta Barrier Function Adapter does + not support normalization of observations. + + Args: + obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False. + reward_normalize (bool, optional): Whether to normalize the reward. Defaults to True. + cost_normalize (bool, optional): Whether to normalize the cost. Defaults to True. + """ + assert not obs_normalize, 'Barrier function does not support observation normalization!' + if self._env.need_time_limit_wrapper: + assert ( + self._env.max_episode_steps + ), 'You must define max_episode_steps as an integer\ + \nor cancel the use of the time_limit wrapper.' 
+ self._env = TimeLimit( + self._env, + time_limit=self._env.max_episode_steps, + device=self._device, + ) + if self._env.need_auto_reset_wrapper: + self._env = AutoReset(self._env, device=self._device) + if reward_normalize: + self._env = RewardNormalize(self._env, device=self._device) + if cost_normalize: + self._env = CostNormalize(self._env, device=self._device) + if self._env.num_envs == 1: + self._env = Unsqueeze(self._env, device=self._device) + + def rollout( # pylint: disable=too-many-locals + self, + steps_per_epoch: int, + agent: ConstraintActorCritic, + buffer: VectorOnPolicyBuffer, + logger: Logger, + ) -> None: + """Rollout the environment and store the data in the buffer. + + Args: + steps_per_epoch (int): Number of steps per epoch. + agent (ConstraintActorCritic): Constraint actor-critic, including actor , reward critic + and cost critic. + buffer (VectorOnPolicyBuffer): Vector on-policy buffer. + logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``. + """ + self._reset_log() + obs, _ = self.reset() + for step in track( + range(steps_per_epoch), + description=f'Processing rollout for epoch: {logger.current_epoch}...', + ): + with torch.no_grad(): + act, value_r, value_c, logp = agent.step(obs) + lb, ub = self.constraint_fn(obs) + final_act = lb + (ub - lb) * act + + next_obs, reward, cost, terminated, truncated, info = self.step(final_act) + + self._log_value(reward=reward, cost=cost, info=info) + logger.store({'Value/reward': value_r}) + + buffer.store( + obs=obs, + act=act, + reward=reward, + cost=cost, + value_r=value_r, + value_c=value_c, + logp=logp, + ) + + obs = next_obs + epoch_end = step >= steps_per_epoch + for idx, (done, time_out) in enumerate(zip(terminated, truncated)): + if epoch_end or done or time_out: + last_value_r = torch.zeros(1) + last_value_c = torch.zeros(1) + if not done: + if epoch_end: + logger.log( + f'Warning: trajectory cut off when rollout by epoch at {self._ep_len[idx]} steps.', + ) + _, last_value_r, 
last_value_c, _ = agent.step(obs[idx]) + if time_out: + _, last_value_r, last_value_c, _ = agent.step( + obs[idx], + ) + last_value_r = last_value_r.unsqueeze(0) + last_value_c = last_value_c.unsqueeze(0) + + if done or time_out: + self._log_metrics(logger, idx) + self._reset_log(idx) + + self._ep_ret[idx] = 0.0 + self._ep_cost[idx] = 0.0 + self._ep_len[idx] = 0.0 + obs, _ = self.reset() + buffer.finish_path(last_value_r, last_value_c, idx) diff --git a/omnisafe/adapter/modelbased_adapter.py b/omnisafe/adapter/modelbased_adapter.py index 8abbd90d7..6e2154531 100644 --- a/omnisafe/adapter/modelbased_adapter.py +++ b/omnisafe/adapter/modelbased_adapter.py @@ -269,8 +269,8 @@ def rollout( # pylint: disable=too-many-arguments,too-many-locals update_actor_critic_time = 0.0 update_dynamics_time = 0.0 - if use_eval: - eval_time = 0.0 + + eval_time = 0.0 epoch_steps = 0 diff --git a/omnisafe/adapter/offpolicy_barrier_function_adapter.py b/omnisafe/adapter/offpolicy_barrier_function_adapter.py new file mode 100644 index 000000000..20b4abdb8 --- /dev/null +++ b/omnisafe/adapter/offpolicy_barrier_function_adapter.py @@ -0,0 +1,256 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""OffPolicy Barrier Function Adapter for OmniSafe.""" + +from __future__ import annotations + +from typing import Any + +import torch +from sklearn.gaussian_process import GaussianProcessRegressor + +from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.common.buffer import VectorOffPolicyBuffer +from omnisafe.common.gp_model import DynamicsModel +from omnisafe.common.logger import Logger +from omnisafe.envs.wrapper import CostNormalize, RewardNormalize, Unsqueeze +from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic +from omnisafe.utils.config import Config + + +class OffPolicyBarrierFunctionAdapter(OffPolicyAdapter): + """OffPolicy Barrier Function Adapter for OmniSafe. + + :class:`OffPolicyBarrierFunctionAdapter` is used to adapt the environment with a CBF controller, + mapping the agent actions from unsafe ones to safe ones. + + Args: + env_id (str): The environment id. + num_envs (int): The number of environments. + seed (int): The random seed. + cfgs (Config): The configuration. + + Attributes: + solver (PendulumSolver): The solver used for the environment, currently supporting + ``Pendulum-v1``. + dynamics_model (DynamicsModel): The dynamics model used to predict the environment's behavior. + compensator (BarrierCompensator): The compensator used to approximate previous actions. + first_iter (bool): A flag indicating if it is the first iteration. + episode_rollout (dict[str, Any]): A dictionary to store the episode rollout information, + including observations and various actions, + useful for updating compensator. 
+    """
+
+    def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None:
+        """Initialize an instance of :class:`OffPolicyBarrierFunctionAdapter`."""
+        super().__init__(env_id, num_envs, seed, cfgs)
+
+        if env_id == 'Pendulum-v1':
+            self.solver: PendulumSolver = PendulumSolver(
+                action_size=self.action_space.shape[0],  # type: ignore
+                device=self._device,
+            )
+            self.dynamics_model: DynamicsModel = DynamicsModel(
+                observation_size=self.observation_space.shape[0],  # type: ignore
+            )
+        else:
+            raise NotImplementedError(f'Please implement solver for {env_id} !')
+        self.compensator: BarrierCompensator = BarrierCompensator(
+            obs_dim=self.observation_space.shape[0],  # type: ignore
+            act_dim=self.action_space.shape[0],  # type: ignore
+            cfgs=cfgs.compensator_cfgs,
+        ).to(self._device)
+
+        self.first_iter: bool = True
+        self.episode_rollout: dict[str, Any] = {}
+        self.episode_rollout['obs'] = []
+        self.episode_rollout['final_act'] = []
+        self.episode_rollout['approx_compensating_act'] = []
+        self.episode_rollout['compensating_act'] = []
+
+    def _wrapper(
+        self,
+        obs_normalize: bool = False,
+        reward_normalize: bool = True,
+        cost_normalize: bool = True,
+    ) -> None:
+        assert not obs_normalize, 'Barrier function does not support observation normalization!'
+        if reward_normalize:
+            self._env = RewardNormalize(self._env, device=self._device)
+        if cost_normalize:
+            self._env = CostNormalize(self._env, device=self._device)
+        if self._env.num_envs == 1:
+            self._env = Unsqueeze(self._env, device=self._device)
+
+    def eval_policy(  # pylint: disable=too-many-locals
+        self,
+        episode: int,
+        agent: ConstraintActorQCritic,
+        logger: Logger,
+    ) -> None:
+        """Rollout the environment in an evaluation environment.
+
+        Args:
+            episode (int): Number of episodes.
+            agent (ConstraintActorQCritic): Agent.
+            logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``.
+        """
+        assert self._eval_env
+        for _ in range(episode):
+            ep_ret, ep_cost, ep_len = 0.0, 0.0, 0
+            obs, _ = self._eval_env.reset()
+            obs = obs.to(self._device)
+
+            done = False
+            while not done:
+                act = agent.step(obs, deterministic=True)
+                final_act = self.get_safe_action(obs=obs, act=act, is_eval=True)
+                obs, reward, cost, terminated, truncated, info = self._eval_env.step(final_act)
+                obs, reward, cost, terminated, truncated = (
+                    torch.as_tensor(x, dtype=torch.float32, device=self._device)
+                    for x in (obs, reward, cost, terminated, truncated)
+                )
+                ep_ret += info.get('original_reward', reward).cpu()
+                ep_cost += info.get('original_cost', cost).cpu()
+                ep_len += 1
+                done = bool(terminated[0].item()) or bool(truncated[0].item())
+
+            logger.store(
+                {
+                    'Metrics/TestEpRet': ep_ret,
+                    'Metrics/TestEpCost': ep_cost,
+                    'Metrics/TestEpLen': ep_len,
+                },
+            )
+
+    def reset_gp_model(self) -> None:
+        """Reset the gaussian processing model of barrier function solver."""
+        self.dynamics_model.reset_gp_model()
+
+    def rollout(  # pylint: disable=too-many-locals
+        self,
+        rollout_step: int,
+        agent: ConstraintActorQCritic,
+        buffer: VectorOffPolicyBuffer,
+        logger: Logger,
+        use_rand_action: bool,
+    ) -> None:
+        """Rollout in off-policy manner with the ``dynamics_model``, ``solver`` and ``compensator``.
+
+        Args:
+            rollout_step (int): Number of rollout steps.
+            agent (ConstraintActorQCritic): Constraint actor-critic, including actor, reward critic,
+                and cost critic.
+            buffer (VectorOffPolicyBuffer): Vector off-policy buffer.
+            logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``.
+            use_rand_action (bool): Whether to use random action.
+        """
+        for _ in range(rollout_step):
+            if use_rand_action:
+                act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device)  # type: ignore
+            else:
+                act = agent.actor.predict(self._current_obs, deterministic=False)
+
+            final_act = self.get_safe_action(self._current_obs, act)
+
+            self.episode_rollout['obs'].append(self._current_obs)
+            self.episode_rollout['final_act'].append(final_act)
+
+            next_obs, reward, cost, terminated, truncated, info = self.step(final_act)
+            self._log_value(reward=reward, cost=cost, info=info)
+
+            buffer.store(
+                obs=self._current_obs,
+                act=act,
+                reward=reward,
+                cost=cost,
+                done=torch.logical_and(terminated, torch.logical_xor(terminated, truncated)),
+                next_obs=next_obs,
+            )
+
+            self._current_obs = next_obs
+            for idx, done in enumerate(torch.logical_or(terminated, truncated)):
+                if done:
+                    self._log_metrics(logger, idx)
+                    compensator_loss = self.compensator.update(
+                        torch.cat(self.episode_rollout['obs']),
+                        torch.cat(self.episode_rollout['approx_compensating_act']),
+                        torch.cat(self.episode_rollout['compensating_act']),
+                    )
+                    logger.store({'Value/Loss_compensator': compensator_loss.item()})
+                    self.dynamics_model.update_gp_dynamics(
+                        obs=torch.cat(self.episode_rollout['obs']),  # type: ignore
+                        act=torch.cat(self.episode_rollout['final_act']),  # type: ignore
+                    )
+
+                    self.episode_rollout['obs'] = []
+                    self.episode_rollout['final_act'] = []
+                    self.episode_rollout['approx_compensating_act'] = []
+                    self.episode_rollout['compensating_act'] = []
+
+                    self._reset_log(idx)
+                    self._current_obs, _ = self._env.reset()
+                    self.first_iter = False
+                    self.reset_gp_model()
+
+    def get_safe_action(
+        self,
+        obs: torch.Tensor,
+        act: torch.Tensor,
+        is_eval: bool = False,
+    ) -> torch.Tensor:
+        """Computes a safe action by applying compensatory actions.
+
+        .. note::
+            This is the core method of the CBF method. Users can modify this function to implement
+            customized action mapping.
+
+        Args:
+            obs (torch.Tensor): The current observation from the environment.
+            act (torch.Tensor): The proposed action to be controlled for safety.
+            is_eval (bool, optional): A flag to indicate whether this is an evaluation phase, defaulting to False.
+
+        Returns:
+            torch.Tensor: The safe action to be executed in the environment.
+        """
+        with torch.no_grad():
+            approx_compensating_act = self.compensator(obs=obs)
+            compensated_act_mean_raw = act + approx_compensating_act
+
+            [f, g, x, std] = self.dynamics_model.get_gp_dynamics(
+                obs,
+                use_prev_model=not self.first_iter,
+            )
+            compensating_act = self.solver.control_barrier(
+                original_action=compensated_act_mean_raw,
+                f=f,
+                g=g,
+                x=x,
+                std=std,
+            )
+            safe_act = compensated_act_mean_raw + compensating_act
+
+            if not is_eval:
+                self.episode_rollout['compensating_act'].append(compensating_act)
+                self.episode_rollout['approx_compensating_act'].append(approx_compensating_act)
+
+        return safe_act
+
+    @property
+    def gp_models(self) -> list[GaussianProcessRegressor]:
+        """Return the gp models to be saved."""
+        return self.dynamics_model.gp_models
diff --git a/omnisafe/adapter/robust_barrier_function_adapter.py b/omnisafe/adapter/robust_barrier_function_adapter.py
new file mode 100644
index 000000000..cc5a22e02
--- /dev/null
+++ b/omnisafe/adapter/robust_barrier_function_adapter.py
@@ -0,0 +1,222 @@
+# Copyright 2024 OmniSafe Team. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Robust Barrier Function Adapter for OmniSafe."""
+
+from __future__ import annotations
+
+from typing import Any
+
+import torch
+
+from omnisafe.adapter.offpolicy_adapter import OffPolicyAdapter
+from omnisafe.common.buffer import VectorOffPolicyBuffer
+from omnisafe.common.logger import Logger
+from omnisafe.common.robust_barrier_solver import CBFQPLayer
+from omnisafe.common.robust_gp_model import DynamicsModel
+from omnisafe.envs.wrapper import CostNormalize, RewardNormalize, Unsqueeze
+from omnisafe.models.actor_critic.constraint_actor_q_critic import ConstraintActorQCritic
+from omnisafe.typing import OmnisafeSpace
+from omnisafe.utils.config import Config
+
+
+class RobustBarrierFunctionAdapter(OffPolicyAdapter):
+    """Robust Barrier Function Adapter for OmniSafe.
+
+    :class:`RobustBarrierFunctionAdapter` is used to adapt the environment with RCBF controller.
+
+    Args:
+        env_id (str): The environment id.
+        num_envs (int): The number of environments.
+        seed (int): The random seed.
+        cfgs (Config): The configuration.
+    """
+
+    def __init__(self, env_id: str, num_envs: int, seed: int, cfgs: Config) -> None:
+        """Initialize an instance of :class:`RobustBarrierFunctionAdapter`."""
+        super().__init__(env_id, num_envs, seed, cfgs)
+        self.solver: CBFQPLayer
+        self.dynamics_model: DynamicsModel
+        self._current_steps = 0
+        self._num_episodes = 0
+
+    def _wrapper(
+        self,
+        obs_normalize: bool = False,
+        reward_normalize: bool = True,
+        cost_normalize: bool = True,
+    ) -> None:
+        """Wrap the environment.
+
+        .. warning::
+            Since solving the optimization problem requires obtaining physical quantities with
+            practical significance from state observations, the Barrier Function Adapter does not
+            support normalization of observations.
+
+        Args:
+            obs_normalize (bool, optional): Whether to normalize the observation. Defaults to False.
+            reward_normalize (bool, optional): Whether to normalize the reward. Defaults to True.
+            cost_normalize (bool, optional): Whether to normalize the cost. Defaults to True.
+        """
+        assert not obs_normalize, 'Barrier function does not support observation normalization!'
+        if reward_normalize:
+            self._env = RewardNormalize(self._env, device=self._device)
+        if cost_normalize:
+            self._env = CostNormalize(self._env, device=self._device)
+        if self._env.num_envs == 1:
+            self._env = Unsqueeze(self._env, device=self._device)
+
+    def set_solver(self, solver: CBFQPLayer) -> None:
+        """Set the barrier function solver."""
+        self.solver = solver
+        self.solver.env = self._env  # type: ignore
+
+    def set_dynamics_model(self, dynamics_model: DynamicsModel) -> None:
+        """Set the dynamics model."""
+        self.dynamics_model = dynamics_model
+        self.dynamics_model.env = self._env  # type: ignore
+
+    def eval_policy(  # pylint: disable=too-many-locals
+        self,
+        episode: int,
+        agent: ConstraintActorQCritic,
+        logger: Logger,
+    ) -> None:
+        """Rollout the environment with deterministic agent action.
+
+        Args:
+            episode (int): Number of episodes.
+            agent (ConstraintActorQCritic): Agent.
+            logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``.
+        """
+        assert self._eval_env
+        for _ in range(episode):
+            ep_ret, ep_cost, ep_len = 0.0, 0.0, 0
+            obs, _ = self._eval_env.reset()
+            obs = obs.to(self._device)
+
+            done = False
+            while not done:
+                act = agent.step(obs, deterministic=True)
+                obs, reward, cost, terminated, truncated, info = self._eval_env.step(act)
+                obs, reward, cost, terminated, truncated = (
+                    torch.as_tensor(x, dtype=torch.float32, device=self._device)
+                    for x in (obs, reward, cost, terminated, truncated)
+                )
+                ep_ret += info.get('original_reward', reward).cpu()
+                ep_cost += info.get('original_cost', cost).cpu()
+                ep_len += 1
+                done = bool(terminated[0].item()) or bool(truncated[0].item())
+
+            logger.store(
+                {
+                    'Metrics/TestEpRet': ep_ret,
+                    'Metrics/TestEpCost': ep_cost,
+                    'Metrics/TestEpLen': ep_len,
+                },
+            )
+
+    def rollout(  # pylint: disable=too-many-locals
+        self,
+        rollout_step: int,
+        agent: ConstraintActorQCritic,
+        buffer: VectorOffPolicyBuffer,
+        logger: Logger,
+        use_rand_action: bool,
+    ) -> None:
+        """Rollout the environment and store the data in the buffer.
+
+        .. warning::
+            As OmniSafe uses :class:`AutoReset` wrapper, the environment will be reset automatically,
+            so the final observation will be stored in ``info['final_observation']``.
+
+        Args:
+            rollout_step (int): Number of rollout steps.
+            agent (ConstraintActorQCritic): Constraint actor-critic, including actor, reward critic,
+                and cost critic.
+            buffer (VectorOffPolicyBuffer): Vector off-policy buffer.
+            logger (Logger): Logger, to log ``EpRet``, ``EpCost``, ``EpLen``.
+            use_rand_action (bool): Whether to use random action.
+        """
+        for _ in range(rollout_step):
+            state = self.dynamics_model.get_state(self._current_obs)
+            self._current_steps += 1
+            if use_rand_action:
+                act = (torch.rand(self.action_space.shape) * 2 - 1).unsqueeze(0).to(self._device)  # type: ignore
+            else:
+                act = agent.step(self._current_obs, deterministic=False)
+
+            final_act = self.get_safe_action(obs=self._current_obs, act=act)
+
+            next_obs, reward, cost, terminated, truncated, info = self.step(final_act)
+            self._log_value(reward=reward, cost=cost, info=info)
+
+            buffer.store(
+                obs=self._current_obs,
+                act=final_act,
+                reward=reward,
+                cost=cost,
+                done=torch.logical_and(terminated, torch.logical_xor(terminated, truncated)),
+                next_obs=next_obs,
+            )
+
+            if (
+                self._ep_len[0] % 2 == 0
+                and self._num_episodes < self._cfgs.dynamics_model_cfgs.gp_max_episodes
+            ):
+                next_state = self.dynamics_model.get_state(next_obs)
+                self.dynamics_model.append_transition(
+                    state.cpu().detach().numpy(),
+                    final_act.cpu().detach().numpy(),
+                    next_state.cpu().detach().numpy(),
+                )
+
+            self._current_obs = next_obs
+            for idx, done in enumerate(torch.logical_or(terminated, truncated)):
+                if done:
+                    self._log_metrics(logger, idx)
+                    self._reset_log(idx)
+                    self._num_episodes += 1
+                    self._current_obs, _ = self._env.reset()
+
+    @property
+    def safe_action_space(self) -> OmnisafeSpace:
+        """Return the action space in the safe domain."""
+        if hasattr(self._env, 'safe_action_space'):
+            return self._env.safe_action_space
+        return self._env.action_space
+
+    def get_safe_action(self, obs: torch.Tensor, act: torch.Tensor) -> torch.Tensor:
+        """Computes a safe action by applying robust barrier function.
+
+        Args:
+            obs (torch.Tensor): The current observation from the environment.
+            act (torch.Tensor): The proposed action to be evaluated for safety.
+
+        Returns:
+            torch.Tensor: The safe action to be executed in the environment.
+        """
+        state_batch = self.dynamics_model.get_state(obs)
+        mean_pred_batch, sigma_pred_batch = self.dynamics_model.predict_disturbance(state_batch)
+
+        return self.solver.get_safe_action(
+            state_batch,
+            act,
+            mean_pred_batch,
+            sigma_pred_batch,
+        )
+
+    def __getattr__(self, name: str) -> Any:
+        """Return the unwrapped environment attributes."""
+        return getattr(self._env, name)
diff --git a/omnisafe/algorithms/__init__.py b/omnisafe/algorithms/__init__.py
index df6832226..da82ecbea 100644
--- a/omnisafe/algorithms/__init__.py
+++ b/omnisafe/algorithms/__init__.py
@@ -27,9 +27,11 @@
 from omnisafe.algorithms.off_policy import (
     CRABS,
     DDPG,
+    DDPGCBF,
     DDPGPID,
     SAC,
     SACPID,
+    SACRCBF,
     TD3,
     TD3PID,
     DDPGLag,
@@ -51,10 +53,12 @@
     PPO,
     RCPO,
     TRPO,
+    TRPOCBF,
     TRPOPID,
     NaturalPG,
     OnCRPO,
     PolicyGradient,
+    PPOBetaCBF,
     PPOEarlyTerminated,
     PPOLag,
     PPOSaute,
diff --git a/omnisafe/algorithms/off_policy/__init__.py b/omnisafe/algorithms/off_policy/__init__.py
index 80e48e1a0..1e14ebd26 100644
--- a/omnisafe/algorithms/off_policy/__init__.py
+++ b/omnisafe/algorithms/off_policy/__init__.py
@@ -16,11 +16,13 @@
 from omnisafe.algorithms.off_policy.crabs import CRABS
 from omnisafe.algorithms.off_policy.ddpg import DDPG
+from omnisafe.algorithms.off_policy.ddpg_cbf import DDPGCBF
 from omnisafe.algorithms.off_policy.ddpg_lag import DDPGLag
 from omnisafe.algorithms.off_policy.ddpg_pid import DDPGPID
 from omnisafe.algorithms.off_policy.sac import SAC
 from omnisafe.algorithms.off_policy.sac_lag import SACLag
 from omnisafe.algorithms.off_policy.sac_pid import SACPID
+from omnisafe.algorithms.off_policy.sac_rcbf import SACRCBF
 from omnisafe.algorithms.off_policy.td3 import TD3
 from omnisafe.algorithms.off_policy.td3_lag import TD3Lag
 from omnisafe.algorithms.off_policy.td3_pid import TD3PID
 
@@ -36,5 +38,7 @@
     'DDPGPID',
     'TD3PID',
     'SACPID',
+    'SACRCBF',
+    'DDPGCBF',
     'CRABS',
 ]
diff --git a/omnisafe/algorithms/off_policy/ddpg.py b/omnisafe/algorithms/off_policy/ddpg.py
index 517d8c0be..0d698e5f2 100644
--- a/omnisafe/algorithms/off_policy/ddpg.py
+++ b/omnisafe/algorithms/off_policy/ddpg.py
@@ -188,14 +188,9 @@ def _init_log(self) -> None:
             config=self._cfgs,
         )
 
-        what_to_save: dict[str, Any] = {}
-        what_to_save['pi'] = self._actor_critic.actor
-        if self._cfgs.algo_cfgs.obs_normalize:
-            obs_normalizer = self._env.save()['obs_normalizer']
-            what_to_save['obs_normalizer'] = obs_normalizer
-
-        self._logger.setup_torch_saver(what_to_save)
+        self._setup_torch_saver()
         self._logger.torch_save()
+        self._specific_save()
 
         self._logger.register_key(
             'Metrics/EpRet',
@@ -338,6 +333,7 @@ def learn(self) -> tuple[float, float, float]:
             # save model to disk
             if (epoch + 1) % self._cfgs.logger_cfgs.save_model_freq == 0:
                 self._logger.torch_save()
+                self._specific_save()
 
             ep_ret = self._logger.get_stats('Metrics/EpRet')[0]
             ep_cost = self._logger.get_stats('Metrics/EpCost')[0]
@@ -562,3 +558,21 @@ def _log_when_not_update(self) -> None:
                 'Value/cost_critic': 0.0,
             },
         )
+
+    def _setup_torch_saver(self) -> None:
+        """Define what needs to be saved below.
+
+        OmniSafe's main storage interface is based on PyTorch. If you need to save models in other
+        formats, please use :meth:`_specific_save`.
+        """
+        what_to_save: dict[str, Any] = {}
+
+        what_to_save['pi'] = self._actor_critic.actor
+        if self._cfgs.algo_cfgs.obs_normalize:
+            obs_normalizer = self._env.save()['obs_normalizer']
+            what_to_save['obs_normalizer'] = obs_normalizer
+
+        self._logger.setup_torch_saver(what_to_save)
+
+    def _specific_save(self) -> None:
+        """Save some algorithms specific models other than PyTorch format per epoch."""
diff --git a/omnisafe/algorithms/off_policy/ddpg_cbf.py b/omnisafe/algorithms/off_policy/ddpg_cbf.py
new file mode 100644
index 000000000..6df1fcbb3
--- /dev/null
+++ b/omnisafe/algorithms/off_policy/ddpg_cbf.py
@@ -0,0 +1,91 @@
+# Copyright 2024 OmniSafe Team. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of the DDPG algorithm with Control Barrier Function."""
+# mypy: ignore-errors
+
+
+from __future__ import annotations
+
+import os
+
+import joblib
+
+from omnisafe.adapter.offpolicy_barrier_function_adapter import OffPolicyBarrierFunctionAdapter
+from omnisafe.algorithms import registry
+from omnisafe.algorithms.off_policy.ddpg import DDPG
+from omnisafe.typing import Any
+from omnisafe.utils.distributed import get_rank
+
+
+@registry.register
+# pylint: disable-next=too-many-instance-attributes, too-few-public-methods
+class DDPGCBF(DDPG):
+    """The DDPG algorithm with CBF.
+
+    References:
+        - Title: End-to-end safe reinforcement learning through barrier functions for
+            safety-critical continuous control tasks
+        - Authors: R Cheng, G Orosz, RM Murray, JW Burdick.
+        - URL: `DDPGCBF <https://arxiv.org/abs/1903.08792>`_
+    """
+
+    def _init_env(self) -> None:
+        super()._init_env()
+        self._env: OffPolicyBarrierFunctionAdapter = OffPolicyBarrierFunctionAdapter(
+            self._env_id,
+            self._cfgs.train_cfgs.vector_env_nums,
+            self._seed,
+            self._cfgs,
+        )
+
+    def _init_log(self) -> None:
+        """Log the DDPGCBF specific information.
+
+        +----------------------------+---------------------------------+
+        | Things to log              | Description                     |
+        +============================+=================================+
+        | Value/Loss_compensator     | The Loss of action compensator. |
+        +----------------------------+---------------------------------+
+        """
+        super()._init_log()
+        self._logger.register_key('Value/Loss_compensator')
+
+    def _specific_save(self) -> None:
+        """Save some algorithms specific models per epoch."""
+        super()._specific_save()
+        if get_rank() == 0:
+            path = os.path.join(
+                self._logger.log_dir,
+                'gp_model_save',
+                f'gaussian_process_regressor_{self._logger.current_epoch}.pkl',
+            )
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            joblib.dump(self._env.gp_models, path)
+
+    def _setup_torch_saver(self) -> None:
+        """Define what needs to be saved below.
+
+        OmniSafe's main storage interface is based on PyTorch. If you need to save models in other
+        formats, please use :meth:`_specific_save`.
+        """
+        what_to_save: dict[str, Any] = {}
+
+        what_to_save['pi'] = self._actor_critic.actor
+        what_to_save['compensator'] = self._env.compensator
+        if self._cfgs.algo_cfgs.obs_normalize:
+            obs_normalizer = self._env.save()['obs_normalizer']
+            what_to_save['obs_normalizer'] = obs_normalizer
+
+        self._logger.setup_torch_saver(what_to_save)
diff --git a/omnisafe/algorithms/off_policy/sac_rcbf.py b/omnisafe/algorithms/off_policy/sac_rcbf.py
new file mode 100644
index 000000000..598c4a14c
--- /dev/null
+++ b/omnisafe/algorithms/off_policy/sac_rcbf.py
@@ -0,0 +1,190 @@
+# Copyright 2024 OmniSafe Team. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of the Soft Actor-Critic algorithm with Robust Control Barrier Function."""
+# mypy: ignore-errors
+
+from __future__ import annotations
+
+import os
+
+import torch
+from torch import nn
+from torch.nn.utils.clip_grad import clip_grad_norm_
+
+from omnisafe.adapter.robust_barrier_function_adapter import RobustBarrierFunctionAdapter
+from omnisafe.algorithms import registry
+from omnisafe.algorithms.off_policy.sac import SAC
+from omnisafe.common.robust_barrier_solver import CBFQPLayer
+from omnisafe.common.robust_gp_model import DynamicsModel
+from omnisafe.utils.distributed import get_rank
+
+
+@registry.register
+# pylint: disable-next=too-many-instance-attributes, too-few-public-methods
+class SACRCBF(SAC):
+    """The Soft Actor-Critic algorithm with Robust Control Barrier Function.
+
+    References:
+        - Title: The Soft Actor-Critic algorithm with Robust Control Barrier Function
+        - Authors: Tuomas Haarnoja, Aurick Zhou, Pieter Abbeel, Sergey Levine.
+        - URL: `SAC <https://arxiv.org/abs/1801.01290>`_
+    """
+
+    def _init_env(self) -> None:
+        self._env: RobustBarrierFunctionAdapter = RobustBarrierFunctionAdapter(
+            self._env_id,
+            self._cfgs.train_cfgs.vector_env_nums,
+            self._seed,
+            self._cfgs,
+        )
+        if self._env_id == 'Unicycle':
+            solver = CBFQPLayer(
+                env=self._env,
+                device=self._cfgs.train_cfgs.device,
+                gamma_b=self._cfgs.cbf_cfgs.gamma_b,
+                l_p=self._cfgs.cbf_cfgs.l_p,
+            )
+            dynamics_model = DynamicsModel(env=self._env)
+        else:
+            raise NotImplementedError(f'Please implement solver for {self._env_id} !')
+
+        self._env.set_dynamics_model(dynamics_model=dynamics_model)
+        self._env.set_solver(solver=solver)
+
+        assert (
+            self._cfgs.algo_cfgs.steps_per_epoch % self._cfgs.train_cfgs.vector_env_nums == 0
+        ), 'The number of steps per epoch is not divisible by the number of environments.'
+
+        assert (
+            int(self._cfgs.train_cfgs.total_steps) % self._cfgs.algo_cfgs.steps_per_epoch == 0
+        ), 'The total number of steps is not divisible by the number of steps per epoch.'
+        self._epochs: int = int(
+            self._cfgs.train_cfgs.total_steps // self._cfgs.algo_cfgs.steps_per_epoch,
+        )
+        self._epoch: int = 0
+        self._steps_per_epoch: int = (
+            self._cfgs.algo_cfgs.steps_per_epoch // self._cfgs.train_cfgs.vector_env_nums
+        )
+
+        self._update_cycle: int = self._cfgs.algo_cfgs.update_cycle
+        assert (
+            self._steps_per_epoch % self._update_cycle == 0
+        ), 'The number of steps per epoch is not divisible by the number of steps per sample.'
+        self._samples_per_epoch: int = self._steps_per_epoch // self._update_cycle
+        self._update_count: int = 0
+
+    def _update_actor(
+        self,
+        obs: torch.Tensor,
+    ) -> None:
+        super()._update_actor(obs)
+
+        if self._cfgs.algo_cfgs.auto_alpha:
+            with torch.no_grad():
+                action = self._actor_critic.actor.predict(obs, deterministic=False)
+                action = self._env.get_safe_action(obs, action)
+                log_prob = self._actor_critic.actor.log_prob(action)
+            alpha_loss = -self._log_alpha * (log_prob + self._target_entropy).mean()
+
+            self._alpha_optimizer.zero_grad()
+            alpha_loss.backward()
+            self._alpha_optimizer.step()
+            self._logger.store(
+                {
+                    'Loss/alpha_loss': alpha_loss.mean().item(),
+                },
+            )
+        self._logger.store(
+            {
+                'Value/alpha': self._alpha,
+            },
+        )
+
+    def _update_reward_critic(
+        self,
+        obs: torch.Tensor,
+        action: torch.Tensor,
+        reward: torch.Tensor,
+        done: torch.Tensor,
+        next_obs: torch.Tensor,
+    ) -> None:
+        with torch.no_grad():
+            next_action = self._actor_critic.actor.predict(next_obs, deterministic=False)
+            next_action = self._env.get_safe_action(next_obs, next_action)
+            next_logp = self._actor_critic.actor.log_prob(next_action)
+            next_q1_value_r, next_q2_value_r = self._actor_critic.target_reward_critic(
+                next_obs,
+                next_action,
+            )
+            next_q_value_r = torch.min(next_q1_value_r, next_q2_value_r) - next_logp * self._alpha
+
+            target_q_value_r = reward + self._cfgs.algo_cfgs.gamma * (1 - done) * next_q_value_r
+
+        q1_value_r, q2_value_r = self._actor_critic.reward_critic(obs, action)
+        loss = nn.functional.mse_loss(q1_value_r, target_q_value_r) + nn.functional.mse_loss(
+            q2_value_r,
+            target_q_value_r,
+        )
+
+        if self._cfgs.algo_cfgs.use_critic_norm:
+            for param in self._actor_critic.reward_critic.parameters():
+                loss += param.pow(2).sum() * self._cfgs.algo_cfgs.critic_norm_coeff
+
+        self._actor_critic.reward_critic_optimizer.zero_grad()
+        loss.backward()
+
+        if self._cfgs.algo_cfgs.max_grad_norm:
+            clip_grad_norm_(
+                self._actor_critic.reward_critic.parameters(),
+                self._cfgs.algo_cfgs.max_grad_norm,
+            )
+        self._actor_critic.reward_critic_optimizer.step()
+        self._logger.store(
+            {
+                'Loss/Loss_reward_critic': loss.mean().item(),
+                'Value/reward_critic': q1_value_r.mean().item(),
+            },
+        )
+
+    def _loss_pi(
+        self,
+        obs: torch.Tensor,
+    ) -> torch.Tensor:
+        action = self._actor_critic.actor.predict(obs, deterministic=False)
+        action = self._env.get_safe_action(obs, action)
+        log_prob = self._actor_critic.actor.log_prob(action)
+        q1_value_r, q2_value_r = self._actor_critic.reward_critic(obs, action)
+        return (self._alpha * log_prob - torch.min(q1_value_r, q2_value_r)).mean()
+
+    def _specific_save(self) -> None:
+        """Save some algorithms specific models per epoch."""
+        super()._specific_save()
+        if get_rank() == 0:
+            path = os.path.join(self._logger.log_dir, 'gp_model_save')
+            os.makedirs(path, exist_ok=True)
+            train_x = self._env.dynamics_model.train_x
+            train_y = self._env.dynamics_model.train_y
+            disturb_estimators = self._env.dynamics_model.disturb_estimators
+            weights = []
+            for disturb_estimator in disturb_estimators:
+                weights.append(disturb_estimator.model.state_dict())
+            torch.save(weights, os.path.join(path, f'gp_models_{self._logger.current_epoch}.pkl'))
+            torch.save(
+                train_x,
+                os.path.join(path, f'gp_models_train_x_{self._logger.current_epoch}.pkl'),
+            )
+            torch.save(
+                train_y,
+                os.path.join(path, f'gp_models_train_y_{self._logger.current_epoch}.pkl'),
+            )
diff --git a/omnisafe/algorithms/on_policy/__init__.py b/omnisafe/algorithms/on_policy/__init__.py
index 722ce0b11..8351ecf2d 100644
--- a/omnisafe/algorithms/on_policy/__init__.py
+++ b/omnisafe/algorithms/on_policy/__init__.py
@@ -15,6 +15,7 @@
 """On-policy algorithms."""
 
 from omnisafe.algorithms.on_policy import (
+    barrier_function,
     base,
     early_terminated,
     first_order,
@@ -26,6 +27,7 @@
     second_order,
     simmer,
 )
+from omnisafe.algorithms.on_policy.barrier_function import TRPOCBF, PPOBetaCBF
 from omnisafe.algorithms.on_policy.base import PPO, TRPO, NaturalPG, PolicyGradient
 from omnisafe.algorithms.on_policy.early_terminated import PPOEarlyTerminated, TRPOEarlyTerminated
 from omnisafe.algorithms.on_policy.first_order import CUP, FOCOPS
@@ -49,4 +51,5 @@
     *saute.__all__,
     *second_order.__all__,
     *simmer.__all__,
+    *barrier_function.__all__,
 ]
diff --git a/omnisafe/algorithms/on_policy/barrier_function/__init__.py b/omnisafe/algorithms/on_policy/barrier_function/__init__.py
new file mode 100644
index 000000000..dacdc3c4d
--- /dev/null
+++ b/omnisafe/algorithms/on_policy/barrier_function/__init__.py
@@ -0,0 +1,24 @@
+# Copyright 2023 OmniSafe Team. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Control Barrier Function Safe Reinforcement Learning algorithms."""
+
+from omnisafe.algorithms.on_policy.barrier_function.ppo_cbf import PPOBetaCBF
+from omnisafe.algorithms.on_policy.barrier_function.trpo_cbf import TRPOCBF
+
+
+__all__ = [
+    'TRPOCBF',
+    'PPOBetaCBF',
+]
diff --git a/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py
new file mode 100644
index 000000000..4ab2f4d17
--- /dev/null
+++ b/omnisafe/algorithms/on_policy/barrier_function/ppo_cbf.py
@@ -0,0 +1,85 @@
+# Copyright 2023 OmniSafe Team. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of the PPO algorithm with Control Barrier Function and Beta Actor."""
+
+from __future__ import annotations
+
+import torch
+
+from omnisafe.adapter.beta_barrier_function_adapter import BetaBarrierFunctionAdapter
+from omnisafe.algorithms import registry
+from omnisafe.algorithms.on_policy.base.ppo import PPO
+from omnisafe.utils import distributed
+
+
+@registry.register
+class PPOBetaCBF(PPO):
+    """The PPO algorithm with CBF and Beta Actor.
+
+    References:
+        - Title: Sampling-based Safe Reinforcement Learning for Nonlinear Dynamical Systems
+        - Authors: Wesley A. Suttle, Vipul K. Sharma, Krishna C. Kosaraju, S. Sivaranjani, Ji Liu,
+            Vijay Gupta, Brian M. Sadler.
+        - URL: `PPOBetaCBF <https://arxiv.org/abs/2403.04007>`_
+    """
+
+    def _init_env(self) -> None:
+        self._env: BetaBarrierFunctionAdapter = BetaBarrierFunctionAdapter(
+            self._env_id,
+            self._cfgs.train_cfgs.vector_env_nums,
+            self._seed,
+            self._cfgs,
+        )
+        assert (self._cfgs.algo_cfgs.steps_per_epoch) % (
+            distributed.world_size() * self._cfgs.train_cfgs.vector_env_nums
+        ) == 0, 'The number of steps per epoch is not divisible by the number of environments.'
+        self._steps_per_epoch: int = (
+            self._cfgs.algo_cfgs.steps_per_epoch
+            // distributed.world_size()
+            // self._cfgs.train_cfgs.vector_env_nums
+        )
+
+    def _loss_pi(
+        self,
+        obs: torch.Tensor,
+        act: torch.Tensor,
+        logp: torch.Tensor,
+        adv: torch.Tensor,
+    ) -> torch.Tensor:
+        r"""Computing pi/actor loss.
+
+        This section of the logic is consistent with PPO, except that it does not record the
+        standard deviation of the actor distribution.
+        """
+        distribution = self._actor_critic.actor(obs)
+        logp_ = self._actor_critic.actor.log_prob(act)
+        ratio = torch.exp(logp_ - logp)
+        ratio_clipped = torch.clamp(
+            ratio,
+            1 - self._cfgs.algo_cfgs.clip,
+            1 + self._cfgs.algo_cfgs.clip,
+        )
+        loss = -torch.min(ratio * adv, ratio_clipped * adv).mean()
+        loss -= self._cfgs.algo_cfgs.entropy_coef * distribution.entropy().mean()
+        # useful extra info
+        entropy = distribution.entropy().mean().item()
+        self._logger.store(
+            {
+                'Train/Entropy': entropy,
+                'Train/PolicyRatio': ratio,
+                'Loss/Loss_pi': loss.mean().item(),
+            },
+        )
+        return loss
diff --git a/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py
new file mode 100644
index 000000000..0324170c4
--- /dev/null
+++ b/omnisafe/algorithms/on_policy/barrier_function/trpo_cbf.py
@@ -0,0 +1,89 @@
+# Copyright 2024 OmniSafe Team. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Implementation of the TRPO algorithm with Control Barrier Function."""
+# mypy: ignore-errors
+
+from __future__ import annotations
+
+import os
+
+import joblib
+
+from omnisafe.adapter.barrier_function_adapter import BarrierFunctionAdapter
+from omnisafe.algorithms import registry
+from omnisafe.algorithms.on_policy.base.trpo import TRPO
+from omnisafe.typing import Any
+from omnisafe.utils.distributed import get_rank
+
+
+@registry.register
+class TRPOCBF(TRPO):
+    """The TRPO algorithm with CBF.
+
+    References:
+        - Title: End-to-end safe reinforcement learning through barrier functions for
+            safety-critical continuous control tasks
+        - Authors: R Cheng, G Orosz, RM Murray, JW Burdick.
+        - URL: `TRPOCBF <https://arxiv.org/abs/1903.08792>`_
+    """
+
+    def _init_log(self) -> None:
+        """Log the TRPOCBF specific information.
+
+        +----------------------------+---------------------------------+
+        | Things to log              | Description                     |
+        +============================+=================================+
+        | Value/Loss_compensator     | The Loss of action compensator. |
+        +----------------------------+---------------------------------+
+        """
+        super()._init_log()
+        self._logger.register_key('Value/Loss_compensator')
+
+    def _init_env(self) -> None:
+        super()._init_env()
+        self._env: BarrierFunctionAdapter = BarrierFunctionAdapter(
+            self._env_id,
+            self._cfgs.train_cfgs.vector_env_nums,
+            self._seed,
+            self._cfgs,
+        )
+
+    def _specific_save(self) -> None:
+        """Save some algorithms specific models per epoch."""
+        super()._specific_save()
+        if get_rank() == 0:
+            path = os.path.join(
+                self._logger.log_dir,
+                'gp_model_save',
+                f'gaussian_process_regressor_{self._logger.current_epoch}.pkl',
+            )
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            joblib.dump(self._env.gp_models, path)
+
+    def _setup_torch_saver(self) -> None:
+        """Define what needs to be saved below.
+
+        OmniSafe's main storage interface is based on PyTorch. If you need to save models in other
+        formats, please use :meth:`_specific_save`.
+        """
+        what_to_save: dict[str, Any] = {}
+
+        what_to_save['pi'] = self._actor_critic.actor
+        what_to_save['compensator'] = self._env.compensator
+        if self._cfgs.algo_cfgs.obs_normalize:
+            obs_normalizer = self._env.save()['obs_normalizer']
+            what_to_save['obs_normalizer'] = obs_normalizer
+
+        self._logger.setup_torch_saver(what_to_save)
diff --git a/omnisafe/algorithms/on_policy/base/policy_gradient.py b/omnisafe/algorithms/on_policy/base/policy_gradient.py
index e0792d6ab..cb144922a 100644
--- a/omnisafe/algorithms/on_policy/base/policy_gradient.py
+++ b/omnisafe/algorithms/on_policy/base/policy_gradient.py
@@ -180,13 +180,9 @@ def _init_log(self) -> None:
             config=self._cfgs,
         )
 
-        what_to_save: dict[str, Any] = {}
-        what_to_save['pi'] = self._actor_critic.actor
-        if self._cfgs.algo_cfgs.obs_normalize:
-            obs_normalizer = self._env.save()['obs_normalizer']
-            what_to_save['obs_normalizer'] = obs_normalizer
-        self._logger.setup_torch_saver(what_to_save)
+        self._setup_torch_saver()
         self._logger.torch_save()
+        self._specific_save()
self._logger.register_key( 'Metrics/EpRet', @@ -296,6 +292,7 @@ def learn(self) -> tuple[float, float, float]: epoch + 1 ) == self._cfgs.train_cfgs.epochs: self._logger.torch_save() + self._specific_save() ep_ret = self._logger.get_stats('Metrics/EpRet')[0] ep_cost = self._logger.get_stats('Metrics/EpCost')[0] @@ -586,3 +583,21 @@ def _loss_pi( }, ) return loss + + def _setup_torch_saver(self) -> None: + """Define what needs to be saved below. + + OmniSafe's main storage interface is based on PyTorch. If you need to save models in other + formats, please use :meth:`_specific_save`. + """ + what_to_save: dict[str, Any] = {} + + what_to_save['pi'] = self._actor_critic.actor + if self._cfgs.algo_cfgs.obs_normalize: + obs_normalizer = self._env.save()['obs_normalizer'] + what_to_save['obs_normalizer'] = obs_normalizer + + self._logger.setup_torch_saver(what_to_save) + + def _specific_save(self) -> None: + """Save algorithm-specific models in formats other than PyTorch per epoch.""" diff --git a/omnisafe/common/__init__.py b/omnisafe/common/__init__.py index 9e4fc1bf1..c1311f150 100644 --- a/omnisafe/common/__init__.py +++ b/omnisafe/common/__init__.py @@ -14,6 +14,9 @@ # ============================================================================== """Common Common utilities for OmniSafe.""" +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver +from omnisafe.common.gp_model import DynamicsModel from omnisafe.common.lagrange import Lagrange from omnisafe.common.logger import Logger from omnisafe.common.normalizer import Normalizer diff --git a/omnisafe/common/barrier_comp.py b/omnisafe/common/barrier_comp.py new file mode 100644 index 000000000..64d1af104 --- /dev/null +++ b/omnisafe/common/barrier_comp.py @@ -0,0 +1,95 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of Compensator Used in Control Barrier Function.""" + + +from __future__ import annotations + +import torch +from torch import optim + +from omnisafe.utils.config import Config +from omnisafe.utils.model import build_mlp_network + + +class BarrierCompensator(torch.nn.Module): + """A module that represents a barrier compensator using a multi-layer perceptron (MLP) network. + + This module is designed to compute actions based on observations, with the intention of + compensating for potential barriers in a control system or a similar application. It is built + upon a configurable MLP network and trained using an optimization routine. + + Attributes: + obs_dim (int): Dimension of the observation space. + act_dim (int): Dimension of the action space. + _cfgs (Config): Configuration parameters for the MLP network and training. + model (torch.nn.Module): The MLP network. + optimizer (torch.optim.Optimizer): The optimizer for training the network. + + Args: + obs_dim (int): Dimension of the observation space. + act_dim (int): Dimension of the action space. + cfgs (Config): Configuration parameters for the network and training. 
+ """ + + def __init__(self, obs_dim: int, act_dim: int, cfgs: Config) -> None: + """Initialize the action compensator.""" + super().__init__() + self._cfgs: Config = cfgs + self.model: torch.nn.Module = build_mlp_network( + sizes=[obs_dim, *self._cfgs.hidden_sizes, act_dim], + activation=self._cfgs.activation, + weight_initialization_mode=self._cfgs.weight_initialization_mode, + ) + self.optimizer: optim.Adam = optim.Adam(self.parameters(), lr=self._cfgs.lr) + + def forward(self, obs: torch.Tensor) -> torch.Tensor: + """Estimate the sum of previous compensating actions. + + Args: + obs (torch.Tensor): The input observation. + + Returns: + torch.Tensor: The estimation of previous compensating actions. + """ + return self.model(obs) + + def update( + self, + observation: torch.Tensor, + approx_compensating_act: torch.Tensor, + compensating_act: torch.Tensor, + ) -> torch.Tensor: + """Train the barrier compensator model. + + This method updates the model parameters to minimize the difference between the model's output and the + target, which is a combination of approximate compensating action and compensating action. + + Args: + observation (torch.Tensor): The observation data. + approx_compensating_act (torch.Tensor): The approximate compensating action. + compensating_act (torch.Tensor): The actual compensating action. + + Returns: + torch.Tensor: The loss after training. + """ + for _ in range(self._cfgs.update_iters): + target = approx_compensating_act + compensating_act + self.optimizer.zero_grad() + loss = torch.pow((self(observation) - target), 2).mean() + loss.backward() + self.optimizer.step() + + return loss diff --git a/omnisafe/common/barrier_solver.py b/omnisafe/common/barrier_solver.py new file mode 100644 index 000000000..f281fd0e6 --- /dev/null +++ b/omnisafe/common/barrier_solver.py @@ -0,0 +1,192 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of the Control Barrier Function Solver.""" + +# pylint: disable=invalid-name,wrong-spelling-in-docstring +# mypy: ignore-errors + + +from __future__ import annotations + +import warnings + +import numpy as np +import torch +from cvxopt import matrix, solvers + +from omnisafe.typing import DEVICE_CPU + + +# pylint: disable-next=too-many-instance-attributes +class PendulumSolver: + """The CBF solver for the pendulum problem using Gaussian Process models. + + This class implements a solver for the pendulum control problem using Control Barrier Functions + (CBFs). The primary goal is to ensure safe reinforcement learning by maintaining + safety constraints during the control process. + + For more details, please refer to: + + *End-to-End Safe Reinforcement Learning through Barrier Functions for Safety-Critical Continuous + Control Tasks* + + Attributes: + action_size (int): Size of the action space, typically 1 for the pendulum. + torque_bound (float): Maximum torque bound that can be applied to the pendulum. + max_speed (float): Maximum speed (angular velocity) of the pendulum. + device (torch.device): Device to run the computations on. 
+ """ + + # pylint: disable-next=invalid-name + def __init__( + self, + action_size: int = 1, + torque_bound: float = 15.0, + max_speed: float = 60.0, + device: torch.device = DEVICE_CPU, + ) -> None: + """Initialize the PendulumSolver with specified parameters. + + Args: + action_size (int): Size of the action space, typically 1 for the pendulum. + torque_bound (float): Maximum torque bound that can be applied to the pendulum. + max_speed (float): Maximum speed (angular velocity) of the pendulum. + device (torch.device): Device to run the computations on. + + Attributes: + F (float): A control gain factor used in the CBF computation. + _gamma_b (float): Parameter for the barrier function. + _kd (float): Damping coefficient used in the barrier function. + """ + self.action_size = action_size + self.torque_bound = torque_bound + self.max_speed = max_speed + self.F = 1.0 + self._device = device + self._gamma_b = 0.5 + self._kd = 1.5 + self._build_barrier() + warnings.filterwarnings('ignore') + + def _build_barrier(self) -> None: + """Construct the Control Barrier Function (CBF) for safe control of the pendulum. + + This method initializes and sets up the necessary components for the CBF, which is used to + ensure that the control actions taken do not violate safety constraints. + """ + self.P = matrix(np.diag([1.0, 1e16]), tc='d') + self.q = matrix(np.zeros(self.action_size + 1)) + self.h1 = np.array([1, 0.01]) + self.h2 = np.array([1, -0.01]) + self.h3 = np.array([-1, 0.01]) + self.h4 = np.array([-1, -0.01]) + + def control_barrier( # pylint: disable=invalid-name + self, + original_action: torch.Tensor, + f: np.ndarray, + g: np.ndarray, + x: np.ndarray, + std: np.ndarray, + ) -> torch.Tensor: + """Adjust the original action using a control barrier function. + + Args: + original_action (torch.Tensor): The original action proposed by the RL algorithm. + f (np.ndarray): The drift component of the system's dynamics. 
+ g (np.ndarray): The control component of the system's dynamics. + x (np.ndarray): The current state of the system. + std (np.ndarray): The standard deviation of the system's state. + + Returns: + torch.Tensor: The adjusted action that respects the system's constraints. + """ + # define gamma for the barrier function + gamma_b = 0.5 + kd = 1.5 + u_rl = original_action.cpu().detach().numpy() + + # set up Quadratic Program to satisfy Control Barrier Function + G = np.array( + [ + [ + -np.dot(self.h1, g), + -np.dot(self.h2, g), + -np.dot(self.h3, g), + -np.dot(self.h4, g), + 1, + -1, + g[1], + -g[1], + ], + [ + -1, + -1, + -1, + -1, + 0, + 0, + 0, + 0, + ], + ], + ) + G = np.transpose(G) + h = np.array( + [ + gamma_b * self.F + + np.dot(self.h1, f) + + np.dot(self.h1, g) * u_rl + - (1 - gamma_b) * np.dot(self.h1, x) + - kd * np.abs(np.dot(self.h1, std)), + gamma_b * self.F + + np.dot(self.h2, f) + + np.dot(self.h2, g) * u_rl + - (1 - gamma_b) * np.dot(self.h2, x) + - kd * np.abs(np.dot(self.h2, std)), + gamma_b * self.F + + np.dot(self.h3, f) + + np.dot(self.h3, g) * u_rl + - (1 - gamma_b) * np.dot(self.h3, x) + - kd * np.abs(np.dot(self.h3, std)), + gamma_b * self.F + + np.dot(self.h4, f) + + np.dot(self.h4, g) * u_rl + - (1 - gamma_b) * np.dot(self.h4, x) + - kd * np.abs(np.dot(self.h4, std)), + -u_rl + self.torque_bound, + u_rl + self.torque_bound, + -f[1] - g[1] * u_rl + self.max_speed, + f[1] + g[1] * u_rl + self.max_speed, + ], + ) + h = np.squeeze(h).astype(np.double) + + # convert numpy arrays to cvx matrices to set up QP + G = matrix(G, tc='d') + h = matrix(h, tc='d') + solvers.options['show_progress'] = False + sol = solvers.qp(self.P, self.q, G, h) + u_bar = sol['x'] + + # check if the adjusted action is within bounds + if np.add(np.squeeze(u_rl), np.squeeze(u_bar[0])) - 0.001 >= self.torque_bound: + u_bar[0] = self.torque_bound - u_rl + print('Error in QP') + elif np.add(np.squeeze(u_rl), np.squeeze(u_bar[0])) + 0.001 <= -self.torque_bound: + u_bar[0] = 
-self.torque_bound - u_rl + print('Error in QP') + + return torch.as_tensor(u_bar[0], dtype=torch.float32, device=self._device).unsqueeze(dim=0) diff --git a/omnisafe/common/buffer/onpolicy_buffer.py b/omnisafe/common/buffer/onpolicy_buffer.py index b6f9586df..6fab686aa 100644 --- a/omnisafe/common/buffer/onpolicy_buffer.py +++ b/omnisafe/common/buffer/onpolicy_buffer.py @@ -216,17 +216,7 @@ def get(self) -> dict[str, torch.Tensor]: The data stored and calculated in the buffer. """ self.ptr, self.path_start_idx = 0, 0 - - data = { - 'obs': self.data['obs'], - 'act': self.data['act'], - 'target_value_r': self.data['target_value_r'], - 'adv_r': self.data['adv_r'], - 'logp': self.data['logp'], - 'discounted_ret': self.data['discounted_ret'], - 'adv_c': self.data['adv_c'], - 'target_value_c': self.data['target_value_c'], - } + data = self.data.copy() adv_mean, adv_std, *_ = distributed.dist_statistics_scalar(data['adv_r']) cadv_mean, *_ = distributed.dist_statistics_scalar(data['adv_c']) diff --git a/omnisafe/common/buffer/vector_onpolicy_buffer.py b/omnisafe/common/buffer/vector_onpolicy_buffer.py index a920d8e6a..3ebd61c87 100644 --- a/omnisafe/common/buffer/vector_onpolicy_buffer.py +++ b/omnisafe/common/buffer/vector_onpolicy_buffer.py @@ -88,6 +88,23 @@ def __init__( # pylint: disable=super-init-not-called,too-many-arguments for _ in range(num_envs) ] + def add_field(self, name: str, shape: tuple[int, ...], dtype: torch.dtype) -> None: + """Add a field to the buffer. + + Examples: + >>> buffer = BaseBuffer(...) + >>> buffer.add_field('new_field', (2, 3), torch.float32) + >>> buffer.data['new_field'].shape + >>> (buffer.size, 2, 3) + + Args: + name (str): The name of the field. + shape (tuple of int): The shape of the field. + dtype (torch.dtype): The dtype of the field. 
+ """ + for buffer in self.buffers: + buffer.add_field(name=name, shape=shape, dtype=dtype) + @property def num_buffers(self) -> int: """Number of buffers.""" diff --git a/omnisafe/common/experiment_grid.py b/omnisafe/common/experiment_grid.py index f93cef8d3..787f4592f 100644 --- a/omnisafe/common/experiment_grid.py +++ b/omnisafe/common/experiment_grid.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -544,6 +544,8 @@ def analyze( compare_num: int | None = None, cost_limit: float | None = None, show_image: bool = False, + reward_metrics: str = 'Metrics/EpRet', + cost_metrics: str = 'Metrics/EpCost', ) -> None: """Analyze the experiment results. @@ -559,6 +561,8 @@ def analyze( cost_limit (float or None, optional): Value for one line showed on graph to indicate cost. Defaults to None. show_image (bool): Whether to show graph image in GUI windows. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. """ assert self._statistical_tools is not None, 'Please run run() first!' self._statistical_tools.load_source(self.log_dir) @@ -568,6 +572,8 @@ def analyze( compare_num, cost_limit, show_image=show_image, + reward_metrics=reward_metrics, + cost_metrics=cost_metrics, ) def evaluate(self, num_episodes: int = 10, cost_criteria: float = 1.0) -> None: diff --git a/omnisafe/common/gp_model.py b/omnisafe/common/gp_model.py new file mode 100644 index 000000000..dac93ea13 --- /dev/null +++ b/omnisafe/common/gp_model.py @@ -0,0 +1,218 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementation of the Gaussian Process Dynamics Model.""" + +# pylint: disable=invalid-name,wrong-spelling-in-docstring +# mypy: ignore-errors + + +from __future__ import annotations + +import joblib +import numpy as np +import torch +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import RBF +from sklearn.gaussian_process.kernels import ConstantKernel as C + + +# pylint: disable-next=too-many-instance-attributes +class DynamicsModel: + """This class handles the creation and management of Gaussian Process (GP) models. + + These GP models predict the next state of the environment based on the current state. + + .. warning:: + This class provides an implementation for the ``Pendulum-v1`` environment. It needs to be + customized to extend it to more environments. + + Args: + observation_size (int): The size of the observation space. This determines + the number of GP models to create. + load_dir (Optional[str]): The directory to load the GP models from. If None, new models + are initialized. Default is None. + + Attributes: + observation_size (int): The size of the observation space. + gp_model_prev (List[GaussianProcessRegressor]): The GP models from the previous iteration. + gp_model (List[GaussianProcessRegressor]): The current GP models used for predictions. 
+ """ + + def __init__(self, observation_size: int, load_dir: str | None = None) -> None: + """Initialize the DynamicsModel with a specified observation size and optional model loading. + + Args: + observation_size (int): Size of the observation space. + load_dir (Optional[str]): Directory to load the GP models from. If not provided, + new models will be created. + """ + self.observation_size: int = observation_size + self.gp_model_prev: list[GaussianProcessRegressor] + self.gp_model: list[GaussianProcessRegressor] + self._build_gp_model(load_dir=load_dir) + + def _build_gp_model(self, load_dir: str | None = None) -> None: + """Build or load the Gaussian Process models. + + If a load directory is provided, the models are loaded from the specified directory. + Otherwise, new models are created with default parameters. + + Args: + load_dir (Optional[str]): Directory to load the GP models from. If None, new models + will be created. + """ + gp_list = [] + noise = 0.01 # Small noise term to stabilize the GP model + for _ in range(self.observation_size - 1): + if not load_dir: + # Define the kernel as a product of a constant kernel and an RBF kernel + kern = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) + # Initialize the GaussianProcessRegressor with the specified kernel and noise + gp = GaussianProcessRegressor(kernel=kern, alpha=noise, n_restarts_optimizer=10) + gp_list.append(gp) + else: + # Load the GP models from the specified directory + gp_list = joblib.load(load_dir) + self.gp_model = gp_list + self.gp_model_prev = gp_list.copy() + + @property + def gp_models(self) -> list[GaussianProcessRegressor]: + """Return all gaussian process regressor for saving.""" + return self.gp_model + + def get_dynamics(self, obs: list[float], original_action: float) -> np.ndarray: + """Calculate the dynamics of the system based on the current observation and the original action. + + This method computes the next state of a pendulum system using the provided state and + action. 
+ + Args: + obs (list[float]): The current observation of the system state. + For the ``Pendulum-v1``, It should contain at least three elements: + [x, y, theta_dot], where x and y are the Cartesian coordinates of + the pendulum, and theta_dot is the angular velocity. + original_action (float): The original action proposed by the RL agent. + + Returns: + np.ndarray: The calculated dynamics of the system, representing the next state. + """ + # Time step + dt = 0.05 + # Gravitational constant + G = 10 + # Mass of the pendulum + m = 2 + # Length of the pendulum + length = 2 + + # Calculate the angle theta from the Cartesian coordinates + theta = np.arctan2(obs[1], obs[0]) + # Angular velocity + theta_dot = obs[2] + + f = np.array( + [ + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * original_action * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * original_action * dt, + ], + ) + + return np.squeeze(f) + + def update_gp_dynamics(self, obs: np.ndarray, act: np.ndarray) -> None: + """Update the Gaussian Process (GP) dynamics model based on observed states and actions. + + Args: + obs (np.ndarray): Agent's observation of the current environment. + act (np.ndarray): Actions taken. 
+ """ + obs = obs.detach().cpu().squeeze().numpy() + act = act.detach().cpu().squeeze().numpy() + N = self.observation_size + X = obs + U = act + L = len(X) + err = np.zeros((L - 1, N - 1)) + S = np.zeros((L - 1, 2)) + for i in range(L - 1): + f = self.get_dynamics(X[i], U[i]) + theta_p = np.arctan2(X[i][1], X[i][0]) + theta_dot_p = X[i][2] + theta = np.arctan2(X[i + 1][1], X[i + 1][0]) + theta_dot = X[i + 1][2] + S[i, :] = np.array([theta_p, theta_dot_p]) + err[i, :] = np.array([theta, theta_dot]) - f + self.gp_model[0].fit(S, err[:, 0]) + self.gp_model[1].fit(S, err[:, 1]) + + def get_gp_dynamics(self, obs: torch.Tensor, use_prev_model: bool) -> list[np.ndarray]: + """Retrieve the GP dynamics based on the current observation. + + Args: + obs (torch.Tensor): Agent's observation of the current environment. + use_prev_model (bool): Whether to use previous gaussian model. + + Returns: + list[np.ndarray]: list containing the gp dynamics [f, g, x, std]. + """ + obs = obs.cpu().detach().numpy() + u_rl = 0 + dt = 0.05 + G = 10 + m = 1 + length = 1 + obs = np.squeeze(obs) + theta = np.arctan2(obs[1], obs[0]) + theta_dot = obs[2] + x = np.array([theta, theta_dot]) + f_nom = np.array( + [ + -3 * G / (2 * length) * np.sin(theta + np.pi) * dt**2 + + theta_dot * dt + + theta + + 3 / (m * length**2) * u_rl * dt**2, + theta_dot + - 3 * G / (2 * length) * np.sin(theta + np.pi) * dt + + 3 / (m * length**2) * u_rl * dt, + ], + ) + g = np.array([3 / (m * length**2) * dt**2, 3 / (m * length**2) * dt]) + f_nom = np.squeeze(f_nom) + f = np.zeros(2) + if use_prev_model: + [m1, std1] = self.gp_model_prev[0].predict(x.reshape(1, -1), return_std=True) + [m2, std2] = self.gp_model_prev[1].predict(x.reshape(1, -1), return_std=True) + else: + [m1, std1] = self.gp_model[0].predict(x.reshape(1, -1), return_std=True) + [m2, std2] = self.gp_model[1].predict(x.reshape(1, -1), return_std=True) + f[0] = f_nom[0] + m1 + f[1] = f_nom[1] + m2 + return [ + np.squeeze(f), + np.squeeze(g), + 
np.squeeze(x), + np.array([np.squeeze(std1), np.squeeze(std2)]), + ] + + def reset_gp_model(self) -> None: + """Reset the Gaussian process model of the barrier function solver.""" + self.gp_model_prev = self.gp_model.copy() + self._build_gp_model() diff --git a/omnisafe/common/robust_barrier_solver.py b/omnisafe/common/robust_barrier_solver.py new file mode 100644 index 000000000..a871ccc4d --- /dev/null +++ b/omnisafe/common/robust_barrier_solver.py @@ -0,0 +1,329 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Robust Control Barrier Function Solver for OmniSafe.""" + + +# mypy: ignore-errors +# pylint: disable=invalid-name,wrong-spelling-in-docstring
from __future__ import annotations + +import warnings +from typing import Any + +import gymnasium as gym +import torch +from qpth.qp import QPFunction + +from omnisafe.utils.tools import to_tensor + + +DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}} + + +class CBFQPLayer: + """CBFQPLayer for the robust control barrier function solver. + + Args: + env (gymnasium.Env): The Gymnasium environment to interact with. + device (str, optional): The device type, such as 'cpu' or 'gpu'. Defaults to 'cpu'. + gamma_b (float, optional): The gamma parameter. Defaults to 20. + l_p (float, optional): Distance of the look-ahead point ahead of the vehicle center used by the Unicycle CBF constraints. Defaults to 0.03. 
+ + Attributes: + device (torch.device): The device on which computations will be performed. + env (gym.Env): The Gym environment instance. + u_min (float): The minimum control bound. + u_max (float): The maximum control bound. + gamma_b (float): The gamma parameter. + l_p (float): An additional layer parameter. + action_dim (int): The dimensionality of the action space. + """ + + def __init__( + self, + env: gym.Env, + device: str = 'cpu', + gamma_b: float = 20, + l_p: float = 0.03, + ) -> None: + """Initializes a CBFLayer instance with specified parameters and environment.""" + self.device = torch.device(device) + self.env = env + self.u_min, self.u_max = self.get_control_bounds() + self.gamma_b = gamma_b + self.l_p = l_p + self.action_dim = env.action_space.shape[0] + warnings.filterwarnings('ignore') + + def get_safe_action( + self, + state_batch: torch.Tensor, + action_batch: torch.Tensor, + mean_pred_batch: torch.Tensor, + sigma_batch: torch.Tensor, + ) -> torch.Tensor: + """Computes safe actions based on current state and action predictions, adjusting for uncertainties. + + Args: + state_batch (torch.Tensor): Current state batch, tensor or ndarray. + action_batch (torch.Tensor): Nominal action batch, tensor or ndarray. + mean_pred_batch (torch.Tensor): Mean disturbance predictions, tensor or ndarray. + sigma_batch (torch.Tensor): Standard deviations of disturbances, tensor or ndarray. + + Returns: + torch.Tensor: Safe actions adjusted for given constraints and uncertainties. 
+ """ + expand_dims = len(state_batch.shape) == 1 + if expand_dims: + state_batch = state_batch.unsqueeze(0) + action_batch = action_batch.unsqueeze(0) + mean_pred_batch = mean_pred_batch.unsqueeze(0) + sigma_batch = sigma_batch.unsqueeze(0) + + Ps, qs, Gs, hs = self.get_cbf_qp_constraints( + state_batch, + action_batch, + mean_pred_batch, + sigma_batch, + ) + safe_action_batch = self.solve_qp(Ps, qs, Gs, hs) + final_action_batch = torch.clamp( + action_batch + safe_action_batch, + self.u_min.repeat(action_batch.shape[0], 1), + self.u_max.repeat(action_batch.shape[0], 1), + ) + + return final_action_batch if not expand_dims else final_action_batch.squeeze(0) + + def solve_qp( + self, + Ps: torch.Tensor, + qs: torch.Tensor, + Gs: torch.Tensor, + hs: torch.Tensor, + ) -> torch.Tensor: + """Solves a batch of quadratic programming (QP) problems. + + Each QP problem is defined as: + minimize_{u,eps} 0.5 * u^T P u + q^T u + subject to G[u,eps]^T <= h + + Args: + Ps (torch.Tensor): Quadratic cost matrix for each problem. + qs (torch.Tensor): Linear cost vector for each problem. + Gs (torch.Tensor): Inequality constraint matrix for each problem. + hs (torch.Tensor): Inequality constraint vector for each problem. + + Returns: + The safe action for each problem, omitting the slack variable, with dimension (batch_size, n_u). 
+ """ + Ghs = torch.cat((Gs, hs.unsqueeze(2)), -1) + Ghs_norm = torch.max(torch.abs(Ghs), dim=2, keepdim=True)[0] + Gs /= Ghs_norm + hs = hs / Ghs_norm.squeeze(-1) + sol = self.cbf_layer( + Ps, + qs, + Gs, + hs, + solver_args={ + 'check_Q_spd': False, + 'maxIter': 100000, + 'notImprovedLim': 10, + 'eps': 1e-4, + }, + ) + + return sol[:, : self.env.action_space.shape[0]] + + def cbf_layer( + self, + Qs: torch.Tensor, + ps: torch.Tensor, + Gs: torch.Tensor, + hs: torch.Tensor, + As: torch.Tensor | None = None, + bs: torch.Tensor | None = None, + solver_args: dict[str, Any] | None = None, + ) -> torch.Tensor: + """Applies a custom layer to solve QP problems using given constraints. + + Args: + Qs (torch.Tensor): Quadratic cost matrix for each problem. + ps (torch.Tensor): Linear cost vector for each problem. + Gs (torch.Tensor): Inequality constraint matrix for each problem. + hs (torch.Tensor): Inequality constraint vector for each problem. + As (torch.Tensor, optional): Equality constraint matrix. Defaults to None. + bs (torch.Tensor, optional): Equality constraint vector. Defaults to None. + solver_args (dict, optional): Dictionary of solver arguments. Defaults to None. + + Returns: + Result of the QP solver for each problem. + """ + if solver_args is None: + solver_args = {} + + if As is None or bs is None: + As = torch.Tensor().to(self.device).double() + bs = torch.Tensor().to(self.device).double() + + return QPFunction(verbose=-1, **solver_args)( + Qs.double(), + ps.double(), + Gs.double(), + hs.double(), + As, + bs, + ).float() + + # pylint: disable-next=too-many-locals + def get_cbf_qp_constraints( + self, + state_batch: torch.Tensor, + action_batch: torch.Tensor, + mean_pred_batch: torch.Tensor, + sigma_pred_batch: torch.Tensor, + gamma_b: float = 1.0, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + """Builds up matrices required to solve a quadratic program (QP). 
+ + The QP is defined to solve: + minimize_{u,eps} 0.5 * u^T P u + q^T u + subject to G[u,eps]^T <= h + + Args: + state_batch (torch.Tensor): Current state batch. + action_batch (torch.Tensor): Nominal control input batch. + mean_pred_batch (torch.Tensor): Mean disturbance prediction state batch. + sigma_pred_batch (torch.Tensor): Standard deviation of the additive disturbance. + gamma_b (float, optional): CBF parameter for the class-Kappa function. Defaults to 1.0. + + Returns: + tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: A tuple containing: + P (torch.Tensor): Quadratic cost matrix in the QP. + q (torch.Tensor): Linear cost vector in the QP. + G (torch.Tensor): Inequality constraint matrix for QP constraints. + h (torch.Tensor): Inequality constraint vector for QP constraints. + """ + assert ( + len(state_batch.shape) == 2 + and len(action_batch.shape) == 2 + and len(mean_pred_batch.shape) == 2 + and len(sigma_pred_batch.shape) == 2 + ), print( + state_batch.shape, + action_batch.shape, + mean_pred_batch.shape, + sigma_pred_batch.shape, + ) + + batch_size = state_batch.shape[0] + gamma_b = self.gamma_b + + state_batch = torch.unsqueeze(state_batch, -1).to(self.device) + action_batch = torch.unsqueeze(action_batch, -1).to(self.device) + mean_pred_batch = torch.unsqueeze(mean_pred_batch, -1).to(self.device) + sigma_pred_batch = torch.unsqueeze(sigma_pred_batch, -1).to(self.device) + if self.env.dynamics_mode == 'Unicycle': + num_cbfs = len(self.env.hazards) + l_p = self.l_p + buffer = 0.1 + + thetas = state_batch[:, 2, :].squeeze(-1) + c_thetas = torch.cos(thetas) + s_thetas = torch.sin(thetas) + ps = torch.zeros((batch_size, 2)).to(self.device) + ps[:, 0] = state_batch[:, 0, :].squeeze(-1) + l_p * c_thetas + ps[:, 1] = state_batch[:, 1, :].squeeze(-1) + l_p * s_thetas + f_ps = torch.zeros((batch_size, 2, 1)).to(self.device) + Rs = torch.zeros((batch_size, 2, 2)).to(self.device) + Rs[:, 0, 0] = c_thetas + Rs[:, 0, 1] = -s_thetas + Rs[:, 1, 0] = 
s_thetas + Rs[:, 1, 1] = c_thetas + Ls = torch.zeros((batch_size, 2, 2)).to(self.device) + Ls[:, 0, 0] = 1 + Ls[:, 1, 1] = l_p + g_ps = torch.bmm(Rs, Ls) + mu_theta_aug = torch.zeros([batch_size, 2, 1]).to(self.device) + mu_theta_aug[:, 1, :] = mean_pred_batch[:, 2, :] + mu_ps = torch.bmm(g_ps, mu_theta_aug) + mean_pred_batch[:, :2, :] + sigma_theta_aug = torch.zeros([batch_size, 2, 1]).to(self.device) + sigma_theta_aug[:, 1, :] = sigma_pred_batch[:, 2, :] + sigma_ps = torch.bmm(torch.abs(g_ps), sigma_theta_aug) + sigma_pred_batch[:, :2, :] + + hs = 1e3 * torch.ones((batch_size, num_cbfs), device=self.device) + dhdps = torch.zeros((batch_size, num_cbfs, 2), device=self.device) + hazards = self.env.hazards + for i, hazard in enumerate(hazards): + if hazard['type'] == 'circle': + obs_loc = to_tensor(hazard['location'], torch.FloatTensor, self.device) + hs[:, i] = 0.5 * ( + torch.sum((ps - obs_loc) ** 2, dim=1) - (hazard['radius'] + buffer) ** 2 + ) + dhdps[:, i, :] = ps - obs_loc + else: + raise NotImplementedError + + n_u = action_batch.shape[1] + num_constraints = num_cbfs + 2 * n_u + + G = torch.zeros((batch_size, num_constraints, n_u + 1)).to(self.device) + h = torch.zeros((batch_size, num_constraints)).to(self.device) + ineq_constraint_counter = 0 + + G[:, :num_cbfs, :n_u] = -torch.bmm(dhdps, g_ps) + G[:, :num_cbfs, n_u] = -1 + h[:, :num_cbfs] = gamma_b * (hs**3) + ( + torch.bmm(dhdps, f_ps + mu_ps) + - torch.bmm(torch.abs(dhdps), sigma_ps) + + torch.bmm(torch.bmm(dhdps, g_ps), action_batch) + ).squeeze(-1) + ineq_constraint_counter += num_cbfs + P = ( + torch.diag(torch.tensor([1.0e0, 1.0e-2, 1e5])) + .repeat(batch_size, 1, 1) + .to(self.device) + ) + q = torch.zeros((batch_size, n_u + 1)).to(self.device) + else: + raise NotImplementedError + + n_u = action_batch.shape[1] + + for c in range(n_u): + + if self.u_max is not None: + G[:, ineq_constraint_counter, c] = 1 + h[:, ineq_constraint_counter] = self.u_max[c] - action_batch[:, c].squeeze(-1) + 
ineq_constraint_counter += 1 + + if self.u_min is not None: + G[:, ineq_constraint_counter, c] = -1 + h[:, ineq_constraint_counter] = -self.u_min[c] + action_batch[:, c].squeeze(-1) + ineq_constraint_counter += 1 + + return P, q, G, h + + def get_control_bounds(self) -> tuple[torch.Tensor, torch.Tensor]: + """Obtain the action bounds. + + Returns: + Action bounds, i.e., min control input and max control input. + """ + u_min = torch.tensor(self.env.safe_action_space.low).to(self.device) + u_max = torch.tensor(self.env.safe_action_space.high).to(self.device) + + return u_min, u_max diff --git a/omnisafe/common/robust_gp_model.py b/omnisafe/common/robust_gp_model.py new file mode 100644 index 000000000..9361c833b --- /dev/null +++ b/omnisafe/common/robust_gp_model.py @@ -0,0 +1,434 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Implementation of Dynamics Model Based on GPyTorch.""" +# mypy: ignore-errors + + +from __future__ import annotations + +import os +import warnings +from typing import Callable + +import gpytorch +import gymnasium as gym +import numpy as np +import torch +from gpytorch.distributions import MultivariateNormal +from gpytorch.kernels import RBFKernel, ScaleKernel +from gpytorch.likelihoods import Likelihood +from gpytorch.means import ZeroMean +from gpytorch.priors import NormalPrior + +from omnisafe.typing import DEVICE_CPU +from omnisafe.utils.tools import to_tensor + + +DYNAMICS_MODE = {'Unicycle': {'n_s': 3, 'n_u': 2}} +MAX_STD = {'Unicycle': [2e-1, 2e-1, 2e-1]} + + +class BaseGPy(gpytorch.models.ExactGP): + """A Gaussian Process (GP) model using a zero mean function and a scaled RBF kernel with priors. + + This class extends gpytorch.models.ExactGP, specifically designed for use in + disturbance estimation tasks. + + Attributes: + mean_module (ZeroMean): The mean module which is set to zero mean. + covar_module (ScaleKernel): The covariance kernel, a scaled RBF kernel with specified priors. + + Args: + train_x (Tensor): Training input features, which should be a tensor. + train_y (Tensor): Training target values, which should be a tensor. + prior_std (float): The prior standard deviation used to adjust the output scale of the kernel. + likelihood (Likelihood): The likelihood function associated with the GP model. 
+ """ + + def __init__( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + prior_std: float, + likelihood: Likelihood, + ) -> None: + """Initialize the BaseGPy model.""" + super().__init__(train_x, train_y, likelihood) + self.mean_module = ZeroMean() + self.covar_module = ScaleKernel( + RBFKernel(lengthscale_prior=NormalPrior(1e5, 1e-5)), + outputscale_prior=NormalPrior(prior_std + 1e-6, 1e-5), + ) + self.covar_module.base_kernel.lengthscale = 1e5 + self.covar_module.outputscale = prior_std + 1e-6 + + # pylint: disable=arguments-differ + def forward(self, x: torch.Tensor) -> MultivariateNormal: + """Forward pass through the GP model to produce a multivariate normal distribution. + + Args: + x (Tensor): Input features for which predictions are to be made. + + Returns: + MultivariateNormal: A multivariate normal distribution reflecting the GP predictions. + """ + mean = self.mean_module(x) + covar = self.covar_module(x) + return MultivariateNormal(mean, covar) + + +class GPyDisturbanceEstimator: + """A class for estimating disturbances using Gaussian Processes with GPyTorch. + + Attributes: + device (torch.device): The device (CPU or CUDA) on which the tensors will be processed. + _train_x (torch.Tensor): Training data features. + _train_y (torch.Tensor): Training data targets. + likelihood (gpytorch.likelihoods.Likelihood): The likelihood model for GP inference. + model (BaseGPy): The GPyTorch model. + + Args: + train_x (torch.Tensor): Training data features. If not a tensor, it will be converted. + train_y (torch.Tensor): Training data targets. If not a tensor, it will be converted. + prior_std (float): Standard deviation of the prior distribution. + likelihood (Optional[gpytorch.likelihoods.Likelihood]): A GPyTorch likelihood. + device (Optional[torch.device]): The torch device. Defaults to CPU if None. 
+ """ + + def __init__( + self, + train_x: torch.Tensor, + train_y: torch.Tensor, + prior_std: float, + likelihood: gpytorch.likelihoods.Likelihood | None = None, + device: torch.device = DEVICE_CPU, + ) -> None: + """Initialize the GPyDisturbanceEstimator.""" + self.device = device if device else torch.device('cpu') + + if not torch.is_tensor(train_x): + train_x = torch.tensor(train_x, dtype=torch.float32, device=self.device) + if not torch.is_tensor(train_y): + train_y = torch.tensor(train_y, dtype=torch.float32, device=self.device) + self._train_x = train_x + self._train_y = train_y + + if not likelihood: + likelihood = gpytorch.likelihoods.GaussianLikelihood() + self.likelihood = likelihood.to(self.device) + + self.model = BaseGPy(train_x, train_y, prior_std, likelihood) + self.model = self.model.to(self.device) + warnings.filterwarnings('ignore') + + def train(self, training_iter: int) -> None: + """Train the Gaussian Process model. + + Args: + training_iter (int): Number of training iterations. + verbose (bool): If True, print detailed logging information. + """ + self.model.train() + self.likelihood.train() + optimizer = torch.optim.Adam(self.model.parameters(), lr=0.1) + mll = gpytorch.mlls.ExactMarginalLogLikelihood(self.likelihood, self.model) + + for _ in range(training_iter): + optimizer.zero_grad() + output = self.model(self._train_x) + loss = -mll(output, self._train_y) + loss.backward() + optimizer.step() + + def predict(self, test_x: torch.Tensor) -> dict[str, torch.Tensor | np.ndarray]: + """Make predictions on new data. + + Args: + test_x (torch.Tensor): Test data features. If not a tensor, it will be converted. + + Returns: + A dictionary containing prediction mean, variance, covariance matrix, and confidence + intervals. If the input was not a tensor, values will be converted to numpy arrays. 
+ """ + is_tensor = torch.is_tensor(test_x) + if not is_tensor: + test_x = torch.tensor(test_x, dtype=torch.float32, device=self.device) + + self.model.eval() + self.likelihood.eval() + + with torch.no_grad(), gpytorch.settings.fast_pred_var(): + observed_pred = self.likelihood(self.model(test_x)) + pred_dict = { + 'mean': observed_pred.mean.cpu(), + 'f_var': observed_pred.variance.cpu(), + 'f_covar': observed_pred.covariance_matrix.cpu(), + 'lower_ci': observed_pred.confidence_region()[0].cpu(), + 'upper_ci': observed_pred.confidence_region()[1].cpu(), + } + + if not is_tensor: + for key, val in pred_dict.items(): + pred_dict[key] = val.numpy() + + return pred_dict + + +# pylint: disable-next=too-many-instance-attributes +class DynamicsModel: + """Initialize the DynamicsModel with a gymnasium environment. + + Args: + env (gym.Env): The gymnasium environment to model dynamics for. + gp_model_size (int, optional): Maximum history count for disturbances. Default to 2000. + l_p (float, optional): Learning parameter. Default to 0.03. + device (str, optional): The device to perform computations on. Default to 'cpu'. 
+ """ + + def __init__( + self, + env: gym.Env, + gp_model_size: int = 2000, + l_p: float = 0.03, + device: str = 'cpu', + ) -> None: + """Initialize the DynamicsModel with a gymnasium environment.""" + self.env = env + self.get_f, self.get_g = self.get_dynamics() + self.n_s = DYNAMICS_MODE[self.env.dynamics_mode]['n_s'] + self.n_u = DYNAMICS_MODE[self.env.dynamics_mode]['n_u'] + + self.disturbance_history = {} + self.history_counter = 0 + self.max_history_count = gp_model_size + self.disturbance_history['state'] = np.zeros((self.max_history_count, self.n_s)) + self.disturbance_history['disturbance'] = np.zeros((self.max_history_count, self.n_s)) + self._train_x = np.zeros((self.max_history_count, self.n_s)) + self._train_y = np.zeros((self.max_history_count, self.n_s)) + self._disturb_estimators = [] + self.device = torch.device(device) + + for i in range(self.n_s): + self._disturb_estimators.append( + GPyDisturbanceEstimator( + np.zeros((self.max_history_count, self.n_s)), + np.zeros((self.max_history_count, self.n_s)), + MAX_STD[self.env.dynamics_mode][i], + device=self.device, + ), + ) + self._disturb_initialized = True + self.l_p = l_p + + def get_dynamics(self) -> tuple[Callable, Callable]: + """Retrieve the dynamics functions for drift and control based on the environment's dynamics mode. + + Returns: + tuple: A tuple containing two callable methods, `get_f` and `get_g`. + """ + if self.env.dynamics_mode == 'Unicycle': + + def get_f(state_batch: np.ndarray) -> np.ndarray: + return np.zeros(state_batch.shape) + + def get_g(state_batch: np.ndarray) -> np.ndarray: + theta = state_batch[:, 2] + g_x = np.zeros((state_batch.shape[0], 3, 2)) + g_x[:, 0, 0] = np.cos(theta) + g_x[:, 1, 0] = np.sin(theta) + g_x[:, 2, 1] = 1.0 + return g_x + + else: + raise NotImplementedError('Unknown Dynamics mode.') + + return get_f, get_g + + def get_state(self, obs: torch.Tensor) -> torch.Tensor: + """Process the raw observations from the environment. 
+ + Args: + obs (torch.Tensor): The environment observations. + + Returns: + torch.Tensor: The processed state of the system. + """ + expand_dims = len(obs.shape) == 1 + dtype = obs.dtype + device = obs.device + obs = obs.cpu().numpy() if obs.is_cuda else obs.numpy() + + if expand_dims: + obs = np.expand_dims(obs, 0) + + if self.env.dynamics_mode == 'Unicycle': + theta = np.arctan2(obs[:, 3], obs[:, 2]) + state_batch = np.zeros((obs.shape[0], 3)) + state_batch[:, 0] = obs[:, 0] + state_batch[:, 1] = obs[:, 1] + state_batch[:, 2] = theta + else: + raise NotImplementedError('Unknown dynamics') + + if expand_dims: + state_batch = state_batch.squeeze(0) + + return torch.tensor(state_batch, dtype=dtype, device=device) + + def append_transition( + self, + state_batch: np.ndarray, + u_batch: np.ndarray, + next_state_batch: np.ndarray, + ) -> None: + """Estimate the disturbance from the current dynamics transition and adds it to the buffer. + + Args: + state_batch (np.ndarray): The batch of current states, shape (n_s,) or (batch_size, n_s). + u_batch (np.ndarray): The batch of actions applied, shape (n_u,) or (batch_size, n_u). + next_state_batch (np.ndarray): The batch of next states, shape (n_s,) or (batch_size, n_s). + """ + u_batch = np.expand_dims(u_batch, -1) + disturbance_batch = ( + next_state_batch + - state_batch + - self.env.dt + * (self.get_f(state_batch) + (self.get_g(state_batch) @ u_batch).squeeze(-1)) + ) / self.env.dt + + for i in range(state_batch.shape[0]): + self.disturbance_history['state'][self.history_counter % self.max_history_count] = ( + state_batch[i] + ) + self.disturbance_history['disturbance'][ + self.history_counter % self.max_history_count + ] = disturbance_batch[i] + self.history_counter += 1 + + if self.history_counter % (self.max_history_count // 10) == 0: + self.fit_gp_model() + + def fit_gp_model(self, training_iter: int = 70) -> None: + """Fit a Gaussian Process model to the disturbance data. 
+ + Args: + training_iter (int, optional): Number of training iterations for the GP model. Defaults to 70. + """ + if self.history_counter < self.max_history_count: + train_x = self.disturbance_history['state'][: self.history_counter] + train_y = self.disturbance_history['disturbance'][: self.history_counter] + else: + train_x = self.disturbance_history['state'] + train_y = self.disturbance_history['disturbance'] + + train_x_std = np.std(train_x, axis=0) + train_x_normalized = train_x / (train_x_std + 1e-8) + train_y_std = np.std(train_y, axis=0) + train_y_normalized = train_y / (train_y_std + 1e-8) + + self._disturb_estimators = [] + for i in range(self.n_s): + self._disturb_estimators.append( + GPyDisturbanceEstimator( + train_x_normalized, + train_y_normalized[:, i], + MAX_STD[self.env.dynamics_mode][i], + device=self.device, + ), + ) + self._disturb_estimators[i].train(training_iter) + self._disturb_initialized = False + self._train_x = train_x + self._train_y = train_y + + def predict_disturbance(self, test_x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Predict the disturbance at the queried states using the trained Gaussian Process models. + + Args: + test_x (torch.Tensor): The state for which to predict disturbances, shape (n_test, n_s). + + Returns: + tuple: A tuple of arrays (means, variances). 
+ """ + dtype = test_x.dtype + device = test_x.device + test_x = test_x.cpu().detach().double().numpy() + + expand_dims = len(test_x.shape) == 1 + if expand_dims: + test_x = np.expand_dims(test_x, axis=0) + + means = np.zeros(test_x.shape) + f_std = np.zeros(test_x.shape) + + if not self._disturb_initialized: + train_x_std = np.std(self._train_x, axis=0) + train_y_std = np.std(self._train_y, axis=0) + test_x = test_x / train_x_std + for i in range(self.n_s): + prediction_ = self._disturb_estimators[i].predict(test_x) + means[:, i] = prediction_['mean'] * (train_y_std[i] + 1e-8) + f_std[:, i] = np.sqrt(prediction_['f_var']) * (train_y_std[i] + 1e-8) + + else: + f_std = np.ones(test_x.shape) + for i in range(self.n_s): + f_std[:, i] *= MAX_STD[self.env.dynamics_mode][i] + + if expand_dims: + means = means.squeeze(0) + f_std = f_std.squeeze(0) + + return (to_tensor(means, dtype, device), to_tensor(f_std, dtype, device)) + + def load_disturbance_models(self, load_dir: str, epoch: str) -> None: + """Load the disturbance models and their training data. + + Args: + load_dir (str): The directory where the model files are saved. + epoch (str): The epoch identifier used in the filenames to load the specific model checkpoint. 
+ """ + self._disturb_estimators = [] + weights = torch.load( + os.path.join(load_dir, f'gp_models_{epoch}.pkl'), + map_location=self.device, + ) + self._train_x = torch.load(os.path.join(load_dir, f'gp_models_train_x_{epoch}.pkl')) + self._train_y = torch.load(os.path.join(load_dir, f'gp_models_train_y_{epoch}.pkl')) + for i in range(self.n_s): + self._disturb_estimators.append( + GPyDisturbanceEstimator( + self._train_x, + self._train_y[:, i], + MAX_STD[self.env.dynamics_mode][i], + device=self.device, + ), + ) + self._disturb_estimators[i].model.load_state_dict(weights[i]) + + @property + def train_x(self) -> np.ndarray: + """Return the training data input features used for the disturbance estimators.""" + return self._train_x + + @property + def train_y(self) -> np.ndarray: + """Return the training data labels used for the disturbance estimators.""" + return self._train_y + + @property + def disturb_estimators(self) -> list[GPyDisturbanceEstimator]: + """Provide access to the list of trained disturbance estimator models.""" + return self._disturb_estimators diff --git a/omnisafe/common/statistics_tools.py b/omnisafe/common/statistics_tools.py index 3856b81a7..72e661c33 100644 --- a/omnisafe/common/statistics_tools.py +++ b/omnisafe/common/statistics_tools.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -83,6 +83,7 @@ def load_source(self, path: str) -> None: 'The config file is not found in the save directory.', ) from error + # pylint: disable-next=too-many-arguments, too-many-locals def draw_graph( self, parameter: str, @@ -91,6 +92,8 @@ def draw_graph( cost_limit: float | None = None, smooth: int = 1, show_image: bool = False, + reward_metrics: str = 'Metrics/EpRet', + cost_metrics: str = 'Metrics/EpCost', ) -> None: """Draw graph. 
@@ -102,6 +105,8 @@ def draw_graph( cost_limit (float or None, optional): The cost limit of the experiment. Defaults to None. smooth (int, optional): The smooth window size. Defaults to 1. show_image (bool): Whether to show graph image in GUI windows. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. .. note:: `values` and `compare_num` cannot be set at the same time. @@ -161,6 +166,8 @@ def draw_graph( 'mean', save_name=save_name, show_image=show_image, + reward_metrics=reward_metrics, + cost_metrics=cost_metrics, ) except Exception: # noqa # pragma: no cover # pylint: disable=broad-except print( diff --git a/omnisafe/configs/off-policy/DDPGCBF.yaml b/omnisafe/configs/off-policy/DDPGCBF.yaml new file mode 100644 index 000000000..f9d706305 --- /dev/null +++ b/omnisafe/configs/off-policy/DDPGCBF.yaml @@ -0,0 +1,170 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. 
+ device: cpu + # number of threads for torch + torch_threads: 16 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 80_000 + # number of evaluate episodes + eval_episodes: 1 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of steps per sample + update_cycle: 1 + # number of iterations to update the policy + update_iters: 1 + # The size of replay buffer + size: 1000000 + # The size of batch + batch_size: 256 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # max gradient norm + max_grad_norm: 40 + # use critic norm + use_critic_norm: False + # critic norm coefficient + critic_norm_coeff: 0.001 + # The soft update coefficient + polyak: 0.001 + # The discount factor of GAE + gamma: 0.99 + # Actor perdorm random action before `start_learning_steps` steps + start_learning_steps: 0 + # The delay step of policy update + policy_delay: 1 + # Whether to use the exploration noise + use_exploration_noise: True + # The exploration noise + exploration_noise: 0.1 + # use cost + use_cost: False + # logger configurations + logger_cfgs: + # use wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 20 + # save logger path + log_dir: "./runs" + # save model path + window_lens: 10 + # model configurations + model_cfgs: + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type + actor_type: mlp + # linear learning rate decay + linear_lr_decay: False + # Configuration of Actor network + actor: + # Size of hidden layers + hidden_sizes: [400, 300] + # Activation function + activation: relu + # The learning rate of Actor network + lr: 0.0001 + # 
Configuration of Critic network + critic: + # The number of critic networks + num_critics: 1 + # Size of hidden layers + hidden_sizes: [400, 300] + # Activation function + activation: relu + # The learning rate of Critic network + lr: 0.001 + # barrier function compensator configurations + compensator_cfgs: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # learning rate + lr: 0.01 + # number of iterations to update the compensator + update_iters: 1 + +SafetyCarCircle1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate of Critic network + lr: 0.001 + +SafetyCarGoal1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate of Critic network + lr: 0.001 + +SafetyPointCircle1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate of Critic network + lr: 0.001 + +SafetyPointGoal1-v0: + # model configurations + model_cfgs: + # Configuration of Actor network + actor: + # The learning rate of Actor network + lr: 0.000005 + # Configuration of Critic network + critic: + # The learning rate of Critic network + lr: 0.001 diff --git a/omnisafe/configs/off-policy/SACRCBF.yaml b/omnisafe/configs/off-policy/SACRCBF.yaml new file mode 100644 index 000000000..f70327e6d --- /dev/null +++ b/omnisafe/configs/off-policy/SACRCBF.yaml @@ -0,0 +1,134 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. + device: cpu + # number of threads for torch + torch_threads: 4 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 200000 + # number of evaluate episodes + eval_episodes: 1 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 1000 + # number of steps per sample + update_cycle: 1 + # number of iterations to update the policy + update_iters: 1 + # size of replay buffer + size: 1000000 + # size of batch + batch_size: 256 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # max gradient norm + max_grad_norm: 40 + # use critic norm + use_critic_norm: False + # critic norm coefficient + critic_norm_coeff: 0.001 + # soft update coefficient + polyak: 0.005 + # discount factor of GAE + gamma: 0.99 + # actor perform random action before `start_learning_steps` steps + start_learning_steps: 5000 + # delay step of policy update + policy_delay: 1 + # whether to use the exploration noise + use_exploration_noise: False + # 
exploration noise + exploration_noise: 0.1 + # policy noise + policy_noise: 0.2 + # policy_noise_clip + policy_noise_clip: 0.5 + # value of alpha + alpha: 0.2 + # Whether to use auto alpha + auto_alpha: True + # use cost + use_cost: False + # control barrier function configurations + cbf_cfgs: + # gamma of control barrier certificate. + gamma_b: 20 + # confidence parameter desired + k_d: 3.0 + # environment dynamics coefficient + l_p: 0.03 + # logger configurations + logger_cfgs: + # use wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 40 + # save logger path + log_dir: "./runs" + # save model path + window_lens: 10 + # model configurations + model_cfgs: + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type + actor_type: gaussian_sac + # linear learning rate decay + linear_lr_decay: False + # configuration of actor network + actor: + # size of hidden layers + hidden_sizes: [400, 300] + # activation function + activation: relu + # learning rate of actor network + lr: 0.0003 + # configuration of critic network + critic: + # number of critic networks + num_critics: 2 + # size of hidden layers + hidden_sizes: [400, 300] + # activation function + activation: relu + # learning rate of critic network + lr: 0.0003 + # dynamics model configurations + dynamics_model_cfgs: + # max number of episodes updating GP models + gp_max_episodes: 100 + # size of gp model + gp_model_size: 2000 + # whether to use the action compensator + use_compensator: False diff --git a/omnisafe/configs/on-policy/IPO.yaml b/omnisafe/configs/on-policy/IPO.yaml index 852b08344..807984252 100644 --- a/omnisafe/configs/on-policy/IPO.yaml +++ b/omnisafe/configs/on-policy/IPO.yaml @@ -33,7 +33,7 @@ defaults: # number of steps to update the policy steps_per_epoch: 20000 # number of iterations to update the policy - update_iters: 10 + 
update_iters: 40 # batch size for each iteration batch_size: 64 # target kl divergence @@ -41,9 +41,9 @@ defaults: # entropy coefficient entropy_coef: 0.0 # normalize reward - reward_normalize: True + reward_normalize: False # normalize cost - cost_normalize: True + cost_normalize: False # normalize observation obs_normalize: True # early stop when kl divergence is bigger than target kl @@ -134,3 +134,29 @@ defaults: lambda_lr: 0.035 # Type of lagrangian optimizer lambda_optimizer: "Adam" + +Pendulum-v1: + # training configurations + train_cfgs: + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of iterations to update the policy + update_iters: 10 + # batch size for each iteration + batch_size: 256 + # target kl divergence + target_kl: 0.005 + # normalize observation + obs_normalize: False + # reward discount factor + gamma: 0.995 + # lambda for gae + lam: 0.98 + # lagrangian configurations + lagrange_cfgs: + # Tolerance of constraint violation + cost_limit: 1000.0 diff --git a/omnisafe/configs/on-policy/PPOBetaCBF.yaml b/omnisafe/configs/on-policy/PPOBetaCBF.yaml new file mode 100644 index 000000000..afb636e8b --- /dev/null +++ b/omnisafe/configs/on-policy/PPOBetaCBF.yaml @@ -0,0 +1,120 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. + device: cpu + # number of threads for torch + torch_threads: 16 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of iterations to update the policy + update_iters: 10 + # batch size for each iteration + batch_size: 64 + # target kl divergence + target_kl: 0.02 + # entropy coefficient + entropy_coef: 0.0 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # early stop when kl divergence is bigger than target kl + kl_early_stop: True + # use max gradient norm + use_max_grad_norm: False + # max gradient norm + max_grad_norm: 40.0 + # use critic norm + use_critic_norm: True + # critic norm coefficient + critic_norm_coef: 0.001 + # reward discount factor + gamma: 0.995 + # cost discount factor + cost_gamma: 0.99 + # lambda for gae + lam: 0.98 + # lambda for cost gae + lam_c: 0.95 + # clip ratio + clip: 0.2 + # advantage estimation method, options: gae, retrace + adv_estimation_method: gae + # standardize reward advantage + standardized_rew_adv: True + # standardize cost advantage + standardized_cost_adv: True + # penalty coefficient + penalty_coef: 0.0 + # use cost + use_cost: False + # logger configurations + logger_cfgs: + # use wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 100 + # save logger path + log_dir: "./runs" + # save model path + 
window_lens: 100 + # model configurations + model_cfgs: + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type, options: gaussian, gaussian_learning + actor_type: beta + # linear learning rate decay + linear_lr_decay: True + # exploration noise anneal + exploration_noise_anneal: False + # std upper bound, and lower bound + std_range: [0.5, 0.1] + # actor network configurations + actor: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: tanh + # out_activation: tanh + # learning rate + lr: 0.0003 + critic: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: tanh + # learning rate + lr: 0.0003 diff --git a/omnisafe/configs/on-policy/TRPO.yaml b/omnisafe/configs/on-policy/TRPO.yaml index 455ba163f..ab025a391 100644 --- a/omnisafe/configs/on-policy/TRPO.yaml +++ b/omnisafe/configs/on-policy/TRPO.yaml @@ -124,3 +124,35 @@ defaults: activation: tanh # learning rate lr: 0.001 + +Pendulum-v1: + # training configurations + train_cfgs: + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # batch size for each iteration + batch_size: 256 + # target kl divergence + target_kl: 0.005 + # normalize observation + obs_normalize: False + # reward discount factor + gamma: 0.995 + # lambda for gae + lam: 0.98 + # model configurations + model_cfgs: + # actor network configurations + actor: + # activation function + activation: relu + # barrier function compensator configurations + compensator_cfgs: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu diff --git a/omnisafe/configs/on-policy/TRPOCBF.yaml b/omnisafe/configs/on-policy/TRPOCBF.yaml new file mode 100644 index 000000000..c61d3df44 --- /dev/null +++ b/omnisafe/configs/on-policy/TRPOCBF.yaml @@ -0,0 +1,140 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +defaults: + # seed for random number generator + seed: 0 + # training configurations + train_cfgs: + # device to use for training, options: cpu, cuda, cuda:0, cuda:0,1, etc. + device: cpu + # number of threads for torch + torch_threads: 16 + # number of vectorized environments + vector_env_nums: 1 + # number of parallel agent, similar to a3c + parallel: 1 + # total number of steps to train + total_steps: 80_000 + # algorithm configurations + algo_cfgs: + # number of steps to update the policy + steps_per_epoch: 2000 + # number of iterations to update the policy + update_iters: 10 + # batch size for each iteration + batch_size: 256 + # target kl divergence + target_kl: 0.005 + # entropy coefficient + entropy_coef: 0.0 + # normalize reward + reward_normalize: False + # normalize cost + cost_normalize: False + # normalize observation + obs_normalize: False + # early stop when kl divergence is bigger than target kl + kl_early_stop: False + # use max gradient norm + use_max_grad_norm: True + # max gradient norm + max_grad_norm: 40.0 + # use critic norm + use_critic_norm: True + # critic norm coefficient + critic_norm_coef: 0.001 + # reward discount factor + gamma: 0.995 + # cost discount factor + cost_gamma: 0.99 + # lambda for gae + lam: 0.98 + # lambda for cost gae + lam_c: 0.95 + # advantage estimation method, options: gae, retrace + 
adv_estimation_method: gae + # standardize reward advantage + standardized_rew_adv: True + # standardize cost advantage + standardized_cost_adv: True + # penalty coefficient + penalty_coef: 0.0 + # use cost + use_cost: False + # Damping value for conjugate gradient + cg_damping: 0.1 + # Number of conjugate gradient iterations + cg_iters: 15 + # Subsampled observation + fvp_obs: None + # The sub-sampling rate of the observation + fvp_sample_freq: 1 + # The max steps to update dynamics model + update_dynamics_steps: 650 + # logger configurations + logger_cfgs: + # use wandb for logging + use_wandb: False + # wandb project name + wandb_project: omnisafe + # use tensorboard for logging + use_tensorboard: True + # save model frequency + save_model_freq: 10 + # save logger path + log_dir: "./runs" + # save model path + window_lens: 100 + # model configurations + model_cfgs: + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # actor type, options: gaussian, gaussian_learning + actor_type: gaussian_learning + # linear learning rate decay + linear_lr_decay: False + # exploration noise anneal + exploration_noise_anneal: False + # std upper bound, and lower bound + std_range: [0.5, 0.1] + # actor network configurations + actor: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu + # learning rate + lr: ~ + # critic network configurations + critic: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: tanh + # learning rate + lr: 0.001 + # barrier function compensator configurations + compensator_cfgs: + # hidden layer sizes + hidden_sizes: [64, 64] + # activation function + activation: relu + # weight initialization mode + weight_initialization_mode: "kaiming_uniform" + # learning rate + lr: 0.01 + # number of iterations to update the compensator + update_iters: 1 diff --git a/omnisafe/envs/__init__.py b/omnisafe/envs/__init__.py index 4d225c61d..095a1134c 100644 --- 
a/omnisafe/envs/__init__.py +++ b/omnisafe/envs/__init__.py @@ -15,11 +15,13 @@ """Environment API for OmniSafe.""" from omnisafe.envs import classic_control +from omnisafe.envs.cbf_env import BarrierFunctionEnv from omnisafe.envs.core import CMDP, env_register, make, support_envs from omnisafe.envs.crabs_env import CRABSEnv from omnisafe.envs.custom_env import CustomEnv from omnisafe.envs.meta_drive_env import SafetyMetaDriveEnv from omnisafe.envs.mujoco_env import MujocoEnv +from omnisafe.envs.rcbf_env import RobustBarrierFunctionEnv from omnisafe.envs.safety_gymnasium_env import SafetyGymnasiumEnv from omnisafe.envs.safety_gymnasium_modelbased import SafetyGymnasiumModelBased from omnisafe.envs.safety_isaac_gym_env import SafetyIsaacGymEnv diff --git a/omnisafe/envs/cbf_env.py b/omnisafe/envs/cbf_env.py new file mode 100644 index 000000000..a46e91c94 --- /dev/null +++ b/omnisafe/envs/cbf_env.py @@ -0,0 +1,235 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Interface of control barrier function-based environments.""" + +# mypy: ignore-errors +# pylint: disable=all + +from __future__ import annotations + +from typing import Any, ClassVar + +import gymnasium +import numpy as np +import torch +from gymnasium import spaces + +from omnisafe.common.logger import Logger +from omnisafe.envs.core import CMDP, env_register +from omnisafe.typing import Box + + +@env_register +class BarrierFunctionEnv(CMDP): + """Interface of control barrier function-based environments. + + .. warning:: + Since environments based on control barrier functions require special judgment and control + of environmental dynamics, they do not support the use of vectorized environments. + + Attributes: + need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. + need_time_limit_wrapper (bool): Whether to use time limit wrapper. + """ + + need_auto_reset_wrapper = True + need_time_limit_wrapper = False + _support_envs: ClassVar[list[str]] = [ + 'Pendulum-v1', + ] + + def __init__( + self, + env_id: str, + num_envs: int = 1, + device: str = 'cpu', + **kwargs: Any, + ) -> None: + """Initialize the environment. + + Args: + env_id (str): Environment id. + num_envs (int, optional): Number of environments. Defaults to 1. + device (torch.device, optional): Device to store the data. Defaults to 'cpu'. + + Keyword Args: + render_mode (str, optional): The render mode, ranging from ``human``, ``rgb_array``, ``rgb_array_list``. + Defaults to ``rgb_array``. + camera_name (str, optional): The camera name. + camera_id (int, optional): The camera id. + width (int, optional): The width of the rendered image. Defaults to 256. + height (int, optional): The height of the rendered image. Defaults to 256. 
+ """ + super().__init__(env_id) + self._env_id = env_id + if num_envs == 1: + self._env = gymnasium.make( + id=env_id, + autoreset=False, + render_mode=kwargs.get('render_mode'), + ) + self._env_specific_setting() + assert isinstance(self._env.action_space, Box), 'Only support Box action space.' + assert isinstance( + self._env.observation_space, + Box, + ), 'Only support Box observation space.' + self._action_space = self._env.action_space + self._observation_space = self._env.observation_space + else: + raise NotImplementedError('Only support num_envs=1 now.') + self._device = torch.device(device) + self._episodic_violation: list[float] = [] + self._num_envs = num_envs + self._metadata = self._env.metadata + self.env_spec_log = {'Metrics/Max_angle_violation': 0.0} + + def _env_specific_setting(self) -> None: + """Execute some specific setting for environments. + + Some algorithms based on control barrier functions have made fine-tuning adjustments to the environment. + We have organized these adjustments and encapsulated them in this function. + """ + if self._env_id == 'Pendulum-v1': + self._env.unwrapped.max_torque = 15.0 # type: ignore + self._env.unwrapped.max_speed = 60.0 # type: ignore + self._env.unwrapped.action_space = spaces.Box( + low=-self._env.unwrapped.max_torque, # type: ignore + high=self._env.unwrapped.max_torque, # type: ignore + shape=(1,), + ) + high = np.array([1.0, 1.0, self._env.unwrapped.max_speed], dtype=np.float32) # type: ignore + self._env.unwrapped.observation_space = spaces.Box(low=-high, high=high) + self._env.dt = 0.05 # type: ignore + + def step( + self, + action: torch.Tensor, + ) -> tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + dict[str, Any], + ]: + """Step the environment. + + .. note:: + + OmniSafe use auto reset wrapper to reset the environment when the episode is + terminated. So the ``obs`` will be the first observation of the next episode. 
+ And the true ``final_observation`` in ``info`` will be stored in the ``final_observation`` key of ``info``. + + Args: + action (torch.Tensor): Action to take. + + Returns: + observation: Agent's observation of the current environment. + reward: Amount of reward returned after previous action. + cost: Amount of cost returned after previous action. + terminated: Whether the episode has ended. + truncated: Whether the episode has been truncated due to a time limit. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). + """ + obs, reward, terminated, truncated, info = self._env.step( + action.detach().cpu().numpy(), + ) + obs, reward, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, terminated, truncated) + ) + cost = torch.abs(torch.atan2(obs[1], obs[0])).to(self._device) + self._episodic_violation.append(cost.item()) + + if 'final_observation' in info: + info['final_observation'] = np.array( + [ + array if array is not None else np.zeros(obs.shape[-1]) + for array in info['final_observation'] + ], + ) + info['final_observation'] = torch.as_tensor( + info['final_observation'], + dtype=torch.float32, + device=self._device, + ) + + return obs, reward, cost, terminated, truncated, info + + def spec_log(self, logger: Logger) -> None: + """Log specific environment into logger. + + Max angle violation in one episode. + + .. note:: + This function will be called after each episode. + + Args: + logger (Logger): The logger to use for logging. + """ + logger.store({'Metrics/Max_angle_violation': max(self._episodic_violation)}) + self._episodic_violation = [] + + def reset( + self, + seed: int | None = None, + options: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, dict]: + """Reset the environment. + + Args: + seed (int, optional): The random seed. Defaults to None. + options (dict[str, Any], optional): The options for the environment. Defaults to None. 
+ + Returns: + observation: Agent's observation of the current environment. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). + """ + obs, info = self._env.reset(seed=seed, options=options) + if self._env_id == 'Pendulum-v1': + while self._env.unwrapped.state[0] > 1.0 or self._env.unwrapped.state[0] < -1.0: # type: ignore + obs, info = self._env.reset(options=options) + return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info + + @property + def max_episode_steps(self) -> int: + """The max steps per episode.""" + return self._env.spec.max_episode_steps + + def set_seed(self, seed: int) -> None: + """Set the seed for the environment. + + Args: + seed (int): Seed to set. + """ + self.reset(seed=seed) + + def render(self) -> Any: + """Render the environment. + + Returns: + Rendered environment. + """ + return self._env.render() + + def close(self) -> None: + """Close the environment.""" + self._env.close() + + @property + def unwrapped(self) -> gymnasium.Env: + """Return the original interface of environment.""" + return self._env.unwrapped diff --git a/omnisafe/envs/classic_control/__init__.py b/omnisafe/envs/classic_control/__init__.py index d899a41de..9c8e7b35a 100644 --- a/omnisafe/envs/classic_control/__init__.py +++ b/omnisafe/envs/classic_control/__init__.py @@ -13,4 +13,5 @@ # limitations under the License. # ============================================================================== """Environment implementations from papers.""" -from omnisafe.envs.classic_control import envs_from_crabs + +from omnisafe.envs.classic_control import envs_from_crabs, envs_from_rcbf diff --git a/omnisafe/envs/classic_control/envs_from_rcbf.py b/omnisafe/envs/classic_control/envs_from_rcbf.py new file mode 100644 index 000000000..211c8a352 --- /dev/null +++ b/omnisafe/envs/classic_control/envs_from_rcbf.py @@ -0,0 +1,189 @@ +# Copyright 2024 OmniSafe Team. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Interface of control barrier function-based environments.""" + +# mypy: ignore-errors +# pylint: disable=all + +from __future__ import annotations + +from typing import Any, Callable + +import gymnasium +import numpy as np +from gymnasium import spaces + + +class UnicycleEnv(gymnasium.Env): + """Environment from `The Soft Actor-Critic algorithm with Robust Control Barrier Function`.""" + + def __init__(self) -> None: + """Initialize the unicycle environment.""" + super().__init__() + + self.dynamics_mode = 'Unicycle' + self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,)) + self.safe_action_space = spaces.Box(low=-2.5, high=2.5, shape=(2,)) + self.observation_space = spaces.Box(low=-1e10, high=1e10, shape=(7,)) + self.bds = np.array([[-3.0, -3.0], [3.0, 3.0]]) + + self.dt = 0.02 + self.max_episode_steps = 1000 + self.reward_goal = 1.0 + self.goal_size = 0.3 + self.state = None + self.episode_step = 0 + self.initial_state = np.array( + [[-2.5, -2.5, 0.0], [-2.5, 2.5, 0.0], [-2.5, 0.0, 0.0], [2.5, -2.5, np.pi / 2]], + ) + self.goal_pos = np.array([2.5, 2.5]) + self.rand_init = False + + self.reset() + + self.get_f, self.get_g = self._get_dynamics() + self.disturb_mean = np.zeros((3,)) + self.disturb_covar = np.diag([0.005, 0.005, 0.05]) * 20 + self.hazards = [] + + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 
'location': 1.5 * np.array([0.0, 0.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, 1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([-1.0, -1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, -1.0])}, + ) + self.hazards.append( + {'type': 'circle', 'radius': 0.6, 'location': 1.5 * np.array([1.0, 1.0])}, + ) + self.viewer = None + + def step( + self, + action: np.ndarray, + ) -> tuple[np.ndarray, float, float, bool, bool, dict[str, Any]]: + """Step the environment.""" + action = np.clip(action, -1.0, 1.0) + state, reward, cost, terminated, truncated, info = self._step(action) + return self.get_obs(), reward, cost, terminated, truncated, info + + def _step(self, action: np.ndarray) -> tuple: + """The details of step dynamics.""" + self.state += self.dt * (self.get_f(self.state) + self.get_g(self.state) @ action) + self.state -= self.dt * 0.1 * self.get_g(self.state) @ np.array([np.cos(self.state[2]), 0]) + + self.episode_step += 1 + + dist_goal = self._goal_dist() + reward = self.last_goal_dist - dist_goal + self.last_goal_dist = dist_goal + terminated = False + if self.goal_met(): + reward += self.reward_goal + terminated = True + truncated = self.episode_step >= self.max_episode_steps + + cost = 0.0 + for hazard in self.hazards: + if hazard['type'] == 'circle': + cost += 0.1 * ( + np.sum((self.state[:2] - hazard['location']) ** 2) < hazard['radius'] ** 2 + ) + + return self.state, reward, cost, terminated, truncated, {} + + def goal_met(self) -> bool: + """Return whether meeting the goal.""" + return np.linalg.norm(self.state[:2] - self.goal_pos) <= self.goal_size + + def reset(self, seed: int | None = None, options: dict | None = None) -> tuple: + """Reset the environment.""" + self.episode_step = 0 + + if self.rand_init: + self.state = np.copy(self.initial_state[np.random.randint(self.initial_state.shape[0])]) 
+ else: + self.state = np.copy(self.initial_state[0]) + + self.last_goal_dist = self._goal_dist() + + return self.get_obs(), {} + + def render(self, mode: str = 'human') -> np.ndarray: + """Get the image of the running environment.""" + raise NotImplementedError + + def get_obs(self) -> np.ndarray: + """Given the state, this function returns corresponding observation. + + Returns: + Observation: np.ndarray. + """ + rel_loc = self.goal_pos - self.state[:2] + goal_dist = np.linalg.norm(rel_loc) + goal_compass = self.obs_compass() + + return np.array( + [ + self.state[0], + self.state[1], + np.cos(self.state[2]), + np.sin(self.state[2]), + goal_compass[0], + goal_compass[1], + np.exp(-goal_dist), + ], + ) + + def obs_compass(self) -> np.ndarray: + """Return a robot-centric compass observation of a list of positions.""" + vec = self.goal_pos - self.state[:2] + R = np.array( + [ + [np.cos(self.state[2]), -np.sin(self.state[2])], + [np.sin(self.state[2]), np.cos(self.state[2])], + ], + ) + vec = np.matmul(vec, R) + vec /= np.sqrt(np.sum(np.square(vec))) + 0.001 + return vec + + def _get_dynamics(self) -> tuple[Callable, Callable]: + + def get_f(state: np.ndarray) -> np.ndarray: + """Function to compute the drift dynamics 'f(x)' of the system.""" + return np.zeros(state.shape) + + def get_g(state: np.ndarray) -> np.ndarray: + """Function to compute the control dynamics 'g(x)' of the system.""" + theta = state[2] + return np.array([[np.cos(theta), 0], [np.sin(theta), 0], [0, 1.0]]) + + return get_f, get_g + + def _goal_dist(self) -> np.ndarray: + """Calculate the distance between the goal.""" + return np.linalg.norm(self.goal_pos - self.state[:2]) + + def close(self) -> None: + """Close the instance of environment.""" + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/omnisafe/envs/rcbf_env.py b/omnisafe/envs/rcbf_env.py new file mode 100644 index 000000000..983528489 --- /dev/null +++ b/omnisafe/envs/rcbf_env.py @@ -0,0 +1,173 @@ +# Copyright 2024 
OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Interface of control barrier function-based environments.""" + +# mypy: ignore-errors +# pylint: disable=all + +from __future__ import annotations + +from typing import Any, ClassVar + +import numpy as np +import torch + +from omnisafe.envs.classic_control.envs_from_rcbf import UnicycleEnv +from omnisafe.envs.core import CMDP, env_register +from omnisafe.typing import Box + + +@env_register +class RobustBarrierFunctionEnv(CMDP): + """Interface of robust control barrier function-based environments. + + .. warning:: + Since environments based on control barrier functions require special judgment and control + of environmental dynamics, they do not support the use of vectorized environments for + parallelization. + + Attributes: + need_auto_reset_wrapper (bool): Whether to use auto reset wrapper. + need_time_limit_wrapper (bool): Whether to use time limit wrapper. 
+ """ + + need_auto_reset_wrapper = True + need_time_limit_wrapper = False + _support_envs: ClassVar[list[str]] = [ + 'Unicycle', + ] + + def __init__( + self, + env_id: str, + num_envs: int = 1, + device: str = 'cpu', + **kwargs: Any, + ) -> None: + """Initialize the robust control barrier function-based environments.""" + super().__init__(env_id) + self._env_id = env_id + if num_envs == 1: + if self._env_id == 'Unicycle': + self._env = UnicycleEnv() + else: + raise NotImplementedError('Only support Unicycle now.') + assert isinstance(self._env.action_space, Box), 'Only support Box action space.' + assert isinstance( + self._env.observation_space, + Box, + ), 'Only support Box observation space.' + self._action_space = self._env.action_space + self._observation_space = self._env.observation_space + else: + raise NotImplementedError('Only support num_envs=1 now.') + self._device = torch.device(device) + + self._num_envs = num_envs + self._metadata = self._env.metadata + + def step( + self, + action: torch.Tensor, + ) -> tuple[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + dict[str, Any], + ]: + """Step the environment. + + .. note:: + + OmniSafe use auto reset wrapper to reset the environment when the episode is + terminated. So the ``obs`` will be the first observation of the next episode. + And the true ``final_observation`` in ``info`` will be stored in the ``final_observation`` key of ``info``. + + Args: + action (torch.Tensor): Action to take. + + Returns: + observation: Agent's observation of the current environment. + reward: Amount of reward returned after previous action. + cost: Amount of cost returned after previous action. + terminated: Whether the episode has ended. + truncated: Whether the episode has been truncated due to a time limit. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). 
+ """ + obs, reward, cost, terminated, truncated, info = self._env.step( + action.detach().cpu().numpy(), + ) + obs, reward, cost, terminated, truncated = ( + torch.as_tensor(x, dtype=torch.float32, device=self._device) + for x in (obs, reward, cost, terminated, truncated) + ) + if 'final_observation' in info: + info['final_observation'] = np.array( + [ + array if array is not None else np.zeros(obs.shape[-1]) + for array in info['final_observation'] + ], + ) + info['final_observation'] = torch.as_tensor( + info['final_observation'], + dtype=torch.float32, + device=self._device, + ) + + return obs, reward, cost, terminated, truncated, info + + def reset( + self, + seed: int | None = None, + options: dict[str, Any] | None = None, + ) -> tuple[torch.Tensor, dict]: + """Reset the environment. + + Args: + seed (int, optional): The random seed. Defaults to None. + options (dict[str, Any], optional): The options for the environment. Defaults to None. + + Returns: + observation: Agent's observation of the current environment. + info: Auxiliary diagnostic information (helpful for debugging, and sometimes learning). + """ + obs, info = self._env.reset(seed=seed, options=options) + return torch.as_tensor(obs, dtype=torch.float32, device=self._device), info + + def set_seed(self, seed: int) -> None: + """Set the seed for the environment. + + Args: + seed (int): Seed to set. + """ + self.reset(seed=seed) + + def render(self) -> Any: + """Render the environment. + + Returns: + Rendered environment. 
+ """ + return self._env.render() + + def close(self) -> None: + """Close the environment.""" + self._env.close() + + def __getattr__(self, name: str) -> Any: + """Return the unwrapped environment attributes.""" + return getattr(self._env, name) diff --git a/omnisafe/envs/safety_gymnasium_modelbased.py b/omnisafe/envs/safety_gymnasium_modelbased.py index fe5ae5071..2e1a00598 100644 --- a/omnisafe/envs/safety_gymnasium_modelbased.py +++ b/omnisafe/envs/safety_gymnasium_modelbased.py @@ -181,6 +181,8 @@ def get_cost_from_obs_tensor(self, obs: torch.Tensor, is_binary: bool = True) -> elif len(obs.shape) == 3: batch_size = obs.shape[0] * obs.shape[1] hazard_obs = obs[:, :, hazards_key].reshape(batch_size, -1, 2) + else: + raise NotImplementedError hazards_dist = torch.sqrt(torch.sum(torch.square(hazard_obs), dim=2)).reshape( batch_size, -1, @@ -497,8 +499,10 @@ def reset( self.get_lidar_from_coordinate(flat_coordinate_obs) info['obs_original'] = obs_original info['goal_met'] = False - obs = torch.as_tensor(flat_coordinate_obs, dtype=torch.float32, device=self._device) + else: + obs = torch.as_tensor(obs_original, dtype=torch.float32, device=self._device) + return obs, info def set_seed(self, seed: int) -> None: diff --git a/omnisafe/evaluator.py b/omnisafe/evaluator.py index 8732d6e34..088c8b4af 100644 --- a/omnisafe/evaluator.py +++ b/omnisafe/evaluator.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,6 +13,8 @@ # limitations under the License. 
# ============================================================================== """Implementation of Evaluator.""" +# mypy: ignore-errors + from __future__ import annotations @@ -37,6 +39,8 @@ SafeARCPlanner, ) from omnisafe.common import Normalizer +from omnisafe.common.barrier_comp import BarrierCompensator +from omnisafe.common.barrier_solver import PendulumSolver from omnisafe.common.control_barrier_function.crabs.models import ( AddGaussianNoise, CrabsCore, @@ -47,6 +51,9 @@ from omnisafe.common.control_barrier_function.crabs.optimizers import Barrier from omnisafe.common.control_barrier_function.crabs.utils import Normalizer as CRABSNormalizer from omnisafe.common.control_barrier_function.crabs.utils import create_model_and_trainer +from omnisafe.common.gp_model import DynamicsModel +from omnisafe.common.robust_barrier_solver import CBFQPLayer +from omnisafe.common.robust_gp_model import DynamicsModel as RoboustDynamicsModel from omnisafe.envs.core import CMDP, make from omnisafe.envs.wrapper import ActionRepeat, ActionScale, ObsNormalize, TimeLimit from omnisafe.models.actor import ActorBuilder @@ -94,6 +101,9 @@ def __init__( self._safety_obs = torch.ones(1) self._cost_count = torch.zeros(1) self.__set_render_mode(render_mode) + self._dynamics_model: DynamicsModel | RoboustDynamicsModel | None = None + self._solver: PendulumSolver | CBFQPLayer | None = None + self._compensator = None def __set_render_mode(self, render_mode: str) -> None: """Set the render mode. 
@@ -130,7 +140,7 @@ def __load_cfgs(self, save_dir: str) -> None: self._dict_cfgs = kwargs self._cfgs = Config.dict2config(kwargs) - # pylint: disable-next=too-many-branches + # pylint: disable-next=attribute-defined-outside-init,import-outside-toplevel,too-many-branches,too-many-locals def __load_model_and_env( self, save_dir: str, @@ -301,6 +311,45 @@ def __load_model_and_env( ) self._actor = actor_builder.build_actor(actor_type) self._actor.load_state_dict(model_params['pi']) + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + epoch = model_name.split('.pt')[0].split('-')[-1] + self._solver = PendulumSolver(action_size=self._env.action_space.shape[0]) + path = os.path.join( + save_dir, + 'gp_model_save', + f'gaussian_process_regressor_{epoch}.pkl', + ) + self._dynamics_model = DynamicsModel( + observation_size=observation_space.shape[0], + load_dir=path, + ) + + self._compensator = BarrierCompensator( + obs_dim=observation_space.shape[0], + act_dim=action_space.shape[0], + cfgs=self._cfgs['compensator_cfgs'], + ) + model_path = os.path.join(save_dir, 'torch_save', model_name) + try: + model_params = torch.load(model_path) + except FileNotFoundError as error: + raise FileNotFoundError( + 'The model is not found in the save directory.', + ) from error + self._compensator.load_state_dict(model_params['compensator']) + if self._cfgs['algo'] == 'SACRCBF': + epoch = model_name.split('.pt')[0].split('-')[-1] + self._solver = CBFQPLayer( + env=self._env, + device=self._cfgs['train_cfgs']['device'], + gamma_b=self._cfgs['cbf_cfgs']['gamma_b'], + l_p=self._cfgs['cbf_cfgs']['l_p'], + ) + self._dynamics_model = RoboustDynamicsModel(env=self._env) + self._dynamics_model.load_disturbance_models( + load_dir=os.path.join(self._save_dir, 'gp_model_save'), + epoch=epoch, + ) if self._cfgs['algo'] in ['CRABS']: self._init_crabs(model_params) @@ -396,6 +445,7 @@ def load_saved( self.__load_model_and_env(save_dir, model_name, env_kwargs) + # pylint: 
disable-next=too-many-locals,too-many-branches def evaluate( self, num_episodes: int = 10, @@ -452,13 +502,44 @@ def evaluate( raise ValueError( 'The policy must be provided or created before evaluating the agent.', ) + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + approx_compensating_act = self._compensator(obs=obs) + compensated_act_mean_raw = act + approx_compensating_act + [f, g, x, std] = self._dynamics_model.get_gp_dynamics(obs, use_prev_model=False) + compensating_act = self._solver.control_barrier( + original_action=compensated_act_mean_raw, + f=f, + g=g, + x=x, + std=std, + ) + act = compensated_act_mean_raw + compensating_act + + if self._cfgs['algo'] == 'SACRCBF': + state_batch = self._dynamics_model.get_state(obs) + mean_pred_batch, sigma_pred_batch = self._dynamics_model.predict_disturbance( + state_batch, + ) + safe_act = self._solver.get_safe_action( + state_batch, + act, + mean_pred_batch, + sigma_pred_batch, + ) + act = safe_act + obs, rew, cost, terminated, truncated, _ = self._env.step(act) if 'Saute' in self._cfgs['algo'] or 'Simmer' in self._cfgs['algo']: self._safety_obs -= cost.unsqueeze(-1) / self._safety_budget self._safety_obs /= self._cfgs.algo_cfgs.saute_gamma ep_ret += rew.item() - ep_cost += (cost_criteria**length) * cost.item() + + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + ep_cost = ep_cost if ep_cost > cost.item() else cost.item() + else: + ep_cost += (cost_criteria**length) * cost.item() + if ( 'EarlyTerminated' in self._cfgs['algo'] and ep_cost >= self._cfgs.algo_cfgs.cost_limit @@ -570,6 +651,36 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc ).reshape( -1, # to make sure the shape is (act_dim,) ) + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + approx_compensating_act = self._compensator(obs=obs) + compensated_act_mean_raw = act + approx_compensating_act + [f, g, x, std] = self._dynamics_model.get_gp_dynamics( + 
obs, + use_prev_model=False, + ) + compensating_act = self._solver.control_barrier( + original_action=compensated_act_mean_raw, + f=f, + g=g, + x=x, + std=std, + ) + act = compensated_act_mean_raw + compensating_act + + if self._cfgs['algo'] == 'SACRCBF': + state_batch = self._dynamics_model.get_state(obs) + mean_pred_batch, sigma_pred_batch = ( + self._dynamics_model.predict_disturbance( + state_batch, + ) + ) + safe_act = self._solver.get_safe_action( + state_batch, + act, + mean_pred_batch, + sigma_pred_batch, + ) + act = safe_act elif self._planner is not None: act = self._planner.output_action( obs.unsqueeze(0).to('cpu'), @@ -587,7 +698,10 @@ def render( # pylint: disable=too-many-locals,too-many-arguments,too-many-branc step += 1 done = bool(terminated or truncated) ep_ret += rew.item() - ep_cost += (cost_criteria**length) * cost.item() + if self._cfgs['algo'] == 'DDPGCBF' or self._cfgs['algo'] == 'TRPOCBF': + ep_cost = ep_cost if ep_cost > cost.item() else cost.item() + else: + ep_cost += (cost_criteria**length) * cost.item() if ( 'EarlyTerminated' in self._cfgs['algo'] and ep_cost >= self._cfgs.algo_cfgs.cost_limit diff --git a/omnisafe/models/actor/actor_builder.py b/omnisafe/models/actor/actor_builder.py index cd1a0df15..3f0b3e4a6 100644 --- a/omnisafe/models/actor/actor_builder.py +++ b/omnisafe/models/actor/actor_builder.py @@ -16,6 +16,7 @@ from __future__ import annotations +from omnisafe.models.actor.beta_learning_actor import BetaLearningActor from omnisafe.models.actor.gaussian_learning_actor import GaussianLearningActor from omnisafe.models.actor.gaussian_sac_actor import GaussianSACActor from omnisafe.models.actor.mlp_actor import MLPActor @@ -60,10 +61,13 @@ def build_actor( ) -> Actor: """Build actor network. - Currently, we support the following actor types: - - ``gaussian_learning``: Gaussian actor with learnable standard deviation parameters. - - ``gaussian_sac``: Gaussian actor with learnable standard deviation network. 
- - ``mlp``: Multi-layer perceptron actor, used in ``DDPG`` and ``TD3``. + This method supports multiple actor types, each corresponding to a different class: + - `gaussian_learning`: Returns a GaussianLearningActor with learnable std deviation parameters. + - `gaussian_sac`: Returns a GaussianSACActor with a learnable std deviation network. + - `mlp`: Returns an MLPActor, commonly used in DDPG and TD3 algorithms. + - `vae`: Returns a Variational Autoencoder (VAE) actor. + - `perturbation`: Returns a PerturbationActor. + - `beta`: Returns a BetaLearningActor. Args: actor_type (ActorType): Type of actor network, e.g. ``gaussian_learning``. @@ -114,6 +118,14 @@ def build_actor( activation=self._activation, weight_initialization_mode=self._weight_initialization_mode, ) + if actor_type == 'beta': + return BetaLearningActor( + self._obs_space, + self._act_space, + self._hidden_sizes, + activation=self._activation, + weight_initialization_mode=self._weight_initialization_mode, + ) raise NotImplementedError( f'Actor type {actor_type} is not implemented! ' f'Available actor types are: gaussian_learning, gaussian_sac, mlp, vae, perturbation.', diff --git a/omnisafe/models/actor/beta_learning_actor.py b/omnisafe/models/actor/beta_learning_actor.py new file mode 100644 index 000000000..e0ee6b3e9 --- /dev/null +++ b/omnisafe/models/actor/beta_learning_actor.py @@ -0,0 +1,141 @@ +# Copyright 2023 OmniSafe Team. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ==============================================================================
+"""Implementation of BetaLearningActor."""
+
+from __future__ import annotations
+
+import torch
+import torch.nn as nn
+from torch.distributions import Beta, Distribution
+
+from omnisafe.models.base import Actor
+from omnisafe.typing import Activation, InitFunction, OmnisafeSpace
+from omnisafe.utils.model import build_mlp_network
+
+
+# pylint: disable-next=too-many-instance-attributes
+class BetaLearningActor(Actor):
+    """Implementation of BetaLearningActor.
+
+    An actor that parameterizes a learnable Beta distribution over actions.
+    """
+
+    _current_dist: Beta
+
+    def __init__(
+        self,
+        obs_space: OmnisafeSpace,
+        act_space: OmnisafeSpace,
+        hidden_sizes: list[int],
+        activation: Activation = 'relu',
+        weight_initialization_mode: InitFunction = 'kaiming_uniform',
+    ) -> None:
+        """Initialize an instance of :class:`BetaLearningActor`."""
+        super().__init__(obs_space, act_space, hidden_sizes, activation, weight_initialization_mode)
+
+        self.mean: nn.Module = build_mlp_network(
+            sizes=[self._obs_dim, self._hidden_sizes[0], self._hidden_sizes[0]],
+            activation=activation,
+            output_activation='tanh',
+            weight_initialization_mode=weight_initialization_mode,
+        )
+
+        self.alpha_net: nn.Module = build_mlp_network(
+            sizes=[self._hidden_sizes[-1], self._act_dim],
+            activation='identity',
+            output_activation='softplus',
+            weight_initialization_mode=weight_initialization_mode,
+        )
+
+        self.beta_net: nn.Module = build_mlp_network(
+            sizes=[self._hidden_sizes[-1], self._act_dim],
+            activation='identity',
+            output_activation='softplus',
+            weight_initialization_mode=weight_initialization_mode,
+        )
+
+    def _distribution(self, obs: torch.Tensor) -> Beta:
+        """Get the distribution of the actor.
+
+        .. warning::
+            This method is not supposed to be called by users. You should call :meth:`forward`
+            instead.
+
+        Args:
+            obs (torch.Tensor): Observation from environments.
+
+        Returns:
+            The Beta distribution parameterized by the alpha and beta networks of the actor.
+ """ + mean = self.mean(obs) + alphas = 1.0 + self.alpha_net(mean) + betas = 1.0 + self.beta_net(mean) + return Beta(alphas, betas) + + def predict(self, obs: torch.Tensor, deterministic: bool = False) -> torch.Tensor: + """Predict the action given observation. + + The predicted action depends on the ``deterministic`` flag. + + - If ``deterministic`` is ``True``, the predicted action is the mean of the distribution. + - If ``deterministic`` is ``False``, the predicted action is sampled from the distribution. + + Args: + obs (torch.Tensor): Observation from environments. + deterministic (bool, optional): Whether to use deterministic policy. Defaults to False. + + Returns: + The mean of the distribution if deterministic is True, otherwise the sampled action. + """ + self._current_dist = self._distribution(obs) + self._after_inference = True + if deterministic: + return self._current_dist.mean + return self._current_dist.rsample() + + def forward(self, obs: torch.Tensor) -> Distribution: + """Forward method. + + Args: + obs (torch.Tensor): Observation from environments. + + Returns: + The current distribution. + """ + self._current_dist = self._distribution(obs) + self._after_inference = True + return self._current_dist + + def log_prob(self, act: torch.Tensor) -> torch.Tensor: + """Compute the log probability of the action given the current distribution. + + .. warning:: + You must call :meth:`forward` or :meth:`predict` before calling this method. + + Args: + act (torch.Tensor): Action from :meth:`predict` or :meth:`forward` . + + Returns: + Log probability of the action. 
+ """ + assert self._after_inference, 'log_prob() should be called after predict() or forward()' + self._after_inference = False + return self._current_dist.log_prob(act).sum(axis=-1) + + @property + def std(self) -> float: + """Standard deviation of the distribution.""" + return 1.0 + + @std.setter + def std(self, std: float) -> None: + pass diff --git a/omnisafe/typing.py b/omnisafe/typing.py index bf73b558f..492067e72 100644 --- a/omnisafe/typing.py +++ b/omnisafe/typing.py @@ -39,7 +39,7 @@ AdvatageEstimator = Literal['gae', 'gae-rtg', 'vtrace', 'plain'] InitFunction = Literal['kaiming_uniform', 'xavier_normal', 'glorot', 'xavier_uniform', 'orthogonal'] CriticType = Literal['v', 'q'] -ActorType = Literal['gaussian_learning', 'gaussian_sac', 'mlp', 'vae', 'perturbation'] +ActorType = Literal['gaussian_learning', 'gaussian_sac', 'mlp', 'vae', 'perturbation', 'beta'] DEVICE_CPU = torch.device('cpu') diff --git a/omnisafe/utils/plotter.py b/omnisafe/utils/plotter.py index 5bdbb7ec2..e592240be 100644 --- a/omnisafe/utils/plotter.py +++ b/omnisafe/utils/plotter.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -118,8 +118,7 @@ def plot_data( smoothed_x = np.convolve(x, y, 'same') / np.convolve(z, y, 'same') datum['Costs'] = smoothed_x - if isinstance(data, list): - data_to_plot = pd.concat(data, ignore_index=True) + data_to_plot = pd.concat(data, ignore_index=True) sns.lineplot( data=data_to_plot, x=xaxis, @@ -165,7 +164,13 @@ def plot_data( plt.tight_layout(pad=0.5) - def get_datasets(self, logdir: str, condition: str | None = None) -> list[DataFrame]: + def get_datasets( + self, + logdir: str, + condition: str | None = None, + reward_metrics: str = 'Metrics/EpRet', + cost_metrics: str = 'Metrics/EpCost', + ) -> list[DataFrame]: """Recursively look through logdir for files named "progress.txt". Assumes that any file "progress.txt" is a valid hit. @@ -173,9 +178,11 @@ def get_datasets(self, logdir: str, condition: str | None = None) -> list[DataFr Args: logdir (str): The directory to search for progress.txt files condition (str or None, optional): The condition label. Defaults to None. + reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'. + cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'. Returns: - The datasets. + list[DataFrame]: A list of DataFrame objects containing the datasets. Raise: FileNotFoundError: If the config file is not found. 
@@ -205,21 +212,21 @@ def get_datasets(self, logdir: str, condition: str | None = None) -> list[DataFr
                     self.units[condition1] += 1
                 try:
                     exp_data = pd.read_csv(os.path.join(root, 'progress.csv'))
-
                 except FileNotFoundError as error:
                     progress_path = os.path.join(root, 'progress.csv')
                     raise FileNotFoundError(f'Could not read from {progress_path}') from error
-                performance = (
-                    'Metrics/TestEpRet' if 'Metrics/TestEpRet' in exp_data else 'Metrics/EpRet'
-                )
-                cost_performance = (
-                    'Metrics/TestEpCost' if 'Metrics/TestEpCost' in exp_data else 'Metrics/EpCost'
-                )
+
+                if reward_metrics not in exp_data:
+                    raise KeyError(f'{reward_metrics} is not in data to plot!')
+
+                if cost_metrics not in exp_data:
+                    raise KeyError(f'{cost_metrics} is not in data to plot!')
+
                 exp_data.insert(len(exp_data.columns), 'Unit', unit)
                 exp_data.insert(len(exp_data.columns), 'Condition1', condition1)
                 exp_data.insert(len(exp_data.columns), 'Condition2', condition2)
-                exp_data.insert(len(exp_data.columns), 'Rewards', exp_data[performance])
-                exp_data.insert(len(exp_data.columns), 'Costs', exp_data[cost_performance])
+                exp_data.insert(len(exp_data.columns), 'Rewards', exp_data[reward_metrics])
+                exp_data.insert(len(exp_data.columns), 'Costs', exp_data[cost_metrics])
                 epoch = exp_data.get('Train/Epoch')
                 if epoch is None or steps_per_epoch is None:
                     raise ValueError('No Train/Epoch column in progress.csv')
@@ -237,6 +244,8 @@ def get_all_datasets(
         legend: list[str] | None = None,
         select: str | None = None,
         exclude: str | None = None,
+        reward_metrics: str = 'Metrics/EpRet',
+        cost_metrics: str = 'Metrics/EpCost',
     ) -> list[DataFrame]:
         """Get all the data from all the log directories.
@@ -249,6 +258,8 @@ def get_all_datasets(
             legend (list of str or None, optional): List of legend names. Defaults to None.
             select (str or None, optional): Select logdirs that contain this string. Defaults to None.
             exclude (str or None, optional): Exclude logdirs that contain this string. Defaults to None.
+            reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'.
+            cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'.
 
         Returns:
             All the data stored in a list of DataFrames.
@@ -286,13 +297,22 @@ def get_all_datasets(
         data = []
         if legend:
             for log, leg in zip(logdirs, legend):
-                data += self.get_datasets(log, leg)
+                data += self.get_datasets(
+                    log,
+                    leg,
+                    cost_metrics=cost_metrics,
+                    reward_metrics=reward_metrics,
+                )
         else:
             for log in logdirs:
-                data += self.get_datasets(log)
+                data += self.get_datasets(
+                    log,
+                    cost_metrics=cost_metrics,
+                    reward_metrics=reward_metrics,
+                )
         return data
 
-    # pylint: disable-next=too-many-arguments
+    # pylint: disable-next=too-many-arguments, too-many-locals
     def make_plots(
         self,
         all_logdirs: list[str],
@@ -309,6 +329,8 @@ def make_plots(
         save_name: str | None = None,
         save_format: str = 'png',
         show_image: bool = False,
+        reward_metrics: str = 'Metrics/EpRet',
+        cost_metrics: str = 'Metrics/EpCost',
     ) -> None:
         """Make plots from the data in the specified log directories.
@@ -356,9 +378,18 @@ def make_plots(
                 to ``png``.
             show_image (bool, optional): Optional flag. If set, the plot will be displayed on screen.
                 Defaults to ``False``.
+            reward_metrics (str, optional): The column name for reward metrics. Defaults to 'Metrics/EpRet'.
+            cost_metrics (str, optional): The column name for cost metrics. Defaults to 'Metrics/EpCost'.
         """
         assert xaxis is not None, 'Must specify xaxis'
-        data = self.get_all_datasets(all_logdirs, legend, select, exclude)
+        data = self.get_all_datasets(
+            all_logdirs,
+            legend,
+            select,
+            exclude,
+            cost_metrics=cost_metrics,
+            reward_metrics=reward_metrics,
+        )
         condition = 'Condition2' if count else 'Condition1'
         # choose what to show on main curve: mean? max? min?
estimator = getattr(np, estimator) diff --git a/omnisafe/utils/tools.py b/omnisafe/utils/tools.py index 2c0c626eb..d5be5369d 100644 --- a/omnisafe/utils/tools.py +++ b/omnisafe/utils/tools.py @@ -356,3 +356,40 @@ def get_device(device: torch.device | str | int = DEVICE_CPU) -> torch.device: return torch.device('cpu') return device + + +def to_tensor( + x: np.ndarray, + dtype: torch.dtype, + device: torch.device, + requires_grad: bool = False, +) -> torch.Tensor: + """Convert a numpy array to a torch tensor of specified type and device. + + Args: + x (np.ndarray): A numpy array to be converted. + dtype (torch.dtype): The desired data type for the tensor. + device (torch.device): The device to store the tensor on. + requires_grad (bool): If True, gradients will be computed for operations involving this tensor. + + Returns: + torch.Tensor: A torch tensor representation of the input array. + """ + return torch.from_numpy(x).type(dtype).to(device).requires_grad_(requires_grad) + + +def sort_vertices_cclockwise(vertices: np.ndarray) -> np.ndarray: + """Sort vertices of a 2D convex polygon in counter-clockwise direction. + + Args: + vertices (np.ndarray): An array of shape (n_v, 2) where n_v is the number of vertices. + + Returns: + np.ndarray: An array of vertices sorted in counter-clockwise direction. + """ + assert vertices.shape[1] == 2, f'Vertices must each have dimension 2, got {vertices.shape[1]}' + polygon_center = vertices.sum(axis=0, keepdims=True) / vertices.shape[0] # (1, d) + rel_vecs = vertices - polygon_center + thetas = np.arctan2(rel_vecs[:, 1], rel_vecs[:, 0]) + idxs = np.argsort(thetas) + return vertices[idxs, :] diff --git a/omnisafe/version.py b/omnisafe/version.py index 0295dccbf..bb545ba26 100644 --- a/omnisafe/version.py +++ b/omnisafe/version.py @@ -1,4 +1,4 @@ -# Copyright 2023 OmniSafe Team. All Rights Reserved. +# Copyright 2024 OmniSafe Team. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,8 +25,8 @@ try: prefix, sep, suffix = ( - subprocess.check_output( - ['git', 'describe', '--abbrev=7'], # noqa: S603,S607 + subprocess.check_output( # noqa: S603 + ['git', 'describe', '--abbrev=7'], # noqa: S607 cwd=os.path.dirname(os.path.abspath(__file__)), stderr=subprocess.DEVNULL, text=True, diff --git a/pyproject.toml b/pyproject.toml index a74b46723..d7351aeb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,11 @@ dependencies = [ "matplotlib >= 3.7.1", "gdown >= 4.6.0", "pytorch_lightning >= 2.2.2", + "cvxopt== 1.3.2", + "gpytorch== 1.11", + "joblib == 1.3.2", + "qpth == 0.0.16", + "scikit_learn == 1.3.2" ] dynamic = ["version", "entry-points"] @@ -125,9 +130,8 @@ ignore-words = "docs/source/spelling_wordlist.txt" # Sync with requires-python target-version = "py38" line-length = 100 -show-source = true src = ["omnisafe", "tests", "examples"] -select = [ +lint.select = [ "E", "W", # pycodestyle "F", # pyflakes "UP", # pyupgrade @@ -148,7 +152,7 @@ select = [ "TID", # flake8-tidy-imports "RUF", # ruff ] -ignore = [ +lint.ignore = [ # E501: line too long # W505: doc line too long # too long docstring due to long example blocks @@ -167,9 +171,9 @@ ignore = [ # use alias for import convention (e.g., `import torch.nn as nn`) "PLR0402", ] -typing-modules = ["omnisafe.typing"] +lint.typing-modules = ["omnisafe.typing"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = [ "F401", # unused-import ] @@ -231,15 +235,15 @@ typing-modules = ["omnisafe.typing"] "ANN003", # Missing type annotation ] -[tool.ruff.flake8-annotations] +[tool.ruff.lint.flake8-annotations] allow-star-arg-any = true -[tool.ruff.flake8-quotes] +[tool.ruff.lint.flake8-quotes] docstring-quotes = "double" multiline-quotes = "double" inline-quotes = "single" -[tool.ruff.flake8-tidy-imports] 
+[tool.ruff.lint.flake8-tidy-imports] ban-relative-imports = "all" [tool.pytest.ini_options] diff --git a/requirements.txt b/requirements.txt index 0abf5e41a..03fec36c3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,8 @@ seaborn >= 0.12.2 pandas >= 1.5.3 matplotlib >= 3.7.1 gdown >= 4.6.0 +cvxopt==1.3.2 +gpytorch==1.11 +joblib==1.3.2 +qpth==0.0.16 +scikit_learn==1.3.2 diff --git a/tests/test_buffer.py b/tests/test_buffer.py index 0fee90a46..b284b9e10 100644 --- a/tests/test_buffer.py +++ b/tests/test_buffer.py @@ -79,7 +79,7 @@ def test_vector_onpolicy_buffer( assert ( vector_buffer.standardized_adv_r == standardized_adv_r ), f'vector_buffer.sstandardized_adv_r is {vector_buffer.sstandardized_adv_r}' - assert vector_buffer.buffers is not [], f'vector_buffer.buffers is {vector_buffer.buffers}' + assert vector_buffer.buffers != [], f'vector_buffer.buffers is {vector_buffer.buffers}' # checking the store function obs_dim = obs_space.shape[0] diff --git a/tests/test_policy.py b/tests/test_policy.py index 79810d0b9..21ed70782 100644 --- a/tests/test_policy.py +++ b/tests/test_policy.py @@ -38,6 +38,8 @@ pid_lagrange_policy = ['TRPOPID', 'CPPOPID'] early_terminated_policy = ['TRPOEarlyTerminated', 'PPOEarlyTerminated'] offline_policy = ['BCQ', 'BCQLag', 'CRR', 'CCRR', 'VAEBC'] +cbf_policy = ['TRPOCBF', 'DDPGCBF', 'PPOBetaCBF'] +auto_alpha = [True, False] model_cfgs = { 'linear_lr_decay': True, @@ -52,6 +54,53 @@ optim_case = [0, 1, 2, 3, 4] +@helpers.parametrize(algo=cbf_policy) +def test_cbf(algo): + env_id = 'Pendulum-v1' + + custom_cfgs = { + 'train_cfgs': { + 'total_steps': 200, + 'vector_env_nums': 1, + 'torch_threads': 4, + }, + 'algo_cfgs': { + 'steps_per_epoch': 200, + }, + 'logger_cfgs': { + 'use_wandb': False, + 'save_model_freq': 1, + }, + } + agent = omnisafe.Agent(algo, env_id, custom_cfgs=custom_cfgs) + agent.learn() + + +@helpers.parametrize(auto_alpha=auto_alpha) +def test_rcbf(auto_alpha): + env_id = 'Unicycle' + + custom_cfgs = { + 
'train_cfgs': { + 'total_steps': 1000, + 'vector_env_nums': 1, + 'torch_threads': 4, + }, + 'algo_cfgs': { + 'start_learning_steps': 998, + 'update_iters': 1, + 'auto_alpha': auto_alpha, + }, + 'logger_cfgs': { + 'use_wandb': False, + 'save_model_freq': 1, + }, + } + agent = omnisafe.Agent('SACRCBF', env_id, custom_cfgs=custom_cfgs) + agent.learn() + agent.evaluate(num_episodes=1) + + @helpers.parametrize(optim_case=optim_case) def test_cpo(optim_case): agent = omnisafe.Agent('CPO', 'Test-v0', custom_cfgs={}) @@ -337,9 +386,6 @@ def test_off_lag_policy(algo): agent.learn() -auto_alpha = [True, False] - - @helpers.parametrize(auto_alpha=auto_alpha) def test_sac_policy(auto_alpha): """Test sac algorithms."""